feat(monitoring): Add comprehensive oCIS monitoring
Integrates oCIS services into the monitoring stack by:
- Adding a new scrape configuration to VictoriaMetrics to collect metrics from oCIS services in the 'ocis' namespace.
- Introducing a new "ocis Overview" Grafana dashboard. This dashboard includes panels for user experience (proxy), service health, storage activity (uploads/downloads), and resource utilization, all leveraging the VictoriaMetrics datasource.
This commit is contained in:
parent
33c52be1c5
commit
e3c472a828
|
|
@ -62,3 +62,81 @@ dashboards:
|
||||||
gnetId: 15757
|
gnetId: 15757
|
||||||
revision: 37
|
revision: 37
|
||||||
datasource: VictoriaMetrics
|
datasource: VictoriaMetrics
|
||||||
|
ocis:
|
||||||
|
datasource: VictoriaMetrics
|
||||||
|
json: |-
|
||||||
|
{
|
||||||
|
"annotations": {"list": []},
|
||||||
|
"editable": true,
|
||||||
|
"graphTooltip": 1,
|
||||||
|
"links": [],
|
||||||
|
"panels": [
|
||||||
|
{"collapsed": false, "gridPos": {"h": 1, "w": 24, "x": 0, "y": 0}, "id": 100, "panels": [], "title": "User experience (proxy)", "type": "row"},
|
||||||
|
|
||||||
|
{"datasource": "VictoriaMetrics", "fieldConfig": {"defaults": {"color": {"mode": "thresholds"}, "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}]}, "unit": "reqps"}, "overrides": []}, "gridPos": {"h": 4, "w": 6, "x": 0, "y": 1}, "id": 1, "options": {"colorMode": "value", "graphMode": "area", "justifyMode": "auto", "orientation": "auto", "reduceOptions": {"calcs": ["lastNotNull"]}, "textMode": "auto"}, "targets": [{"datasource": "VictoriaMetrics", "expr": "sum(rate(ocis_proxy_requests_total[5m]))", "refId": "A"}], "title": "Proxy req/s", "type": "stat"},
|
||||||
|
|
||||||
|
{"datasource": "VictoriaMetrics", "fieldConfig": {"defaults": {"color": {"mode": "thresholds"}, "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}, {"color": "yellow", "value": 1}, {"color": "red", "value": 5}]}, "unit": "percent", "min": 0, "max": 100}, "overrides": []}, "gridPos": {"h": 4, "w": 6, "x": 6, "y": 1}, "id": 2, "options": {"colorMode": "value", "graphMode": "area", "justifyMode": "auto", "orientation": "auto", "reduceOptions": {"calcs": ["lastNotNull"]}, "textMode": "auto"}, "targets": [{"datasource": "VictoriaMetrics", "expr": "100 * sum(rate(ocis_proxy_errors_total[5m])) / clamp_min(sum(rate(ocis_proxy_requests_total[5m])), 0.001)", "refId": "A"}], "title": "Proxy error %", "type": "stat"},
|
||||||
|
|
||||||
|
{"datasource": "VictoriaMetrics", "fieldConfig": {"defaults": {"color": {"mode": "palette-classic"}, "custom": {"drawStyle": "line", "fillOpacity": 10, "lineWidth": 1, "showPoints": "never"}, "unit": "s"}, "overrides": []}, "gridPos": {"h": 8, "w": 12, "x": 12, "y": 1}, "id": 3, "options": {"legend": {"displayMode": "table", "placement": "right", "showLegend": true, "calcs": ["lastNotNull"]}, "tooltip": {"mode": "multi"}}, "targets": [{"datasource": "VictoriaMetrics", "expr": "histogram_quantile(0.50, sum by (method, le) (rate(ocis_proxy_duration_seconds_bucket[5m])))", "legendFormat": "p50 {{method}}", "refId": "A"}, {"datasource": "VictoriaMetrics", "expr": "histogram_quantile(0.95, sum by (method, le) (rate(ocis_proxy_duration_seconds_bucket[5m])))", "legendFormat": "p95 {{method}}", "refId": "B"}, {"datasource": "VictoriaMetrics", "expr": "histogram_quantile(0.99, sum by (method, le) (rate(ocis_proxy_duration_seconds_bucket[5m])))", "legendFormat": "p99 {{method}}", "refId": "C"}], "title": "Proxy latency by method", "type": "timeseries"},
|
||||||
|
|
||||||
|
{"datasource": "VictoriaMetrics", "fieldConfig": {"defaults": {"color": {"mode": "palette-classic"}, "custom": {"drawStyle": "line", "fillOpacity": 20, "lineWidth": 1, "stacking": {"mode": "normal"}}, "unit": "reqps"}, "overrides": []}, "gridPos": {"h": 4, "w": 12, "x": 0, "y": 5}, "id": 4, "options": {"legend": {"displayMode": "list", "placement": "bottom", "showLegend": true}, "tooltip": {"mode": "multi"}}, "targets": [{"datasource": "VictoriaMetrics", "expr": "sum by (method) (rate(ocis_proxy_requests_total[5m]))", "legendFormat": "{{method}}", "refId": "A"}], "title": "Proxy requests by method", "type": "timeseries"},
|
||||||
|
|
||||||
|
{"collapsed": false, "gridPos": {"h": 1, "w": 24, "x": 0, "y": 9}, "id": 101, "panels": [], "title": "Service health", "type": "row"},
|
||||||
|
|
||||||
|
{"datasource": "VictoriaMetrics", "fieldConfig": {"defaults": {"color": {"mode": "thresholds"}, "custom": {"fillOpacity": 80, "lineWidth": 0}, "mappings": [{"type": "value", "options": {"0": {"text": "DOWN", "color": "red"}, "1": {"text": "UP", "color": "green"}}}], "thresholds": {"mode": "absolute", "steps": [{"color": "red", "value": null}, {"color": "green", "value": 1}]}}, "overrides": []}, "gridPos": {"h": 24, "w": 18, "x": 0, "y": 10}, "id": 5, "options": {"alignValue": "left", "legend": {"displayMode": "list", "placement": "bottom", "showLegend": false}, "mergeValues": true, "rowHeight": 0.9, "showValue": "never", "tooltip": {"mode": "single"}}, "targets": [{"datasource": "VictoriaMetrics", "expr": "up{job=\"ocis\"}", "legendFormat": "{{ocis_service}}", "refId": "A"}], "title": "ocis services up/down", "type": "state-timeline"},
|
||||||
|
|
||||||
|
{"datasource": "VictoriaMetrics", "fieldConfig": {"defaults": {"color": {"mode": "thresholds"}, "thresholds": {"mode": "absolute", "steps": [{"color": "red", "value": null}, {"color": "yellow", "value": 95}, {"color": "green", "value": 100}]}, "unit": "percent", "min": 0, "max": 100}, "overrides": []}, "gridPos": {"h": 4, "w": 6, "x": 18, "y": 10}, "id": 6, "options": {"colorMode": "value", "graphMode": "none", "justifyMode": "auto", "orientation": "auto", "reduceOptions": {"calcs": ["lastNotNull"]}, "textMode": "auto"}, "targets": [{"datasource": "VictoriaMetrics", "expr": "100 * count(up{job=\"ocis\"} == 1) / count(up{job=\"ocis\"})", "refId": "A"}], "title": "Services up", "type": "stat"},
|
||||||
|
|
||||||
|
{"datasource": "VictoriaMetrics", "fieldConfig": {"defaults": {"color": {"mode": "thresholds"}, "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}, {"color": "yellow", "value": 1}, {"color": "red", "value": 5}]}}, "overrides": []}, "gridPos": {"h": 4, "w": 6, "x": 18, "y": 14}, "id": 7, "options": {"colorMode": "value", "graphMode": "area", "justifyMode": "auto", "orientation": "auto", "reduceOptions": {"calcs": ["lastNotNull"]}, "textMode": "auto"}, "targets": [{"datasource": "VictoriaMetrics", "expr": "sum(changes(process_start_time_seconds{job=\"ocis\"}[1h]))", "refId": "A"}], "title": "Restarts (last 1h)", "type": "stat"},
|
||||||
|
|
||||||
|
{"collapsed": false, "gridPos": {"h": 1, "w": 24, "x": 0, "y": 34}, "id": 102, "panels": [], "title": "Storage activity (uploads/downloads)", "type": "row"},
|
||||||
|
|
||||||
|
{"datasource": "VictoriaMetrics", "fieldConfig": {"defaults": {"color": {"mode": "thresholds"}, "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}]}}, "overrides": []}, "gridPos": {"h": 4, "w": 6, "x": 0, "y": 35}, "id": 8, "options": {"colorMode": "value", "graphMode": "area", "justifyMode": "auto", "orientation": "auto", "reduceOptions": {"calcs": ["lastNotNull"]}, "textMode": "auto"}, "targets": [{"datasource": "VictoriaMetrics", "expr": "sum(reva_upload_active{ocis_service=\"storageusers\"})", "refId": "A"}], "title": "Active uploads", "type": "stat"},
|
||||||
|
|
||||||
|
{"datasource": "VictoriaMetrics", "fieldConfig": {"defaults": {"color": {"mode": "thresholds"}, "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}]}}, "overrides": []}, "gridPos": {"h": 4, "w": 6, "x": 6, "y": 35}, "id": 9, "options": {"colorMode": "value", "graphMode": "area", "justifyMode": "auto", "orientation": "auto", "reduceOptions": {"calcs": ["lastNotNull"]}, "textMode": "auto"}, "targets": [{"datasource": "VictoriaMetrics", "expr": "sum(reva_download_active{ocis_service=\"storageusers\"})", "refId": "A"}], "title": "Active downloads", "type": "stat"},
|
||||||
|
|
||||||
|
{"datasource": "VictoriaMetrics", "fieldConfig": {"defaults": {"color": {"mode": "palette-classic"}, "custom": {"drawStyle": "line", "fillOpacity": 10, "lineWidth": 1}, "unit": "ops"}, "overrides": [{"matcher": {"id": "byName", "options": "aborted"}, "properties": [{"id": "color", "value": {"mode": "fixed", "fixedColor": "red"}}]}, {"matcher": {"id": "byName", "options": "finalized"}, "properties": [{"id": "color", "value": {"mode": "fixed", "fixedColor": "green"}}]}]}, "gridPos": {"h": 8, "w": 12, "x": 12, "y": 35}, "id": 10, "options": {"legend": {"displayMode": "table", "placement": "bottom", "showLegend": true, "calcs": ["lastNotNull", "sum"]}, "tooltip": {"mode": "multi"}}, "targets": [{"datasource": "VictoriaMetrics", "expr": "rate(reva_upload_sessions_initiated{ocis_service=\"storageusers\"}[5m])", "legendFormat": "initiated", "refId": "A"}, {"datasource": "VictoriaMetrics", "expr": "rate(reva_upload_sessions_finalized{ocis_service=\"storageusers\"}[5m])", "legendFormat": "finalized", "refId": "B"}, {"datasource": "VictoriaMetrics", "expr": "rate(reva_upload_sessions_aborted{ocis_service=\"storageusers\"}[5m])", "legendFormat": "aborted", "refId": "C"}, {"datasource": "VictoriaMetrics", "expr": "rate(reva_upload_sessions_restarted{ocis_service=\"storageusers\"}[5m])", "legendFormat": "restarted", "refId": "D"}], "title": "Upload sessions/s", "type": "timeseries"},
|
||||||
|
|
||||||
|
{"datasource": "VictoriaMetrics", "fieldConfig": {"defaults": {"color": {"mode": "palette-classic"}, "custom": {"drawStyle": "line", "fillOpacity": 20, "lineWidth": 1}, "unit": "Bps"}, "overrides": []}, "gridPos": {"h": 4, "w": 12, "x": 0, "y": 39}, "id": 11, "options": {"legend": {"displayMode": "table", "placement": "bottom", "showLegend": true, "calcs": ["lastNotNull", "max"]}, "tooltip": {"mode": "multi"}}, "targets": [{"datasource": "VictoriaMetrics", "expr": "sum by (pod) (rate(container_network_receive_bytes_total{namespace=\"ocis\", pod=~\"(proxy|storageusers)-.*\", interface=\"eth0\"}[5m]))", "legendFormat": "rx {{pod}}", "refId": "A"}, {"datasource": "VictoriaMetrics", "expr": "sum by (pod) (rate(container_network_transmit_bytes_total{namespace=\"ocis\", pod=~\"(proxy|storageusers)-.*\", interface=\"eth0\"}[5m]))", "legendFormat": "tx {{pod}}", "refId": "B"}], "title": "Network throughput (proxy + storageusers)", "type": "timeseries"},
|
||||||
|
|
||||||
|
{"collapsed": false, "gridPos": {"h": 1, "w": 24, "x": 0, "y": 43}, "id": 103, "panels": [], "title": "Resources (filtered by $ocis_service)", "type": "row"},
|
||||||
|
|
||||||
|
{"datasource": "VictoriaMetrics", "fieldConfig": {"defaults": {"color": {"mode": "palette-classic"}, "custom": {"drawStyle": "line", "fillOpacity": 5, "lineWidth": 1, "showPoints": "never"}, "unit": "bytes"}, "overrides": []}, "gridPos": {"h": 8, "w": 12, "x": 0, "y": 44}, "id": 12, "options": {"legend": {"displayMode": "table", "placement": "right", "showLegend": true, "calcs": ["lastNotNull", "max"]}, "tooltip": {"mode": "multi", "sort": "desc"}}, "targets": [{"datasource": "VictoriaMetrics", "expr": "go_memstats_heap_inuse_bytes{job=\"ocis\", ocis_service=~\"$ocis_service\"}", "legendFormat": "{{ocis_service}}", "refId": "A"}], "title": "Heap in use by service", "type": "timeseries"},
|
||||||
|
|
||||||
|
{"datasource": "VictoriaMetrics", "fieldConfig": {"defaults": {"color": {"mode": "palette-classic"}, "custom": {"drawStyle": "line", "fillOpacity": 5, "lineWidth": 1, "showPoints": "never"}, "unit": "short"}, "overrides": []}, "gridPos": {"h": 8, "w": 12, "x": 12, "y": 44}, "id": 13, "options": {"legend": {"displayMode": "table", "placement": "right", "showLegend": true, "calcs": ["lastNotNull", "max"]}, "tooltip": {"mode": "multi", "sort": "desc"}}, "targets": [{"datasource": "VictoriaMetrics", "expr": "go_goroutines{job=\"ocis\", ocis_service=~\"$ocis_service\"}", "legendFormat": "{{ocis_service}}", "refId": "A"}], "title": "Goroutines by service (leak detector)", "type": "timeseries"},
|
||||||
|
|
||||||
|
{"datasource": "VictoriaMetrics", "fieldConfig": {"defaults": {"color": {"mode": "palette-classic"}, "custom": {"drawStyle": "line", "fillOpacity": 5, "lineWidth": 1}, "unit": "percentunit"}, "overrides": []}, "gridPos": {"h": 8, "w": 12, "x": 0, "y": 52}, "id": 14, "options": {"legend": {"displayMode": "table", "placement": "right", "showLegend": true, "calcs": ["lastNotNull", "max"]}, "tooltip": {"mode": "multi", "sort": "desc"}}, "targets": [{"datasource": "VictoriaMetrics", "expr": "rate(process_cpu_seconds_total{job=\"ocis\", ocis_service=~\"$ocis_service\"}[5m])", "legendFormat": "{{ocis_service}}", "refId": "A"}], "title": "CPU by service (cores)", "type": "timeseries"},
|
||||||
|
|
||||||
|
{"datasource": "VictoriaMetrics", "fieldConfig": {"defaults": {"color": {"mode": "palette-classic"}, "custom": {"drawStyle": "line", "fillOpacity": 5, "lineWidth": 1}, "unit": "bytes"}, "overrides": []}, "gridPos": {"h": 8, "w": 12, "x": 12, "y": 52}, "id": 15, "options": {"legend": {"displayMode": "table", "placement": "right", "showLegend": true, "calcs": ["lastNotNull", "max"]}, "tooltip": {"mode": "multi", "sort": "desc"}}, "targets": [{"datasource": "VictoriaMetrics", "expr": "process_resident_memory_bytes{job=\"ocis\", ocis_service=~\"$ocis_service\"}", "legendFormat": "{{ocis_service}}", "refId": "A"}], "title": "Resident memory by service", "type": "timeseries"}
|
||||||
|
],
|
||||||
|
"refresh": "30s",
|
||||||
|
"schemaVersion": 39,
|
||||||
|
"tags": ["ocis"],
|
||||||
|
"templating": {
|
||||||
|
"list": [
|
||||||
|
{
|
||||||
|
"current": {"selected": false, "text": "All", "value": "$__all"},
|
||||||
|
"datasource": "VictoriaMetrics",
|
||||||
|
"hide": 0,
|
||||||
|
"includeAll": true,
|
||||||
|
"label": "Service",
|
||||||
|
"multi": true,
|
||||||
|
"name": "ocis_service",
|
||||||
|
"options": [],
|
||||||
|
"query": {"query": "label_values(up{job=\"ocis\"}, ocis_service)", "refId": "StandardVariableQuery"},
|
||||||
|
"refresh": 2,
|
||||||
|
"regex": "",
|
||||||
|
"skipUrlSync": false,
|
||||||
|
"sort": 1,
|
||||||
|
"type": "query"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"time": {"from": "now-3h", "to": "now"},
|
||||||
|
"timepicker": {},
|
||||||
|
"timezone": "browser",
|
||||||
|
"title": "ocis Overview",
|
||||||
|
"uid": "ocis-overview",
|
||||||
|
"version": 1,
|
||||||
|
"weekStart": ""
|
||||||
|
}
|
||||||
|
|
|
||||||
|
|
@ -80,3 +80,16 @@ server:
|
||||||
- source_labels: [__meta_kubernetes_namespace, __meta_kubernetes_service_name, __meta_kubernetes_endpoint_port_name]
|
- source_labels: [__meta_kubernetes_namespace, __meta_kubernetes_service_name, __meta_kubernetes_endpoint_port_name]
|
||||||
action: keep
|
action: keep
|
||||||
regex: cert-manager;cert-manager;tcp-prometheus-servicemonitor
|
regex: cert-manager;cert-manager;tcp-prometheus-servicemonitor
|
||||||
|
- job_name: ocis
|
||||||
|
kubernetes_sd_configs:
|
||||||
|
- role: endpoints
|
||||||
|
namespaces:
|
||||||
|
names: [ocis]
|
||||||
|
relabel_configs:
|
||||||
|
- source_labels: [__meta_kubernetes_service_label_ocis_metrics, __meta_kubernetes_endpoint_port_name]
|
||||||
|
action: keep
|
||||||
|
regex: enabled;metrics-debug
|
||||||
|
- source_labels: [__meta_kubernetes_service_name]
|
||||||
|
target_label: ocis_service
|
||||||
|
- source_labels: [__meta_kubernetes_pod_name]
|
||||||
|
target_label: pod
|
||||||
|
|
|
||||||
|
|
@ -1,5 +1,81 @@
|
||||||
apiVersion: v1
|
apiVersion: v1
|
||||||
data: {}
|
data:
|
||||||
|
ocis.json: |-
|
||||||
|
{
|
||||||
|
"annotations": {"list": []},
|
||||||
|
"editable": true,
|
||||||
|
"graphTooltip": 1,
|
||||||
|
"links": [],
|
||||||
|
"panels": [
|
||||||
|
{"collapsed": false, "gridPos": {"h": 1, "w": 24, "x": 0, "y": 0}, "id": 100, "panels": [], "title": "User experience (proxy)", "type": "row"},
|
||||||
|
|
||||||
|
{"datasource": "VictoriaMetrics", "fieldConfig": {"defaults": {"color": {"mode": "thresholds"}, "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}]}, "unit": "reqps"}, "overrides": []}, "gridPos": {"h": 4, "w": 6, "x": 0, "y": 1}, "id": 1, "options": {"colorMode": "value", "graphMode": "area", "justifyMode": "auto", "orientation": "auto", "reduceOptions": {"calcs": ["lastNotNull"]}, "textMode": "auto"}, "targets": [{"datasource": "VictoriaMetrics", "expr": "sum(rate(ocis_proxy_requests_total[5m]))", "refId": "A"}], "title": "Proxy req/s", "type": "stat"},
|
||||||
|
|
||||||
|
{"datasource": "VictoriaMetrics", "fieldConfig": {"defaults": {"color": {"mode": "thresholds"}, "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}, {"color": "yellow", "value": 1}, {"color": "red", "value": 5}]}, "unit": "percent", "min": 0, "max": 100}, "overrides": []}, "gridPos": {"h": 4, "w": 6, "x": 6, "y": 1}, "id": 2, "options": {"colorMode": "value", "graphMode": "area", "justifyMode": "auto", "orientation": "auto", "reduceOptions": {"calcs": ["lastNotNull"]}, "textMode": "auto"}, "targets": [{"datasource": "VictoriaMetrics", "expr": "100 * sum(rate(ocis_proxy_errors_total[5m])) / clamp_min(sum(rate(ocis_proxy_requests_total[5m])), 0.001)", "refId": "A"}], "title": "Proxy error %", "type": "stat"},
|
||||||
|
|
||||||
|
{"datasource": "VictoriaMetrics", "fieldConfig": {"defaults": {"color": {"mode": "palette-classic"}, "custom": {"drawStyle": "line", "fillOpacity": 10, "lineWidth": 1, "showPoints": "never"}, "unit": "s"}, "overrides": []}, "gridPos": {"h": 8, "w": 12, "x": 12, "y": 1}, "id": 3, "options": {"legend": {"displayMode": "table", "placement": "right", "showLegend": true, "calcs": ["lastNotNull"]}, "tooltip": {"mode": "multi"}}, "targets": [{"datasource": "VictoriaMetrics", "expr": "histogram_quantile(0.50, sum by (method, le) (rate(ocis_proxy_duration_seconds_bucket[5m])))", "legendFormat": "p50 {{method}}", "refId": "A"}, {"datasource": "VictoriaMetrics", "expr": "histogram_quantile(0.95, sum by (method, le) (rate(ocis_proxy_duration_seconds_bucket[5m])))", "legendFormat": "p95 {{method}}", "refId": "B"}, {"datasource": "VictoriaMetrics", "expr": "histogram_quantile(0.99, sum by (method, le) (rate(ocis_proxy_duration_seconds_bucket[5m])))", "legendFormat": "p99 {{method}}", "refId": "C"}], "title": "Proxy latency by method", "type": "timeseries"},
|
||||||
|
|
||||||
|
{"datasource": "VictoriaMetrics", "fieldConfig": {"defaults": {"color": {"mode": "palette-classic"}, "custom": {"drawStyle": "line", "fillOpacity": 20, "lineWidth": 1, "stacking": {"mode": "normal"}}, "unit": "reqps"}, "overrides": []}, "gridPos": {"h": 4, "w": 12, "x": 0, "y": 5}, "id": 4, "options": {"legend": {"displayMode": "list", "placement": "bottom", "showLegend": true}, "tooltip": {"mode": "multi"}}, "targets": [{"datasource": "VictoriaMetrics", "expr": "sum by (method) (rate(ocis_proxy_requests_total[5m]))", "legendFormat": "{{method}}", "refId": "A"}], "title": "Proxy requests by method", "type": "timeseries"},
|
||||||
|
|
||||||
|
{"collapsed": false, "gridPos": {"h": 1, "w": 24, "x": 0, "y": 9}, "id": 101, "panels": [], "title": "Service health", "type": "row"},
|
||||||
|
|
||||||
|
{"datasource": "VictoriaMetrics", "fieldConfig": {"defaults": {"color": {"mode": "thresholds"}, "custom": {"fillOpacity": 80, "lineWidth": 0}, "mappings": [{"type": "value", "options": {"0": {"text": "DOWN", "color": "red"}, "1": {"text": "UP", "color": "green"}}}], "thresholds": {"mode": "absolute", "steps": [{"color": "red", "value": null}, {"color": "green", "value": 1}]}}, "overrides": []}, "gridPos": {"h": 24, "w": 18, "x": 0, "y": 10}, "id": 5, "options": {"alignValue": "left", "legend": {"displayMode": "list", "placement": "bottom", "showLegend": false}, "mergeValues": true, "rowHeight": 0.9, "showValue": "never", "tooltip": {"mode": "single"}}, "targets": [{"datasource": "VictoriaMetrics", "expr": "up{job=\"ocis\"}", "legendFormat": "{{ocis_service}}", "refId": "A"}], "title": "ocis services up/down", "type": "state-timeline"},
|
||||||
|
|
||||||
|
{"datasource": "VictoriaMetrics", "fieldConfig": {"defaults": {"color": {"mode": "thresholds"}, "thresholds": {"mode": "absolute", "steps": [{"color": "red", "value": null}, {"color": "yellow", "value": 95}, {"color": "green", "value": 100}]}, "unit": "percent", "min": 0, "max": 100}, "overrides": []}, "gridPos": {"h": 4, "w": 6, "x": 18, "y": 10}, "id": 6, "options": {"colorMode": "value", "graphMode": "none", "justifyMode": "auto", "orientation": "auto", "reduceOptions": {"calcs": ["lastNotNull"]}, "textMode": "auto"}, "targets": [{"datasource": "VictoriaMetrics", "expr": "100 * count(up{job=\"ocis\"} == 1) / count(up{job=\"ocis\"})", "refId": "A"}], "title": "Services up", "type": "stat"},
|
||||||
|
|
||||||
|
{"datasource": "VictoriaMetrics", "fieldConfig": {"defaults": {"color": {"mode": "thresholds"}, "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}, {"color": "yellow", "value": 1}, {"color": "red", "value": 5}]}}, "overrides": []}, "gridPos": {"h": 4, "w": 6, "x": 18, "y": 14}, "id": 7, "options": {"colorMode": "value", "graphMode": "area", "justifyMode": "auto", "orientation": "auto", "reduceOptions": {"calcs": ["lastNotNull"]}, "textMode": "auto"}, "targets": [{"datasource": "VictoriaMetrics", "expr": "sum(changes(process_start_time_seconds{job=\"ocis\"}[1h]))", "refId": "A"}], "title": "Restarts (last 1h)", "type": "stat"},
|
||||||
|
|
||||||
|
{"collapsed": false, "gridPos": {"h": 1, "w": 24, "x": 0, "y": 34}, "id": 102, "panels": [], "title": "Storage activity (uploads/downloads)", "type": "row"},
|
||||||
|
|
||||||
|
{"datasource": "VictoriaMetrics", "fieldConfig": {"defaults": {"color": {"mode": "thresholds"}, "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}]}}, "overrides": []}, "gridPos": {"h": 4, "w": 6, "x": 0, "y": 35}, "id": 8, "options": {"colorMode": "value", "graphMode": "area", "justifyMode": "auto", "orientation": "auto", "reduceOptions": {"calcs": ["lastNotNull"]}, "textMode": "auto"}, "targets": [{"datasource": "VictoriaMetrics", "expr": "sum(reva_upload_active{ocis_service=\"storageusers\"})", "refId": "A"}], "title": "Active uploads", "type": "stat"},
|
||||||
|
|
||||||
|
{"datasource": "VictoriaMetrics", "fieldConfig": {"defaults": {"color": {"mode": "thresholds"}, "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}]}}, "overrides": []}, "gridPos": {"h": 4, "w": 6, "x": 6, "y": 35}, "id": 9, "options": {"colorMode": "value", "graphMode": "area", "justifyMode": "auto", "orientation": "auto", "reduceOptions": {"calcs": ["lastNotNull"]}, "textMode": "auto"}, "targets": [{"datasource": "VictoriaMetrics", "expr": "sum(reva_download_active{ocis_service=\"storageusers\"})", "refId": "A"}], "title": "Active downloads", "type": "stat"},
|
||||||
|
|
||||||
|
{"datasource": "VictoriaMetrics", "fieldConfig": {"defaults": {"color": {"mode": "palette-classic"}, "custom": {"drawStyle": "line", "fillOpacity": 10, "lineWidth": 1}, "unit": "ops"}, "overrides": [{"matcher": {"id": "byName", "options": "aborted"}, "properties": [{"id": "color", "value": {"mode": "fixed", "fixedColor": "red"}}]}, {"matcher": {"id": "byName", "options": "finalized"}, "properties": [{"id": "color", "value": {"mode": "fixed", "fixedColor": "green"}}]}]}, "gridPos": {"h": 8, "w": 12, "x": 12, "y": 35}, "id": 10, "options": {"legend": {"displayMode": "table", "placement": "bottom", "showLegend": true, "calcs": ["lastNotNull", "sum"]}, "tooltip": {"mode": "multi"}}, "targets": [{"datasource": "VictoriaMetrics", "expr": "rate(reva_upload_sessions_initiated{ocis_service=\"storageusers\"}[5m])", "legendFormat": "initiated", "refId": "A"}, {"datasource": "VictoriaMetrics", "expr": "rate(reva_upload_sessions_finalized{ocis_service=\"storageusers\"}[5m])", "legendFormat": "finalized", "refId": "B"}, {"datasource": "VictoriaMetrics", "expr": "rate(reva_upload_sessions_aborted{ocis_service=\"storageusers\"}[5m])", "legendFormat": "aborted", "refId": "C"}, {"datasource": "VictoriaMetrics", "expr": "rate(reva_upload_sessions_restarted{ocis_service=\"storageusers\"}[5m])", "legendFormat": "restarted", "refId": "D"}], "title": "Upload sessions/s", "type": "timeseries"},
|
||||||
|
|
||||||
|
{"datasource": "VictoriaMetrics", "fieldConfig": {"defaults": {"color": {"mode": "palette-classic"}, "custom": {"drawStyle": "line", "fillOpacity": 20, "lineWidth": 1}, "unit": "Bps"}, "overrides": []}, "gridPos": {"h": 4, "w": 12, "x": 0, "y": 39}, "id": 11, "options": {"legend": {"displayMode": "table", "placement": "bottom", "showLegend": true, "calcs": ["lastNotNull", "max"]}, "tooltip": {"mode": "multi"}}, "targets": [{"datasource": "VictoriaMetrics", "expr": "sum by (pod) (rate(container_network_receive_bytes_total{namespace=\"ocis\", pod=~\"(proxy|storageusers)-.*\", interface=\"eth0\"}[5m]))", "legendFormat": "rx {{pod}}", "refId": "A"}, {"datasource": "VictoriaMetrics", "expr": "sum by (pod) (rate(container_network_transmit_bytes_total{namespace=\"ocis\", pod=~\"(proxy|storageusers)-.*\", interface=\"eth0\"}[5m]))", "legendFormat": "tx {{pod}}", "refId": "B"}], "title": "Network throughput (proxy + storageusers)", "type": "timeseries"},
|
||||||
|
|
||||||
|
{"collapsed": false, "gridPos": {"h": 1, "w": 24, "x": 0, "y": 43}, "id": 103, "panels": [], "title": "Resources (filtered by $ocis_service)", "type": "row"},
|
||||||
|
|
||||||
|
{"datasource": "VictoriaMetrics", "fieldConfig": {"defaults": {"color": {"mode": "palette-classic"}, "custom": {"drawStyle": "line", "fillOpacity": 5, "lineWidth": 1, "showPoints": "never"}, "unit": "bytes"}, "overrides": []}, "gridPos": {"h": 8, "w": 12, "x": 0, "y": 44}, "id": 12, "options": {"legend": {"displayMode": "table", "placement": "right", "showLegend": true, "calcs": ["lastNotNull", "max"]}, "tooltip": {"mode": "multi", "sort": "desc"}}, "targets": [{"datasource": "VictoriaMetrics", "expr": "go_memstats_heap_inuse_bytes{job=\"ocis\", ocis_service=~\"$ocis_service\"}", "legendFormat": "{{ocis_service}}", "refId": "A"}], "title": "Heap in use by service", "type": "timeseries"},
|
||||||
|
|
||||||
|
{"datasource": "VictoriaMetrics", "fieldConfig": {"defaults": {"color": {"mode": "palette-classic"}, "custom": {"drawStyle": "line", "fillOpacity": 5, "lineWidth": 1, "showPoints": "never"}, "unit": "short"}, "overrides": []}, "gridPos": {"h": 8, "w": 12, "x": 12, "y": 44}, "id": 13, "options": {"legend": {"displayMode": "table", "placement": "right", "showLegend": true, "calcs": ["lastNotNull", "max"]}, "tooltip": {"mode": "multi", "sort": "desc"}}, "targets": [{"datasource": "VictoriaMetrics", "expr": "go_goroutines{job=\"ocis\", ocis_service=~\"$ocis_service\"}", "legendFormat": "{{ocis_service}}", "refId": "A"}], "title": "Goroutines by service (leak detector)", "type": "timeseries"},
|
||||||
|
|
||||||
|
{"datasource": "VictoriaMetrics", "fieldConfig": {"defaults": {"color": {"mode": "palette-classic"}, "custom": {"drawStyle": "line", "fillOpacity": 5, "lineWidth": 1}, "unit": "percentunit"}, "overrides": []}, "gridPos": {"h": 8, "w": 12, "x": 0, "y": 52}, "id": 14, "options": {"legend": {"displayMode": "table", "placement": "right", "showLegend": true, "calcs": ["lastNotNull", "max"]}, "tooltip": {"mode": "multi", "sort": "desc"}}, "targets": [{"datasource": "VictoriaMetrics", "expr": "rate(process_cpu_seconds_total{job=\"ocis\", ocis_service=~\"$ocis_service\"}[5m])", "legendFormat": "{{ocis_service}}", "refId": "A"}], "title": "CPU by service (cores)", "type": "timeseries"},
|
||||||
|
|
||||||
|
{"datasource": "VictoriaMetrics", "fieldConfig": {"defaults": {"color": {"mode": "palette-classic"}, "custom": {"drawStyle": "line", "fillOpacity": 5, "lineWidth": 1}, "unit": "bytes"}, "overrides": []}, "gridPos": {"h": 8, "w": 12, "x": 12, "y": 52}, "id": 15, "options": {"legend": {"displayMode": "table", "placement": "right", "showLegend": true, "calcs": ["lastNotNull", "max"]}, "tooltip": {"mode": "multi", "sort": "desc"}}, "targets": [{"datasource": "VictoriaMetrics", "expr": "process_resident_memory_bytes{job=\"ocis\", ocis_service=~\"$ocis_service\"}", "legendFormat": "{{ocis_service}}", "refId": "A"}], "title": "Resident memory by service", "type": "timeseries"}
|
||||||
|
],
|
||||||
|
"refresh": "30s",
|
||||||
|
"schemaVersion": 39,
|
||||||
|
"tags": ["ocis"],
|
||||||
|
"templating": {
|
||||||
|
"list": [
|
||||||
|
{
|
||||||
|
"current": {"selected": false, "text": "All", "value": "$__all"},
|
||||||
|
"datasource": "VictoriaMetrics",
|
||||||
|
"hide": 0,
|
||||||
|
"includeAll": true,
|
||||||
|
"label": "Service",
|
||||||
|
"multi": true,
|
||||||
|
"name": "ocis_service",
|
||||||
|
"options": [],
|
||||||
|
"query": {"query": "label_values(up{job=\"ocis\"}, ocis_service)", "refId": "StandardVariableQuery"},
|
||||||
|
"refresh": 2,
|
||||||
|
"regex": "",
|
||||||
|
"skipUrlSync": false,
|
||||||
|
"sort": 1,
|
||||||
|
"type": "query"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"time": {"from": "now-3h", "to": "now"},
|
||||||
|
"timepicker": {},
|
||||||
|
"timezone": "browser",
|
||||||
|
"title": "ocis Overview",
|
||||||
|
"uid": "ocis-overview",
|
||||||
|
"version": 1,
|
||||||
|
"weekStart": ""
|
||||||
|
}
|
||||||
kind: ConfigMap
|
kind: ConfigMap
|
||||||
metadata:
|
metadata:
|
||||||
annotations:
|
annotations:
|
||||||
|
|
|
||||||
|
|
@ -23,7 +23,7 @@ spec:
|
||||||
metadata:
|
metadata:
|
||||||
annotations:
|
annotations:
|
||||||
checksum/config: de8d6f16e9721409f5848bcc101e6aa9815e6455bd4fb9b59306159e705ac1cb
|
checksum/config: de8d6f16e9721409f5848bcc101e6aa9815e6455bd4fb9b59306159e705ac1cb
|
||||||
checksum/dashboards-json-config: 63ff5f7bd5ab0b6c241c689c0aa4d78be9bef984e63c1089dc988905fbb61f74
|
checksum/dashboards-json-config: 19d5c3d30cfa62a4b77ac98cab64f0bdcc3897e09531e37af7c9198f59124bf2
|
||||||
checksum/sc-dashboard-provider-config: e70bf6a851099d385178a76de9757bb0bef8299da6d8443602590e44f05fdf24
|
checksum/sc-dashboard-provider-config: e70bf6a851099d385178a76de9757bb0bef8299da6d8443602590e44f05fdf24
|
||||||
kubectl.kubernetes.io/default-container: grafana
|
kubectl.kubernetes.io/default-container: grafana
|
||||||
labels:
|
labels:
|
||||||
|
|
@ -113,6 +113,9 @@ spec:
|
||||||
name: storage
|
name: storage
|
||||||
- mountPath: /var/lib/grafana-search
|
- mountPath: /var/lib/grafana-search
|
||||||
name: search
|
name: search
|
||||||
|
- mountPath: /var/lib/grafana/dashboards/default/ocis.json
|
||||||
|
name: dashboards-default
|
||||||
|
subPath: ocis.json
|
||||||
- mountPath: /etc/grafana/provisioning/datasources/datasources.yaml
|
- mountPath: /etc/grafana/provisioning/datasources/datasources.yaml
|
||||||
name: config
|
name: config
|
||||||
subPath: datasources.yaml
|
subPath: datasources.yaml
|
||||||
|
|
|
||||||
|
|
@ -83,6 +83,24 @@ data:
|
||||||
- __meta_kubernetes_namespace
|
- __meta_kubernetes_namespace
|
||||||
- __meta_kubernetes_service_name
|
- __meta_kubernetes_service_name
|
||||||
- __meta_kubernetes_endpoint_port_name
|
- __meta_kubernetes_endpoint_port_name
|
||||||
|
- job_name: ocis
|
||||||
|
kubernetes_sd_configs:
|
||||||
|
- namespaces:
|
||||||
|
names:
|
||||||
|
- ocis
|
||||||
|
role: endpoints
|
||||||
|
relabel_configs:
|
||||||
|
- action: keep
|
||||||
|
regex: enabled;metrics-debug
|
||||||
|
source_labels:
|
||||||
|
- __meta_kubernetes_service_label_ocis_metrics
|
||||||
|
- __meta_kubernetes_endpoint_port_name
|
||||||
|
- source_labels:
|
||||||
|
- __meta_kubernetes_service_name
|
||||||
|
target_label: ocis_service
|
||||||
|
- source_labels:
|
||||||
|
- __meta_kubernetes_pod_name
|
||||||
|
target_label: pod
|
||||||
kind: ConfigMap
|
kind: ConfigMap
|
||||||
metadata:
|
metadata:
|
||||||
annotations:
|
annotations:
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue