feat(monitoring): Add comprehensive oCIS monitoring

Integrates oCIS services into the monitoring stack by:
- Adding a new scrape configuration to VictoriaMetrics to collect metrics from oCIS services in the 'ocis' namespace.
- Introducing a new "ocis Overview" Grafana dashboard. This dashboard includes panels for user experience (proxy), service health, storage activity (uploads/downloads), and resource utilization, all leveraging the VictoriaMetrics datasource.
This commit is contained in:
Felix Wolf 2026-05-03 01:19:53 +02:00
parent 33c52be1c5
commit ab4007a009
5 changed files with 190 additions and 2 deletions

View file

@ -62,3 +62,81 @@ dashboards:
gnetId: 15757
revision: 37
datasource: VictoriaMetrics
ocis:
datasource: VictoriaMetrics
json: |-
{
"annotations": {"list": []},
"editable": true,
"graphTooltip": 1,
"links": [],
"panels": [
{"collapsed": false, "gridPos": {"h": 1, "w": 24, "x": 0, "y": 0}, "id": 100, "panels": [], "title": "User experience (proxy)", "type": "row"},
{"datasource": "VictoriaMetrics", "fieldConfig": {"defaults": {"color": {"mode": "thresholds"}, "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}]}, "unit": "reqps"}, "overrides": []}, "gridPos": {"h": 4, "w": 6, "x": 0, "y": 1}, "id": 1, "options": {"colorMode": "value", "graphMode": "area", "justifyMode": "auto", "orientation": "auto", "reduceOptions": {"calcs": ["lastNotNull"]}, "textMode": "auto"}, "targets": [{"datasource": "VictoriaMetrics", "expr": "sum(rate(ocis_proxy_requests_total[5m]))", "refId": "A"}], "title": "Proxy req/s", "type": "stat"},
{"datasource": "VictoriaMetrics", "fieldConfig": {"defaults": {"color": {"mode": "thresholds"}, "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}, {"color": "yellow", "value": 1}, {"color": "red", "value": 5}]}, "unit": "percent", "min": 0, "max": 100}, "overrides": []}, "gridPos": {"h": 4, "w": 6, "x": 6, "y": 1}, "id": 2, "options": {"colorMode": "value", "graphMode": "area", "justifyMode": "auto", "orientation": "auto", "reduceOptions": {"calcs": ["lastNotNull"]}, "textMode": "auto"}, "targets": [{"datasource": "VictoriaMetrics", "expr": "100 * sum(rate(ocis_proxy_errors_total[5m])) / clamp_min(sum(rate(ocis_proxy_requests_total[5m])), 0.001)", "refId": "A"}], "title": "Proxy error %", "type": "stat"},
{"datasource": "VictoriaMetrics", "fieldConfig": {"defaults": {"color": {"mode": "palette-classic"}, "custom": {"drawStyle": "line", "fillOpacity": 10, "lineWidth": 1, "showPoints": "never"}, "unit": "s"}, "overrides": []}, "gridPos": {"h": 8, "w": 12, "x": 12, "y": 1}, "id": 3, "options": {"legend": {"displayMode": "table", "placement": "right", "showLegend": true, "calcs": ["lastNotNull"]}, "tooltip": {"mode": "multi"}}, "targets": [{"datasource": "VictoriaMetrics", "expr": "histogram_quantile(0.50, sum by (method, le) (rate(ocis_proxy_duration_seconds_bucket[5m])))", "legendFormat": "p50 {{method}}", "refId": "A"}, {"datasource": "VictoriaMetrics", "expr": "histogram_quantile(0.95, sum by (method, le) (rate(ocis_proxy_duration_seconds_bucket[5m])))", "legendFormat": "p95 {{method}}", "refId": "B"}, {"datasource": "VictoriaMetrics", "expr": "histogram_quantile(0.99, sum by (method, le) (rate(ocis_proxy_duration_seconds_bucket[5m])))", "legendFormat": "p99 {{method}}", "refId": "C"}], "title": "Proxy latency by method", "type": "timeseries"},
{"datasource": "VictoriaMetrics", "fieldConfig": {"defaults": {"color": {"mode": "palette-classic"}, "custom": {"drawStyle": "line", "fillOpacity": 20, "lineWidth": 1, "stacking": {"mode": "normal"}}, "unit": "reqps"}, "overrides": []}, "gridPos": {"h": 4, "w": 12, "x": 0, "y": 5}, "id": 4, "options": {"legend": {"displayMode": "list", "placement": "bottom", "showLegend": true}, "tooltip": {"mode": "multi"}}, "targets": [{"datasource": "VictoriaMetrics", "expr": "sum by (method) (rate(ocis_proxy_requests_total[5m]))", "legendFormat": "{{method}}", "refId": "A"}], "title": "Proxy requests by method", "type": "timeseries"},
{"collapsed": false, "gridPos": {"h": 1, "w": 24, "x": 0, "y": 9}, "id": 101, "panels": [], "title": "Service health", "type": "row"},
{"datasource": "VictoriaMetrics", "fieldConfig": {"defaults": {"color": {"mode": "thresholds"}, "custom": {"fillOpacity": 80, "lineWidth": 0}, "mappings": [{"type": "value", "options": {"0": {"text": "DOWN", "color": "red"}, "1": {"text": "UP", "color": "green"}}}], "thresholds": {"mode": "absolute", "steps": [{"color": "red", "value": null}, {"color": "green", "value": 1}]}}, "overrides": []}, "gridPos": {"h": 14, "w": 18, "x": 0, "y": 10}, "id": 5, "options": {"alignValue": "left", "legend": {"displayMode": "list", "placement": "bottom", "showLegend": false}, "mergeValues": true, "rowHeight": 0.9, "showValue": "never", "tooltip": {"mode": "single"}}, "targets": [{"datasource": "VictoriaMetrics", "expr": "up{job=\"ocis\"}", "legendFormat": "{{ocis_service}}", "refId": "A"}], "title": "ocis services up/down", "type": "state-timeline"},
{"datasource": "VictoriaMetrics", "description": "Percentage of scraped targets of job \"ocis\" whose up metric is 1. Uses avg(up) rather than count(up == 1) / count(up), because the filtered count is an empty vector when every target is down and the panel would then show 'No data' instead of 0%.", "fieldConfig": {"defaults": {"color": {"mode": "thresholds"}, "thresholds": {"mode": "absolute", "steps": [{"color": "red", "value": null}, {"color": "yellow", "value": 95}, {"color": "green", "value": 100}]}, "unit": "percent", "min": 0, "max": 100}, "overrides": []}, "gridPos": {"h": 4, "w": 6, "x": 18, "y": 10}, "id": 6, "options": {"colorMode": "value", "graphMode": "none", "justifyMode": "auto", "orientation": "auto", "reduceOptions": {"calcs": ["lastNotNull"]}, "textMode": "auto"}, "targets": [{"datasource": "VictoriaMetrics", "expr": "100 * avg(up{job=\"ocis\"})", "refId": "A"}], "title": "Services up", "type": "stat"},
{"datasource": "VictoriaMetrics", "fieldConfig": {"defaults": {"color": {"mode": "thresholds"}, "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}, {"color": "yellow", "value": 1}, {"color": "red", "value": 5}]}}, "overrides": []}, "gridPos": {"h": 4, "w": 6, "x": 18, "y": 14}, "id": 7, "options": {"colorMode": "value", "graphMode": "area", "justifyMode": "auto", "orientation": "auto", "reduceOptions": {"calcs": ["lastNotNull"]}, "textMode": "auto"}, "targets": [{"datasource": "VictoriaMetrics", "expr": "sum(changes(process_start_time_seconds{job=\"ocis\"}[1h]))", "refId": "A"}], "title": "Restarts (last 1h)", "type": "stat"},
{"collapsed": false, "gridPos": {"h": 1, "w": 24, "x": 0, "y": 24}, "id": 102, "panels": [], "title": "Storage activity (uploads/downloads)", "type": "row"},
{"datasource": "VictoriaMetrics", "fieldConfig": {"defaults": {"color": {"mode": "thresholds"}, "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}]}}, "overrides": []}, "gridPos": {"h": 4, "w": 6, "x": 0, "y": 25}, "id": 8, "options": {"colorMode": "value", "graphMode": "area", "justifyMode": "auto", "orientation": "auto", "reduceOptions": {"calcs": ["lastNotNull"]}, "textMode": "auto"}, "targets": [{"datasource": "VictoriaMetrics", "expr": "sum(reva_upload_active{ocis_service=\"storageusers\"})", "refId": "A"}], "title": "Active uploads", "type": "stat"},
{"datasource": "VictoriaMetrics", "fieldConfig": {"defaults": {"color": {"mode": "thresholds"}, "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}]}}, "overrides": []}, "gridPos": {"h": 4, "w": 6, "x": 6, "y": 25}, "id": 9, "options": {"colorMode": "value", "graphMode": "area", "justifyMode": "auto", "orientation": "auto", "reduceOptions": {"calcs": ["lastNotNull"]}, "textMode": "auto"}, "targets": [{"datasource": "VictoriaMetrics", "expr": "sum(reva_download_active{ocis_service=\"storageusers\"})", "refId": "A"}], "title": "Active downloads", "type": "stat"},
{"datasource": "VictoriaMetrics", "description": "Per-second rate of upload session events (initiated, finalized, aborted, restarted) for series labelled ocis_service=\"storageusers\". Each rate is summed across all matching targets so that every legend entry is a single series, matching the fixed legend names and the by-name colour overrides.", "fieldConfig": {"defaults": {"color": {"mode": "palette-classic"}, "custom": {"drawStyle": "line", "fillOpacity": 10, "lineWidth": 1}, "unit": "ops"}, "overrides": [{"matcher": {"id": "byName", "options": "aborted"}, "properties": [{"id": "color", "value": {"mode": "fixed", "fixedColor": "red"}}]}, {"matcher": {"id": "byName", "options": "finalized"}, "properties": [{"id": "color", "value": {"mode": "fixed", "fixedColor": "green"}}]}]}, "gridPos": {"h": 8, "w": 12, "x": 12, "y": 25}, "id": 10, "options": {"legend": {"displayMode": "table", "placement": "bottom", "showLegend": true, "calcs": ["lastNotNull", "sum"]}, "tooltip": {"mode": "multi"}}, "targets": [{"datasource": "VictoriaMetrics", "expr": "sum(rate(reva_upload_sessions_initiated{ocis_service=\"storageusers\"}[5m]))", "legendFormat": "initiated", "refId": "A"}, {"datasource": "VictoriaMetrics", "expr": "sum(rate(reva_upload_sessions_finalized{ocis_service=\"storageusers\"}[5m]))", "legendFormat": "finalized", "refId": "B"}, {"datasource": "VictoriaMetrics", "expr": "sum(rate(reva_upload_sessions_aborted{ocis_service=\"storageusers\"}[5m]))", "legendFormat": "aborted", "refId": "C"}, {"datasource": "VictoriaMetrics", "expr": "sum(rate(reva_upload_sessions_restarted{ocis_service=\"storageusers\"}[5m]))", "legendFormat": "restarted", "refId": "D"}], "title": "Upload sessions/s", "type": "timeseries"},
{"datasource": "VictoriaMetrics", "description": "Upload bytes received per second for series labelled ocis_service=\"storageusers\", summed across all matching targets so the single fixed legend entry corresponds to exactly one series.", "fieldConfig": {"defaults": {"color": {"mode": "palette-classic"}, "custom": {"drawStyle": "line", "fillOpacity": 20, "lineWidth": 1}, "unit": "Bps"}, "overrides": []}, "gridPos": {"h": 4, "w": 12, "x": 0, "y": 29}, "id": 11, "options": {"legend": {"displayMode": "list", "placement": "bottom", "showLegend": true}, "tooltip": {"mode": "single"}}, "targets": [{"datasource": "VictoriaMetrics", "expr": "sum(rate(reva_upload_sessions_bytes_received{ocis_service=\"storageusers\"}[5m]))", "legendFormat": "bytes received", "refId": "A"}], "title": "Upload throughput", "type": "timeseries"},
{"collapsed": false, "gridPos": {"h": 1, "w": 24, "x": 0, "y": 33}, "id": 103, "panels": [], "title": "Resources (filtered by $ocis_service)", "type": "row"},
{"datasource": "VictoriaMetrics", "fieldConfig": {"defaults": {"color": {"mode": "palette-classic"}, "custom": {"drawStyle": "line", "fillOpacity": 5, "lineWidth": 1, "showPoints": "never"}, "unit": "bytes"}, "overrides": []}, "gridPos": {"h": 8, "w": 12, "x": 0, "y": 34}, "id": 12, "options": {"legend": {"displayMode": "table", "placement": "right", "showLegend": true, "calcs": ["lastNotNull", "max"]}, "tooltip": {"mode": "multi", "sort": "desc"}}, "targets": [{"datasource": "VictoriaMetrics", "expr": "go_memstats_heap_inuse_bytes{job=\"ocis\", ocis_service=~\"$ocis_service\"}", "legendFormat": "{{ocis_service}}", "refId": "A"}], "title": "Heap in use by service", "type": "timeseries"},
{"datasource": "VictoriaMetrics", "fieldConfig": {"defaults": {"color": {"mode": "palette-classic"}, "custom": {"drawStyle": "line", "fillOpacity": 5, "lineWidth": 1, "showPoints": "never"}, "unit": "short"}, "overrides": []}, "gridPos": {"h": 8, "w": 12, "x": 12, "y": 34}, "id": 13, "options": {"legend": {"displayMode": "table", "placement": "right", "showLegend": true, "calcs": ["lastNotNull", "max"]}, "tooltip": {"mode": "multi", "sort": "desc"}}, "targets": [{"datasource": "VictoriaMetrics", "expr": "go_goroutines{job=\"ocis\", ocis_service=~\"$ocis_service\"}", "legendFormat": "{{ocis_service}}", "refId": "A"}], "title": "Goroutines by service (leak detector)", "type": "timeseries"},
{"datasource": "VictoriaMetrics", "description": "CPU seconds consumed per second for each selected service, i.e. the number of cores in use (1.0 = one full core). Shown as a plain number so the axis agrees with the '(cores)' title.", "fieldConfig": {"defaults": {"color": {"mode": "palette-classic"}, "custom": {"drawStyle": "line", "fillOpacity": 5, "lineWidth": 1}, "unit": "short"}, "overrides": []}, "gridPos": {"h": 8, "w": 12, "x": 0, "y": 42}, "id": 14, "options": {"legend": {"displayMode": "table", "placement": "right", "showLegend": true, "calcs": ["lastNotNull", "max"]}, "tooltip": {"mode": "multi", "sort": "desc"}}, "targets": [{"datasource": "VictoriaMetrics", "expr": "rate(process_cpu_seconds_total{job=\"ocis\", ocis_service=~\"$ocis_service\"}[5m])", "legendFormat": "{{ocis_service}}", "refId": "A"}], "title": "CPU by service (cores)", "type": "timeseries"},
{"datasource": "VictoriaMetrics", "fieldConfig": {"defaults": {"color": {"mode": "palette-classic"}, "custom": {"drawStyle": "line", "fillOpacity": 5, "lineWidth": 1}, "unit": "bytes"}, "overrides": []}, "gridPos": {"h": 8, "w": 12, "x": 12, "y": 42}, "id": 15, "options": {"legend": {"displayMode": "table", "placement": "right", "showLegend": true, "calcs": ["lastNotNull", "max"]}, "tooltip": {"mode": "multi", "sort": "desc"}}, "targets": [{"datasource": "VictoriaMetrics", "expr": "process_resident_memory_bytes{job=\"ocis\", ocis_service=~\"$ocis_service\"}", "legendFormat": "{{ocis_service}}", "refId": "A"}], "title": "Resident memory by service", "type": "timeseries"}
],
"refresh": "30s",
"schemaVersion": 39,
"tags": ["ocis"],
"templating": {
"list": [
{
"current": {"selected": false, "text": "All", "value": "$__all"},
"datasource": "VictoriaMetrics",
"hide": 0,
"includeAll": true,
"label": "Service",
"multi": true,
"name": "ocis_service",
"options": [],
"query": {"query": "label_values(up{job=\"ocis\"}, ocis_service)", "refId": "StandardVariableQuery"},
"refresh": 2,
"regex": "",
"skipUrlSync": false,
"sort": 1,
"type": "query"
}
]
},
"time": {"from": "now-3h", "to": "now"},
"timepicker": {},
"timezone": "browser",
"title": "ocis Overview",
"uid": "ocis-overview",
"version": 1,
"weekStart": ""
}

View file

@ -80,3 +80,16 @@ server:
- source_labels: [__meta_kubernetes_namespace, __meta_kubernetes_service_name, __meta_kubernetes_endpoint_port_name]
action: keep
regex: cert-manager;cert-manager;tcp-prometheus-servicemonitor
# Scrape job "ocis": discovers Kubernetes Endpoints objects, restricted to the `ocis` namespace.
- job_name: ocis
kubernetes_sd_configs:
- role: endpoints
namespaces:
names: [ocis]
relabel_configs:
# Keep a target only when the two source labels, joined by the default `;` separator,
# match `enabled;metrics-debug`: the Service-label meta value must be `enabled` and the
# endpoint port must be named `metrics-debug`. All other discovered targets are dropped.
# NOTE(review): the meta label is the sanitized form of the Service label name
# (presumably `ocis_metrics` or `ocis-metrics`) — confirm against the oCIS Service manifests.
- source_labels: [__meta_kubernetes_service_label_ocis_metrics, __meta_kubernetes_endpoint_port_name]
action: keep
regex: enabled;metrics-debug
# Copy the Kubernetes Service name into the `ocis_service` label.
- source_labels: [__meta_kubernetes_service_name]
target_label: ocis_service
# Copy the pod name into the `pod` label.
- source_labels: [__meta_kubernetes_pod_name]
target_label: pod

View file

@ -1,5 +1,81 @@
apiVersion: v1
data: {}
data:
ocis.json: |-
{
"annotations": {"list": []},
"editable": true,
"graphTooltip": 1,
"links": [],
"panels": [
{"collapsed": false, "gridPos": {"h": 1, "w": 24, "x": 0, "y": 0}, "id": 100, "panels": [], "title": "User experience (proxy)", "type": "row"},
{"datasource": "VictoriaMetrics", "fieldConfig": {"defaults": {"color": {"mode": "thresholds"}, "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}]}, "unit": "reqps"}, "overrides": []}, "gridPos": {"h": 4, "w": 6, "x": 0, "y": 1}, "id": 1, "options": {"colorMode": "value", "graphMode": "area", "justifyMode": "auto", "orientation": "auto", "reduceOptions": {"calcs": ["lastNotNull"]}, "textMode": "auto"}, "targets": [{"datasource": "VictoriaMetrics", "expr": "sum(rate(ocis_proxy_requests_total[5m]))", "refId": "A"}], "title": "Proxy req/s", "type": "stat"},
{"datasource": "VictoriaMetrics", "fieldConfig": {"defaults": {"color": {"mode": "thresholds"}, "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}, {"color": "yellow", "value": 1}, {"color": "red", "value": 5}]}, "unit": "percent", "min": 0, "max": 100}, "overrides": []}, "gridPos": {"h": 4, "w": 6, "x": 6, "y": 1}, "id": 2, "options": {"colorMode": "value", "graphMode": "area", "justifyMode": "auto", "orientation": "auto", "reduceOptions": {"calcs": ["lastNotNull"]}, "textMode": "auto"}, "targets": [{"datasource": "VictoriaMetrics", "expr": "100 * sum(rate(ocis_proxy_errors_total[5m])) / clamp_min(sum(rate(ocis_proxy_requests_total[5m])), 0.001)", "refId": "A"}], "title": "Proxy error %", "type": "stat"},
{"datasource": "VictoriaMetrics", "fieldConfig": {"defaults": {"color": {"mode": "palette-classic"}, "custom": {"drawStyle": "line", "fillOpacity": 10, "lineWidth": 1, "showPoints": "never"}, "unit": "s"}, "overrides": []}, "gridPos": {"h": 8, "w": 12, "x": 12, "y": 1}, "id": 3, "options": {"legend": {"displayMode": "table", "placement": "right", "showLegend": true, "calcs": ["lastNotNull"]}, "tooltip": {"mode": "multi"}}, "targets": [{"datasource": "VictoriaMetrics", "expr": "histogram_quantile(0.50, sum by (method, le) (rate(ocis_proxy_duration_seconds_bucket[5m])))", "legendFormat": "p50 {{method}}", "refId": "A"}, {"datasource": "VictoriaMetrics", "expr": "histogram_quantile(0.95, sum by (method, le) (rate(ocis_proxy_duration_seconds_bucket[5m])))", "legendFormat": "p95 {{method}}", "refId": "B"}, {"datasource": "VictoriaMetrics", "expr": "histogram_quantile(0.99, sum by (method, le) (rate(ocis_proxy_duration_seconds_bucket[5m])))", "legendFormat": "p99 {{method}}", "refId": "C"}], "title": "Proxy latency by method", "type": "timeseries"},
{"datasource": "VictoriaMetrics", "fieldConfig": {"defaults": {"color": {"mode": "palette-classic"}, "custom": {"drawStyle": "line", "fillOpacity": 20, "lineWidth": 1, "stacking": {"mode": "normal"}}, "unit": "reqps"}, "overrides": []}, "gridPos": {"h": 4, "w": 12, "x": 0, "y": 5}, "id": 4, "options": {"legend": {"displayMode": "list", "placement": "bottom", "showLegend": true}, "tooltip": {"mode": "multi"}}, "targets": [{"datasource": "VictoriaMetrics", "expr": "sum by (method) (rate(ocis_proxy_requests_total[5m]))", "legendFormat": "{{method}}", "refId": "A"}], "title": "Proxy requests by method", "type": "timeseries"},
{"collapsed": false, "gridPos": {"h": 1, "w": 24, "x": 0, "y": 9}, "id": 101, "panels": [], "title": "Service health", "type": "row"},
{"datasource": "VictoriaMetrics", "fieldConfig": {"defaults": {"color": {"mode": "thresholds"}, "custom": {"fillOpacity": 80, "lineWidth": 0}, "mappings": [{"type": "value", "options": {"0": {"text": "DOWN", "color": "red"}, "1": {"text": "UP", "color": "green"}}}], "thresholds": {"mode": "absolute", "steps": [{"color": "red", "value": null}, {"color": "green", "value": 1}]}}, "overrides": []}, "gridPos": {"h": 14, "w": 18, "x": 0, "y": 10}, "id": 5, "options": {"alignValue": "left", "legend": {"displayMode": "list", "placement": "bottom", "showLegend": false}, "mergeValues": true, "rowHeight": 0.9, "showValue": "never", "tooltip": {"mode": "single"}}, "targets": [{"datasource": "VictoriaMetrics", "expr": "up{job=\"ocis\"}", "legendFormat": "{{ocis_service}}", "refId": "A"}], "title": "ocis services up/down", "type": "state-timeline"},
{"datasource": "VictoriaMetrics", "fieldConfig": {"defaults": {"color": {"mode": "thresholds"}, "thresholds": {"mode": "absolute", "steps": [{"color": "red", "value": null}, {"color": "yellow", "value": 95}, {"color": "green", "value": 100}]}, "unit": "percent", "min": 0, "max": 100}, "overrides": []}, "gridPos": {"h": 4, "w": 6, "x": 18, "y": 10}, "id": 6, "options": {"colorMode": "value", "graphMode": "none", "justifyMode": "auto", "orientation": "auto", "reduceOptions": {"calcs": ["lastNotNull"]}, "textMode": "auto"}, "targets": [{"datasource": "VictoriaMetrics", "expr": "100 * count(up{job=\"ocis\"} == 1) / count(up{job=\"ocis\"})", "refId": "A"}], "title": "Services up", "type": "stat"},
{"datasource": "VictoriaMetrics", "fieldConfig": {"defaults": {"color": {"mode": "thresholds"}, "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}, {"color": "yellow", "value": 1}, {"color": "red", "value": 5}]}}, "overrides": []}, "gridPos": {"h": 4, "w": 6, "x": 18, "y": 14}, "id": 7, "options": {"colorMode": "value", "graphMode": "area", "justifyMode": "auto", "orientation": "auto", "reduceOptions": {"calcs": ["lastNotNull"]}, "textMode": "auto"}, "targets": [{"datasource": "VictoriaMetrics", "expr": "sum(changes(process_start_time_seconds{job=\"ocis\"}[1h]))", "refId": "A"}], "title": "Restarts (last 1h)", "type": "stat"},
{"collapsed": false, "gridPos": {"h": 1, "w": 24, "x": 0, "y": 24}, "id": 102, "panels": [], "title": "Storage activity (uploads/downloads)", "type": "row"},
{"datasource": "VictoriaMetrics", "fieldConfig": {"defaults": {"color": {"mode": "thresholds"}, "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}]}}, "overrides": []}, "gridPos": {"h": 4, "w": 6, "x": 0, "y": 25}, "id": 8, "options": {"colorMode": "value", "graphMode": "area", "justifyMode": "auto", "orientation": "auto", "reduceOptions": {"calcs": ["lastNotNull"]}, "textMode": "auto"}, "targets": [{"datasource": "VictoriaMetrics", "expr": "sum(reva_upload_active{ocis_service=\"storageusers\"})", "refId": "A"}], "title": "Active uploads", "type": "stat"},
{"datasource": "VictoriaMetrics", "fieldConfig": {"defaults": {"color": {"mode": "thresholds"}, "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}]}}, "overrides": []}, "gridPos": {"h": 4, "w": 6, "x": 6, "y": 25}, "id": 9, "options": {"colorMode": "value", "graphMode": "area", "justifyMode": "auto", "orientation": "auto", "reduceOptions": {"calcs": ["lastNotNull"]}, "textMode": "auto"}, "targets": [{"datasource": "VictoriaMetrics", "expr": "sum(reva_download_active{ocis_service=\"storageusers\"})", "refId": "A"}], "title": "Active downloads", "type": "stat"},
{"datasource": "VictoriaMetrics", "fieldConfig": {"defaults": {"color": {"mode": "palette-classic"}, "custom": {"drawStyle": "line", "fillOpacity": 10, "lineWidth": 1}, "unit": "ops"}, "overrides": [{"matcher": {"id": "byName", "options": "aborted"}, "properties": [{"id": "color", "value": {"mode": "fixed", "fixedColor": "red"}}]}, {"matcher": {"id": "byName", "options": "finalized"}, "properties": [{"id": "color", "value": {"mode": "fixed", "fixedColor": "green"}}]}]}, "gridPos": {"h": 8, "w": 12, "x": 12, "y": 25}, "id": 10, "options": {"legend": {"displayMode": "table", "placement": "bottom", "showLegend": true, "calcs": ["lastNotNull", "sum"]}, "tooltip": {"mode": "multi"}}, "targets": [{"datasource": "VictoriaMetrics", "expr": "rate(reva_upload_sessions_initiated{ocis_service=\"storageusers\"}[5m])", "legendFormat": "initiated", "refId": "A"}, {"datasource": "VictoriaMetrics", "expr": "rate(reva_upload_sessions_finalized{ocis_service=\"storageusers\"}[5m])", "legendFormat": "finalized", "refId": "B"}, {"datasource": "VictoriaMetrics", "expr": "rate(reva_upload_sessions_aborted{ocis_service=\"storageusers\"}[5m])", "legendFormat": "aborted", "refId": "C"}, {"datasource": "VictoriaMetrics", "expr": "rate(reva_upload_sessions_restarted{ocis_service=\"storageusers\"}[5m])", "legendFormat": "restarted", "refId": "D"}], "title": "Upload sessions/s", "type": "timeseries"},
{"datasource": "VictoriaMetrics", "fieldConfig": {"defaults": {"color": {"mode": "palette-classic"}, "custom": {"drawStyle": "line", "fillOpacity": 20, "lineWidth": 1}, "unit": "Bps"}, "overrides": []}, "gridPos": {"h": 4, "w": 12, "x": 0, "y": 29}, "id": 11, "options": {"legend": {"displayMode": "list", "placement": "bottom", "showLegend": true}, "tooltip": {"mode": "single"}}, "targets": [{"datasource": "VictoriaMetrics", "expr": "rate(reva_upload_sessions_bytes_received{ocis_service=\"storageusers\"}[5m])", "legendFormat": "bytes received", "refId": "A"}], "title": "Upload throughput", "type": "timeseries"},
{"collapsed": false, "gridPos": {"h": 1, "w": 24, "x": 0, "y": 33}, "id": 103, "panels": [], "title": "Resources (filtered by $ocis_service)", "type": "row"},
{"datasource": "VictoriaMetrics", "fieldConfig": {"defaults": {"color": {"mode": "palette-classic"}, "custom": {"drawStyle": "line", "fillOpacity": 5, "lineWidth": 1, "showPoints": "never"}, "unit": "bytes"}, "overrides": []}, "gridPos": {"h": 8, "w": 12, "x": 0, "y": 34}, "id": 12, "options": {"legend": {"displayMode": "table", "placement": "right", "showLegend": true, "calcs": ["lastNotNull", "max"]}, "tooltip": {"mode": "multi", "sort": "desc"}}, "targets": [{"datasource": "VictoriaMetrics", "expr": "go_memstats_heap_inuse_bytes{job=\"ocis\", ocis_service=~\"$ocis_service\"}", "legendFormat": "{{ocis_service}}", "refId": "A"}], "title": "Heap in use by service", "type": "timeseries"},
{"datasource": "VictoriaMetrics", "fieldConfig": {"defaults": {"color": {"mode": "palette-classic"}, "custom": {"drawStyle": "line", "fillOpacity": 5, "lineWidth": 1, "showPoints": "never"}, "unit": "short"}, "overrides": []}, "gridPos": {"h": 8, "w": 12, "x": 12, "y": 34}, "id": 13, "options": {"legend": {"displayMode": "table", "placement": "right", "showLegend": true, "calcs": ["lastNotNull", "max"]}, "tooltip": {"mode": "multi", "sort": "desc"}}, "targets": [{"datasource": "VictoriaMetrics", "expr": "go_goroutines{job=\"ocis\", ocis_service=~\"$ocis_service\"}", "legendFormat": "{{ocis_service}}", "refId": "A"}], "title": "Goroutines by service (leak detector)", "type": "timeseries"},
{"datasource": "VictoriaMetrics", "fieldConfig": {"defaults": {"color": {"mode": "palette-classic"}, "custom": {"drawStyle": "line", "fillOpacity": 5, "lineWidth": 1}, "unit": "percentunit"}, "overrides": []}, "gridPos": {"h": 8, "w": 12, "x": 0, "y": 42}, "id": 14, "options": {"legend": {"displayMode": "table", "placement": "right", "showLegend": true, "calcs": ["lastNotNull", "max"]}, "tooltip": {"mode": "multi", "sort": "desc"}}, "targets": [{"datasource": "VictoriaMetrics", "expr": "rate(process_cpu_seconds_total{job=\"ocis\", ocis_service=~\"$ocis_service\"}[5m])", "legendFormat": "{{ocis_service}}", "refId": "A"}], "title": "CPU by service (cores)", "type": "timeseries"},
{"datasource": "VictoriaMetrics", "fieldConfig": {"defaults": {"color": {"mode": "palette-classic"}, "custom": {"drawStyle": "line", "fillOpacity": 5, "lineWidth": 1}, "unit": "bytes"}, "overrides": []}, "gridPos": {"h": 8, "w": 12, "x": 12, "y": 42}, "id": 15, "options": {"legend": {"displayMode": "table", "placement": "right", "showLegend": true, "calcs": ["lastNotNull", "max"]}, "tooltip": {"mode": "multi", "sort": "desc"}}, "targets": [{"datasource": "VictoriaMetrics", "expr": "process_resident_memory_bytes{job=\"ocis\", ocis_service=~\"$ocis_service\"}", "legendFormat": "{{ocis_service}}", "refId": "A"}], "title": "Resident memory by service", "type": "timeseries"}
],
"refresh": "30s",
"schemaVersion": 39,
"tags": ["ocis"],
"templating": {
"list": [
{
"current": {"selected": false, "text": "All", "value": "$__all"},
"datasource": "VictoriaMetrics",
"hide": 0,
"includeAll": true,
"label": "Service",
"multi": true,
"name": "ocis_service",
"options": [],
"query": {"query": "label_values(up{job=\"ocis\"}, ocis_service)", "refId": "StandardVariableQuery"},
"refresh": 2,
"regex": "",
"skipUrlSync": false,
"sort": 1,
"type": "query"
}
]
},
"time": {"from": "now-3h", "to": "now"},
"timepicker": {},
"timezone": "browser",
"title": "ocis Overview",
"uid": "ocis-overview",
"version": 1,
"weekStart": ""
}
kind: ConfigMap
metadata:
annotations:

View file

@ -23,7 +23,7 @@ spec:
metadata:
annotations:
checksum/config: de8d6f16e9721409f5848bcc101e6aa9815e6455bd4fb9b59306159e705ac1cb
checksum/dashboards-json-config: 63ff5f7bd5ab0b6c241c689c0aa4d78be9bef984e63c1089dc988905fbb61f74
checksum/dashboards-json-config: 698d1d0719d0584936b511bb0b549352844bc1e4a4780a15ac3a165a571d6633
checksum/sc-dashboard-provider-config: e70bf6a851099d385178a76de9757bb0bef8299da6d8443602590e44f05fdf24
kubectl.kubernetes.io/default-container: grafana
labels:
@ -113,6 +113,9 @@ spec:
name: storage
- mountPath: /var/lib/grafana-search
name: search
- mountPath: /var/lib/grafana/dashboards/default/ocis.json
name: dashboards-default
subPath: ocis.json
- mountPath: /etc/grafana/provisioning/datasources/datasources.yaml
name: config
subPath: datasources.yaml

View file

@ -83,6 +83,24 @@ data:
- __meta_kubernetes_namespace
- __meta_kubernetes_service_name
- __meta_kubernetes_endpoint_port_name
- job_name: ocis
kubernetes_sd_configs:
- namespaces:
names:
- ocis
role: endpoints
relabel_configs:
- action: keep
regex: enabled;metrics-debug
source_labels:
- __meta_kubernetes_service_label_ocis_metrics
- __meta_kubernetes_endpoint_port_name
- source_labels:
- __meta_kubernetes_service_name
target_label: ocis_service
- source_labels:
- __meta_kubernetes_pod_name
target_label: pod
kind: ConfigMap
metadata:
annotations: