diff --git a/install/kubernetes/prometheus/grafana/dashboard/all-node.json b/install/kubernetes/prometheus/grafana/dashboard/all-node.json index ee71efee..8d26ae30 100644 --- a/install/kubernetes/prometheus/grafana/dashboard/all-node.json +++ b/install/kubernetes/prometheus/grafana/dashboard/all-node.json @@ -24,7 +24,6 @@ "editable": true, "fiscalYearStartMonth": 0, "graphTooltip": 0, - "id": 33, "links": [], "liveNow": false, "panels": [ @@ -52,6 +51,108 @@ "y": 0 }, "hiddenSeries": false, + "id": 13, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": true, + "min": false, + "rightSide": true, + "show": true, + "sort": "avg", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "paceLength": 10, + "percentage": false, + "pluginVersion": "9.1.5", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "P1809F7CD0C75ACF3" + }, + "exemplar": true, + "expr": "DCGM_FI_DEV_GPU_UTIL{exported_pod=''}", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": " {{instance}} GPU {{gpu}}", + "refId": "A" + } + ], + "thresholds": [], + "timeRegions": [], + "title": "机器 非平台应用 GPU 利用率", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "percent", + "logBase": 1, + "max": "100", + "min": "0", + "show": true + }, + { + "format": "short", + "logBase": 1, + "show": true + } + ], + "yaxis": { + "align": false + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "type": "prometheus", + "uid": "P1809F7CD0C75ACF3" + }, + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 24, + "x": 0, + "y": 8 + }, + "hiddenSeries": false, "id": 14, "legend": { "alignAsTable": true, @@ -61,7 +162,7 @@ "min": false, "rightSide": true, "show": true, - "sort": "max", + "sort": "avg", "sortDesc": true, "total": false, "values": true @@ -131,109 +232,45 @@ } }, { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": { - "type": "prometheus", - "uid": "P1809F7CD0C75ACF3" + "alert": { + "conditions": [ + { + "evaluator": { + "params": [ + 50000 + ], + "type": "gt" + }, + "operator": { + "type": "and" + }, + "query": { + "params": [ + "A", + "5m", + "now" + ] + }, + "reducer": { + "params": [], + "type": "avg" + }, + "type": "query" + } + ], + "executionErrorState": "alerting", + "for": "5m", + "frequency": "5m", + "handler": 1, + "message": "负载过高", + "name": "System load alert", + "noDataState": "no_data", + "notifications": [ + { + "uid": "FMG-3Bv7k" + } + ] }, - "fieldConfig": { - "defaults": { - "links": [] - }, - "overrides": [] - }, - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 8, - "w": 24, - "x": 0, - "y": 8 - }, - "hiddenSeries": false, - "id": 16, - "legend": { - "alignAsTable": true, - "avg": true, - "current": true, - "max": true, - "min": false, - "rightSide": true, - "show": true, - "sort": "avg", - "sortDesc": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 2, - "links": [], - "nullPointMode": "null", - "options": { - "alertThreshold": true - }, - "paceLength": 10, - "percentage": false, - "pluginVersion": "9.1.5", - "pointradius": 2, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "P1809F7CD0C75ACF3" - }, - "editorMode": "code", - "expr": "DCGM_FI_DEV_MEM_COPY_UTIL{exported_pod!=''}", - "format": "time_series", - "interval": "", - "intervalFactor": 1, - "legendFormat": "{{instance}} {{exported_pod}} GPU {{gpu}}", - "range": true, - "refId": "A" - } - ], - "thresholds": [], - "timeRegions": [], - "title": "pod GPU 占用率", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "cumulative" - }, - "type": "graph", - "xaxis": { - "mode": "time", - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "percent", - "logBase": 1, - "max": "100", - "min": "0", - "show": true - }, - { - "format": "short", - "logBase": 1, - "show": true - } - ], - "yaxis": { - "align": false - } - }, - { "aliasColors": {}, "bars": false, "dashLength": 10, @@ -245,25 +282,23 @@ "fill": 1, "fillGradient": 0, "gridPos": { - "h": 8, - "w": 24, + "h": 9, + "w": 8, "x": 0, "y": 16 }, "hiddenSeries": false, "id": 3, "legend": { - "alignAsTable": true, - "avg": true, - "current": true, - "max": true, + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, "min": false, - "rightSide": true, - "show": true, - "sort": "max", - "sortDesc": true, + "rightSide": false, + "show": false, "total": false, - "values": true + "values": false }, "lines": true, "linewidth": 1, @@ -293,9 +328,42 @@ "intervalFactor": 2, "legendFormat": "{{instance}}", "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "P1809F7CD0C75ACF3" + }, + "expr": "node_load5{job=\"node-exporter\"} * 100", + "format": "time_series", + "hide": true, + "intervalFactor": 2, + "legendFormat": "{{instance}}", + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "P1809F7CD0C75ACF3" + }, + "expr": "node_load15{job=\"node-exporter\"} * 100", + "format": "time_series", + "hide": true, + "intervalFactor": 2, + "legendFormat": "{{instance}}", + "refId": "C" + } + ], + "thresholds": [ + { + "colorMode": "critical", + "fill": true, + "line": true, + "op": "gt", + "value": 50000, + "visible": true } ], - "thresholds": [], "timeRegions": [], "title": "System load", "tooltip": { @@ -338,22 +406,22 @@ "fillGradient": 0, "gridPos": { "h": 9, - "w": 24, - "x": 0, - "y": 24 + "w": 8, + "x": 8, + "y": 16 }, "hiddenSeries": false, "id": 2, "legend": { - "alignAsTable": true, - "avg": true, - "current": true, - "max": true, + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, "min": false, - "rightSide": true, - "show": true, + "rightSide": false, + "show": false, "total": false, - "values": true + "values": false }, "lines": true, "linewidth": 1, @@ -378,18 +446,16 @@ "type": "prometheus", "uid": "P1809F7CD0C75ACF3" }, - "editorMode": "code", "expr": "100 - (avg by (instance) (irate(node_cpu{job=\"node-exporter\", mode=\"idle\"}[5m])) * 100)\n", "format": "time_series", "intervalFactor": 1, "legendFormat": "{{instance}}", - "range": true, "refId": "B" } ], "thresholds": [], "timeRegions": [], - "title": "机器 CPU", + "title": "Idle CPU", "tooltip": { "shared": true, "sort": 1, @@ -434,22 +500,22 @@ "fillGradient": 0, "gridPos": { "h": 9, - "w": 24, - "x": 0, - "y": 33 + "w": 8, + "x": 16, + "y": 16 }, "hiddenSeries": false, "id": 4, "legend": { - "alignAsTable": true, - "avg": true, - "current": true, - "max": true, + "alignAsTable": false, + "avg": false, + "current": false, + "max": false, "min": false, - "rightSide": true, - "show": true, + "rightSide": false, + "show": false, "total": false, - "values": true + "values": false }, "lines": true, "linewidth": 1, @@ -526,7 +592,7 @@ "h": 7, "w": 12, "x": 0, - "y": 42 + "y": 25 }, "hiddenSeries": false, "id": 8, @@ -538,8 +604,6 @@ "min": false, "rightSide": true, "show": true, - "sort": "max", - "sortDesc": true, "total": false, "values": true }, @@ -618,7 +682,7 @@ "h": 7, "w": 12, "x": 12, - "y": 42 + "y": 25 }, "hiddenSeries": false, "id": 9, @@ -708,7 +772,7 @@ "h": 10, "w": 24, "x": 0, - "y": 49 + "y": 32 }, "hiddenSeries": false, "id": 11, @@ -757,12 +821,10 @@ "type": "prometheus", "uid": "P1809F7CD0C75ACF3" }, - "editorMode": "code", "expr": "sum by (instance) (rate(node_disk_bytes_read{job=\"node-exporter\"}[2m]))", "format": "time_series", "intervalFactor": 2, "legendFormat": "{{instance}} read", - "range": true, "refId": "A" }, { @@ -858,6 +920,6 @@ "timezone": "browser", "title": "all-node", "uid": "all-node", - "version": 9, + "version": 1, "weekStart": "" } \ No newline at end of file diff --git a/install/kubernetes/prometheus/grafana/dashboard/dcgm.json b/install/kubernetes/prometheus/grafana/dashboard/dcgm.json index b032dea0..683c7d84 100644 --- a/install/kubernetes/prometheus/grafana/dashboard/dcgm.json +++ b/install/kubernetes/prometheus/grafana/dashboard/dcgm.json @@ -3,28 +3,47 @@ "list": [ { "builtIn": 1, - "datasource": "-- Grafana --", + "datasource": { + "type": "datasource", + "uid": "grafana" + }, "enable": true, "hide": true, "iconColor": "rgba(0, 211, 255, 1)", "name": "Annotations & Alerts", + "target": { + "limit": 100, + "matchAny": false, + "tags": [], + "type": "dashboard" + }, "type": "dashboard" } ] }, "description": "This dashboard is to display the metrics from DCGM Exporter on a Kubernetes (1.13+) cluster", "editable": true, + "fiscalYearStartMonth": 0, "gnetId": 12239, "graphTooltip": 0, - "id": 25, "links": [], + "liveNow": false, "panels": [ { "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, - "datasource": "prometheus", + "datasource": { + "type": "prometheus", + "uid": "P1809F7CD0C75ACF3" + }, + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, "fill": 1, "fillGradient": 0, "gridPos": { @@ -34,7 +53,7 @@ "y": 0 }, "hiddenSeries": false, - "id": 6, + "id": 21, "legend": { "alignAsTable": true, "avg": true, @@ -44,7 +63,7 @@ "rightSide": true, "show": true, "sort": "avg", - "sortDesc": true, + "sortDesc": false, "total": false, "values": true }, @@ -53,10 +72,11 @@ "links": [], "nullPointMode": "null", "options": { - "dataLinks": [] + "alertThreshold": true }, "paceLength": 10, "percentage": false, + "pluginVersion": "9.1.5", "pointradius": 2, "points": false, "renderer": "flot", @@ -66,6 +86,10 @@ "steppedLine": false, "targets": [ { + "datasource": { + "type": "prometheus", + "uid": "P1809F7CD0C75ACF3" + }, "expr": "DCGM_FI_DEV_GPU_UTIL{exported_pod!=''}", "format": "time_series", "interval": "", @@ -75,9 +99,7 @@ } ], "thresholds": [], - "timeFrom": null, "timeRegions": [], - "timeShift": null, "title": "pod GPU 利用率", "tooltip": { "shared": true, @@ -86,16 +108,13 @@ }, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": null, "show": true, "values": [] }, "yaxes": [ { "format": "percent", - "label": null, "logBase": 1, "max": "100", "min": "0", @@ -103,16 +122,12 @@ }, { "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": true } ], "yaxis": { - "align": false, - "alignLevel": null + "align": false } }, { @@ -120,7 +135,16 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": "prometheus", + "datasource": { + "type": "prometheus", + "uid": "P1809F7CD0C75ACF3" + }, + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, "fill": 1, "fillGradient": 0, "gridPos": { @@ -149,10 +173,11 @@ "links": [], "nullPointMode": "null", "options": { - "dataLinks": [] + "alertThreshold": true }, "paceLength": 10, "percentage": false, + "pluginVersion": "9.1.5", "pointradius": 2, "points": false, "renderer": "flot", @@ -162,6 +187,10 @@ "steppedLine": false, "targets": [ { + "datasource": { + "type": "prometheus", + "uid": "P1809F7CD0C75ACF3" + }, "expr": "DCGM_FI_DEV_MEM_COPY_UTIL{exported_pod!=''}", "format": "time_series", "interval": "", @@ -171,9 +200,7 @@ } ], "thresholds": [], - "timeFrom": null, "timeRegions": [], - "timeShift": null, "title": "pod GPU 占用率", "tooltip": { "shared": true, @@ -182,16 +209,13 @@ }, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": null, "show": true, "values": [] }, "yaxes": [ { "format": "percent", - "label": null, "logBase": 1, "max": "100", "min": "0", @@ -199,16 +223,12 @@ }, { "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": true } ], "yaxis": { - "align": false, - "alignLevel": null + "align": false } }, { @@ -216,7 +236,16 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": "prometheus", + "datasource": { + "type": "prometheus", + "uid": "P1809F7CD0C75ACF3" + }, + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, "fill": 1, "fillGradient": 0, "gridPos": { @@ -244,10 +273,11 @@ "linewidth": 2, "nullPointMode": "null", "options": { - "dataLinks": [] + "alertThreshold": true }, "paceLength": 10, "percentage": false, + "pluginVersion": "9.1.5", "pointradius": 2, "points": false, "renderer": "flot", @@ -257,6 +287,10 @@ "steppedLine": false, "targets": [ { + "datasource": { + "type": "prometheus", + "uid": "P1809F7CD0C75ACF3" + }, "expr": "DCGM_FI_DEV_GPU_UTIL", "format": "time_series", "interval": "", @@ -266,9 +300,7 @@ } ], "thresholds": [], - "timeFrom": null, "timeRegions": [], - "timeShift": null, "title": "GPU Utilization", "tooltip": { "shared": true, @@ -277,16 +309,13 @@ }, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": null, "show": true, "values": [] }, "yaxes": [ { "format": "percent", - "label": null, "logBase": 1, "max": "100", "min": "0", @@ -294,16 +323,12 @@ }, { "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": true } ], "yaxis": { - "align": false, - "alignLevel": null + "align": false } }, { @@ -311,7 +336,16 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": "prometheus", + "datasource": { + "type": "prometheus", + "uid": "P1809F7CD0C75ACF3" + }, + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, "fill": 1, "fillGradient": 0, "gridPos": { @@ -338,10 +372,11 @@ "links": [], "nullPointMode": "null", "options": { - "dataLinks": [] + "alertThreshold": true }, "paceLength": 10, "percentage": false, + "pluginVersion": "9.1.5", "pointradius": 2, "points": false, "renderer": "flot", @@ -351,6 +386,10 @@ "steppedLine": false, "targets": [ { + "datasource": { + "type": "prometheus", + "uid": "P1809F7CD0C75ACF3" + }, "expr": "DCGM_FI_DEV_MEM_COPY_UTIL", "format": "time_series", "interval": "", @@ -360,9 +399,7 @@ } ], "thresholds": [], - "timeFrom": null, "timeRegions": [], - "timeShift": null, "title": "GPU 内存利用率", "tooltip": { "shared": true, @@ -371,33 +408,24 @@ }, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": null, "show": true, "values": [] }, "yaxes": [ { "format": "percent", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": true }, { "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": true } ], "yaxis": { - "align": false, - "alignLevel": null + "align": false } }, { @@ -405,7 +433,16 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": "prometheus", + "datasource": { + "type": "prometheus", + "uid": "P1809F7CD0C75ACF3" + }, + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, "fill": 1, "fillGradient": 0, "gridPos": { @@ -431,10 +468,11 @@ "linewidth": 2, "nullPointMode": "null", "options": { - "dataLinks": [] + "alertThreshold": true }, "paceLength": 10, "percentage": false, + "pluginVersion": "9.1.5", "pointradius": 2, "points": false, "renderer": "flot", @@ -444,6 +482,10 @@ "steppedLine": false, "targets": [ { + "datasource": { + "type": "prometheus", + "uid": "P1809F7CD0C75ACF3" + }, "expr": "DCGM_FI_DEV_GPU_TEMP", "format": "time_series", "instant": false, @@ -454,9 +496,7 @@ } ], "thresholds": [], - "timeFrom": null, "timeRegions": [], - "timeShift": null, "title": "GPU Temperature", "tooltip": { "shared": true, @@ -465,33 +505,24 @@ }, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": null, "show": true, "values": [] }, "yaxes": [ { "format": "celsius", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": true }, { "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": true } ], "yaxis": { - "align": false, - "alignLevel": null + "align": false } }, { @@ -499,7 +530,16 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": "prometheus", + "datasource": { + "type": "prometheus", + "uid": "P1809F7CD0C75ACF3" + }, + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, "fill": 1, "fillGradient": 0, "gridPos": { @@ -525,11 +565,11 @@ "linewidth": 2, "nullPointMode": "null", "options": { - "dataLinks": [] + "alertThreshold": true }, "paceLength": 10, "percentage": false, - "pluginVersion": "6.5.2", + "pluginVersion": "9.1.5", "pointradius": 2, "points": false, "renderer": "flot", @@ -539,6 +579,10 @@ "steppedLine": false, "targets": [ { + "datasource": { + "type": "prometheus", + "uid": "P1809F7CD0C75ACF3" + }, "expr": "DCGM_FI_DEV_POWER_USAGE", "format": "time_series", "interval": "", @@ -548,9 +592,7 @@ } ], "thresholds": [], - "timeFrom": null, "timeRegions": [], - "timeShift": null, "title": "GPU Power Usage", "tooltip": { "shared": true, @@ -559,33 +601,24 @@ }, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": null, "show": true, "values": [] }, "yaxes": [ { "format": "watt", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": true }, { "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": true } ], "yaxis": { - "align": false, - "alignLevel": null + "align": false } }, { @@ -593,7 +626,16 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": "prometheus", + "datasource": { + "type": "prometheus", + "uid": "P1809F7CD0C75ACF3" + }, + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, "fill": 1, "fillGradient": 0, "gridPos": { @@ -613,7 +655,6 @@ "min": false, "rightSide": true, "show": true, - "sideWidth": null, "total": false, "values": true }, @@ -621,10 +662,11 @@ "linewidth": 2, "nullPointMode": "null", "options": { - "dataLinks": [] + "alertThreshold": true }, "paceLength": 10, "percentage": false, + "pluginVersion": "9.1.5", "pointradius": 2, "points": false, "renderer": "flot", @@ -634,6 +676,10 @@ "steppedLine": false, "targets": [ { + "datasource": { + "type": "prometheus", + "uid": "P1809F7CD0C75ACF3" + }, "expr": "DCGM_FI_DEV_SM_CLOCK* 1000000", "format": "time_series", "interval": "", @@ -643,9 +689,7 @@ } ], "thresholds": [], - "timeFrom": null, "timeRegions": [], - "timeShift": null, "title": "GPU SM Clocks", "tooltip": { "shared": true, @@ -654,46 +698,37 @@ }, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": null, "show": true, "values": [] }, "yaxes": [ { - "decimals": null, "format": "hertz", "label": "", "logBase": 1, - "max": null, - "min": null, "show": true }, { "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": true } ], "yaxis": { - "align": false, - "alignLevel": null + "align": false } } ], "refresh": false, - "schemaVersion": 18, + "schemaVersion": 37, "style": "dark", "tags": [], "templating": { "list": [] }, "time": { - "from": "now/d", + "from": "now-24h", "to": "now" }, "timepicker": { @@ -722,7 +757,8 @@ ] }, "timezone": "", - "title": "gpu监控", + "title": "gpu", "uid": "dcgm", - "version": 26 + "version": 1, + "weekStart": "" } \ No newline at end of file diff --git a/install/kubernetes/prometheus/grafana/dashboard/istio-service.json b/install/kubernetes/prometheus/grafana/dashboard/istio-service.json index 79a3fca8..b80384d3 100644 --- a/install/kubernetes/prometheus/grafana/dashboard/istio-service.json +++ b/install/kubernetes/prometheus/grafana/dashboard/istio-service.json @@ -3,31 +3,38 @@ "list": [ { "builtIn": 1, - "datasource": "-- Grafana --", + "datasource": { + "type": "datasource", + "uid": "grafana" + }, "enable": true, "hide": true, "iconColor": "rgba(0, 211, 255, 1)", "name": "Annotations & Alerts", + "target": { + "limit": 100, + "matchAny": false, + "tags": [], + "type": "dashboard" + }, "type": "dashboard" } ] }, "editable": true, - "gnetId": null, + "fiscalYearStartMonth": 0, "graphTooltip": 0, - "id": 17, - "iteration": 1657856261371, "links": [], + "liveNow": false, "panels": [ { "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, - "datasource": "prometheus", - "fieldConfig": { - "defaults": {}, - "overrides": [] + "datasource": { + "type": "prometheus", + "uid": "P1809F7CD0C75ACF3" }, "fill": 1, "fillGradient": 0, @@ -61,7 +68,7 @@ }, "paceLength": 10, "percentage": false, - "pluginVersion": "7.5.2", + "pluginVersion": "9.1.5", "pointradius": 2, "points": false, "renderer": "flot", @@ -71,6 +78,10 @@ "steppedLine": false, "targets": [ { + "datasource": { + "type": "prometheus", + "uid": "P1809F7CD0C75ACF3" + }, "exemplar": true, "expr": "sum by (destination_workload) (istio_requests_total{destination_service_namespace=\"$namespace\",destination_workload=~\"($service)\"})", "format": "time_series", @@ -81,6 +92,10 @@ "refId": "B" }, { + "datasource": { + "type": "prometheus", + "uid": "P1809F7CD0C75ACF3" + }, "exemplar": true, "expr": "sum by (destination_workload,response_code) (irate(istio_requests_total{destination_service_namespace=\"$namespace\",destination_workload=~\"($service)\"}[1m]))", "hide": false, @@ -90,10 +105,8 @@ } ], "thresholds": [], - "timeFrom": null, "timeRegions": [], - "timeShift": null, - "title": "域名服务平均qps请求量(1分钟内)", + "title": "域名服务平均qps请求量(1分钟内平均)", "tooltip": { "shared": true, "sort": 1, @@ -101,9 +114,7 @@ }, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": null, "show": true, "values": [] }, @@ -111,25 +122,18 @@ { "$$hashKey": "object:311", "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": true }, { "$$hashKey": "object:312", "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": true } ], "yaxis": { - "align": false, - "alignLevel": null + "align": false } }, { @@ -137,10 +141,9 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": "prometheus", - "fieldConfig": { - "defaults": {}, - "overrides": [] + "datasource": { + "type": "prometheus", + "uid": "P1809F7CD0C75ACF3" }, "fill": 1, "fillGradient": 0, @@ -172,7 +175,7 @@ }, "paceLength": 10, "percentage": false, - "pluginVersion": "7.5.2", + "pluginVersion": "9.1.5", "pointradius": 2, "points": false, "renderer": "flot", @@ -182,6 +185,10 @@ "steppedLine": false, "targets": [ { + "datasource": { + "type": "prometheus", + "uid": "P1809F7CD0C75ACF3" + }, "exemplar": true, "expr": "sum by (destination_workload) (irate(istio_request_bytes_sum{destination_service_namespace=\"$namespace\",destination_workload=~\"($service)\"}[1m]))", "format": "time_series", @@ -193,9 +200,7 @@ } ], "thresholds": [], - "timeFrom": null, "timeRegions": [], - "timeShift": null, "title": "域名服务吞吐率(1分钟)", "tooltip": { "shared": true, @@ -204,33 +209,24 @@ }, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": null, "show": true, "values": [] }, "yaxes": [ { "format": "bytes", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": true }, { "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": true } ], "yaxis": { - "align": false, - "alignLevel": null + "align": false } }, { @@ -238,114 +234,9 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": "prometheus", - "fieldConfig": { - "defaults": {}, - "overrides": [] - }, - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 8, - "w": 24, - "x": 0, - "y": 8 - }, - "hiddenSeries": false, - "id": 3, - "legend": { - "alignAsTable": true, - "avg": false, - "current": false, - "max": true, - "min": false, - "rightSide": true, - "show": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "options": { - "alertThreshold": true - }, - "paceLength": 10, - "percentage": false, - "pluginVersion": "7.5.2", - "pointradius": 2, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "exemplar": true, - "expr": "avg by (destination_workload) (rate(istio_request_duration_milliseconds_sum{destination_service_namespace=\"$namespace\",destination_workload=~\"($service)\"}[1m])/rate(istio_request_duration_milliseconds_count{destination_service_namespace=\"$namespace\",destination_workload=~\"($service)\"}[1m]))", - "format": "time_series", - "hide": false, - "interval": "", - "intervalFactor": 1, - "legendFormat": "{{destination_workload}}", - "refId": "B" - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "域名服务 平均时延ms(1分钟平均)", - "tooltip": { - "shared": true, - "sort": 1, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "$$hashKey": "object:83", - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "$$hashKey": "object:84", - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "prometheus", - "decimals": null, - "fieldConfig": { - "defaults": {}, - "overrides": [] + "datasource": { + "type": "prometheus", + "uid": "P1809F7CD0C75ACF3" }, "fill": 1, "fillGradient": 0, @@ -353,7 +244,7 @@ "h": 10, "w": 6, "x": 0, - "y": 16 + "y": 8 }, "hiddenSeries": false, "id": 6, @@ -377,17 +268,20 @@ }, "paceLength": 10, "percentage": false, - "pluginVersion": "7.5.2", + "pluginVersion": "9.1.5", "pointradius": 5, "points": false, "renderer": "flot", - "repeat": null, "seriesOverrides": [], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { + "datasource": { + "type": "prometheus", + "uid": "P1809F7CD0C75ACF3" + }, "exemplar": true, "expr": "sum by (pod) (container_memory_working_set_bytes{job=\"kubelet\", image!=\"\",container_name!=\"POD\",pod=~\".*$service.*\"})", "format": "time_series", @@ -398,9 +292,7 @@ } ], "thresholds": [], - "timeFrom": null, "timeRegions": [], - "timeShift": null, "title": "Memory Usage", "tooltip": { "shared": true, @@ -409,9 +301,7 @@ }, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": null, "show": true, "values": [] }, @@ -419,25 +309,20 @@ { "$$hashKey": "object:204", "format": "bytes", - "label": null, "logBase": 1, - "max": null, "min": 0, "show": true }, { "$$hashKey": "object:205", "format": "bytes", - "label": null, "logBase": 1, - "max": null, "min": 0, "show": false } ], "yaxis": { - "align": false, - "alignLevel": null + "align": false } }, { @@ -445,10 +330,9 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": "prometheus", - "fieldConfig": { - "defaults": {}, - "overrides": [] + "datasource": { + "type": "prometheus", + "uid": "P1809F7CD0C75ACF3" }, "fill": 1, "fillGradient": 0, @@ -456,7 +340,7 @@ "h": 10, "w": 6, "x": 6, - "y": 16 + "y": 8 }, "hiddenSeries": false, "id": 8, @@ -481,17 +365,20 @@ }, "paceLength": 10, "percentage": false, - "pluginVersion": "7.5.2", + "pluginVersion": "9.1.5", "pointradius": 5, "points": false, "renderer": "flot", - "repeat": null, "seriesOverrides": [], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { + "datasource": { + "type": "prometheus", + "uid": "P1809F7CD0C75ACF3" + }, "exemplar": true, "expr": "sum by (pod) (rate(container_cpu_usage_seconds_total{job=\"kubelet\", image!=\"\",container_name!=\"POD\",pod=~\".*$service.*\"}[5m]))", "format": "time_series", @@ -502,6 +389,10 @@ "refId": "A" }, { + "datasource": { + "type": "prometheus", + "uid": "P1809F7CD0C75ACF3" + }, "expr": "", "format": "time_series", "intervalFactor": 1, @@ -509,9 +400,7 @@ } ], "thresholds": [], - "timeFrom": null, "timeRegions": [], - "timeShift": null, "title": "CPU Usage", "tooltip": { "shared": true, @@ -520,9 +409,7 @@ }, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": null, "show": true, "values": [] }, @@ -530,25 +417,20 @@ { "$$hashKey": "object:483", "format": "short", - "label": null, "logBase": 1, - "max": null, "min": 0, "show": true }, { "$$hashKey": "object:484", "format": "short", - "label": null, "logBase": 1, - "max": null, "min": 0, "show": false } ], "yaxis": { - "align": false, - "alignLevel": null + "align": false } }, { @@ -556,7 +438,10 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": "prometheus", + "datasource": { + "type": "prometheus", + "uid": "P1809F7CD0C75ACF3" + }, "fieldConfig": { "defaults": { "links": [] @@ -569,7 +454,7 @@ "h": 10, "w": 6, "x": 12, - "y": 16 + "y": 8 }, "hiddenSeries": false, "id": 12, @@ -595,7 +480,7 @@ }, "paceLength": 10, "percentage": false, - "pluginVersion": "7.5.2", + "pluginVersion": "9.1.5", "pointradius": 2, "points": false, "renderer": "flot", @@ -605,6 +490,10 @@ "steppedLine": false, "targets": [ { + "datasource": { + "type": "prometheus", + "uid": "P1809F7CD0C75ACF3" + }, "exemplar": true, "expr": "DCGM_FI_DEV_MEM_COPY_UTIL{exported_pod=~\".*$service.*\"}", "format": "time_series", @@ -615,9 +504,7 @@ } ], "thresholds": [], - "timeFrom": null, "timeRegions": [], - "timeShift": null, "title": "pod GPU 显存占用率", "tooltip": { "shared": true, @@ -626,16 +513,13 @@ }, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": null, "show": true, "values": [] }, "yaxes": [ { "format": "percent", - "label": null, "logBase": 1, "max": "100", "min": "0", @@ -643,16 +527,12 @@ }, { "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": true } ], "yaxis": { - "align": false, - "alignLevel": null + "align": false } }, { @@ -660,10 +540,9 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": "prometheus", - "fieldConfig": { - "defaults": {}, - "overrides": [] + "datasource": { + "type": "prometheus", + "uid": "P1809F7CD0C75ACF3" }, "fill": 1, "fillGradient": 0, @@ -671,7 +550,7 @@ "h": 10, "w": 6, "x": 18, - "y": 16 + "y": 8 }, "hiddenSeries": false, "id": 10, @@ -696,7 +575,7 @@ }, "paceLength": 10, "percentage": false, - "pluginVersion": "7.5.2", + "pluginVersion": "9.1.5", "pointradius": 5, "points": false, "renderer": "flot", @@ -706,6 +585,10 @@ "steppedLine": false, "targets": [ { + "datasource": { + "type": "prometheus", + "uid": "P1809F7CD0C75ACF3" + }, "exemplar": true, "expr": "DCGM_FI_DEV_GPU_UTIL{exported_pod=~\".*$service.*\"}", "format": "time_series", @@ -717,9 +600,7 @@ } ], "thresholds": [], - "timeFrom": null, "timeRegions": [], - "timeShift": null, "title": "GPU 使用率", "tooltip": { "shared": true, @@ -728,33 +609,26 @@ }, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": null, "show": true, "values": [] }, "yaxes": [ { "format": "short", - "label": null, "logBase": 1, - "max": null, "min": 0, "show": true }, { "format": "short", - "label": null, "logBase": 1, - "max": null, "min": 0, "show": false } ], "yaxis": { - "align": false, - "alignLevel": null + "align": false } }, { @@ -762,10 +636,9 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": "prometheus", - "fieldConfig": { - "defaults": {}, - "overrides": [] + "datasource": { + "type": "prometheus", + "uid": "P1809F7CD0C75ACF3" }, "fill": 1, "fillGradient": 0, @@ -773,7 +646,7 @@ "h": 8, "w": 24, "x": 0, - "y": 26 + "y": 18 }, "hiddenSeries": false, "id": 13, @@ -797,7 +670,7 @@ }, "paceLength": 10, "percentage": false, - "pluginVersion": "7.5.2", + "pluginVersion": "9.1.5", "pointradius": 2, "points": false, "renderer": "flot", @@ -807,6 +680,10 @@ "steppedLine": false, "targets": [ { + "datasource": { + "type": "prometheus", + "uid": "P1809F7CD0C75ACF3" + }, "exemplar": true, "expr": "sum by (destination_workload) (istio_requests_total{destination_service_namespace=\"$namespace\"})", "format": "time_series", @@ -818,9 +695,7 @@ } ], "thresholds": [], - "timeFrom": null, "timeRegions": [], - "timeShift": null, "title": "全部域名服务监控", "tooltip": { "shared": true, @@ -829,56 +704,46 @@ }, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": null, "show": true, "values": [] }, "yaxes": [ { "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": true }, { "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": true } ], "yaxis": { - "align": false, - "alignLevel": null + "align": false } } ], "refresh": false, - "schemaVersion": 27, + "schemaVersion": 37, "style": "dark", "tags": [], "templating": { "list": [ { - "allValue": null, "current": { - "selected": true, + "selected": false, "text": "service", "value": "service" }, - "datasource": "prometheus", + "datasource": { + "type": "prometheus", + "uid": "P1809F7CD0C75ACF3" + }, "definition": "label_values(istio_requests_total, destination_service_namespace)", - "description": null, - "error": null, "hide": 0, "includeAll": true, - "label": null, "multi": false, "name": "namespace", "options": [], @@ -891,25 +756,23 @@ "skipUrlSync": false, "sort": 0, "tagValuesQuery": "", - "tags": [], "tagsQuery": "", "type": "query", "useTags": false }, { - "allValue": null, "current": { "selected": false, "text": "All", "value": "$__all" }, - "datasource": "prometheus", + "datasource": { + "type": "prometheus", + "uid": "P1809F7CD0C75ACF3" + }, "definition": "label_values(istio_requests_total{destination_service_namespace=\"$namespace\"}, destination_service_name)", - "description": null, - "error": null, "hide": 0, "includeAll": true, - "label": null, "multi": true, "name": "service", "options": [], @@ -922,7 +785,6 @@ "skipUrlSync": false, "sort": 0, "tagValuesQuery": "", - "tags": [], "tagsQuery": "", "type": "query", "useTags": false @@ -961,5 +823,6 @@ "timezone": "", "title": "istio service", "uid": "istio-service", - "version": 42 + "version": 1, + "weekStart": "" } \ No newline at end of file diff --git a/install/kubernetes/prometheus/grafana/dashboard/node.json b/install/kubernetes/prometheus/grafana/dashboard/node.json index deb02cdf..d4ca15dc 100644 --- a/install/kubernetes/prometheus/grafana/dashboard/node.json +++ b/install/kubernetes/prometheus/grafana/dashboard/node.json @@ -3,39 +3,150 @@ "list": [ { "builtIn": 1, - "datasource": "-- Grafana --", + "datasource": { + "type": "datasource", + "uid": "grafana" + }, "enable": true, "hide": true, "iconColor": "rgba(0, 211, 255, 1)", "name": "Annotations & Alerts", + "target": { + "limit": 100, + "matchAny": false, + "tags": [], + "type": "dashboard" + }, "type": "dashboard" } ] }, "editable": true, - "gnetId": null, + "fiscalYearStartMonth": 0, "graphTooltip": 0, - "id": 11, - "iteration": 1638349496659, "links": [], + "liveNow": false, "panels": [ { "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, - "datasource": "prometheus", + "datasource": { + "type": "prometheus", + "uid": "P1809F7CD0C75ACF3" + }, "fieldConfig": { - "defaults": {}, + "defaults": { + "links": [] + }, "overrides": [] }, "fill": 1, "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 24, + "x": 0, + "y": 0 + }, + "hiddenSeries": false, + "id": 14, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": true, + "min": false, + "rightSide": true, + "show": true, + "sort": "avg", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "paceLength": 10, + "percentage": false, + "pluginVersion": "9.1.5", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "P1809F7CD0C75ACF3" + }, + "editorMode": "code", + "exemplar": true, + "expr": "DCGM_FI_DEV_GPU_UTIL{exported_pod!='',instance=~\"$node.*\"}", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": " {{instance}},{{exported_pod}} GPU {{gpu}}", + "range": true, + "refId": "A" + } + ], + "thresholds": [], + "timeRegions": [], + "title": "机器 平台应用 GPU 利用率", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "percent", + "logBase": 1, + "max": "100", + "min": "0", + "show": true + }, + { + "format": "short", + "logBase": 1, + "show": true + } + ], + "yaxis": { + "align": false + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "type": "prometheus", + "uid": "P1809F7CD0C75ACF3" + }, + "fill": 1, + "fillGradient": 0, "gridPos": { "h": 7, "w": 24, "x": 0, - "y": 0 + "y": 8 }, "hiddenSeries": false, "id": 11, @@ -47,8 +158,6 @@ "min": false, "rightSide": true, "show": true, - "sort": null, - "sortDesc": null, "total": false, "values": true }, @@ -61,7 +170,7 @@ }, "paceLength": 10, "percentage": false, - "pluginVersion": "7.5.2", + "pluginVersion": "9.1.5", "pointradius": 2, "points": false, "renderer": "flot", @@ -71,6 +180,10 @@ "steppedLine": false, "targets": [ { + "datasource": { + "type": "prometheus", + "uid": "P1809F7CD0C75ACF3" + }, "exemplar": true, "expr": "sum by (pod) (rate(container_cpu_usage_seconds_total{job=\"kubelet\", image!=\"\",container_name!=\"POD\",instance=~\"$node.*\"}[5m]))", "format": "time_series", @@ -82,9 +195,7 @@ } ], "thresholds": [], - "timeFrom": null, "timeRegions": [], - "timeShift": null, "title": "机器pod cpu", "tooltip": { "shared": true, @@ -93,33 +204,24 @@ }, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": null, "show": true, "values": [] }, "yaxes": [ { "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": true }, { "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": true } ], "yaxis": { - "align": false, - "alignLevel": null + "align": false } }, { @@ -127,10 +229,9 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": "prometheus", - "fieldConfig": { - "defaults": {}, - "overrides": [] + "datasource": { + "type": "prometheus", + "uid": "P1809F7CD0C75ACF3" }, "fill": 1, "fillGradient": 0, @@ -138,7 +239,7 @@ "h": 7, "w": 24, "x": 0, - "y": 7 + "y": 15 }, "hiddenSeries": false, "id": 12, @@ -164,7 +265,7 @@ }, "paceLength": 10, "percentage": false, - "pluginVersion": "7.5.2", + "pluginVersion": "9.1.5", "pointradius": 2, "points": false, "renderer": "flot", @@ -174,6 +275,10 @@ "steppedLine": false, "targets": [ { + "datasource": { + "type": "prometheus", + "uid": "P1809F7CD0C75ACF3" + }, "exemplar": true, "expr": "sum by(pod) (container_memory_working_set_bytes{job=\"kubelet\", image!=\"\",container_name!=\"POD\",instance=~\"$node.*\"})", "format": "time_series", @@ -184,9 +289,7 @@ } ], "thresholds": [], - "timeFrom": null, "timeRegions": [], - "timeShift": null, "title": "机器pod 内存", "tooltip": { "shared": true, @@ -195,33 +298,24 @@ }, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": null, "show": true, "values": [] }, "yaxes": [ { "format": "bytes", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": true }, { "format": "short", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": true } ], "yaxis": { - "align": false, - "alignLevel": null + "align": false } }, { @@ -229,10 +323,9 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": "prometheus", - "fieldConfig": { - "defaults": {}, - "overrides": [] + "datasource": { + "type": "prometheus", + "uid": "P1809F7CD0C75ACF3" }, "fill": 1, "fillGradient": 0, @@ -240,7 +333,7 @@ "h": 7, "w": 12, "x": 0, - "y": 14 + "y": 22 }, "hiddenSeries": false, "id": 2, @@ -264,31 +357,34 @@ }, "paceLength": 10, "percentage": false, - "pluginVersion": "7.5.2", + "pluginVersion": "9.1.5", "pointradius": 5, "points": false, "renderer": "flot", - "repeat": null, "seriesOverrides": [], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { + "datasource": { + "type": "prometheus", + "uid": "P1809F7CD0C75ACF3" + }, + "editorMode": "code", "exemplar": true, "expr": "100 - (avg by (cpu) (irate(node_cpu{job=\"node-exporter\", mode=\"idle\", instance=~\"$node.*\"}[5m])) * 100)\n", "format": "time_series", "interval": "", "intervalFactor": 10, "legendFormat": "{{cpu}}", + "range": true, "refId": "A" } ], "thresholds": [], - "timeFrom": null, "timeRegions": [], - "timeShift": null, - "title": "Idle CPU", + "title": "CPU", "tooltip": { "shared": true, "sort": 0, @@ -296,16 +392,13 @@ }, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": null, "show": true, "values": [] }, "yaxes": [ { "format": "percent", - "label": null, "logBase": 1, "max": 100, "min": 0, @@ -313,7 +406,6 @@ }, { "format": "percent", - "label": null, "logBase": 1, "max": 100, "min": 0, @@ -321,8 +413,7 @@ } ], "yaxis": { - "align": false, - "alignLevel": null + "align": false } }, { @@ -330,10 +421,9 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": "prometheus", - "fieldConfig": { - "defaults": {}, - "overrides": [] + "datasource": { + "type": "prometheus", + "uid": "P1809F7CD0C75ACF3" }, "fill": 1, "fillGradient": 0, @@ -341,7 +431,7 @@ "h": 7, "w": 12, "x": 12, - "y": 14 + "y": 22 }, "hiddenSeries": false, "id": 3, @@ -365,17 +455,20 @@ }, "paceLength": 10, "percentage": false, - "pluginVersion": "7.5.2", + "pluginVersion": "9.1.5", "pointradius": 5, "points": false, "renderer": "flot", - "repeat": null, "seriesOverrides": [], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { + "datasource": { + "type": "prometheus", + "uid": "P1809F7CD0C75ACF3" + }, "expr": "node_load1{job=\"node-exporter\", instance=~\"$node.*\"} * 100", "format": "time_series", "intervalFactor": 2, @@ -383,6 +476,10 @@ "refId": "A" }, { + "datasource": { + "type": "prometheus", + "uid": "P1809F7CD0C75ACF3" + }, "expr": "node_load5{job=\"node-exporter\", instance=~\"$node.*\"} * 100", "format": "time_series", "intervalFactor": 2, @@ -390,6 +487,10 @@ "refId": "B" }, { + "datasource": { + "type": "prometheus", + "uid": "P1809F7CD0C75ACF3" + }, "expr": "node_load15{job=\"node-exporter\", instance=~\"$node.*\"} * 100", "format": "time_series", "intervalFactor": 2, @@ -398,9 +499,7 @@ } ], "thresholds": [], - "timeFrom": null, "timeRegions": [], - "timeShift": null, "title": "System load", "tooltip": { "shared": true, @@ -409,33 +508,24 @@ }, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": null, "show": true, "values": [] }, "yaxes": [ { "format": "percent", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": true }, { "format": "percent", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": true } ], "yaxis": { - "align": false, - "alignLevel": null + "align": false } }, { @@ -443,10 +533,9 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": "prometheus", - "fieldConfig": { - "defaults": {}, - "overrides": [] + "datasource": { + "type": "prometheus", + "uid": "P1809F7CD0C75ACF3" }, "fill": 1, "fillGradient": 0, @@ -454,7 +543,7 @@ "h": 7, "w": 18, "x": 0, - "y": 21 + "y": 29 }, "hiddenSeries": false, "id": 4, @@ -478,17 +567,20 @@ }, "paceLength": 10, "percentage": false, - "pluginVersion": "7.5.2", + "pluginVersion": "9.1.5", "pointradius": 5, "points": false, "renderer": "flot", - "repeat": null, "seriesOverrides": [], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { + "datasource": { + "type": "prometheus", + "uid": "P1809F7CD0C75ACF3" + }, "expr": "node_memory_MemTotal{job=\"node-exporter\", instance=~\"$node.*\"}\n- node_memory_MemFree{job=\"node-exporter\", instance=~\"$node.*\"}\n- node_memory_Buffers{job=\"node-exporter\", instance=~\"$node.*\"}\n- node_memory_Cached{job=\"node-exporter\", instance=~\"$node.*\"}\n", "format": "time_series", "intervalFactor": 2, @@ -496,6 +588,10 @@ "refId": "A" }, { + "datasource": { + "type": "prometheus", + "uid": "P1809F7CD0C75ACF3" + }, "expr": "node_memory_Buffers{job=\"node-exporter\", instance=~\"$node.*\"}", "format": "time_series", "intervalFactor": 2, @@ -503,6 +599,10 @@ "refId": "B" }, { + "datasource": { + "type": "prometheus", + "uid": "P1809F7CD0C75ACF3" + }, "expr": "node_memory_Cached{job=\"node-exporter\", instance=~\"$node.*\"}", "format": "time_series", "intervalFactor": 2, @@ -510,6 +610,10 @@ "refId": "C" }, { + "datasource": { + "type": "prometheus", + "uid": "P1809F7CD0C75ACF3" + }, "expr": "node_memory_MemFree{job=\"node-exporter\", instance=~\"$node.*\"}", "format": "time_series", "intervalFactor": 2, @@ -518,9 +622,7 @@ } ], "thresholds": [], - "timeFrom": null, "timeRegions": [], - "timeShift": null, "title": "Memory Usage", "tooltip": { "shared": true, @@ -529,100 +631,98 @@ }, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": null, "show": true, "values": [] }, "yaxes": [ { "format": "bytes", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": true }, { "format": "bytes", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": true } ], "yaxis": { - "align": false, - "alignLevel": null + "align": false } }, { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "rgba(50, 172, 45, 0.97)", - "rgba(237, 129, 40, 0.89)", - "rgba(245, 54, 54, 0.9)" - ], - "datasource": "prometheus", - "fieldConfig": { - "defaults": {}, - "overrides": [] + "datasource": { + "type": "prometheus", + "uid": "P1809F7CD0C75ACF3" }, - "format": "percent", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": true, - "thresholdLabels": false, - "thresholdMarkers": true + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [ + { + "options": { + "match": "null", + "result": { + "text": "N/A" + } + }, + "type": "special" + } + ], + "max": 100, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "rgba(50, 172, 45, 0.97)", + "value": null + }, + { + "color": "rgba(237, 129, 40, 0.89)", + "value": 80 + }, + { + "color": "rgba(245, 54, 54, 0.9)", + "value": 90 + } + ] + }, + "unit": "percent" + }, + "overrides": [] }, "gridPos": { "h": 7, "w": 6, "x": 18, - "y": 21 + "y": 29 }, "id": 5, - "interval": null, "links": [], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], "maxDataPoints": 100, - "nullPointMode": "connected", - "nullText": null, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false + "options": { + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showThresholdLabels": false, + "showThresholdMarkers": true }, - "tableColumn": "", + "pluginVersion": "9.1.5", "targets": [ { + "datasource": { + "type": "prometheus", + "uid": "P1809F7CD0C75ACF3" + }, "expr": "(\n node_memory_MemTotal{job=\"node-exporter\", instance=~\"$node.*\"}\n- node_memory_MemFree{job=\"node-exporter\", instance=~\"$node.*\"}\n- node_memory_Buffers{job=\"node-exporter\", instance=~\"$node.*\"}\n- node_memory_Cached{job=\"node-exporter\", instance=~\"$node.*\"}\n) * 100\n /\nnode_memory_MemTotal{job=\"node-exporter\", instance=~\"$node.*\"}\n", "format": "time_series", "intervalFactor": 2, @@ -630,28 +730,17 @@ "refId": "A" } ], - "thresholds": "80, 90", "title": "Memory Usage", - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" - } - ], - "valueName": "current" + "type": "gauge" }, { "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, - "datasource": "prometheus", - "fieldConfig": { - "defaults": {}, - "overrides": [] + "datasource": { + "type": "prometheus", + "uid": "P1809F7CD0C75ACF3" }, "fill": 1, "fillGradient": 0, @@ -659,7 +748,7 @@ "h": 7, "w": 18, "x": 0, - "y": 28 + "y": 36 }, "hiddenSeries": false, "id": 6, @@ -683,11 +772,10 @@ }, "paceLength": 10, "percentage": false, - "pluginVersion": "7.5.2", + "pluginVersion": "9.1.5", "pointradius": 5, "points": false, "renderer": "flot", - "repeat": null, "seriesOverrides": [ { "alias": "read", @@ -703,6 +791,10 @@ "steppedLine": false, "targets": [ { + "datasource": { + "type": "prometheus", + "uid": "P1809F7CD0C75ACF3" + }, "expr": "sum by (instance) (rate(node_disk_bytes_read{job=\"node-exporter\", instance=~\"$node.*\"}[2m]))", "format": "time_series", "intervalFactor": 2, @@ -710,6 +802,10 @@ "refId": "A" }, { + "datasource": { + "type": "prometheus", + "uid": "P1809F7CD0C75ACF3" + }, "expr": "sum by (instance) (rate(node_disk_bytes_written{job=\"node-exporter\", instance=~\"$node.*\"}[2m]))", "format": "time_series", "intervalFactor": 2, @@ -717,6 +813,10 @@ "refId": "B" }, { + "datasource": { + "type": "prometheus", + "uid": "P1809F7CD0C75ACF3" + }, "expr": "sum by (instance) (rate(node_disk_io_time_ms{job=\"node-exporter\", instance=~\"$node.*\"}[2m]))", "format": "time_series", "intervalFactor": 2, @@ -725,9 +825,7 @@ } ], "thresholds": [], - "timeFrom": null, "timeRegions": [], - "timeShift": null, "title": "Disk I/O", "tooltip": { "shared": true, @@ -736,100 +834,98 @@ }, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": null, "show": true, "values": [] }, "yaxes": [ { "format": "bytes", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": true }, { "format": "ms", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": true } ], "yaxis": { - "align": false, - "alignLevel": null + "align": false } }, { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "rgba(50, 172, 45, 0.97)", - "rgba(237, 129, 40, 0.89)", - "rgba(245, 54, 54, 0.9)" - ], - "datasource": "prometheus", - "fieldConfig": { - "defaults": {}, - "overrides": [] + "datasource": { + "type": "prometheus", + "uid": "P1809F7CD0C75ACF3" }, - "format": "percent", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": true, - "thresholdLabels": false, - "thresholdMarkers": true + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [ + { + "options": { + "match": "null", + "result": { + "text": "N/A" + } + }, + "type": "special" + } + ], + "max": 100, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "rgba(50, 172, 45, 0.97)", + "value": null + }, + { + "color": "rgba(237, 129, 40, 0.89)", + "value": 80 + }, + { + "color": "rgba(245, 54, 54, 0.9)", + "value": 90 + } + ] + }, + "unit": "percent" + }, + "overrides": [] }, "gridPos": { "h": 7, "w": 6, "x": 18, - "y": 28 + "y": 36 }, "id": 7, - "interval": null, "links": [], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], "maxDataPoints": 100, - "nullPointMode": "connected", - "nullText": null, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false + "options": { + "orientation": "horizontal", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showThresholdLabels": false, + "showThresholdMarkers": true }, - "tableColumn": "", + "pluginVersion": "9.1.5", "targets": [ { + "datasource": { + "type": "prometheus", + "uid": "P1809F7CD0C75ACF3" + }, "expr": "(\n sum(node_filesystem_size{job=\"node-exporter\", device!=\"rootfs\", instance=~\"$node.*\"})\n- sum(node_filesystem_avail{job=\"node-exporter\", device!=\"rootfs\", instance=~\"$node.*\"})\n) * 100\n /\nsum(node_filesystem_size{job=\"node-exporter\", device!=\"rootfs\", instance=~\"$node.*\"})\n", "format": "time_series", "intervalFactor": 2, @@ -837,28 +933,17 @@ "refId": "A" } ], - "thresholds": "80, 90", "title": "Disk Space Usage", - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" - } - ], - "valueName": "current" + "type": "gauge" }, { "aliasColors": {}, "bars": false, "dashLength": 10, "dashes": false, - "datasource": "prometheus", - "fieldConfig": { - "defaults": {}, - "overrides": [] + "datasource": { + "type": "prometheus", + "uid": "P1809F7CD0C75ACF3" }, "fill": 1, "fillGradient": 0, @@ -866,7 +951,7 @@ "h": 7, "w": 12, "x": 0, - "y": 35 + "y": 43 }, "hiddenSeries": false, "id": 8, @@ -890,17 +975,20 @@ }, "paceLength": 10, "percentage": false, - "pluginVersion": "7.5.2", + "pluginVersion": "9.1.5", "pointradius": 5, "points": false, "renderer": "flot", - "repeat": null, "seriesOverrides": [], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { + "datasource": { + "type": "prometheus", + "uid": "P1809F7CD0C75ACF3" + }, "expr": "rate(node_network_receive_bytes{job=\"node-exporter\", instance=~\"$node.*\", device!~\"lo\"}[5m])", "format": "time_series", "intervalFactor": 2, @@ -909,9 +997,7 @@ } ], "thresholds": [], - "timeFrom": null, "timeRegions": [], - "timeShift": null, "title": "Network Received", "tooltip": { "shared": true, @@ -920,33 +1006,24 @@ }, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": null, "show": true, "values": [] }, "yaxes": [ { "format": "bytes", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": true }, { "format": "bytes", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": true } ], "yaxis": { - "align": false, - "alignLevel": null + "align": false } }, { @@ -954,10 +1031,9 @@ "bars": false, "dashLength": 10, "dashes": false, - "datasource": "prometheus", - "fieldConfig": { - "defaults": {}, - "overrides": [] + "datasource": { + "type": "prometheus", + "uid": "P1809F7CD0C75ACF3" }, "fill": 1, "fillGradient": 0, @@ -965,7 +1041,7 @@ "h": 7, "w": 12, "x": 12, - "y": 35 + "y": 43 }, "hiddenSeries": false, "id": 9, @@ -989,17 +1065,20 @@ }, "paceLength": 10, "percentage": false, - "pluginVersion": "7.5.2", + "pluginVersion": "9.1.5", "pointradius": 5, "points": false, "renderer": "flot", - "repeat": null, "seriesOverrides": [], "spaceLength": 10, "stack": false, "steppedLine": false, "targets": [ { + "datasource": { + "type": "prometheus", + "uid": "P1809F7CD0C75ACF3" + }, "expr": "rate(node_network_transmit_bytes{job=\"node-exporter\", instance=~\"$node.*\", device!~\"lo\"}[5m])", "format": "time_series", "intervalFactor": 2, @@ -1008,9 +1087,7 @@ } ], "thresholds": [], - "timeFrom": null, "timeRegions": [], - "timeShift": null, "title": "Network Transmitted", "tooltip": { "shared": true, @@ -1019,56 +1096,46 @@ }, "type": "graph", "xaxis": { - "buckets": null, "mode": "time", - "name": null, "show": true, "values": [] }, "yaxes": [ { "format": "bytes", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": true }, { "format": "bytes", - "label": null, "logBase": 1, - "max": null, - "min": null, "show": true } ], "yaxis": { - "align": false, - "alignLevel": null + "align": false } } ], "refresh": "", - "schemaVersion": 27, + "schemaVersion": 37, "style": "dark", "tags": [], "templating": { "list": [ { - "allValue": null, "current": { "selected": false, - "text": "10.101.132.177", - "value": "10.101.132.177" + "text": "10.101.133.143", + "value": "10.101.133.143" + }, + "datasource": { + "type": "prometheus", + "uid": "P1809F7CD0C75ACF3" }, - "datasource": "prometheus", "definition": "label_values(node_boot_time{job=\"node-exporter\"}, instance)", - "description": null, - "error": null, "hide": 0, "includeAll": false, - "label": null, "multi": false, "name": "node", "options": [], @@ -1081,7 +1148,6 @@ "skipUrlSync": false, "sort": 0, "tagValuesQuery": "", - "tags": [], "tagsQuery": "", "type": "query", "useTags": false @@ -1120,5 +1186,6 @@ "timezone": "browser", "title": "node", "uid": "node", - "version": 8 + "version": 1, + "weekStart": "" } \ No newline at end of file diff --git a/install/kubernetes/prometheus/grafana/dashboard/nodes.json b/install/kubernetes/prometheus/grafana/dashboard/nodes.json deleted file mode 100644 index 811c36dc..00000000 --- a/install/kubernetes/prometheus/grafana/dashboard/nodes.json +++ /dev/null @@ -1,920 +0,0 @@ -{ - "annotations": { - "list": [ - - ] - }, - "editable": false, - "gnetId": null, - "graphTooltip": 0, - "hideControls": false, - "id": null, - "links": [ - - ], - "refresh": "", - "rows": [ - { - "collapse": false, - "collapsed": false, - "height": "250px", - "panels": [ - { - "aliasColors": { - - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "gridPos": { - - }, - "id": 2, - "legend": { - "alignAsTable": false, - "avg": false, - "current": false, - "max": false, - "min": false, - "rightSide": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ - - ], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "repeat": null, - "seriesOverrides": [ - - ], - "spaceLength": 10, - "span": 6, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "100 - (avg by (cpu) (irate(node_cpu{job=\"node-exporter\", mode=\"idle\", instance=\"$instance\"}[5m])) * 100)\n", - "format": "time_series", - "intervalFactor": 10, - "legendFormat": "{{cpu}}", - "refId": "A" - } - ], - "thresholds": [ - - ], - "timeFrom": null, - "timeShift": null, - "title": "Idle CPU", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ - - ] - }, - "yaxes": [ - { - "format": "percent", - "label": null, - "logBase": 1, - "max": 100, - "min": 0, - "show": true - }, - { - "format": "percent", - "label": null, - "logBase": 1, - "max": 100, - "min": 0, - "show": true - } - ] - }, - { - "aliasColors": { - - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "gridPos": { - - }, - "id": 3, - "legend": { - "alignAsTable": false, - "avg": false, - "current": false, - "max": false, - "min": false, - "rightSide": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ - - ], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "repeat": null, - "seriesOverrides": [ - - ], - "spaceLength": 10, - "span": 6, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "node_load1{job=\"node-exporter\", instance=\"$instance\"} * 100", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "load 1m", - "refId": "A" - }, - { - "expr": "node_load5{job=\"node-exporter\", instance=\"$instance\"} * 100", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "load 5m", - "refId": "B" - }, - { - "expr": "node_load15{job=\"node-exporter\", instance=\"$instance\"} * 100", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "load 15m", - "refId": "C" - } - ], - "thresholds": [ - - ], - "timeFrom": null, - "timeShift": null, - "title": "System load", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ - - ] - }, - "yaxes": [ - { - "format": "percent", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "percent", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": false, - "title": "Dashboard Row", - "titleSize": "h6", - "type": "row" - }, - { - "collapse": false, - "collapsed": false, - "height": "250px", - "panels": [ - { - "aliasColors": { - - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "gridPos": { - - }, - "id": 4, - "legend": { - "alignAsTable": false, - "avg": false, - "current": false, - "max": false, - "min": false, - "rightSide": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ - - ], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "repeat": null, - "seriesOverrides": [ - - ], - "spaceLength": 10, - "span": 9, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "node_memory_MemTotal{job=\"node-exporter\", instance=\"$instance\"}\n- node_memory_MemFree{job=\"node-exporter\", instance=\"$instance\"}\n- node_memory_Buffers{job=\"node-exporter\", instance=\"$instance\"}\n- node_memory_Cached{job=\"node-exporter\", instance=\"$instance\"}\n", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "memory used", - "refId": "A" - }, - { - "expr": "node_memory_Buffers{job=\"node-exporter\", instance=\"$instance\"}", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "memory buffers", - "refId": "B" - }, - { - "expr": "node_memory_Cached{job=\"node-exporter\", instance=\"$instance\"}", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "memory cached", - "refId": "C" - }, - { - "expr": "node_memory_MemFree{job=\"node-exporter\", instance=\"$instance\"}", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "memory free", - "refId": "D" - } - ], - "thresholds": [ - - ], - "timeFrom": null, - "timeShift": null, - "title": "Memory Usage", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ - - ] - }, - "yaxes": [ - { - "format": "bytes", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "bytes", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ] - }, - { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "rgba(50, 172, 45, 0.97)", - "rgba(237, 129, 40, 0.89)", - "rgba(245, 54, 54, 0.9)" - ], - "datasource": "$datasource", - "format": "percent", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": true, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "gridPos": { - - }, - "id": 5, - "interval": null, - "links": [ - - ], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "nullPointMode": "connected", - "nullText": null, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "span": 3, - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "tableColumn": "", - "targets": [ - { - "expr": "(\n node_memory_MemTotal{job=\"node-exporter\", instance=\"$instance\"}\n- node_memory_MemFree{job=\"node-exporter\", instance=\"$instance\"}\n- node_memory_Buffers{job=\"node-exporter\", instance=\"$instance\"}\n- node_memory_Cached{job=\"node-exporter\", instance=\"$instance\"}\n) * 100\n /\nnode_memory_MemTotal{job=\"node-exporter\", instance=\"$instance\"}\n", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "" - } - ], - "thresholds": "80, 90", - "title": "Memory Usage", - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" - } - ], - "valueName": "current" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": false, - "title": "Dashboard Row", - "titleSize": "h6", - "type": "row" - }, - { - "collapse": false, - "collapsed": false, - "height": "250px", - "panels": [ - { - "aliasColors": { - - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "gridPos": { - - }, - "id": 6, - "legend": { - "alignAsTable": false, - "avg": false, - "current": false, - "max": false, - "min": false, - "rightSide": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ - - ], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "repeat": null, - "seriesOverrides": [ - { - "alias": "read", - "yaxis": 1 - }, - { - "alias": "io time", - "yaxis": 2 - } - ], - "spaceLength": 10, - "span": 9, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum by (instance) (rate(node_disk_bytes_read{job=\"node-exporter\", instance=\"$instance\"}[2m]))", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "read", - "refId": "A" - }, - { - "expr": "sum by (instance) (rate(node_disk_bytes_written{job=\"node-exporter\", instance=\"$instance\"}[2m]))", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "written", - "refId": "B" - }, - { - "expr": "sum by (instance) (rate(node_disk_io_time_ms{job=\"node-exporter\", instance=\"$instance\"}[2m]))", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "io time", - "refId": "C" - } - ], - "thresholds": [ - - ], - "timeFrom": null, - "timeShift": null, - "title": "Disk I/O", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ - - ] - }, - "yaxes": [ - { - "format": "bytes", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "ms", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ] - }, - { - "cacheTimeout": null, - "colorBackground": false, - "colorValue": false, - "colors": [ - "rgba(50, 172, 45, 0.97)", - "rgba(237, 129, 40, 0.89)", - "rgba(245, 54, 54, 0.9)" - ], - "datasource": "$datasource", - "format": "percent", - "gauge": { - "maxValue": 100, - "minValue": 0, - "show": true, - "thresholdLabels": false, - "thresholdMarkers": true - }, - "gridPos": { - - }, - "id": 7, - "interval": null, - "links": [ - - ], - "mappingType": 1, - "mappingTypes": [ - { - "name": "value to text", - "value": 1 - }, - { - "name": "range to text", - "value": 2 - } - ], - "maxDataPoints": 100, - "nullPointMode": "connected", - "nullText": null, - "postfix": "", - "postfixFontSize": "50%", - "prefix": "", - "prefixFontSize": "50%", - "rangeMaps": [ - { - "from": "null", - "text": "N/A", - "to": "null" - } - ], - "span": 3, - "sparkline": { - "fillColor": "rgba(31, 118, 189, 0.18)", - "full": false, - "lineColor": "rgb(31, 120, 193)", - "show": false - }, - "tableColumn": "", - "targets": [ - { - "expr": "(\n sum(node_filesystem_size{job=\"node-exporter\", device!=\"rootfs\", instance=\"$instance\"})\n- sum(node_filesystem_avail{job=\"node-exporter\", device!=\"rootfs\", instance=\"$instance\"})\n) * 100\n /\nsum(node_filesystem_size{job=\"node-exporter\", device!=\"rootfs\", instance=\"$instance\"})\n", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "" - } - ], - "thresholds": "80, 90", - "title": "Disk Space Usage", - "type": "singlestat", - "valueFontSize": "80%", - "valueMaps": [ - { - "op": "=", - "text": "N/A", - "value": "null" - } - ], - "valueName": "current" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": false, - "title": "Dashboard Row", - "titleSize": "h6", - "type": "row" - }, - { - "collapse": false, - "collapsed": false, - "height": "250px", - "panels": [ - { - "aliasColors": { - - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "gridPos": { - - }, - "id": 8, - "legend": { - "alignAsTable": false, - "avg": false, - "current": false, - "max": false, - "min": false, - "rightSide": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ - - ], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "repeat": null, - "seriesOverrides": [ - - ], - "spaceLength": 10, - "span": 6, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "rate(node_network_receive_bytes{job=\"node-exporter\", instance=\"$instance\", device!\u007e\"lo\"}[5m])", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{device}}", - "refId": "A" - } - ], - "thresholds": [ - - ], - "timeFrom": null, - "timeShift": null, - "title": "Network Received", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ - - ] - }, - "yaxes": [ - { - "format": "bytes", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "bytes", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ] - }, - { - "aliasColors": { - - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "gridPos": { - - }, - "id": 9, - "legend": { - "alignAsTable": false, - "avg": false, - "current": false, - "max": false, - "min": false, - "rightSide": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ - - ], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "repeat": null, - "seriesOverrides": [ - - ], - "spaceLength": 10, - "span": 6, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "rate(node_network_transmit_bytes{job=\"node-exporter\", instance=\"$instance\", device!\u007e\"lo\"}[5m])", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{device}}", - "refId": "A" - } - ], - "thresholds": [ - - ], - "timeFrom": null, - "timeShift": null, - "title": "Network Transmitted", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ - - ] - }, - "yaxes": [ - { - "format": "bytes", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "bytes", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": false, - "title": "Dashboard Row", - "titleSize": "h6", - "type": "row" - } - ], - "schemaVersion": 14, - "style": "dark", - "tags": [ - - ], - "templating": { - "list": [ - { - "current": { - "text": "Prometheus", - "value": "Prometheus" - }, - "hide": 0, - "label": null, - "name": "datasource", - "options": [ - - ], - "query": "prometheus", - "refresh": 1, - "regex": "", - "type": "datasource" - }, - { - "allValue": null, - "current": { - - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": false, - "label": null, - "multi": false, - "name": "instance", - "options": [ - - ], - "query": "label_values(node_boot_time{job=\"node-exporter\"}, instance)", - "refresh": 2, - "regex": "", - "sort": 0, - "tagValuesQuery": "", - "tags": [ - - ], - "tagsQuery": "", - "type": "query", - "useTags": false - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "browser", - "title": "Nodes", - "uid": "fa49a4706d07a042595b664c87fb33ea", - "version": 0 -} \ No newline at end of file diff --git a/install/kubernetes/prometheus/grafana/dashboard/pod-info.json b/install/kubernetes/prometheus/grafana/dashboard/pod-info.json index 740617ba..a0374cea 100644 --- a/install/kubernetes/prometheus/grafana/dashboard/pod-info.json +++ b/install/kubernetes/prometheus/grafana/dashboard/pod-info.json @@ -3,24 +3,540 @@ "list": [ { "builtIn": 1, - "datasource": "-- Grafana --", + "datasource": { + "type": "datasource", + "uid": "grafana" + }, "enable": true, "hide": true, "iconColor": "rgba(0, 211, 255, 1)", "name": "Annotations & Alerts", + "target": { + "limit": 100, + "matchAny": false, + "tags": [], + "type": "dashboard" + }, "type": "dashboard" } ] }, "editable": true, - "gnetId": null, + "fiscalYearStartMonth": 0, "graphTooltip": 0, - "id": 12, - "iteration": 1661163099924, "links": [], + "liveNow": false, "panels": [ { - "datasource": "prometheus", + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "type": "prometheus", + "uid": "P1809F7CD0C75ACF3" + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 24, + "x": 0, + "y": 0 + }, + "hiddenSeries": false, + "id": 8, + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "max": true, + "min": false, + "rightSide": true, + "show": true, + "sort": "max", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "paceLength": 10, + "percentage": false, + "pluginVersion": "9.1.5", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "P1809F7CD0C75ACF3" + }, + "expr": "sum by (pod) (container_memory_working_set_bytes{job=\"kubelet\", image!=\"\",container_name!=\"POD\",pod=~\".*$pod.*\"})", + "format": "time_series", + "intervalFactor": 2, + "legendFormat": "{{ pod}}", + "refId": "B" + } + ], + "thresholds": [], + "timeRegions": [], + "title": "Memory Usage", + "tooltip": { + "shared": true, + "sort": 1, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "logBase": 1, + "min": 0, + "show": true + }, + { + "format": "bytes", + "logBase": 1, + "min": 0, + "show": false + } + ], + "yaxis": { + "align": false + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "type": "prometheus", + "uid": "P1809F7CD0C75ACF3" + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 24, + "x": 0, + "y": 7 + }, + "hiddenSeries": false, + "id": 3, + "legend": { + "alignAsTable": true, + "avg": false, + "current": false, + "hideEmpty": false, + "max": true, + "min": false, + "rightSide": true, + "show": true, + "sort": "max", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "paceLength": 10, + "percentage": false, + "pluginVersion": "9.1.5", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "P1809F7CD0C75ACF3" + }, + "exemplar": true, + "expr": "sum by (pod) (rate(container_cpu_usage_seconds_total{job=\"kubelet\", image!=\"\",container_name!=\"POD\",pod=~\".*$pod.*\"}[2m]))", + "format": "time_series", + "instant": false, + "interval": "30s", + "intervalFactor": 1, + "legendFormat": "{{pod}}", + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "P1809F7CD0C75ACF3" + }, + "expr": "", + "format": "time_series", + "intervalFactor": 1, + "refId": "B" + } + ], + "thresholds": [], + "timeRegions": [], + "title": "CPU Usage", + "tooltip": { + "shared": true, + "sort": 1, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "logBase": 1, + "min": 0, + "show": true + }, + { + "format": "short", + "logBase": 1, + "min": 0, + "show": false + } + ], + "yaxis": { + "align": false + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "type": "prometheus", + "uid": "P1809F7CD0C75ACF3" + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 24, + "x": 0, + "y": 14 + }, + "hiddenSeries": false, + "id": 5, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "hideEmpty": false, + "max": true, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "paceLength": 10, + "percentage": false, + "pluginVersion": "9.1.5", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "P1809F7CD0C75ACF3" + }, + "expr": "DCGM_FI_DEV_GPU_UTIL{exported_pod=~\".*$pod.*\"}", + "format": "time_series", + "instant": false, + "interval": "30s", + "intervalFactor": 1, + "legendFormat": "{{exported_pod}}", + "refId": "A" + } + ], + "thresholds": [], + "timeRegions": [], + "title": "GPU 使用率", + "tooltip": { + "shared": true, + "sort": 1, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "logBase": 1, + "min": 0, + "show": true + }, + { + "format": "short", + "logBase": 1, + "min": 0, + "show": false + } + ], + "yaxis": { + "align": false + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "type": "prometheus", + "uid": "P1809F7CD0C75ACF3" + }, + "fieldConfig": { + "defaults": { + "links": [] + }, + "overrides": [] + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 24, + "x": 0, + "y": 21 + }, + "hiddenSeries": false, + "id": 7, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": true, + "min": false, + "rightSide": true, + "show": true, + "sort": "avg", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "paceLength": 10, + "percentage": false, + "pluginVersion": "9.1.5", + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "P1809F7CD0C75ACF3" + }, + "expr": "DCGM_FI_DEV_MEM_COPY_UTIL{exported_pod=~\".*$pod.*\"}", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{exported_pod}} GPU {{gpu}}", + "refId": "A" + } + ], + "thresholds": [], + "timeRegions": [], + "title": "pod GPU 显存占用率", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "percent", + "logBase": 1, + "max": "100", + "min": "0", + "show": true + }, + { + "format": "short", + "logBase": 1, + "show": true + } + ], + "yaxis": { + "align": false + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": { + "type": "prometheus", + "uid": "P1809F7CD0C75ACF3" + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 24, + "x": 0, + "y": 29 + }, + "hiddenSeries": false, + "id": 4, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null", + "options": { + "alertThreshold": true + }, + "paceLength": 10, + "percentage": false, + "pluginVersion": "9.1.5", + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "P1809F7CD0C75ACF3" + }, + "exemplar": true, + "expr": "sum by (pod) (rate(container_network_receive_bytes_total{job=\"kubelet\", image!=\"\",container_name!=\"POD\",pod=~\".*$pod.*\"}[2m]))", + "format": "time_series", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{ pod}} receive", + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "P1809F7CD0C75ACF3" + }, + "exemplar": true, + "expr": "- sum by (pod) (rate(container_network_transmit_bytes_total{job=\"kubelet\", image!=\"\",container_name!=\"POD\",pod=~\".*$pod.*\"}[2m]))", + "format": "time_series", + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{ pod}} send", + "refId": "B" + } + ], + "thresholds": [], + "timeRegions": [], + "title": "Network I/O", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "mode": "time", + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "logBase": 1, + "min": 0, + "show": true + }, + { + "format": "bytes", + "logBase": 1, + "min": 0, + "show": true + } + ], + "yaxis": { + "align": false + } + }, + { + "datasource": { + "type": "prometheus", + "uid": "P1809F7CD0C75ACF3" + }, "description": "", "fieldConfig": { "defaults": { @@ -30,7 +546,8 @@ "custom": { "align": "left", "displayMode": "color-text", - "filterable": true + "filterable": true, + "inspect": false }, "mappings": [], "thresholds": { @@ -66,11 +583,18 @@ "h": 7, "w": 24, "x": 0, - "y": 0 + "y": 36 }, "id": 2, "links": [], "options": { + "footer": { + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, "showHeader": true, "sortBy": [ { @@ -79,10 +603,13 @@ } ] }, - "pluginVersion": "7.5.2", - "repeat": null, + "pluginVersion": "9.1.5", "targets": [ { + "datasource": { + "type": "prometheus", + "uid": "P1809F7CD0C75ACF3" + }, "exemplar": true, "expr": "container_spec_memory_limit_bytes{job=\"kubelet\", image!=\"\",container_name!=\"POD\",pod!=\"\",namespace=~\"pipeline|jupyter|service|automl\"}/1000/1000/1000", "format": "table", @@ -93,6 +620,10 @@ "refId": "A" }, { + "datasource": { + "type": "prometheus", + "uid": "P1809F7CD0C75ACF3" + }, "exemplar": true, "expr": "container_memory_working_set_bytes{job=\"kubelet\", image!=\"\",container_name!=\"POD\",pod!=\"\",namespace=~\"pipeline|jupyter|service|automl\"}/1000/1000/1000", "format": "table", @@ -104,6 +635,10 @@ "refId": "B" }, { + "datasource": { + "type": "prometheus", + "uid": "P1809F7CD0C75ACF3" + }, "exemplar": true, "expr": "sum by (node,namespace,pod) (rate(container_cpu_usage_seconds_total{job=\"kubelet\", image!=\"\",container_name!=\"POD\",pod!=\"\",namespace=~\"pipeline|jupyter|service|automl\"}[2m]))", "format": "table", @@ -114,6 +649,10 @@ "refId": "C" }, { + "datasource": { + "type": "prometheus", + "uid": "P1809F7CD0C75ACF3" + }, "exemplar": true, "expr": "container_spec_cpu_quota{job=\"kubelet\", image!=\"\",container_name!=\"POD\",pod!=\"\",namespace=~\"pipeline|jupyter|service|automl\"}/100000", "format": "table", @@ -124,14 +663,16 @@ "refId": "D" } ], - "timeFrom": null, - "timeShift": null, "title": "全局pod资源使用", "transformations": [ { "id": "labelsToFields", "options": {} }, + { + "id": "merge", + "options": {} + }, { "id": "organize", "options": { @@ -249,544 +790,20 @@ } ], "type": "table" - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "prometheus", - "decimals": null, - "fieldConfig": { - "defaults": {}, - "overrides": [] - }, - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 7, - "w": 24, - "x": 0, - "y": 7 - }, - "hiddenSeries": false, - "id": 8, - "legend": { - "alignAsTable": true, - "avg": false, - "current": false, - "max": true, - "min": false, - "rightSide": true, - "show": true, - "sort": "max", - "sortDesc": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "options": { - "alertThreshold": true - }, - "paceLength": 10, - "percentage": false, - "pluginVersion": "7.5.2", - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum by (pod) (container_memory_working_set_bytes{job=\"kubelet\", image!=\"\",container_name!=\"POD\",pod=~\".*$pod.*\"})", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{ pod}}", - "refId": "B" - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "Memory Usage", - "tooltip": { - "shared": true, - "sort": 1, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "bytes", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "bytes", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "prometheus", - "fieldConfig": { - "defaults": {}, - "overrides": [] - }, - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 7, - "w": 24, - "x": 0, - "y": 14 - }, - "hiddenSeries": false, - "id": 3, - "legend": { - "alignAsTable": true, - "avg": false, - "current": false, - "hideEmpty": false, - "max": true, - "min": false, - "rightSide": true, - "show": true, - "sort": "max", - "sortDesc": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "options": { - "alertThreshold": true - }, - "paceLength": 10, - "percentage": false, - "pluginVersion": "7.5.2", - "pointradius": 5, - "points": false, - "renderer": "flot", - "repeat": null, - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "exemplar": true, - "expr": "sum by (pod) (rate(container_cpu_usage_seconds_total{job=\"kubelet\", image!=\"\",container_name!=\"POD\",pod=~\".*$pod.*\"}[2m]))", - "format": "time_series", - "instant": false, - "interval": "30s", - "intervalFactor": 1, - "legendFormat": "{{pod}}", - "refId": "A" - }, - { - "expr": "", - "format": "time_series", - "intervalFactor": 1, - "refId": "B" - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "CPU Usage", - "tooltip": { - "shared": true, - "sort": 1, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "prometheus", - "fieldConfig": { - "defaults": {}, - "overrides": [] - }, - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 7, - "w": 24, - "x": 0, - "y": 21 - }, - "hiddenSeries": false, - "id": 5, - "legend": { - "alignAsTable": true, - "avg": true, - "current": true, - "hideEmpty": false, - "max": true, - "min": false, - "rightSide": true, - "show": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "options": { - "alertThreshold": true - }, - "paceLength": 10, - "percentage": false, - "pluginVersion": "7.5.2", - "pointradius": 5, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "DCGM_FI_DEV_GPU_UTIL{exported_pod=~\".*$pod.*\"}", - "format": "time_series", - "instant": false, - "interval": "30s", - "intervalFactor": 1, - "legendFormat": "{{exported_pod}}", - "refId": "A" - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "GPU 使用率", - "tooltip": { - "shared": true, - "sort": 1, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": false - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "prometheus", - "fieldConfig": { - "defaults": { - "links": [] - }, - "overrides": [] - }, - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 8, - "w": 24, - "x": 0, - "y": 28 - }, - "hiddenSeries": false, - "id": 7, - "legend": { - "alignAsTable": true, - "avg": true, - "current": true, - "max": true, - "min": false, - "rightSide": true, - "show": true, - "sort": "avg", - "sortDesc": true, - "total": false, - "values": true - }, - "lines": true, - "linewidth": 2, - "links": [], - "nullPointMode": "null", - "options": { - "alertThreshold": true - }, - "paceLength": 10, - "percentage": false, - "pluginVersion": "7.5.2", - "pointradius": 2, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "DCGM_FI_DEV_MEM_COPY_UTIL{exported_pod=~\".*$pod.*\"}", - "format": "time_series", - "interval": "", - "intervalFactor": 1, - "legendFormat": "{{exported_pod}} GPU {{gpu}}", - "refId": "A" - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "pod GPU 显存占用率", - "tooltip": { - "shared": true, - "sort": 2, - "value_type": "cumulative" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "percent", - "label": null, - "logBase": 1, - "max": "100", - "min": "0", - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "prometheus", - "fieldConfig": { - "defaults": {}, - "overrides": [] - }, - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 7, - "w": 24, - "x": 0, - "y": 36 - }, - "hiddenSeries": false, - "id": 4, - "legend": { - "alignAsTable": true, - "avg": true, - "current": true, - "max": false, - "min": false, - "rightSide": true, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [], - "nullPointMode": "null", - "options": { - "alertThreshold": true - }, - "paceLength": 10, - "percentage": false, - "pluginVersion": "7.5.2", - "pointradius": 5, - "points": false, - "renderer": "flot", - "repeat": null, - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "exemplar": true, - "expr": "sum by (pod) (rate(container_network_receive_bytes_total{job=\"kubelet\", image!=\"\",container_name!=\"POD\",pod=~\".*$pod.*\"}[2m]))", - "format": "time_series", - "interval": "", - "intervalFactor": 2, - "legendFormat": "{{ pod}} receive", - "refId": "A" - }, - { - "exemplar": true, - "expr": "- sum by (pod) (rate(container_network_transmit_bytes_total{job=\"kubelet\", image!=\"\",container_name!=\"POD\",pod=~\".*$pod.*\"}[2m]))", - "format": "time_series", - "interval": "", - "intervalFactor": 1, - "legendFormat": "{{ pod}} send", - "refId": "B" - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "Network I/O", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "bytes", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "bytes", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } } ], "refresh": "5s", - "schemaVersion": 27, + "schemaVersion": 37, "style": "dark", "tags": [], "templating": { "list": [ { - "current": {}, - "description": null, - "error": null, + "current": { + "selected": false, + "text": "a", + "value": "a" + }, "hide": 0, "label": "Pod", "name": "pod", @@ -797,7 +814,7 @@ "value": "rikochen-mtl-daily-usermode-abt" } ], - "query": "rikochen-mtl-daily-usermode-abt", + "query": "a", "skipUrlSync": false, "type": "textbox" } @@ -835,5 +852,6 @@ "timezone": "browser", "title": "pod-info", "uid": "pod-info", - "version": 27 + "version": 1, + "weekStart": "" } \ No newline at end of file diff --git a/install/kubernetes/prometheus/grafana/dashboard/pods.json b/install/kubernetes/prometheus/grafana/dashboard/pods.json deleted file mode 100644 index f0635c5e..00000000 --- a/install/kubernetes/prometheus/grafana/dashboard/pods.json +++ /dev/null @@ -1,483 +0,0 @@ -{ - "annotations": { - "list": [ - - ] - }, - "editable": false, - "gnetId": null, - "graphTooltip": 0, - "hideControls": false, - "id": null, - "links": [ - - ], - "refresh": "", - "rows": [ - { - "collapse": false, - "collapsed": false, - "height": "250px", - "panels": [ - { - "aliasColors": { - - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "gridPos": { - - }, - "id": 2, - "legend": { - "alignAsTable": true, - "avg": true, - "current": true, - "max": false, - "min": false, - "rightSide": true, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ - - ], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "repeat": null, - "seriesOverrides": [ - - ], - "spaceLength": 10, - "span": 12, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum by(container_name) (container_memory_working_set_bytes{job=\"kubelet\", namespace=\"$namespace\", pod_name=\"$pod\", container_name=\u007e\"$container\", container_name!=\"POD\"})", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "Current: {{ container_name }}", - "refId": "A" - }, - { - "expr": "sum by(container) (kube_pod_container_resource_requests_memory_bytes{job=\"kubelet\", namespace=\"$namespace\", pod=\"$pod\", container=\u007e\"$container\", container!=\"POD\"})", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "Requested: {{ container }}", - "refId": "B" - }, - { - "expr": "sum by(container) (kube_pod_container_resource_limits_memory_bytes{job=\"kubelet\", namespace=\"$namespace\", pod=\"$pod\", container=\u007e\"$container\", container!=\"POD\"})", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "Limit: {{ container }}", - "refId": "C" - } - ], - "thresholds": [ - - ], - "timeFrom": null, - "timeShift": null, - "title": "Memory Usage", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ - - ] - }, - "yaxes": [ - { - "format": "bytes", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "bytes", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": false, - "title": "Dashboard Row", - "titleSize": "h6", - "type": "row" - }, - { - "collapse": false, - "collapsed": false, - "height": "250px", - "panels": [ - { - "aliasColors": { - - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "gridPos": { - - }, - "id": 3, - "legend": { - "alignAsTable": true, - "avg": true, - "current": true, - "max": false, - "min": false, - "rightSide": true, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ - - ], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "repeat": null, - "seriesOverrides": [ - - ], - "spaceLength": 10, - "span": 12, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sum by (container_name) (rate(container_cpu_usage_seconds_total{job=\"kubelet\", image!=\"\",container_name!=\"POD\",pod_name=\"$pod\"}[1m]))", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{ container_name }}", - "refId": "A" - } - ], - "thresholds": [ - - ], - "timeFrom": null, - "timeShift": null, - "title": "CPU Usage", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ - - ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": false, - "title": "Dashboard Row", - "titleSize": "h6", - "type": "row" - }, - { - "collapse": false, - "collapsed": false, - "height": "250px", - "panels": [ - { - "aliasColors": { - - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$datasource", - "fill": 1, - "gridPos": { - - }, - "id": 4, - "legend": { - "alignAsTable": true, - "avg": true, - "current": true, - "max": false, - "min": false, - "rightSide": true, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "links": [ - - ], - "nullPointMode": "null", - "percentage": false, - "pointradius": 5, - "points": false, - "renderer": "flot", - "repeat": null, - "seriesOverrides": [ - - ], - "spaceLength": 10, - "span": 12, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "sort_desc(sum by (pod_name) (rate(container_network_receive_bytes_total{job=\"kubelet\", pod_name=\"$pod\"}[1m])))", - "format": "time_series", - "intervalFactor": 2, - "legendFormat": "{{ pod_name }}", - "refId": "A" - } - ], - "thresholds": [ - - ], - "timeFrom": null, - "timeShift": null, - "title": "Network I/O", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ - - ] - }, - "yaxes": [ - { - "format": "bytes", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - }, - { - "format": "bytes", - "label": null, - "logBase": 1, - "max": null, - "min": 0, - "show": true - } - ] - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": false, - "title": "Dashboard Row", - "titleSize": "h6", - "type": "row" - } - ], - "schemaVersion": 14, - "style": "dark", - "tags": [ - - ], - "templating": { - "list": [ - { - "current": { - "text": "Prometheus", - "value": "Prometheus" - }, - "hide": 0, - "label": null, - "name": "datasource", - "options": [ - - ], - "query": "prometheus", - "refresh": 1, - "regex": "", - "type": "datasource" - }, - { - "allValue": null, - "current": { - - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": false, - "label": "Namespace", - "multi": false, - "name": "namespace", - "options": [ - - ], - "query": "label_values(kube_pod_info, namespace)", - "refresh": 2, - "regex": "", - "sort": 0, - "tagValuesQuery": "", - "tags": [ - - ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": null, - "current": { - - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": false, - "label": "Pod", - "multi": false, - "name": "pod", - "options": [ - - ], - "query": "label_values(kube_pod_info{namespace=\u007e\"$namespace\"}, pod)", - "refresh": 2, - "regex": "", - "sort": 0, - "tagValuesQuery": "", - "tags": [ - - ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": null, - "current": { - - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": true, - "label": "Container", - "multi": false, - "name": "container", - "options": [ - - ], - "query": "label_values(kube_pod_container_info{namespace=\"$namespace\", pod=\"$pod\"}, container)", - "refresh": 2, - "regex": "", - "sort": 0, - "tagValuesQuery": "", - "tags": [ - - ], - "tagsQuery": "", - "type": "query", - "useTags": false - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "browser", - "title": "Pods", - "uid": "ab4f13a9892a76a4d21ce8c2445bf4ea", - "version": 0 -} \ No newline at end of file diff --git a/install/kubernetes/prometheus/node-exporter/node-exporter-ds.yml b/install/kubernetes/prometheus/node-exporter/node-exporter-ds.yml index 4702af73..32e32d14 100755 --- a/install/kubernetes/prometheus/node-exporter/node-exporter-ds.yml +++ b/install/kubernetes/prometheus/node-exporter/node-exporter-ds.yml @@ -33,9 +33,6 @@ spec: - --path.procfs=/host/proc - --path.sysfs=/host/sys resources: - limits: - cpu: 102m - memory: 180Mi requests: cpu: 102m memory: 180Mi @@ -56,9 +53,6 @@ spec: hostPort: 9100 name: https resources: - limits: - cpu: 20m - memory: 40Mi requests: cpu: 10m memory: 20Mi