change monitoring and grafana dashboard

This commit is contained in:
chendile 2023-04-06 22:52:13 +08:00
parent b98a081a19
commit 227bae6122
8 changed files with 1362 additions and 2725 deletions

View File

@ -24,7 +24,6 @@
"editable": true,
"fiscalYearStartMonth": 0,
"graphTooltip": 0,
"id": 33,
"links": [],
"liveNow": false,
"panels": [
@ -52,6 +51,108 @@
"y": 0
},
"hiddenSeries": false,
"id": 13,
"legend": {
"alignAsTable": true,
"avg": true,
"current": true,
"max": true,
"min": false,
"rightSide": true,
"show": true,
"sort": "avg",
"sortDesc": true,
"total": false,
"values": true
},
"lines": true,
"linewidth": 2,
"links": [],
"nullPointMode": "null",
"options": {
"alertThreshold": true
},
"paceLength": 10,
"percentage": false,
"pluginVersion": "9.1.5",
"pointradius": 2,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "P1809F7CD0C75ACF3"
},
"exemplar": true,
"expr": "DCGM_FI_DEV_GPU_UTIL{exported_pod=''}",
"format": "time_series",
"interval": "",
"intervalFactor": 1,
"legendFormat": " {{instance}} GPU {{gpu}}",
"refId": "A"
}
],
"thresholds": [],
"timeRegions": [],
"title": "机器 非平台应用 GPU 利用率",
"tooltip": {
"shared": true,
"sort": 2,
"value_type": "cumulative"
},
"type": "graph",
"xaxis": {
"mode": "time",
"show": true,
"values": []
},
"yaxes": [
{
"format": "percent",
"logBase": 1,
"max": "100",
"min": "0",
"show": true
},
{
"format": "short",
"logBase": 1,
"show": true
}
],
"yaxis": {
"align": false
}
},
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": {
"type": "prometheus",
"uid": "P1809F7CD0C75ACF3"
},
"fieldConfig": {
"defaults": {
"links": []
},
"overrides": []
},
"fill": 1,
"fillGradient": 0,
"gridPos": {
"h": 8,
"w": 24,
"x": 0,
"y": 8
},
"hiddenSeries": false,
"id": 14,
"legend": {
"alignAsTable": true,
@ -61,7 +162,7 @@
"min": false,
"rightSide": true,
"show": true,
"sort": "max",
"sort": "avg",
"sortDesc": true,
"total": false,
"values": true
@ -131,109 +232,45 @@
}
},
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": {
"type": "prometheus",
"uid": "P1809F7CD0C75ACF3"
"alert": {
"conditions": [
{
"evaluator": {
"params": [
50000
],
"type": "gt"
},
"operator": {
"type": "and"
},
"query": {
"params": [
"A",
"5m",
"now"
]
},
"reducer": {
"params": [],
"type": "avg"
},
"type": "query"
}
],
"executionErrorState": "alerting",
"for": "5m",
"frequency": "5m",
"handler": 1,
"message": "负载过高",
"name": "System load alert",
"noDataState": "no_data",
"notifications": [
{
"uid": "FMG-3Bv7k"
}
]
},
"fieldConfig": {
"defaults": {
"links": []
},
"overrides": []
},
"fill": 1,
"fillGradient": 0,
"gridPos": {
"h": 8,
"w": 24,
"x": 0,
"y": 8
},
"hiddenSeries": false,
"id": 16,
"legend": {
"alignAsTable": true,
"avg": true,
"current": true,
"max": true,
"min": false,
"rightSide": true,
"show": true,
"sort": "avg",
"sortDesc": true,
"total": false,
"values": true
},
"lines": true,
"linewidth": 2,
"links": [],
"nullPointMode": "null",
"options": {
"alertThreshold": true
},
"paceLength": 10,
"percentage": false,
"pluginVersion": "9.1.5",
"pointradius": 2,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "P1809F7CD0C75ACF3"
},
"editorMode": "code",
"expr": "DCGM_FI_DEV_MEM_COPY_UTIL{exported_pod!=''}",
"format": "time_series",
"interval": "",
"intervalFactor": 1,
"legendFormat": "{{instance}} {{exported_pod}} GPU {{gpu}}",
"range": true,
"refId": "A"
}
],
"thresholds": [],
"timeRegions": [],
"title": "pod GPU 占用率",
"tooltip": {
"shared": true,
"sort": 2,
"value_type": "cumulative"
},
"type": "graph",
"xaxis": {
"mode": "time",
"show": true,
"values": []
},
"yaxes": [
{
"format": "percent",
"logBase": 1,
"max": "100",
"min": "0",
"show": true
},
{
"format": "short",
"logBase": 1,
"show": true
}
],
"yaxis": {
"align": false
}
},
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
@ -245,25 +282,23 @@
"fill": 1,
"fillGradient": 0,
"gridPos": {
"h": 8,
"w": 24,
"h": 9,
"w": 8,
"x": 0,
"y": 16
},
"hiddenSeries": false,
"id": 3,
"legend": {
"alignAsTable": true,
"avg": true,
"current": true,
"max": true,
"alignAsTable": false,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"sort": "max",
"sortDesc": true,
"rightSide": false,
"show": false,
"total": false,
"values": true
"values": false
},
"lines": true,
"linewidth": 1,
@ -293,9 +328,42 @@
"intervalFactor": 2,
"legendFormat": "{{instance}}",
"refId": "A"
},
{
"datasource": {
"type": "prometheus",
"uid": "P1809F7CD0C75ACF3"
},
"expr": "node_load5{job=\"node-exporter\"} * 100",
"format": "time_series",
"hide": true,
"intervalFactor": 2,
"legendFormat": "{{instance}}",
"refId": "B"
},
{
"datasource": {
"type": "prometheus",
"uid": "P1809F7CD0C75ACF3"
},
"expr": "node_load15{job=\"node-exporter\"} * 100",
"format": "time_series",
"hide": true,
"intervalFactor": 2,
"legendFormat": "{{instance}}",
"refId": "C"
}
],
"thresholds": [
{
"colorMode": "critical",
"fill": true,
"line": true,
"op": "gt",
"value": 50000,
"visible": true
}
],
"thresholds": [],
"timeRegions": [],
"title": "System load",
"tooltip": {
@ -338,22 +406,22 @@
"fillGradient": 0,
"gridPos": {
"h": 9,
"w": 24,
"x": 0,
"y": 24
"w": 8,
"x": 8,
"y": 16
},
"hiddenSeries": false,
"id": 2,
"legend": {
"alignAsTable": true,
"avg": true,
"current": true,
"max": true,
"alignAsTable": false,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"rightSide": false,
"show": false,
"total": false,
"values": true
"values": false
},
"lines": true,
"linewidth": 1,
@ -378,18 +446,16 @@
"type": "prometheus",
"uid": "P1809F7CD0C75ACF3"
},
"editorMode": "code",
"expr": "100 - (avg by (instance) (irate(node_cpu{job=\"node-exporter\", mode=\"idle\"}[5m])) * 100)\n",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{instance}}",
"range": true,
"refId": "B"
}
],
"thresholds": [],
"timeRegions": [],
"title": "机器 CPU",
"title": "Idle CPU",
"tooltip": {
"shared": true,
"sort": 1,
@ -434,22 +500,22 @@
"fillGradient": 0,
"gridPos": {
"h": 9,
"w": 24,
"x": 0,
"y": 33
"w": 8,
"x": 16,
"y": 16
},
"hiddenSeries": false,
"id": 4,
"legend": {
"alignAsTable": true,
"avg": true,
"current": true,
"max": true,
"alignAsTable": false,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"rightSide": false,
"show": false,
"total": false,
"values": true
"values": false
},
"lines": true,
"linewidth": 1,
@ -526,7 +592,7 @@
"h": 7,
"w": 12,
"x": 0,
"y": 42
"y": 25
},
"hiddenSeries": false,
"id": 8,
@ -538,8 +604,6 @@
"min": false,
"rightSide": true,
"show": true,
"sort": "max",
"sortDesc": true,
"total": false,
"values": true
},
@ -618,7 +682,7 @@
"h": 7,
"w": 12,
"x": 12,
"y": 42
"y": 25
},
"hiddenSeries": false,
"id": 9,
@ -708,7 +772,7 @@
"h": 10,
"w": 24,
"x": 0,
"y": 49
"y": 32
},
"hiddenSeries": false,
"id": 11,
@ -757,12 +821,10 @@
"type": "prometheus",
"uid": "P1809F7CD0C75ACF3"
},
"editorMode": "code",
"expr": "sum by (instance) (rate(node_disk_bytes_read{job=\"node-exporter\"}[2m]))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{instance}} read",
"range": true,
"refId": "A"
},
{
@ -858,6 +920,6 @@
"timezone": "browser",
"title": "all-node",
"uid": "all-node",
"version": 9,
"version": 1,
"weekStart": ""
}

View File

@ -3,28 +3,47 @@
"list": [
{
"builtIn": 1,
"datasource": "-- Grafana --",
"datasource": {
"type": "datasource",
"uid": "grafana"
},
"enable": true,
"hide": true,
"iconColor": "rgba(0, 211, 255, 1)",
"name": "Annotations & Alerts",
"target": {
"limit": 100,
"matchAny": false,
"tags": [],
"type": "dashboard"
},
"type": "dashboard"
}
]
},
"description": "This dashboard is to display the metrics from DCGM Exporter on a Kubernetes (1.13+) cluster",
"editable": true,
"fiscalYearStartMonth": 0,
"gnetId": 12239,
"graphTooltip": 0,
"id": 25,
"links": [],
"liveNow": false,
"panels": [
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "prometheus",
"datasource": {
"type": "prometheus",
"uid": "P1809F7CD0C75ACF3"
},
"fieldConfig": {
"defaults": {
"links": []
},
"overrides": []
},
"fill": 1,
"fillGradient": 0,
"gridPos": {
@ -34,7 +53,7 @@
"y": 0
},
"hiddenSeries": false,
"id": 6,
"id": 21,
"legend": {
"alignAsTable": true,
"avg": true,
@ -44,7 +63,7 @@
"rightSide": true,
"show": true,
"sort": "avg",
"sortDesc": true,
"sortDesc": false,
"total": false,
"values": true
},
@ -53,10 +72,11 @@
"links": [],
"nullPointMode": "null",
"options": {
"dataLinks": []
"alertThreshold": true
},
"paceLength": 10,
"percentage": false,
"pluginVersion": "9.1.5",
"pointradius": 2,
"points": false,
"renderer": "flot",
@ -66,6 +86,10 @@
"steppedLine": false,
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "P1809F7CD0C75ACF3"
},
"expr": "DCGM_FI_DEV_GPU_UTIL{exported_pod!=''}",
"format": "time_series",
"interval": "",
@ -75,9 +99,7 @@
}
],
"thresholds": [],
"timeFrom": null,
"timeRegions": [],
"timeShift": null,
"title": "pod GPU 利用率",
"tooltip": {
"shared": true,
@ -86,16 +108,13 @@
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"format": "percent",
"label": null,
"logBase": 1,
"max": "100",
"min": "0",
@ -103,16 +122,12 @@
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
}
],
"yaxis": {
"align": false,
"alignLevel": null
"align": false
}
},
{
@ -120,7 +135,16 @@
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "prometheus",
"datasource": {
"type": "prometheus",
"uid": "P1809F7CD0C75ACF3"
},
"fieldConfig": {
"defaults": {
"links": []
},
"overrides": []
},
"fill": 1,
"fillGradient": 0,
"gridPos": {
@ -149,10 +173,11 @@
"links": [],
"nullPointMode": "null",
"options": {
"dataLinks": []
"alertThreshold": true
},
"paceLength": 10,
"percentage": false,
"pluginVersion": "9.1.5",
"pointradius": 2,
"points": false,
"renderer": "flot",
@ -162,6 +187,10 @@
"steppedLine": false,
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "P1809F7CD0C75ACF3"
},
"expr": "DCGM_FI_DEV_MEM_COPY_UTIL{exported_pod!=''}",
"format": "time_series",
"interval": "",
@ -171,9 +200,7 @@
}
],
"thresholds": [],
"timeFrom": null,
"timeRegions": [],
"timeShift": null,
"title": "pod GPU 占用率",
"tooltip": {
"shared": true,
@ -182,16 +209,13 @@
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"format": "percent",
"label": null,
"logBase": 1,
"max": "100",
"min": "0",
@ -199,16 +223,12 @@
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
}
],
"yaxis": {
"align": false,
"alignLevel": null
"align": false
}
},
{
@ -216,7 +236,16 @@
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "prometheus",
"datasource": {
"type": "prometheus",
"uid": "P1809F7CD0C75ACF3"
},
"fieldConfig": {
"defaults": {
"links": []
},
"overrides": []
},
"fill": 1,
"fillGradient": 0,
"gridPos": {
@ -244,10 +273,11 @@
"linewidth": 2,
"nullPointMode": "null",
"options": {
"dataLinks": []
"alertThreshold": true
},
"paceLength": 10,
"percentage": false,
"pluginVersion": "9.1.5",
"pointradius": 2,
"points": false,
"renderer": "flot",
@ -257,6 +287,10 @@
"steppedLine": false,
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "P1809F7CD0C75ACF3"
},
"expr": "DCGM_FI_DEV_GPU_UTIL",
"format": "time_series",
"interval": "",
@ -266,9 +300,7 @@
}
],
"thresholds": [],
"timeFrom": null,
"timeRegions": [],
"timeShift": null,
"title": "GPU Utilization",
"tooltip": {
"shared": true,
@ -277,16 +309,13 @@
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"format": "percent",
"label": null,
"logBase": 1,
"max": "100",
"min": "0",
@ -294,16 +323,12 @@
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
}
],
"yaxis": {
"align": false,
"alignLevel": null
"align": false
}
},
{
@ -311,7 +336,16 @@
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "prometheus",
"datasource": {
"type": "prometheus",
"uid": "P1809F7CD0C75ACF3"
},
"fieldConfig": {
"defaults": {
"links": []
},
"overrides": []
},
"fill": 1,
"fillGradient": 0,
"gridPos": {
@ -338,10 +372,11 @@
"links": [],
"nullPointMode": "null",
"options": {
"dataLinks": []
"alertThreshold": true
},
"paceLength": 10,
"percentage": false,
"pluginVersion": "9.1.5",
"pointradius": 2,
"points": false,
"renderer": "flot",
@ -351,6 +386,10 @@
"steppedLine": false,
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "P1809F7CD0C75ACF3"
},
"expr": "DCGM_FI_DEV_MEM_COPY_UTIL",
"format": "time_series",
"interval": "",
@ -360,9 +399,7 @@
}
],
"thresholds": [],
"timeFrom": null,
"timeRegions": [],
"timeShift": null,
"title": "GPU 内存利用率",
"tooltip": {
"shared": true,
@ -371,33 +408,24 @@
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"format": "percent",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
}
],
"yaxis": {
"align": false,
"alignLevel": null
"align": false
}
},
{
@ -405,7 +433,16 @@
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "prometheus",
"datasource": {
"type": "prometheus",
"uid": "P1809F7CD0C75ACF3"
},
"fieldConfig": {
"defaults": {
"links": []
},
"overrides": []
},
"fill": 1,
"fillGradient": 0,
"gridPos": {
@ -431,10 +468,11 @@
"linewidth": 2,
"nullPointMode": "null",
"options": {
"dataLinks": []
"alertThreshold": true
},
"paceLength": 10,
"percentage": false,
"pluginVersion": "9.1.5",
"pointradius": 2,
"points": false,
"renderer": "flot",
@ -444,6 +482,10 @@
"steppedLine": false,
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "P1809F7CD0C75ACF3"
},
"expr": "DCGM_FI_DEV_GPU_TEMP",
"format": "time_series",
"instant": false,
@ -454,9 +496,7 @@
}
],
"thresholds": [],
"timeFrom": null,
"timeRegions": [],
"timeShift": null,
"title": "GPU Temperature",
"tooltip": {
"shared": true,
@ -465,33 +505,24 @@
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"format": "celsius",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
}
],
"yaxis": {
"align": false,
"alignLevel": null
"align": false
}
},
{
@ -499,7 +530,16 @@
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "prometheus",
"datasource": {
"type": "prometheus",
"uid": "P1809F7CD0C75ACF3"
},
"fieldConfig": {
"defaults": {
"links": []
},
"overrides": []
},
"fill": 1,
"fillGradient": 0,
"gridPos": {
@ -525,11 +565,11 @@
"linewidth": 2,
"nullPointMode": "null",
"options": {
"dataLinks": []
"alertThreshold": true
},
"paceLength": 10,
"percentage": false,
"pluginVersion": "6.5.2",
"pluginVersion": "9.1.5",
"pointradius": 2,
"points": false,
"renderer": "flot",
@ -539,6 +579,10 @@
"steppedLine": false,
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "P1809F7CD0C75ACF3"
},
"expr": "DCGM_FI_DEV_POWER_USAGE",
"format": "time_series",
"interval": "",
@ -548,9 +592,7 @@
}
],
"thresholds": [],
"timeFrom": null,
"timeRegions": [],
"timeShift": null,
"title": "GPU Power Usage",
"tooltip": {
"shared": true,
@ -559,33 +601,24 @@
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"format": "watt",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
}
],
"yaxis": {
"align": false,
"alignLevel": null
"align": false
}
},
{
@ -593,7 +626,16 @@
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "prometheus",
"datasource": {
"type": "prometheus",
"uid": "P1809F7CD0C75ACF3"
},
"fieldConfig": {
"defaults": {
"links": []
},
"overrides": []
},
"fill": 1,
"fillGradient": 0,
"gridPos": {
@ -613,7 +655,6 @@
"min": false,
"rightSide": true,
"show": true,
"sideWidth": null,
"total": false,
"values": true
},
@ -621,10 +662,11 @@
"linewidth": 2,
"nullPointMode": "null",
"options": {
"dataLinks": []
"alertThreshold": true
},
"paceLength": 10,
"percentage": false,
"pluginVersion": "9.1.5",
"pointradius": 2,
"points": false,
"renderer": "flot",
@ -634,6 +676,10 @@
"steppedLine": false,
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "P1809F7CD0C75ACF3"
},
"expr": "DCGM_FI_DEV_SM_CLOCK* 1000000",
"format": "time_series",
"interval": "",
@ -643,9 +689,7 @@
}
],
"thresholds": [],
"timeFrom": null,
"timeRegions": [],
"timeShift": null,
"title": "GPU SM Clocks",
"tooltip": {
"shared": true,
@ -654,46 +698,37 @@
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"decimals": null,
"format": "hertz",
"label": "",
"logBase": 1,
"max": null,
"min": null,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
}
],
"yaxis": {
"align": false,
"alignLevel": null
"align": false
}
}
],
"refresh": false,
"schemaVersion": 18,
"schemaVersion": 37,
"style": "dark",
"tags": [],
"templating": {
"list": []
},
"time": {
"from": "now/d",
"from": "now-24h",
"to": "now"
},
"timepicker": {
@ -722,7 +757,8 @@
]
},
"timezone": "",
"title": "gpu监控",
"title": "gpu",
"uid": "dcgm",
"version": 26
"version": 1,
"weekStart": ""
}

View File

@ -3,31 +3,38 @@
"list": [
{
"builtIn": 1,
"datasource": "-- Grafana --",
"datasource": {
"type": "datasource",
"uid": "grafana"
},
"enable": true,
"hide": true,
"iconColor": "rgba(0, 211, 255, 1)",
"name": "Annotations & Alerts",
"target": {
"limit": 100,
"matchAny": false,
"tags": [],
"type": "dashboard"
},
"type": "dashboard"
}
]
},
"editable": true,
"gnetId": null,
"fiscalYearStartMonth": 0,
"graphTooltip": 0,
"id": 17,
"iteration": 1657856261371,
"links": [],
"liveNow": false,
"panels": [
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "prometheus",
"fieldConfig": {
"defaults": {},
"overrides": []
"datasource": {
"type": "prometheus",
"uid": "P1809F7CD0C75ACF3"
},
"fill": 1,
"fillGradient": 0,
@ -61,7 +68,7 @@
},
"paceLength": 10,
"percentage": false,
"pluginVersion": "7.5.2",
"pluginVersion": "9.1.5",
"pointradius": 2,
"points": false,
"renderer": "flot",
@ -71,6 +78,10 @@
"steppedLine": false,
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "P1809F7CD0C75ACF3"
},
"exemplar": true,
"expr": "sum by (destination_workload) (istio_requests_total{destination_service_namespace=\"$namespace\",destination_workload=~\"($service)\"})",
"format": "time_series",
@ -81,6 +92,10 @@
"refId": "B"
},
{
"datasource": {
"type": "prometheus",
"uid": "P1809F7CD0C75ACF3"
},
"exemplar": true,
"expr": "sum by (destination_workload,response_code) (irate(istio_requests_total{destination_service_namespace=\"$namespace\",destination_workload=~\"($service)\"}[1m]))",
"hide": false,
@ -90,10 +105,8 @@
}
],
"thresholds": [],
"timeFrom": null,
"timeRegions": [],
"timeShift": null,
"title": "域名服务平均qps请求量(1分钟内)",
"title": "域名服务平均qps请求量(1分钟内平均)",
"tooltip": {
"shared": true,
"sort": 1,
@ -101,9 +114,7 @@
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
@ -111,25 +122,18 @@
{
"$$hashKey": "object:311",
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
},
{
"$$hashKey": "object:312",
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
}
],
"yaxis": {
"align": false,
"alignLevel": null
"align": false
}
},
{
@ -137,10 +141,9 @@
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "prometheus",
"fieldConfig": {
"defaults": {},
"overrides": []
"datasource": {
"type": "prometheus",
"uid": "P1809F7CD0C75ACF3"
},
"fill": 1,
"fillGradient": 0,
@ -172,7 +175,7 @@
},
"paceLength": 10,
"percentage": false,
"pluginVersion": "7.5.2",
"pluginVersion": "9.1.5",
"pointradius": 2,
"points": false,
"renderer": "flot",
@ -182,6 +185,10 @@
"steppedLine": false,
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "P1809F7CD0C75ACF3"
},
"exemplar": true,
"expr": "sum by (destination_workload) (irate(istio_request_bytes_sum{destination_service_namespace=\"$namespace\",destination_workload=~\"($service)\"}[1m]))",
"format": "time_series",
@ -193,9 +200,7 @@
}
],
"thresholds": [],
"timeFrom": null,
"timeRegions": [],
"timeShift": null,
"title": "域名服务吞吐率1分钟",
"tooltip": {
"shared": true,
@ -204,33 +209,24 @@
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"format": "bytes",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
}
],
"yaxis": {
"align": false,
"alignLevel": null
"align": false
}
},
{
@ -238,114 +234,9 @@
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "prometheus",
"fieldConfig": {
"defaults": {},
"overrides": []
},
"fill": 1,
"fillGradient": 0,
"gridPos": {
"h": 8,
"w": 24,
"x": 0,
"y": 8
},
"hiddenSeries": false,
"id": 3,
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": true,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": true
},
"lines": true,
"linewidth": 1,
"links": [],
"nullPointMode": "null",
"options": {
"alertThreshold": true
},
"paceLength": 10,
"percentage": false,
"pluginVersion": "7.5.2",
"pointradius": 2,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"exemplar": true,
"expr": "avg by (destination_workload) (rate(istio_request_duration_milliseconds_sum{destination_service_namespace=\"$namespace\",destination_workload=~\"($service)\"}[1m])/rate(istio_request_duration_milliseconds_count{destination_service_namespace=\"$namespace\",destination_workload=~\"($service)\"}[1m]))",
"format": "time_series",
"hide": false,
"interval": "",
"intervalFactor": 1,
"legendFormat": "{{destination_workload}}",
"refId": "B"
}
],
"thresholds": [],
"timeFrom": null,
"timeRegions": [],
"timeShift": null,
"title": "域名服务 平均时延ms(1分钟平均)",
"tooltip": {
"shared": true,
"sort": 1,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"$$hashKey": "object:83",
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
},
{
"$$hashKey": "object:84",
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
}
],
"yaxis": {
"align": false,
"alignLevel": null
}
},
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "prometheus",
"decimals": null,
"fieldConfig": {
"defaults": {},
"overrides": []
"datasource": {
"type": "prometheus",
"uid": "P1809F7CD0C75ACF3"
},
"fill": 1,
"fillGradient": 0,
@ -353,7 +244,7 @@
"h": 10,
"w": 6,
"x": 0,
"y": 16
"y": 8
},
"hiddenSeries": false,
"id": 6,
@ -377,17 +268,20 @@
},
"paceLength": 10,
"percentage": false,
"pluginVersion": "7.5.2",
"pluginVersion": "9.1.5",
"pointradius": 5,
"points": false,
"renderer": "flot",
"repeat": null,
"seriesOverrides": [],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "P1809F7CD0C75ACF3"
},
"exemplar": true,
"expr": "sum by (pod) (container_memory_working_set_bytes{job=\"kubelet\", image!=\"\",container_name!=\"POD\",pod=~\".*$service.*\"})",
"format": "time_series",
@ -398,9 +292,7 @@
}
],
"thresholds": [],
"timeFrom": null,
"timeRegions": [],
"timeShift": null,
"title": "Memory Usage",
"tooltip": {
"shared": true,
@ -409,9 +301,7 @@
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
@ -419,25 +309,20 @@
{
"$$hashKey": "object:204",
"format": "bytes",
"label": null,
"logBase": 1,
"max": null,
"min": 0,
"show": true
},
{
"$$hashKey": "object:205",
"format": "bytes",
"label": null,
"logBase": 1,
"max": null,
"min": 0,
"show": false
}
],
"yaxis": {
"align": false,
"alignLevel": null
"align": false
}
},
{
@ -445,10 +330,9 @@
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "prometheus",
"fieldConfig": {
"defaults": {},
"overrides": []
"datasource": {
"type": "prometheus",
"uid": "P1809F7CD0C75ACF3"
},
"fill": 1,
"fillGradient": 0,
@ -456,7 +340,7 @@
"h": 10,
"w": 6,
"x": 6,
"y": 16
"y": 8
},
"hiddenSeries": false,
"id": 8,
@ -481,17 +365,20 @@
},
"paceLength": 10,
"percentage": false,
"pluginVersion": "7.5.2",
"pluginVersion": "9.1.5",
"pointradius": 5,
"points": false,
"renderer": "flot",
"repeat": null,
"seriesOverrides": [],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "P1809F7CD0C75ACF3"
},
"exemplar": true,
"expr": "sum by (pod) (rate(container_cpu_usage_seconds_total{job=\"kubelet\", image!=\"\",container_name!=\"POD\",pod=~\".*$service.*\"}[5m]))",
"format": "time_series",
@ -502,6 +389,10 @@
"refId": "A"
},
{
"datasource": {
"type": "prometheus",
"uid": "P1809F7CD0C75ACF3"
},
"expr": "",
"format": "time_series",
"intervalFactor": 1,
@ -509,9 +400,7 @@
}
],
"thresholds": [],
"timeFrom": null,
"timeRegions": [],
"timeShift": null,
"title": "CPU Usage",
"tooltip": {
"shared": true,
@ -520,9 +409,7 @@
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
@ -530,25 +417,20 @@
{
"$$hashKey": "object:483",
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": 0,
"show": true
},
{
"$$hashKey": "object:484",
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": 0,
"show": false
}
],
"yaxis": {
"align": false,
"alignLevel": null
"align": false
}
},
{
@ -556,7 +438,10 @@
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "prometheus",
"datasource": {
"type": "prometheus",
"uid": "P1809F7CD0C75ACF3"
},
"fieldConfig": {
"defaults": {
"links": []
@ -569,7 +454,7 @@
"h": 10,
"w": 6,
"x": 12,
"y": 16
"y": 8
},
"hiddenSeries": false,
"id": 12,
@ -595,7 +480,7 @@
},
"paceLength": 10,
"percentage": false,
"pluginVersion": "7.5.2",
"pluginVersion": "9.1.5",
"pointradius": 2,
"points": false,
"renderer": "flot",
@ -605,6 +490,10 @@
"steppedLine": false,
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "P1809F7CD0C75ACF3"
},
"exemplar": true,
"expr": "DCGM_FI_DEV_MEM_COPY_UTIL{exported_pod=~\".*$service.*\"}",
"format": "time_series",
@ -615,9 +504,7 @@
}
],
"thresholds": [],
"timeFrom": null,
"timeRegions": [],
"timeShift": null,
"title": "pod GPU 显存占用率",
"tooltip": {
"shared": true,
@ -626,16 +513,13 @@
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"format": "percent",
"label": null,
"logBase": 1,
"max": "100",
"min": "0",
@ -643,16 +527,12 @@
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
}
],
"yaxis": {
"align": false,
"alignLevel": null
"align": false
}
},
{
@ -660,10 +540,9 @@
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "prometheus",
"fieldConfig": {
"defaults": {},
"overrides": []
"datasource": {
"type": "prometheus",
"uid": "P1809F7CD0C75ACF3"
},
"fill": 1,
"fillGradient": 0,
@ -671,7 +550,7 @@
"h": 10,
"w": 6,
"x": 18,
"y": 16
"y": 8
},
"hiddenSeries": false,
"id": 10,
@ -696,7 +575,7 @@
},
"paceLength": 10,
"percentage": false,
"pluginVersion": "7.5.2",
"pluginVersion": "9.1.5",
"pointradius": 5,
"points": false,
"renderer": "flot",
@ -706,6 +585,10 @@
"steppedLine": false,
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "P1809F7CD0C75ACF3"
},
"exemplar": true,
"expr": "DCGM_FI_DEV_GPU_UTIL{exported_pod=~\".*$service.*\"}",
"format": "time_series",
@ -717,9 +600,7 @@
}
],
"thresholds": [],
"timeFrom": null,
"timeRegions": [],
"timeShift": null,
"title": "GPU 使用率",
"tooltip": {
"shared": true,
@ -728,33 +609,26 @@
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": 0,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": 0,
"show": false
}
],
"yaxis": {
"align": false,
"alignLevel": null
"align": false
}
},
{
@ -762,10 +636,9 @@
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "prometheus",
"fieldConfig": {
"defaults": {},
"overrides": []
"datasource": {
"type": "prometheus",
"uid": "P1809F7CD0C75ACF3"
},
"fill": 1,
"fillGradient": 0,
@ -773,7 +646,7 @@
"h": 8,
"w": 24,
"x": 0,
"y": 26
"y": 18
},
"hiddenSeries": false,
"id": 13,
@ -797,7 +670,7 @@
},
"paceLength": 10,
"percentage": false,
"pluginVersion": "7.5.2",
"pluginVersion": "9.1.5",
"pointradius": 2,
"points": false,
"renderer": "flot",
@ -807,6 +680,10 @@
"steppedLine": false,
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "P1809F7CD0C75ACF3"
},
"exemplar": true,
"expr": "sum by (destination_workload) (istio_requests_total{destination_service_namespace=\"$namespace\"})",
"format": "time_series",
@ -818,9 +695,7 @@
}
],
"thresholds": [],
"timeFrom": null,
"timeRegions": [],
"timeShift": null,
"title": "全部域名服务监控",
"tooltip": {
"shared": true,
@ -829,56 +704,46 @@
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
}
],
"yaxis": {
"align": false,
"alignLevel": null
"align": false
}
}
],
"refresh": false,
"schemaVersion": 27,
"schemaVersion": 37,
"style": "dark",
"tags": [],
"templating": {
"list": [
{
"allValue": null,
"current": {
"selected": true,
"selected": false,
"text": "service",
"value": "service"
},
"datasource": "prometheus",
"datasource": {
"type": "prometheus",
"uid": "P1809F7CD0C75ACF3"
},
"definition": "label_values(istio_requests_total, destination_service_namespace)",
"description": null,
"error": null,
"hide": 0,
"includeAll": true,
"label": null,
"multi": false,
"name": "namespace",
"options": [],
@ -891,25 +756,23 @@
"skipUrlSync": false,
"sort": 0,
"tagValuesQuery": "",
"tags": [],
"tagsQuery": "",
"type": "query",
"useTags": false
},
{
"allValue": null,
"current": {
"selected": false,
"text": "All",
"value": "$__all"
},
"datasource": "prometheus",
"datasource": {
"type": "prometheus",
"uid": "P1809F7CD0C75ACF3"
},
"definition": "label_values(istio_requests_total{destination_service_namespace=\"$namespace\"}, destination_service_name)",
"description": null,
"error": null,
"hide": 0,
"includeAll": true,
"label": null,
"multi": true,
"name": "service",
"options": [],
@ -922,7 +785,6 @@
"skipUrlSync": false,
"sort": 0,
"tagValuesQuery": "",
"tags": [],
"tagsQuery": "",
"type": "query",
"useTags": false
@ -961,5 +823,6 @@
"timezone": "",
"title": "istio service",
"uid": "istio-service",
"version": 42
"version": 1,
"weekStart": ""
}

File diff suppressed because it is too large Load Diff

View File

@ -1,920 +0,0 @@
{
"annotations": {
"list": [
]
},
"editable": false,
"gnetId": null,
"graphTooltip": 0,
"hideControls": false,
"id": null,
"links": [
],
"refresh": "",
"rows": [
{
"collapse": false,
"collapsed": false,
"height": "250px",
"panels": [
{
"aliasColors": {
},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
"fill": 1,
"gridPos": {
},
"id": 2,
"legend": {
"alignAsTable": false,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": false,
"show": true,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"links": [
],
"nullPointMode": "null",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"repeat": null,
"seriesOverrides": [
],
"spaceLength": 10,
"span": 6,
"stack": false,
"steppedLine": false,
"targets": [
{
"expr": "100 - (avg by (cpu) (irate(node_cpu{job=\"node-exporter\", mode=\"idle\", instance=\"$instance\"}[5m])) * 100)\n",
"format": "time_series",
"intervalFactor": 10,
"legendFormat": "{{cpu}}",
"refId": "A"
}
],
"thresholds": [
],
"timeFrom": null,
"timeShift": null,
"title": "Idle CPU",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": [
]
},
"yaxes": [
{
"format": "percent",
"label": null,
"logBase": 1,
"max": 100,
"min": 0,
"show": true
},
{
"format": "percent",
"label": null,
"logBase": 1,
"max": 100,
"min": 0,
"show": true
}
]
},
{
"aliasColors": {
},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
"fill": 1,
"gridPos": {
},
"id": 3,
"legend": {
"alignAsTable": false,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": false,
"show": true,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"links": [
],
"nullPointMode": "null",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"repeat": null,
"seriesOverrides": [
],
"spaceLength": 10,
"span": 6,
"stack": false,
"steppedLine": false,
"targets": [
{
"expr": "node_load1{job=\"node-exporter\", instance=\"$instance\"} * 100",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "load 1m",
"refId": "A"
},
{
"expr": "node_load5{job=\"node-exporter\", instance=\"$instance\"} * 100",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "load 5m",
"refId": "B"
},
{
"expr": "node_load15{job=\"node-exporter\", instance=\"$instance\"} * 100",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "load 15m",
"refId": "C"
}
],
"thresholds": [
],
"timeFrom": null,
"timeShift": null,
"title": "System load",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": [
]
},
"yaxes": [
{
"format": "percent",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
},
{
"format": "percent",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
}
]
}
],
"repeat": null,
"repeatIteration": null,
"repeatRowId": null,
"showTitle": false,
"title": "Dashboard Row",
"titleSize": "h6",
"type": "row"
},
{
"collapse": false,
"collapsed": false,
"height": "250px",
"panels": [
{
"aliasColors": {
},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
"fill": 1,
"gridPos": {
},
"id": 4,
"legend": {
"alignAsTable": false,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": false,
"show": true,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"links": [
],
"nullPointMode": "null",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"repeat": null,
"seriesOverrides": [
],
"spaceLength": 10,
"span": 9,
"stack": false,
"steppedLine": false,
"targets": [
{
"expr": "node_memory_MemTotal{job=\"node-exporter\", instance=\"$instance\"}\n- node_memory_MemFree{job=\"node-exporter\", instance=\"$instance\"}\n- node_memory_Buffers{job=\"node-exporter\", instance=\"$instance\"}\n- node_memory_Cached{job=\"node-exporter\", instance=\"$instance\"}\n",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "memory used",
"refId": "A"
},
{
"expr": "node_memory_Buffers{job=\"node-exporter\", instance=\"$instance\"}",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "memory buffers",
"refId": "B"
},
{
"expr": "node_memory_Cached{job=\"node-exporter\", instance=\"$instance\"}",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "memory cached",
"refId": "C"
},
{
"expr": "node_memory_MemFree{job=\"node-exporter\", instance=\"$instance\"}",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "memory free",
"refId": "D"
}
],
"thresholds": [
],
"timeFrom": null,
"timeShift": null,
"title": "Memory Usage",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": [
]
},
"yaxes": [
{
"format": "bytes",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
},
{
"format": "bytes",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
}
]
},
{
"cacheTimeout": null,
"colorBackground": false,
"colorValue": false,
"colors": [
"rgba(50, 172, 45, 0.97)",
"rgba(237, 129, 40, 0.89)",
"rgba(245, 54, 54, 0.9)"
],
"datasource": "$datasource",
"format": "percent",
"gauge": {
"maxValue": 100,
"minValue": 0,
"show": true,
"thresholdLabels": false,
"thresholdMarkers": true
},
"gridPos": {
},
"id": 5,
"interval": null,
"links": [
],
"mappingType": 1,
"mappingTypes": [
{
"name": "value to text",
"value": 1
},
{
"name": "range to text",
"value": 2
}
],
"maxDataPoints": 100,
"nullPointMode": "connected",
"nullText": null,
"postfix": "",
"postfixFontSize": "50%",
"prefix": "",
"prefixFontSize": "50%",
"rangeMaps": [
{
"from": "null",
"text": "N/A",
"to": "null"
}
],
"span": 3,
"sparkline": {
"fillColor": "rgba(31, 118, 189, 0.18)",
"full": false,
"lineColor": "rgb(31, 120, 193)",
"show": false
},
"tableColumn": "",
"targets": [
{
"expr": "(\n node_memory_MemTotal{job=\"node-exporter\", instance=\"$instance\"}\n- node_memory_MemFree{job=\"node-exporter\", instance=\"$instance\"}\n- node_memory_Buffers{job=\"node-exporter\", instance=\"$instance\"}\n- node_memory_Cached{job=\"node-exporter\", instance=\"$instance\"}\n) * 100\n /\nnode_memory_MemTotal{job=\"node-exporter\", instance=\"$instance\"}\n",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": ""
}
],
"thresholds": "80, 90",
"title": "Memory Usage",
"type": "singlestat",
"valueFontSize": "80%",
"valueMaps": [
{
"op": "=",
"text": "N/A",
"value": "null"
}
],
"valueName": "current"
}
],
"repeat": null,
"repeatIteration": null,
"repeatRowId": null,
"showTitle": false,
"title": "Dashboard Row",
"titleSize": "h6",
"type": "row"
},
{
"collapse": false,
"collapsed": false,
"height": "250px",
"panels": [
{
"aliasColors": {
},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
"fill": 1,
"gridPos": {
},
"id": 6,
"legend": {
"alignAsTable": false,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": false,
"show": true,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"links": [
],
"nullPointMode": "null",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"repeat": null,
"seriesOverrides": [
{
"alias": "read",
"yaxis": 1
},
{
"alias": "io time",
"yaxis": 2
}
],
"spaceLength": 10,
"span": 9,
"stack": false,
"steppedLine": false,
"targets": [
{
"expr": "sum by (instance) (rate(node_disk_bytes_read{job=\"node-exporter\", instance=\"$instance\"}[2m]))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "read",
"refId": "A"
},
{
"expr": "sum by (instance) (rate(node_disk_bytes_written{job=\"node-exporter\", instance=\"$instance\"}[2m]))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "written",
"refId": "B"
},
{
"expr": "sum by (instance) (rate(node_disk_io_time_ms{job=\"node-exporter\", instance=\"$instance\"}[2m]))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "io time",
"refId": "C"
}
],
"thresholds": [
],
"timeFrom": null,
"timeShift": null,
"title": "Disk I/O",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": [
]
},
"yaxes": [
{
"format": "bytes",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
},
{
"format": "ms",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
}
]
},
{
"cacheTimeout": null,
"colorBackground": false,
"colorValue": false,
"colors": [
"rgba(50, 172, 45, 0.97)",
"rgba(237, 129, 40, 0.89)",
"rgba(245, 54, 54, 0.9)"
],
"datasource": "$datasource",
"format": "percent",
"gauge": {
"maxValue": 100,
"minValue": 0,
"show": true,
"thresholdLabels": false,
"thresholdMarkers": true
},
"gridPos": {
},
"id": 7,
"interval": null,
"links": [
],
"mappingType": 1,
"mappingTypes": [
{
"name": "value to text",
"value": 1
},
{
"name": "range to text",
"value": 2
}
],
"maxDataPoints": 100,
"nullPointMode": "connected",
"nullText": null,
"postfix": "",
"postfixFontSize": "50%",
"prefix": "",
"prefixFontSize": "50%",
"rangeMaps": [
{
"from": "null",
"text": "N/A",
"to": "null"
}
],
"span": 3,
"sparkline": {
"fillColor": "rgba(31, 118, 189, 0.18)",
"full": false,
"lineColor": "rgb(31, 120, 193)",
"show": false
},
"tableColumn": "",
"targets": [
{
"expr": "(\n sum(node_filesystem_size{job=\"node-exporter\", device!=\"rootfs\", instance=\"$instance\"})\n- sum(node_filesystem_avail{job=\"node-exporter\", device!=\"rootfs\", instance=\"$instance\"})\n) * 100\n /\nsum(node_filesystem_size{job=\"node-exporter\", device!=\"rootfs\", instance=\"$instance\"})\n",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": ""
}
],
"thresholds": "80, 90",
"title": "Disk Space Usage",
"type": "singlestat",
"valueFontSize": "80%",
"valueMaps": [
{
"op": "=",
"text": "N/A",
"value": "null"
}
],
"valueName": "current"
}
],
"repeat": null,
"repeatIteration": null,
"repeatRowId": null,
"showTitle": false,
"title": "Dashboard Row",
"titleSize": "h6",
"type": "row"
},
{
"collapse": false,
"collapsed": false,
"height": "250px",
"panels": [
{
"aliasColors": {
},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
"fill": 1,
"gridPos": {
},
"id": 8,
"legend": {
"alignAsTable": false,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": false,
"show": true,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"links": [
],
"nullPointMode": "null",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"repeat": null,
"seriesOverrides": [
],
"spaceLength": 10,
"span": 6,
"stack": false,
"steppedLine": false,
"targets": [
{
"expr": "rate(node_network_receive_bytes{job=\"node-exporter\", instance=\"$instance\", device!\u007e\"lo\"}[5m])",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{device}}",
"refId": "A"
}
],
"thresholds": [
],
"timeFrom": null,
"timeShift": null,
"title": "Network Received",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": [
]
},
"yaxes": [
{
"format": "bytes",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
},
{
"format": "bytes",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
}
]
},
{
"aliasColors": {
},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
"fill": 1,
"gridPos": {
},
"id": 9,
"legend": {
"alignAsTable": false,
"avg": false,
"current": false,
"max": false,
"min": false,
"rightSide": false,
"show": true,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"links": [
],
"nullPointMode": "null",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"repeat": null,
"seriesOverrides": [
],
"spaceLength": 10,
"span": 6,
"stack": false,
"steppedLine": false,
"targets": [
{
"expr": "rate(node_network_transmit_bytes{job=\"node-exporter\", instance=\"$instance\", device!\u007e\"lo\"}[5m])",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{device}}",
"refId": "A"
}
],
"thresholds": [
],
"timeFrom": null,
"timeShift": null,
"title": "Network Transmitted",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": [
]
},
"yaxes": [
{
"format": "bytes",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
},
{
"format": "bytes",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
}
]
}
],
"repeat": null,
"repeatIteration": null,
"repeatRowId": null,
"showTitle": false,
"title": "Dashboard Row",
"titleSize": "h6",
"type": "row"
}
],
"schemaVersion": 14,
"style": "dark",
"tags": [
],
"templating": {
"list": [
{
"current": {
"text": "Prometheus",
"value": "Prometheus"
},
"hide": 0,
"label": null,
"name": "datasource",
"options": [
],
"query": "prometheus",
"refresh": 1,
"regex": "",
"type": "datasource"
},
{
"allValue": null,
"current": {
},
"datasource": "$datasource",
"hide": 0,
"includeAll": false,
"label": null,
"multi": false,
"name": "instance",
"options": [
],
"query": "label_values(node_boot_time{job=\"node-exporter\"}, instance)",
"refresh": 2,
"regex": "",
"sort": 0,
"tagValuesQuery": "",
"tags": [
],
"tagsQuery": "",
"type": "query",
"useTags": false
}
]
},
"time": {
"from": "now-1h",
"to": "now"
},
"timepicker": {
"refresh_intervals": [
"5s",
"10s",
"30s",
"1m",
"5m",
"15m",
"30m",
"1h",
"2h",
"1d"
],
"time_options": [
"5m",
"15m",
"1h",
"6h",
"12h",
"24h",
"2d",
"7d",
"30d"
]
},
"timezone": "browser",
"title": "Nodes",
"uid": "fa49a4706d07a042595b664c87fb33ea",
"version": 0
}

File diff suppressed because it is too large Load Diff

View File

@ -1,483 +0,0 @@
{
"annotations": {
"list": [
]
},
"editable": false,
"gnetId": null,
"graphTooltip": 0,
"hideControls": false,
"id": null,
"links": [
],
"refresh": "",
"rows": [
{
"collapse": false,
"collapsed": false,
"height": "250px",
"panels": [
{
"aliasColors": {
},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
"fill": 1,
"gridPos": {
},
"id": 2,
"legend": {
"alignAsTable": true,
"avg": true,
"current": true,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"links": [
],
"nullPointMode": "null",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"repeat": null,
"seriesOverrides": [
],
"spaceLength": 10,
"span": 12,
"stack": false,
"steppedLine": false,
"targets": [
{
"expr": "sum by(container_name) (container_memory_working_set_bytes{job=\"kubelet\", namespace=\"$namespace\", pod_name=\"$pod\", container_name=\u007e\"$container\", container_name!=\"POD\"})",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "Current: {{ container_name }}",
"refId": "A"
},
{
"expr": "sum by(container) (kube_pod_container_resource_requests_memory_bytes{job=\"kubelet\", namespace=\"$namespace\", pod=\"$pod\", container=\u007e\"$container\", container!=\"POD\"})",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "Requested: {{ container }}",
"refId": "B"
},
{
"expr": "sum by(container) (kube_pod_container_resource_limits_memory_bytes{job=\"kubelet\", namespace=\"$namespace\", pod=\"$pod\", container=\u007e\"$container\", container!=\"POD\"})",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "Limit: {{ container }}",
"refId": "C"
}
],
"thresholds": [
],
"timeFrom": null,
"timeShift": null,
"title": "Memory Usage",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": [
]
},
"yaxes": [
{
"format": "bytes",
"label": null,
"logBase": 1,
"max": null,
"min": 0,
"show": true
},
{
"format": "bytes",
"label": null,
"logBase": 1,
"max": null,
"min": 0,
"show": true
}
]
}
],
"repeat": null,
"repeatIteration": null,
"repeatRowId": null,
"showTitle": false,
"title": "Dashboard Row",
"titleSize": "h6",
"type": "row"
},
{
"collapse": false,
"collapsed": false,
"height": "250px",
"panels": [
{
"aliasColors": {
},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
"fill": 1,
"gridPos": {
},
"id": 3,
"legend": {
"alignAsTable": true,
"avg": true,
"current": true,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"links": [
],
"nullPointMode": "null",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"repeat": null,
"seriesOverrides": [
],
"spaceLength": 10,
"span": 12,
"stack": false,
"steppedLine": false,
"targets": [
{
"expr": "sum by (container_name) (rate(container_cpu_usage_seconds_total{job=\"kubelet\", image!=\"\",container_name!=\"POD\",pod_name=\"$pod\"}[1m]))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{ container_name }}",
"refId": "A"
}
],
"thresholds": [
],
"timeFrom": null,
"timeShift": null,
"title": "CPU Usage",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": [
]
},
"yaxes": [
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": 0,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": 0,
"show": true
}
]
}
],
"repeat": null,
"repeatIteration": null,
"repeatRowId": null,
"showTitle": false,
"title": "Dashboard Row",
"titleSize": "h6",
"type": "row"
},
{
"collapse": false,
"collapsed": false,
"height": "250px",
"panels": [
{
"aliasColors": {
},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "$datasource",
"fill": 1,
"gridPos": {
},
"id": 4,
"legend": {
"alignAsTable": true,
"avg": true,
"current": true,
"max": false,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": false
},
"lines": true,
"linewidth": 1,
"links": [
],
"nullPointMode": "null",
"percentage": false,
"pointradius": 5,
"points": false,
"renderer": "flot",
"repeat": null,
"seriesOverrides": [
],
"spaceLength": 10,
"span": 12,
"stack": false,
"steppedLine": false,
"targets": [
{
"expr": "sort_desc(sum by (pod_name) (rate(container_network_receive_bytes_total{job=\"kubelet\", pod_name=\"$pod\"}[1m])))",
"format": "time_series",
"intervalFactor": 2,
"legendFormat": "{{ pod_name }}",
"refId": "A"
}
],
"thresholds": [
],
"timeFrom": null,
"timeShift": null,
"title": "Network I/O",
"tooltip": {
"shared": true,
"sort": 0,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": [
]
},
"yaxes": [
{
"format": "bytes",
"label": null,
"logBase": 1,
"max": null,
"min": 0,
"show": true
},
{
"format": "bytes",
"label": null,
"logBase": 1,
"max": null,
"min": 0,
"show": true
}
]
}
],
"repeat": null,
"repeatIteration": null,
"repeatRowId": null,
"showTitle": false,
"title": "Dashboard Row",
"titleSize": "h6",
"type": "row"
}
],
"schemaVersion": 14,
"style": "dark",
"tags": [
],
"templating": {
"list": [
{
"current": {
"text": "Prometheus",
"value": "Prometheus"
},
"hide": 0,
"label": null,
"name": "datasource",
"options": [
],
"query": "prometheus",
"refresh": 1,
"regex": "",
"type": "datasource"
},
{
"allValue": null,
"current": {
},
"datasource": "$datasource",
"hide": 0,
"includeAll": false,
"label": "Namespace",
"multi": false,
"name": "namespace",
"options": [
],
"query": "label_values(kube_pod_info, namespace)",
"refresh": 2,
"regex": "",
"sort": 0,
"tagValuesQuery": "",
"tags": [
],
"tagsQuery": "",
"type": "query",
"useTags": false
},
{
"allValue": null,
"current": {
},
"datasource": "$datasource",
"hide": 0,
"includeAll": false,
"label": "Pod",
"multi": false,
"name": "pod",
"options": [
],
"query": "label_values(kube_pod_info{namespace=\u007e\"$namespace\"}, pod)",
"refresh": 2,
"regex": "",
"sort": 0,
"tagValuesQuery": "",
"tags": [
],
"tagsQuery": "",
"type": "query",
"useTags": false
},
{
"allValue": null,
"current": {
},
"datasource": "$datasource",
"hide": 0,
"includeAll": true,
"label": "Container",
"multi": false,
"name": "container",
"options": [
],
"query": "label_values(kube_pod_container_info{namespace=\"$namespace\", pod=\"$pod\"}, container)",
"refresh": 2,
"regex": "",
"sort": 0,
"tagValuesQuery": "",
"tags": [
],
"tagsQuery": "",
"type": "query",
"useTags": false
}
]
},
"time": {
"from": "now-1h",
"to": "now"
},
"timepicker": {
"refresh_intervals": [
"5s",
"10s",
"30s",
"1m",
"5m",
"15m",
"30m",
"1h",
"2h",
"1d"
],
"time_options": [
"5m",
"15m",
"1h",
"6h",
"12h",
"24h",
"2d",
"7d",
"30d"
]
},
"timezone": "browser",
"title": "Pods",
"uid": "ab4f13a9892a76a4d21ce8c2445bf4ea",
"version": 0
}

View File

@ -33,9 +33,6 @@ spec:
- --path.procfs=/host/proc
- --path.sysfs=/host/sys
resources:
limits:
cpu: 102m
memory: 180Mi
requests:
cpu: 102m
memory: 180Mi
@ -56,9 +53,6 @@ spec:
hostPort: 9100
name: https
resources:
limits:
cpu: 20m
memory: 40Mi
requests:
cpu: 10m
memory: 20Mi