mirror of
https://github.com/tencentmusic/cube-studio.git
synced 2025-02-11 14:34:22 +08:00
修正istio prometheus部署
This commit is contained in:
parent
7ad80bcc0f
commit
0b6b2aa03d
@ -15,8 +15,8 @@
|
||||
"editable": true,
|
||||
"gnetId": null,
|
||||
"graphTooltip": 0,
|
||||
"id": 22,
|
||||
"iteration": 1634283475928,
|
||||
"id": 17,
|
||||
"iteration": 1655264819796,
|
||||
"links": [],
|
||||
"panels": [
|
||||
{
|
||||
@ -24,101 +24,20 @@
|
||||
"bars": false,
|
||||
"dashLength": 10,
|
||||
"dashes": false,
|
||||
"datasource": "Prometheus-istio",
|
||||
"datasource": "prometheus",
|
||||
"fieldConfig": {
|
||||
"defaults": {},
|
||||
"overrides": []
|
||||
},
|
||||
"fill": 1,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 24,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 0
|
||||
},
|
||||
"id": 3,
|
||||
"legend": {
|
||||
"alignAsTable": true,
|
||||
"avg": false,
|
||||
"current": false,
|
||||
"max": true,
|
||||
"min": false,
|
||||
"rightSide": true,
|
||||
"show": true,
|
||||
"total": false,
|
||||
"values": true
|
||||
},
|
||||
"lines": true,
|
||||
"linewidth": 1,
|
||||
"links": [],
|
||||
"nullPointMode": "null",
|
||||
"paceLength": 10,
|
||||
"percentage": false,
|
||||
"pointradius": 2,
|
||||
"points": false,
|
||||
"renderer": "flot",
|
||||
"seriesOverrides": [],
|
||||
"stack": false,
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum by (destination_service_name) (istio_requests_total{destination_service_namespace=\"$namespace\"})",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "{{destination_service_name}}",
|
||||
"refId": "B"
|
||||
}
|
||||
],
|
||||
"thresholds": [],
|
||||
"timeFrom": null,
|
||||
"timeRegions": [],
|
||||
"timeShift": null,
|
||||
"title": "全部服务监控",
|
||||
"tooltip": {
|
||||
"shared": true,
|
||||
"sort": 1,
|
||||
"value_type": "individual"
|
||||
},
|
||||
"type": "graph",
|
||||
"xaxis": {
|
||||
"buckets": null,
|
||||
"mode": "time",
|
||||
"name": null,
|
||||
"show": true,
|
||||
"values": []
|
||||
},
|
||||
"yaxes": [
|
||||
{
|
||||
"format": "short",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": null,
|
||||
"show": true
|
||||
},
|
||||
{
|
||||
"format": "short",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": null,
|
||||
"show": true
|
||||
}
|
||||
],
|
||||
"yaxis": {
|
||||
"align": false,
|
||||
"alignLevel": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"aliasColors": {},
|
||||
"bars": false,
|
||||
"dashLength": 10,
|
||||
"dashes": false,
|
||||
"datasource": "Prometheus-istio",
|
||||
"fill": 1,
|
||||
"gridPos": {
|
||||
"h": 7,
|
||||
"w": 24,
|
||||
"x": 0,
|
||||
"y": 8
|
||||
},
|
||||
"hiddenSeries": false,
|
||||
"id": 2,
|
||||
"legend": {
|
||||
"alignAsTable": true,
|
||||
@ -126,7 +45,7 @@
|
||||
"current": false,
|
||||
"max": true,
|
||||
"min": false,
|
||||
"rightSide": true,
|
||||
"rightSide": false,
|
||||
"show": true,
|
||||
"total": false,
|
||||
"values": true
|
||||
@ -135,29 +54,44 @@
|
||||
"linewidth": 1,
|
||||
"links": [],
|
||||
"nullPointMode": "null",
|
||||
"options": {
|
||||
"alertThreshold": true
|
||||
},
|
||||
"paceLength": 10,
|
||||
"percentage": false,
|
||||
"pluginVersion": "7.5.2",
|
||||
"pointradius": 2,
|
||||
"points": false,
|
||||
"renderer": "flot",
|
||||
"seriesOverrides": [],
|
||||
"spaceLength": 10,
|
||||
"stack": false,
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"exemplar": true,
|
||||
"expr": "sum by (destination_service_name) (istio_requests_total{destination_service_namespace=\"$namespace\",destination_service_name=~\"($service)\"})",
|
||||
"format": "time_series",
|
||||
"hide": false,
|
||||
"hide": true,
|
||||
"interval": "",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "{{destination_service_name}}",
|
||||
"refId": "B"
|
||||
},
|
||||
{
|
||||
"exemplar": true,
|
||||
"expr": "sum by (destination_service_name) (irate(istio_requests_total{destination_service_namespace=\"$namespace\",destination_service_name=~\"($service)\"}[1m]))",
|
||||
"hide": false,
|
||||
"interval": "",
|
||||
"legendFormat": "{{destination_service_name}}",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"thresholds": [],
|
||||
"timeFrom": null,
|
||||
"timeRegions": [],
|
||||
"timeShift": null,
|
||||
"title": "服务请求量",
|
||||
"title": "域名服务平均请求量(1分钟内)",
|
||||
"tooltip": {
|
||||
"shared": true,
|
||||
"sort": 1,
|
||||
@ -199,14 +133,20 @@
|
||||
"bars": false,
|
||||
"dashLength": 10,
|
||||
"dashes": false,
|
||||
"datasource": "Prometheus-istio",
|
||||
"fill": 1,
|
||||
"gridPos": {
|
||||
"h": 7,
|
||||
"w": 24,
|
||||
"x": 0,
|
||||
"y": 15
|
||||
"datasource": "prometheus",
|
||||
"fieldConfig": {
|
||||
"defaults": {},
|
||||
"overrides": []
|
||||
},
|
||||
"fill": 1,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 0
|
||||
},
|
||||
"hiddenSeries": false,
|
||||
"id": 4,
|
||||
"legend": {
|
||||
"alignAsTable": true,
|
||||
@ -214,7 +154,7 @@
|
||||
"current": false,
|
||||
"max": true,
|
||||
"min": false,
|
||||
"rightSide": true,
|
||||
"rightSide": false,
|
||||
"show": true,
|
||||
"total": false,
|
||||
"values": true
|
||||
@ -223,19 +163,26 @@
|
||||
"linewidth": 1,
|
||||
"links": [],
|
||||
"nullPointMode": "null",
|
||||
"options": {
|
||||
"alertThreshold": true
|
||||
},
|
||||
"paceLength": 10,
|
||||
"percentage": false,
|
||||
"pluginVersion": "7.5.2",
|
||||
"pointradius": 2,
|
||||
"points": false,
|
||||
"renderer": "flot",
|
||||
"seriesOverrides": [],
|
||||
"spaceLength": 10,
|
||||
"stack": false,
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum by (destination_service_name) (istio_request_bytes_sum{destination_service_namespace=\"$namespace\",destination_service_name=~\"($service)\"})",
|
||||
"exemplar": true,
|
||||
"expr": "sum by (destination_service_name) (irate(istio_request_bytes_sum{destination_service_namespace=\"$namespace\",destination_service_name=~\"($service)\"}[1m]))",
|
||||
"format": "time_series",
|
||||
"hide": false,
|
||||
"interval": "",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "{{destination_service_name}}",
|
||||
"refId": "B"
|
||||
@ -245,7 +192,7 @@
|
||||
"timeFrom": null,
|
||||
"timeRegions": [],
|
||||
"timeShift": null,
|
||||
"title": "服务吞吐",
|
||||
"title": "域名服务吞吐率(1分钟)",
|
||||
"tooltip": {
|
||||
"shared": true,
|
||||
"sort": 1,
|
||||
@ -281,10 +228,529 @@
|
||||
"align": false,
|
||||
"alignLevel": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"aliasColors": {},
|
||||
"bars": false,
|
||||
"dashLength": 10,
|
||||
"dashes": false,
|
||||
"datasource": "prometheus",
|
||||
"decimals": null,
|
||||
"fieldConfig": {
|
||||
"defaults": {},
|
||||
"overrides": []
|
||||
},
|
||||
"fill": 1,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
"h": 10,
|
||||
"w": 6,
|
||||
"x": 0,
|
||||
"y": 8
|
||||
},
|
||||
"hiddenSeries": false,
|
||||
"id": 6,
|
||||
"legend": {
|
||||
"alignAsTable": true,
|
||||
"avg": true,
|
||||
"current": false,
|
||||
"max": true,
|
||||
"min": false,
|
||||
"rightSide": false,
|
||||
"show": true,
|
||||
"total": false,
|
||||
"values": true
|
||||
},
|
||||
"lines": true,
|
||||
"linewidth": 1,
|
||||
"links": [],
|
||||
"nullPointMode": "null",
|
||||
"options": {
|
||||
"alertThreshold": true
|
||||
},
|
||||
"paceLength": 10,
|
||||
"percentage": false,
|
||||
"pluginVersion": "7.5.2",
|
||||
"pointradius": 5,
|
||||
"points": false,
|
||||
"renderer": "flot",
|
||||
"repeat": null,
|
||||
"seriesOverrides": [],
|
||||
"spaceLength": 10,
|
||||
"stack": false,
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"exemplar": true,
|
||||
"expr": "sum by (pod) (container_memory_working_set_bytes{job=\"kubelet\", image!=\"\",container_name!=\"POD\",pod=~\".*$service.*\"})",
|
||||
"format": "time_series",
|
||||
"interval": "",
|
||||
"intervalFactor": 2,
|
||||
"legendFormat": "{{ pod}}",
|
||||
"refId": "B"
|
||||
}
|
||||
],
|
||||
"thresholds": [],
|
||||
"timeFrom": null,
|
||||
"timeRegions": [],
|
||||
"timeShift": null,
|
||||
"title": "Memory Usage",
|
||||
"tooltip": {
|
||||
"shared": true,
|
||||
"sort": 1,
|
||||
"value_type": "individual"
|
||||
},
|
||||
"type": "graph",
|
||||
"xaxis": {
|
||||
"buckets": null,
|
||||
"mode": "time",
|
||||
"name": null,
|
||||
"show": true,
|
||||
"values": []
|
||||
},
|
||||
"yaxes": [
|
||||
{
|
||||
"$$hashKey": "object:204",
|
||||
"format": "bytes",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": 0,
|
||||
"show": true
|
||||
},
|
||||
{
|
||||
"$$hashKey": "object:205",
|
||||
"format": "bytes",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": 0,
|
||||
"show": false
|
||||
}
|
||||
],
|
||||
"yaxis": {
|
||||
"align": false,
|
||||
"alignLevel": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"aliasColors": {},
|
||||
"bars": false,
|
||||
"dashLength": 10,
|
||||
"dashes": false,
|
||||
"datasource": "prometheus",
|
||||
"fieldConfig": {
|
||||
"defaults": {},
|
||||
"overrides": []
|
||||
},
|
||||
"fill": 1,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
"h": 10,
|
||||
"w": 6,
|
||||
"x": 6,
|
||||
"y": 8
|
||||
},
|
||||
"hiddenSeries": false,
|
||||
"id": 8,
|
||||
"legend": {
|
||||
"alignAsTable": true,
|
||||
"avg": true,
|
||||
"current": false,
|
||||
"hideEmpty": false,
|
||||
"max": true,
|
||||
"min": false,
|
||||
"rightSide": false,
|
||||
"show": true,
|
||||
"total": false,
|
||||
"values": true
|
||||
},
|
||||
"lines": true,
|
||||
"linewidth": 1,
|
||||
"links": [],
|
||||
"nullPointMode": "null",
|
||||
"options": {
|
||||
"alertThreshold": true
|
||||
},
|
||||
"paceLength": 10,
|
||||
"percentage": false,
|
||||
"pluginVersion": "7.5.2",
|
||||
"pointradius": 5,
|
||||
"points": false,
|
||||
"renderer": "flot",
|
||||
"repeat": null,
|
||||
"seriesOverrides": [],
|
||||
"spaceLength": 10,
|
||||
"stack": false,
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"exemplar": true,
|
||||
"expr": "sum by (pod) (rate(container_cpu_usage_seconds_total{job=\"kubelet\", image!=\"\",container_name!=\"POD\",pod=~\".*$service.*\"}[5m]))",
|
||||
"format": "time_series",
|
||||
"instant": false,
|
||||
"interval": "30s",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "{{pod}}",
|
||||
"refId": "A"
|
||||
},
|
||||
{
|
||||
"expr": "",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 1,
|
||||
"refId": "B"
|
||||
}
|
||||
],
|
||||
"thresholds": [],
|
||||
"timeFrom": null,
|
||||
"timeRegions": [],
|
||||
"timeShift": null,
|
||||
"title": "CPU Usage",
|
||||
"tooltip": {
|
||||
"shared": true,
|
||||
"sort": 1,
|
||||
"value_type": "individual"
|
||||
},
|
||||
"type": "graph",
|
||||
"xaxis": {
|
||||
"buckets": null,
|
||||
"mode": "time",
|
||||
"name": null,
|
||||
"show": true,
|
||||
"values": []
|
||||
},
|
||||
"yaxes": [
|
||||
{
|
||||
"$$hashKey": "object:483",
|
||||
"format": "short",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": 0,
|
||||
"show": true
|
||||
},
|
||||
{
|
||||
"$$hashKey": "object:484",
|
||||
"format": "short",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": 0,
|
||||
"show": false
|
||||
}
|
||||
],
|
||||
"yaxis": {
|
||||
"align": false,
|
||||
"alignLevel": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"aliasColors": {},
|
||||
"bars": false,
|
||||
"dashLength": 10,
|
||||
"dashes": false,
|
||||
"datasource": "prometheus",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"links": []
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"fill": 1,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
"h": 10,
|
||||
"w": 6,
|
||||
"x": 12,
|
||||
"y": 8
|
||||
},
|
||||
"hiddenSeries": false,
|
||||
"id": 12,
|
||||
"legend": {
|
||||
"alignAsTable": true,
|
||||
"avg": true,
|
||||
"current": false,
|
||||
"max": true,
|
||||
"min": false,
|
||||
"rightSide": false,
|
||||
"show": true,
|
||||
"sort": "avg",
|
||||
"sortDesc": true,
|
||||
"total": false,
|
||||
"values": true
|
||||
},
|
||||
"lines": true,
|
||||
"linewidth": 2,
|
||||
"links": [],
|
||||
"nullPointMode": "null",
|
||||
"options": {
|
||||
"alertThreshold": true
|
||||
},
|
||||
"paceLength": 10,
|
||||
"percentage": false,
|
||||
"pluginVersion": "7.5.2",
|
||||
"pointradius": 2,
|
||||
"points": false,
|
||||
"renderer": "flot",
|
||||
"seriesOverrides": [],
|
||||
"spaceLength": 10,
|
||||
"stack": false,
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"exemplar": true,
|
||||
"expr": "DCGM_FI_DEV_MEM_COPY_UTIL{exported_pod=~\".*$service.*\"}",
|
||||
"format": "time_series",
|
||||
"interval": "",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "{{exported_pod}} GPU {{gpu}}",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"thresholds": [],
|
||||
"timeFrom": null,
|
||||
"timeRegions": [],
|
||||
"timeShift": null,
|
||||
"title": "pod GPU 显存占用率",
|
||||
"tooltip": {
|
||||
"shared": true,
|
||||
"sort": 2,
|
||||
"value_type": "cumulative"
|
||||
},
|
||||
"type": "graph",
|
||||
"xaxis": {
|
||||
"buckets": null,
|
||||
"mode": "time",
|
||||
"name": null,
|
||||
"show": true,
|
||||
"values": []
|
||||
},
|
||||
"yaxes": [
|
||||
{
|
||||
"format": "percent",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": "100",
|
||||
"min": "0",
|
||||
"show": true
|
||||
},
|
||||
{
|
||||
"format": "short",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": null,
|
||||
"show": true
|
||||
}
|
||||
],
|
||||
"yaxis": {
|
||||
"align": false,
|
||||
"alignLevel": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"aliasColors": {},
|
||||
"bars": false,
|
||||
"dashLength": 10,
|
||||
"dashes": false,
|
||||
"datasource": "prometheus",
|
||||
"fieldConfig": {
|
||||
"defaults": {},
|
||||
"overrides": []
|
||||
},
|
||||
"fill": 1,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
"h": 10,
|
||||
"w": 6,
|
||||
"x": 18,
|
||||
"y": 8
|
||||
},
|
||||
"hiddenSeries": false,
|
||||
"id": 10,
|
||||
"legend": {
|
||||
"alignAsTable": true,
|
||||
"avg": true,
|
||||
"current": false,
|
||||
"hideEmpty": false,
|
||||
"max": true,
|
||||
"min": false,
|
||||
"rightSide": false,
|
||||
"show": true,
|
||||
"total": false,
|
||||
"values": true
|
||||
},
|
||||
"lines": true,
|
||||
"linewidth": 1,
|
||||
"links": [],
|
||||
"nullPointMode": "null",
|
||||
"options": {
|
||||
"alertThreshold": true
|
||||
},
|
||||
"paceLength": 10,
|
||||
"percentage": false,
|
||||
"pluginVersion": "7.5.2",
|
||||
"pointradius": 5,
|
||||
"points": false,
|
||||
"renderer": "flot",
|
||||
"seriesOverrides": [],
|
||||
"spaceLength": 10,
|
||||
"stack": false,
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"exemplar": true,
|
||||
"expr": "DCGM_FI_DEV_GPU_UTIL{exported_pod=~\".*$service.*\"}",
|
||||
"format": "time_series",
|
||||
"instant": false,
|
||||
"interval": "30s",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "{{exported_pod}}",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"thresholds": [],
|
||||
"timeFrom": null,
|
||||
"timeRegions": [],
|
||||
"timeShift": null,
|
||||
"title": "GPU 使用率",
|
||||
"tooltip": {
|
||||
"shared": true,
|
||||
"sort": 1,
|
||||
"value_type": "individual"
|
||||
},
|
||||
"type": "graph",
|
||||
"xaxis": {
|
||||
"buckets": null,
|
||||
"mode": "time",
|
||||
"name": null,
|
||||
"show": true,
|
||||
"values": []
|
||||
},
|
||||
"yaxes": [
|
||||
{
|
||||
"format": "short",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": 0,
|
||||
"show": true
|
||||
},
|
||||
{
|
||||
"format": "short",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": 0,
|
||||
"show": false
|
||||
}
|
||||
],
|
||||
"yaxis": {
|
||||
"align": false,
|
||||
"alignLevel": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"aliasColors": {},
|
||||
"bars": false,
|
||||
"dashLength": 10,
|
||||
"dashes": false,
|
||||
"datasource": "prometheus",
|
||||
"fieldConfig": {
|
||||
"defaults": {},
|
||||
"overrides": []
|
||||
},
|
||||
"fill": 1,
|
||||
"fillGradient": 0,
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 24,
|
||||
"x": 0,
|
||||
"y": 18
|
||||
},
|
||||
"hiddenSeries": false,
|
||||
"id": 3,
|
||||
"legend": {
|
||||
"alignAsTable": true,
|
||||
"avg": false,
|
||||
"current": false,
|
||||
"max": true,
|
||||
"min": false,
|
||||
"rightSide": true,
|
||||
"show": true,
|
||||
"total": false,
|
||||
"values": true
|
||||
},
|
||||
"lines": true,
|
||||
"linewidth": 1,
|
||||
"links": [],
|
||||
"nullPointMode": "null",
|
||||
"options": {
|
||||
"alertThreshold": true
|
||||
},
|
||||
"paceLength": 10,
|
||||
"percentage": false,
|
||||
"pluginVersion": "7.5.2",
|
||||
"pointradius": 2,
|
||||
"points": false,
|
||||
"renderer": "flot",
|
||||
"seriesOverrides": [],
|
||||
"spaceLength": 10,
|
||||
"stack": false,
|
||||
"steppedLine": false,
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sum by (destination_service_name) (istio_requests_total{destination_service_namespace=\"$namespace\"})",
|
||||
"format": "time_series",
|
||||
"intervalFactor": 1,
|
||||
"legendFormat": "{{destination_service_name}}",
|
||||
"refId": "B"
|
||||
}
|
||||
],
|
||||
"thresholds": [],
|
||||
"timeFrom": null,
|
||||
"timeRegions": [],
|
||||
"timeShift": null,
|
||||
"title": "全部域名服务监控",
|
||||
"tooltip": {
|
||||
"shared": true,
|
||||
"sort": 1,
|
||||
"value_type": "individual"
|
||||
},
|
||||
"type": "graph",
|
||||
"xaxis": {
|
||||
"buckets": null,
|
||||
"mode": "time",
|
||||
"name": null,
|
||||
"show": true,
|
||||
"values": []
|
||||
},
|
||||
"yaxes": [
|
||||
{
|
||||
"format": "short",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": null,
|
||||
"show": true
|
||||
},
|
||||
{
|
||||
"format": "short",
|
||||
"label": null,
|
||||
"logBase": 1,
|
||||
"max": null,
|
||||
"min": null,
|
||||
"show": true
|
||||
}
|
||||
],
|
||||
"yaxis": {
|
||||
"align": false,
|
||||
"alignLevel": null
|
||||
}
|
||||
}
|
||||
],
|
||||
"refresh": false,
|
||||
"schemaVersion": 18,
|
||||
"schemaVersion": 27,
|
||||
"style": "dark",
|
||||
"tags": [],
|
||||
"templating": {
|
||||
@ -292,19 +758,24 @@
|
||||
{
|
||||
"allValue": null,
|
||||
"current": {
|
||||
"selected": true,
|
||||
"selected": false,
|
||||
"text": "service",
|
||||
"value": "service"
|
||||
},
|
||||
"datasource": "Prometheus-istio",
|
||||
"datasource": "prometheus",
|
||||
"definition": "label_values(galley_istio_networking_virtualservices, namespace)",
|
||||
"description": null,
|
||||
"error": null,
|
||||
"hide": 0,
|
||||
"includeAll": true,
|
||||
"label": null,
|
||||
"multi": false,
|
||||
"name": "namespace",
|
||||
"options": [],
|
||||
"query": "label_values(galley_istio_networking_virtualservices, namespace)",
|
||||
"query": {
|
||||
"query": "label_values(galley_istio_networking_virtualservices, namespace)",
|
||||
"refId": "Prometheus-istio-namespace-Variable-Query"
|
||||
},
|
||||
"refresh": 1,
|
||||
"regex": "",
|
||||
"skipUrlSync": false,
|
||||
@ -322,15 +793,20 @@
|
||||
"text": "swallow-mysql",
|
||||
"value": "swallow-mysql"
|
||||
},
|
||||
"datasource": "Prometheus-istio",
|
||||
"datasource": "prometheus",
|
||||
"definition": "label_values(istio_requests_total{destination_service_namespace=\"$namespace\"}, destination_service_name)",
|
||||
"description": null,
|
||||
"error": null,
|
||||
"hide": 0,
|
||||
"includeAll": true,
|
||||
"label": null,
|
||||
"multi": true,
|
||||
"name": "service",
|
||||
"options": [],
|
||||
"query": "label_values(istio_requests_total{destination_service_namespace=\"$namespace\"}, destination_service_name)",
|
||||
"query": {
|
||||
"query": "label_values(istio_requests_total{destination_service_namespace=\"$namespace\"}, destination_service_name)",
|
||||
"refId": "Prometheus-istio-service-Variable-Query"
|
||||
},
|
||||
"refresh": 1,
|
||||
"regex": "",
|
||||
"skipUrlSync": false,
|
||||
@ -375,5 +851,5 @@
|
||||
"timezone": "",
|
||||
"title": "istio service",
|
||||
"uid": "istio-service",
|
||||
"version": 40
|
||||
"version": 19
|
||||
}
|
@ -1,178 +0,0 @@
|
||||
|
||||
|
||||
# Scrape config for envoy stats
|
||||
- job_name: 'envoy-stats'
|
||||
metrics_path: /stats/prometheus
|
||||
kubernetes_sd_configs:
|
||||
- role: pod
|
||||
|
||||
relabel_configs:
|
||||
- source_labels: [__meta_kubernetes_pod_container_port_name]
|
||||
action: keep
|
||||
regex: '.*-envoy-prom'
|
||||
- source_labels: [__address__, __meta_kubernetes_pod_annotation_prometheus_io_port]
|
||||
action: replace
|
||||
regex: ([^:]+)(?::\d+)?;(\d+)
|
||||
replacement: $1:15090
|
||||
target_label: __address__
|
||||
- action: labelmap
|
||||
regex: __meta_kubernetes_pod_label_(.+)
|
||||
- source_labels: [__meta_kubernetes_namespace]
|
||||
action: replace
|
||||
target_label: namespace
|
||||
- source_labels: [__meta_kubernetes_pod_name]
|
||||
action: replace
|
||||
target_label: pod_name
|
||||
|
||||
metric_relabel_configs:
|
||||
# Exclude some of the envoy metrics that have massive cardinality
|
||||
# This list may need to be pruned further moving forward, as informed
|
||||
# by performance and scalability testing.
|
||||
- source_labels: [ cluster_name ]
|
||||
regex: '(outbound|inbound|prometheus_stats).*'
|
||||
action: drop
|
||||
- source_labels: [ tcp_prefix ]
|
||||
regex: '(outbound|inbound|prometheus_stats).*'
|
||||
action: drop
|
||||
- source_labels: [ listener_address ]
|
||||
regex: '(.+)'
|
||||
action: drop
|
||||
- source_labels: [ http_conn_manager_listener_prefix ]
|
||||
regex: '(.+)'
|
||||
action: drop
|
||||
- source_labels: [ http_conn_manager_prefix ]
|
||||
regex: '(.+)'
|
||||
action: drop
|
||||
- source_labels: [ __name__ ]
|
||||
regex: 'envoy_tls.*'
|
||||
action: drop
|
||||
- source_labels: [ __name__ ]
|
||||
regex: 'envoy_tcp_downstream.*'
|
||||
action: drop
|
||||
- source_labels: [ __name__ ]
|
||||
regex: 'envoy_http_(stats|admin).*'
|
||||
action: drop
|
||||
- source_labels: [ __name__ ]
|
||||
regex: 'envoy_cluster_(lb|retry|bind|internal|max|original).*'
|
||||
action: drop
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
- job_name: 'kubernetes-cadvisor'
|
||||
scheme: https
|
||||
tls_config:
|
||||
ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
|
||||
bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
|
||||
kubernetes_sd_configs:
|
||||
- role: node
|
||||
relabel_configs:
|
||||
- action: labelmap
|
||||
regex: __meta_kubernetes_node_label_(.+)
|
||||
- target_label: __address__
|
||||
replacement: kubernetes.default.svc:443
|
||||
- source_labels: [__meta_kubernetes_node_name]
|
||||
regex: (.+)
|
||||
target_label: __metrics_path__
|
||||
replacement: /api/v1/nodes/${1}/proxy/metrics/cadvisor
|
||||
|
||||
# scrape config for service endpoints.
|
||||
- job_name: 'kubernetes-service-endpoints'
|
||||
kubernetes_sd_configs:
|
||||
- role: endpoints
|
||||
relabel_configs:
|
||||
- source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scrape]
|
||||
action: keep
|
||||
regex: true
|
||||
- source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scheme]
|
||||
action: replace
|
||||
target_label: __scheme__
|
||||
regex: (https?)
|
||||
- source_labels: [__meta_kubernetes_service_annotation_prometheus_io_path]
|
||||
action: replace
|
||||
target_label: __metrics_path__
|
||||
regex: (.+)
|
||||
- source_labels: [__address__, __meta_kubernetes_service_annotation_prometheus_io_port]
|
||||
action: replace
|
||||
target_label: __address__
|
||||
regex: ([^:]+)(?::\d+)?;(\d+)
|
||||
replacement: $1:$2
|
||||
- action: labelmap
|
||||
regex: __meta_kubernetes_service_label_(.+)
|
||||
- source_labels: [__meta_kubernetes_namespace]
|
||||
action: replace
|
||||
target_label: kubernetes_namespace
|
||||
- source_labels: [__meta_kubernetes_service_name]
|
||||
action: replace
|
||||
target_label: kubernetes_name
|
||||
|
||||
- job_name: 'kubernetes-pods'
|
||||
kubernetes_sd_configs:
|
||||
- role: pod
|
||||
relabel_configs: # If first two labels are present, pod should be scraped by the istio-secure job.
|
||||
- source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_scrape]
|
||||
action: keep
|
||||
regex: true
|
||||
- source_labels: [__meta_kubernetes_pod_annotation_sidecar_istio_io_status]
|
||||
action: drop
|
||||
regex: (.+)
|
||||
- source_labels: [__meta_kubernetes_pod_annotation_istio_mtls]
|
||||
action: drop
|
||||
regex: (true)
|
||||
- source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_path]
|
||||
action: replace
|
||||
target_label: __metrics_path__
|
||||
regex: (.+)
|
||||
- source_labels: [__address__, __meta_kubernetes_pod_annotation_prometheus_io_port]
|
||||
action: replace
|
||||
regex: ([^:]+)(?::\d+)?;(\d+)
|
||||
replacement: $1:$2
|
||||
target_label: __address__
|
||||
- action: labelmap
|
||||
regex: __meta_kubernetes_pod_label_(.+)
|
||||
- source_labels: [__meta_kubernetes_namespace]
|
||||
action: replace
|
||||
target_label: namespace
|
||||
- source_labels: [__meta_kubernetes_pod_name]
|
||||
action: replace
|
||||
target_label: pod_name
|
||||
|
||||
- job_name: 'kubernetes-pods-istio-secure'
|
||||
scheme: https
|
||||
tls_config:
|
||||
ca_file: /etc/istio-certs/root-cert.pem
|
||||
cert_file: /etc/istio-certs/cert-chain.pem
|
||||
key_file: /etc/istio-certs/key.pem
|
||||
insecure_skip_verify: true # prometheus does not support secure naming.
|
||||
kubernetes_sd_configs:
|
||||
- role: pod
|
||||
relabel_configs:
|
||||
- source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_scrape]
|
||||
action: keep
|
||||
regex: true
|
||||
# sidecar status annotation is added by sidecar injector and
|
||||
# istio_workload_mtls_ability can be specifically placed on a pod to indicate its ability to receive mtls traffic.
|
||||
- source_labels: [__meta_kubernetes_pod_annotation_sidecar_istio_io_status, __meta_kubernetes_pod_annotation_istio_mtls]
|
||||
action: keep
|
||||
regex: (([^;]+);([^;]*))|(([^;]*);(true))
|
||||
- source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_path]
|
||||
action: replace
|
||||
target_label: __metrics_path__
|
||||
regex: (.+)
|
||||
- source_labels: [__address__] # Only keep address that is host:port
|
||||
action: keep # otherwise an extra target with ':443' is added for https scheme
|
||||
regex: ([^:]+):(\d+)
|
||||
- source_labels: [__address__, __meta_kubernetes_pod_annotation_prometheus_io_port]
|
||||
action: replace
|
||||
regex: ([^:]+)(?::\d+)?;(\d+)
|
||||
replacement: $1:$2
|
||||
target_label: __address__
|
||||
- action: labelmap
|
||||
regex: __meta_kubernetes_pod_label_(.+)
|
||||
- source_labels: [__meta_kubernetes_namespace]
|
||||
action: replace
|
||||
target_label: namespace
|
||||
- source_labels: [__meta_kubernetes_pod_name]
|
||||
action: replace
|
||||
target_label: pod_name
|
@ -1,153 +0,0 @@
|
||||
apiVersion: apps/v1
|
||||
kind: StatefulSet
|
||||
metadata:
|
||||
annotations:
|
||||
project.cattle.io/namespaces: '["monitoring"]'
|
||||
labels:
|
||||
prometheus: k8s
|
||||
name: prometheus-k8s
|
||||
namespace: monitoring
|
||||
spec:
|
||||
podManagementPolicy: OrderedReady
|
||||
replicas: 1
|
||||
revisionHistoryLimit: 10
|
||||
selector:
|
||||
matchLabels:
|
||||
app: prometheus
|
||||
prometheus: k8s
|
||||
serviceName: prometheus-operated
|
||||
template:
|
||||
metadata:
|
||||
creationTimestamp: null
|
||||
labels:
|
||||
app: prometheus
|
||||
prometheus: k8s
|
||||
spec:
|
||||
affinity:
|
||||
nodeAffinity:
|
||||
requiredDuringSchedulingIgnoredDuringExecution:
|
||||
nodeSelectorTerms:
|
||||
- matchExpressions:
|
||||
- key: monitoring
|
||||
operator: In
|
||||
values:
|
||||
- "true"
|
||||
containers:
|
||||
- args:
|
||||
- --web.console.templates=/etc/prometheus/consoles
|
||||
- --web.console.libraries=/etc/prometheus/console-libraries
|
||||
- --config.file=/etc/prometheus/config_out/prometheus.env.yaml
|
||||
- --storage.tsdb.path=/prometheus
|
||||
- --storage.tsdb.retention=7d
|
||||
- --web.enable-lifecycle
|
||||
- --storage.tsdb.no-lockfile
|
||||
- --web.route-prefix=/
|
||||
image: prom/prometheus:v2.24.0
|
||||
imagePullPolicy: IfNotPresent
|
||||
livenessProbe:
|
||||
failureThreshold: 6
|
||||
httpGet:
|
||||
path: /-/healthy
|
||||
port: web
|
||||
scheme: HTTP
|
||||
periodSeconds: 5
|
||||
successThreshold: 1
|
||||
timeoutSeconds: 3
|
||||
name: prometheus
|
||||
ports:
|
||||
- containerPort: 9090
|
||||
name: web
|
||||
protocol: TCP
|
||||
readinessProbe:
|
||||
failureThreshold: 120
|
||||
httpGet:
|
||||
path: /-/ready
|
||||
port: web
|
||||
scheme: HTTP
|
||||
periodSeconds: 5
|
||||
successThreshold: 1
|
||||
timeoutSeconds: 3
|
||||
resources:
|
||||
limits:
|
||||
memory: 6000Mi
|
||||
requests:
|
||||
memory: 2000Mi
|
||||
terminationMessagePath: /dev/termination-log
|
||||
terminationMessagePolicy: File
|
||||
volumeMounts:
|
||||
- mountPath: /etc/prometheus/config_out
|
||||
name: config-out
|
||||
readOnly: true
|
||||
- mountPath: /prometheus
|
||||
name: prometheus-k8s-db
|
||||
- mountPath: /etc/prometheus/rules/prometheus-k8s-rulefiles-0
|
||||
name: prometheus-k8s-rulefiles-0
|
||||
- args:
|
||||
- --reload-url=http://localhost:9090/-/reload
|
||||
- --config-file=/etc/prometheus/config/prometheus.yaml
|
||||
- --config-envsubst-file=/etc/prometheus/config_out/prometheus.env.yaml
|
||||
command:
|
||||
- /bin/prometheus-config-reloader
|
||||
env:
|
||||
- name: POD_NAME
|
||||
valueFrom:
|
||||
fieldRef:
|
||||
apiVersion: v1
|
||||
fieldPath: metadata.name
|
||||
image: quay.io/coreos/prometheus-config-reloader:v0.22.0
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: prometheus-config-reloader
|
||||
# resources:
|
||||
# limits:
|
||||
# cpu: 100m
|
||||
# memory: 500Mi
|
||||
terminationMessagePath: /dev/termination-log
|
||||
terminationMessagePolicy: File
|
||||
volumeMounts:
|
||||
- mountPath: /etc/prometheus/config
|
||||
name: config
|
||||
- mountPath: /etc/prometheus/config_out
|
||||
name: config-out
|
||||
- args:
|
||||
- --webhook-url=http://localhost:9090/-/reload
|
||||
- --volume-dir=/etc/prometheus/rules/prometheus-k8s-rulefiles-0
|
||||
image: quay.io/coreos/configmap-reload:v0.0.1
|
||||
imagePullPolicy: IfNotPresent
|
||||
name: rules-configmap-reloader
|
||||
# resources:
|
||||
# limits:
|
||||
# cpu: 50m
|
||||
# memory: 100Mi
|
||||
terminationMessagePath: /dev/termination-log
|
||||
terminationMessagePolicy: File
|
||||
volumeMounts:
|
||||
- mountPath: /etc/prometheus/rules/prometheus-k8s-rulefiles-0
|
||||
name: prometheus-k8s-rulefiles-0
|
||||
dnsPolicy: ClusterFirst
|
||||
restartPolicy: Always
|
||||
schedulerName: default-scheduler
|
||||
securityContext:
|
||||
fsGroup: 2000
|
||||
runAsNonRoot: true
|
||||
runAsUser: 1000
|
||||
serviceAccount: prometheus-k8s
|
||||
serviceAccountName: prometheus-k8s
|
||||
terminationGracePeriodSeconds: 600
|
||||
volumes:
|
||||
- name: config-volume
|
||||
configMap:
|
||||
name: prometheus
|
||||
- name: config
|
||||
secret:
|
||||
defaultMode: 420
|
||||
secretName: prometheus-k8s
|
||||
- emptyDir: {}
|
||||
name: config-out
|
||||
- configMap:
|
||||
defaultMode: 420
|
||||
name: prometheus-k8s-rulefiles-0
|
||||
name: prometheus-k8s-rulefiles-0
|
||||
- emptyDir: {}
|
||||
name: prometheus-k8s-db
|
||||
updateStrategy:
|
||||
type: RollingUpdate
|
@ -1,151 +0,0 @@
|
||||
apiVersion: rbac.authorization.k8s.io/v1
|
||||
kind: ClusterRole
|
||||
metadata:
|
||||
name: prometheus-k8s
|
||||
rules:
|
||||
- apiGroups:
|
||||
- ""
|
||||
resources:
|
||||
- nodes/metrics
|
||||
verbs:
|
||||
- get
|
||||
- nonResourceURLs:
|
||||
- /metrics
|
||||
verbs:
|
||||
- get
|
||||
---
|
||||
apiVersion: rbac.authorization.k8s.io/v1
|
||||
kind: ClusterRoleBinding
|
||||
metadata:
|
||||
name: prometheus-k8s
|
||||
roleRef:
|
||||
apiGroup: rbac.authorization.k8s.io
|
||||
kind: ClusterRole
|
||||
name: prometheus-k8s
|
||||
subjects:
|
||||
- kind: ServiceAccount
|
||||
name: prometheus-k8s
|
||||
namespace: monitoring
|
||||
---
|
||||
apiVersion: rbac.authorization.k8s.io/v1
|
||||
kind: Role
|
||||
metadata:
|
||||
name: prometheus-k8s
|
||||
namespace: monitoring
|
||||
rules:
|
||||
- apiGroups:
|
||||
- ""
|
||||
resources:
|
||||
- nodes
|
||||
- services
|
||||
- endpoints
|
||||
- pods
|
||||
verbs:
|
||||
- get
|
||||
- list
|
||||
- watch
|
||||
---
|
||||
apiVersion: rbac.authorization.k8s.io/v1
|
||||
kind: RoleBinding
|
||||
metadata:
|
||||
name: prometheus-k8s
|
||||
namespace: monitoring
|
||||
roleRef:
|
||||
apiGroup: rbac.authorization.k8s.io
|
||||
kind: Role
|
||||
name: prometheus-k8s
|
||||
subjects:
|
||||
- kind: ServiceAccount
|
||||
name: prometheus-k8s
|
||||
namespace: monitoring
|
||||
---
|
||||
apiVersion: rbac.authorization.k8s.io/v1
|
||||
kind: Role
|
||||
metadata:
|
||||
name: prometheus-k8s
|
||||
namespace: kube-system
|
||||
rules:
|
||||
- apiGroups:
|
||||
- ""
|
||||
resources:
|
||||
- nodes
|
||||
- services
|
||||
- endpoints
|
||||
- pods
|
||||
verbs:
|
||||
- get
|
||||
- list
|
||||
- watch
|
||||
---
|
||||
apiVersion: rbac.authorization.k8s.io/v1
|
||||
kind: RoleBinding
|
||||
metadata:
|
||||
name: prometheus-k8s
|
||||
namespace: kube-system
|
||||
roleRef:
|
||||
apiGroup: rbac.authorization.k8s.io
|
||||
kind: Role
|
||||
name: prometheus-k8s
|
||||
subjects:
|
||||
- kind: ServiceAccount
|
||||
name: prometheus-k8s
|
||||
namespace: monitoring
|
||||
---
|
||||
apiVersion: rbac.authorization.k8s.io/v1
|
||||
kind: Role
|
||||
metadata:
|
||||
name: prometheus-k8s
|
||||
namespace: default
|
||||
rules:
|
||||
- apiGroups:
|
||||
- ""
|
||||
resources:
|
||||
- nodes
|
||||
- services
|
||||
- endpoints
|
||||
- pods
|
||||
verbs:
|
||||
- get
|
||||
- list
|
||||
- watch
|
||||
---
|
||||
apiVersion: rbac.authorization.k8s.io/v1
|
||||
kind: RoleBinding
|
||||
metadata:
|
||||
name: prometheus-k8s
|
||||
namespace: default
|
||||
roleRef:
|
||||
apiGroup: rbac.authorization.k8s.io
|
||||
kind: Role
|
||||
name: prometheus-k8s
|
||||
subjects:
|
||||
- kind: ServiceAccount
|
||||
name: prometheus-k8s
|
||||
namespace: monitoring
|
||||
---
|
||||
apiVersion: rbac.authorization.k8s.io/v1
|
||||
kind: Role
|
||||
metadata:
|
||||
name: prometheus-k8s-config
|
||||
namespace: monitoring
|
||||
rules:
|
||||
- apiGroups:
|
||||
- ""
|
||||
resources:
|
||||
- configmaps
|
||||
verbs:
|
||||
- get
|
||||
---
|
||||
apiVersion: rbac.authorization.k8s.io/v1
|
||||
kind: RoleBinding
|
||||
metadata:
|
||||
name: prometheus-k8s-config
|
||||
namespace: monitoring
|
||||
roleRef:
|
||||
apiGroup: rbac.authorization.k8s.io
|
||||
kind: Role
|
||||
name: prometheus-k8s-config
|
||||
subjects:
|
||||
- kind: ServiceAccount
|
||||
name: prometheus-k8s
|
||||
namespace: monitoring
|
@ -34,9 +34,11 @@ kubectl create -f kube-batch/deploy.yaml
|
||||
cd prometheus
|
||||
mkdir -p /data/k8s/monitoring/grafana/
|
||||
chmod -R 777 /data/k8s/monitoring/grafana/
|
||||
kubectl apply -f ./operator/operator-rbac.yml
|
||||
kubectl apply -f ./operator/operator-dp.yml
|
||||
kubectl delete -f ./operator/operator-crd.yml
|
||||
kubectl apply -f ./operator/operator-crd.yml
|
||||
kubectl apply -f ./operator/operator-rbac.yml
|
||||
kubectl wait crd/podmonitors.monitoring.coreos.com --for condition=established --timeout=60s
|
||||
kubectl apply -f ./operator/operator-dp.yml
|
||||
kubectl apply -f ./alertmanater/alertmanager-main-sa.yml
|
||||
kubectl apply -f ./alertmanater/alertmanager-main-secret.yml
|
||||
kubectl apply -f ./alertmanater/alertmanager-main-svc.yml
|
||||
@ -64,6 +66,8 @@ kubectl apply -f ./prometheus/prometheus-secret.yml
|
||||
kubectl apply -f ./prometheus/prometheus-rules.yml
|
||||
kubectl apply -f ./prometheus/prometheus-rbac.yml
|
||||
kubectl apply -f ./prometheus/prometheus-svc.yml
|
||||
kubectl wait crd/prometheuses.monitoring.coreos.com --for condition=established --timeout=60s
|
||||
kubectl delete -f ./prometheus/prometheus-main.yml
|
||||
kubectl apply -f ./prometheus/prometheus-main.yml
|
||||
kubectl apply -f ./servicemonitor/alertmanager-sm.yml
|
||||
kubectl apply -f ./servicemonitor/coredns-sm.yml
|
||||
|
Loading…
Reference in New Issue
Block a user