修正istio prometheus部署

This commit is contained in:
pengluan 2022-06-15 12:04:30 +08:00
parent 7ad80bcc0f
commit 0b6b2aa03d
5 changed files with 593 additions and 595 deletions

View File

@ -15,8 +15,8 @@
"editable": true,
"gnetId": null,
"graphTooltip": 0,
"id": 22,
"iteration": 1634283475928,
"id": 17,
"iteration": 1655264819796,
"links": [],
"panels": [
{
@ -24,101 +24,20 @@
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "Prometheus-istio",
"datasource": "prometheus",
"fieldConfig": {
"defaults": {},
"overrides": []
},
"fill": 1,
"fillGradient": 0,
"gridPos": {
"h": 8,
"w": 24,
"w": 12,
"x": 0,
"y": 0
},
"id": 3,
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": true,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": true
},
"lines": true,
"linewidth": 1,
"links": [],
"nullPointMode": "null",
"paceLength": 10,
"percentage": false,
"pointradius": 2,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"stack": false,
"steppedLine": false,
"targets": [
{
"expr": "sum by (destination_service_name) (istio_requests_total{destination_service_namespace=\"$namespace\"})",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{destination_service_name}}",
"refId": "B"
}
],
"thresholds": [],
"timeFrom": null,
"timeRegions": [],
"timeShift": null,
"title": "全部服务监控",
"tooltip": {
"shared": true,
"sort": 1,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
}
],
"yaxis": {
"align": false,
"alignLevel": null
}
},
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "Prometheus-istio",
"fill": 1,
"gridPos": {
"h": 7,
"w": 24,
"x": 0,
"y": 8
},
"hiddenSeries": false,
"id": 2,
"legend": {
"alignAsTable": true,
@ -126,7 +45,7 @@
"current": false,
"max": true,
"min": false,
"rightSide": true,
"rightSide": false,
"show": true,
"total": false,
"values": true
@ -135,29 +54,44 @@
"linewidth": 1,
"links": [],
"nullPointMode": "null",
"options": {
"alertThreshold": true
},
"paceLength": 10,
"percentage": false,
"pluginVersion": "7.5.2",
"pointradius": 2,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"exemplar": true,
"expr": "sum by (destination_service_name) (istio_requests_total{destination_service_namespace=\"$namespace\",destination_service_name=~\"($service)\"})",
"format": "time_series",
"hide": false,
"hide": true,
"interval": "",
"intervalFactor": 1,
"legendFormat": "{{destination_service_name}}",
"refId": "B"
},
{
"exemplar": true,
"expr": "sum by (destination_service_name) (irate(istio_requests_total{destination_service_namespace=\"$namespace\",destination_service_name=~\"($service)\"}[1m]))",
"hide": false,
"interval": "",
"legendFormat": "{{destination_service_name}}",
"refId": "A"
}
],
"thresholds": [],
"timeFrom": null,
"timeRegions": [],
"timeShift": null,
"title": "服务请求量",
"title": "域名服务平均请求量(1分钟内)",
"tooltip": {
"shared": true,
"sort": 1,
@ -199,14 +133,20 @@
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "Prometheus-istio",
"fill": 1,
"gridPos": {
"h": 7,
"w": 24,
"x": 0,
"y": 15
"datasource": "prometheus",
"fieldConfig": {
"defaults": {},
"overrides": []
},
"fill": 1,
"fillGradient": 0,
"gridPos": {
"h": 8,
"w": 12,
"x": 12,
"y": 0
},
"hiddenSeries": false,
"id": 4,
"legend": {
"alignAsTable": true,
@ -214,7 +154,7 @@
"current": false,
"max": true,
"min": false,
"rightSide": true,
"rightSide": false,
"show": true,
"total": false,
"values": true
@ -223,19 +163,26 @@
"linewidth": 1,
"links": [],
"nullPointMode": "null",
"options": {
"alertThreshold": true
},
"paceLength": 10,
"percentage": false,
"pluginVersion": "7.5.2",
"pointradius": 2,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"expr": "sum by (destination_service_name) (istio_request_bytes_sum{destination_service_namespace=\"$namespace\",destination_service_name=~\"($service)\"})",
"exemplar": true,
"expr": "sum by (destination_service_name) (irate(istio_request_bytes_sum{destination_service_namespace=\"$namespace\",destination_service_name=~\"($service)\"}[1m]))",
"format": "time_series",
"hide": false,
"interval": "",
"intervalFactor": 1,
"legendFormat": "{{destination_service_name}}",
"refId": "B"
@ -245,7 +192,7 @@
"timeFrom": null,
"timeRegions": [],
"timeShift": null,
"title": "服务吞吐",
"title": "域名服务吞吐1分钟",
"tooltip": {
"shared": true,
"sort": 1,
@ -281,10 +228,529 @@
"align": false,
"alignLevel": null
}
},
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "prometheus",
"decimals": null,
"fieldConfig": {
"defaults": {},
"overrides": []
},
"fill": 1,
"fillGradient": 0,
"gridPos": {
"h": 10,
"w": 6,
"x": 0,
"y": 8
},
"hiddenSeries": false,
"id": 6,
"legend": {
"alignAsTable": true,
"avg": true,
"current": false,
"max": true,
"min": false,
"rightSide": false,
"show": true,
"total": false,
"values": true
},
"lines": true,
"linewidth": 1,
"links": [],
"nullPointMode": "null",
"options": {
"alertThreshold": true
},
"paceLength": 10,
"percentage": false,
"pluginVersion": "7.5.2",
"pointradius": 5,
"points": false,
"renderer": "flot",
"repeat": null,
"seriesOverrides": [],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"exemplar": true,
"expr": "sum by (pod) (container_memory_working_set_bytes{job=\"kubelet\", image!=\"\",container_name!=\"POD\",pod=~\".*$service.*\"})",
"format": "time_series",
"interval": "",
"intervalFactor": 2,
"legendFormat": "{{ pod}}",
"refId": "B"
}
],
"thresholds": [],
"timeFrom": null,
"timeRegions": [],
"timeShift": null,
"title": "Memory Usage",
"tooltip": {
"shared": true,
"sort": 1,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"$$hashKey": "object:204",
"format": "bytes",
"label": null,
"logBase": 1,
"max": null,
"min": 0,
"show": true
},
{
"$$hashKey": "object:205",
"format": "bytes",
"label": null,
"logBase": 1,
"max": null,
"min": 0,
"show": false
}
],
"yaxis": {
"align": false,
"alignLevel": null
}
},
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "prometheus",
"fieldConfig": {
"defaults": {},
"overrides": []
},
"fill": 1,
"fillGradient": 0,
"gridPos": {
"h": 10,
"w": 6,
"x": 6,
"y": 8
},
"hiddenSeries": false,
"id": 8,
"legend": {
"alignAsTable": true,
"avg": true,
"current": false,
"hideEmpty": false,
"max": true,
"min": false,
"rightSide": false,
"show": true,
"total": false,
"values": true
},
"lines": true,
"linewidth": 1,
"links": [],
"nullPointMode": "null",
"options": {
"alertThreshold": true
},
"paceLength": 10,
"percentage": false,
"pluginVersion": "7.5.2",
"pointradius": 5,
"points": false,
"renderer": "flot",
"repeat": null,
"seriesOverrides": [],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"exemplar": true,
"expr": "sum by (pod) (rate(container_cpu_usage_seconds_total{job=\"kubelet\", image!=\"\",container_name!=\"POD\",pod=~\".*$service.*\"}[5m]))",
"format": "time_series",
"instant": false,
"interval": "30s",
"intervalFactor": 1,
"legendFormat": "{{pod}}",
"refId": "A"
},
{
"expr": "",
"format": "time_series",
"intervalFactor": 1,
"refId": "B"
}
],
"thresholds": [],
"timeFrom": null,
"timeRegions": [],
"timeShift": null,
"title": "CPU Usage",
"tooltip": {
"shared": true,
"sort": 1,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"$$hashKey": "object:483",
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": 0,
"show": true
},
{
"$$hashKey": "object:484",
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": 0,
"show": false
}
],
"yaxis": {
"align": false,
"alignLevel": null
}
},
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "prometheus",
"fieldConfig": {
"defaults": {
"links": []
},
"overrides": []
},
"fill": 1,
"fillGradient": 0,
"gridPos": {
"h": 10,
"w": 6,
"x": 12,
"y": 8
},
"hiddenSeries": false,
"id": 12,
"legend": {
"alignAsTable": true,
"avg": true,
"current": false,
"max": true,
"min": false,
"rightSide": false,
"show": true,
"sort": "avg",
"sortDesc": true,
"total": false,
"values": true
},
"lines": true,
"linewidth": 2,
"links": [],
"nullPointMode": "null",
"options": {
"alertThreshold": true
},
"paceLength": 10,
"percentage": false,
"pluginVersion": "7.5.2",
"pointradius": 2,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"exemplar": true,
"expr": "DCGM_FI_DEV_MEM_COPY_UTIL{exported_pod=~\".*$service.*\"}",
"format": "time_series",
"interval": "",
"intervalFactor": 1,
"legendFormat": "{{exported_pod}} GPU {{gpu}}",
"refId": "A"
}
],
"thresholds": [],
"timeFrom": null,
"timeRegions": [],
"timeShift": null,
"title": "pod GPU 显存占用率",
"tooltip": {
"shared": true,
"sort": 2,
"value_type": "cumulative"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"format": "percent",
"label": null,
"logBase": 1,
"max": "100",
"min": "0",
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
}
],
"yaxis": {
"align": false,
"alignLevel": null
}
},
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "prometheus",
"fieldConfig": {
"defaults": {},
"overrides": []
},
"fill": 1,
"fillGradient": 0,
"gridPos": {
"h": 10,
"w": 6,
"x": 18,
"y": 8
},
"hiddenSeries": false,
"id": 10,
"legend": {
"alignAsTable": true,
"avg": true,
"current": false,
"hideEmpty": false,
"max": true,
"min": false,
"rightSide": false,
"show": true,
"total": false,
"values": true
},
"lines": true,
"linewidth": 1,
"links": [],
"nullPointMode": "null",
"options": {
"alertThreshold": true
},
"paceLength": 10,
"percentage": false,
"pluginVersion": "7.5.2",
"pointradius": 5,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"exemplar": true,
"expr": "DCGM_FI_DEV_GPU_UTIL{exported_pod=~\".*$service.*\"}",
"format": "time_series",
"instant": false,
"interval": "30s",
"intervalFactor": 1,
"legendFormat": "{{exported_pod}}",
"refId": "A"
}
],
"thresholds": [],
"timeFrom": null,
"timeRegions": [],
"timeShift": null,
"title": "GPU 使用率",
"tooltip": {
"shared": true,
"sort": 1,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": 0,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": 0,
"show": false
}
],
"yaxis": {
"align": false,
"alignLevel": null
}
},
{
"aliasColors": {},
"bars": false,
"dashLength": 10,
"dashes": false,
"datasource": "prometheus",
"fieldConfig": {
"defaults": {},
"overrides": []
},
"fill": 1,
"fillGradient": 0,
"gridPos": {
"h": 8,
"w": 24,
"x": 0,
"y": 18
},
"hiddenSeries": false,
"id": 3,
"legend": {
"alignAsTable": true,
"avg": false,
"current": false,
"max": true,
"min": false,
"rightSide": true,
"show": true,
"total": false,
"values": true
},
"lines": true,
"linewidth": 1,
"links": [],
"nullPointMode": "null",
"options": {
"alertThreshold": true
},
"paceLength": 10,
"percentage": false,
"pluginVersion": "7.5.2",
"pointradius": 2,
"points": false,
"renderer": "flot",
"seriesOverrides": [],
"spaceLength": 10,
"stack": false,
"steppedLine": false,
"targets": [
{
"expr": "sum by (destination_service_name) (istio_requests_total{destination_service_namespace=\"$namespace\"})",
"format": "time_series",
"intervalFactor": 1,
"legendFormat": "{{destination_service_name}}",
"refId": "B"
}
],
"thresholds": [],
"timeFrom": null,
"timeRegions": [],
"timeShift": null,
"title": "全部域名服务监控",
"tooltip": {
"shared": true,
"sort": 1,
"value_type": "individual"
},
"type": "graph",
"xaxis": {
"buckets": null,
"mode": "time",
"name": null,
"show": true,
"values": []
},
"yaxes": [
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": true
}
],
"yaxis": {
"align": false,
"alignLevel": null
}
}
],
"refresh": false,
"schemaVersion": 18,
"schemaVersion": 27,
"style": "dark",
"tags": [],
"templating": {
@ -292,19 +758,24 @@
{
"allValue": null,
"current": {
"selected": true,
"selected": false,
"text": "service",
"value": "service"
},
"datasource": "Prometheus-istio",
"datasource": "prometheus",
"definition": "label_values(galley_istio_networking_virtualservices, namespace)",
"description": null,
"error": null,
"hide": 0,
"includeAll": true,
"label": null,
"multi": false,
"name": "namespace",
"options": [],
"query": "label_values(galley_istio_networking_virtualservices, namespace)",
"query": {
"query": "label_values(galley_istio_networking_virtualservices, namespace)",
"refId": "Prometheus-istio-namespace-Variable-Query"
},
"refresh": 1,
"regex": "",
"skipUrlSync": false,
@ -322,15 +793,20 @@
"text": "swallow-mysql",
"value": "swallow-mysql"
},
"datasource": "Prometheus-istio",
"datasource": "prometheus",
"definition": "label_values(istio_requests_total{destination_service_namespace=\"$namespace\"}, destination_service_name)",
"description": null,
"error": null,
"hide": 0,
"includeAll": true,
"label": null,
"multi": true,
"name": "service",
"options": [],
"query": "label_values(istio_requests_total{destination_service_namespace=\"$namespace\"}, destination_service_name)",
"query": {
"query": "label_values(istio_requests_total{destination_service_namespace=\"$namespace\"}, destination_service_name)",
"refId": "Prometheus-istio-service-Variable-Query"
},
"refresh": 1,
"regex": "",
"skipUrlSync": false,
@ -375,5 +851,5 @@
"timezone": "",
"title": "istio service",
"uid": "istio-service",
"version": 40
"version": 19
}

View File

@ -1,178 +0,0 @@
# Scrape config for envoy stats
- job_name: 'envoy-stats'
metrics_path: /stats/prometheus
kubernetes_sd_configs:
- role: pod
relabel_configs:
- source_labels: [__meta_kubernetes_pod_container_port_name]
action: keep
regex: '.*-envoy-prom'
- source_labels: [__address__, __meta_kubernetes_pod_annotation_prometheus_io_port]
action: replace
regex: ([^:]+)(?::\d+)?;(\d+)
replacement: $1:15090
target_label: __address__
- action: labelmap
regex: __meta_kubernetes_pod_label_(.+)
- source_labels: [__meta_kubernetes_namespace]
action: replace
target_label: namespace
- source_labels: [__meta_kubernetes_pod_name]
action: replace
target_label: pod_name
metric_relabel_configs:
# Exclude some of the envoy metrics that have massive cardinality
# This list may need to be pruned further moving forward, as informed
# by performance and scalability testing.
- source_labels: [ cluster_name ]
regex: '(outbound|inbound|prometheus_stats).*'
action: drop
- source_labels: [ tcp_prefix ]
regex: '(outbound|inbound|prometheus_stats).*'
action: drop
- source_labels: [ listener_address ]
regex: '(.+)'
action: drop
- source_labels: [ http_conn_manager_listener_prefix ]
regex: '(.+)'
action: drop
- source_labels: [ http_conn_manager_prefix ]
regex: '(.+)'
action: drop
- source_labels: [ __name__ ]
regex: 'envoy_tls.*'
action: drop
- source_labels: [ __name__ ]
regex: 'envoy_tcp_downstream.*'
action: drop
- source_labels: [ __name__ ]
regex: 'envoy_http_(stats|admin).*'
action: drop
- source_labels: [ __name__ ]
regex: 'envoy_cluster_(lb|retry|bind|internal|max|original).*'
action: drop
- job_name: 'kubernetes-cadvisor'
scheme: https
tls_config:
ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
kubernetes_sd_configs:
- role: node
relabel_configs:
- action: labelmap
regex: __meta_kubernetes_node_label_(.+)
- target_label: __address__
replacement: kubernetes.default.svc:443
- source_labels: [__meta_kubernetes_node_name]
regex: (.+)
target_label: __metrics_path__
replacement: /api/v1/nodes/${1}/proxy/metrics/cadvisor
# scrape config for service endpoints.
- job_name: 'kubernetes-service-endpoints'
kubernetes_sd_configs:
- role: endpoints
relabel_configs:
- source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scrape]
action: keep
regex: true
- source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scheme]
action: replace
target_label: __scheme__
regex: (https?)
- source_labels: [__meta_kubernetes_service_annotation_prometheus_io_path]
action: replace
target_label: __metrics_path__
regex: (.+)
- source_labels: [__address__, __meta_kubernetes_service_annotation_prometheus_io_port]
action: replace
target_label: __address__
regex: ([^:]+)(?::\d+)?;(\d+)
replacement: $1:$2
- action: labelmap
regex: __meta_kubernetes_service_label_(.+)
- source_labels: [__meta_kubernetes_namespace]
action: replace
target_label: kubernetes_namespace
- source_labels: [__meta_kubernetes_service_name]
action: replace
target_label: kubernetes_name
- job_name: 'kubernetes-pods'
kubernetes_sd_configs:
- role: pod
relabel_configs: # If first two labels are present, pod should be scraped by the istio-secure job.
- source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_scrape]
action: keep
regex: true
- source_labels: [__meta_kubernetes_pod_annotation_sidecar_istio_io_status]
action: drop
regex: (.+)
- source_labels: [__meta_kubernetes_pod_annotation_istio_mtls]
action: drop
regex: (true)
- source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_path]
action: replace
target_label: __metrics_path__
regex: (.+)
- source_labels: [__address__, __meta_kubernetes_pod_annotation_prometheus_io_port]
action: replace
regex: ([^:]+)(?::\d+)?;(\d+)
replacement: $1:$2
target_label: __address__
- action: labelmap
regex: __meta_kubernetes_pod_label_(.+)
- source_labels: [__meta_kubernetes_namespace]
action: replace
target_label: namespace
- source_labels: [__meta_kubernetes_pod_name]
action: replace
target_label: pod_name
- job_name: 'kubernetes-pods-istio-secure'
scheme: https
tls_config:
ca_file: /etc/istio-certs/root-cert.pem
cert_file: /etc/istio-certs/cert-chain.pem
key_file: /etc/istio-certs/key.pem
insecure_skip_verify: true # prometheus does not support secure naming.
kubernetes_sd_configs:
- role: pod
relabel_configs:
- source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_scrape]
action: keep
regex: true
# sidecar status annotation is added by sidecar injector and
# istio_workload_mtls_ability can be specifically placed on a pod to indicate its ability to receive mtls traffic.
- source_labels: [__meta_kubernetes_pod_annotation_sidecar_istio_io_status, __meta_kubernetes_pod_annotation_istio_mtls]
action: keep
regex: (([^;]+);([^;]*))|(([^;]*);(true))
- source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_path]
action: replace
target_label: __metrics_path__
regex: (.+)
- source_labels: [__address__] # Only keep address that is host:port
action: keep # otherwise an extra target with ':443' is added for https scheme
regex: ([^:]+):(\d+)
- source_labels: [__address__, __meta_kubernetes_pod_annotation_prometheus_io_port]
action: replace
regex: ([^:]+)(?::\d+)?;(\d+)
replacement: $1:$2
target_label: __address__
- action: labelmap
regex: __meta_kubernetes_pod_label_(.+)
- source_labels: [__meta_kubernetes_namespace]
action: replace
target_label: namespace
- source_labels: [__meta_kubernetes_pod_name]
action: replace
target_label: pod_name

View File

@ -1,153 +0,0 @@
apiVersion: apps/v1
kind: StatefulSet
metadata:
annotations:
project.cattle.io/namespaces: '["monitoring"]'
labels:
prometheus: k8s
name: prometheus-k8s
namespace: monitoring
spec:
podManagementPolicy: OrderedReady
replicas: 1
revisionHistoryLimit: 10
selector:
matchLabels:
app: prometheus
prometheus: k8s
serviceName: prometheus-operated
template:
metadata:
creationTimestamp: null
labels:
app: prometheus
prometheus: k8s
spec:
affinity:
nodeAffinity:
requiredDuringSchedulingIgnoredDuringExecution:
nodeSelectorTerms:
- matchExpressions:
- key: monitoring
operator: In
values:
- "true"
containers:
- args:
- --web.console.templates=/etc/prometheus/consoles
- --web.console.libraries=/etc/prometheus/console-libraries
- --config.file=/etc/prometheus/config_out/prometheus.env.yaml
- --storage.tsdb.path=/prometheus
- --storage.tsdb.retention=7d
- --web.enable-lifecycle
- --storage.tsdb.no-lockfile
- --web.route-prefix=/
image: prom/prometheus:v2.24.0
imagePullPolicy: IfNotPresent
livenessProbe:
failureThreshold: 6
httpGet:
path: /-/healthy
port: web
scheme: HTTP
periodSeconds: 5
successThreshold: 1
timeoutSeconds: 3
name: prometheus
ports:
- containerPort: 9090
name: web
protocol: TCP
readinessProbe:
failureThreshold: 120
httpGet:
path: /-/ready
port: web
scheme: HTTP
periodSeconds: 5
successThreshold: 1
timeoutSeconds: 3
resources:
limits:
memory: 6000Mi
requests:
memory: 2000Mi
terminationMessagePath: /dev/termination-log
terminationMessagePolicy: File
volumeMounts:
- mountPath: /etc/prometheus/config_out
name: config-out
readOnly: true
- mountPath: /prometheus
name: prometheus-k8s-db
- mountPath: /etc/prometheus/rules/prometheus-k8s-rulefiles-0
name: prometheus-k8s-rulefiles-0
- args:
- --reload-url=http://localhost:9090/-/reload
- --config-file=/etc/prometheus/config/prometheus.yaml
- --config-envsubst-file=/etc/prometheus/config_out/prometheus.env.yaml
command:
- /bin/prometheus-config-reloader
env:
- name: POD_NAME
valueFrom:
fieldRef:
apiVersion: v1
fieldPath: metadata.name
image: quay.io/coreos/prometheus-config-reloader:v0.22.0
imagePullPolicy: IfNotPresent
name: prometheus-config-reloader
# resources:
# limits:
# cpu: 100m
# memory: 500Mi
terminationMessagePath: /dev/termination-log
terminationMessagePolicy: File
volumeMounts:
- mountPath: /etc/prometheus/config
name: config
- mountPath: /etc/prometheus/config_out
name: config-out
- args:
- --webhook-url=http://localhost:9090/-/reload
- --volume-dir=/etc/prometheus/rules/prometheus-k8s-rulefiles-0
image: quay.io/coreos/configmap-reload:v0.0.1
imagePullPolicy: IfNotPresent
name: rules-configmap-reloader
# resources:
# limits:
# cpu: 50m
# memory: 100Mi
terminationMessagePath: /dev/termination-log
terminationMessagePolicy: File
volumeMounts:
- mountPath: /etc/prometheus/rules/prometheus-k8s-rulefiles-0
name: prometheus-k8s-rulefiles-0
dnsPolicy: ClusterFirst
restartPolicy: Always
schedulerName: default-scheduler
securityContext:
fsGroup: 2000
runAsNonRoot: true
runAsUser: 1000
serviceAccount: prometheus-k8s
serviceAccountName: prometheus-k8s
terminationGracePeriodSeconds: 600
volumes:
- name: config-volume
configMap:
name: prometheus
- name: config
secret:
defaultMode: 420
secretName: prometheus-k8s
- emptyDir: {}
name: config-out
- configMap:
defaultMode: 420
name: prometheus-k8s-rulefiles-0
name: prometheus-k8s-rulefiles-0
- emptyDir: {}
name: prometheus-k8s-db
updateStrategy:
type: RollingUpdate

View File

@ -1,151 +0,0 @@
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
name: prometheus-k8s
rules:
- apiGroups:
- ""
resources:
- nodes/metrics
verbs:
- get
- nonResourceURLs:
- /metrics
verbs:
- get
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
name: prometheus-k8s
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: ClusterRole
name: prometheus-k8s
subjects:
- kind: ServiceAccount
name: prometheus-k8s
namespace: monitoring
---
apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
name: prometheus-k8s
namespace: monitoring
rules:
- apiGroups:
- ""
resources:
- nodes
- services
- endpoints
- pods
verbs:
- get
- list
- watch
---
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
name: prometheus-k8s
namespace: monitoring
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: Role
name: prometheus-k8s
subjects:
- kind: ServiceAccount
name: prometheus-k8s
namespace: monitoring
---
apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
name: prometheus-k8s
namespace: kube-system
rules:
- apiGroups:
- ""
resources:
- nodes
- services
- endpoints
- pods
verbs:
- get
- list
- watch
---
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
name: prometheus-k8s
namespace: kube-system
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: Role
name: prometheus-k8s
subjects:
- kind: ServiceAccount
name: prometheus-k8s
namespace: monitoring
---
apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
name: prometheus-k8s
namespace: default
rules:
- apiGroups:
- ""
resources:
- nodes
- services
- endpoints
- pods
verbs:
- get
- list
- watch
---
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
name: prometheus-k8s
namespace: default
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: Role
name: prometheus-k8s
subjects:
- kind: ServiceAccount
name: prometheus-k8s
namespace: monitoring
---
apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
name: prometheus-k8s-config
namespace: monitoring
rules:
- apiGroups:
- ""
resources:
- configmaps
verbs:
- get
---
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
name: prometheus-k8s-config
namespace: monitoring
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: Role
name: prometheus-k8s-config
subjects:
- kind: ServiceAccount
name: prometheus-k8s
namespace: monitoring

View File

@ -34,9 +34,11 @@ kubectl create -f kube-batch/deploy.yaml
cd prometheus
mkdir -p /data/k8s/monitoring/grafana/
chmod -R 777 /data/k8s/monitoring/grafana/
kubectl apply -f ./operator/operator-rbac.yml
kubectl apply -f ./operator/operator-dp.yml
kubectl delete -f ./operator/operator-crd.yml
kubectl apply -f ./operator/operator-crd.yml
kubectl apply -f ./operator/operator-rbac.yml
kubectl wait crd/podmonitors.monitoring.coreos.com --for condition=established --timeout=60s
kubectl apply -f ./operator/operator-dp.yml
kubectl apply -f ./alertmanater/alertmanager-main-sa.yml
kubectl apply -f ./alertmanater/alertmanager-main-secret.yml
kubectl apply -f ./alertmanater/alertmanager-main-svc.yml
@ -64,6 +66,8 @@ kubectl apply -f ./prometheus/prometheus-secret.yml
kubectl apply -f ./prometheus/prometheus-rules.yml
kubectl apply -f ./prometheus/prometheus-rbac.yml
kubectl apply -f ./prometheus/prometheus-svc.yml
kubectl wait crd/prometheuses.monitoring.coreos.com --for condition=established --timeout=60s
kubectl delete -f ./prometheus/prometheus-main.yml
kubectl apply -f ./prometheus/prometheus-main.yml
kubectl apply -f ./servicemonitor/alertmanager-sm.yml
kubectl apply -f ./servicemonitor/coredns-sm.yml