Merge pull request 'feat: add prometheusrule for metrics service' (#105) from icecheng/feature into master
Reviewed-on: https://gitea.freeleaps.mathmast.com/freeleaps/freeleaps-ops/pulls/105
This commit is contained in:
commit
7f4c30bd70
@ -0,0 +1,32 @@
|
||||
# Prometheus Alert Rule Config
|
||||
|
||||
Add `prometheusrule.yaml` to `<helm-pkg>/templates`.
|
||||
See the example template below:
|
||||
```
|
||||
{{- /*
|
||||
Copyright Broadcom, Inc. All Rights Reserved.
|
||||
SPDX-License-Identifier: APACHE-2.0
|
||||
*/}}
|
||||
|
||||
{{- if and .Values.metrics.enabled .Values.metrics.prometheusRule.enabled }}
|
||||
apiVersion: monitoring.coreos.com/v1
|
||||
kind: PrometheusRule
|
||||
metadata:
|
||||
name: {{ include "common.names.fullname" . }}
|
||||
namespace: {{ default (include "common.names.namespace" .) .Values.metrics.prometheusRule.namespace | quote}}
|
||||
labels: {{- include "common.labels.standard" ( dict "customLabels" .Values.commonLabels "context" $ ) | nindent 4 }}
|
||||
{{- if .Values.metrics.prometheusRule.additionalLabels }}
|
||||
{{- include "common.tplvalues.render" (dict "value" .Values.metrics.prometheusRule.additionalLabels "context" $) | nindent 4 }}
|
||||
{{- end }}
|
||||
{{- if .Values.commonAnnotations }}
|
||||
annotations: {{- include "common.tplvalues.render" ( dict "value" .Values.commonAnnotations "context" $ ) | nindent 4 }}
|
||||
{{- end }}
|
||||
spec:
|
||||
groups:
|
||||
{{- with .Values.metrics.prometheusRule.rules }}
|
||||
- name: {{ template "common.names.name" $ }}
|
||||
rules: {{- include "common.tplvalues.render" (dict "value" . "context" $) | nindent 8 }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
|
||||
```
|
||||
@ -0,0 +1,37 @@
|
||||
{{- /*
|
||||
Copyright Broadcom, Inc. All Rights Reserved.
|
||||
SPDX-License-Identifier: APACHE-2.0
|
||||
*/}}
|
||||
|
||||
{{- /*
Renders a Prometheus Operator PrometheusRule from `.Values.metrics.prometheusRule`.

Values read:
  enabled     - render the resource only when truthy
  name        - resource name, also used as the single rule-group name (required)
  namespace   - target namespace; falls back to the release namespace when empty
  labels      - optional map copied verbatim into metadata.labels
  rules       - list of alert rules: alert, expr, and optional for/labels/annotations

Rule labels/annotations go through `toYaml`, not `tpl`, so Prometheus
placeholders such as `{{ "{{ $labels.instance }}" }}` in the values are emitted literally.
*/}}
{{- /* `default (dict)` keeps values files without a prometheusRule key from failing on `.enabled`. */}}
{{- $rule := default (dict) .Values.metrics.prometheusRule }}
{{- if $rule.enabled }}
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
  name: {{ required "metrics.prometheusRule.name must be set when metrics.prometheusRule.enabled is true" $rule.name | quote }}
  namespace: {{ $rule.namespace | default .Release.Namespace | quote }}
  {{- with $rule.labels }}
  labels:
    {{- toYaml . | nindent 4 }}
  {{- end }}
spec:
  {{- with $rule.rules }}
  groups:
    - name: {{ $rule.name | quote }}
      rules:
        {{- range . }}
        - alert: {{ .alert | quote }}
          expr: {{ .expr | quote }}
          {{- if .for }}
          for: {{ .for | quote }}
          {{- end }}
          {{- if .labels }}
          labels:
            {{- toYaml .labels | nindent 12 }}
          {{- end }}
          {{- if .annotations }}
          annotations:
            {{- toYaml .annotations | nindent 12 }}
          {{- end }}
        {{- end }}
  {{- else }}
  {{- /* No rules configured: emit an explicit empty list instead of a null `groups`. */}}
  groups: []
  {{- end }}
{{- end }}
|
||||
@ -81,3 +81,30 @@ metrics:
|
||||
controlledResources:
|
||||
- cpu
|
||||
- memory
|
||||
# Alert rules for the metrics service (alpha). Presumably consumed by the
# chart's PrometheusRule template via `metrics.prometheusRule` — verify.
# NOTE(review): no `labels` are set; if the Prometheus Operator selects rules
# by label (e.g. `release: kube-prometheus-stack`), add them here — confirm.
prometheusRule:
  name: freeleaps-alpha-metrics
  enabled: false
  namespace: "freeleaps-monitoring-system"
  rules:
    # Fires when the scrape target with job "metrics-service" reports up == 0 for 1m.
    - alert: FreeleapsMetricsServiceDown
      expr: 'up{job="metrics-service"} == 0'
      for: 1m
      labels:
        severity: critical
        service: metrics-service
      annotations:
        summary: "Freeleaps Metrics service is down (instance {{ $labels.instance }})"
        description: "Freeleaps Metrics service has been down for more than 1 minute."
        runbook_url: "https://netorgft10898514.sharepoint.com/:w:/s/FreeleapsEngineeringTeam/EUlvzumTsPxCpPAzI3gm9OIB0DCLTjQzzYVL6VsHYZFjxg?e=0dxVr7"

    # Fires when the per-series 5xx request rate stays above 0.1 req/s for 5m.
    - alert: FreeleapsMetricsServiceHighErrorRate
      expr: 'rate(http_requests_total{job="metrics-service",status=~"5.."}[5m]) > 0.1'
      for: 5m
      labels:
        severity: warning
        service: metrics-service
      annotations:
        summary: "High error rate in freeleaps metrics service (instance {{ $labels.instance }})"
        description: "Freeleaps Metrics service error rate is {{ $value }} errors per second."
        runbook_url: "https://netorgft10898514.sharepoint.com/:w:/s/FreeleapsEngineeringTeam/EUlvzumTsPxCpPAzI3gm9OIB0DCLTjQzzYVL6VsHYZFjxg?e=0dxVr7"
|
||||
|
||||
|
||||
@ -81,3 +81,29 @@ metrics:
|
||||
controlledResources:
|
||||
- cpu
|
||||
- memory
|
||||
# Alert rules for the metrics service (prod). Presumably consumed by the
# chart's PrometheusRule template via `metrics.prometheusRule` — verify.
# NOTE(review): no `labels` are set; if the Prometheus Operator selects rules
# by label (e.g. `release: kube-prometheus-stack`), add them here — confirm.
prometheusRule:
  name: freeleaps-prod-metrics
  enabled: true
  namespace: "freeleaps-monitoring-system"
  rules:
    # Fires when the scrape target with job "metrics-service" reports up == 0 for 1m.
    - alert: FreeleapsMetricsServiceDown
      expr: 'up{job="metrics-service"} == 0'
      for: 1m
      labels:
        severity: critical
        service: metrics-service
      annotations:
        summary: "Freeleaps Metrics service is down (instance {{ $labels.instance }})"
        description: "Freeleaps Metrics service has been down for more than 1 minute."
        runbook_url: "https://netorgft10898514.sharepoint.com/:w:/s/FreeleapsEngineeringTeam/EUlvzumTsPxCpPAzI3gm9OIB0DCLTjQzzYVL6VsHYZFjxg?e=0dxVr7"

    # Fires when the per-series 5xx request rate stays above 0.1 req/s for 5m.
    - alert: FreeleapsMetricsServiceHighErrorRate
      expr: 'rate(http_requests_total{job="metrics-service",status=~"5.."}[5m]) > 0.1'
      for: 5m
      labels:
        severity: warning
        service: metrics-service
      annotations:
        summary: "High error rate in freeleaps metrics service (instance {{ $labels.instance }})"
        description: "Freeleaps Metrics service error rate is {{ $value }} errors per second."
        runbook_url: "https://netorgft10898514.sharepoint.com/:w:/s/FreeleapsEngineeringTeam/EUlvzumTsPxCpPAzI3gm9OIB0DCLTjQzzYVL6VsHYZFjxg?e=0dxVr7"
|
||||
@ -55,12 +55,12 @@ metrics:
|
||||
port: 8009
|
||||
targetPort: 8009
|
||||
serviceMonitor:
|
||||
enabled: false
|
||||
enabled: true
|
||||
labels:
|
||||
release: kube-prometheus-stack
|
||||
namespace: freeleaps-monitoring-system
|
||||
internal: 30s
|
||||
scrapeTimeout: ''
|
||||
interval: 30s
|
||||
scrapeTimeout: 10s
|
||||
configs:
|
||||
starrocksHost: ""
|
||||
starrocksPort: 8009
|
||||
@ -80,3 +80,69 @@ metrics:
|
||||
controlledResources:
|
||||
- cpu
|
||||
- memory
|
||||
# Alert rules for the metrics service. Presumably consumed by the chart's
# PrometheusRule template via `metrics.prometheusRule` — verify.
# NOTE(review): no `labels` are set; if the Prometheus Operator selects rules
# by label (e.g. `release: kube-prometheus-stack`), add them here — confirm.
prometheusRule:
  name: freeleaps-metrics
  enabled: true
  namespace: "freeleaps-monitoring-system"
  rules:
    # Fires when the scrape target with job "metrics-service" reports up == 0 for 1m.
    - alert: FreeleapsMetricsServiceDown
      expr: 'up{job="metrics-service"} == 0'
      for: 1m
      labels:
        severity: critical
        service: metrics-service
      annotations:
        summary: "Freeleaps Metrics service is down (instance {{ $labels.instance }})"
        description: "Freeleaps Metrics service has been down for more than 1 minute."
        runbook_url: "https://netorgft10898514.sharepoint.com/:w:/s/FreeleapsEngineeringTeam/EUlvzumTsPxCpPAzI3gm9OIB0DCLTjQzzYVL6VsHYZFjxg?e=0dxVr7"

    # Fires when the per-series 5xx request rate stays above 0.1 req/s for 5m.
    - alert: FreeleapsMetricsServiceHighErrorRate
      expr: 'rate(http_requests_total{job="metrics-service",status=~"5.."}[5m]) > 0.1'
      for: 5m
      labels:
        severity: warning
        service: metrics-service
      annotations:
        summary: "High error rate in freeleaps metrics service (instance {{ $labels.instance }})"
        description: "Freeleaps Metrics service error rate is {{ $value }} errors per second."
        runbook_url: "https://netorgft10898514.sharepoint.com/:w:/s/FreeleapsEngineeringTeam/EUlvzumTsPxCpPAzI3gm9OIB0DCLTjQzzYVL6VsHYZFjxg?e=0dxVr7"

    # Additional rules, currently disabled (kept commented out):
    # - alert: MetricsServiceHighLatency
    #   expr: histogram_quantile(0.95, rate(http_request_duration_seconds_bucket{job="metrics-service"}[5m])) > 1
    #   for: 5m
    #   labels:
    #     severity: warning
    #     service: metrics-service
    #   annotations:
    #     summary: "High latency in metrics service (instance {{ $labels.instance }})"
    #     description: "95th percentile latency is {{ $value }} seconds."

    # - alert: MetricsServiceHighMemoryUsage
    #   expr: (process_resident_memory_bytes{job="metrics-service"} / 1024 / 1024) > 512
    #   for: 5m
    #   labels:
    #     severity: warning
    #     service: metrics
    #   annotations:
    #     summary: "High memory usage in metrics service (instance {{ $labels.instance }})"
    #     description: "Memory usage is {{ $value }} MB."

    # - alert: MetricsServiceHighCPUUsage
    #   expr: rate(process_cpu_seconds_total{job="metrics-service"}[5m]) * 100 > 80
    #   for: 5m
    #   labels:
    #     severity: warning
    #     service: metrics
    #   annotations:
    #     summary: "High CPU usage in metrics service (instance {{ $labels.instance }})"
    #     description: "CPU usage is {{ $value }}%."

    # - alert: MetricsServiceNoData
    #   expr: absent(up{job="metrics-service"})
    #   for: 5m
    #   labels:
    #     severity: critical
    #     service: metrics
    #   annotations:
    #     summary: "No data from metrics service (instance {{ $labels.instance }})"
    #     description: "No metrics data received from metrics service for more than 5 minutes."
|
||||
|
||||
@ -10,6 +10,27 @@ executeFreeleapsPipeline {
|
||||
executeMode = 'fully'
|
||||
commitMessageLintEnabled = false
|
||||
components = [
|
||||
[
|
||||
name: 'metrics',
|
||||
root: 'apps/metrics',
|
||||
language: 'python',
|
||||
dependenciesManager: 'pip',
|
||||
requirementsFile: 'requirements.txt',
|
||||
buildCacheEnabled: true,
|
||||
buildAgentImage: 'python:3.12-slim',
|
||||
buildArtifacts: ['.'],
|
||||
lintEnabled: false,
|
||||
sastEnabled: false,
|
||||
imageRegistry: 'docker.io',
|
||||
imageRepository: 'freeleaps',
|
||||
imageName: 'devops',
|
||||
imageBuilder: 'dind',
|
||||
dockerfilePath: 'Dockerfile',
|
||||
imageBuildRoot: '.',
|
||||
imageReleaseArchitectures: ['linux/amd64', 'linux/arm64/v8'],
|
||||
registryCredentialsId: 'freeleaps-devops-docker-hub-credentials',
|
||||
semanticReleaseEnabled: true
|
||||
],
|
||||
[
|
||||
name: 'authentication',
|
||||
root: 'apps/authentication',
|
||||
@ -135,27 +156,6 @@ executeFreeleapsPipeline {
|
||||
imageReleaseArchitectures: ['linux/amd64', 'linux/arm64/v8'],
|
||||
registryCredentialsId: 'freeleaps-devops-docker-hub-credentials',
|
||||
semanticReleaseEnabled: true
|
||||
],
|
||||
[
|
||||
name: 'metrics',
|
||||
root: 'apps/metrcis',
|
||||
language: 'python',
|
||||
dependenciesManager: 'pip',
|
||||
requirementsFile: 'requirements.txt',
|
||||
buildCacheEnabled: true,
|
||||
buildAgentImage: 'python:3.12-slim',
|
||||
buildArtifacts: ['.'],
|
||||
lintEnabled: false,
|
||||
sastEnabled: false,
|
||||
imageRegistry: 'docker.io',
|
||||
imageRepository: 'freeleaps',
|
||||
imageName: 'devops',
|
||||
imageBuilder: 'dind',
|
||||
dockerfilePath: 'Dockerfile',
|
||||
imageBuildRoot: '.',
|
||||
imageReleaseArchitectures: ['linux/amd64', 'linux/arm64/v8'],
|
||||
registryCredentialsId: 'freeleaps-devops-docker-hub-credentials',
|
||||
semanticReleaseEnabled: true
|
||||
]
|
||||
]
|
||||
}
|
||||
Loading…
Reference in New Issue
Block a user