fix: optimize resource requests and limits for RabbitMQ and update cAdvisor configuration

Signed-off-by: zhenyus <zhenyus@mathmast.com>
This commit is contained in:
zhenyus 2025-03-08 20:18:12 +08:00
parent 119c2ce149
commit 149d68874d
5 changed files with 516 additions and 24 deletions

View File

@ -0,0 +1,3 @@
# Why this?
Kubernetes 1.24+ removed the Docker plugin from the kubelet's embedded cAdvisor. So even if you use cri-dockerd (Docker by Mirantis) as the container runtime, the kubelet can no longer retrieve Docker container information — such as image, pod, and container labels — through cAdvisor. Deploying cAdvisor as a standalone DaemonSet restores those metrics.

View File

@ -0,0 +1,185 @@
---
# ServiceAccount used by the cAdvisor DaemonSet pods.
apiVersion: v1
kind: ServiceAccount
metadata:
  labels:
    app: cadvisor
  name: cadvisor
  namespace: "freeleaps-monitoring-system"
---
# ClusterRole allowing the cAdvisor ServiceAccount to "use" its PodSecurityPolicy.
# NOTE(review): PodSecurityPolicy was removed in Kubernetes 1.25 — confirm the
# target cluster still serves the policy API, otherwise this rule is inert.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  labels:
    app: cadvisor
  name: cadvisor
rules:
  - apiGroups:
      - policy
    resourceNames:
      - cadvisor
    resources:
      - podsecuritypolicies
    verbs:
      - use
---
# Binds the PSP ClusterRole above to the cAdvisor ServiceAccount.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  labels:
    app: cadvisor
  name: cadvisor
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: cadvisor
subjects:
  - kind: ServiceAccount
    name: cadvisor
    namespace: "freeleaps-monitoring-system"
---
# Standalone cAdvisor agent, one pod per node (tolerates all NoSchedule taints).
apiVersion: apps/v1
kind: DaemonSet
metadata:
  annotations:
    # NOTE(review): this alpha seccomp annotation is deprecated (removed in
    # k8s 1.27) — consider securityContext.seccompProfile instead; confirm
    # target cluster version.
    seccomp.security.alpha.kubernetes.io/pod: docker/default
  labels:
    app: cadvisor
  name: cadvisor
  namespace: "freeleaps-monitoring-system"
spec:
  selector:
    matchLabels:
      app: cadvisor
      name: cadvisor
  template:
    metadata:
      annotations:
        # NOTE(review): critical-pod annotation is long deprecated; the
        # priorityClassName below already provides this — safe to drop.
        scheduler.alpha.kubernetes.io/critical-pod: ""
      labels:
        app: cadvisor
        name: cadvisor
    spec:
      # The pod only serves metrics; it never talks to the API server.
      automountServiceAccountToken: false
      containers:
        - args:
            # Collect every 10s (bounded at 15s) to match the ServiceMonitor
            # scrape cadence and avoid missed samples.
            - --housekeeping_interval=10s
            - --max_housekeeping_interval=15s
            # Disable in-memory event storage; only Prometheus metrics are used.
            - --event_storage_event_limit=default=0
            - --event_storage_age_limit=default=0
            - --enable_metrics=app,cpu,disk,diskIO,memory,network,process
            - --docker_only
            # Keep cardinality down: export only the whitelisted k8s labels.
            - --store_container_labels=false
            - --whitelisted_container_labels=io.kubernetes.container.name,io.kubernetes.pod.name,io.kubernetes.pod.namespace
          image: gcr.io/cadvisor/cadvisor:v0.45.0
          name: cadvisor
          ports:
            - containerPort: 8080
              name: http
              protocol: TCP
          resources:
            limits:
              cpu: 800m
              memory: 2000Mi
            requests:
              cpu: 400m
              memory: 400Mi
          # Read-only host mounts cAdvisor needs to inspect containers.
          volumeMounts:
            - mountPath: /rootfs
              name: rootfs
              readOnly: true
            - mountPath: /var/run
              name: var-run
              readOnly: true
            - mountPath: /sys
              name: sys
              readOnly: true
            - mountPath: /var/lib/docker
              name: docker
              readOnly: true
            - mountPath: /dev/disk
              name: disk
              readOnly: true
      priorityClassName: system-node-critical
      serviceAccountName: cadvisor
      terminationGracePeriodSeconds: 30
      # Run on every node, including control-plane/tainted ones.
      tolerations:
        - effect: NoSchedule
          operator: Exists
      volumes:
        - hostPath:
            path: /
          name: rootfs
        - hostPath:
            path: /var/run
          name: var-run
        - hostPath:
            path: /sys
          name: sys
        - hostPath:
            path: /var/lib/docker
          name: docker
        - hostPath:
            path: /dev/disk
          name: disk
---
# ClusterIP Service fronting the DaemonSet pods; the port name "cadvisor"
# is what the ServiceMonitor endpoint below selects.
apiVersion: v1
kind: Service
metadata:
  name: cadvisor
  labels:
    app: cadvisor
  namespace: "freeleaps-monitoring-system"
spec:
  selector:
    app: cadvisor
  ports:
    - name: cadvisor
      port: 8080
      protocol: TCP
      targetPort: 8080
---
# ServiceMonitor picked up by the kube-prometheus-stack Prometheus
# (release label must match the stack's serviceMonitorSelector).
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
  labels:
    app: cadvisor
    release: kube-prometheus-stack
  name: cadvisor
  namespace: "freeleaps-monitoring-system"
spec:
  endpoints:
    # Copy the whitelisted container_label_* labels into the standard
    # pod/container/namespace labels, then drop the verbose originals.
    - metricRelabelings:
        - sourceLabels:
            - container_label_io_kubernetes_pod_name
          targetLabel: pod
        - sourceLabels:
            - container_label_io_kubernetes_container_name
          targetLabel: container
        - sourceLabels:
            - container_label_io_kubernetes_pod_namespace
          targetLabel: namespace
        - action: labeldrop
          regex: container_label_io_kubernetes_pod_name
        - action: labeldrop
          regex: container_label_io_kubernetes_container_name
        - action: labeldrop
          regex: container_label_io_kubernetes_pod_namespace
      port: cadvisor
      # Rewrite target labels so these series look like the kubelet's
      # built-in /metrics/cadvisor job (presumably to keep existing
      # dashboards/recording rules working — TODO confirm).
      relabelings:
        - sourceLabels:
            - __meta_kubernetes_pod_node_name
          targetLabel: node
        - sourceLabels:
            - __metrics_path__
          targetLabel: metrics_path
          replacement: /metrics/cadvisor
        - sourceLabels:
            - job
          targetLabel: job
          replacement: kubelet
  namespaceSelector:
    matchNames:
      - "freeleaps-monitoring-system"
  selector:
    matchLabels:
      app: cadvisor

View File

@ -1389,7 +1389,7 @@ kubelet:
## Enable scraping /metrics/cadvisor from kubelet's service
##
cAdvisor: true
cAdvisor: false
## Configure the scrape interval for cAdvisor. This is configured to the default Kubelet cAdvisor
## minimum housekeeping interval in order to avoid missing samples. Note, this value is ignored
## if kubelet.serviceMonitor.interval is not empty.
@ -2179,21 +2179,21 @@ kube-state-metrics:
## MetricRelabelConfigs to apply to samples after scraping, but before ingestion.
## ref: https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api.md#relabelconfig
##
metricRelabelings: []
# - action: keep
# regex: 'kube_(daemonset|deployment|pod|namespace|node|statefulset).+'
# sourceLabels: [__name__]
metricRelabelings:
- action: keep
regex: 'kube_(daemonset|deployment|pod|namespace|node|statefulset).+'
sourceLabels: [__name__]
## RelabelConfigs to apply to samples before scraping
## ref: https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api.md#relabelconfig
##
relabelings: []
# - sourceLabels: [__meta_kubernetes_pod_node_name]
# separator: ;
# regex: ^(.*)$
# targetLabel: nodename
# replacement: $1
# action: replace
relabelings:
- sourceLabels: [__meta_kubernetes_pod_node_name]
separator: ;
regex: ^(.*)$
targetLabel: node
replacement: $1
action: replace
selfMonitor:
enabled: false
@ -2286,18 +2286,18 @@ prometheus-node-exporter:
## RelabelConfigs to apply to samples before scraping
## ref: https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api.md#relabelconfig
##
relabelings: []
# - sourceLabels: [__meta_kubernetes_pod_node_name]
# separator: ;
# regex: ^(.*)$
# targetLabel: nodename
# replacement: $1
# action: replace
relabelings:
- sourceLabels: [__meta_kubernetes_pod_node_name]
separator: ;
regex: ^(.*)$
targetLabel: node
replacement: $1
action: replace
## Attach node metadata to discovered targets. Requires Prometheus v2.35.0 and above.
##
# attachMetadata:
# node: false
attachMetadata:
node: true
rbac:
## If true, create PSPs for node-exporter

View File

@ -0,0 +1,304 @@
affinity: {}
topologySpreadConstraints: []
image:
repository: registry.k8s.io/prometheus-adapter/prometheus-adapter
# if not set appVersion field from Chart.yaml is used
tag: ""
pullPolicy: IfNotPresent
pullSecrets: []
# - foo
logLevel: 4
metricsRelistInterval: 1m
listenPort: 6443
nodeSelector: {}
priorityClassName: ""
## Override the release namespace (for multi-namespace deployments in combined charts)
namespaceOverride: ""
## Additional annotations to add to all resources
customAnnotations: {}
# role: custom-metrics
## Additional labels to add to all resources
customLabels: {}
# monitoring: prometheus-adapter
# Url to access prometheus
prometheus:
# Value is templated
url: http://kube-prometheus-stack-prometheus.freeleaps-monitoring-system.svc
port: 9090
path: ""
replicas: 1
# k8s 1.21 needs fsGroup to be set for non root deployments
# ref: https://github.com/kubernetes/kubernetes/issues/70679
podSecurityContext:
fsGroup: 10001
# SecurityContext of the container
# ref. https://kubernetes.io/docs/tasks/configure-pod-container/security-context
securityContext:
allowPrivilegeEscalation: false
capabilities:
drop: ["ALL"]
readOnlyRootFilesystem: true
runAsNonRoot: true
runAsUser: 10001
seccompProfile:
type: RuntimeDefault
rbac:
# Specifies whether RBAC resources should be created
create: true
# Specifies if a Cluster Role should be used for the Auth Reader
useAuthReaderClusterRole: false
externalMetrics:
resources: ["*"]
customMetrics:
resources: ["*"]
psp:
# Specifies whether PSP resources should be created
create: false
# Annotations added to the pod security policy
annotations: {}
## Ref: https://kubernetes.io/docs/concepts/policy/pod-security-policy/#apparmor
## Ref: https://kubernetes.io/docs/concepts/policy/pod-security-policy/#seccomp
## Ref: https://kubernetes.io/docs/concepts/policy/pod-security-policy/#sysctl
# If false then the user will opt out of automounting API credentials.
automountServiceAccountToken: true
serviceAccount:
# Specifies whether a service account should be created
create: true
# The name of the service account to use.
# If not set and create is true, a name is generated using the fullname template
name:
# ServiceAccount annotations.
# Use case: AWS EKS IAM roles for service accounts
# ref: https://docs.aws.amazon.com/eks/latest/userguide/specify-service-account-role.html
annotations: {}
# If false then the user will opt out of automounting API credentials.
automountServiceAccountToken: true
# Custom DNS configuration to be added to prometheus-adapter pods
dnsConfig: {}
# nameservers:
# - 1.2.3.4
# searches:
# - ns1.svc.cluster-domain.example
# - my.dns.search.suffix
# options:
# - name: ndots
# value: "2"
# - name: edns0
resources: {}
# requests:
# cpu: 100m
# memory: 128Mi
# limits:
# cpu: 100m
# memory: 128Mi
# Configure liveness probe
# https://kubernetes.io/docs/reference/kubernetes-api/workload-resources/pod-v1/#Probe
livenessProbe:
httpGet:
path: /healthz
port: https
scheme: HTTPS
initialDelaySeconds: 30
timeoutSeconds: 5
# Configure readiness probe
readinessProbe:
httpGet:
path: /healthz
port: https
scheme: HTTPS
initialDelaySeconds: 30
timeoutSeconds: 5
# Configure startup probe
# Use if prometheus-adapter takes a long time to finish startup e.g. polling a lot of API versions in cluster
startupProbe: {}
rules:
default: true
custom: []
# - seriesQuery: '{__name__=~"^some_metric_count$"}'
# resources:
# template: <<.Resource>>
# name:
# matches: ""
# as: "my_custom_metric"
# metricsQuery: sum(<<.Series>>{<<.LabelMatchers>>}) by (<<.GroupBy>>)
# Mounts a configMap with pre-generated rules for use. Overrides the
# default, custom, external and resource entries
existing:
external: []
# - seriesQuery: '{__name__=~"^some_metric_count$"}'
# resources:
# template: <<.Resource>>
# name:
# matches: ""
# as: "my_external_metric"
# metricsQuery: sum(<<.Series>>{<<.LabelMatchers>>}) by (<<.GroupBy>>)
resource:
cpu:
containerQuery: |
sum by (<<.GroupBy>>) (
rate(container_cpu_usage_seconds_total{container!="",<<.LabelMatchers>>}[3m])
)
nodeQuery: |
sum by (<<.GroupBy>>) (
rate(node_cpu_seconds_total{mode!="idle",mode!="iowait",mode!="steal",<<.LabelMatchers>>}[3m])
)
resources:
overrides:
node:
resource: node
namespace:
resource: namespace
pod:
resource: pod
containerLabel: container
memory:
containerQuery: |
sum by (<<.GroupBy>>) (
avg_over_time(container_memory_working_set_bytes{container!="",<<.LabelMatchers>>}[3m])
)
nodeQuery: |
sum by (<<.GroupBy>>) (
avg_over_time(node_memory_MemTotal_bytes{<<.LabelMatchers>>}[3m])
-
avg_over_time(node_memory_MemAvailable_bytes{<<.LabelMatchers>>}[3m])
)
resources:
overrides:
node:
resource: node
namespace:
resource: namespace
pod:
resource: pod
containerLabel: container
window: 3m
service:
annotations: {}
port: 443
type: ClusterIP
# clusterIP: 1.2.3.4
ipDualStack:
enabled: false
ipFamilies: ["IPv6", "IPv4"]
ipFamilyPolicy: "PreferDualStack"
tls:
enable: false
ca: |-
# Public CA file that signed the APIService
key: |-
# Private key of the APIService
certificate: |-
# Public key of the APIService
# Set environment variables from secrets, configmaps or by setting them as name/value
env: []
# - name: TMP_DIR
# value: /tmp
# - name: PASSWORD
# valueFrom:
# secretKeyRef:
# name: mysecret
# key: password
# optional: false
# Any extra arguments
extraArguments: []
# - --tls-private-key-file=/etc/tls/tls.key
# - --tls-cert-file=/etc/tls/tls.crt
# Additional containers to add to the pod
extraContainers: []
# Any extra volumes
extraVolumes: []
# - name: example-name
# hostPath:
# path: /path/on/host
# type: DirectoryOrCreate
# - name: ssl-certs
# hostPath:
# path: /etc/ssl/certs/ca-bundle.crt
# type: File
# Any extra volume mounts
extraVolumeMounts: []
# - name: example-name
# mountPath: /path/in/container
# - name: ssl-certs
# mountPath: /etc/ssl/certs/ca-certificates.crt
# readOnly: true
tolerations: []
# Labels added to the pod
podLabels: {}
# Annotations added to the pod
podAnnotations: {}
# Annotations added to the deployment
deploymentAnnotations: {}
hostNetwork:
# Specifies if prometheus-adapter should be started in hostNetwork mode.
#
# You would require this enabled if you use alternate overlay networking for pods and
# API server unable to communicate with metrics-server. As an example, this is required
# if you use Weave network on EKS. See also dnsPolicy
enabled: false
# When hostNetwork is enabled, you probably want to set this to ClusterFirstWithHostNet
# dnsPolicy: ClusterFirstWithHostNet
# Deployment strategy type
strategy:
type: RollingUpdate
rollingUpdate:
maxUnavailable: 25%
maxSurge: 25%
podDisruptionBudget:
# Specifies if PodDisruptionBudget should be enabled
# When enabled, minAvailable or maxUnavailable should also be defined.
enabled: false
minAvailable:
maxUnavailable: 1
certManager:
enabled: false
caCertDuration: 43800h0m0s
certDuration: 8760h0m0s
# -- Set the revisionHistoryLimit on the Certificates. See
# https://cert-manager.io/docs/reference/api-docs/#cert-manager.io/v1.CertificateSpec
# Defaults to nil.
caCertRevisionHistoryLimit:
certRevisionHistoryLimit:

View File

@ -826,10 +826,10 @@ containerSecurityContext:
##
resources:
requests:
cpu: "500m"
memory: "1Gi"
cpu: "250m"
memory: "256Mi"
limits:
cpu: "1000m"
cpu: "500m"
memory: "1Gi"