fix: optimize resource requests and limits for RabbitMQ and update cAdvisor configuration
Signed-off-by: zhenyus <zhenyus@mathmast.com>
This commit is contained in:
parent
119c2ce149
commit
149d68874d
@ -0,0 +1,3 @@
|
||||
# Why this?
|
||||
|
||||
Kubernetes 1.24+ removed the Docker plugin from cAdvisor. You can still use cri-dockerd (Docker by Mirantis) to keep Docker as the container runtime, but the kubelet can no longer retrieve Docker container information (image, pod, and container labels, etc.) through cAdvisor. The manifests here therefore deploy cAdvisor separately so that this information remains available.
|
||||
@ -0,0 +1,185 @@
|
||||
# ServiceAccount referenced by the cadvisor DaemonSet (serviceAccountName)
# and by the cadvisor ClusterRoleBinding.
apiVersion: v1
kind: ServiceAccount
metadata:
  name: cadvisor
  namespace: freeleaps-monitoring-system
  labels:
    app: cadvisor
|
||||
---
|
||||
# Grants the "use" verb on the PodSecurityPolicy named "cadvisor".
# NOTE(review): PodSecurityPolicy was removed in Kubernetes 1.25; on newer
# clusters this rule matches nothing. Confirm whether it is still needed.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: cadvisor
  labels:
    app: cadvisor
rules:
  - apiGroups: [policy]
    resources: [podsecuritypolicies]
    resourceNames: [cadvisor]
    verbs: [use]
|
||||
---
|
||||
# Binds the cadvisor ClusterRole to the cadvisor ServiceAccount in the
# freeleaps-monitoring-system namespace.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: cadvisor
  labels:
    app: cadvisor
subjects:
  - kind: ServiceAccount
    name: cadvisor
    namespace: freeleaps-monitoring-system
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: cadvisor
|
||||
---
|
||||
# Standalone cAdvisor, scheduled as a DaemonSet in the monitoring namespace.
#
# Changes versus the previous revision:
#   * The alpha annotation "seccomp.security.alpha.kubernetes.io/pod" was set
#     on the DaemonSet object itself (not on the pod template), so it never
#     reached the pods, and Kubernetes no longer honours the alpha seccomp
#     annotations. The intent is now expressed with the pod-level
#     securityContext.seccompProfile field.
#   * The "scheduler.alpha.kubernetes.io/critical-pod" annotation was dropped:
#     it is obsolete and priorityClassName below already carries the priority.
apiVersion: apps/v1
kind: DaemonSet
metadata:
  name: cadvisor
  namespace: freeleaps-monitoring-system
  labels:
    app: cadvisor
spec:
  selector:
    matchLabels:
      app: cadvisor
      name: cadvisor
  template:
    metadata:
      labels:
        app: cadvisor
        name: cadvisor
    spec:
      serviceAccountName: cadvisor
      # cAdvisor is not given an API token; it only reads the host paths
      # mounted below.
      automountServiceAccountToken: false
      priorityClassName: system-node-critical
      terminationGracePeriodSeconds: 30
      securityContext:
        # Replaces the former "docker/default" alpha seccomp annotation.
        seccompProfile:
          type: RuntimeDefault
      tolerations:
        # No key given: tolerates every taint whose effect is NoSchedule.
        - effect: NoSchedule
          operator: Exists
      containers:
        - name: cadvisor
          image: gcr.io/cadvisor/cadvisor:v0.45.0
          args:
            - --housekeeping_interval=10s
            - --max_housekeeping_interval=15s
            - --event_storage_event_limit=default=0
            - --event_storage_age_limit=default=0
            - --enable_metrics=app,cpu,disk,diskIO,memory,network,process
            - --docker_only
            # Export only the three io.kubernetes.* labels listed below
            # instead of every container label.
            - --store_container_labels=false
            - --whitelisted_container_labels=io.kubernetes.container.name,io.kubernetes.pod.name,io.kubernetes.pod.namespace
          ports:
            - name: http
              containerPort: 8080
              protocol: TCP
          resources:
            requests:
              cpu: 400m
              memory: 400Mi
            limits:
              cpu: 800m
              memory: 2000Mi
          # All host paths are mounted read-only.
          volumeMounts:
            - name: rootfs
              mountPath: /rootfs
              readOnly: true
            - name: var-run
              mountPath: /var/run
              readOnly: true
            - name: sys
              mountPath: /sys
              readOnly: true
            - name: docker
              mountPath: /var/lib/docker
              readOnly: true
            - name: disk
              mountPath: /dev/disk
              readOnly: true
      volumes:
        - name: rootfs
          hostPath:
            path: /
        - name: var-run
          hostPath:
            path: /var/run
        - name: sys
          hostPath:
            path: /sys
        - name: docker
          hostPath:
            path: /var/lib/docker
        - name: disk
          hostPath:
            path: /dev/disk
|
||||
---
|
||||
# Exposes the cAdvisor pods (selected by app=cadvisor) on TCP 8080.
# The port is named "cadvisor"; the cadvisor ServiceMonitor refers to it by
# that name.
apiVersion: v1
kind: Service
metadata:
  name: cadvisor
  namespace: freeleaps-monitoring-system
  labels:
    app: cadvisor
spec:
  selector:
    app: cadvisor
  ports:
    - name: cadvisor
      protocol: TCP
      port: 8080
      targetPort: 8080
|
||||
---
|
||||
# Scrapes the cadvisor Service through its "cadvisor" port and rewrites
# labels on targets and samples.
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
  name: cadvisor
  namespace: freeleaps-monitoring-system
  labels:
    app: cadvisor
    release: kube-prometheus-stack
spec:
  namespaceSelector:
    matchNames:
      - freeleaps-monitoring-system
  selector:
    matchLabels:
      app: cadvisor
  endpoints:
    - port: cadvisor
      # Target relabeling (applied before the scrape).
      relabelings:
        # Copy the node name of the scraped pod into a "node" label.
        - sourceLabels: [__meta_kubernetes_pod_node_name]
          targetLabel: node
        # Set metrics_path to the fixed value /metrics/cadvisor.
        - sourceLabels: [__metrics_path__]
          targetLabel: metrics_path
          replacement: /metrics/cadvisor
        # Set job to the fixed value "kubelet".
        # NOTE(review): presumably so series look like the ones the kubelet
        # cAdvisor endpoint used to produce; confirm against dashboards/rules.
        - sourceLabels: [job]
          targetLabel: job
          replacement: kubelet
      # Sample relabeling (applied after the scrape). Order matters: the
      # container_label_* values are copied first and dropped afterwards.
      metricRelabelings:
        - sourceLabels: [container_label_io_kubernetes_pod_name]
          targetLabel: pod
        - sourceLabels: [container_label_io_kubernetes_container_name]
          targetLabel: container
        - sourceLabels: [container_label_io_kubernetes_pod_namespace]
          targetLabel: namespace
        - action: labeldrop
          regex: container_label_io_kubernetes_pod_name
        - action: labeldrop
          regex: container_label_io_kubernetes_container_name
        - action: labeldrop
          regex: container_label_io_kubernetes_pod_namespace
|
||||
@ -1389,7 +1389,7 @@ kubelet:
|
||||
|
||||
## Enable scraping /metrics/cadvisor from kubelet's service
|
||||
##
|
||||
cAdvisor: true
|
||||
cAdvisor: false
|
||||
## Configure the scrape interval for cAdvisor. This is configured to the default Kubelet cAdvisor
|
||||
## minimum housekeeping interval in order to avoid missing samples. Note, this value is ignored
|
||||
## if kubelet.serviceMonitor.interval is not empty.
|
||||
@ -2179,21 +2179,21 @@ kube-state-metrics:
|
||||
## MetricRelabelConfigs to apply to samples after scraping, but before ingestion.
|
||||
## ref: https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api.md#relabelconfig
|
||||
##
|
||||
metricRelabelings: []
|
||||
# - action: keep
|
||||
# regex: 'kube_(daemonset|deployment|pod|namespace|node|statefulset).+'
|
||||
# sourceLabels: [__name__]
|
||||
metricRelabelings:
|
||||
- action: keep
|
||||
regex: 'kube_(daemonset|deployment|pod|namespace|node|statefulset).+'
|
||||
sourceLabels: [__name__]
|
||||
|
||||
## RelabelConfigs to apply to samples before scraping
|
||||
## ref: https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api.md#relabelconfig
|
||||
##
|
||||
relabelings: []
|
||||
# - sourceLabels: [__meta_kubernetes_pod_node_name]
|
||||
# separator: ;
|
||||
# regex: ^(.*)$
|
||||
# targetLabel: nodename
|
||||
# replacement: $1
|
||||
# action: replace
|
||||
relabelings:
|
||||
- sourceLabels: [__meta_kubernetes_pod_node_name]
|
||||
separator: ;
|
||||
regex: ^(.*)$
|
||||
targetLabel: node
|
||||
replacement: $1
|
||||
action: replace
|
||||
|
||||
selfMonitor:
|
||||
enabled: false
|
||||
@ -2286,18 +2286,18 @@ prometheus-node-exporter:
|
||||
## RelabelConfigs to apply to samples before scraping
|
||||
## ref: https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api.md#relabelconfig
|
||||
##
|
||||
relabelings: []
|
||||
# - sourceLabels: [__meta_kubernetes_pod_node_name]
|
||||
# separator: ;
|
||||
# regex: ^(.*)$
|
||||
# targetLabel: nodename
|
||||
# replacement: $1
|
||||
# action: replace
|
||||
relabelings:
|
||||
- sourceLabels: [__meta_kubernetes_pod_node_name]
|
||||
separator: ;
|
||||
regex: ^(.*)$
|
||||
targetLabel: node
|
||||
replacement: $1
|
||||
action: replace
|
||||
|
||||
## Attach node metadata to discovered targets. Requires Prometheus v2.35.0 and above.
|
||||
##
|
||||
# attachMetadata:
|
||||
# node: false
|
||||
attachMetadata:
|
||||
node: true
|
||||
|
||||
rbac:
|
||||
## If true, create PSPs for node-exporter
|
||||
|
||||
@ -0,0 +1,304 @@
|
||||
# Scheduling constraints: none configured.
affinity: {}

topologySpreadConstraints: []

image:
  repository: registry.k8s.io/prometheus-adapter/prometheus-adapter
  # An empty tag means the appVersion field from Chart.yaml is used.
  tag: ""
  pullPolicy: IfNotPresent
  # Example entry:
  #   - foo
  pullSecrets: []

logLevel: 4

metricsRelistInterval: 1m

listenPort: 6443

nodeSelector: {}

priorityClassName: ""

## Overrides the release namespace (for multi-namespace deployments in
## combined charts).
namespaceOverride: ""

## Extra annotations added to all resources, for example:
##   role: custom-metrics
customAnnotations: {}

## Extra labels added to all resources, for example:
##   monitoring: prometheus-adapter
customLabels: {}
|
||||
|
||||
# Where the adapter reaches Prometheus.
prometheus:
  # The url value is templated.
  url: http://kube-prometheus-stack-prometheus.freeleaps-monitoring-system.svc
  port: 9090
  path: ""
|
||||
|
||||
replicas: 1

# fsGroup has to be set for non-root deployments on k8s 1.21.
# ref: https://github.com/kubernetes/kubernetes/issues/70679
podSecurityContext:
  fsGroup: 10001

# Container-level security context.
# ref. https://kubernetes.io/docs/tasks/configure-pod-container/security-context
securityContext:
  runAsNonRoot: true
  runAsUser: 10001
  readOnlyRootFilesystem: true
  allowPrivilegeEscalation: false
  capabilities:
    drop:
      - "ALL"
  seccompProfile:
    type: RuntimeDefault
|
||||
|
||||
rbac:
  # Whether RBAC resources should be created.
  create: true
  # Whether a Cluster Role should be used for the Auth Reader.
  useAuthReaderClusterRole: false
  externalMetrics:
    resources:
      - "*"
  customMetrics:
    resources:
      - "*"

psp:
  # Whether PSP resources should be created.
  create: false
  # Annotations added to the pod security policy.
  ## Ref: https://kubernetes.io/docs/concepts/policy/pod-security-policy/#apparmor
  ## Ref: https://kubernetes.io/docs/concepts/policy/pod-security-policy/#seccomp
  ## Ref: https://kubernetes.io/docs/concepts/policy/pod-security-policy/#sysctl
  annotations: {}

# Set to false to opt out of automounting API credentials.
automountServiceAccountToken: true

serviceAccount:
  # Whether a service account should be created.
  create: true
  # Name of the service account to use. When unset and create is true, a name
  # is generated using the fullname template.
  name: null
  # ServiceAccount annotations.
  # Use case: AWS EKS IAM roles for service accounts
  # ref: https://docs.aws.amazon.com/eks/latest/userguide/specify-service-account-role.html
  annotations: {}
  # Set to false to opt out of automounting API credentials.
  automountServiceAccountToken: true
|
||||
|
||||
# Custom DNS configuration added to prometheus-adapter pods. Example:
#   nameservers:
#     - 1.2.3.4
#   searches:
#     - ns1.svc.cluster-domain.example
#     - my.dns.search.suffix
#   options:
#     - name: ndots
#       value: "2"
#     - name: edns0
dnsConfig: {}

# No requests or limits are set. Example:
#   requests:
#     cpu: 100m
#     memory: 128Mi
#   limits:
#     cpu: 100m
#     memory: 128Mi
resources: {}

# Liveness probe.
# https://kubernetes.io/docs/reference/kubernetes-api/workload-resources/pod-v1/#Probe
livenessProbe:
  initialDelaySeconds: 30
  timeoutSeconds: 5
  httpGet:
    scheme: HTTPS
    port: https
    path: /healthz

# Readiness probe.
readinessProbe:
  initialDelaySeconds: 30
  timeoutSeconds: 5
  httpGet:
    scheme: HTTPS
    port: https
    path: /healthz

# Startup probe.
# Use it if prometheus-adapter takes a long time to finish startup, e.g. when
# polling a lot of API versions in the cluster.
startupProbe: {}
|
||||
|
||||
rules:
  default: true

  # Custom metrics rules. Example:
  #   - seriesQuery: '{__name__=~"^some_metric_count$"}'
  #     resources:
  #       template: <<.Resource>>
  #     name:
  #       matches: ""
  #       as: "my_custom_metric"
  #     metricsQuery: sum(<<.Series>>{<<.LabelMatchers>>}) by (<<.GroupBy>>)
  custom: []

  # Mounts a configMap with pre-generated rules for use. Overrides the
  # default, custom, external and resource entries.
  existing: null

  # External metrics rules. Example:
  #   - seriesQuery: '{__name__=~"^some_metric_count$"}'
  #     resources:
  #       template: <<.Resource>>
  #     name:
  #       matches: ""
  #       as: "my_external_metric"
  #     metricsQuery: sum(<<.Series>>{<<.LabelMatchers>>}) by (<<.GroupBy>>)
  external: []

  # Resource metrics (CPU and memory) queries. Both map the Prometheus labels
  # node / namespace / pod to the Kubernetes resources of the same name and
  # read the container name from the "container" label.
  resource:
    cpu:
      containerLabel: container
      resources:
        overrides:
          node:
            resource: node
          namespace:
            resource: namespace
          pod:
            resource: pod
      containerQuery: |
        sum by (<<.GroupBy>>) (
          rate(container_cpu_usage_seconds_total{container!="",<<.LabelMatchers>>}[3m])
        )
      nodeQuery: |
        sum by (<<.GroupBy>>) (
          rate(node_cpu_seconds_total{mode!="idle",mode!="iowait",mode!="steal",<<.LabelMatchers>>}[3m])
        )
    memory:
      containerLabel: container
      resources:
        overrides:
          node:
            resource: node
          namespace:
            resource: namespace
          pod:
            resource: pod
      containerQuery: |
        sum by (<<.GroupBy>>) (
          avg_over_time(container_memory_working_set_bytes{container!="",<<.LabelMatchers>>}[3m])
        )
      nodeQuery: |
        sum by (<<.GroupBy>>) (
          avg_over_time(node_memory_MemTotal_bytes{<<.LabelMatchers>>}[3m])
          -
          avg_over_time(node_memory_MemAvailable_bytes{<<.LabelMatchers>>}[3m])
        )
    # Same 3m range as used inside the queries above.
    window: 3m
|
||||
|
||||
service:
  type: ClusterIP
  port: 443
  annotations: {}
  # clusterIP: 1.2.3.4
  ipDualStack:
    enabled: false
    ipFamilyPolicy: PreferDualStack
    ipFamilies:
      - IPv6
      - IPv4

tls:
  enable: false
  # The three values below are placeholder text, not real key material.
  ca: |-
    # Public CA file that signed the APIService
  key: |-
    # Private key of the APIService
  certificate: |-
    # Public key of the APIService
|
||||
|
||||
# Environment variables, set from secrets, configmaps or as name/value pairs.
# Example:
#   - name: TMP_DIR
#     value: /tmp
#   - name: PASSWORD
#     valueFrom:
#       secretKeyRef:
#         name: mysecret
#         key: password
#         optional: false
env: []

# Any extra arguments. Example:
#   - --tls-private-key-file=/etc/tls/tls.key
#   - --tls-cert-file=/etc/tls/tls.crt
extraArguments: []

# Additional containers to add to the pod.
extraContainers: []

# Any extra volumes. Example:
#   - name: example-name
#     hostPath:
#       path: /path/on/host
#       type: DirectoryOrCreate
#   - name: ssl-certs
#     hostPath:
#       path: /etc/ssl/certs/ca-bundle.crt
#       type: File
extraVolumes: []

# Any extra volume mounts. Example:
#   - name: example-name
#     mountPath: /path/in/container
#   - name: ssl-certs
#     mountPath: /etc/ssl/certs/ca-certificates.crt
#     readOnly: true
extraVolumeMounts: []

tolerations: []

# Labels added to the pod.
podLabels: {}

# Annotations added to the pod.
podAnnotations: {}

# Annotations added to the deployment.
deploymentAnnotations: {}
|
||||
|
||||
hostNetwork:
  # Whether prometheus-adapter should be started in hostNetwork mode.
  #
  # This is needed if you use alternate overlay networking for pods and the
  # API server is unable to communicate with metrics-server, e.g. when using
  # Weave network on EKS. See also dnsPolicy.
  enabled: false

# With hostNetwork enabled, you probably want ClusterFirstWithHostNet here.
# dnsPolicy: ClusterFirstWithHostNet

# Deployment strategy.
strategy:
  type: RollingUpdate
  rollingUpdate:
    maxSurge: 25%
    maxUnavailable: 25%

podDisruptionBudget:
  # Whether a PodDisruptionBudget should be enabled.
  # When enabled, minAvailable or maxUnavailable should also be defined.
  enabled: false
  minAvailable: null
  maxUnavailable: 1

certManager:
  enabled: false
  caCertDuration: 43800h0m0s
  certDuration: 8760h0m0s
  # -- revisionHistoryLimit for the Certificates. See
  # https://cert-manager.io/docs/reference/api-docs/#cert-manager.io/v1.CertificateSpec
  # Defaults to nil.
  caCertRevisionHistoryLimit: null
  certRevisionHistoryLimit: null
|
||||
@ -826,10 +826,10 @@ containerSecurityContext:
|
||||
##
|
||||
resources:
|
||||
requests:
|
||||
cpu: "500m"
|
||||
memory: "1Gi"
|
||||
cpu: "250m"
|
||||
memory: "256Mi"
|
||||
limits:
|
||||
cpu: "1000m"
|
||||
cpu: "500m"
|
||||
memory: "1Gi"
|
||||
|
||||
|
||||
|
||||
Loading…
Reference in New Issue
Block a user