diff --git a/infra/argocd/templates/kube-prometheus-stack.yaml b/infra/argocd/templates/kube-prometheus-stack.yaml
new file mode 100644
index 0000000..d89dad1
--- /dev/null
+++ b/infra/argocd/templates/kube-prometheus-stack.yaml
@@ -0,0 +1,20 @@
+apiVersion: argoproj.io/v1alpha1
+kind: Application
+metadata:
+  name: kube-prometheus-stack
+  namespace: argocd
+spec:
+  project: default
+  source:
+    repoURL: https://gitlab.com/developerdurp/homelab.git
+    targetRevision: main
+    path: infra/kube-prometheus-stack
+  destination:
+    namespace: kube-prometheus-stack
+    name: in-cluster
+  syncPolicy:
+    automated:
+      prune: true
+      selfHeal: true
+    syncOptions:
+      - CreateNamespace=true
diff --git a/infra/kube-prometheus-stack/Chart.yaml b/infra/kube-prometheus-stack/Chart.yaml
new file mode 100644
index 0000000..d2de063
--- /dev/null
+++ b/infra/kube-prometheus-stack/Chart.yaml
@@ -0,0 +1,12 @@
+apiVersion: v2
+name: kube-prometheus-stack
+description: A Helm chart for Kubernetes
+type: application
+
+version: 0.1.0
+appVersion: "1.16.0"
+
+dependencies:
+  - name: kube-prometheus-stack
+    repository: https://prometheus-community.github.io/helm-charts
+    version: 77.10.0
diff --git a/infra/kube-prometheus-stack/templates/grafana-secrets-sealed.yaml b/infra/kube-prometheus-stack/templates/grafana-secrets-sealed.yaml
new file mode 100644
index 0000000..7f01c1e
--- /dev/null
+++ b/infra/kube-prometheus-stack/templates/grafana-secrets-sealed.yaml
@@ -0,0 +1,41 @@
+apiVersion: external-secrets.io/v1
+kind: ExternalSecret
+metadata:
+  name: vault-grafana-oauth
+spec:
+  secretStoreRef:
+    name: vault
+    kind: ClusterSecretStore
+  target:
+    name: grafana-oauth
+  data:
+    - secretKey: GF_AUTH_GENERIC_OAUTH_CLIENT_ID
+      remoteRef:
+        key: secrets/kube-prometheus/grafana/oauth
+        property: GF_AUTH_GENERIC_OAUTH_CLIENT_ID
+    - secretKey: GF_AUTH_GENERIC_OAUTH_CLIENT_SECRET
+      remoteRef:
+        key: secrets/kube-prometheus/grafana/oauth
+        property: GF_AUTH_GENERIC_OAUTH_CLIENT_SECRET
+
+---
+
+apiVersion: external-secrets.io/v1
+kind: ExternalSecret
+metadata:
+  name: vault-admin-credentials
+spec:
+  secretStoreRef:
+    name: vault
+    kind: ClusterSecretStore
+  target:
+    name: grafana-admin-credentials
+  data:
+    - secretKey: admin-password
+      remoteRef:
+        key: secrets/kube-prometheus/grafana/admin
+        property: admin-password
+    - secretKey: admin-user
+      remoteRef:
+        key: secrets/kube-prometheus/grafana/admin
+        property: admin-user
diff --git a/infra/kube-prometheus-stack/templates/ingress.yaml b/infra/kube-prometheus-stack/templates/ingress.yaml
new file mode 100644
index 0000000..caf0ee1
--- /dev/null
+++ b/infra/kube-prometheus-stack/templates/ingress.yaml
@@ -0,0 +1,82 @@
+# NOTE(review): traefik.containo.us/v1alpha1 is the legacy Traefik CRD group
+# (dropped in Traefik v3 in favour of traefik.io/v1alpha1) - confirm cluster version.
+apiVersion: traefik.containo.us/v1alpha1
+kind: IngressRoute
+metadata:
+  name: grafana-ingress
+spec:
+  entryPoints:
+    - websecure
+  routes:
+    - match: Host(`grafana.durp.info`) && PathPrefix(`/`)
+      kind: Rule
+      services:
+        - name: grafana
+          port: 80
+  tls:
+    secretName: grafana-tls
+
+---
+
+apiVersion: cert-manager.io/v1
+kind: Certificate
+metadata:
+  name: grafana-tls
+spec:
+  secretName: grafana-tls
+  issuerRef:
+    name: letsencrypt-production
+    kind: ClusterIssuer
+  commonName: "grafana.durp.info"
+  dnsNames:
+    - "grafana.durp.info"
+
+---
+
+apiVersion: traefik.containo.us/v1alpha1
+kind: IngressRoute
+metadata:
+  name: alertmanager-ingress
+spec:
+  entryPoints:
+    - websecure
+  routes:
+    - match: Host(`alertmanager.durp.info`) && PathPrefix(`/`)
+      middlewares:
+        - name: whitelist
+          namespace: traefik
+        - name: authentik-proxy-provider
+          namespace: traefik
+      kind: Rule
+      services:
+        - name: prometheus-alertmanager
+          port: 9093
+  tls:
+    secretName: alertmanager-tls
+
+---
+
+apiVersion: cert-manager.io/v1
+kind: Certificate
+metadata:
+  name: alertmanager-tls
+spec:
+  secretName: alertmanager-tls
+  issuerRef:
+    name: letsencrypt-production
+    kind: ClusterIssuer
+  commonName: "alertmanager.durp.info"
+  dnsNames:
+    - "alertmanager.durp.info"
+
+---
+
+kind: Service
+apiVersion: v1
+metadata:
+  name: grafana-external-dns
+  annotations:
+    external-dns.alpha.kubernetes.io/hostname: grafana.durp.info
+spec:
+  type: ExternalName
+  externalName: durp.info
diff --git a/infra/kube-prometheus-stack/values.yaml b/infra/kube-prometheus-stack/values.yaml
new file mode 100644
index 0000000..df68eba
--- /dev/null
+++ b/infra/kube-prometheus-stack/values.yaml
@@ -0,0 +1,211 @@
+# Values for the wrapper chart. Everything nested under the
+# kube-prometheus-stack key is handed to the dependency declared in Chart.yaml.
+kube-prometheus-stack:
+  fullnameOverride: prometheus
+
+  defaultRules:
+    create: true
+    rules:
+      alertmanager: true
+      etcd: true
+      configReloaders: true
+      general: true
+      k8s: true
+      kubeApiserverAvailability: true
+      kubeApiserverBurnrate: true
+      kubeApiserverHistogram: true
+      kubeApiserverSlos: true
+      kubelet: true
+      kubeProxy: true
+      kubePrometheusGeneral: true
+      kubePrometheusNodeRecording: true
+      kubernetesApps: true
+      kubernetesResources: true
+      kubernetesStorage: true
+      kubernetesSystem: true
+      kubeScheduler: true
+      kubeStateMetrics: true
+      network: true
+      node: true
+      nodeExporterAlerting: true
+      nodeExporterRecording: true
+      prometheus: true
+      prometheusOperator: true
+
+  alertmanager:
+    fullnameOverride: alertmanager
+    enabled: true
+    ingress:
+      enabled: false
+  grafana:
+    enabled: true
+    fullnameOverride: grafana
+    forceDeployDatasources: false
+    forceDeployDashboards: false
+    defaultDashboardsEnabled: true
+    defaultDashboardsTimezone: utc
+    plugins:
+      - grafana-polystat-panel
+    serviceMonitor:
+      enabled: true
+    # Secret name and keys match the vault-admin-credentials ExternalSecret target.
+    admin:
+      existingSecret: grafana-admin-credentials
+      userKey: admin-user
+      passwordKey: admin-password
+    ingress:
+      enabled: false
+    grafana.ini:
+      server:
+        root_url: https://grafana.durp.info
+      auth.generic_oauth:
+        enabled: true
+        scopes: openid profile email
+        auth_url: https://authentik.durp.info/application/o/authorize/
+        token_url: https://authentik.durp.info/application/o/token/
+        api_url: https://authentik.durp.info/application/o/userinfo/
+    # grafana-oauth is the target of the vault-grafana-oauth ExternalSecret; it
+    # carries the GF_AUTH_GENERIC_OAUTH_CLIENT_ID / _CLIENT_SECRET keys.
+    envFromSecret: "grafana-oauth"
+
+  kubeApiServer:
+    enabled: true
+
+  kubelet:
+    enabled: true
+    serviceMonitor:
+      metricRelabelings:
+        - action: replace
+          sourceLabels:
+            - node
+          targetLabel: instance
+
+  kubeControllerManager:
+    enabled: true
+    endpoints:  # ips of servers
+      - 192.168.12.11
+      - 192.168.12.12
+      - 192.168.12.13
+
+  coreDns:
+    enabled: false
+
+  kubeDns:
+    enabled: false
+
+  kubeEtcd:
+    enabled: true
+    endpoints:  # ips of servers
+      - 192.168.12.11
+      - 192.168.12.12
+      - 192.168.12.13
+    service:
+      enabled: true
+      port: 2381
+      targetPort: 2381
+
+  kubeScheduler:
+    enabled: true
+    endpoints:  # ips of servers
+      - 192.168.12.11
+      - 192.168.12.12
+      - 192.168.12.13
+
+  kubeProxy:
+    enabled: true
+    endpoints:  # ips of servers
+      - 192.168.12.11
+      - 192.168.12.12
+      - 192.168.12.13
+
+  kubeStateMetrics:
+    enabled: true
+
+  kube-state-metrics:
+    fullnameOverride: kube-state-metrics
+    selfMonitor:
+      enabled: true
+    prometheus:
+      monitor:
+        enabled: true
+        relabelings:
+          - action: replace
+            regex: (.*)
+            replacement: $1
+            sourceLabels:
+              - __meta_kubernetes_pod_node_name
+            targetLabel: kubernetes_node
+
+  nodeExporter:
+    enabled: true
+    serviceMonitor:
+      relabelings:
+        - action: replace
+          regex: (.*)
+          replacement: $1
+          sourceLabels:
+            - __meta_kubernetes_pod_node_name
+          targetLabel: kubernetes_node
+
+  prometheus-node-exporter:
+    fullnameOverride: node-exporter
+    podLabels:
+      jobLabel: node-exporter
+    extraArgs:
+      - --collector.filesystem.mount-points-exclude=^/(dev|proc|sys|var/lib/docker/.+|var/lib/kubelet/.+)($|/)
+      - --collector.filesystem.fs-types-exclude=^(autofs|binfmt_misc|bpf|cgroup2?|configfs|debugfs|devpts|devtmpfs|fusectl|hugetlbfs|iso9660|mqueue|nsfs|overlay|proc|procfs|pstore|rpc_pipefs|securityfs|selinuxfs|squashfs|sysfs|tracefs)$
+    service:
+      portName: http-metrics
+    prometheus:
+      monitor:
+        enabled: true
+        relabelings:
+          - action: replace
+            regex: (.*)
+            replacement: $1
+            sourceLabels:
+              - __meta_kubernetes_pod_node_name
+            targetLabel: kubernetes_node
+    resources:
+      requests:
+        memory: 512Mi
+        cpu: 250m
+      limits:
+        memory: 2048Mi
+
+  prometheusOperator:
+    enabled: true
+    prometheusConfigReloader:
+      resources:
+        requests:
+          cpu: 200m
+          memory: 50Mi
+        limits:
+          memory: 100Mi
+
+  prometheus:
+    enabled: true
+    prometheusSpec:
+      replicas: 1
+      replicaExternalLabelName: "replica"
+      ruleSelectorNilUsesHelmValues: false
+      serviceMonitorSelectorNilUsesHelmValues: false
+      podMonitorSelectorNilUsesHelmValues: false
+      probeSelectorNilUsesHelmValues: false
+      retention: 6h
+      enableAdminAPI: true  # NOTE(review): enables TSDB delete/snapshot endpoints - confirm intended
+      walCompression: true
+      storageSpec:
+        volumeClaimTemplate:
+          spec:
+            storageClassName: longhorn
+            # ReadWriteOnce on purpose: with replicas: 1 a single pod mounts this
+            # volume, and Longhorn serves ReadWriteMany volumes over NFS, which
+            # Prometheus documents as unsupported for its TSDB.
+            accessModes: ["ReadWriteOnce"]
+            resources:
+              requests:
+                storage: 20Gi
+
+  thanosRuler:
+    enabled: false