1、alertmanager告警插件部署

[root@VM_0_48_centos prometheus]# cat  alertmanager-pvc.yaml
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
name: alertmanager
namespace: kube-system
labels:
kubernetes.io/cluster-service: "true"
addonmanager.kubernetes.io/mode: EnsureExists
spec:
storageClassName: managed-nfs-storage #必须动态PV,需要改为自己的名字
accessModes:
- ReadWriteOnce
resources:
requests:

[root@VM_0_48_centos prometheus]# cat alertmanager-deployment.yaml
apiVersion: apps/v1
kind: Deployment
metadata:
name: alertmanager
namespace: kube-system
labels:
k8s-app: alertmanager
kubernetes.io/cluster-service: "true"
addonmanager.kubernetes.io/mode: Reconcile
version: v0.14.0
spec:
replicas: 1
selector:
matchLabels:
k8s-app: alertmanager
version: v0.14.0
template:
metadata:
labels:
k8s-app: alertmanager
version: v0.14.0
annotations:
scheduler.alpha.kubernetes.io/critical-pod: ''
spec:
priorityClassName: system-cluster-critical
containers:
- name: prometheus-alertmanager
image: "prom/alertmanager:v0.14.0"
imagePullPolicy: "IfNotPresent"
args:
- --config.file=/etc/config/alertmanager.yml
- --storage.path=/data
- --web.external-url=/
ports:
- containerPort: 9093
readinessProbe:
httpGet:
path: /#/status
port: 9093
initialDelaySeconds: 30
timeoutSeconds: 30
volumeMounts:
- name: config-volume
mountPath: /etc/config
- name: storage-volume
mountPath: "/data"
subPath: ""
resources:
limits:
cpu: 10m
memory: 50Mi
requests:
cpu: 10m
memory: 50Mi
- name: prometheus-alertmanager-configmap-reload
image: "jimmidyson/configmap-reload:v0.1"
imagePullPolicy: "IfNotPresent"
args:
- --volume-dir=/etc/config
- --webhook-url=http://localhost:9093/-/reload
volumeMounts:
- name: config-volume
mountPath: /etc/config
readOnly: true
resources:
limits:
cpu: 10m
memory: 10Mi
requests:
cpu: 10m
memory: 10Mi
volumes:
- name: config-volume
configMap:
name: alertmanager-config
- name: storage-volume
persistentVolumeClaim:
claimName: alertmanager

[root@VM_0_48_centos prometheus]# cat alertmanager-service.yaml
apiVersion: v1
kind: Service
metadata:
name: alertmanager
namespace: kube-system
labels:
kubernetes.io/cluster-service: "true"
addonmanager.kubernetes.io/mode: Reconcile
kubernetes.io/name: "Alertmanager"
spec:
ports:
- name: http
port: 80
protocol: TCP
targetPort: 9093
selector:
k8s-app: alertmanager
type: "ClusterIP"

2、prometheus 查看服务是否正常

查看alertmanager服务是否正常
[root@VM_0_48_centos prometheus]# kubectl get svc -n kube-system
NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE
alertmanager ClusterIP 10.0.0.176 <none> 80/TCP 46h

 

3、 prometheus 配置告警规则和调用altermanager

[root@VM_0_48_centos prometheus]# cat  prometheus-configmap.yaml
# Prometheus configuration format https://prometheus.io/docs/prometheus/latest/configuration/configuration/
apiVersion: v1
kind: ConfigMap
metadata:
name: prometheus-config
namespace: kube-system
labels:
kubernetes.io/cluster-service: "true"
addonmanager.kubernetes.io/mode: EnsureExists
data:
prometheus.yml: |
scrape_configs:
- job_name: prometheus
static_configs:
- targets:
- localhost:9090 - job_name: kubernetes-apiservers
kubernetes_sd_configs:
- role: endpoints
relabel_configs:
- action: keep
regex: default;kubernetes;https
source_labels:
- __meta_kubernetes_namespace
- __meta_kubernetes_service_name
- __meta_kubernetes_endpoint_port_name
scheme: https
tls_config:
ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
insecure_skip_verify: true
bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token - job_name: kubernetes-nodes-kubelet
kubernetes_sd_configs:
- role: node
relabel_configs:
- action: labelmap
regex: __meta_kubernetes_node_label_(.+)
scheme: https
tls_config:
ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
insecure_skip_verify: true
bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token - job_name: kubernetes-nodes-cadvisor
kubernetes_sd_configs:
- role: node
relabel_configs:
- action: labelmap
regex: __meta_kubernetes_node_label_(.+)
- target_label: __metrics_path__
replacement: /metrics/cadvisor
scheme: https
tls_config:
ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
insecure_skip_verify: true
bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token - job_name: kubernetes-service-endpoints
kubernetes_sd_configs:
- role: endpoints
relabel_configs:
- action: keep
regex: true
source_labels:
- __meta_kubernetes_service_annotation_prometheus_io_scrape
- action: replace
regex: (https?)
source_labels:
- __meta_kubernetes_service_annotation_prometheus_io_scheme
target_label: __scheme__
- action: replace
regex: (.+)
source_labels:
- __meta_kubernetes_service_annotation_prometheus_io_path
target_label: __metrics_path__
- action: replace
regex: ([^:]+)(?::\d+)?;(\d+)
replacement: $1:$2
source_labels:
- __address__
- __meta_kubernetes_service_annotation_prometheus_io_port
target_label: __address__
- action: labelmap
regex: __meta_kubernetes_service_label_(.+)
- action: replace
source_labels:
- __meta_kubernetes_namespace
target_label: kubernetes_namespace
- action: replace
source_labels:
- __meta_kubernetes_service_name
target_label: kubernetes_name - job_name: kubernetes-services
kubernetes_sd_configs:
- role: service
metrics_path: /probe
params:
module:
- http_2xx
relabel_configs:
- action: keep
regex: true
source_labels:
- __meta_kubernetes_service_annotation_prometheus_io_probe
- source_labels:
- __address__
target_label: __param_target
- replacement: blackbox
target_label: __address__
- source_labels:
- __param_target
target_label: instance
- action: labelmap
regex: __meta_kubernetes_service_label_(.+)
- source_labels:
- __meta_kubernetes_namespace
target_label: kubernetes_namespace
- source_labels:
- __meta_kubernetes_service_name
target_label: kubernetes_name - job_name: kubernetes-pods
kubernetes_sd_configs:
- role: pod
relabel_configs:
- action: keep
regex: true
source_labels:
- __meta_kubernetes_pod_annotation_prometheus_io_scrape
- action: replace
regex: (.+)
source_labels:
- __meta_kubernetes_pod_annotation_prometheus_io_path
target_label: __metrics_path__
- action: replace
regex: ([^:]+)(?::\d+)?;(\d+)
replacement: $1:$2
source_labels:
- __address__
- __meta_kubernetes_pod_annotation_prometheus_io_port
target_label: __address__
- action: labelmap
regex: __meta_kubernetes_pod_label_(.+)
- action: replace
source_labels:
- __meta_kubernetes_namespace
target_label: kubernetes_namespace
- action: replace
source_labels:
- __meta_kubernetes_pod_name
target_label: kubernetes_pod_name
alerting:
alertmanagers:
- static_configs:
- targets: ["alertmanager:80"] ####需要修改alertmanger服务名字,集群内部通过服务名调用

4、配置告警规则-创建告警规则的configmap

[root@VM_0_48_centos prometheus]# cat prometheus-rules.yaml
# Prometheus configuration format https://prometheus.io/docs/prometheus/latest/configuration/configuration/
apiVersion: v1
kind: ConfigMap
metadata:
name: prometheus-rules-config
namespace: kube-system
labels:
kubernetes.io/cluster-service: "true"
addonmanager.kubernetes.io/mode: EnsureExists
data:
rules.yml: |
groups:
- name: node
rules:
- alert: memory_use
expr: 100-(node_memory_MemFree_bytes+node_memory_Cached_bytes+node_memory_Buffers_bytes)/node_memory_MemTotal_bytes*100 >= 90
for: 10s
annotations:
summary: "机器 {{ $labels.instance }} 内存使用的率超过90%" expr: 100-(node_memory_MemFree_bytes+node_memory_Cached_bytes+node_memory_Buffers_bytes)/node_memory_MemTotal_bytes*100 >= 80
for: 10s
annotations:
summary: "机器 {{ $labels.instance }} 内存使用的率超过80%"

5、配置告警规则-将告警规则挂载到promethues中

[root@VM_0_48_centos prometheus]# cat prometheus-statefulset.yaml
apiVersion: apps/v1
kind: StatefulSet
metadata:
name: prometheus
namespace: kube-system
labels:
k8s-app: prometheus
kubernetes.io/cluster-service: "true"
addonmanager.kubernetes.io/mode: Reconcile
version: v2.2.1
spec:
serviceName: "prometheus"
replicas: 1
podManagementPolicy: "Parallel"
updateStrategy:
type: "RollingUpdate"
selector:
matchLabels:
k8s-app: prometheus
template:
metadata:
labels:
k8s-app: prometheus
annotations:
scheduler.alpha.kubernetes.io/critical-pod: ''
spec:
priorityClassName: system-cluster-critical
serviceAccountName: prometheus
initContainers:
- name: "init-chown-data"
image: "busybox:latest"
imagePullPolicy: "IfNotPresent"
command: ["chown", "-R", "65534:65534", "/data"]
volumeMounts:
- name: prometheus-data
mountPath: /data
subPath: ""
containers:
- name: prometheus-server-configmap-reload
image: "jimmidyson/configmap-reload:v0.1"
imagePullPolicy: "IfNotPresent"
args:
- --volume-dir=/etc/config
- --webhook-url=http://localhost:9090/-/reload
volumeMounts:
- name: config-volume
mountPath: /etc/config
readOnly: true
resources:
limits:
cpu: 10m
memory: 10Mi
requests:
cpu: 10m
memory: 10Mi - name: prometheus-server
image: "prom/prometheus:v2.2.1"
imagePullPolicy: "IfNotPresent"
args:
- --config.file=/etc/config/prometheus.yml
- --storage.tsdb.path=/data
- --web.console.libraries=/etc/prometheus/console_libraries
- --web.console.templates=/etc/prometheus/consoles
- --web.enable-lifecycle
ports:
- containerPort: 9090
readinessProbe:
httpGet:
path: /-/ready
port: 9090
initialDelaySeconds: 30
timeoutSeconds: 30
livenessProbe:
httpGet:
path: /-/healthy
port: 9090
initialDelaySeconds: 30
timeoutSeconds: 30
# based on 10 running nodes with 30 pods each
resources:
limits:
cpu: 200m
memory: 1000Mi
requests:
cpu: 200m
memory: 1000Mi volumeMounts:
- name: config-volume
mountPath: /etc/config
- name: prometheus-data
mountPath: /data
subPath: ""
- name: rules-volume
mountPath: /etc/config/alert_rules
terminationGracePeriodSeconds: 300
volumes:
- name: config-volume
configMap:
name: prometheus-config
- name: rules-volume
configMap:
name: prometheus-rules-config ####挂载规则到服务目录下
- name: prometheus-data
persistentVolumeClaim:
claimName: prometheus-claim

 6、配置告警规则 配置 prometheus启用告警规则

[root@VM_0_48_centos prometheus]# cat prometheus-configmap.yaml
# Prometheus configuration format https://prometheus.io/docs/prometheus/latest/configuration/configuration/
apiVersion: v1
kind: ConfigMap
metadata:
name: prometheus-config
namespace: kube-system
labels:
kubernetes.io/cluster-service: "true"
addonmanager.kubernetes.io/mode: EnsureExists
data:
prometheus.yml: |
scrape_configs:
- job_name: prometheus
static_configs:
- targets:
- localhost:9090
- job_name: node
static_configs:
- targets:
- 172.19.0.48:9100
- 172.19.0.14:9100
- 172.19.0.9:9100
- 172.19.0.2:9100 - job_name: kubernetes-apiservers
kubernetes_sd_configs:
- role: endpoints
relabel_configs:
- action: keep
regex: default;kubernetes;https
source_labels:
- __meta_kubernetes_namespace
- __meta_kubernetes_service_name
- __meta_kubernetes_endpoint_port_name
scheme: https
tls_config:
ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
insecure_skip_verify: true
bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token - job_name: kubernetes-nodes-kubelet
kubernetes_sd_configs:
- role: node
relabel_configs:
- action: labelmap
regex: __meta_kubernetes_node_label_(.+)
scheme: https
tls_config:
ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
insecure_skip_verify: true
bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token - job_name: kubernetes-nodes-cadvisor
kubernetes_sd_configs:
- role: node
relabel_configs:
- action: labelmap
regex: __meta_kubernetes_node_label_(.+)
- target_label: __metrics_path__
replacement: /metrics/cadvisor
scheme: https
tls_config:
ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
insecure_skip_verify: true
bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token - job_name: kubernetes-service-endpoints
kubernetes_sd_configs:
- role: endpoints
relabel_configs:
- action: keep
regex: true
source_labels:
- __meta_kubernetes_service_annotation_prometheus_io_scrape
- action: replace
regex: (https?)
source_labels:
- __meta_kubernetes_service_annotation_prometheus_io_scheme
target_label: __scheme__
- action: replace
regex: (.+)
source_labels:
- __meta_kubernetes_service_annotation_prometheus_io_path
target_label: __metrics_path__
- action: replace
regex: ([^:]+)(?::\d+)?;(\d+)
replacement: $1:$2
source_labels:
- __address__
- __meta_kubernetes_service_annotation_prometheus_io_port
target_label: __address__
- action: labelmap
regex: __meta_kubernetes_service_label_(.+)
- action: replace
source_labels:
- __meta_kubernetes_namespace
target_label: kubernetes_namespace
- action: replace
source_labels:
- __meta_kubernetes_service_name
target_label: kubernetes_name - job_name: kubernetes-services
kubernetes_sd_configs:
- role: service
metrics_path: /probe
params:
module:
- http_2xx
relabel_configs:
- action: keep
regex: true
source_labels:
- __meta_kubernetes_service_annotation_prometheus_io_probe
- source_labels:
- __address__
target_label: __param_target
- replacement: blackbox
target_label: __address__
- source_labels:
- __param_target
target_label: instance
- action: labelmap
regex: __meta_kubernetes_service_label_(.+)
- source_labels:
- __meta_kubernetes_namespace
target_label: kubernetes_namespace
- source_labels:
- __meta_kubernetes_service_name
target_label: kubernetes_name - job_name: kubernetes-pods
kubernetes_sd_configs:
- role: pod
relabel_configs:
- action: keep
regex: true
source_labels:
- __meta_kubernetes_pod_annotation_prometheus_io_scrape
- action: replace
regex: (.+)
source_labels:
- __meta_kubernetes_pod_annotation_prometheus_io_path
target_label: __metrics_path__
- action: replace
regex: ([^:]+)(?::\d+)?;(\d+)
replacement: $1:$2
source_labels:
- __address__
- __meta_kubernetes_pod_annotation_prometheus_io_port
target_label: __address__
- action: labelmap
regex: __meta_kubernetes_pod_label_(.+)
- action: replace
source_labels:
- __meta_kubernetes_namespace
target_label: kubernetes_namespace
- action: replace
source_labels:
- __meta_kubernetes_pod_name
target_label: kubernetes_pod_name
alerting:
alertmanagers:
- static_configs:
- targets: ["alertmanager:80"]
rule_files:
- "/etc/config/alert_rules/*.yml" ####启用挂载的规则文件

5、配置告警媒介

[root@VM_0_48_centos prometheus]# cat   alertmanager-configmap.yaml
apiVersion: v1
kind: ConfigMap
metadata:
name: alertmanager-config
namespace: kube-system
labels:
kubernetes.io/cluster-service: "true"
addonmanager.kubernetes.io/mode: EnsureExists
data:
alertmanager.yml: |
global:
resolve_timeout: 5m
smtp_smarthost: 'smtp.163.com:25'
smtp_from: 'xjq18125012766@163.com'
smtp_auth_username: '********@163.com'
smtp_auth_password: '*******'
smtp_require_tls: false route:
group_by: ['alertname']
group_wait: 10s
group_interval: 30s
repeat_interval: 10s
receiver: 'mail' receivers:
- name: 'mail'
email_configs:
- to: '2654071080@qq.com'

6、告警结果展示

k8s全方位监控-prometheus-alertmanager部署-配置第一条告警邮件的更多相关文章

  1. k8s全方位监控-prometheus部署

    1.k8s 监控资源对象 2. prometheus简单介绍. https://github.com/prometheus •多维数据模型:由度量名称和键值对标识的时间序列数据•PromSQL:一种灵 ...

  2. k8s全方位监控 -prometheus实现短信告警接口编写(python)

    1.prometheus短信告警接口实现(python)源码如下: import subprocess from flask import Flask from flask import reques ...

  3. k8s全方位监控中-常用rules配置

    [root@VM_0_48_centos prometheus]# cat alertmanager-configmap.yaml apiVersion: v1 kind: ConfigMap met ...

  4. Alertmanager 部署配置

    目录 前言 源码安装 配置 启动 配置prometheus监控Alertmanager 修改prometheus配置 重新加载配置文件 配置测试告警 修改prometheus配置 重新加载配置文件 测 ...

  5. Longhorn,企业级云原生容器分布式存储 - 监控(Prometheus+AlertManager+Grafana)

    内容来源于官方 Longhorn 1.1.2 英文技术手册. 系列 Longhorn 是什么? Longhorn 企业级云原生容器分布式存储解决方案设计架构和概念 Longhorn 企业级云原生容器分 ...

  6. k8s全方位监控-prometheus-配置文件介绍以及基于文件服务发现

    1.scrape_configs 参数介绍 # 默认的全局配置 global: scrape_interval: 15s # 采集间隔15s,默认为1min一次 evaluation_interval ...

  7. 容器编排系统K8s之Prometheus监控系统+Grafana部署

    前文我们聊到了k8s的apiservice资源结合自定义apiserver扩展原生apiserver功能的相关话题,回顾请参考:https://www.cnblogs.com/qiuhom-1874/ ...

  8. K8S(13)监控实战-部署prometheus

    k8s监控实战-部署prometheus 目录 k8s监控实战-部署prometheus 1 prometheus前言相关 1.1 Prometheus的特点 1.2 基本原理 1.2.1 原理说明 ...

  9. 【集群监控】Docker上部署Prometheus+Alertmanager+Grafana实现集群监控

    Docker部署 下载 sudo yum-config-manager --add-repo https://download.docker.com/linux/centos/docker-ce.re ...

随机推荐

  1. python if-elif-else 判断

    #!/usr/bin/python #coding=utf-8 #好好学习,天天向上 age=12 if age<4: price=0 elif age<18: price=40 elif ...

  2. python07day

    回顾 id == is: ==: 数值是否相同 is: 内存地址是否相同 id: 获取对象的内存地址 代码块: 一个文件.交互式命令一行都是一个代码块 同一代码块下缓存机制(字符串驻留机制) 所有数字 ...

  3. .NET 诞生已20周年,您的 .NET 技能是否还停留在2010 年?

    20年来,我们见证了超过上千万.NET 开发员,当前有600万.NET 开发者正在使用.NET技术构建各类解决方案.今天,IT市场对.NET 开发人员的需求达到了前所未有的程度,特别是在中国,各大公司 ...

  4. KVC替换系统的tabbar为自定义tabbar---秀清

    CustomTabbar *tabbar = [[CustomTabbar alloc]init]; //KVC,更换系统的tabbar为自定义tabbar tabbar.tabbarDelegate ...

  5. List接口中特有方法

    import java.util.ArrayList; import java.util.List; /* List接口中特有方法: 添加 add(int index, E element) addA ...

  6. 《Effective Python》笔记——第4章 元类及属性

    一.用属性取代get和set方法 常规的get和set方法如下: class OldResistor(): def __init__(self, ohms): self.__ohms = ohms d ...

  7. 前端语言之js(对比python快速入门)

    昨日内容回顾 浮动 定位 溢出 透明度 模态框 今日内容概要 变量与常量 基本数据类型 数据类型内置方法 函数 常见内置对象 BOM与DOM操作 内容详细 1.变量与常量 # 在JS中声明变量需要使用 ...

  8. 总结haproxy各调度算法的实现方式及其应用场景

    一.静态算法 1.1 static-rr 基于权重的轮询调度,不支持运行时利用socat进行权重的动态调整(只支持0和1,不支持其它值)及后端服务器慢启动,其后端主机数量没有限制,相当于LVS中的 w ...

  9. python官网导航翻译

  10. 利用信号量semaphore实现两个进程读写同步 Linux C

    这篇帖子主要是记录一下自己使用信号量遇到的坑. 首先是需求:创建两个进程A,B.A往buffer中写,B读.两个进程利用命名管道进行通信,并实现读写同步.即A写完后通知B读,B读完后通知A写. 如果A ...