docker容器的监控grafana prometheus

#客户端节点:
#上传代码包:
wget http://192.168.18.251/file/docker_monitor_node.tar.gz 
docker load -i docker_monitor_node.tar.gz 
#启动node-exporter
 docker run -d   -p 9100:9100   -v "/:/host:ro,rslave"   --name=node_exporter   quay.io/prometheus/node-exporter   --path.rootfs /host
#启动cadvisor
docker run --volume=/:/rootfs:ro  --volume=/var/run:/var/run:rw --volume=/sys:/sys:ro --volume=/var/lib/docker/:/var/lib/docker:ro  -p 8080:8080 -d --name=cadvisor google/cadvisor:latest

#prometheus节点:
#上传代码包:
wget http://192.168.18.251/file/prometheus-2.23.0.linux-amd64.tar.gz
tar xf prometheus-2.23.0.linux-amd64.tar.gz 
cd prometheus-2.23.0.linux-amd64/
vim prometheus.yml  #静态配置
scrape_configs:
  - job_name: 'prometheus'
    static_configs:
    - targets: ['localhost:9090']
  - job_name: 'cadvisor'
    static_configs:
    - targets: ['10.0.0.11:8080','10.0.0.12:8080']
  - job_name: 'node_exporter'
    static_configs:
    - targets: ['10.0.0.11:9100','10.0.0.12:9100']
#加载配置文件
./prometheus --config.file="prometheus.yml" & #不加&就是前台运行
#访问prometheus: http://IP:9090 主界面, http://IP:9090/targets 状态界面, http://IP:9090/config 配置文件界面
#安装grafana
wget http://192.168.18.251/file/grafana-7.5.4-1.x86_64.rpm
yum localinstall grafana-7.5.4-1.x86_64.rpm -y
systemctl start grafana-server.service 
systemctl enable grafana-server.service
#访问grafana  http://IP:3000,默认账号admin:admin   
#登录后: 新建数据源--导入dashboard模板
 #Prometheus自动发现:
 mkdir -p /root/prometheus-2.23.0.linux-amd64/config
 cd /root/prometheus-2.23.0.linux-amd64/config
 vim discovery_node-exporter.json
 [
  {
    "targets":  ["10.0.0.11:9100","10.0.0.12:9100"]
  }
]
vim discovery_cadvisor.json
[
  {
    "targets":  ["10.0.0.11:8080","10.0.0.12:8080"]
  }
]
#动态配置 编辑之前要把之前的静态配置删掉
vim /root/prometheus-2.23.0.linux-amd64/prometheus.yml
  - job_name: 'node_exporter'
    file_sd_configs:
      - files:
        - /root/prometheus-2.23.0.linux-amd64/config/discovery_node-exporter.json
        refresh_interval: 10s 
  - job_name: 'cadvisor'
    file_sd_configs:
      - files:
        - /root/prometheus-2.23.0.linux-amd64/config/discovery_cadvisor.json
        refresh_interval: 10s
#重启服务
kill prometheus进程
cd /root/prometheus-2.23.0.linux-amd64/
./prometheus --config.file="prometheus.yml" &
#Prometheus邮件报警:
#prometheus节点:
wget https://github.com/prometheus/alertmanager/releases/download/v0.21.0/alertmanager-0.21.0.linux-amd64.tar.gz
tar xf alertmanager-0.21.0.linux-amd64.tar.gz 
mv alertmanager-0.21.0.linux-amd64 alertmanager
cd alertmanager/
vim alertmanager.yml 
global:
  resolve_timeout: 5m
  smtp_from: 'xxxxxxx@qq.com'
  smtp_smarthost: 'smtp.qq.com:465'
  smtp_auth_username: 'xxxxxx@qq.com'
  smtp_auth_password: 'qq授权码'
  smtp_require_tls: false
  smtp_hello: 'qq.com'

route:
  group_by: ['alertname']
  group_wait: 10s
  group_interval: 10s
  repeat_interval: 1h
  receiver: 'web.hook'
receivers:
- name: 'web.hook'
  email_configs:
  - to: 'xxxxxxx@qq.com'
    send_resolved: true
inhibit_rules:
  - source_match:
      severity: 'critical'
    target_match:
      severity: 'warning'
    equal: ['alertname', 'dev', 'instance']
#编辑prometheus.yml
alerting:
  alertmanagers:
  - static_configs:
    - targets:
       - 127.0.0.1:9093
rule_files:
   - "/root/prometheus-2.23.0.linux-amd64/config/node_rule.yaml"

#重启服务:
kill prometheus进程
cd /root/prometheus-2.23.0.linux-amd64/
./prometheus --config.file="prometheus.yml" &
#alertmanager报警规则配置
mkdir -p /root/prometheus-2.23.0.linux-amd64/config
vim /root/prometheus-2.23.0.linux-amd64/config/node_rule.yaml
groups:
- name: node-up
  rules:
  - alert: node-up
    expr: up{job="node_exporter"} == 0
    for: 15s
    labels:
      severity: 1
      team: node
    annotations:
      summary: "{{ $labels.instance }} 已停止运行超过 15s!"
#启动服务
cd alertmanager/
./alertmanager --config.file="alertmanager.yml" &