#客户端节点:
#上传代码包:
wget http://192.168.18.251/file/docker_monitor_node.tar.gz
docker load -i docker_monitor_node.tar.gz
#启动node-exporter
docker run -d -p 9100:9100 -v "/:/host:ro,rslave" --name=node_exporter quay.io/prometheus/node-exporter --path.rootfs /host
#启动cadvisor
docker run --volume=/:/rootfs:ro --volume=/var/run:/var/run:rw --volume=/sys:/sys:ro --volume=/var/lib/docker/:/var/lib/docker:ro -p 8080:8080 -d --name=cadvisor google/cadvisor:latest
#prometheus节点:
#上传代码包:
wget http://192.168.18.251/file/prometheus-2.23.0.linux-amd64.tar.gz
tar xf prometheus-2.23.0.linux-amd64.tar.gz
cd prometheus-2.23.0.linux-amd64/
vim prometheus.yml #静态配置
scrape_configs:
- job_name: 'prometheus'
static_configs:
- targets: ['localhost:9090']
- job_name: 'cadvisor'
static_configs:
- targets: ['10.0.0.11:8080','10.0.0.12:8080']
- job_name: 'node_exporter'
static_configs:
- targets: ['10.0.0.11:9100','10.0.0.12:9100']
#加载配置文件
./prometheus --config.file="prometheus.yml" & #不加&就是前台运行
#访问prometheus: http://IP:9090 主界面(Status菜单下可查看配置文件);http://IP:9093 为alertmanager状态/配置文件界面(需先按后文启动alertmanager)
#安装grafana
wget http://192.168.18.251/file/grafana-7.5.4-1.x86_64.rpm
yum localinstall grafana-7.5.4-1.x86_64.rpm -y
systemctl start grafana-server.service
systemctl enable grafana-server.service
#访问grafana http://IP:3000,默认账号admin:admin
新建数据源--导入dashboard模板
#Prometheus自动发现:
mkdir -p /root/prometheus-2.23.0.linux-amd64/config
cd /root/prometheus-2.23.0.linux-amd64/config
vim discovery_node-exporter.json
[
{
"targets": ["10.0.0.11:9100","10.0.0.12:9100"]
}
]
vim discovery_cadvisor.json
[
{
"targets": ["10.0.0.11:8080","10.0.0.12:8080"]
}
]
#动态配置 编辑之前要把之前的静态配置删掉
vim /root/prometheus-2.23.0.linux-amd64/prometheus.yml
- job_name: 'node_exporter'
file_sd_configs:
- files:
- /root/prometheus-2.23.0.linux-amd64/config/discovery_node-exporter.json
refresh_interval: 10s
- job_name: 'cadvisor'
file_sd_configs:
- files:
- /root/prometheus-2.23.0.linux-amd64/config/discovery_cadvisor.json
refresh_interval: 10s
#重启服务
kill prometheus进程
cd /root/prometheus-2.23.0.linux-amd64/
./prometheus --config.file="prometheus.yml" &
#Prometheus邮件报警:
#prometheus节点:
wget https://github.com/prometheus/alertmanager/releases/download/v0.21.0/alertmanager-0.21.0.linux-amd64.tar.gz
tar xf alertmanager-0.21.0.linux-amd64.tar.gz
mv alertmanager-0.21.0.linux-amd64 alertmanager
cd alertmanager/
vim alertmanager.yml
global:
resolve_timeout: 5m
smtp_from: 'xxxxxxx@qq.com'
smtp_smarthost: 'smtp.qq.com:465'
smtp_auth_username: 'xxxxxx@qq.com'
smtp_auth_password: 'qq授权码'
smtp_require_tls: false
smtp_hello: 'qq.com'
route:
group_by: ['alertname']
group_wait: 10s
group_interval: 10s
repeat_interval: 1h
receiver: 'web.hook'
receivers:
- name: 'web.hook'
email_configs:
- to: 'xxxxxxx@qq.com'
send_resolved: true
inhibit_rules:
- source_match:
severity: 'critical'
target_match:
severity: 'warning'
equal: ['alertname', 'dev', 'instance']
#编辑prometheus.yml
alerting:
alertmanagers:
- static_configs:
- targets:
- 127.0.0.1:9093
rule_files:
- "/root/prometheus-2.23.0.linux-amd64/config/node_rule.yaml"
#重启服务:
kill prometheus进程
cd prometheus-2.23.0.linux-amd64/
./prometheus --config.file="prometheus.yml" &
#alertmanager报警规则配置(即prometheus.yml中rule_files引用的node_rule.yaml)
mkdir -p prometheus-2.23.0.linux-amd64/config
vim prometheus-2.23.0.linux-amd64/config/node_rule.yaml
groups:
- name: node-up
rules:
- alert: node-up
expr: up{job="node_exporter"} == 0
for: 15s
labels:
severity: 1
team: node
annotations:
summary: "{{ $labels.instance }} 已停止运行超过 15s!"
#启动服务
cd alertmanager/
./alertmanager --config.file="alertmanager.yml" &