一、prometheus 介绍
Prometheus Server 是 Prometheus 组件中的核心部分,负责实现对监控数据的获取、存储以及查询。它会定期从静态配置的监控目标或者基于服务发现自动配置的目标中拉取数据;当新拉取到的数据大于配置的内存缓存区时,数据就会持久化到存储设备当中。Prometheus 是一个开源的服务监控系统和时间序列数据库。
二、prometheus 下载
https://github.com/prometheus/prometheus/releases/download/v2.48.0/prometheus-2.48.0.linux-amd64.tar.gz
三、prometheus安装
3.1、prometheus.yml
[root@node1 prometheus-2.48.0.linux-amd64]# cat prometheus.yml
# my global config
global:
  scrape_interval: 15s # Set the scrape interval to every 15 seconds. Default is every 1 minute.
  evaluation_interval: 15s # Evaluate rules every 15 seconds. The default is every 1 minute.
  # scrape_timeout is set to the global default (10s).

# Alertmanager configuration
alerting:
  alertmanagers:
    - static_configs:
        - targets:
          # - alertmanager:9093

# Load rules once and periodically evaluate them according to the global 'evaluation_interval'.
rule_files:
  # - "first_rules.yml"
  # - "second_rules.yml"

# Scrape configurations: Prometheus itself first, followed by one job per exporter.
scrape_configs:
  # The job name is added as a label `job=<job_name>` to any timeseries scraped from this config.
  - job_name: "prometheus"

    # metrics_path defaults to '/metrics'
    # scheme defaults to 'http'.

    static_configs:
      - targets: ["node1:9090"]

  # Host-level metrics from node_exporter on both nodes.
  - job_name: node_exporter
    honor_timestamps: true
    scrape_interval: 5s # Overrides the global 15s interval for this job.
    scrape_timeout: 5s
    metrics_path: /metrics
    scheme: http
    follow_redirects: true
    static_configs:
      - targets:
          - node1:9100
          - node2:9100
    # params:  # Of limited use here; the filtering can be done on the node_exporter side instead.
    #   collect[]:  # node_exporter accepts an optional list of collectors to filter metrics; collect[] may be given multiple times.
    #     - cpu
    #     - meminfo
    #     - diskstats

  # mysqld_exporter endpoint.
  - job_name: mysqld
    static_configs:
      - targets: ['node1:9105']

  # consul_exporter endpoint.
  - job_name: consul-exporter
    static_configs:
      - targets: ['node1:9107']

  # memcached_exporter endpoint.
  - job_name: memcached-exporter
    static_configs:
      - targets: ['node1:9151']
3.2、node_exporter
node_exporter 用于采集 node 的运行指标,包括 node 的 cpu、load、filesystem、meminfo、network 等基础监控指标,类似于 zabbix 监控系统的 zabbix-agent。
3.3、启动node_exporter
./node_exporter
ts=2023-11-28T03:32:41.888Z caller=node_exporter.go:117 level=info collector=loadavg
ts=2023-11-28T03:32:41.888Z caller=node_exporter.go:117 level=info collector=mdadm
ts=2023-11-28T03:32:41.888Z caller=node_exporter.go:117 level=info collector=meminfo
ts=2023-11-28T03:32:41.888Z caller=node_exporter.go:117 level=info collector=netclass
ts=2023-11-28T03:32:41.888Z caller=node_exporter.go:117 level=info collector=netdev
ts=2023-11-28T03:32:41.888Z caller=node_exporter.go:117 level=info collector=netstat
ts=2023-11-28T03:32:41.888Z caller=node_exporter.go:117 level=info collector=nfs
ts=2023-11-28T03:32:41.888Z caller=node_exporter.go:117 level=info collector=nfsd
ts=2023-11-28T03:32:41.888Z caller=node_exporter.go:117 level=info collector=nvme
ts=2023-11-28T03:32:41.888Z caller=node_exporter.go:117 level=info collector=os
ts=2023-11-28T03:32:41.888Z caller=node_exporter.go:117 level=info collector=powersupplyclass
ts=2023-11-28T03:32:41.888Z caller=node_exporter.go:117 level=info collector=pressure
ts=2023-11-28T03:32:41.888Z caller=node_exporter.go:117 level=info collector=rapl
ts=2023-11-28T03:32:41.888Z caller=node_exporter.go:117 level=info collector=schedstat
ts=2023-11-28T03:32:41.888Z caller=node_exporter.go:117 level=info collector=selinux
ts=2023-11-28T03:32:41.888Z caller=node_exporter.go:117 level=info collector=sockstat
ts=2023-11-28T03:32:41.888Z caller=node_exporter.go:117 level=info collector=softnet
ts=2023-11-28T03:32:41.888Z caller=node_exporter.go:117 level=info collector=stat
ts=2023-11-28T03:32:41.888Z caller=node_exporter.go:117 level=info collector=tapestats
ts=2023-11-28T03:32:41.888Z caller=node_exporter.go:117 level=info collector=textfile
ts=2023-11-28T03:32:41.888Z caller=node_exporter.go:117 level=info collector=thermal_zone
ts=2023-11-28T03:32:41.888Z caller=node_exporter.go:117 level=info collector=time
ts=2023-11-28T03:32:41.888Z caller=node_exporter.go:117 level=info collector=timex
ts=2023-11-28T03:32:41.888Z caller=node_exporter.go:117 level=info collector=udp_queues
ts=2023-11-28T03:32:41.888Z caller=node_exporter.go:117 level=info collector=uname
ts=2023-11-28T03:32:41.888Z caller=node_exporter.go:117 level=info collector=vmstat
ts=2023-11-28T03:32:41.888Z caller=node_exporter.go:117 level=info collector=xfs
ts=2023-11-28T03:32:41.888Z caller=node_exporter.go:117 level=info collector=zfs
ts=2023-11-28T03:32:41.888Z caller=tls_config.go:274 level=info msg="Listening on" address=[::]:9100
ts=2023-11-28T03:32:41.888Z caller=tls_config.go:277 level=info msg="TLS is disabled." http2=false address=[::]:9100
3.4、启动 consul_exporter
./consul_exporter --consul.server="http://192.168.1.103:8500"
3.5、启动memcached_exporter
./memcached_exporter --memcached.address="192.168.1.103:11211" --web.listen-address=:9151
3.6、启动mysqld_exporter
./mysqld_exporter --config.my-cnf=my.cnf --web.listen-address=0.0.0.0:9105 --collect.slave_status --collect.binlog_size --collect.info_schema.processlist --collect.info_schema.innodb_metrics --collect.engine_innodb_status --collect.perf_schema.file_events --collect.perf_schema.replication_group_member_stats
3.7、启动prometheus
./prometheus
81µs web_handler=487ns query_engine=2.769µs scrape=180.363µs scrape_sd=70.38µs notify=22.955µs notify_sd=4.174µs rules=1.936µs tracing=11.289µs
ts=2023-11-27T19:12:43.965Z caller=main.go:1009 level=info msg="Server is ready to receive web requests."
ts=2023-11-27T19:12:43.965Z caller=manager.go:1012 level=info component="rule manager" msg="Starting rule manager..."
ts=2023-11-27T19:51:54.640Z caller=compact.go:523 level=info component=tsdb msg="write block" mint=1701103912907 maxt=1701108000000 ulid=01HG95D6EV0BK4M7T5YTZ98K01 duration=52.923742ms
ts=2023-11-27T19:51:54.643Z caller=head.go:1299 level=info component=tsdb msg="Head GC completed" caller=truncateMemory duration=2.729062ms
ts=2023-11-27T19:51:54.643Z caller=checkpoint.go:100 level=info component=tsdb msg="Creating checkpoint" from_segment=0 to_segment=10 mint=1701108000000
ts=2023-11-27T19:51:54.679Z caller=head.go:1267 level=info component=tsdb msg="WAL checkpoint complete" first=0 last=10 duration=36.255855ms
ts=2023-11-27T21:06:59.663Z caller=compact.go:523 level=info component=tsdb msg="write block" mint=1701108418884 maxt=1701115200000 ulid=01HG99PNVVJBTPYJK5A0749G1X duration=83.897189ms
ts=2023-11-27T21:06:59.666Z caller=head.go:1299 level=info component=tsdb msg="Head GC completed" caller=truncateMemory duration=2.614255ms
ts=2023-11-27T21:06:59.667Z caller=checkpoint.go:100 level=info component=tsdb msg="Creating checkpoint" from_segment=11 to_segment=14 mint=1701115200000
ts=2023-11-27T21:06:59.682Z caller=head.go:1267 level=info component=tsdb msg="WAL checkpoint complete" first=11 last=14 duration=15.489267ms
ts=2023-11-27T23:00:02.792Z caller=compact.go:523 level=info component=tsdb msg="write block" mint=1701115202599 maxt=1701122400000 ulid=01HG9G5P0X7WWN0CN3JZQV8D1V duration=74.54779ms
ts=2023-11-27T23:00:02.795Z caller=head.go:1299 level=info component=tsdb msg="Head GC completed" caller=truncateMemory duration=2.600091ms
ts=2023-11-27T23:00:02.796Z caller=checkpoint.go:100 level=info component=tsdb msg="Creating checkpoint" from_segment=15 to_segment=16 mint=1701122400000
ts=2023-11-27T23:00:02.823Z caller=head.go:1267 level=info component=tsdb msg="WAL checkpoint complete" first=15 last=16 duration=27.447969ms
ts=2023-11-28T01:00:02.781Z caller=compact.go:523 level=info component=tsdb msg="write block" mint=1701122402599 maxt=1701129600000 ulid=01HG9Q1D8XRCTAE667X2HWP97M duration=64.158921ms
ts=2023-11-28T01:00:02.785Z caller=head.go:1299 level=info component=tsdb msg="Head GC completed" caller=truncateMemory duration=2.784529ms
ts=2023-11-28T03:00:02.787Z caller=compact.go:523 level=info component=tsdb msg="write block" mint=1701129602599 maxt=1701136800000 ulid=01HG9XX4H0CSV557X71NC39B3H duration=67.465927ms
ts=2023-11-28T03:00:02.791Z caller=head.go:1299 level=info component=tsdb msg="Head GC completed" caller=truncateMemory duration=2.779195ms
ts=2023-11-28T03:00:02.794Z caller=checkpoint.go:100 level=info component=tsdb msg="Creating checkpoint" from_segment=17 to_segment=18 mint=1701136800000
ts=2023-11-28T03:00:02.921Z caller=head.go:1267 level=info component=tsdb msg="WAL checkpoint complete" first=17 last=18 duration=127.022097ms
3.8、查看prometheus
http://node1:9090/
3.9、结合grafana应用
grafana是一个跨平台的开源的度量分析和可视化工具,可以将采集的数据可视化的展示,并及时通知给告警接收方。其官方库中具有丰富的仪表盘插件。
这样 prometheus 部署就成功了,并且已与 grafana 结合成功。此外,还可以自定义 prometheus 的指标收集。