环境
- 夜莺5.1
- telegraf-1.21.0
telegraf安装与基础配置
centos7下安装telegraf采集端
# Download the Telegraf 1.21.0 tarball and install the binary under /opt/telegraf.
# The steps are chained with && so that a failed download or extraction stops
# the sequence instead of going on to copy a missing or stale binary.
mkdir -p /opt/telegraf \
  && wget https://dl.influxdata.com/telegraf/releases/telegraf-1.21.0_linux_amd64.tar.gz \
  && tar xf telegraf-1.21.0_linux_amd64.tar.gz \
  && cp telegraf-1.21.0/usr/bin/telegraf /opt/telegraf/
# Write the Telegraf agent configuration to /opt/telegraf/telegraf.conf.
cat > /opt/telegraf/telegraf.conf <<EOF
# Global tags
[global_tags]
platform = "88lulu"

[agent]
# Global collection interval: the default is once every 10s, here it is once
# per minute. Individual plugins can set their own interval.
interval = "60s"
round_interval = true
metric_batch_size = 1000
metric_buffer_limit = 10000
collection_jitter = "0s"
flush_interval = "10s"
flush_jitter = "0s"
precision = ""
# Default host name
hostname = ""
omit_hostname = false

# Upload the collected data to Nightingale (n9e).
[[outputs.opentsdb]]
host = "http://192.168.31.9"
port = 19000
http_batch_size = 50
http_path = "/opentsdb/put"
debug = false
separator = "_"

[[inputs.cpu]]
percpu = true
totalcpu = true
collect_cpu_time = false
report_active = true

[[inputs.disk]]
# Mount points to collect
mount_points = ["/", "/data1", "/data2"]
ignore_fs = ["tmpfs", "devtmpfs", "devfs", "iso9660", "overlay", "aufs", "squashfs"]

[[inputs.diskio]]
# Disk devices to collect
devices = ["sda", "sdb", "vd*"]

[[inputs.kernel]]

[[inputs.mem]]
# Collect only the fields matching these patterns; adjust to your own needs.
fieldpass = ["*available", "*total", "*percent", "*used"]

[[inputs.swap]]
fieldpass = ["*used", "*total", "*percent"]

[[inputs.processes]]

[[inputs.system]]
fielddrop = ["uptime_format"]

[[inputs.net]]
# Collect only the eth0 network interface.
interfaces = ["eth0"]
ignore_protocol_stats = true

[[inputs.netstat]]
EOF
# Install a systemd unit that runs the Telegraf binary from /opt/telegraf.
cat > /etc/systemd/system/telegraf.service <<EOF
[Unit]
# No quotes here: systemd does not strip them from Description, so they
# would show up literally in status output.
Description=telegraf
After=network.target

[Service]
Type=simple
# Absolute config path so that startup does not depend on WorkingDirectory.
ExecStart=/opt/telegraf/telegraf --config /opt/telegraf/telegraf.conf
WorkingDirectory=/opt/telegraf
Restart=on-failure
SuccessExitStatus=0
LimitNOFILE=65536
# Send stdout and stderr to syslog under the "telegraf" identifier.
StandardOutput=syslog
StandardError=syslog
SyslogIdentifier=telegraf

[Install]
WantedBy=multi-user.target
EOF
# Reload unit definitions so systemd picks up telegraf.service.
systemctl daemon-reload
# Enable the unit at boot, (re)start it now, then show its current state.
systemctl enable telegraf.service
systemctl restart telegraf.service
systemctl status telegraf.service
centos6下安装telegraf采集端
```shell
# Fetch the Telegraf RPM and install it from the downloaded file.
# Note: this package is 1.21.1, while the CentOS 7 steps use the 1.21.0 tarball.
wget https://dl.influxdata.com/telegraf/releases/telegraf-1.21.1-1.x86_64.rpm
yum -y install telegraf-1.21.1-1.x86_64.rpm
# Write the Telegraf agent configuration to /etc/telegraf/telegraf.conf.
cat > /etc/telegraf/telegraf.conf <<EOF
# Global tags
[global_tags]
platform = "88lulu"

[agent]
# Global collection interval: the default is once every 10s, here it is once
# per minute. Individual plugins can set their own interval.
interval = "60s"
round_interval = true
metric_batch_size = 1000
metric_buffer_limit = 10000
collection_jitter = "0s"
flush_interval = "10s"
flush_jitter = "0s"
precision = ""
# Default host name
hostname = ""
omit_hostname = false

# Upload the collected data to Nightingale (n9e).
[[outputs.opentsdb]]
host = "http://192.168.31.9"
port = 19000
http_batch_size = 50
http_path = "/opentsdb/put"
debug = false
separator = "_"

[[inputs.cpu]]
percpu = true
totalcpu = true
collect_cpu_time = false
report_active = true

[[inputs.disk]]
# Mount points to collect
mount_points = ["/", "/data1", "/data2"]
ignore_fs = ["tmpfs", "devtmpfs", "devfs", "iso9660", "overlay", "aufs", "squashfs"]

[[inputs.diskio]]
# Disk devices to collect
devices = ["sda", "sdb", "vd*"]

[[inputs.kernel]]

[[inputs.mem]]
# Collect only the fields matching these patterns; adjust to your own needs.
fieldpass = ["*available", "*total", "*percent", "*used"]

[[inputs.swap]]
fieldpass = ["*used", "*total", "*percent"]

[[inputs.processes]]

[[inputs.system]]
fielddrop = ["uptime_format"]

[[inputs.net]]
# Collect only the eth0 network interface.
interfaces = ["eth0"]
ignore_protocol_stats = true

[[inputs.netstat]]
EOF
# Register the telegraf init script with chkconfig so it is managed as a SysV service.
chkconfig --add telegraf
# Restart the agent through its init script.
/etc/init.d/telegraf restart
```

telegraf基础数据采集项说明
- cpu:`[[inputs.cpu]]`
  - `cpu_usage_active`:cpu使用率(%)
- 内存:`[[inputs.mem]]`
  - `mem_used_percent`:内存使用率(%)
  - `mem_available`:内存剩余大小
  - `mem_total`:内存总大小
- swap分区:`[[inputs.swap]]`
  - `swap_used_percent`:swap分区使用率
- 磁盘:`[[inputs.disk]]`、`[[inputs.diskio]]`
  - `disk_used_percent`:磁盘使用率
  - `disk_free`:磁盘剩余大小
  - `disk_total`:磁盘总大小
  - `(disk_inodes_used/disk_inodes_total)*100`:磁盘inodes使用率
  - `rate(diskio_io_time[1m])/10`:磁盘IO使用率(该指标来自 `[[inputs.diskio]]`)
- 系统:`[[inputs.system]]`
  - `system_load1`:1分钟负载
  - `system_load5`:5分钟负载
  - `system_load15`:15分钟负载
- 进程:`[[inputs.processes]]`
  - `processes_total`:进程总数
  - `processes_zombies`:僵尸进程数
- 网络:`[[inputs.net]]`
  - `rate(net_bytes_recv[5m])`:接收/进站流量,5分钟内每秒的速率,单位 bytes/秒
  - `rate(net_bytes_sent[5m])`:发送/出站流量,5分钟内每秒的速率,单位 bytes/秒
- TCP连接数:`[[inputs.netstat]]`
  - `netstat_tcp_established`:已建立的TCP连接数
  - `netstat_tcp_close_wait`:等待关闭的TCP连接数
telegraf的mysql数据采集说明
- mysql模块
[[inputs.mysql]]
# MySQL account, password and address used for the connection.
# The credentials below are example values; replace them with your own.
servers = ["root:123456@tcp(127.0.0.1:3306)/?tls=false"]
# Use 2 for Telegraf versions newer than 1.6, otherwise 1.
metric_version = 2
# All databases are collected by default; otherwise list the databases to collect.
table_schema_databases = []
# Collect thread state counts from the INFORMATION_SCHEMA PROCESSLIST table.
gather_process_list = true
# Collect auto_increment columns and their maximum values from the information schema.
gather_info_schema_auto_inc = true
# Collect metrics from the output of SHOW SLAVE STATUS.
gather_slave_status = true
# Collect metrics from the output of SHOW GLOBAL VARIABLES.
gather_global_variables = true
perf_events_statements_digest_text_limit = 120
perf_events_statements_limit = 250
perf_events_statements_time_limit = 86400
interval_slow = "30m"
- mysql指标说明(仅供参考):
  - `rate(mysql_com_show_processlist[5m]) * 60`:5分钟内,每分钟执行 SHOW PROCESSLIST 命令的次数
  - `rate(mysql_bytes_sent[5m])`:5分钟内,每秒出流量,单位 bytes/秒
  - `rate(mysql_bytes_received[5m])`:5分钟内,每秒入流量,单位 bytes/秒
  - `rate(mysql_aborted_connects[5m]) * 60`:5分钟内,每分钟mysql连接错误数
  - `rate(mysql_connections[5m]) * 60`:5分钟内,每分钟mysql连接总数(包括错误连接数)
  - `mysql_threads_connected`:当前打开的mysql连接数
  - `rate(mysql_com_select[5m])`:5分钟内,每秒查询的次数(读QPS)
  - `rate(mysql_com_insert[5m])+rate(mysql_com_update[5m])+rate(mysql_com_delete[5m])`:5分钟内,每秒写入的次数(写QPS)