前言: 以前做了在 Cacti 上展示 Redis 性能报表图的工作,可以看到 Redis 的性能变化趋势,但是还缺少实时报警通知的功能,现在补上这一环节:在 Redis 服务出现瓶颈或者异常的时候及时报警通知,方便 DBA 第一时间处理维护。
1,下载redis监控插件
Redis 已经在服务器上安装好了,所以可以直接进行监控;Redis 集群安装请参考:http://blog.itpub.net/26230597/viewspace-1145831/ 。监控插件下载地址为:http://download.csdn.net/detail/mchdba/8023351 ,有2个版本,一个是 Perl 脚本写成的,一个是 PHP 脚本写成的,可以任意选择一个,这里选择的是 Perl 脚本。
2,赋予执行权限
将check_redis.php和check_redis.pl复制到/usr/lib/nagios/plugins/目录,然后赋予执行权限:
[root@wgq_41 plugins]# cd /usr/lib/nagios/plugins/
[root@wgq_41 plugins]# chown -R nagios.nagios check_redis.*
[root@wgq_41 plugins]# chmod 750 check_redis.*
3,定义监控命令
[root@wgq objects]# vim /usr/local/nagios/etc/objects/commands.cfg
# add by tim on 20141010,for redis
# check redis
define command {
command_name check_redis
command_line /usr/lib/nagios/plugins/check_redis.pl -H $HOSTADDRESS$ -p $ARG1$ -a $ARG2$ -w $ARG3$ -c $ARG4$ -f
}
4,定义redis监控主机
[root@wgq etc]# vim /usr/local/nagios/etc/hosts.cfg
# no.018,redis master server
define host{
use linux-server
host_name cache-1
alias cache-1
address 10.xxx.3.x0
check_command check-host-alive
max_check_attempts 5
check_period 24x7
contact_groups ops
notification_interval 30
notification_period 24x7
notification_options d,u,r
}
# no.020 cache-3 redis slave server
define host{
use linux-server
host_name cache-3
alias cache-3
address 10.xx.3.x2
check_command check-host-alive
max_check_attempts 5
check_period 24x7
contact_groups ops
notification_interval 30
notification_period 24x7
notification_options d,u,r
}
5,定义redis监控主机组
define hostgroup {
hostgroup_name redis_servers
alias redisservices
members cache-1,cache-3
}
6,定义redis监控服务选项
[root@wgq objects]# vim /usr/local/nagios/etc/objects/services_redis.cfg
# redis master 监控选项
define service {
host_name cache-1
servicegroups redisservices
service_description redis master clients
check_command check_redis!6379!'connected_clients,blocked_clients,client_longest_output_list,client_biggest_input_buf'!200,50,~,~!600,150,~,~
max_check_attempts 5
normal_check_interval 3
retry_check_interval 2
check_period 24x7
notification_interval 10
notification_period 24x7
notification_options w,u,c,r
contact_groups ops
}
define service {
host_name cache-1
servicegroups redisservices
service_description redis master memory
check_command check_redis!6379!'used_memory_human,used_memory_peak_human'!~,~!~,~
max_check_attempts 5
normal_check_interval 3
retry_check_interval 2
check_period 24x7
notification_interval 10
notification_period 24x7
notification_options w,u,c,r
contact_groups ops
}
define service {
host_name cache-1
servicegroups redisservices
service_description redis master cpu
check_command check_redis!6379!'used_cpu_sys,used_cpu_user,used_cpu_sys_children,used_cpu_user_children'!~,~,~,~!~,~,~,~ ; #未定义监控报警阈值
max_check_attempts 5
normal_check_interval 3
retry_check_interval 2
check_period 24x7
notification_interval 10
notification_period 24x7
notification_options w,u,c,r
contact_groups ops
}
# redis slave 监控选项
define service {
host_name cache-3
servicegroups redisservices
service_description redis slave clients
check_command check_redis!6379!'connected_clients,blocked_clients,client_longest_output_list,client_biggest_input_buf'!200,50,~,~!600,150,~,~
max_check_attempts 5
normal_check_interval 3
retry_check_interval 2
check_period 24x7
notification_interval 10
notification_period 24x7
notification_options w,u,c,r
contact_groups ops
}
define service {
host_name cache-3
servicegroups redisservices
service_description redis slave memory
check_command check_redis!6379!'used_memory_human,used_memory_peak_human'!~,~!~,~
max_check_attempts 5
normal_check_interval 3
retry_check_interval 2
check_period 24x7
notification_interval 10
notification_period 24x7
notification_options w,u,c,r
contact_groups ops
}
define service {
host_name cache-3
servicegroups redisservices
service_description redis slave cpu
check_command check_redis!6379!'used_cpu_sys,used_cpu_user,used_cpu_sys_children,used_cpu_user_children'!~,~,~,~!~,~,~,~ ; #未定义监控报警阈值
max_check_attempts 5
normal_check_interval 3
retry_check_interval 2
check_period 24x7
notification_interval 10
notification_period 24x7
notification_options w,u,c,r
contact_groups ops
}
赋予nagios用户执行权限
[root@wgq objects]# chown -R nagios.nagios services_redis.cfg
[root@wgq objects]# chmod 777 services_redis.cfg
添加监控服务项到nagios.cfg
[root@wgq etc]# vim /usr/local/nagios/etc/nagios.cfg
cfg_file=/usr/local/nagios/etc/objects/services_redis.cfg
7,测试redis监控服务
执行命令 /usr/lib/nagios/plugins/check_redis.pl -H cache-1 -a 'connected_clients,blocked_clients' -w ~,~ -c ~,~ -m -M 4G -A -R -T 来测试下redis监控是否正常运行:
[root@wgq plugins]# /usr/lib/nagios/plugins/check_redis.pl -H 10.2xx.3.x0 -a 'connected_clients,blocked_clients' -w ~,~ -c ~,~ -m -M 4G -A -R -T
ok: redis 2.8.8 on 10.2xx.3.x0:6379 has 1 databases (db0) with 28497 keys, up 76 days 2 hours - response in 0.004s, hitrate is 12.83%, memory use is 194.14m (peak 205.14m, 6.49% of max, fragmentation 1.37%), connected_clients is 35, blocked_clients is 11 | redis_build_id=d322d411218ade61 total_connections_received=341191c used_memory_lua=33792 aof_rewrite_buffer_length=0 used_memory_rss=278749184b redis_git_dirty=0 loading=0 redis_mode=standalone latest_fork_usec=5588 repl_backlog_first_byte_offset=0 sync_partial_ok=0 master_repl_offset=0 uptime_in_days=76c aof_rewrite_scheduled=0 lru_clock=3649276 rdb_bgsave_in_progress=0 rejected_connections=0 repl_backlog_active=0 aof_delayed_fsync=1 sync_full=0 process_id=7776 used_memory_human=194.14m aof_current_rewrite_time_sec=-1 used_memory=203570960 aof_enabled=1 blocked_clients=11 aof_last_bgrewrite_status=ok aof_rewrite_in_progress=0 sync_partial_err=0 used_cpu_sys_children=2222.75 connected_slaves=0 repl_backlog_histlen=0 uptime_in_seconds=6576292c repl_backlog_size=1048576 os=linux 2.6.32-358.el6.x86_64 x86_64 used_cpu_sys=32640.80 aof_pending_bio_fsync=0 connected_clients=35 rdb_last_bgsave_time_sec=1 used_memory_peak_human=205.14m run_id=d1fc098d26fa4bbcef3eabeec6d19a858f03dd00 rdb_last_bgsave_status=ok pubsub_patterns=8 client_biggest_input_buf=0 keyspace_hits=42175896c rdb_last_save_time=1412935342 rdb_changes_since_last_save=318 db0_keys=28497 db0_expires=7 db0_avg_ttl=34003 aof_pending_rewrite=0 aof_buffer_length=0 config_file=/usr/local/redis-2.8.8/etc/redis.conf pubsub_channels=0 used_cpu_user_children=21375.34 hz=10 aof_last_rewrite_time_sec=2 aof_last_write_status=ok aof_base_size=82883253 used_cpu_user=18460.42 keyspace_misses=286602797c tcp_port=6379 total_commands_processed=797581196c mem_fragmentation_ratio=1.37 aof_current_size=146485850 rdb_current_bgsave_time_sec=-1 client_longest_output_list=0 instantaneous_ops_per_sec=114 evicted_keys=0c used_memory_peak=215106272b expired_keys=58977c 
total_keys=28497 total_expires=7 response_time=0.003802s hitrate=12.8281% memory_utilization=6.49013519287109%
[root@wgq plugins]#
8,查看redis监控服务状态
先重新加载nagios,使刚添加的redis监控配置生效:
[root@wgq objects]# service nagios reload
running configuration check...
reloading nagios configuration...
done
[root@wgq objects]#
redis监控服务界面,如下图所示:
9,操作过程中的报错处理过程
报错:
[root@wgq_line_cache_3_41 plugins]# ./check_redis.pl --help
Can't locate Redis.pm in @INC (@INC contains: /usr/local/lib64/perl5 /usr/local/share/perl5 /usr/lib64/perl5/vendor_perl /usr/share/perl5/vendor_perl /usr/lib64/perl5 /usr/share/perl5 .) at ./check_redis.pl line 421.
BEGIN failed--compilation aborted at ./check_redis.pl line 421.
[root@wgq_line_cache_3_41 plugins]#
[root@wgq_line_cache_3_41 plugins]# perl -MCPAN -e shell
terminal does not support addhistory.
cpan shell -- cpan exploration and modules installation (v1.9402)
enter 'h' for help.
cpan[1]> install Redis
…
Can't locate Module/Build/Tiny.pm in @INC (@INC contains: /usr/local/lib64/perl5 /usr/local/share/perl5 /usr/lib64/perl5/vendor_perl /usr/share/perl5/vendor_perl /usr/lib64/perl5 /usr/share/perl5 .) at Build.PL line 2.
BEGIN failed--compilation aborted at Build.PL line 2.
warning: no success on command[/usr/bin/perl build.pl --installdirs site]
warning (usually harmless): 'yaml' not installed, will not store persistent state
dams/redis-1.976.tar.gz
/usr/bin/perl build.pl --installdirs site -- not ok
running build test
make had some problems, won't test
running build install
make had some problems, won't install
could not read '/root/.cpan/build/redis-1.976-zhz6xi/meta.yml'. falling back to other methods to determine prerequisites……
YAML是以数据为中心的标记语言,其使用ASCII字符(如连字符、问号、冒号、逗号等)构造数据块(标量值或哈希)。和XML相同,YAML也是一种机器可识别的语言,并能和多种脚本语言相结合,其中一种便是Perl。上面的警告提示未安装YAML模块,需要安装YAML,如下执行:
cpan[2]> install YAML
……
appending installation info to /usr/lib64/perl5/perllocal.pod
ingy/yaml-1.12.tar.gz
/usr/bin/make install -- ok
cpan: yaml loaded ok (v1.12)
PS:这里可能会安装失败,失败原因通常是网络连接问题,可以多执行几次 install YAML 就会成功。
再继续执行 install Redis,有如下提示信息:
cpan[4]> install Redis
running install for module 'redis'
running build for d/da/dams/redis-1.976.tar.gz
has already been unwrapped into directory /root/.cpan/build/redis-1.976-cul4rt
'/usr/bin/perl build.pl --installdirs site' returned status 512, won't make
running build test
make had some problems, won't test
running build install
make had some problems, won't install
cpan[5]>
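build失败:执行 Build.PL 时出错,报错信息显示缺少 Module::Build::Tiny 模块(Can't locate Module/Build/Tiny.pm),需要先安装该模块,执行命令 install Module::Build::Tiny
cpan[5]> install Module::Build::Tiny
成功后,再执行 install Redis
cpan[6]> install Redis
Redis 模块安装执行成功。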
参考文档:http://exchange.nagios.org/directory/plugins/databases/check_redis-2epl/details