nagios : 오픈소스 모니터링 도구
프로그램 동작 방식 : Server 에서 Client 에 설치된 nrpe 프로그램을 통해 command 를 보내고, 해당 결과값을 임계값과 비교하여 알람 구성 Server : CentOS 7.4 구성 Client : CentOS 7.4/ CentOS 6.9
1. nagios 설치 a. Server 설치 [root@nagios opt]# yum install epel-release [root@nagios opt]# yum install nagios nagios-plugins* perl-rrdtool perl-GD perl-Nagios-Plugin nagios-nrpe nagios-plugins-all nagios-plugins-nrpe perl-Sys-Statistics-Linux b. Client 설치 [root@client opt]# yum install epel-release [root@client opt]# yum -y install nagios-nrpe nagios-plugins-all nagios-plugins-nrpe perl-Sys-Statistics-Linux c. 공통 : check_linux_stats.pl 다운로드
https://exchange.nagios.org/directory/Plugins/Operating-Systems/Linux/check_linux_stats/details check_linux_stats.pl 다운로드 후 /usr/lib64/nagios/plugins/ 에 저장 chmod 755 /usr/lib64/nagios/plugins/check_linux_stats.pl
check_linux_stats.pl
#!/usr/bin/perl # ---------------------------------------------------- # # File : check_linux_stats # Author : Damien SIAUD # Email : plugmon@free.fr # Date : 07/12/2009 # Rev. Date : 07/05/2010 # Rev. Date : 07/01/2013 # Rev. Date : 19/11/2015 # ---------------------------------------------------- # # This script require Sys::Statistics::Linux # # Plugin check for nagios # # License Information: # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 3 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program. If not, see <http://www.gnu.org/licenses/>. # # ---------------------------------------------------- #
use FindBin; use lib $FindBin::Bin; use utils qw($TIMEOUT %ERRORS &print_revision &support); use Getopt::Long; use Sys::Statistics::Linux; use Sys::Statistics::Linux::Processes; use Sys::Statistics::Linux::SysInfo;
use vars qw($script_name $script_version $o_sleep $o_pattern $o_cpu $o_context $o_procs $o_process $o_mem $o_net $o_disk $o_io $o_load $o_file $o_socket $o_paging $o_uptime $o_help $o_version $o_warning $o_critical $o_unit); use strict;
# --------------------------- globals -------------------------- #
# Default values for the package globals declared via `use vars`.
# check_options() may overwrite the $o_* values from the command line.
$script_name    = "check_linux_stats";
$script_version = "1.5";
$o_help         = undef;
$o_pattern      = undef;
$o_version      = undef;
$o_warning      = 0;
$o_critical     = 0;
$o_sleep        = 1;
$o_unit         = "MB";

# Plugin state; used as the key into %ERRORS for the exit code.
my $status = 'UNKNOWN';

# ---------------------------- main ----------------------------- #
# This call must sit on its own line: when it shares a line with the
# banner comment above it is swallowed by the comment and the command
# line is never parsed.
check_options();
# Dispatch to the first check whose command-line flag is set; with no
# check flag at all, print the help text instead.
if ($o_cpu) {
    check_cpu();
}
elsif ($o_context) {
    check_context_switch();
}
elsif ($o_mem) {
    check_mem();
}
elsif ($o_disk) {
    check_disk();
}
elsif ($o_io) {
    check_io();
}
elsif ($o_net) {
    check_net();
}
elsif ($o_load) {
    check_load();
}
elsif ($o_file) {
    check_file();
}
elsif ($o_procs) {
    check_procs();
}
elsif ($o_socket) {
    check_socket();
}
elsif ($o_process) {
    check_process();
}
elsif ($o_paging) {
    check_paging();
}
elsif ($o_uptime) {
    check_uptime();
}
else {
    help();
}

# Terminate the plugin output line and exit with the code mapped to the
# status the selected check left in $status.
print "\n";
exit $ERRORS{$status};

# check_cpu
#   Samples CPU statistics over $o_sleep seconds and prints a one-line
#   result with performance data. The used percentage (100 - idle) is
#   compared against $o_critical first, then $o_warning. Sets $status.
#   Prints "No data" (leaving $status untouched) when no CPU statistics
#   are available.
sub check_cpu {
    my $lxs = Sys::Statistics::Linux->new(cpustats => 1);
    $lxs->init;
    sleep $o_sleep;
    my $stat = $lxs->get;

    if (defined($stat->cpustats)) {
        $status = "OK";
        my $cpu      = $stat->cpustats->{cpu};
        my $cpu_used = sprintf("%.2f", (100 - $cpu->{idle}));

        if ($cpu_used >= $o_critical) {
            $status = "CRITICAL";
        }
        elsif ($cpu_used >= $o_warning) {
            $status = "WARNING";
        }

        # Plain assignment: the variable is being declared here, so there
        # is nothing to append to.
        my $perfdata = "|"
            . "idle=$cpu->{idle}%;$o_warning;$o_critical "
            . "user=$cpu->{user}% "
            . "system=$cpu->{system}% "
            . "iowait=$cpu->{iowait}%";
        # The steal counter is only reported when it is present.
        $perfdata .= " steal=$cpu->{steal}%" if (defined($cpu->{steal}));

        print "CPU $status : idle $cpu->{idle}% $perfdata";
    }
    else {
        print "No data";
    }
}

# _getproc($file)
#   Reads $file line by line and returns a hash reference mapping the
#   first word of each line to the rest of that line. When a first word
#   occurs more than once, the last occurrence wins.
#   Dies with "Cannot open ..." if the file cannot be opened.
sub _getproc {
    my $file = shift;
    my %proc = ();

    # Three-argument open with a lexical handle: the file name can no
    # longer be interpreted as an open mode, and the handle is not a
    # package-global bareword.
    open(my $fh, '<', $file) or die("Cannot open $file: $!");
    while (my $line = <$fh>) {
        chomp($line);
        if ($line =~ /^(\w+)\s+(.*)$/) {
            $proc{$1} = $2;
        }
    }
    close $fh;

    return \%proc;
}

# check_context_switch
#   Reads the "ctxt" counter from /proc/stat twice, one sampling interval
#   apart, and reports the per-second rate. The rate is compared against
#   $o_critical first, then $o_warning. Sets $status.
#   Prints "No data" when the first read has no (or a zero) ctxt value.
sub check_context_switch {
    # Read the /proc/stat
    my $file  = '/proc/stat';
    my $proc1 = _getproc($file);

    if ($proc1->{ctxt}) {
        $status = "OK";

        # "-s" without a value leaves $o_sleep at 0, which would make the
        # division below die; sample over at least one second instead.
        my $interval = ($o_sleep && $o_sleep > 0) ? $o_sleep : 1;
        sleep $interval;

        my $proc2 = _getproc($file);
        my $ctxt  = int(($proc2->{ctxt} - $proc1->{ctxt}) / $interval);

        if ($ctxt >= $o_critical) {
            $status = "CRITICAL";
        }
        elsif ($ctxt >= $o_warning) {
            $status = "WARNING";
        }

        print "CONTEXT SWITCH $status : context $ctxt|ctxt=$ctxt";
    }
    else {
        print "No data";
    }
}

# check_procs
#   NOTE(review): this excerpt stops after the statistics are fetched; the
#   rest of this sub (and its closing brace) is not present here and must
#   be restored from the upstream plugin before the script can compile.
sub check_procs {
    my $lxs = Sys::Statistics::Linux->new(procstats => 1);
    $lxs->init;
    sleep $o_sleep;
    my $stat = $lxs->get;
# version
#   Prints "<script name> v<script version>" followed by a newline,
#   using the $script_name and $script_version globals.
sub version {
    printf "%s v%s\n", $script_name, $script_version;
}
# help
#   Prints the version line, the usage summary and the option reference
#   with example invocations.
#   NOTE(review): usage() is not defined in the visible part of this
#   script; it is presumably provided by the portion missing from this
#   excerpt -- confirm before use.
sub help {
    version();
    usage();

    print <<HELP;
    -h, --help
        print this help message
    -C, --cpu=CPU USAGE
    -P, --procs
    -T, --top=PROCESSES VIRTUAL MEMORY
    -M, --memory=MEMORY USAGE
    -N, --network=NETWORK USAGE
    -D, --disk=DISK USAGE
    -I, --io=DISK IO USAGE
    -L, --load=LOAD AVERAGE
    -F, --file=FILE STATS
    -S, --socket=SOCKET STATS
    -W, --paging=PAGING AND SWAPPING STATS
    -X, --ctxt=CPU CONTEXT SWITCH
    -U, --uptime
    -p, --pattern
        eth0,eth1...sda1,sda2.../usr,/tmp
    -w, --warning
    -c, --critical
    -s, --sleep
    -u, --unit
        %, KB, MB or GB left on disk usage, default : MB
        REQS OR BYTES on disk io statistics, default : REQS
    -V, --version
        version number

    ex :
    Memory usage                    : perl check_linux_stats.pl -M -w 90 -c 95
    Cpu usage                       : perl check_linux_stats.pl -C -w 90 -c 95 -s 5
    Disk usage                      : perl check_linux_stats.pl -D -w 95 -c 100 -u % -p /tmp,/usr,/var
    Load average                    : perl check_linux_stats.pl -L -w 10,8,5 -c 20,18,15
    Paging statistics               : perl check_linux_stats.pl -W -w 10,1000,1 -c 20,2000,20 -s 3
    Process statistics              : perl check_linux_stats.pl -P -w 100 -c 200
    I/O statistics on disk device   : perl check_linux_stats.pl -I -w 10 -c 5 -p sda1,sda4,sda5,sda6
    Network usage                   : perl check_linux_stats.pl -N -w 10000 -c 100000000 -p eth0
    Processes virtual memory        : perl check_linux_stats.pl -T -w 9551820 -c 9551890 -p /var/run/sendmail.pid
    Cpu context switch              : perl check_linux_stats.pl -X -w 6000 -c 70000 -s 2
    Uptime                          : perl check_linux_stats.pl -U -w 5
HELP
}

# check_options
#   Parses the command line into the $o_* globals. Short options may be
#   bundled. When --help or --version is given, prints the corresponding
#   text and exits with the UNKNOWN code.
sub check_options {
    Getopt::Long::Configure("bundling");
    GetOptions(
        'h'          => \$o_help,
        'help'       => \$o_help,
        's:i'        => \$o_sleep,
        'sleep:i'    => \$o_sleep,
        'C'          => \$o_cpu,
        'cpu'        => \$o_cpu,
        'X'          => \$o_context,
        'ctx'        => \$o_context,
        # The help text advertises --ctxt, so accept that spelling too.
        'ctxt'       => \$o_context,
        'P'          => \$o_procs,
        'procs'      => \$o_procs,
        'T'          => \$o_process,
        'top'        => \$o_process,
        'M'          => \$o_mem,
        'memory'     => \$o_mem,
        'N'          => \$o_net,
        'network'    => \$o_net,
        'D'          => \$o_disk,
        'disk'       => \$o_disk,
        'I'          => \$o_io,
        'io'         => \$o_io,
        'L'          => \$o_load,
        'load'       => \$o_load,
        'F'          => \$o_file,
        'file'       => \$o_file,
        'S'          => \$o_socket,
        'socket'     => \$o_socket,
        'W'          => \$o_paging,
        'paging'     => \$o_paging,
        'U'          => \$o_uptime,
        'uptime'     => \$o_uptime,
        'V'          => \$o_version,
        'version'    => \$o_version,
        'p:s'        => \$o_pattern,
        'pattern:s'  => \$o_pattern,
        'w:s'        => \$o_warning,
        'warning:s'  => \$o_warning,
        'c:s'        => \$o_critical,
        'critical:s' => \$o_critical,
        'u:s'        => \$o_unit,
        'unit:s'     => \$o_unit
    );

    if (defined($o_help)) {
        help();
        exit $ERRORS{'UNKNOWN'};
    }

    if (defined($o_version)) {
        version();
        exit $ERRORS{'UNKNOWN'};
    }
}

# bytes_to_readable($bytes)
#   Returns $bytes scaled by 1024 into the largest unit of B, KB, MB, GB
#   in which the value is below 1024, formatted with two decimals and the
#   unit suffix (e.g. "1.50KB"). Values of 1024 GB and above are reported
#   in GB instead of falling off the end of the loop with no result.
sub bytes_to_readable {
    my ($bignum) = @_;
    my @units = ("B", "KB", "MB", "GB");

    foreach my $idx (0 .. $#units) {
        return sprintf("%.2f", $bignum) . $units[$idx]
            if $bignum < 1024 || $idx == $#units;
        $bignum /= 1024;
    }
}

# bytes_to_kilobytes($bytes)
#   Returns $bytes / 1024 formatted with two decimals (no unit suffix).
sub bytes_to_kilobytes {
    my ($bignum) = @_;
    return sprintf("%.2f", $bignum / 1024);
}

# bytes_to_megabytes($bytes)
#   Returns $bytes / 1024**2 formatted with two decimals (no unit suffix).
sub bytes_to_megabytes {
    my ($bignum) = @_;
    return sprintf("%.2f", $bignum / 1048576);
}

# bytes_to_gigabytes($bytes)
#   Returns $bytes / 1024**3 formatted with two decimals (no unit suffix).
sub bytes_to_gigabytes {
    my ($bignum) = @_;
    return sprintf("%.2f", $bignum / 1073741824);
}
2. client 환경 설정 a. 환경 설정 파일 [root@client nagios]# vi /etc/nagios/nrpe.cfg
# ALLOWED HOST ADDRESSES allowed_hosts=NAGIOS-Server-IP
# COMMAND ARGUMENT PROCESSING (ARG 사용시) # Values: 0=do not allow arguments, 1=allow command arguments dont_blame_nrpe=1
#command 설정 command[check_users]=/usr/lib64/nagios/plugins/check_users -w $ARG1$ -c $ARG2$ command[check_disk1]=/usr/lib64/nagios/plugins/check_disk $ARG1$ command[check_disk2]=/usr/lib64/nagios/plugins/check_disk $ARG1$ command[check_disk3]=/usr/lib64/nagios/plugins/check_disk $ARG1$ command[check_disk4]=/usr/lib64/nagios/plugins/check_disk $ARG1$ command[check_load]=/usr/lib64/nagios/plugins/check_load -r -w $ARG1$ -c $ARG2$ command[check_zombie_procs]=/usr/lib64/nagios/plugins/check_procs $ARG1$ -s Z command[check_total_procs]=/usr/lib64/nagios/plugins/check_procs $ARG1$ command[check_swap]=/usr/lib64/nagios/plugins/check_swap $ARG1$ command[check_mysql]=/usr/lib64/nagios/plugins/check_mysql -H localhost $ARG1$ command[check_smart]=/usr/lib64/nagios/plugins/check_smart command[check_io]=/usr/lib64/nagios/plugins/check_linux_stats.pl -I $ARG1$ -s 5 command[check_mem]=/usr/lib64/nagios/plugins/check_linux_stats.pl -M $ARG1$ command[check_open_files]=/usr/lib64/nagios/plugins/check_linux_stats.pl -F $ARG1$ command[check_procs]=/usr/lib64/nagios/plugins/check_linux_stats.pl $ARG1$ command[check_net_eth0]=/usr/lib64/nagios/plugins/check_linux_stats.pl $ARG1$ command[check_socket]=/usr/lib64/nagios/plugins/check_linux_stats.pl $ARG1$ command[check_uptime]=/usr/lib64/nagios/plugins/check_linux_stats.pl $ARG1$
b. nagios client 시작 [root@client nagios]# systemctl start nrpe 서비스 포트 : TCP :5666
|