기술노트

기술노트

게시물 상세
[Linux] Nagios 설치-1
작성자 : 관리자(jieun@ehostidc.co.kr)  작성일 : 2018-07-05   조회수 : 5261

nagios : 오픈소스 모니터링 도구

프로그램 동작 방식 : Server 가 Client 에 설치된 nrpe 프로그램을 통해 command 를 실행시키고, 그 결과값을 임계값과 비교하여 알람을 발생시킴
구성 Server : CentOS 7.4
구성 Client : CentOS 7.4 / CentOS 6.9

1.  nagios 설치
a. Server 설치
[root@nagios opt]# yum install epel-release
[root@nagios opt]# yum install nagios nagios-plugins* perl-rrdtool perl-GD perl-Nagios-Plugin nagios-nrpe nagios-plugins-all nagios-plugins-nrpe perl-Sys-Statistics-Linux
 
b. Client 설치
[root@client opt]# yum install epel-release
[root@client opt]# yum -y install nagios-nrpe nagios-plugins-all nagios-plugins-nrpe perl-Sys-Statistics-Linux
 
c. 공통 : check_linux_stats.pl 다운로드

https://exchange.nagios.org/directory/Plugins/Operating-Systems/Linux/check_linux_stats/details
 check_linux_stats.pl 다운로드 후 /usr/lib64/nagios/plugins/  에 저장
 chmod 755 /usr/lib64/nagios/plugins/check_linux_stats.pl 

  check_linux_stats.pl (아래 소스는 일부 함수가 생략된 발췌본이므로, 실제 설치 시에는 위 링크에서 전체 파일을 내려받아 사용)

 #!/usr/bin/perl
# ---------------------------------------------------- #
# File : check_linux_stats
# Author : Damien SIAUD
# Email : plugmon@free.fr
# Date : 07/12/2009
# Rev. Date : 07/05/2010
# Rev. Date : 07/01/2013
# Rev. Date : 19/11/2015
# ---------------------------------------------------- #
# This script require Sys::Statistics::Linux
#
# Plugin check for nagios
#
# License Information:
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
#
# ---------------------------------------------------- #

use FindBin;
use lib $FindBin::Bin;
use utils qw($TIMEOUT %ERRORS &print_revision &support);
use Getopt::Long;
use Sys::Statistics::Linux;
use Sys::Statistics::Linux::Processes;
use Sys::Statistics::Linux::SysInfo;


use vars qw($script_name $script_version $o_sleep $o_pattern $o_cpu $o_context $o_procs $o_process $o_mem $o_net $o_disk $o_io $o_load $o_file $o_socket $o_paging $o_uptime $o_help $o_version $o_warning $o_critical $o_unit);
use strict;

# --------------------------- globals -------------------------- #

# Plugin identity, printed by version().
($script_name, $script_version) = ("check_linux_stats", "1.5");

# Options that stay undefined until given on the command line.
($o_help, $o_pattern, $o_version) = (undef, undef, undef);

# Both thresholds start at 0 and are overwritten by -w / -c.
($o_warning, $o_critical) = (0, 0);

# Argument handed to sleep() between two samples (-s).
$o_sleep = 1;

# Default unit (-u).
$o_unit = "MB";

# Result state; each check overwrites it and the main section turns it
# into the exit code through %ERRORS.
my $status = 'UNKNOWN';

# ---------------------------- main ----------------------------- #
check_options();

# Ordered (option flag, handler) pairs. The first flag that is set wins,
# so the order below is the precedence when several checks are requested.
my @dispatch_table = (
    [ \$o_cpu,     \&check_cpu            ],
    [ \$o_context, \&check_context_switch ],
    [ \$o_mem,     \&check_mem            ],
    [ \$o_disk,    \&check_disk           ],
    [ \$o_io,      \&check_io             ],
    [ \$o_net,     \&check_net            ],
    [ \$o_load,    \&check_load           ],
    [ \$o_file,    \&check_file           ],
    [ \$o_procs,   \&check_procs          ],
    [ \$o_socket,  \&check_socket         ],
    [ \$o_process, \&check_process        ],
    [ \$o_paging,  \&check_paging         ],
    [ \$o_uptime,  \&check_uptime         ],
);

# Without any check option the help text is shown instead.
my $selected_check = \&help;
foreach my $candidate (@dispatch_table) {
    my ($flag_ref, $handler) = @{$candidate};
    next unless ${$flag_ref};
    $selected_check = $handler;
    last;
}
$selected_check->();

# Terminate the output line, then exit with the code mapped from $status.
print "\n";
exit $ERRORS{$status};
# CPU usage check (-C).
#
# Takes CPU statistics through Sys::Statistics::Linux with a pause of
# $o_sleep between init and get, derives the used percentage as
# 100 - idle and compares it with $o_critical / $o_warning.
# Note that the thresholds apply to the *used* percentage while the
# message and the first perfdata entry report the *idle* percentage.
# Sets the file-level $status and prints one line without a trailing
# newline; prints "No data" (leaving $status untouched) when no CPU
# statistics are returned.
sub check_cpu {
    my $sampler = Sys::Statistics::Linux->new(cpustats  => 1);
    $sampler->init;
    sleep $o_sleep;
    my $snapshot = $sampler->get;

    unless (defined($snapshot->cpustats)) {
        print "No data";
        return;
    }

    # Aggregated figures over all CPUs.
    my $cpu = $snapshot->cpustats->{cpu};
    my $cpu_used = sprintf("%.2f", (100-$cpu->{idle}));

    # Critical takes precedence over warning.
    $status = $cpu_used >= $o_critical ? "CRITICAL"
            : $cpu_used >= $o_warning  ? "WARNING"
            :                            "OK";

    my $perfdata = join "",
        "|",
        "idle=$cpu->{idle}%;$o_warning;$o_critical ",
        "user=$cpu->{user}% ",
        "system=$cpu->{system}% ",
        "iowait=$cpu->{iowait}%";

    # steal is appended only when the statistics contain it.
    if (defined($cpu->{steal})) {
        $perfdata .= " steal=$cpu->{steal}%";
    }

    print "CPU $status : idle $cpu->{idle}% $perfdata";
}
# Parse a "key value" style file (such as /proc/stat) into a hash.
#
# Parameter: $file - path of the file to read.
# Returns:   hash reference mapping the first word of each matching line
#            to the remainder of that line (leading whitespace removed).
#            Lines that do not look like "word <whitespace> rest" are
#            skipped; a repeated key keeps its last value.
# Dies:      with "Cannot open <file>: <reason>" when the file cannot be
#            opened.
sub _getproc {
    my $file = shift;
    my %proc = ();

    # Three-argument open with a lexical handle: the path is taken
    # literally (no whitespace stripping or mode characters read from the
    # name) and no global FILE handle is shared with other code.
    open(my $fh, '<', $file) or die ("Cannot open $file: $!");
    while (my $line = <$fh>) {
        chomp($line);
        if ($line =~ /^(\w+)\s+(.*)$/) {
            $proc{$1} = $2;
        }
    }
    close($fh);

    return \%proc;
}
# Context switch rate check (-X).
#
# Reads the "ctxt" counter from /proc/stat twice, one sampling interval
# apart, and reports the difference per second. The rate is compared with
# $o_critical first and $o_warning second; the result is stored in the
# file-level $status and one line (without trailing newline) is printed.
# Prints "No data" and leaves $status untouched when the first sample has
# no usable ctxt value.
sub check_context_switch {
    # Read the  /proc/stat
    my $file = '/proc/stat';
    my $proc1 = _getproc($file);
    if ($proc1->{ctxt}) {
        $status = "OK";

        # "-s" takes an optional value, so $o_sleep can arrive here as 0
        # (bare "-s" or "-s 0"). Dividing by it would abort the plugin
        # with "Illegal division by zero", so fall back to the
        # one-second default for any non-positive interval.
        my $interval = (defined($o_sleep) && $o_sleep > 0) ? $o_sleep : 1;

        sleep $interval;
        my $proc2 = _getproc($file);
        my $ctxt = int(($proc2->{ctxt} - $proc1->{ctxt}) / $interval);

        if ($ctxt >= $o_critical) {
            $status = "CRITICAL";
        }
        elsif ($ctxt >= $o_warning) {
            $status = "WARNING";
        }
        print "CONTEXT SWITCH $status : context $ctxt|ctxt=$ctxt";
    }
    else {
        print "No data";
    }
}
# Process statistics check (selected with -P / --procs).
# NOTE(review): this definition is cut off in this listing. Only the
# statistics collection below is present; whatever follows it (presumably
# the threshold comparison and the output) and the closing brace of the sub
# are missing, and the next definition starts right after. Restore the rest
# from the complete plugin file before running this script.
sub check_procs {
    # Collector restricted to process statistics.
    my $lxs = Sys::Statistics::Linux->new(procstats => 1);
 $lxs->init;
        # Pause for $o_sleep between init and get.
        sleep $o_sleep;
        my $stat = $lxs->get;

# Print the plugin name and version as "<name> v<version>" on one line.
sub version {
    printf "%s v%s\n", $script_name, $script_version;
}


# Print the version line, the usage summary and the full option help.
#
# The option list is kept in line with what check_options() registers:
# the context switch check is reachable as -X / --ctx (the help used to
# advertise --ctxt, which the parser does not know), and -T / --top, which
# the examples below already use, is now listed as well.
sub help {
 version();
 usage();

 print <<HELP;
 -h, --help
     print this help message
 -C, --cpu=CPU USAGE
 -P, --procs
 -T, --top=PROCESSES VIRTUAL MEMORY
 -M, --memory=MEMORY USAGE
 -N, --network=NETWORK USAGE
 -D, --disk=DISK USAGE
 -I, --io=DISK IO USAGE
 -L, --load=LOAD AVERAGE
 -F, --file=FILE STATS
 -S, --socket=SOCKET STATS
 -W, --paging=PAGING AND SWAPPING STATS
 -X, --ctx=CPU CONTEXT SWITCH
 -U, --uptime
 -p, --pattern
  eth0,eth1...sda1,sda2.../usr,/tmp
 -w, --warning
 -c, --critical
 -s, --sleep
 -u, --unit
               %, KB, MB or GB left on disk usage, default : MB
        REQS OR BYTES on disk io statistics, default : REQS
 -V, --version
  version number
 ex :
 Memory usage                    : perl check_linux_stats.pl -M -w 90 -c 95
 Cpu usage                       : perl check_linux_stats.pl -C -w 90 -c 95 -s 5
 Disk usage                      : perl check_linux_stats.pl -D -w 95 -c 100 -u % -p /tmp,/usr,/var
 Load average                    : perl check_linux_stats.pl -L -w 10,8,5 -c 20,18,15
 Paging statistics    : perl check_linux_stats.pl -W -w 10,1000,1 -c 20,2000,20 -s 3
 Process statistics              : perl check_linux_stats.pl -P -w 100 -c 200
 I/O statistics on disk device   : perl check_linux_stats.pl -I -w 10 -c 5 -p sda1,sda4,sda5,sda6    
 Network usage                   : perl check_linux_stats.pl -N -w 10000 -c 100000000 -p eth0
 Processes virtual memory        : perl check_linux_stats.pl -T -w 9551820 -c 9551890 -p /var/run/sendmail.pid
 Cpu context switch              : perl check_linux_stats.pl -X -w 6000 -c 70000 -s 2
 Uptime       : perl check_linux_stats.pl -U -w 5
HELP
}
# Parse the command line into the $o_* option variables.
#
# Short options may be bundled. Value options (-s, -p, -w, -c, -u) take an
# optional argument. The context switch check is accepted as --ctx and
# also as --ctxt, the spelling shown in the help text.
#
# Exits with the UNKNOWN code after printing the help when the command
# line cannot be parsed or -h/--help is given, and after printing the
# version when -V/--version is given. Otherwise returns normally with the
# option variables filled in.
sub check_options {
    Getopt::Long::Configure("bundling");
    my $parsed_ok = GetOptions(
        'h'   => \$o_help,     'help'       => \$o_help,
        's:i' => \$o_sleep,    'sleep:i'    => \$o_sleep,
        'C'   => \$o_cpu,      'cpu'        => \$o_cpu,
        'X'   => \$o_context,  'ctx|ctxt'   => \$o_context,
        'P'   => \$o_procs,    'procs'      => \$o_procs,
        'T'   => \$o_process,  'top'        => \$o_process,
        'M'   => \$o_mem,      'memory'     => \$o_mem,
        'N'   => \$o_net,      'network'    => \$o_net,
        'D'   => \$o_disk,     'disk'       => \$o_disk,
        'I'   => \$o_io,       'io'         => \$o_io,
        'L'   => \$o_load,     'load'       => \$o_load,
        'F'   => \$o_file,     'file'       => \$o_file,
        'S'   => \$o_socket,   'socket'     => \$o_socket,
        'W'   => \$o_paging,   'paging'     => \$o_paging,
        'U'   => \$o_uptime,   'uptime'     => \$o_uptime,
        'V'   => \$o_version,  'version'    => \$o_version,
        'p:s' => \$o_pattern,  'pattern:s'  => \$o_pattern,
        'w:s' => \$o_warning,  'warning:s'  => \$o_warning,
        'c:s' => \$o_critical, 'critical:s' => \$o_critical,
        'u:s' => \$o_unit,     'unit:s'     => \$o_unit
    );

    # GetOptions returns false when it met an unknown or malformed option.
    # Running a check anyway would evaluate it with thresholds the caller
    # did not intend, so report a usage problem as UNKNOWN instead.
    if (!$parsed_ok) {
        help();
        exit $ERRORS{'UNKNOWN'};
    }
    if (defined($o_help)) {
        help();
        exit $ERRORS{'UNKNOWN'};
    }
    if (defined($o_version)) {
        version();
        exit $ERRORS{'UNKNOWN'};
    }
}
# Format a byte count with the largest fitting unit and two decimals,
# e.g. 1536 -> "1.50KB".
#
# Parameter: $bignum - number of bytes.
# Returns:   the formatted string. Values below 1024 GB are rendered in
#            B, KB, MB or GB; larger values are rendered in TB, so the
#            function always returns a string (previously nothing was
#            returned once the value reached 1024 GB).
sub bytes_to_readable {
    my ($bignum) = @_;
    my @units = ("B", "KB", "MB", "GB", "TB");

    # Try every unit except the last; the last one is the catch-all.
    foreach my $unit (@units[0 .. $#units - 1]) {
        return sprintf("%.2f", $bignum) . "$unit" if $bignum < 1024;
        $bignum /= 1024;
    }
    return sprintf("%.2f", $bignum) . $units[-1];
}
# Convert a byte count to kilobytes (1 KB = 1024 bytes).
# Returns the value as a string with two decimals.
sub bytes_to_kilobytes {
    my $bytes = shift;
    my $kilobytes = $bytes / 1024;
    return sprintf("%.2f", $kilobytes);
}
# Convert a byte count to megabytes (1 MB = 1048576 bytes).
# Returns the value as a string with two decimals.
sub bytes_to_megabytes {
    my $bytes = shift;
    my $megabytes = $bytes / 1048576;
    return sprintf("%.2f", $megabytes);
}
# Convert a byte count to gigabytes (1 GB = 1073741824 bytes).
# Returns the value as a string with two decimals.
sub bytes_to_gigabytes {
    my $bytes = shift;
    my $gigabytes = $bytes / 1073741824;
    return sprintf("%.2f", $gigabytes);
}

 

2. client 환경 설정
a. 환경 설정 파일
[root@client nagios]# vi /etc/nagios/nrpe.cfg

# ALLOWED HOST ADDRESSES
allowed_hosts=NAGIOS-Server-IP

# COMMAND ARGUMENT PROCESSING (ARG 사용시)
# Values: 0=do not allow arguments, 1=allow command arguments
dont_blame_nrpe=1

#command 설정
command[check_users]=/usr/lib64/nagios/plugins/check_users -w $ARG1$ -c $ARG1$
command[check_disk1]=/usr/lib64/nagios/plugins/check_disk $ARG1$
command[check_disk2]=/usr/lib64/nagios/plugins/check_disk $ARG1$
command[check_disk3]=/usr/lib64/nagios/plugins/check_disk $ARG1$
command[check_disk4]=/usr/lib64/nagios/plugins/check_disk $ARG1$
command[check_load]=/usr/lib64/nagios/plugins/check_load -r -w $ARG1$ -c $ARG2$
command[check_zombie_procs]=/usr/lib64/nagios/plugins/check_procs $ARG1$ -s Z
command[check_total_procs]=/usr/lib64/nagios/plugins/check_procs $ARG1$
command[check_swap]=/usr/lib64/nagios/plugins/check_swap $ARG1$
command[check_mysql]=/usr/lib64/nagios/plugins/check_mysql -H localhost $ARG1$
command[check_smart]=/usr/lib64/nagios/plugins/check_smart
command[check_io]=/usr/lib64/nagios/plugins/check_linux_stats.pl -I $ARG1$ -s 5
command[check_mem]=/usr/lib64/nagios/plugins/check_linux_stats.pl -M $ARG1$
command[check_open_files]=/usr/lib64/nagios/plugins/check_linux_stats.pl -F $ARG1$
command[check_procs]=/usr/lib64/nagios/plugins/check_linux_stats.pl $ARG1$
command[check_net_eth0]=/usr/lib64/nagios/plugins/check_linux_stats.pl $ARG1$
command[check_socket]=/usr/lib64/nagios/plugins/check_linux_stats.pl $ARG1$
command[check_uptime]=/usr/lib64/nagios/plugins/check_linux_stats.pl $ARG1$

b. nagios client 시작 (아래 명령은 CentOS 7 기준이며, CentOS 6.9 에서는 service nrpe start 사용)
[root@client nagios]# systemctl start nrpe
서비스 포트 : TCP 5666

이전글 [Windows] Nagios Window 서버 client 등록
다음글 [Linux] 리눅스 서버 점검시 체크 사항