#!/usr/bin/env perl
#
# $Id: pollcsm.pl,v 2.9 2008/12/06 21:59:16 ksb Exp $
#
# pollcss polls a Cisco CSS via SNMP.  We collect information about the
# flows on each service, and report those to a central rrdd.
# http://mbo.telecom.fedex.com/jkbstats/csm_health.cgi?
#	h=192.168.193.2&c=~public
# enterprises.9.9.254 (extended); enterprises.9.9.161 (classic)
#
# -- ksb, candyh
#

use lib '/usr/local/lib/sac/perl'.join('.', unpack('c*', $^V)),
	'/usr/local/lib/sac';

use strict;
use Socket;
use Sys::Hostname;
use IO::Socket;

use Net::SNMP (qw(oid_lex_sort oid_context_match));
use Getopt::Std;
my(%opts);				# getopt
my($progname);				# us.

use vars qw($VERSION @CSSMIB @PORTMIB64 $s);
$VERSION='$Id: pollcsm.pl,v 2.9 2008/12/06 21:59:16 ksb Exp $';

my(%real);				# find the reals to scan
my(%virt);				# find the vips to scan
my(%nsch);				# name server cache hash

# use site policy to map a DNS reverse to a peg sampler file		(ksb,pv)
# cluster/name/vipX.spec, or so.  Caller adds "port" and 'rrd'.
sub vipName($)
{
	my($n) = shift @_;
	$n =~ s/\.fedex\.com\.*$//o;
	$n =~ s/\.sac$//o;
	if ($n =~ m/^(vip\d+)[-_]([^.]+)(\..*)/o) {
		return "$2/app-$1$3";
	}
	if ($n =~ m/^([^-]*)-(vip.*)/o || $n =~ m/^([^_]*)_(vip.*)/o) {
		return "$1/app-$2";
	}
	if ($n =~ m/^(.*\D+)(\d+.*)/o) {
		return "$1/app-vip$2";
	}
	# some fall-back we should never use, I hope -- ksb
	if ($n =~ m/^(.+)vip(.*)/o) {
		return "$1/app-vip$2";
	}
	if ($n =~ m/^vip(.*)/o) {
		return "csm/app-$n";
	}
	return "csm/$n";
}

# Walk a Cisco CSM (or other SLB) for REAL stats/data			(ksb)
# 1.3.6.1.4.1.9.9.161.1.3.1.1.4		# slbRealState (2 good, 9 bad)
# 1.3.6.1.4.1.9.9.161.1.3.1.1.18	# slbRealTotalConnections
# 1.3.6.1.4.1.9.9.161.1.3.1.1.5		# slbRealNumberOfConnections
sub PollSLB($)
{
	my($switch) = shift;
	my($s, $e) = Net::SNMP->session(-hostname=>$switch,-version=>'2c',-community=>$opts{'c'});
	die "NET::SNMP->session v2c: $switch: $e" unless $s;

	my($tp);
	my($base,$next,$status,$oid);
	my($ip,$host,$port);

	$base='1.3.6.1.4.1.9.9.161.1.3.1.1.4';	# current state
	$next=$base;
scan:	while (defined($s->get_bulk_request(-maxrepetitions=>20, -varbindlist=>[$next]))) {
		foreach $oid (oid_lex_sort(keys(%{$s->var_bind_list()}))) {
			last scan if (!oid_context_match($base, $oid));
			$next = $oid;
			$status = $s->var_bind_list()->{$oid};
			last scan if ($status eq 'endOfMibView');
			next if (1 == $status);	# outOfService
			next unless ($oid =~ m/^$base\.(.*)\.(\d+\.\d+\.\d+\.\d+)\.(\d+)$/);
			$tp = $1;
			$ip = $2;
			$port = $3;
			$nsch{$ip} ||= gethostbyaddr(inet_aton($ip), AF_INET);
			$real{$ip}->{$port}->{'name'} ||= $nsch{$ip};
			$real{$ip}->{$port}->{'state'} ||= $status;
			$real{$ip}->{$port}->{'tlc'} ||= 0;
			$real{$ip}->{$port}->{'lc'} ||= 0;
			$real{$ip}->{$port}->{'key'} = $tp;
		}
	}

	# fetch the curent counters for the real hosts
	my(@mib);
	my($res,$tail,$found);
	foreach $ip (keys(%real)) {
		$tp = $real{$ip};
		foreach $port (keys(%{$tp})) {
			$res = $tp->{$port};
			$tail = ${$res}{'key'}.".$ip.$port";
			# slbRealTotalConnections (18)
			# slbRealNumberOfConnections (5)
			@mib = ('1.3.6.1.4.1.9.9.161.1.3.1.1.18.'.$tail,
				'1.3.6.1.4.1.9.9.161.1.3.1.1.5.'.$tail);
			next unless $found = $s->get_request(@mib);
			${$res}{'tlc'} += $$found{$mib[0]};
			${$res}{'lc'} += $$found{$mib[1]};
		}
	}

	# now look at the virtual hosts in front of the reals
	my(%temp) = ();
	$base = '1.3.6.1.4.1.9.9.161.1.4.1.1.2';	# slbVirtualServerState
	$next = $base;
vscan:	while (defined($s->get_bulk_request(-maxrepetitions=>20, -varbindlist=>[$next]))) {
		foreach $oid (oid_lex_sort(keys(%{$s->var_bind_list()}))) {
			last vscan if (!oid_context_match($base, $oid));
			$next = $oid;
			$status = $s->var_bind_list()->{$oid};
			last vscan if ($status eq 'endOfMibView');
			next if (3 == $status);	# standby, do not dup
			next unless ($oid =~ m/^$base\.(.*)$/);
			$tp = $1;
			$temp{$1} = $status;
		}
	}
	# pick up the details
	my($rrd);
	foreach $tail (keys(%temp)) {
		@mib = ('1.3.6.1.4.1.9.9.161.1.4.1.1.4.'.$tail,	# IpAddress
			'1.3.6.1.4.1.9.9.161.1.4.1.1.5.'.$tail, # Port
			'1.3.6.1.4.1.9.9.161.1.4.1.1.17.'.$tail, # Number Con
			'1.3.6.1.4.1.9.9.161.1.4.1.1.18.'.$tail); # Total con
		next unless $found = $s->get_request(@mib);
		$ip = $$found{$mib[0]};
		$nsch{$ip} ||= gethostbyaddr(inet_aton($ip), AF_INET);
		if (!defined($nsch{$ip})) {
			print STDERR "$progname: no reverse for $ip\n";
			next;
		}
		$rrd = vipName($nsch{$ip}).'_'.$$found{$mib[1]};
		$virt{$rrd}->{'state'} ||= $temp{$tail};
		$virt{$rrd}->{'lc'} += $$found{$mib[2]};
		$virt{$rrd}->{'tlc'} += $$found{$mib[3]};
	}
	$s->close();
}

MAIN: {
	my($res,$dres);				# results from SNMP Query
	my($e);					# snmp session errors
	my($speedmib);				# MIB to poll for speeds
	my($hostname);				# hostname we're walking

	$progname = $0;
	$progname =~ s/.*\///;
	getopts('DH:c:nhp:Vx', \%opts);
	# set a default snmp read-community name and other values
	$opts{'H'} ||= 'peg.sac.fedex.com';
	$opts{'c'} ||= 'public';

	if ($opts{'V'}) {
		print "$progname: $VERSION\n";
		exit(0);
	}
	if ($opts{'h'} || 0 == scalar(@ARGV)) {
		print "$progname: usage [-Dnx] [-c community] [-H samplehost] [-p delay] csmA [csmB]\n",
		    "$progname: usage -h\n",
		    "$progname: usage -V\n",
		    "c community  specify a snmp read-community\n",
		    "D            display all the PEG resource we'll update\n",
		    "h            show this help message\n",
		    "H samplehost host receiving RRD samples\n",
		    "n            do not send the actual updates\n",
		    "p delay      be persistant, report every delay seconds\n",
		    "V            show version information\n",
		    "x            trace PEG udpates on stderr\n",
		    "csm{A,B}     the primary and failover CSMs\n";
		exit(0);
	}

	my($ip,$host,$port,$tp,$limit,$when,$rrd,$hr);
	my($ipout,$proto,$sockaddr,$update);

	# Open up a data socket to PEG
	$proto = getprotobyname('udp');
	while ($ipout = inet_aton($opts{'H'})) {
		%real = ();
		%nsch = ();
		foreach $hostname (@ARGV) {
			PollSLB($hostname);
		}
		$when = time;

		$sockaddr = sockaddr_in(31415, $ipout);
		last unless socket(SOCKET, PF_INET, SOCK_DGRAM, $proto);

		# This outputs way too fast, we drop a lot of the UDP updates,
		# so we put in a speed limit [20/second] (ksb)
		$limit = 1;
		print STDERR "rrdup ", $opts{'H'}, " <<\\!\n" if $opts{'x'};
		foreach $ip (keys(%real)) {
			$tp = $real{$ip};
			foreach $port (keys(%{$tp})) {
				$res = $tp->{$port};
				$host = ${$res}{'name'};
				$host =~ s/\..*//o;	# ZZZ cmd line option
				$update = "host/$host/app-real$port.rrd";
				if ($opts{'D'}) {
					print "$update\n";
					next;
				}
				$update .= " state:tlc:lc " .
					join(':', $when, ${$res}{'state'}, ${$res}{'tlc'} & (2**64-1), ${$res}{'lc'} & (2**32-1));
				unless ($opts{'n'}) {
					if (0 == --$limit) {
						$limit = 2;
						select(undef, undef, undef, 0.04);
					}
					send(SOCKET, "00 $update", 0, $sockaddr);
				}
				print STDERR $update, "\n" if $opts{'x'};
			}
		}
		foreach $rrd (keys(%virt)) {
			$hr = $virt{$rrd};
			$update = "cluster/$rrd.rrd";
			if ($opts{'D'}) {
				print "$update\n";
				next;
			}
			$update .= ' state:tlc:lc ' .
				join(':', $when, $hr->{'state'}, $hr->{'tlc'} & (2**64-1), $hr->{'lc'} & (2**32-1));
			unless ($opts{'n'}) {
				if (0 == --$limit) {
					$limit = 2;
					select(undef, undef, undef, 0.04);
				}
				send(SOCKET, "00 $update", 0, $sockaddr);
			}
			print STDERR $update, "\n" if $opts{'x'};
		}
		print STDERR "!\n" if $opts{'x'};
		close(SOCKET);
		last if ($opts{'D'});
		last unless $opts{'p'};
		sleep($opts{'p'});
	}
	exit 0;
}
