Pages

4/10/2007

Perl::Screen Scraping


Required: MRTG Graphing for "source records remaining" of replica on Data Domain DDR appliance.

Problem: The replication statistics are not exposed via SNMP.

Workaround: Create an external monitoring script that connects to the device over HTTP and grabs this statistic out of the HTML table that we currently check by hand.

I used PAR to package this into an EXE so I don't even need to screw with Perl modules on the MRTG/Web server.

Perl code:
####################################################
# checkrep.pl
#
# MRTG External Monitoring Script
# to return Records Remaining to be replicated
# for specific host & replication destination
####################################################

use Socket; # include Socket module
require 'tcp.pl'; # file with Open_TCP routine
use HTML::TableExtract; # Module to parse HTML

##########################################
#
# Parameters for customization below:
#
##########################################

##########################################
# Host to query

# Defaults can be overridden from the command line without editing the
# script:  checkrep.pl [host [replication-destination]]
# With no arguments the values below are used unchanged.
my $host = defined $ARGV[0] ? $ARGV[0] : "DDR05";

##########################################
# Replication Destination

# Must match the text of the "Destination" cell on the replication page,
# because it is used as the lookup key for the parsed table rows.
$Query = defined $ARGV[1] ? $ARGV[1] : "dir://ddr02.usa.mydomain.com/backup/rep1";

##########################################
#
# Don't mess with stuff below here
#
##########################################


# Human-readable timestamp (localtime in scalar context) for the log header.
my $time = localtime;

# Start a fresh log on every run (the file is truncated).  Logging is only a
# diagnostic aid, so a failure to open it is reported on STDERR and the
# script carries on; STDOUT is left alone because it carries the MRTG result.
open(LOG, '>', 'checkrep.log')
    or warn "checkrep: cannot open checkrep.log for writing: $!\n";

#open (OUT, '>replication.html');


print LOG "\n----------\n$time\n";

##########################################
#Authenticate

# Log on: POST the credentials to /cgi-bin/auth.pl and record the reply in
# the log.  The reply headers are stored in %header; the next step requests
# the path given by $header{'Location'}.
# NOTE(review): the result of open_TCP (defined in tcp.pl) is not checked
# here - confirm how it reports a failed connection.
open_TCP('F', $host, 80);
print LOG "\n----------\nLOGON-\n";

# Keep the form body in one place so that Content-length is always derived
# from it; a hard-coded length goes stale as soon as the credentials change.
my $logon_body = "user=TESTLOGON&password=********";

print F "POST /cgi-bin/auth.pl HTTP/1.0\n";
print F "User-Agent: Mozilla/1.1N (X11; I; SunOS 5.3 sun4m)\n";
print F "Accept: */*\n";
print F "Accept: image/gif\n";
print F "Accept: image/x-xbitmap\n";
print F "Accept: image/jpeg\n";
print F "Accept: text/javascript\n";
print F "Content-type: application/x-www-form-urlencoded\n";
print F "Content-length: " . length($logon_body) . "\n";
print F "\n";
print F "$logon_body\n";

# get the HTTP response line
my $the_response = <F>;
print LOG $the_response;

# get the header data; the loop stops at the first line that is not of the
# form "Name: value" (normally the blank line that ends the headers).
# The optional \r keeps a CRLF line ending out of the stored value, which
# would otherwise end up inside the request line built from 'Location'.
my %header;
while (<F> =~ m/^(\S+):\s+(.+?)\r?$/) {
    $header{$1} = $2;
    print LOG "$1: $2\n";
}

# get the entity body
print LOG while (<F>);

# close (F);


##########################################
#Open Main (create cookie)

# Follow the redirect returned by the logon step and remember the session
# cookie that the server sets in its reply.
open_TCP('F', $host, 80);
print LOG "\n----------\nOPEN MAIN-\n";
# request the path of the document to get
print F "GET $header{'Location'} HTTP/1.0\n";
print F "Accept: */*\n";
print F "User-Agent: Mozilla/1.1N (X11; I; SunOS 5.3 sun4m)\n";
print F "Connection: Keep-Alive\n";
print F "\n";

# get the HTTP response line
$the_response = <F>;
print LOG $the_response;

# get the header data; the optional \r keeps a CRLF line ending out of the
# stored values so the cookie can be echoed back verbatim.
while (<F> =~ m/^(\S+):\s+(.+?)\r?$/) {
    $header{$1} = $2;
    print LOG "$1: $2\n";
}

# The whole Set-Cookie value is sent back unchanged in the later requests.
my $Cookie = $header{'Set-Cookie'};


# get the entity body
# NOTE(review): this reads until end-of-file, so it relies on the server
# closing the connection even though Keep-Alive was requested - confirm.
print LOG while (<F>);


# close the network connection
close(F);

##########################################
#Open Page
# Fetch the replication status page with the session cookie and keep its
# body in @line for the parsing step further down.
open_TCP('F', $host, 80);
print LOG "\n----------\nOPEN REPLICATION PAGE-\n";
print F "GET /view.cgi?ref=replication.gui HTTP/1.0\n";
print F "Accept: */*\n";
print F "User-Agent: Mozilla/1.1N (X11; I; SunOS 5.3 sun4m)\n";
print F "Connection: Keep-Alive\n";
print F "Cookie: $Cookie\n\n";

# get the HTTP response line
$the_response = <F>;
print LOG $the_response;

# get the header data (only logged; nothing from these headers is kept)
while (<F> =~ m/^(\S+):\s+(.+)/) {
    print LOG "$1: $2\n";
}

# get the entity body
# print OUT while (<F>);
# List context: everything left on the socket, one element per line.
@line = <F>;

# close the network connection
close(F);

##########################################
#Logout

# Test to see if session is still logged on
# e.g. http://atlddr05/view.cgi?ref=main.gui&session=14

# End the session: POST to /logout.cgi with the session cookie, log the
# reply, then close the socket and the log file.
open_TCP('F', $host, 80);
print LOG "\n----------\nLOGOUT-\n";
print F "POST /logout.cgi HTTP/1.0\n";
print F "User-Agent: Mozilla/1.1N (X11; I; SunOS 5.3 sun4m)\n";
print F "Accept: */*\n";
print F "Accept: image/gif\n";
print F "Accept: image/x-xbitmap\n";
print F "Accept: image/jpeg\n";
print F "Accept: text/javascript\n";
print F "Content-type: application/x-www-form-urlencoded\n";
print F "Content-length: 0\n";
print F "Pragma: no-cache\n";
print F "Cookie: $Cookie\n\n";

# get the HTTP response line
$the_response = <F>;
print LOG $the_response;

# get the header data
while (<F> =~ m/^(\S+):\s+(.+)/) {
    $header{$1} = $2;
    print LOG "$1: $2\n";
}
# get the entity body
print LOG while (<F>);

print LOG "\n----------\nEND\n----------\n";
close (F);
close (LOG);
# OUT is not closed here: its open() is commented out, so the handle is
# never opened.

##########################################
# Parse HTML


#use Data::Dumper;

# Turn the fetched page into one long string: the line ending of every line
# (CR, LF or CRLF) is replaced by a single space before joining.
foreach $line (@line) {
    $line =~ s/\x0d{0,1}\x0a{0,1}\Z/ /s;
}

$html_string = join ('',@line);

# Pick out the table(s) whose header row contains both column titles.
$te = HTML::TableExtract->new( headers => ['Destination', 'Source Records Remaining'] );
$te->parse($html_string);

#print Dumper $te;
#print "\n";

# Build %Result: destination => source records remaining.
foreach $ts ($te->tables) {
    # rows() is the documented accessor for a table's rows.
    foreach $row ($ts->rows) {
        # print join(',', @$row), "\n";
        # Same positions as before: destination in the 2nd cell, records
        # remaining in the 8th.
        # NOTE(review): when 'headers' is given, HTML::TableExtract normally
        # returns only the matching columns - confirm these positions with
        # the debug print above against the installed module version.
        my ($Destination, $SourceRecordsRemaining) = @{$row}[1, 7];
        $Result{$Destination} = $SourceRecordsRemaining;
    }
}

$RecordsRemaining = $Result{$Query};

# Remove every thousands separator, not just the first, so that a value
# such as 1,234,567 becomes the plain number MRTG needs.
$RecordsRemaining =~ s/,//g;


##########################################
# Return Results

# Four lines for MRTG: the value twice (both variables), a placeholder for
# the uptime line, and "host\destination" as the target name.
$Results = join(
    "\n",
    $RecordsRemaining,
    $RecordsRemaining,
    'NA',
    $host . "\\" . $Query,
) . "\n";

print STDOUT $Results;



#The external command must return 4 lines of output:

#Line 1 - current state of the first variable, normally 'incoming bytes count'
#Line 2 - current state of the second variable, normally 'outgoing bytes count'
#Line 3 - string (in any human readable format), telling the uptime of the target.
#Line 4 - string, telling the name of the target



##########################################
# END
##########################################

No comments: