Parsing HTML with regex.

dadecoza

Expert Member
Joined
Aug 30, 2006
Messages
1,294

qft

A piece of code I wrote last week ...

Code:
# Scrape every listing page in @lists for subnet rows.
#
# For each page fetched through $ua, the body is scanned line by line for a
# "Property" table cell containing a link.  The two lines that follow such a
# cell are consumed and searched for dotted-quad addresses (network first,
# subnet second).  Complete rows are inserted into the "subnet" table via
# $staging, and the row's URL is appended to @detailPages.  Pages that fail
# to download have their status line and headers printed instead.
foreach my $page (@lists) {
    my $resp = $ua->get($page);

    if (!$resp->is_success) {
        print $resp->status_line."\n";
        print $resp->headers()->as_string."\n";
        next;
    }

    my @lines = split(/\n/, $resp->content);
    while (@lines) {
        my $line = shift @lines;

        # NOTE(review): the literal "<a/>" is part of the pattern as written;
        # presumably it mirrors the (malformed) markup of the target page.
        next unless $line =~ m{<td class=Property><a href="(.*?)"><font color=blue><b>(.*?)</b></font><a/></td>};

        # Copy the captures right away so later matches cannot clobber them.
        my ($href, $name) = ($1, $2);
        my $url = "$page/$href";

        # The two lines after the link cell are always consumed, so they are
        # never themselves tested against the link pattern.
        my $network_line = shift @lines;
        my $subnet_line  = shift @lines;

        # A link cell on the last (or second-to-last) line leaves nothing to
        # shift; fall back to '' so the matches below do not warn on undef.
        $network_line = '' unless defined $network_line;
        $subnet_line  = '' unless defined $subnet_line;

        my ($network) = $network_line =~ /(\d+\.\d+\.\d+\.\d+)/;
        my ($subnet)  = $subnet_line  =~ /(\d+\.\d+\.\d+\.\d+)/;

        # Skip incomplete rows.  length() is used for the name so that a
        # link text of "0" is not mistaken for "missing".
        next unless defined $network and defined $subnet and length $name;

        $staging->do("insert into subnet (name, network, subnet, url) values (?,?,?,?)",undef, $name, $network, $subnet, $url);
        push @detailPages, $url;
    }
}
 
Top