From: Michael Howe Date: Tue, 8 Apr 2014 08:05:26 +0000 (+0100) Subject: And the result of Monday's work X-Git-Url: https://git.michaelhowe.org/gitweb/?a=commitdiff_plain;h=3aaef85d18e40f4132d84dd64c7e17e30d5cd192;p=pub%2Fmichael%2Fnational-rail-ticket-split.git And the result of Monday's work --- diff --git a/parse-route.pl b/parse-route.pl old mode 100644 new mode 100755 index 0995bf3..6eeebca --- a/parse-route.pl +++ b/parse-route.pl @@ -5,6 +5,11 @@ use warnings; use WWW::Mechanize; use HTML::TreeBuilder; +use Data::Dumper; +use List::Util qw(sum); + +use 5.14.0; + my $mech = WWW::Mechanize->new(); my $query_site = "http://traintimes.org.uk"; @@ -14,25 +19,104 @@ die "Usage: $0 \n" my ( $src, $dest, $date, $depart ) = @ARGV; -print "Finding routes from $src to $dest on $date at $depart\n"; +say "Finding routes from $src to $dest on $date at $depart"; my $url = "${query_site}/${src}/${dest}/${depart}/${date}"; -print "Checking $url\n"; +say "Checking $url"; + +# This is the overall route +my $route = get_stops( $url ); + +print Dumper( $route ); + +my @options; + +# We can't find the time to the start point, obviously +for( my $i = 1; $i <= $#{$route->{stops}}; $i++ ){ + my $current_stop = $route->{stops}[$i]; + my $prev_stop = $route->{stops}[$i - 1]; + say "Checking $prev_stop->{point} -> $current_stop->{point}"; +# + my $stop_url = sprintf "%s/%s/%s/%s/%s", $query_site, $prev_stop->{abbr}, $current_stop->{abbr}, $prev_stop->{departs}, $date; + push @options, get_stops( $stop_url ); +} -my $stops = get_stops( $url ); +say Dumper( \@options ); + +say sum( map { $_->{price} } @options ); sub get_stops { my ( $url ) = @_; + say "get_stops( $url )"; $mech->get( $url ); - + my $tree = HTML::TreeBuilder->new_from_content( $mech->content ); my $first_stop = $tree->look_down( '_tag', 'li', 'id', 'result0' ); my $details_url = $first_stop->look_down( '_tag', 'a', 'class', 'calling_link' )->attr('href'); my $ticket_type = $first_stop->look_down( '_tag', 'span', 'class', 'fare-type tooltip')->look_down('_tag', 'a')->as_text; my $ticket_price = ( $first_stop->look_down( '_tag', 'label' ) )[0]->as_text; - print "Ticket type: $ticket_type\n"; - print "Cost: $ticket_price\n"; - print "stopping points URL: $details_url\n"; + + $tree = $tree->delete(); + + # tidy up + $ticket_price =~ s{[^0-9.]}{}g; + $ticket_type =~ s{^\s+}{}; + + say "Ticket type: $ticket_type"; + say "Cost: $ticket_price"; + + my $stops = _parse_stopping_points( "$query_site/$details_url" ); + my %return = ( + stops => $stops, + type => $ticket_type, + price => $ticket_price, + ); + return \%return; +} + +sub _parse_stopping_points { + my ( $url ) = @_; + + my @stops; + + my $mech = WWW::Mechanize->new(); + + $mech->get( $url ); + + my $tree = HTML::TreeBuilder->new_from_content( $mech->content ); + + # intermediate stops + my $calling_points_div = $tree->look_down( '_tag', 'div', 'class', 'calling' ); + my @rows = $calling_points_div->look_down( '_tag', 'tr' ); + foreach my $row ( @rows ){ + my $point_ent = $row->look_down('_tag', 'td', 'class', 'calling-points'); + next unless( $point_ent ); + my ( $point, $abbr ) = $point_ent->as_text() =~ m{(.*)\s\[([A-Z]+)\]}; + my $arrives = $row->look_down( '_tag', 'td', 'class', 'arrives' )->as_text(); + my $departs = $row->look_down( '_tag', 'td', 'class', 'departs' )->as_text(); + say "$point ($abbr): Arrives: $arrives; departs $departs"; + my %res = ( + arrives => $arrives, + departs => $departs, + point => $point, + abbr => $abbr, + ); + push @stops, \%res; + } + + # first and last stops + my $srcdest = $tree->look_down( '_tag', 'div', 'id', 'content' )->look_down('_tag', 'h2')->as_text; + my ( $departs, $arrives ) = $tree->look_down( '_tag', 'div', 'id', 'content' )->look_down('_tag', 'p')->as_text() =~ m{Leaving\sat\s(\d+:\d+),\sarriving\sat\s(\d+:\d+)}x; + + my ( $src_station, $src_abbr, $dest_station, $dest_abbr ) = + $srcdest =~ m{\s*(.+)\s+\[([A-Z]+)\]\s*to\s*(.+)\s+\[([A-Z]+)\]}; + say "Source: $src_station, $src_abbr at $departs"; + say "Dest: $dest_station, $dest_abbr at $arrives"; + push @stops, { arrives => $arrives, point => $dest_station, abbr => $dest_abbr }; + unshift @stops, { departs => $departs, point => $src_station, abbr => $src_abbr }; + + $tree = $tree->delete(); + return \@stops; }