+++ /dev/null
-#!/usr/bin/perl
-use strict;
-use warnings;
-
-use WWW::Mechanize;
-use HTML::TreeBuilder;
-
-use Data::Dumper;
-use List::Util qw(sum);
-use Graph;
-
-use 5.14.0;
-
-my $mech = WWW::Mechanize->new();
-
-my $query_site = "http://traintimes.org.uk";
-
-die "Usage: $0 <source> <dest> <date> <departure time>\n"
- unless( scalar( @ARGV ) == 4 );
-
-my ( $src_in, $dest_in, $date, $depart ) = @ARGV;
-
-say "Finding routes from $src_in to $dest_in on $date at $depart";
-
-my $url = "${query_site}/${src_in}/${dest_in}/${depart}/${date}";
-say "Checking $url";
-
-# This is the overall route
-my $route = get_stops( $url );
-
-print Dumper( $route );
-
-# Give me some more useful variables. Note that we need the array because we need the order.
-my @all_stops = map { $_->{abbr} } @{$route->{stops}};
-my %stop_details = map { $_->{abbr} => { departs => $_->{departs}, arrives => $_->{arrives}, point => $_->{point} } } @{$route->{stops}};
-
-print Dumper( \%stop_details );
-my $graph = Graph->new();
-my %journeys;
-
-$graph->add_weighted_edge( $all_stops[0], $all_stops[-1], $route->{price} );
-get_substops( \@all_stops );
-
-print Dumper( \%journeys );
-
-my @best_route = $graph->SP_Dijkstra( $all_stops[0], $all_stops[-1] );
-
-say "Best route:";
-my $sum = 0;
-for(my $i = 0; $i < $#best_route; $i++ ){
- my $journey = $journeys{$best_route[$i]}->{$best_route[$i+1]};
- printf "%s -> %s: %s (%s)\n", $best_route[$i], $best_route[$i+1], $journey->{price}, $journey->{type};
- $sum += $journey->{price};
-}
-say "Total price: $sum";
-say "Direct price: " . $route->{price};
-
-##
-## functions below here
-sub get_stops {
- my ( $url ) = @_;
- say "get_stops( $url )";
-
- $mech->get( $url );
-
- my $tree = HTML::TreeBuilder->new_from_content( $mech->content );
-
- my $first_stop = $tree->look_down( '_tag', 'li', 'id', 'result0' );
- my $details_url = $first_stop->look_down( '_tag', 'a', 'class', 'calling_link' )->attr('href');
- my $ticket_type = $first_stop->look_down( '_tag', 'span', 'class', 'fare-type tooltip')->look_down('_tag', 'a')->as_text;
- my $ticket_price = ( $first_stop->look_down( '_tag', 'label' ) )[0]->as_text;
-
- $tree = $tree->delete();
-
- # tidy up
- $ticket_price =~ s{[^0-9.]}{}g;
- $ticket_type =~ s{^\s+}{};
-
- say "Ticket type: $ticket_type";
- say "Cost: $ticket_price";
-
- my $stops = _parse_stopping_points( "$query_site/$details_url" );
- my %return = (
- stops => $stops,
- type => $ticket_type,
- price => $ticket_price,
- );
- return \%return;
-}
-
-sub _parse_stopping_points {
- my ( $url ) = @_;
-
- my @stops;
-
- my $mech = WWW::Mechanize->new();
-
- $mech->get( $url );
-
- my $tree = HTML::TreeBuilder->new_from_content( $mech->content );
-
- # intermediate stops
- my $calling_points_div = $tree->look_down( '_tag', 'div', 'class', 'calling' );
- my @rows = $calling_points_div->look_down( '_tag', 'tr' );
- foreach my $row ( @rows ){
- my $point_ent = $row->look_down('_tag', 'td', 'class', 'calling-points');
- next unless( $point_ent );
- my ( $point, $abbr ) = $point_ent->as_text() =~ m{(.*)\s\[([A-Z]+)\]};
- my $arrives = $row->look_down( '_tag', 'td', 'class', 'arrives' )->as_text();
- my $departs = $row->look_down( '_tag', 'td', 'class', 'departs' )->as_text();
- say "$point ($abbr): Arrives: $arrives; departs $departs";
- my %res = (
- arrives => $arrives,
- departs => $departs,
- point => $point,
- abbr => $abbr,
- );
- push @stops, \%res;
- }
-
- # first and last stops
- my $srcdest = $tree->look_down( '_tag', 'div', 'id', 'content' )->look_down('_tag', 'h2')->as_text;
- my ( $departs, $arrives ) = $tree->look_down( '_tag', 'div', 'id', 'content' )->look_down('_tag', 'p')->as_text() =~ m{Leaving\sat\s(\d+:\d+),\sarriving\sat\s(\d+:\d+)}x;
-
- my ( $src_station, $src_abbr, $dest_station, $dest_abbr ) =
- $srcdest =~ m{\s*(.+)\s+\[([A-Z]+)\]\s*to\s*(.+)\s+\[([A-Z]+)\]};
- say "Source: $src_station, $src_abbr at $departs";
- say "Dest: $dest_station, $dest_abbr at $arrives";
- push @stops, { arrives => $arrives, point => $dest_station, abbr => $dest_abbr };
- unshift @stops, { departs => $departs, point => $src_station, abbr => $src_abbr };
-
- $tree = $tree->delete();
- return \@stops;
-}
-
-sub get_substops {
- my ( $stop_list ) = @_;
-
- for( my $i = 0; $i <= $#{$stop_list}; $i++ ){
- for( my $j = $i + 1; $j <= $#{$stop_list}; $j++ ){
- # skip src-dest as we've already calculated that
- next if( 0 == $i and $#{$stop_list} == $j );
- my $src = $stop_list->[$i];
- my $dst = $stop_list->[$j];
-
- say "checking $src -> $dst ($i -> $j)";
-
- my $depart = $stop_details{$src}->{departs};
- my $url = "${query_site}/${src}/${dst}/${depart}/${date}";
- my $journey = get_stops( $url );
-
- $graph->add_weighted_edge($src, $dst, $journey->{price} );
- $journeys{$src}->{$dst} = $journey;
- }
- }
-}
--- /dev/null
+#!/usr/bin/perl
+use strict;
+use warnings;
+use 5.10.0; # pull in say
+
+=head1 NAME
+
+split-route.pl - parse a train route and attempt to find split tickets
+
+=head1 SYNOPSIS
+
+split-route.pl [--debug] [--help|--man] SRC DEST DATE TIME
+
+=head1 DESCRIPTION
+
+Given two stations, SRC and DEST, and a train travelling between them on DATE
+at TIME, this script will scrape L<http://traintimes.org.uk> to find all the
+possible intermediate stations at which the journey could be broken, and then
+calculate the price of making all those journeys.
+
+=head1 CAVEATS
+
+This script assumes that the first train returned by a search for the given
+date and time between the given stations will be direct. Bad things will
+happen if it is not (also, split tickets may do odd things).
+
+=head1 STATION IDENTIFIERS
+
+Either station names (eg 'Oxford', 'Birmingham New Street') or 3-letter codes
+(eg 'OXF', 'BHM') can be used.
+
+=head1 OPTIONS
+
+=over 4
+
+=item --debug
+
+Turns on debugging; will print details about each route.
+
+=item --help
+
+Gives a short help
+
+=item --man
+
+Shows this manpage.
+
+=back
+
+=head1 EXAMPLE USAGE
+
+ split-route.pl OXF BHM 2014-05-16 17:00
+
+=cut
+
+use WWW::Mechanize;
+use HTML::TreeBuilder;
+
+use Data::Dumper;
+use List::Util qw(sum);
+use Graph;
+use Getopt::Long;
+use Pod::Usage;
+
+my $debug = 0;
+my ( $help, $man );
+
+GetOptions( "debug" => \$debug,
+ "help" => \$help,
+ "man" => \$man,
+ );
+
+pod2usage(1) if( $help );
+pod2usage( -verbose => 2 ) if( $man );
+
+unless( scalar( @ARGV ) == 4 ){
+ pod2usage( "Invalid number of arguments passed (use --help for more details)" );
+}
+
+my ( $src_in, $dest_in, $date, $depart ) = @ARGV;
+
+my $mech = WWW::Mechanize->new();
+
+my $query_site = "http://traintimes.org.uk";
+
+say "Finding routes from $src_in to $dest_in on $date at $depart";
+
+my $url = "${query_site}/${src_in}/${dest_in}/${depart}/${date}";
+
+# Hash to hold all journeys, of the form:
+# SRCSHORTCODE => { DESTSHORTCODE1 => { stops => [], price => 'price', type => 'type' },
+# DESTSHORTCODE2 => { stops => [], price => 'price', type => 'type },
+# },
+my %journeys;
+
+
+# This is the overall route
+my $route = get_stops( $url );
+
+# Add the overall route to the journeys list, otherwise if it's cheapest the
+# summary doesn't have its details.
+$journeys{$route->{stops}[0]->{abbr}}->{$route->{stops}[-1]->{abbr}} = $route;
+
+debug( "Route:", Dumper( $route ) );
+
+# Give us some more useful variables. Note that we need the array because we
+# need the order.
+# all_stops is simply a list of stops, in order
+my @all_stops = map { $_->{abbr} } @{$route->{stops}};
+# stop_details is a hash of:
+# SHORTCODE => { point => 'station name',
+# arrives => 'arrival_time',
+# departs => 'departure_time',
+# },
+# to make looking things up by shortcode so much easier.
+# Note that this is based on the original route.
+my %stop_details = map { $_->{abbr} => { departs => $_->{departs}, arrives => $_->{arrives}, point => $_->{point} } } @{$route->{stops}};
+
+debug( "Stop details:", Dumper( \%stop_details ) );
+
+my $graph = Graph->new();
+
+# Add the source-dest journey
+$graph->add_weighted_edge( $all_stops[0], $all_stops[-1], $route->{price} );
+
+# Find all suitable combinations of substops
+# This populates the graph nodes
+get_substops( \@all_stops );
+
+debug( "Journeys:", Dumper( \%journeys ) );
+
+# Find the single best route
+my @best_route = $graph->SP_Dijkstra( $all_stops[0], $all_stops[-1] );
+
+say "Cheapest option:";
+my $sum = 0;
+if ( 2 == @best_route ){
+ say "[direct price]";
+}
+for(my $i = 0; $i < $#best_route; $i++ ){
+ my $journey = $journeys{$best_route[$i]}->{$best_route[$i+1]};
+ my $source_station = $stop_details{$best_route[$i]}->{point};
+ my $dest_station = $stop_details{$best_route[$i+1]}->{point};
+ my $departs = $journey->{stops}->[0]->{departs};
+ my $arrives = $journey->{stops}->[-1]->{arrives};
+ printf "%s (%s) -> %s (%s): %s -> %s: £%s (%s)\n", $source_station, $best_route[$i], $dest_station, $best_route[$i+1], $departs, $arrives, $journey->{price}, $journey->{type};
+ $sum += $journey->{price};
+}
+printf "Total price: £%.2f\n", $sum;
+printf "Direct price: £%.2f\n", $route->{price};
+
+##
+## functions below here
+# Get ticket price, type and stops from a URL
+sub get_stops {
+ my ( $url ) = @_;
+ debug( "get_stops( $url )" );
+
+ $mech->get( $url );
+
+ my $tree = HTML::TreeBuilder->new_from_content( $mech->content );
+
+ my $first_stop = $tree->look_down( '_tag', 'li', 'id', 'result0' );
+ my $details_url = $first_stop->look_down( '_tag', 'a', 'class', 'calling_link' )->attr('href');
+ my $ticket_type = $first_stop->look_down( '_tag', 'span', 'class', 'fare-type tooltip')->look_down('_tag', 'a')->as_text;
+ my $ticket_price = ( $first_stop->look_down( '_tag', 'label' ) )[0]->as_text;
+
+ $tree = $tree->delete();
+
+ # tidy up
+ $ticket_price =~ s{[^0-9.]}{}g;
+ $ticket_type =~ s{^\s+}{};
+
+ debug( "Ticket type: $ticket_type" );
+ debug( "Cost: $ticket_price" );
+
+ my $stops = _parse_stopping_points( "$query_site/$details_url" );
+ my %return = (
+ stops => $stops,
+ type => $ticket_type,
+ price => $ticket_price,
+ );
+ return \%return;
+}
+
+# Get stops from a given URL
+sub _parse_stopping_points {
+ my ( $url ) = @_;
+
+ my @stops;
+
+ my $mech = WWW::Mechanize->new();
+
+ $mech->get( $url );
+
+ my $tree = HTML::TreeBuilder->new_from_content( $mech->content );
+
+ # intermediate stops
+ my $calling_points_div = $tree->look_down( '_tag', 'div', 'class', 'calling' );
+ my @rows = $calling_points_div->look_down( '_tag', 'tr' );
+ foreach my $row ( @rows ){
+ my $point_ent = $row->look_down('_tag', 'td', 'class', 'calling-points');
+ next unless( $point_ent );
+ my ( $point, $abbr ) = $point_ent->as_text() =~ m{(.*)\s\[([A-Z]+)\]};
+ my $arrives = $row->look_down( '_tag', 'td', 'class', 'arrives' )->as_text();
+ my $departs = $row->look_down( '_tag', 'td', 'class', 'departs' )->as_text();
+ debug( "$point ($abbr): Arrives: $arrives; departs $departs" );
+ my %res = (
+ arrives => $arrives,
+ departs => $departs,
+ point => $point,
+ abbr => $abbr,
+ );
+ push @stops, \%res;
+ }
+
+ # first and last stops
+ my $srcdest = $tree->look_down( '_tag', 'div', 'id', 'content' )->look_down('_tag', 'h2')->as_text;
+ my ( $departs, $arrives ) = $tree->look_down( '_tag', 'div', 'id', 'content' )->look_down('_tag', 'p')->as_text() =~ m{Leaving\sat\s(\d+:\d+),\sarriving\sat\s(\d+:\d+)}x;
+
+ my ( $src_station, $src_abbr, $dest_station, $dest_abbr ) =
+ $srcdest =~ m{\s*(.+)\s+\[([A-Z]+)\]\s*to\s*(.+)\s+\[([A-Z]+)\]};
+ debug( "Source: $src_station, $src_abbr at $departs" );
+ debug( "Dest: $dest_station, $dest_abbr at $arrives" );
+ push @stops, { arrives => $arrives, point => $dest_station, abbr => $dest_abbr };
+ unshift @stops, { departs => $departs, point => $src_station, abbr => $src_abbr };
+
+ $tree = $tree->delete();
+ return \@stops;
+}
+
+# Walk a list of stops and work out their cost, then populate the graph
+sub get_substops {
+ my ( $stop_list ) = @_;
+
+ for( my $i = 0; $i <= $#{$stop_list}; $i++ ){
+ for( my $j = $i + 1; $j <= $#{$stop_list}; $j++ ){
+ # skip src-dest as we've already calculated that
+ next if( 0 == $i and $#{$stop_list} == $j );
+ my $src = $stop_list->[$i];
+ my $dst = $stop_list->[$j];
+
+ debug( "checking $src -> $dst ($i -> $j)" );
+
+ my $depart = $stop_details{$src}->{departs};
+ my $url = "${query_site}/${src}/${dst}/${depart}/${date}";
+ my $journey = get_stops( $url );
+ # Sanity check departure and arrival times match up with the source journey
+ my $expected_depart = $stop_details{$src}->{departs};
+ my $actual_depart = $journey->{stops}[0]{departs};
+ unless( $expected_depart eq $actual_depart ){
+ die "Error: train $src -> $dst does not depart at the expected time! (Expected $expected_depart, actual $actual_depart)";
+ }
+ my $expected_arrive = $stop_details{$dst}->{arrives};
+ my $actual_arrive = $journey->{stops}[-1]{arrives};
+ unless( $expected_arrive eq $actual_arrive ){
+ die "Error: train $src -> $dst does not arrive at the expected time! (Expected $expected_arrive, actual $actual_arrive)";
+ }
+
+ $graph->add_weighted_edge($src, $dst, $journey->{price} );
+ $journeys{$src}->{$dst} = $journey;
+ }
+ }
+}
+
+# Simple debugging function
+sub debug {
+ if( $debug ){
+ print join( "\n", @_ );
+ }
+}