#!/usr/bin/perl
#
# xmltv2xhtml.pl -- read an XMLTV listings file and write one page per
# channel, per start hour and per genre into a target directory.
#
# Usage: xmltv2xhtml.pl --target-directory=DIR INPUTFILE
#
# NOTE(review): this copy of the file appears to have been passed through a
# tag stripper.  The markup inside most string literals, the heredoc in
# Schedule::prologue, and some code that sat next to markup are missing.
# Those regions are reproduced exactly as found (including the stray line
# breaks inside string literals) and are each marked NOTE(review); they must
# be restored from the original source before this file will compile.

use Data::Dumper;
use Getopt::Long;
use XML::Simple;
use strict;
use warnings;

# Directory prefix for generated pages; concatenated directly with the page
# name, so it is expected to end with a path separator.
my $output_directory = './';

Getopt::Long::Configure("bundling", "ignorecase_always", "permute");
GetOptions(
    "target-directory|d=s" => \$output_directory,
    "dt_fmt=s"             => \$RDF::RDFa::Generator::XMLTV::dt_fmt,
    "d_fmt=s"              => \$RDF::RDFa::Generator::XMLTV::d_fmt,
    "t_fmt=s"              => \$RDF::RDFa::Generator::XMLTV::t_fmt,
);

my $input_file = shift @ARGV || die "Usage: xmltv2xhtml.pl --target-directory=DIR INPUTFILE\n";

my $xs = XML::Simple->new(
    ForceArray   => [qw(actor subtitles programme channel)],
    ForceContent => [qw(desc)],
    KeyAttr      => [],
);
my $data = $xs->parse_file($input_file);

# Schedules, keyed by channel id, by the first ten characters of the start
# timestamp, and by category text respectively.
my $channels = {};
my $hours    = {};
my $genres   = {};

# Loop through channels, reading channel data.
foreach my $c (@{ $data->{'channel'} }) {
    $channels->{$c->{'id'}} = RDF::RDFa::Generator::XMLTV::Channel->new($c);
}

# Loop through programmes, reading programme data.
# Add programmes to schedules.
foreach my $p (@{ $data->{'programme'} }) {
    # The hour key must be taken before Programme->new replaces the raw
    # 'start' string with a DateTime object.
    my $start_hour = substr($p->{'start'}, 0, 10);
    $hours->{ $start_hour } = RDF::RDFa::Generator::XMLTV::Hour->new($start_hour)
        unless defined $hours->{ $start_hour };
    my $h = $hours->{ $start_hour };

    my $g;
    if (length $p->{'category'}->{'content'}) {
        $genres->{ $p->{'category'}->{'content'} } = RDF::RDFa::Generator::XMLTV::Genre->new($p->{'category'}->{'content'})
            unless defined $genres->{ $p->{'category'}->{'content'} };
        $g = $genres->{ $p->{'category'}->{'content'} };
    }

    my $c = $channels->{ $p->{'channel'} };
    my $P = RDF::RDFa::Generator::XMLTV::Programme->new($p, $c, $g);
    $c->add_program($P);
    $h->add_program($P);
    $g->add_program($P) if ($g);
}

my $menu_string = RDF::RDFa::Generator::XMLTV::make_menu({
    'Chronological' => $hours,
    'By Channel'    => $channels,
    'By Genre'      => $genres,
});

foreach my $schedule (values %$channels) {
    $schedule->publish($output_directory, $menu_string);
}
foreach my $schedule (values %$hours) {
    $schedule->publish($output_directory, $menu_string);
}
foreach my $schedule (values %$genres) {
    $schedule->publish($output_directory, $menu_string);
}

package RDF::RDFa::Generator::XMLTV;

# strftime patterns for dates, date-times and times.  Set in BEGIN so the
# defaults exist before GetOptions (above) runs and possibly overrides them.
BEGIN {
    our $d_fmt  = '%F';
    our $dt_fmt = '%F %R';
    our $t_fmt  = '%R';
}

# make_menu(\%sections)
#
# Builds the menu string that is appended to every page.  %sections maps a
# section label to a hashref of schedule objects; each section's objects are
# sorted by their sort_string.
#
# NOTE(review): the markup in these literals is missing, and the second
# "%s" below is no longer inside a sprintf -- the code that listed the
# individual pages seems to have been lost.  @pages is only used for its
# first element in what remains.
sub make_menu {
    my $sections = shift;
    my $rv = "\t\t
\n";
    foreach my $label (keys %$sections) {
        my @pages = sort { return $a->sort_string cmp $b->sort_string; } (values %{$sections->{$label}});
        $rv .= "\t\t\t
\n" . "\t\t\t\t

$label

\n";
        if (UNIVERSAL::isa($pages[0], 'RDF::RDFa::Generator::XMLTV::Hour')) {
            $rv .= sprintf("\t\t\t\t

%s

\n", $pages[0]->date_string);
        }
        $rv .= "\t\t\t\t\n\t\t\t\t

%s

\n\t\t\t\t\n" . "\t\t\t
\n";
    }
    $rv .= "\t\t
\n";
    return $rv;
}

1;

package RDF::RDFa::Generator::XMLTV::Programme;

use HTML::Entities qw(encode_entities_numeric);
use Data::Dumper;
use DateTime;
use DateTime::Format::Strptime;
use Digest::SHA1 qw(sha1_hex);

# new($class, \%programme, $channel, $genre)
#
# Blesses the parsed programme hash in place.  'start' and 'stop' are
# replaced by DateTime objects when they match YYYYMMDDhhmmss followed by a
# numeric offset; 'duration' is the difference between them.  The channel
# and genre schedule objects are stored under 'chan' and 'genre'.
#
# NOTE(review): if either timestamp does not match the pattern it stays a
# plain string and the subtract_datetime call below will die.
sub new {
    my $class = shift;
    my $self  = shift;
    my $chan  = shift;
    my $genre = shift;

    $self->{'start'} = DateTime->new(
        'year' => $1, 'month' => $2, 'day' => $3,
        'hour' => $4, 'minute' => $5, 'second' => $6,
        'time_zone' => $7
    ) if $self->{'start'} =~ /^(\d{4})(\d{2})(\d{2})(\d{2})(\d{2})(\d{2})\s*(.\d{4})$/;

    $self->{'stop'} = DateTime->new(
        'year' => $1, 'month' => $2, 'day' => $3,
        'hour' => $4, 'minute' => $5, 'second' => $6,
        'time_zone' => $7
    ) if $self->{'stop'} =~ /^(\d{4})(\d{2})(\d{2})(\d{2})(\d{2})(\d{2})\s*(.\d{4})$/;

    $self->{'duration'} = $self->{'stop'}->subtract_datetime($self->{'start'});
    $self->{'chan'}  = $chan;
    $self->{'genre'} = $genre;

    bless $self, $class;
}

# get($path)
#
# Walks a '/'-separated path through nested hashes and arrays starting at
# the programme itself and returns whatever is found, stringified.  Missing
# values come back as the empty string (warnings are silenced for the
# stringification of undef).
sub get {
    my $self  = shift;
    my $field = shift;

    my @bits = split m#/#, $field;
    my $it   = $self;
    my $this_bit;
    while (@bits) {
        $this_bit = shift @bits;
        if (ref $it eq 'HASH' || ref $it eq ref $self) {
            $it = $it->{$this_bit};
        }
        elsif (ref $it eq 'ARRAY') {
            $it = $it->[0 + $this_bit];
        }
    }

    no warnings;
    return "$it";
}

# hget($path) -- like get(), with the result entity-encoded for markup.
sub hget {
    my $self = shift;
    return encode_entities_numeric($self->get(@_));
}

# langattr($key) -- an xml:lang="..." attribute for the element stored under
# $key, or the empty string when that element carries no 'lang'.
sub langattr {
    my $self = shift;
    my $key  = shift;
    my $lang = $self->get("$key/lang");
    return "xml:lang=\"$lang\"" if ($lang);
    return '';
}

# id() -- SHA-1 hex digest of the channel id plus the ISO 8601 start time,
# computed once and cached under 'ID'.
sub id {
    my $self = shift;
    $self->{'ID'} = sha1_hex($self->{'channel'}.$self->{'start'}->iso8601)
        unless defined $self->{'ID'};
    return $self->{'ID'};
}

# uri($part) -- tag: URI for this programme, with an optional trailing part.
sub uri {
    my $self = shift;
    my $part = shift || '';
    # Identifying URIs for the broadcast on a particular channel at a particular
    # time. These URIs are opaque-looking and non-dereferenceable, but any URI
    # at all is better than no URI!
    return 'tag:buzzword.org.uk,2009:tv/' . $self->id . '/' .
        $part;
}

# interval_uri() -- placetime.com interval URI built from the start time,
# its zone suffix and the ISO 8601 duration; cached under 'INTERVAL_URI'.
sub interval_uri {
    my $self = shift;
    # Fixed: this used to call the non-existent method strtime('%z'), which
    # made every programme with a non-UTC start time die here.
    $self->{'INTERVAL_URI'} = sprintf(
        'http://placetime.com/interval/gregorian/%s%s/%s',
        $self->{'start'}->iso8601,
        ($self->{'start'}->time_zone->is_utc ? 'Z' : $self->{'start'}->strftime('%z')),
        RDF::RDFa::Generator::XMLTV::Programme::DurationHelper::to_iso8601($self->{'duration'})
    ) unless defined $self->{'INTERVAL_URI'};
    return $self->{'INTERVAL_URI'};
}

# to_rdfa($showchan, $showgen, $css, $hx)
#
# Renders this programme as a fragment for a schedule page.  $showchan and
# $showgen switch the channel and genre parts on; $css is the class value
# (default 'item'); $hx is a base heading level (default 2).
#
# NOTE(review): this sub is the most heavily damaged part of the file.  The
# markup in every literal is missing, so $hxp, $hxpp, $genreURI and
# $_broadcastType are now unused, the $_interval sprintf receives more
# arguments than its format consumes, and the final return statement is no
# longer valid Perl.  Everything is kept verbatim.
sub to_rdfa {
    my $self     = shift;
    my $showchan = shift || 0;
    my $showgen  = shift || 0;
    my $css      = shift || 'item';
    my $hx       = shift || 2;
    my $hxp      = $hx + 1;
    my $hxpp     = $hx + 2;
    my $dt_fmt   = $RDF::RDFa::Generator::XMLTV::t_fmt;

    my $_subtitle = '';
    $_subtitle = "".$self->hget('sub-title/content').""
        if (defined $self->{'sub-title'});

    my $_category = '';
    if ($showgen && defined $self->{'genre'}) {
        my $genreURI = $self->{'genre'}->page_name;
        $_category = "
".$self->hget('category/content')."
" if (defined $self->{'category'});
    }

    my $_credits = '';
    if (defined $self->{'credits'}) {
        $_credits = "\n";
    }

    my $_aspect = '';
    $_aspect = "
(widescreen)
" if (defined $self->{'video'}->{'aspect'}->{'content'} && $self->{'video'}->{'aspect'}->{'content'} eq '16:9');

    my $_interval;
    $_interval = sprintf(
        ''
        .'%s'
        .'–%s'
        .' (%s)',
        $self->interval_uri,
        ($self->{'start'}->time_zone->is_utc ? $self->{'start'}->strftime('%FT%TZ') : $self->{'start'}->strftime('%FT%T%z')),
        $self->{'start'}->strftime($dt_fmt),
        ($self->{'stop'}->time_zone->is_utc ? $self->{'stop'}->strftime('%FT%TZ') : $self->{'stop'}->strftime('%FT%T%z')),
        $self->{'stop'}->strftime($dt_fmt),
        RDF::RDFa::Generator::XMLTV::Programme::DurationHelper::to_iso8601($self->{'duration'}),
        RDF::RDFa::Generator::XMLTV::Programme::DurationHelper::to_friendly($self->{'duration'})
    );

    my $_broadcastType = 'Broadcast';
    $_broadcastType = 'RepeatBroadcast' if defined $self->{'previously-shown'};

    my $_access = '';
    if (defined $self->{'subtitles'}) {
        $_access .= '
';
        foreach my $st (@{$self->{'subtitles'}}) {
            $_access .= sprintf('%s ', $st->{'type'});
        }
        $_access .= '
';
    }

    my $_chan = '';
    if ($showchan && defined $self->{'chan'}) {
        $_chan = "
".$self->{'chan'}->to_rdfa_short."
";
    }
    else {
        $_chan = '';
    }

    return "
id."\" about=\"".$self->uri('version')."\" typeof=\"po:Version\" class=\"$css\">
uri('broadcast')."\"> $_chan
$_interval
uri('episode')."\" >
".$self->hget('title/content')."
$_subtitle
langattr('desc')." property=\"dc:abstract po:synopsis\">".$self->hget('desc/content')."
$_category $_credits
$_aspect $_access
";
}

1;

package RDF::RDFa::Generator::XMLTV::Programme::DurationHelper;

# to_iso8601($duration)
#
# Formats a duration object (as returned by subtract_datetime) in the
# PnYnMnDTnHnMnS form.  Called as a plain function, not as a method.
sub to_iso8601 {
    my $this = shift;
    my $str;

    # We coerce weeks into days and nanoseconds into fractions of a second
    # for compatibility with xsd:duration.
    if ($this->is_negative) {
        $str .= '-P';
    }
    else {
        $str .= 'P';
    }

    if ($this->years) {
        $str .= $this->years.'Y';
    }
    if ($this->months) {
        $str .= $this->months.'M';
    }
    if ($this->weeks || $this->days) {
        $str .= ($this->days + (7 * $this->weeks)).'D';
    }

    $str .= 'T';

    if ($this->hours) {
        $str .= $this->hours.'H';
    }
    if ($this->minutes) {
        $str .= $this->minutes.'M';
    }
    if ($this->seconds) {
        $str .= ($this->seconds + ($this->nanoseconds / 1000000000)).'S';
    }

    # Drop the time designator when no time component followed it.
    $str =~ s/T$//;

    # Fixed: an all-zero duration used to come out as a bare "P", which is
    # not a valid xsd:duration; at least one component is required.
    $str .= 'T0S' if $str =~ /P\z/;

    return $str;
}

# to_friendly($duration)
#
# Formats the same duration object for human readers, e.g. "1 hr, 30 min".
# Weeks are folded into days and nanoseconds into fractional seconds.
sub to_friendly {
    my $this = shift;
    # Fixed: start from '' so that a zero-length duration yields the empty
    # string instead of running s/// on (and returning) undef.
    my $str = '';

    if ($this->years) {
        $str .= $this->years.' years, ';
    }
    if ($this->months) {
        $str .= $this->months.' months, ';
    }
    if ($this->weeks || $this->days) {
        $str .= ($this->days + (7 * $this->weeks)).' days, ';
    }
    if ($this->hours) {
        $str .= $this->hours.' hr, ';
    }
    if ($this->minutes) {
        $str .= $this->minutes.' min, ';
    }
    if ($this->seconds) {
        $str .= ($this->seconds + ($this->nanoseconds / 1000000000)).' sec';
    }

    # Remove the separator left behind when the last component is absent.
    $str =~ s/, $//;

    return $str;
}

1;

package RDF::RDFa::Generator::XMLTV::Schedule;

# Base class for the three kinds of page (Genre, Channel, Hour).  Subclasses
# supply page_name, page_title, sort_string and publish.

# add_program($programme) -- append a programme to this schedule.
sub add_program {
    my $self = shift;
    push @{ $self->{'programmes'} }, shift;
}

# prologue() -- the text written at the top of every page.
#
# NOTE(review): the heredoc that was returned here has been destroyed; only
# the interpolated $title and the EOF terminator survive.  The statement
# below is preserved as found and is not valid Perl.
sub prologue {
    my $self  = shift;
    my $title = $self->page_title;
    return < $title EOF
}

# epilogue($menu) -- the text written at the bottom of every page, wrapping
# the shared menu string.
sub epilogue {
    my $self = shift;
    my $menu = shift;
    return "\n$menu\n\t\t\n\t\n\n";
}

# heading_block() -- page heading built from page_title.
# NOTE(review): surrounding markup is missing from this literal.
sub heading_block {
    my $self = shift;
    return "

".$self->page_title."

";
}

# shortcuts(@programmes) -- a summary list with one entry per programme.
#
# NOTE(review): markup is missing; the format now consumes two of the three
# sprintf arguments (start time and id), so the title is dropped.
sub shortcuts {
    my $self = shift;
    my $rv = "\n\t\t\t
\n";
    $rv .= "\t\t\t\t

Summary

\n";
    $rv .= "\t\t\t\t
    \n";
    foreach my $p (@_) {
        $rv .= sprintf("\t\t\t\t\t
  • %s: %s
  • \n", $p->{'start'}->strftime($RDF::RDFa::Generator::XMLTV::t_fmt), $p->id, $p->hget('title/content'));
    }
    $rv .= "\t\t\t\t
\n";
    $rv .= "\t\t\t
\n";
    return $rv;
}

# _publish_page($dir, $menu, $showchan, $showgen)
#
# Shared implementation behind the subclasses' publish methods: sorts the
# programmes by start time (ties broken by channel display name), then
# writes prologue, heading, one to_rdfa fragment per programme with
# alternating 'odd item' / 'even item' classes, the shortcuts list and the
# epilogue to $dir . page_name.  Dies if the file cannot be opened or
# closed.
sub _publish_page {
    my ($self, $dir, $menu, $showchan, $showgen) = @_;

    my @progs = sort {
        return $a->{'start'} cmp $b->{'start'}
            unless $a->{'start'} eq $b->{'start'};
        return lc($a->{'chan'}->{'display-name'}->{'content'})
            cmp lc($b->{'chan'}->{'display-name'}->{'content'});
    } @{$self->{'programmes'}};

    # Three-argument open with a lexical handle: the file name can no longer
    # be misread as a mode, and failures are reported instead of ignored.
    my $path = $dir.$self->page_name;
    open my $out, '>', $path
        or die "Cannot open $path for writing: $!\n";

    print {$out} $self->prologue;
    print {$out} $self->heading_block;
    my $i = 0;
    foreach my $p (@progs) {
        print {$out} $p->to_rdfa($showchan, $showgen, (++$i%2?'odd item':'even item'));
    }
    print {$out} $self->shortcuts(@progs);
    print {$out} $self->epilogue($menu);

    # Buffered write errors only surface at close time.
    close $out
        or die "Cannot close $path: $!\n";

    return 1;
}

1;

package RDF::RDFa::Generator::XMLTV::Genre;

BEGIN {
    our @ISA = qw(RDF::RDFa::Generator::XMLTV::Schedule);
};

# new($genre) -- an empty schedule for the given category text.
sub new {
    my $class = shift;
    my $genre = shift;
    my $self  = {
        'genre'      => $genre,
        'programmes' => [],
    };
    bless $self, $class;
}

# sort_string() -- lower-cased genre name, used to order the menu.
sub sort_string {
    my $self = shift;
    return lc($self->{'genre'});
}

# page_name() -- file name for this genre; every character other than an
# ASCII letter is removed from the genre first.
sub page_name {
    my $self = shift;
    my $g = $self->{'genre'};
    $g =~ s/[^A-Za-z]//g;
    return sprintf("Genre__%s.html", $g);
}

# page_title() -- the genre text as given.
sub page_title {
    my $self = shift;
    return $self->{'genre'};
}

# publish($dir, $menu) -- write this genre's page; programmes show their
# channel but not their genre.
sub publish {
    my $self = shift;
    my $dir  = shift;
    my $menu = shift;
    return $self->_publish_page($dir, $menu, 1, 0);
}

1;

package RDF::RDFa::Generator::XMLTV::Channel;

BEGIN {
    our @ISA = qw(RDF::RDFa::Generator::XMLTV::Schedule);
};

# new(\%channel) -- blesses the parsed channel hash in place and gives it an
# empty programme list.
sub new {
    my $class = shift;
    my $self  = shift;
    $self->{'programmes'} = [];
    bless $self, $class;
}

# sort_string() -- lower-cased display name, used to order the menu.
sub sort_string {
    my $self = shift;
    return lc($self->{'display-name'}->{'content'});
}

# page_name() -- file name for this channel; dots in the id become
# underscores.
sub page_name {
    my $self = shift;
    my $pn = $self->{'id'};
    $pn =~ s/\./_/g;
    return sprintf("Channel__%s.html", $pn);
}

# page_title() -- the channel's display name.
sub page_title {
    my $self = shift;
    return $self->{'display-name'}->{'content'};
}

# uri() -- tv: URI for the channel id.
sub uri {
    my $self = shift;
    # See RFC 2838.
    return 'tv:'.$self->{'id'};
}

# to_rdfa_short() -- short reference to this channel for use inside a
# programme fragment, with the icon when one is defined.
#
# NOTE(review): markup is missing from both formats, so each now consumes
# only its first argument (the URI).
sub to_rdfa_short {
    my $self = shift;
    if (defined $self->{'icon'}->{'src'}) {
        return sprintf(' %s', $self->uri, $self->{'icon'}->{'src'}, $self->page_name, $self->{'display-name'}->{'content'});
    }
    return sprintf('%s', $self->uri, $self->page_name, $self->{'display-name'}->{'content'});
}

# publish($dir, $menu) -- write this channel's page; programmes show their
# genre but not their channel.
sub publish {
    my $self = shift;
    my $dir  = shift;
    my $menu = shift;
    return $self->_publish_page($dir, $menu, 0, 1);
}

# heading_block() -- channel-specific page heading, with the icon when one
# is defined.
# NOTE(review): both return statements are damaged and are not valid Perl
# as they stand; kept verbatim.
sub heading_block {
    my $self = shift;
    return "
uri."\"> {'icon'}->{'src'}."\" alt=\"\" />

".$self->page_title."


" if defined $self->{'icon'}->{'src'}; return "
uri."\">

".$self->page_title."


";
}

1;

package RDF::RDFa::Generator::XMLTV::Hour;

BEGIN {
    our @ISA = qw(RDF::RDFa::Generator::XMLTV::Schedule);
};

# new($hour) -- an empty schedule for one clock hour.  $hour is truncated to
# its first ten characters (YYYYMMDDhh); when it has exactly that shape a
# DateTime for the top of the hour is stored under 'dt'.
sub new {
    my $class = shift;
    my $hour  = shift;
    my $self  = {
        'hour'       => substr($hour, 0, 10),
        'programmes' => [],
    };
    $self->{'dt'} = DateTime->new(
        'year' => $1, 'month' => $2, 'day' => $3,
        'hour' => $4, 'minute' => 0, 'second' => 0,
    ) if $self->{'hour'} =~ /^(\d{4})(\d{2})(\d{2})(\d{2})$/;
    bless $self, $class;
}

# is_different_day($other) -- true when $other is defined and its YYYYMMDD
# prefix differs from this hour's.
sub is_different_day {
    my $self  = shift;
    my $other = shift;
    return unless defined $other;
    return (substr($self->{'hour'}, 0, 8) ne substr($other->{'hour'}, 0, 8));
}

# date_string() -- the hour's date formatted with the configured d_fmt.
sub date_string {
    my $self = shift;
    return $self->{'dt'}->strftime($RDF::RDFa::Generator::XMLTV::d_fmt);
}

# sort_string() -- the YYYYMMDDhh key, which sorts chronologically.
sub sort_string {
    my $self = shift;
    return $self->{'hour'};
}

# page_name() -- file name for this hour.
sub page_name {
    my $self = shift;
    return sprintf("Hour__%s.html", $self->{'hour'});
}

# page_title() -- "Listings for ..." using a 12-hour clock, with Noon and
# Midnight spelled out.
sub page_title {
    my $self = shift;
    my $hr = substr($self->{'hour'}, 8) + 0;
    return "Listings for $hr AM" if ($hr > 0 && $hr < 12);
    return "Listings for ".($hr-12)." PM" if ($hr > 12 && $hr < 24);
    return "Listings for Noon" if $hr==12;
    return "Listings for Midnight";
}

# publish($dir, $menu) -- write this hour's page; programmes show both their
# channel and their genre.
sub publish {
    my $self = shift;
    my $dir  = shift;
    my $menu = shift;
    return $self->_publish_page($dir, $menu, 1, 1);
}

1;