#!/usr/bin/perl
use Data::Dumper;
use Getopt::Long;
use XML::Simple;
use strict;
use warnings;
my $output_directory = './';
Getopt::Long::Configure("bundling", "ignorecase_always", "permute");
GetOptions(
"target-directory|d=s" => \$output_directory,
"dt_fmt=s" => \$RDF::RDFa::Generator::XMLTV::dt_fmt,
"d_fmt=s" => \$RDF::RDFa::Generator::XMLTV::d_fmt,
"t_fmt=s" => \$RDF::RDFa::Generator::XMLTV::t_fmt,
);
my $input_file = shift @ARGV
|| die "Usage: xmltv2xhtml.pl --target-directory=DIR INPUTFILE\n";
my $xs = XML::Simple->new(
ForceArray => [qw(actor subtitles programme channel)],
ForceContent => [qw(desc)],
KeyAttr => [],
);
my $data = $xs->parse_file($input_file);
my $channels = {};
my $hours = {};
my $genres = {};
# Loop through channels, reading channel data.
foreach my $c (@{ $data->{'channel'} })
{
$channels->{$c->{'id'}} = RDF::RDFa::Generator::XMLTV::Channel->new($c);
}
# Loop through programmes, reading programme data.
# Add programmes to schedules.
foreach my $p (@{ $data->{'programme'} })
{
my $start_hour = substr($p->{'start'}, 0, 10);
$hours->{ $start_hour } = RDF::RDFa::Generator::XMLTV::Hour->new($start_hour)
unless defined $hours->{ $start_hour };
my $h = $hours->{ $start_hour };
my $g;
if (length $p->{'category'}->{'content'})
{
$genres->{ $p->{'category'}->{'content'} } = RDF::RDFa::Generator::XMLTV::Genre->new($p->{'category'}->{'content'})
unless defined $genres->{ $p->{'category'}->{'content'} };
$g = $genres->{ $p->{'category'}->{'content'} };
}
my $c = $channels->{ $p->{'channel'} };
my $P = RDF::RDFa::Generator::XMLTV::Programme->new($p, $c, $g);
$c->add_program($P);
$h->add_program($P);
$g->add_program($P) if ($g);
}
my $menu_string = RDF::RDFa::Generator::XMLTV::make_menu({
'Chronological' => $hours,
'By Channel' => $channels,
'By Genre' => $genres,
});
foreach my $schedule (values %$channels)
{ $schedule->publish($output_directory, $menu_string); }
foreach my $schedule (values %$hours)
{ $schedule->publish($output_directory, $menu_string); }
foreach my $schedule (values %$genres)
{ $schedule->publish($output_directory, $menu_string); }
package RDF::RDFa::Generator::XMLTV;
BEGIN
{
our $d_fmt = '%F';
our $dt_fmt = '%F %R';
our $t_fmt = '%R';
}
sub make_menu
{
my $sections = shift;
my $rv = "\t\t<div class=\"menu\">\n";
foreach my $label (keys %$sections)
{
my @pages = sort
{
return $a->sort_string cmp $b->sort_string;
}
(values %{$sections->{$label}});
$rv .= "\t\t\t<div>\n"
. "\t\t\t\t<h2>$label</h2>\n";
if (UNIVERSAL::isa($pages[0], 'RDF::RDFa::Generator::XMLTV::Hour'))
{ $rv .= sprintf("\t\t\t\t<h3>%s</h3>\n", $pages[0]->date_string); }
$rv .= "\t\t\t\t<ul>\n";
my $last_page = undef;
foreach my $page (@pages)
{
if (UNIVERSAL::isa($page, 'RDF::RDFa::Generator::XMLTV::Hour')
&& $page->is_different_day($last_page))
{ $rv .= sprintf("\t\t\t\t</ul>\n\t\t\t\t<h3>%s</h3>\n\t\t\t\t<ul>\n", $page->date_string); }
$rv .= sprintf("\t\t\t\t\t<li><a href=\"%s\">%s</a></li>\n",
$page->page_name,
$page->page_title);
$last_page = $page;
}
$rv .= "\t\t\t\t</ul>\n"
. "\t\t\t</div>\n";
}
$rv .= "\t\t</div>\n";
return $rv;
}
1;
package RDF::RDFa::Generator::XMLTV::Programme;
use HTML::Entities qw(encode_entities_numeric);
use Data::Dumper;
use DateTime;
use DateTime::Format::Strptime;
use Digest::SHA1 qw(sha1_hex);
sub new
{
my $class = shift;
my $self = shift;
my $chan = shift;
my $genre = shift;
$self->{'start'} = DateTime->new(
'year' => $1,
'month' => $2,
'day' => $3,
'hour' => $4,
'minute' => $5,
'second' => $6,
'time_zone' => $7
)
if $self->{'start'} =~ /^(\d{4})(\d{2})(\d{2})(\d{2})(\d{2})(\d{2})\s*(.\d{4})$/;
$self->{'stop'} = DateTime->new(
'year' => $1,
'month' => $2,
'day' => $3,
'hour' => $4,
'minute' => $5,
'second' => $6,
'time_zone' => $7
)
if $self->{'stop'} =~ /^(\d{4})(\d{2})(\d{2})(\d{2})(\d{2})(\d{2})\s*(.\d{4})$/;
$self->{'duration'} = $self->{'stop'}->subtract_datetime($self->{'start'});
$self->{'chan'} = $chan;
$self->{'genre'} = $genre;
bless $self, $class;
}
sub get
{
my $self = shift;
my $field = shift;
my @bits = split m#/#, $field;
my $it = $self;
my $this_bit;
while (@bits)
{
$this_bit = shift @bits;
if (ref $it eq 'HASH' || ref $it eq ref $self)
{ $it = $it->{$this_bit}; }
elsif (ref $it eq 'ARRAY')
{ $it = $it->[0 + $this_bit]; }
}
no warnings;
return "$it";
}
sub hget
{
my $self = shift;
return encode_entities_numeric($self->get(@_));
}
sub langattr
{
my $self = shift;
my $key = shift;
my $lang = $self->get("$key/lang");
return "xml:lang=\"$lang\"" if ($lang);
return '';
}
sub id
{
my $self = shift;
$self->{'ID'} = sha1_hex($self->{'channel'}.$self->{'start'}->iso8601)
unless defined $self->{'ID'};
return $self->{'ID'};
}
sub uri
{
my $self = shift;
my $part = shift || '';
# Identifying URIs for the broadcast on a particular channel at a particular
# time. These URIs are opaque-looking and non-dereferenceable, but any URI
# at all is better than no URI!
return 'tag:buzzword.org.uk,2009:tv/' . $self->id . '/' . $part;
}
sub interval_uri
{
my $self = shift;
$self->{'INTERVAL_URI'} = sprintf(
'http://placetime.com/interval/gregorian/%s%s/%s',
$self->{'start'}->iso8601,
($self->{'start'}->time_zone->is_utc ? 'Z' : $self->{'start'}->strtime('%z')),
RDF::RDFa::Generator::XMLTV::Programme::DurationHelper::to_iso8601($self->{'duration'}))
unless defined $self->{'INTERVAL_URI'};
return $self->{'INTERVAL_URI'};
}
sub to_rdfa
{
my $self = shift;
my $showchan = shift || 0;
my $showgen = shift || 0;
my $css = shift || 'item';
my $hx = shift || 2;
my $hxp = $hx + 1;
my $hxpp = $hx + 2;
my $dt_fmt = $RDF::RDFa::Generator::XMLTV::t_fmt;
my $_subtitle = '';
$_subtitle = "<h$hxp property=\"dc:title\">".$self->hget('sub-title/content')."</h$hxp>"
if (defined $self->{'sub-title'});
my $_category = '';
if ($showgen && defined $self->{'genre'})
{
my $genreURI = $self->{'genre'}->page_name;
$_category = "<div rel=\"tag:taggedWithTag\"><span typeof=\"tag:Tag\" property=\"tag:name\" datatype=\"\"><a rel=\"rdfs:seeAlso\" href=\"".$genreURI."\">".$self->hget('category/content')."</a></span></div>"
if (defined $self->{'category'});
}
my $_credits = '';
if (defined $self->{'credits'})
{
$_credits = "<ul>\n";
foreach my $role (keys %{$self->{'credits'}})
{
my $rcode = 'dc:contributor';
$rcode .= ' po:actor' if $role eq 'actor';
$rcode .= ' po:director' if $role eq 'director';
if ('ARRAY' eq ref $self->{'credits'}->{$role})
{
foreach my $name (@{$self->{'credits'}->{$role}})
{
$_credits .= sprintf("\t\t\t\t\t\t\t<li rel=\"%s\"><span typeof=\"foaf:Person\"><span property=\"foaf:name\">%s</span> (%s)</span></li>\n",
$rcode, encode_entities_numeric($name->{'content'}), $role);
}
}
else
{
$_credits .= sprintf("\t\t\t\t\t\t\t<li rel=\"%s\"><span typeof=\"foaf:Person\"><span property=\"foaf:name\">%s</span> (%s)</span></li>\n",
$rcode, encode_entities_numeric($self->{'credits'}->{$role}->{'content'}), $role);
}
}
$_credits .= "\t\t\t\t\t\t</ul>\n";
}
my $_aspect = '';
$_aspect = "<div property=\"po:aspect_ratio\" content=\"16:9\">(widescreen)</div>"
if (defined $self->{'video'}->{'aspect'}->{'content'}
&& $self->{'video'}->{'aspect'}->{'content'} eq '16:9');
my $_interval;
$_interval = sprintf(
'<span typeof="t:Interval" about="%s" property="rdfs:label" datatype="">'
.'<span property="t:start" content="%s" datatype="xsd:dateTime">%s</span>'
.'–<span property="t:end" content="%s" datatype="xsd:dateTime">%s</span>'
.' <em property="t:duration" content="%s" datatype="xsd:duration" rel="t:timeline" resource="[s:universaltimeline]">(%s)</em></span>',
$self->interval_uri,
($self->{'start'}->time_zone->is_utc ? $self->{'start'}->strftime('%FT%TZ') : $self->{'start'}->strftime('%FT%T%z')),
$self->{'start'}->strftime($dt_fmt),
($self->{'stop'}->time_zone->is_utc ? $self->{'stop'}->strftime('%FT%TZ') : $self->{'stop'}->strftime('%FT%T%z')),
$self->{'stop'}->strftime($dt_fmt),
RDF::RDFa::Generator::XMLTV::Programme::DurationHelper::to_iso8601($self->{'duration'}),
RDF::RDFa::Generator::XMLTV::Programme::DurationHelper::to_friendly($self->{'duration'})
);
my $_broadcastType = 'Broadcast';
$_broadcastType = 'RepeatBroadcast' if defined $self->{'previously-shown'};
my $_access = '';
if (defined $self->{'subtitles'})
{
$_access .= '<div rel="po:subtitle">';
foreach my $st (@{$self->{'subtitles'}})
{
$_access .= sprintf('<span typeof="po:Subtitle" property="po:subtitle_type">%s</span> ', $st->{'type'});
}
$_access .= '</div>';
}
my $_chan = '';
if ($showchan && defined $self->{'chan'})
{
$_chan = "<div rel=\"po:broadcast_on\">".$self->{'chan'}->to_rdfa_short."</div>";
}
else
{
$_chan = '<span rel="po:broadcast_on" resource="'.$self->{'chan'}->uri.'"></span>';
}
return "
<div id=\"id_".$self->id."\" about=\"".$self->uri('version')."\" typeof=\"po:Version\" class=\"$css\">
<div rev=\"po:broadcast_of\">
<div typeof=\"po:$_broadcastType\" about=\"".$self->uri('broadcast')."\">
$_chan
<div rel=\"e:time\">$_interval</div>
</div>
</div>
<div rel=\"dc:isVersionOf\" rev=\"po:version dc:hasVersion\">
<div typeof=\"po:Episode\" about=\"".$self->uri('episode')."\" >
<div rel=\"dc:isPartOf\" rev=\"dc:hasPart po:episode\">
<h$hx typeof=\"po:Series\" property=\"dc:title\">".$self->hget('title/content')."</h$hx>
</div>
$_subtitle
<div ".$self->langattr('desc')." property=\"dc:abstract po:synopsis\">".$self->hget('desc/content')."</div>
$_category
$_credits
</div>
</div>
$_aspect
$_access
</div>
";
}
1;
package RDF::RDFa::Generator::XMLTV::Programme::DurationHelper;
sub to_iso8601
{
my $this = shift;
my $str;
# We coerce weeks into days and nanoseconds into fractions of a second
# for compatibility with xsd:duration.
if ($this->is_negative)
{ $str .= '-P'; }
else
{ $str .= 'P'; }
if ($this->years)
{ $str .= $this->years.'Y'; }
if ($this->months)
{ $str .= $this->months.'M'; }
if ($this->weeks || $this->days)
{ $str .= ($this->days + (7 * $this->weeks)).'D'; }
$str .= 'T';
if ($this->hours)
{ $str .= $this->hours.'H'; }
if ($this->minutes)
{ $str .= $this->minutes.'M'; }
if ($this->seconds)
{ $str .= ($this->seconds + ($this->nanoseconds / 1000000000)).'S'; }
$str =~ s/T$//;
return $str;
}
sub to_friendly
{
my $this = shift;
my $str;
# We coerce weeks into days and nanoseconds into fractions of a second
# for compatibility with xsd:duration.
if ($this->years)
{ $str .= $this->years.' years, '; }
if ($this->months)
{ $str .= $this->months.' months, '; }
if ($this->weeks || $this->days)
{ $str .= ($this->days + (7 * $this->weeks)).' days, '; }
if ($this->hours)
{ $str .= $this->hours.' hr, '; }
if ($this->minutes)
{ $str .= $this->minutes.' min, '; }
if ($this->seconds)
{ $str .= ($this->seconds + ($this->nanoseconds / 1000000000)).' sec'; }
$str =~ s/, $//;
return $str;
}
1;
package RDF::RDFa::Generator::XMLTV::Schedule;
sub add_program
{
my $self = shift;
push @{ $self->{'programmes'} }, shift;
}
sub prologue
{
my $self = shift;
my $title = $self->page_title;
return <<EOF;
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML+RDFa 1.0//EN"
"http://www.w3.org/MarkUp/DTD/xhtml-rdfa-1.dtd">
<!--
The following document contains embedded RDFa data (see <http://rdfa.info>).
This comment provides notes on that data.
* Listings data is described using the BBC's Broadcast Ontology.
See <http://purl.org/ontology/po/2009-02-20.shtml>.
* Episodes have Versions; Versions have Broadcasts; Broadcasts have Intervals.
* Episodes, Versions and Broadcasts all have URIs. These URIs are opaque and
non-dereferencable, but I consider that better than not having URIs at all!
* Intervals have dereferencable URIs, thanks to placetime.com.
* Channels also have non-dereferencable URIs, thanks to RFC 2838.
-->
<html
xmlns="http://www.w3.org/1999/xhtml"
xmlns:xsd="http://www.w3.org/2001/XMLSchema#"
xmlns:foaf="http://xmlns.com/foaf/0.1/"
xmlns:dc="http://purl.org/dc/terms/"
xmlns:po="http://purl.org/ontology/po/"
xmlns:e="http://purl.org/NET/c4dm/event.owl#"
xmlns:t="http://purl.org/NET/c4dm/timeline.owl#"
xmlns:tag="http://www.holygoat.co.uk/owl/redwood/0.1/tags/"
xmlns:h5="http://buzzword.org.uk/rdf/h5#"
xmlns:rdfs="http://www.w3.org/2000/01/rdf-schema#">
<head profile="http://www.w3.org/1999/xhtml/vocab">
<title property="dc:title">$title</title>
<link rel="stylesheet" media="screen,projection" type="text/css" href="screen.css" />
<meta property="h5:generator" name="Generator" content="RDF::RDFa::Generator::XMLTV" />
</head>
<body>
EOF
}
sub epilogue
{
my $self = shift;
my $menu = shift;
return "\n$menu\n\t\t</div>\n\t</body>\n</html>\n";
}
sub heading_block
{
my $self = shift;
return "
<div class=\"head\">
<h1>".$self->page_title."</h1>
</div>
<div class=\"main\">
";
}
sub shortcuts
{
my $self = shift;
my $rv = "\n\t\t\t<div class=\"shortcuts\">\n";
$rv .= "\t\t\t\t<h2>Summary</h2>\n";
$rv .= "\t\t\t\t<ul>\n";
foreach my $p (@_)
{
$rv .= sprintf("\t\t\t\t\t<li>%s: <a href=\"#id_%s\">%s</a></li>\n",
$p->{'start'}->strftime($RDF::RDFa::Generator::XMLTV::t_fmt),
$p->id,
$p->hget('title/content'));
}
$rv .= "\t\t\t\t</ul>\n";
$rv .= "\t\t\t</div>\n";
return $rv;
}
1;
package RDF::RDFa::Generator::XMLTV::Genre;
BEGIN{ our @ISA = qw(RDF::RDFa::Generator::XMLTV::Schedule); };
sub new
{
my $class = shift;
my $genre = shift;
my $self = {
'genre' => $genre,
'programmes' => [],
};
bless $self, $class;
}
sub sort_string
{
my $self = shift;
return lc($self->{'genre'});
}
sub page_name
{
my $self = shift;
my $g = $self->{'genre'}; $g =~ s/[^A-Za-z]//g;
return sprintf("Genre__%s.html", $g);
}
sub page_title
{
my $self = shift;
return $self->{'genre'};
}
sub publish
{
my $self = shift;
my $dir = shift;
my $menu = shift;
my @progs = sort
{
return $a->{'start'} cmp $b->{'start'}
unless $a->{'start'} eq $b->{'start'};
return lc($a->{'chan'}->{'display-name'}->{'content'}) cmp lc($b->{'chan'}->{'display-name'}->{'content'});
}
@{$self->{'programmes'}};
open OUT, ">".$dir.$self->page_name;
print OUT $self->prologue;
print OUT $self->heading_block;
my $i = 0;
foreach my $p (@progs)
{
print OUT $p->to_rdfa(1, 0, (++$i%2?'odd item':'even item'));
}
print OUT $self->shortcuts(@progs);
print OUT $self->epilogue($menu);
close OUT;
}
1;
package RDF::RDFa::Generator::XMLTV::Channel;
BEGIN{ our @ISA = qw(RDF::RDFa::Generator::XMLTV::Schedule); };
sub new
{
my $class = shift;
my $self = shift;
$self->{'programmes'} = [];
bless $self, $class;
}
sub sort_string
{
my $self = shift;
return lc($self->{'display-name'}->{'content'});
}
sub page_name
{
my $self = shift;
my $pn = $self->{'id'}; $pn =~ s/\./_/g;
return sprintf("Channel__%s.html", $pn);
}
sub page_title
{
my $self = shift;
return $self->{'display-name'}->{'content'};
}
sub uri
{
my $self = shift;
# See RFC 2838.
return 'tv:'.$self->{'id'};
}
sub to_rdfa_short
{
my $self = shift;
if (defined $self->{'icon'}->{'src'})
{
return sprintf('<strong about="%s" typeof="po:TV"><img style="height:50px;width:66px" rel="foaf:img" src="%s" alt="" /> <a property="foaf:name" rel="rdfs:seeAlso foaf:page" href="%s">%s</a></strong>',
$self->uri,
$self->{'icon'}->{'src'},
$self->page_name,
$self->{'display-name'}->{'content'});
}
return sprintf('<strong about="%s" typeof="po:TV"><a property="foaf:name" rel="foaf:page rdfs@seeAlso" href="%s">%s</a></strong>',
$self->uri,
$self->page_name,
$self->{'display-name'}->{'content'});
}
sub publish
{
my $self = shift;
my $dir = shift;
my $menu = shift;
my @progs = sort
{
return $a->{'start'} cmp $b->{'start'}
unless $a->{'start'} eq $b->{'start'};
return lc($a->{'chan'}->{'display-name'}->{'content'}) cmp lc($b->{'chan'}->{'display-name'}->{'content'});
}
@{$self->{'programmes'}};
open OUT, ">".$dir.$self->page_name;
print OUT $self->prologue;
print OUT $self->heading_block;
my $i = 0;
foreach my $p (@progs)
{
print OUT $p->to_rdfa(0, 1, (++$i%2?'odd item':'even item'));
}
print OUT $self->shortcuts(@progs);
print OUT $self->epilogue($menu);
close OUT;
}
sub heading_block
{
my $self = shift;
return "
<div class=\"head\" rel=\"foaf:primaryTopic\">
<div typeof=\"po:TV\" about=\"".$self->uri."\">
<img rel=\"foaf:img\" src=\"".$self->{'icon'}->{'src'}."\" alt=\"\" />
<h1 property=\"foaf:name\">".$self->page_title."</h1>
<br />
</div>
</div>
<div class=\"main\">
"
if defined $self->{'icon'}->{'src'};
return "
<div class=\"head\" rel=\"foaf:primaryTopic\">
<div typeof=\"po:TV\" about=\"".$self->uri."\">
<h1 property=\"foaf:name\">".$self->page_title."</h1>
<br />
</div>
</div>
<div class=\"main\">
";
}
1;
package RDF::RDFa::Generator::XMLTV::Hour;
BEGIN { our @ISA = qw(RDF::RDFa::Generator::XMLTV::Schedule); };
sub new
{
my $class = shift;
my $hour = shift;
my $self = {
'hour' => substr($hour, 0, 10),
'programmes' => [],
};
$self->{'dt'} = DateTime->new(
'year' => $1,
'month' => $2,
'day' => $3,
'hour' => $4,
'minute' => 0,
'second' => 0,
)
if $self->{'hour'} =~ /^(\d{4})(\d{2})(\d{2})(\d{2})$/;
bless $self, $class;
}
sub is_different_day
{
my $self = shift;
my $other = shift;
return unless defined $other;
return (substr($self->{'hour'}, 0, 8) ne substr($other->{'hour'}, 0, 8));
}
sub date_string
{
my $self = shift;
return $self->{'dt'}->strftime($RDF::RDFa::Generator::XMLTV::d_fmt);
}
sub sort_string
{
my $self = shift;
return $self->{'hour'};
}
sub page_name
{
my $self = shift;
return sprintf("Hour__%s.html", $self->{'hour'});
}
sub page_title
{
my $self = shift;
my $hr = substr($self->{'hour'}, 8) + 0;
return "Listings for $hr AM"
if ($hr > 0 && $hr < 12);
return "Listings for ".($hr-12)." PM"
if ($hr > 12 && $hr < 24);
return "Listings for Noon"
if $hr==12;
return "Listings for Midnight";
}
sub publish
{
my $self = shift;
my $dir = shift;
my $menu = shift;
my @progs = sort
{
return $a->{'start'} cmp $b->{'start'}
unless $a->{'start'} eq $b->{'start'};
return lc($a->{'chan'}->{'display-name'}->{'content'}) cmp lc($b->{'chan'}->{'display-name'}->{'content'});
}
@{$self->{'programmes'}};
open OUT, ">".$dir.$self->page_name;
print OUT $self->prologue;
print OUT $self->heading_block;
my $i = 0;
foreach my $p (@progs)
{
print OUT $p->to_rdfa(1, 1, (++$i%2?'odd item':'even item'));
}
print OUT $self->shortcuts(@progs);
print OUT $self->epilogue($menu);
close OUT;
}
1;