#!/usr/bin/perl

use strict;
use warnings;

use CGI qw(:standard);
use CGI::Carp 'fatalsToBrowser';
use HTML::HTML5::Parser;
use HTML::HTML5::Sanity;
use RDF::RDFa::Parser;
use RDF::Trine;
use RDF::Trine::Serializer::NTriples;
use XML::LibXML;

# Input selection.
#
# When a command-line argument is present it is used as the input URI.
# Otherwise the script switches to CGI mode and reads the 'uri' request
# parameter, dying with a short message if it is missing or empty.
my $cgi_mode  = 0;
my $uri       = shift @ARGV or ($cgi_mode=1);
if ($cgi_mode)
{
	$uri = param('uri') or die "Need to provide an input URI.\n";
	
	# The CGI parameter is untrusted and is handed to parse_file() below,
	# which retrieves the resource on the server's behalf. Only accept web
	# URIs here so a remote caller cannot point the script at other schemes
	# or bare local paths. Command-line use is deliberately not restricted.
	$uri =~ m{\Ahttps?://}i
		or die "Only http and https input URIs are accepted.\n";
}

# Parse the input and choose RDFa parser defaults.
#
# $options is passed to parse_file() and read back afterwards: this block
# consults its 'parser_used' entry, and later code consults 'response'.
#
# Results left for the rest of the script:
#   $input_dom      - the DOM to hand to the RDFa parser
#   $rdfa_options   - one of the RDF::RDFa::Parser::OPTS_* option sets
#   $parse_comment  - human-readable note describing how the input was parsed
my $parser    = HTML::HTML5::Parser->new;
my $options   = {};
my $document  = $parser->parse_file($uri, $options);

my ($input_dom, $rdfa_options, $parse_comment);

# Fall back to an empty string so the match below cannot emit an
# "uninitialized value" warning if the entry was never set.
my $parser_used = defined $options->{'parser_used'} ? $options->{'parser_used'} : '';

if ($parser_used =~ /^HTML::HTML5::/i)
{
	# Input went through the HTML parser: pick HTML 4 or HTML 5 defaults
	# depending on the DOCTYPE's public identifier (looked up once).
	my $public_id = $parser->dtd_public_id($document);
	
	if (defined $public_id
	and $public_id =~ /HTML 4/i)
	{
		$rdfa_options  = RDF::RDFa::Parser::OPTS_HTML4;
		$parse_comment = 'Input parsed as HTML, using HTML 4.x defaults and auto_config.';
	}
	else
	{
		$rdfa_options  = RDF::RDFa::Parser::OPTS_HTML5;
		$parse_comment = 'Input parsed as HTML, using HTML 5 defaults and auto_config.';
	}
	
	$input_dom = HTML::HTML5::Sanity::fix_document($document);
}
else
{
	# Input was parsed by something else (reported as XML): treat it as
	# XHTML only when the root element is <html> in the XHTML namespace.
	my $root    = $document->documentElement;
	my $root_ns = $root->namespaceURI;
	
	# A root element with no namespace yields an undefined namespace URI;
	# check definedness first so the string comparison never warns.
	if ($root->localname eq 'html'
	&&  defined $root_ns
	&&  $root_ns eq 'http://www.w3.org/1999/xhtml')
	{
		$rdfa_options  = RDF::RDFa::Parser::OPTS_XHTML;
		$parse_comment = 'Input parsed as XML, using XHTML defaults and auto_config.';
	}
	else
	{
		$rdfa_options  = RDF::RDFa::Parser::OPTS_XML;
		$parse_comment = 'Input parsed as XML, using generic XML defaults and auto_config.';
	}
	
	$input_dom = $document;
}

# Enable auto_config on the chosen option set before building the parser.
$rdfa_options->{'auto_config'} = 1;

# Build the RDFa parser over the prepared DOM, using the base URI reported
# by the response object stored in $options, then process the document.
my $distiller = RDF::RDFa::Parser->new(
	$input_dom,
	$options->{'response'}->base,
	$rdfa_options,
);
$distiller->consume;

# In CGI mode an HTTP header has to precede the body.
print header(-type => "text/plain", -charset => "us-ascii") if $cgi_mode;

# Output: a comment line describing how the input was parsed, followed by
# the extracted graph serialized as N-Triples.
my $ntriples = RDF::Trine::Serializer::NTriples->new;
print "### $parse_comment ###\n";
print $ntriples->serialize_model_to_string($distiller->graph);
