#!/usr/bin/perl

# Parse a document for RDFa and print the extracted graph as N-Triples.
#
# The script runs in one of two modes:
#   * command line - the document location is the first argument;
#   * CGI          - used when no argument is given; the location is read
#                    from the "uri" request parameter and a text/plain
#                    HTTP header is emitted before the output.
#
# Output is a "### ... ###" comment line describing how the input was
# parsed, followed by the serialised triples.

use strict;
use warnings;

use CGI qw(:standard);
use CGI::Carp 'fatalsToBrowser';
use HTML::HTML5::Parser;
use HTML::HTML5::Sanity;
use RDF::RDFa::Parser;
use RDF::Trine;
use RDF::Trine::Serializer::NTriples;
use XML::LibXML;

my $XHTML_NS = 'http://www.w3.org/1999/xhtml';

# A command-line argument selects command-line mode.  Test for definedness
# and length rather than truth so that an argument of "0" is not mistaken
# for "no argument".
my $uri      = shift @ARGV;
my $cgi_mode = (defined $uri and length $uri) ? 0 : 1;

if ($cgi_mode) {
    $uri = param('uri')
        or die "Need to provide an input URI.\n";

    # The CGI parameter is untrusted.  Only web URIs are accepted so that a
    # remote client cannot make the server parse its own local files.
    # NOTE(review): this does not stop requests to internal HTTP hosts
    # (SSRF); add a host allow/deny list if this is exposed publicly.
    $uri =~ m{\Ahttps?://}i
        or die "Only http and https URIs are accepted.\n";
}

# parse_file() is handed $options; the 'parser_used' and 'response' entries
# read further down are taken from that same hash after the call.
my $parser   = HTML::HTML5::Parser->new;
my $options  = {};
my $document = $parser->parse_file($uri, $options);

# Do not echo the URI here: in CGI mode the message is sent to the browser.
defined $document
    or die "Could not retrieve or parse the input document.\n";

my $parser_used = defined $options->{'parser_used'}
    ? $options->{'parser_used'}
    : '';

my ($input_dom, $rdfa_options, $parse_comment);

if ($parser_used =~ /^HTML::HTML5::/i) {
    # HTML input: choose the HTML 4 or HTML 5 defaults from the DOCTYPE's
    # public identifier (absent identifier => HTML 5).
    my $public_id = $parser->dtd_public_id($document);

    if (defined $public_id and $public_id =~ /HTML 4/i) {
        $rdfa_options  = RDF::RDFa::Parser::OPTS_HTML4;
        $parse_comment = 'Input parsed as HTML, using HTML 4.x defaults and auto_config.';
    }
    else {
        $rdfa_options  = RDF::RDFa::Parser::OPTS_HTML5;
        $parse_comment = 'Input parsed as HTML, using HTML 5 defaults and auto_config.';
    }

    $input_dom = HTML::HTML5::Sanity::fix_document($document);
}
else {
    # XML input: an <html> root element in the XHTML namespace gets the
    # XHTML defaults, anything else the generic XML defaults.
    my $root    = $document->documentElement;
    my $root_ns = defined $root ? $root->namespaceURI : undef;

    # The root may have no namespace at all, so guard before comparing.
    my $is_xhtml = (defined $root
                    and $root->localname eq 'html'
                    and defined $root_ns
                    and $root_ns eq $XHTML_NS);

    if ($is_xhtml) {
        $rdfa_options  = RDF::RDFa::Parser::OPTS_XHTML;
        $parse_comment = 'Input parsed as XML, using XHTML defaults and auto_config.';
    }
    else {
        $rdfa_options  = RDF::RDFa::Parser::OPTS_XML;
        $parse_comment = 'Input parsed as XML, using generic XML defaults and auto_config.';
    }

    $input_dom = $document;
}

$rdfa_options->{'auto_config'} = 1;

# Use the base recorded on the fetch response; fall back to the input URI
# if no response object was stored in $options.
my $response = $options->{'response'};
my $base_uri = defined $response ? $response->base : $uri;

my $rdfa_parser = RDF::RDFa::Parser->new($input_dom, $base_uri, $rdfa_options);
$rdfa_parser->consume;

if ($cgi_mode) {
    print header(-type => "text/plain", -charset => "us-ascii");
}

my $ser = RDF::Trine::Serializer::NTriples->new;
print "### $parse_comment ###\n";
print $ser->serialize_model_to_string($rdfa_parser->graph);