#!/usr/bin/perl

# Copyright (c) 2009 Toby A Inkster
# 
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
# 
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
# 
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.

use CGI qw(:all);
use CGI::Carp qw(fatalsToBrowser);
use DateTime;
use Digest::SHA1 qw(sha1_hex);
use Getopt::Long;
use JSON;
use LWP::UserAgent;
use XML::Simple;
use URI;
use URI::Escape;
use strict;

my $format = param('format');

# Hard-coded relation for finger SPARQL endpoint.
my $rel_fingerpoint = 'http://ontologi.es/sparql#fingerpoint';

# Get e-mail address supplied on command line, clean up.
my $ident = param('mbox') || die "Need to supply an e-mail address\n";
$ident = "mailto:$ident" unless $ident =~ /^mailto:/i;
$ident = URI->new($ident);
my ($user, $host) = split /\@/, $ident->to;

# Instantiate an HTTP client
my $ua = LWP::UserAgent->new;
$ua->timeout(10);
$ua->env_proxy;

# Use HTTP Link header to find preferred finger SPARQL endpoint from address
my $httphost = "http://$host/";
my $response = $ua->head($httphost);
die "HTTP non-success when finding fingerpoint.\n"
	unless $response->is_success;
my $linkdata = HTTP::Link::Parser::parse_response_to_rdf($response);
my $sparql   = $linkdata->{ $httphost }->{ $rel_fingerpoint }->[0]->{'value'};

# If HTTP Link header was not present, look for HTML <link/> tag.
unless (defined $sparql)
{
	$response = $ua->get($httphost,
		'Accept' => 'application/xhtml+xml;q=1.0, text/html;q=0.9, */*;q=0.1');
	die "HTTP non-success when finding fingerpoint HTML.\n"
		unless $response->is_success;
	if ($response->header('content-type') =~ m`^(text/html|application/xhtml+xml|application/xml|text/xml)`i)
	{
		$sparql = URI->new_abs($1, URI->new($httphost))
			if ($response->content =~ m`<[Ll][Ii][Nn][Kk]\s+[Rr][Ee][Ll]="[^"]*http://ontologi\.es/sparql#fingerpoint[^"]*"\s+[Hh][Rr][Ee][Ff]="([^"]+)"\s*/?>`);
	}
}

die "Could not discover Fingerpoint server.\n"
	unless length $sparql;

# Figure out SPARQL query to run
my $sha1 = sha1_hex($ident);
my $query = "PREFIX foaf: <http://xmlns.com/foaf/0.1/>
PREFIX wot: <http://xmlns.com/wot/0.1/>
SELECT DISTINCT *
WHERE {
	{
		{ ?person foaf:mbox <$ident> . }
		UNION
		{ ?person foaf:mbox_sha1sum \"$sha1\" . }
	}
	OPTIONAL { ?person foaf:name ?name . }
	OPTIONAL { ?person foaf:homepage ?homepage . }
	OPTIONAL { ?person foaf:mbox ?mbox . }
	OPTIONAL { ?person foaf:weblog ?weblog . }
	OPTIONAL { ?person foaf:img ?image . }
	OPTIONAL { ?k wot:pubkeyAddress ?key ; wot:identity ?person . }
}";
my @fields = qw(name homepage mbox weblog image key);

# Get the SPARQL Results
my ($base_part, $params_part) = split /\?/, $sparql;
$params_part .= '&query=' . uri_escape($query);
$params_part =~ s/^\&//;
my $results = $ua->post(
	$base_part,
	'Content' => $params_part,
	'Accept' => 'application/sparql-results+json;q=1.0, application/sparql-results+xml;q=0.5');
die "HTTP non-success when performing SPARQL query at <$sparql>.\n"
	unless $results->is_success;
my $data = {};

# Parse JSON response
if ($results->header('content-type') =~ /json/)
{
	my $result_data = from_json($results->content);
	foreach my $binding (@{ $result_data->{'results'}->{'bindings'} })
	{
		foreach my $field (@fields)
		{
			if (defined $binding->{$field}->{'value'})
				{ $data->{ $binding->{'person'}->{'value'} }->{$field}->{ $binding->{$field}->{'value'} }++; }
		}
	}
}

# Parse XML response
else
{
	my $result_data = XMLin($results->content);
	foreach my $result (@{ $result_data->{'results'}->{'result'} })
	{
		my $person = $result->{'binding'}->{'person'}->{'uri'};
		$person = '_:' . $result->{'binding'}->{'person'}->{'bnode'}
			if defined $result->{'binding'}->{'person'}->{'bnode'};
		foreach my $field (@fields)
		{
			if (defined $result->{'binding'}->{$field}->{'uri'})
				{ $data->{ $person }->{$field}->{ $result->{'binding'}->{$field}->{'uri'} }++; }
			elsif (defined $result->{'binding'}->{$field}->{'literal'})
				{ $data->{ $person }->{$field}->{ $result->{'binding'}->{$field}->{'literal'} }++; }
		}
	}
}

# Output results - VCARD
if ('vcard' eq lc $format)
{
	print header("text/directory");
	
	foreach my $hit (sort keys %$data)
	{
		print "BEGIN:VCARD\n";
		print "VERSION:3.0\n";
		printf("SOURCE;X-QUERY=SPARQL:%s\n", vcard_escape($base_part));
		printf("FN:%s\n", vcard_escape(sort keys %{$data->{$hit}->{'name'}}));
		printf("UID:%s\n", vcard_escape($hit));
		foreach my $email (sort keys %{$data->{$hit}->{'mbox'}})
		{
			$email =~ s/^mailto://i;
			printf("EMAIL;TYPE=INTERNET:%s\n", vcard_escape($email));
		}
		foreach my $u (sort keys %{$data->{$hit}->{'homepage'}})
		{
			printf("URL:%s\n", vcard_escape($u));
		}
		foreach my $u (sort keys %{$data->{$hit}->{'weblog'}})
		{
			printf("URL;TYPE=X-WEBLOG:%s\n", vcard_escape($u));
		}
		foreach my $u (sort keys %{$data->{$hit}->{'image'}})
		{
			printf("PHOTO;VALUE=URI:%s\n", vcard_escape($u));
		}
		foreach my $u (sort keys %{$data->{$hit}->{'key'}})
		{
			printf("KEY;VALUE=URI:%s\n", vcard_escape($u));
		}
		printf("REV:%s\n", DateTime->now->strftime('%Y%m%dT%H%M%S'));
		print "END:VCARD\n";
	}
}

# Output results - FOAF
elsif ('foaf' eq lc $format)
{
	print header("application/rdf+xml");
	
	my ($mainClass, $topicLink);
	
	if (1 == scalar keys %$data)
	{
		$mainClass = 'PersonalProfileDocument';
		$topicLink = 'primaryTopic';
	}
	else
	{
		$mainClass = 'Document';
		$topicLink = 'topic';
	}
	
	$sparql = escapeHTML($sparql);
	my $dt = DateTime->now->strftime('%Y-%m-%dT%H:%M:%S').'Z';
	
	print <<HEADER;
<$mainClass rdf:about=""
	xmlns="http://xmlns.com/foaf/0.1/"
	xmlns:dcterms="http://purl.org/dc/terms/"
	xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
	xmlns:rdfs="http://www.w3.org/2000/01/rdf-schema#"
	xmlns:wot="http://xmlns.com/wot/0.1/">

	<dcterms:source rdf:resource="$sparql" />
	<dcterms:date rdf:datatype="http://www.w3.org/2001/XMLSchema#dateTime">$dt</dcterms:date>

HEADER
	
	foreach my $hit (sort keys %$data)
	{
		print "\t<$topicLink>\n";
		printf("\t\t<Agent %s=\"%s\">\n",
			substr($hit,0,2) eq '_:' ? 'rdf:nodeID' : 'rdf:about',
			escapeHTML(substr($hit,0,2) eq '_:' ? substr($hit,2) : $hit)
			);
		
		foreach my $name (sort keys %{$data->{$hit}->{'name'}})
		{
			printf("\t\t\t<name>%s</name>\n", escapeHTML($name));
		}
		foreach my $mbox (sort keys %{$data->{$hit}->{'mbox'}})
		{
			printf("\t\t\t<mbox rdf:resource=\"%s\" />\n", escapeHTML($mbox));
			printf("\t\t\t<mbox_sha1sum>%s</mbox_sha1sum>\n", escapeHTML(sha1_hex($mbox)));
		}		
		foreach my $u (sort keys %{$data->{$hit}->{'homepage'}})
		{
			printf("\t\t\t<homepage rdf:resource=\"%s\" />\n", escapeHTML($u));
		}
		foreach my $u (sort keys %{$data->{$hit}->{'weblog'}})
		{
			printf("\t\t\t<weblog rdf:resource=\"%s\" />\n", escapeHTML($u));
		}
		foreach my $u (sort keys %{$data->{$hit}->{'image'}})
		{
			printf("\t\t\t<image rdf:resource=\"%s\" />\n", escapeHTML($u));
		}
		foreach my $u (sort keys %{$data->{$hit}->{'key'}})
		{
			printf("\t\t\t<wot:hasKey rdf:parseType=\"Resource\">\n");
			printf("\t\t\t\t<wot:pubkeyAddress rdf:resource=\"%s\" />\n",
				escapeHTML($u));
			printf("\t\t\t\t<wot:identity %s=\"%s\" />\n",
				substr($hit,0,2) eq '_:' ? 'rdf:nodeID' : 'rdf:resource',
				escapeHTML(substr($hit,0,2) eq '_:' ? substr($hit,2) : $hit)
				);
			printf("\t\t\t</wot:hasKey>\n");
		}
		print "\t\t</Agent>\n";
		print "\t</$topicLink>\n";
	}
		
	print "\n</$mainClass>\n";
}

# Output results - plain text
else
{
	print header("text/plain");
	
	foreach my $hit (sort keys %$data)
	{
		print "<$hit>\n";
		foreach my $field (@fields)
		{
			print "\t$field:\n"
				if scalar keys %{$data->{$hit}->{$field}};
			foreach my $value (sort keys %{ $data->{$hit}->{$field} })
			{
				print "\t\t$value\n";
			}
		}
		print "\n";
	}
}

sub vcard_escape
{
	return $_[0];
}

# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
 # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #

package HTTP::Link::Parser;

use URI;
use strict;

sub relationship
{
	my $str = shift;

	if ($str =~ /^([a-z][a-z0-9\+\.\-]*)\:/i)
	{
		# seems to be an absolute URI, so can safely return "as is".
		return $str;
	}

	return 'http://www.iana.org/assignments/relation/' . (lc $str);

	my $url = url (lc $str), 'http://www.iana.org/assignments/relation/';
	return $url->abs->as_string;

	return undef;
}

sub parse_value_string
{
	my $hdrv = shift;
	my $base = shift;
	my $rv   = {};
	
	my $uri  = undef;
	if ($hdrv =~ /^(\s*<([^>]*)>\s*)/)
	{
		$uri  = $2;
		$hdrv = substr($hdrv, length($1));
	}
	else
	{
		return $rv;
	}
	
	$rv->{'URI'} = URI->new_abs($uri, $base);
	
	while ($hdrv =~ /^(\s*\;\s*(\/|[a-z0-9-]+)\s*\=\s*("[^"]*"|'[^']*'|[^\s\'\"\;\,]+)\s*)/i)
	{
		$hdrv = substr($hdrv, length($1));
		my $key = lc $2;
		my $val = $3;
	
		$val =~ s/(^"|"$)//g if ($val =~ /^".*"$/);
		$val =~ s/(^'|'$)//g if ($val =~ /^'.*'$/ && $3 !~ /^".*"$/);
			
		if ($key eq 'rel')
		{
			$val =~ s/(^\s+)|(\s+$)//g;
			$val =~ s/\s+/ /g;
			
			my @rels = split / /, $val;
			foreach my $rel (@rels)
				{ push @{ $rv->{'rel'} }, $rel; }
		}
		elsif ($key eq 'rev')
		{
			$val =~ s/(^\s+)|(\s+$)//g;
			$val =~ s/\s+/ /g;
			
			my @rels = split / /, $val;
			foreach my $rel (@rels)
				{ push @{ $rv->{'rev'} }, $rel; }
		}
		elsif ($key eq 'anchor')
		{
			$rv->{'anchor'} = URI->new_abs($val, $base);
		}
		else
		{
			$rv->{ $key } = $val;
		}
	}
	
	return $rv;
}

sub parse_response
{
	my $response = shift;
	my $rv       = [];
	my $base     = URI->new($response->base);
	
	foreach my $header ($response->header('link'))
	{
		push @$rv, parse_value_string($header, $base);
	}
	
	return $rv;
}

sub parse_response_to_rdf
{
	my $response = shift;
	my $base     = URI->new($response->base);
	my $links    = parse_response($response);
	my $rv       = {};
	
	foreach my $link (@$links)
	{
		my $subject = $base;
		
		$subject = $link->{'anchor'}
			if defined $link->{'anchor'};
		
		my $object = $link->{'URI'};
		
		foreach my $r (@{ $link->{'rel'} })
		{
			push @{ $rv->{ $subject }->{ $r } },
				{
					'value'    => "$object",
					'type'     => 'uri',
				};
		}

		foreach my $r (@{ $link->{'rev'} })
		{
			push @{ $rv->{ $object }->{ $r } },
				{
					'value'    => "$subject",
					'type'     => 'uri',
				};
		}
		
		if (defined $link->{'title'})
		{
			push @{ $rv->{ $object }->{ 'http://purl.org/dc/terms/title' } },
				{
					'value'    => $link->{'title'},
					'type'     => 'literal',
				};
		}
	}
	
	return $rv;
}

1;
