#!/usr/bin/env python
# EasyPub: easy publication of RDF vocabulary
# Copyright (C) 2009 Pierre-Antoine Champin
#
# EasyPub is free software: you can redistribute it and/or modify
# it under the terms of the GNU Lesser General Public License as published
# by the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# KTBS is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with KTBS. If not, see <http://www.gnu.org/licenses/>.
"""
This is a drop-in CGI/WSGI script for publishing RDF vocabulary.

Quick start
===========

Assuming you want to publish the vocabulary http://example.com/mydir/myvoc,
the recipe with the most chances to work is the following:

1. Make `myvoc` a directory at a place where your HTTP server will serve it
   at the desired URI.
2. Copy the script in this directory as 'index.cgi' (or 'index.wsgi' if your
   server has WSGI support).
3. In the same directory, put two files named 'index.html' and 'index.rdf'

At this point, it may work (if you are lucky), or you may have to tell your
HTTP server that the directory index (i.e. the file to serve for the bare
directory) is index.cgi (or index.wsgi). In apache, this is done by creating
(if not present) a `.htaccess` file in the `myvoc` directory, and adding the
following line::

    DirectoryIndex index.cgi

(or `index.wsgi`, accordingly)

Fortunately, this option is allowed to end-users by most webmasters.

More generally
==============

The script will redirect, according to the Accept HTTP header, to a file with
the same name but a different extension. The file may have no extension at
all, so the following layout would work as well::

    mydir/myvoc (the script)
    mydir/myvoc.html
    mydir/myvoc.rdf

However, the tricky part is to convince the HTTP server to consider `myvoc`
(an extension-less file) as a CGI script (a thing in which I didn't succeed
for the moment...).

The interesting feature of such a config is that it would support
"slash-based" vocabulary. For example, http://example.com/mydir/myvoc/MyTerm
would still redirect to the html or rdf file. This would not work with the
`index.cgi` recipe.

The script can be configured to serve different files or support other mime
types by altering the `MAPPING` constant below.
"""

import posixpath

try:  # Python 3
    from html import escape as _html_escape
except ImportError:  # Python 2
    from cgi import escape as _html_escape

# the list below maps mime-types to redirection URL; %s is to be replaced by
# the script name (without its extension); note that the order may be
# significant (when matching */*)
MAPPING = [
    ("text/html", "%s.html"),
    ("application/rdf+xml", "%s.rdf"),
    ## uncomment the following if applicable
    #("application/turtle", "%s.ttl"),
    #("text/n3", "%s.n3"),
]

# Body of the 303 response; takes two values: the link target and link text.
HTML_REDIRECT = """
Non-Information Resource

Non-Information Resource

You should be redirected to <a href="%s">%s</a>.
"""

# Body of the 406 response; takes the concatenated HTML_REPRESENTATION items.
HTML_NOT_ACCEPTABLE = """
No acceptable representation

No acceptable representation

This server has no representation of the required resource that is acceptable by your web agent. Available representations are:

%s
"""

# One entry of the 406 listing; takes the keys `location` and `mimetype`.
HTML_REPRESENTATION = "\n\n%(location)s (%(mimetype)s)\n\n\n"


def application(env, start_response):
    """
    Find the most appropriate redirection, and issue an HTTP response
    accordingly.

    `env` is the WSGI environment; only `HTTP_ACCEPT` and `SCRIPT_NAME` are
    read. `start_response` is the WSGI callback. The function is a generator
    yielding the response body as a single bytes chunk: a 303 response with a
    `Location` header when a representation is acceptable, a 406 response
    listing the available representations otherwise.
    """
    script_base = _script_base(env.get("SCRIPT_NAME", ""))
    redirection = find_redirection(env.get("HTTP_ACCEPT"))

    if redirection is None:
        # TODO should check that HTML is acceptable...
        representations = "".join(
            HTML_REPRESENTATION % {
                "location": _html_escape(
                    _expand(location, script_base), True),
                "mimetype": _html_escape(mimetype, True),
            }
            for mimetype, location in MAPPING
        )
        status = "406 No Acceptable Representation"
        body = HTML_NOT_ACCEPTABLE % representations
        extra_headers = []
    else:
        redirection = _expand(redirection, script_base)
        # the script name comes from the request, so escape it in the markup
        # (the Location header gets the raw value)
        escaped = _html_escape(redirection, True)
        status = "303 Non-Information Resource"
        body = HTML_REDIRECT % (escaped, escaped)
        extra_headers = [("Location", redirection)]

    # WSGI bodies are bytes, and Content-Length counts bytes, not characters
    body = _to_bytes(body)
    start_response(status, [
        ("Content-Type", "text/html; charset=us-ascii"),
        ("Content-Length", str(len(body))),
    ] + extra_headers)
    yield body


def _script_base(script_name):
    """
    Return script_name without the extension of its last path segment.
    """
    # splitext only looks at the last segment, so a dot in a directory name
    # (e.g. "/my.dir/myvoc") does not truncate the path
    return posixpath.splitext(script_name)[0]


def _expand(pattern, script_base):
    """
    Replace the %s placeholder of a MAPPING pattern (if any) by script_base.
    """
    if "%s" in pattern:
        return pattern % script_base
    return pattern


def _to_bytes(text):
    """
    Return text as ASCII bytes (non-ASCII characters become HTML character
    references); byte strings are returned unchanged.
    """
    if isinstance(text, bytes):
        return text
    return text.encode("ascii", "xmlcharrefreplace")


def find_redirection(http_accept):
    """
    Compare the HTTP Accept header field with the available mapping.

    Return the redirection pattern of the first MAPPING entry matching the
    most preferred accepted mimetype, or None if nothing matches. A missing
    header (None) is handled as "*/*".
    """
    if http_accept is None:
        http_accept = "*/*"
    for accepted in sort_accept(http_accept):
        # FIXME: we currently ignore mimetype parameters for matching:
        accepted_type = accepted[0]
        for mimetype, redirection in MAPPING:
            if mime_match(accepted_type, mimetype):
                return redirection
    return None  # failed


def sort_accept(accept_str):
    """
    Transform a string complying with the HTTP Accept syntax into a sorted
    list of mimetype (possibly with parameters).

    Each element of the result is a list [ mimetype, parameters* ] where each
    parameter is the list obtained by splitting it on "=". Elements are
    ordered by decreasing priority. Empty elements and elements with a null
    quality (q=0, i.e. "not acceptable") are dropped; note that such an
    exclusion does not prevent a broader range (e.g. */*) present in the same
    header from matching the excluded mimetype.
    """
    prioritized = []
    for item in split_n_strip(accept_str, ","):
        # split params of the accepted mimetype
        parts = split_n_strip(item, ";")
        if not parts[0]:
            continue  # empty list element, e.g. "text/html,,text/n3"
        # split each param as [name, value]
        parsed = [parts[0]]
        parsed += [split_n_strip(param, "=") for param in parts[1:]]
        # insert priority marker
        entry = compile_priority(parsed)
        if entry[0][0] > 0:
            prioritized.append(entry)
    prioritized.sort(reverse=True)
    # strip priority tuples before returning
    return [entry[1:] for entry in prioritized]


def split_n_strip(astr, sep):
    """
    Split astr according to sep, and strip each element.
    """
    return [item.strip() for item in astr.split(sep)]


def compile_priority(lst):
    """
    Takes a list of the form [ mimetype, parameters* ] and return a copy with
    a tuple (q, p) inserted at the start where q is the value of the 'q'
    parameter, and 'p' represents the "specificity" of the mime type. The
    goal is that those tuples allow to sort different lst with the standard
    sort function (decreasing order for decreasing priority).

    Note also that this function removes from the return value the 'q'
    parameter if present. A missing or unparsable 'q' counts as 1.0, and
    values outside [0, 1] are clamped to that interval.
    """
    ret = list(lst)
    q = 1.0
    for index in range(1, len(ret)):
        param = ret[index]
        if param and param[0].lower() == "q":
            del ret[index]
            q = _parse_qvalue(param[1] if len(param) > 1 else "")
            break

    mime = ret[0]
    if mime == "*/*":
        p = 1
    elif mime.endswith("/*"):
        p = 2
    elif len(ret) == 1:
        p = 3
    else:
        p = 4
    ret.insert(0, (q, p))
    return ret


def _parse_qvalue(text):
    """
    Convert the value of a 'q' parameter to a float in [0, 1], falling back
    to 1.0 when it is not a number.
    """
    try:
        q = float(text)
    except ValueError:
        return 1.0
    if q != q:  # NaN is accepted by float() but is not a usable weight
        return 1.0
    return min(max(q, 0.0), 1.0)


def mime_match(m1, m2):
    """
    Return True if mimetype m1 matches mimetype m2, where m1 can contain
    wildcards. The comparison is case-insensitive.
    """
    m1 = m1.lower()
    m2 = m2.lower()
    if m1 == m2 or m1 == "*/*":
        return True
    if m1.endswith("/*"):
        # keep the slash so that "text/*" does not match "textual/x"
        return m2.startswith(m1[:-1])
    return False


if __name__ == "__main__":
    from wsgiref.handlers import CGIHandler
    CGIHandler().run(application)