View source with raw comments or as raw
    1/*  $Id$
    2
    3    Part of SWI-Prolog
    4
    5    Author:        Jan Wielemaker
    6    E-mail:        wielemak@science.uva.nl
    7    WWW:           http://www.swi-prolog.org
    8    Copyright (C): 1985-2007, University of Amsterdam
    9
   10    This program is free software; you can redistribute it and/or
   11    modify it under the terms of the GNU General Public License
   12    as published by the Free Software Foundation; either version 2
   13    of the License, or (at your option) any later version.
   14
   15    This program is distributed in the hope that it will be useful,
   16    but WITHOUT ANY WARRANTY; without even the implied warranty of
   17    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   18    GNU General Public License for more details.
   19
   20    You should have received a copy of the GNU General Public
   21    License along with this library; if not, write to the Free Software
   22    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
   23
   24    As a special exception, if you link this library with other files,
   25    compiled with a Free Software compiler, to produce an executable, this
   26    library does not by itself cause the resulting executable to be covered
   27    by the GNU General Public License. This exception does not however
   28    invalidate any other reasons why the executable file might be covered by
   29    the GNU General Public License.
   30*/
   31
   32:- module(rdf_history,
   33	  [ rdfh_transaction/1,		% :Goal
   34	    rdfh_assert/3,		% +S,+P,+O
   35	    rdfh_retractall/3,		% +S,+P,+O
   36	    rdfh_update/3,		% +S[->NS],+P[->NP],+O[->[NO]
   37	    rdfh_db_transaction/3,	% ?DB, +Condition, ?Transaction
   38	    rdfh_triple_transaction/2,	% +Triple, -Transaction
   39	    rdfh_transaction_member/2	% ?Action, +Transaction
   40	  ]).   41:- use_module(library('http/http_session')).   42:- use_module(library(lists)).   43:- use_module(library(record)).   44:- use_module(library(error)).   45:- use_module(library(debug)).   46:- use_module(library('semweb/rdf_persistency')).   47:- use_module(library('semweb/rdf_db')).

RDF Persistent store change history

This module deals with accessing the journal files of the RDF persistency layer to get insight in the provenance and history of the RDF database. It is designed for Wiki-like collaborative editing of an RDF graph. We make the following assumptions:

Users are identified using a URI, typically an OpenID (http://openid.net/) Triples created by a user are added to a named graph identified by the URI of the user. Changes are grouped using rdf_transaction(Goal, log(Message, User)) The number that is associated with the named graph of a triple (normally expressing the line number in the source) is used to store the time-stamp. Although this information is redundant (the time stamp is the same as for the transaction), it allows for binary search through the history file for the enclosing transaction.
author
- Jan Wielemaker */
To be done
- Cleanup thoughts on delete and update.
   72		 /*******************************
   73		 *	   DECLARATIONS		*
   74		 *******************************/
   75
   76:- module_transparent
   77	rdfh_transaction/1.   78
   79:- rdf_meta
   80	rdfh_assert(r,r,o),
   81	rdfh_retractall(r,r,o),
   82	rdfh_update(t,t,t).   83
   84:- multifile
   85	rdfh_hook/1.   86
   87:- record
   88	rdf_transaction(id:integer,
   89			nesting:integer,
   90			time:number,
   91			message,
   92			actions:list,
   93			other_graphs:list).   94
   95
   96		 /*******************************
   97		 *	   MODIFICATIONS	*
   98		 *******************************/
 rdfh_transaction(:Goal) is semidet
Run Goal using rdf_transaction/2, using information from the HTTP layer to provide OpenID and session-id.
  105rdfh_transaction(Goal) :-
  106	rdfh_user(User),
  107	transaction_context(Context),
  108	rdf_transaction(Goal, log(rdfh([user(User)|Context]), User)).
 rdfh_assert(+S, +P, +O) is det
Assert a triple, adding current user and time to the triple context.
  116rdfh_assert(S,P,O) :-
  117	(   rdf_active_transaction(log(rdfh(_), User))
  118	->  rdfh_time(Time),
  119	    rdf_assert(S,P,O,User:Time)
  120	;   throw(error(permission_error(assert, triple, rdf(S,P,O)),
  121			context(_, 'No rdfh_transaction/1')))
  122	).
 rdfh_retractall(+S, +P, +O) is det
Retract triples that match {S,P,O}. Note that all matching triples are added to the journal, so we can undo the action as well as report on retracted triples, even if multiple are retracted at the same time.

One of the problems we are faced with is that a retract action goes into the journal of the user whose triple is retracted, which may or may not be the one who performed the action.

  136rdfh_retractall(S,P,O) :-
  137	(   rdf_active_transaction(log(rdfh(_), _User))
  138	->  rdf_retractall(S,P,O)
  139	;   throw(error(permission_error(retract, triple, rdf(S,P,O)),
  140			context(_, 'No rdfh_transaction/1')))
  141	).
 rdfh_update(+S, +P, +O) is det
More tricky stuff, replacing a triple by another. Typically this will be changing the predicate or object. Provenance info should move the new triple to the user making the change, surely if the object is changed. If the predicate is changed to a related predicate, this actually becomes less obvious.

Current simple-minded approach is to turn an update into a retract and assert. The S,P,O specifications are either a ground value or of the form Old -> New. Here is an example:

rdfh_update(Work, Style, wn:oldstyle -> wn:newstyle)
  160rdfh_update(S,P,O) :-
  161	(   rdf_active_transaction(log(rdfh(_), User))
  162	->  update(S,P,O, rdf(RS, RP, RO), rdf(AS, AP, AO)),
  163	    must_be(ground, RS),
  164	    must_be(ground, RP),
  165	    must_be(ground, RO),
  166	    rdfh_time(Time),
  167	    rdf_retractall(RS, RP, RO),
  168	    rdf_assert(AS, AP, AO, User:Time)
  169	;   throw(error(permission_error(retract, triple, rdf(S,P,O)),
  170			context(_, 'No rdfh_transaction/1')))
  171	).
  172
  173update(Ss, Ps, Os, rdf(S0, P0, O0), rdf(S,P,O)) :-
  174	update(Ss, S0, S),
  175	update(Ps, P0, P),
  176	update(Os, O0, O).
  177
  178update(From->To, From, To) :- !.
  179update(Value, Value, Value).
 transaction_context(-Term) is det
Context to pass with an RDF transaction. Note that we pass the user. We don't need this for simple additions, but we do need it to track deletions.
  188transaction_context(Context) :-
  189	(   rdfh_session(Session)
  190	->  Context = [session(Session)]
  191	;   Context = []
  192	).
 rdfh_session(-Session) is semidet
Session is a (ground) identifier for the current session.
  198rdfh_session(Session) :-
  199	rdfh_hook(session(Session)), !.
  200rdfh_session(Session) :-
  201	catch(http_session_id(Session), _, fail).
 rdfh_user(-URI) is det
Get user-id of current session.
To be done
- Make hookable, so we can use the SeRQL user/openid hooks
  210rdfh_user(User) :-
  211	rdfh_hook(user(User)), !.
  212rdfh_user(OpenId) :-
  213	http_session_data(openid(OpenId)).
 rdfh_time(-Time:integer) is det
Get time stamp as integer. Second resolution is enough, and avoids rounding problems associated with floats.
  220rdfh_time(Seconds) :-
  221	get_time(Now),
  222	Seconds is round(Now).
  223
  224
  225		 /*******************************
  226		 *	 EXAMINE HISTORY	*
  227		 *******************************/
 rdfh_triple_transaction(+Triple:rdf(S,P,O), -Transaction) is nondet
True if the (partial) Triple is modified in Transaction.
  233rdfh_triple_transaction(rdf(S,P,O), Transaction) :-
  234	rdf(S,P,O,DB:Time),
  235	After is Time - 1,
  236	rdfh_db_transaction(DB, after(After), Transaction),
  237	rdfh_transaction_member(assert(S,P,O,Time), Transaction).
 rdfh_db_transaction(?DB, +Condition, ?Transaction) is nondet
True if Transaction satisfying Condition was executed on DB. Condition is one of:
true
Always true, returns all transactions.
id(Id)
Specifies the identifier of the transaction. Only makes sense if DB is specified as transaction identifiers are local to each DB.
after(Time)
True if transaction is executed at or after Time.
To be done
- More conditions (e.g. before(Time)).
  255rdfh_db_transaction(DB, true, Transaction) :- !,
  256	rdf_journal_file(DB, Journal),
  257	journal_transaction(Journal, Transaction).
  258rdfh_db_transaction(DB, id(Id), Transaction) :- !,
  259	must_be(atom, DB),
  260	rdf_journal_file(DB, Journal),
  261	open_journal(Journal, Fd),
  262	call_cleanup((seek_journal(Fd, id(Id)),
  263		      read_transaction(Fd, Transaction)),
  264		     close(Fd)).
  265rdfh_db_transaction(DB, Condition, Transaction) :- !,
  266	valid_condition(Condition),
  267	rdf_journal_file(DB, Journal),
  268	open_journal(Journal, Fd),
  269	seek_journal(Fd, Condition),
  270	stream_transaction(Fd, Transaction).
  271
  272valid_condition(Var) :-
  273	var(Var), !,
  274	instantiation_error(Var).
  275valid_condition(after(Time)) :- !,
  276	must_be(number, Time).
  277valid_condition(Cond) :-
  278	type_error(condition, Cond).
 open_journal(+File, -Stream) is det
Open a journal file. Journal files are always UTF-8 encoded.
  284open_journal(JournalFile, Fd) :-
  285	open(JournalFile, read, Fd, [encoding(utf8)]).
 journal_transaction(+JournalFile, ?Transaction) is nondet
True if Transaction is a transaction in JournalFile,
  291journal_transaction(JournalFile, Transaction) :-
  292	open_journal(JournalFile, Fd),
  293	stream_transaction(Fd, Transaction).
  294
  295stream_transaction(JFD, Transaction) :-
  296	call_cleanup(read_transaction(JFD, Transaction), close(JFD)).
  297
  298read_transaction(In, Transaction) :-
  299	repeat,
  300	   read(In, T0),
  301	(   T0 == end_of_file
  302	->  !, fail
  303	;   transaction(T0, In, T),	% transaction/3 is not steadfast
  304	    T = Transaction
  305	).
  306
  307transaction(begin(Id, Nest, Time, Msg), In,
  308	    rdf_transaction(Id, Nest, Time, Msg, Actions, Others)) :- !,
  309	read(In, T2),
  310	read_transaction_actions(T2, Id, In, Actions, Others).
  311transaction(start(_), _, _) :- !, fail.	% Open journal
  312transaction(end(_), _, _) :- !, fail.   % Close journal
  313transaction(Action, _, Action).		% Action outside transaction?
  314
  315read_transaction_actions(end(Id, _, Others), Id, _, [], Others) :- !.
  316read_transaction_actions(end_of_file, _, _, [], []) :- !. % TBD: Incomplete transaction (error)
  317read_transaction_actions(Action, Id, In, Actions, Others) :-
  318	ignore_in_transaction(Action), !,
  319	read(In, T2),
  320	read_transaction_actions(T2, Id, In, Actions, Others).
  321read_transaction_actions(Action, Id, In, [Action|Actions], Others) :-
  322	read(In, T2),
  323	read_transaction_actions(T2, Id, In, Actions, Others).
  324
  325ignore_in_transaction(start(_)).
  326ignore_in_transaction(end(_)).
  327ignore_in_transaction(begin(_,_,_,_)).
  328ignore_in_transaction(end(_,_,_)).
 seek_journal(+Fd:stream, +Spec) is semidet
See an open journal descriptor to the start of a transaction specified by Spec. Spec is one of:
after(Time)
First transaction at or after Time. Fails if there are no transactions after time.
id(Id)
Start of transaction labeled with given Id. Fails if there is no transaction labeled Id.

The implementation relies on the incrementing identifier numbers and time-stamps.

  346seek_journal(Fd, Spec) :-
  347	stream_property(Fd, file_name(File)),
  348	size_file(File, Size),
  349	Here is Size//2,
  350	Last = last(-),
  351	(   is_after_spec(Spec)
  352	->  (   bsearch_journal(Fd, 0, Here, Size, Spec, Last)
  353	    ->	true
  354	    ;	arg(1, Last, StartOfTerm),
  355		StartOfTerm \== (-),
  356		seek(Fd, StartOfTerm, bof, _)
  357	    )
  358	;   bsearch_journal(Fd, 0, Here, Size, Spec, Last)
  359	).
  360
  361is_after_spec(after(_Time)).
 bsearch_journal(+Fd, +Start, +Here, +End, +Spec, !Last) is semidet
Perform a binary search in the journal opened as Fd.
  367bsearch_journal(Fd, Start, Here, End, Spec, Last) :-
  368	start_of_transaction(Fd, Here, StartOfTerm, Begin), !,
  369	compare_transaction(Spec, Begin, Diff),
  370	(   Diff == (=)
  371	->  seek(Fd, StartOfTerm, bof, _)
  372	;   Diff == (<)
  373	->  NewHere is Start+(Here-Start)//2,
  374	    NewHere < Here,
  375	    nb_setarg(1, Last, StartOfTerm),
  376	    bsearch_journal(Fd, Start, NewHere, Here, Spec, Last)
  377	;   NewHere is StartOfTerm+(End-StartOfTerm)//2,
  378	    NewHere > StartOfTerm,
  379	    bsearch_journal(Fd, StartOfTerm, NewHere, End, Spec, Last)
  380	).
  381bsearch_journal(Fd, Start, Here, _End, Spec, Last) :-
  382	NewHere is Start+(Here-Start)//2,
  383	NewHere	< Here,
  384	bsearch_journal(Fd, Start, NewHere, Here, Spec, Last).
  385
  386compare_transaction(id(Id), begin(Id2,_,_,_), Diff) :- !,
  387	compare(Diff, Id, Id2).
  388compare_transaction(after(Time), begin(_,_,T,_), Diff) :- !,
  389	compare(Diff, Time, T).
 start_of_transaction(+Fd, +From, -Start, -Term) is semidet
Term is the start term of the first transaction after byte position From. Fails if no transaction can be found after From.
  396start_of_transaction(Fd, From, Start, Term) :-
  397	seek(Fd, From, bof, _),
  398	skip(Fd, 10),
  399	repeat,
  400	    seek(Fd, 0, current, Start),
  401	    read(Fd, Term),
  402	    (	transaction_start(Term)
  403	    ->	!
  404	    ;	Term == end_of_file
  405	    ->	!, fail
  406	    ;	fail
  407	    ).
  408
  409transaction_start(begin(_Id,_Nest,_Time,_Message)).
 rdfh_transaction_member(Action, Transaction) is nondet
True if Action is an action in Transaction.
  415rdfh_transaction_member(Action, Transaction) :-
  416	rdf_transaction_actions(Transaction, Actions),
  417	member(Action, Actions)