View source with formatted comments or as raw
    1/*  $Id$
    2
    3    Part of SWI-Prolog
    4
    5    Author:        Jan Wielemaker
    6    E-mail:        wielemak@science.uva.nl
    7    WWW:           http://www.swi-prolog.org
    8    Copyright (C): 1985-2007, University of Amsterdam
    9
   10    This program is free software; you can redistribute it and/or
   11    modify it under the terms of the GNU General Public License
   12    as published by the Free Software Foundation; either version 2
   13    of the License, or (at your option) any later version.
   14
   15    This program is distributed in the hope that it will be useful,
   16    but WITHOUT ANY WARRANTY; without even the implied warranty of
   17    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   18    GNU General Public License for more details.
   19
   20    You should have received a copy of the GNU General Public
   21    License along with this library; if not, write to the Free Software
   22    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
   23
   24    As a special exception, if you link this library with other files,
   25    compiled with a Free Software compiler, to produce an executable, this
   26    library does not by itself cause the resulting executable to be covered
   27    by the GNU General Public License. This exception does not however
   28    invalidate any other reasons why the executable file might be covered by
   29    the GNU General Public License.
   30*/
   31
   32:- module(rdf_history,
   33	  [ rdfh_transaction/1,		% :Goal
   34	    rdfh_assert/3,		% +S,+P,+O
   35	    rdfh_retractall/3,		% +S,+P,+O
   36	    rdfh_update/3,		% +S[->NS],+P[->NP],+O[->[NO]
   37	    rdfh_db_transaction/3,	% ?DB, +Condition, ?Transaction
   38	    rdfh_triple_transaction/2,	% +Triple, -Transaction
   39	    rdfh_transaction_member/2	% ?Action, +Transaction
   40	  ]).   41:- use_module(library('http/http_session')).   42:- use_module(library(lists)).   43:- use_module(library(record)).   44:- use_module(library(error)).   45:- use_module(library(debug)).   46:- use_module(library('semweb/rdf_persistency')).   47:- use_module(library('semweb/rdf_db')).   48
   49
   50/** <module> RDF Persistent store change history
   51
   52This  module  deals  with  accessing  the   journal  files  of  the  RDF
   53persistency layer to get insight in the   provenance  and history of the
   54RDF database. It is designed for   Wiki-like collaborative editing of an
   55RDF graph. We make the following assumptions:
   56
   57 * Users are identified using a URI, typically an OpenID (http://openid.net/)
   58 * Triples created by a user are added to a named graph identified by the
   59   URI of the user.
   60 * Changes are grouped using rdf_transaction(Goal, log(Message, User))
   61 * The number that is associated with the named graph of a triple (normally
   62   expressing the line number in the source) is used to store the time-stamp.
   63   Although this information is redundant (the time stamp is the same as
   64   for the transaction), it allows for binary search through the history
   65   file for the enclosing transaction.
   66
   67@tbd	Cleanup thoughts on delete and update.
   68
   69@author	Jan Wielemaker
   70*/
   71
   72		 /*******************************
   73		 *	   DECLARATIONS		*
   74		 *******************************/
   75
   76:- module_transparent
   77	rdfh_transaction/1.   78
   79:- rdf_meta
   80	rdfh_assert(r,r,o),
   81	rdfh_retractall(r,r,o),
   82	rdfh_update(t,t,t).   83
   84:- multifile
   85	rdfh_hook/1.   86
   87:- record
   88	rdf_transaction(id:integer,
   89			nesting:integer,
   90			time:number,
   91			message,
   92			actions:list,
   93			other_graphs:list).   94
   95
   96		 /*******************************
   97		 *	   MODIFICATIONS	*
   98		 *******************************/
   99
  100%%	rdfh_transaction(:Goal) is semidet.
  101%
  102%	Run Goal using rdf_transaction/2, using information from the HTTP
  103%	layer to provide OpenID and session-id.
  104
  105rdfh_transaction(Goal) :-
  106	rdfh_user(User),
  107	transaction_context(Context),
  108	rdf_transaction(Goal, log(rdfh([user(User)|Context]), User)).
  109
  110
  111%%	rdfh_assert(+S, +P, +O) is det.
  112%
  113%	Assert a triple, adding current  user   and  time  to the triple
  114%	context.
  115
  116rdfh_assert(S,P,O) :-
  117	(   rdf_active_transaction(log(rdfh(_), User))
  118	->  rdfh_time(Time),
  119	    rdf_assert(S,P,O,User:Time)
  120	;   throw(error(permission_error(assert, triple, rdf(S,P,O)),
  121			context(_, 'No rdfh_transaction/1')))
  122	).
  123
  124
  125%%	rdfh_retractall(+S, +P, +O) is det.
  126%
  127%	Retract triples that  match  {S,P,O}.   Note  that  all matching
  128%	triples are added to the journal, so   we can undo the action as
  129%	well as report on  retracted  triples,   even  if  multiple  are
  130%	retracted at the same time.
  131%	
  132%	One of the problems we are faced   with is that a retract action
  133%	goes into the journal of  the   user  whose triple is retracted,
  134%	which may or may not be the one who performed the action.
  135
  136rdfh_retractall(S,P,O) :-
  137	(   rdf_active_transaction(log(rdfh(_), _User))
  138	->  rdf_retractall(S,P,O)
  139	;   throw(error(permission_error(retract, triple, rdf(S,P,O)),
  140			context(_, 'No rdfh_transaction/1')))
  141	).
  142
  143
  144%%	rdfh_update(+S, +P, +O) is det.
  145%
  146%	More tricky stuff, replacing a triple by another. Typically this
  147%	will be changing the predicate or object. Provenance info should
  148%	move the new triple to the user making the change, surely if the
  149%	object is changed. If the  predicate   is  changed  to a related
  150%	predicate, this actually becomes less obvious.
  151%	
  152%	Current simple-minded approach is  to  turn   an  update  into a
  153%	retract and assert. The S,P,O specifications are either a ground
  154%	value or of the form _Old_ =|->|= _New_. Here is an example:
  155%	
  156%	==
  157%	rdfh_update(Work, Style, wn:oldstyle -> wn:newstyle)
  158%	==
  159
  160rdfh_update(S,P,O) :-
  161	(   rdf_active_transaction(log(rdfh(_), User))
  162	->  update(S,P,O, rdf(RS, RP, RO), rdf(AS, AP, AO)),
  163	    must_be(ground, RS),
  164	    must_be(ground, RP),
  165	    must_be(ground, RO),
  166	    rdfh_time(Time),
  167	    rdf_retractall(RS, RP, RO),
  168	    rdf_assert(AS, AP, AO, User:Time)
  169	;   throw(error(permission_error(retract, triple, rdf(S,P,O)),
  170			context(_, 'No rdfh_transaction/1')))
  171	).
  172
  173update(Ss, Ps, Os, rdf(S0, P0, O0), rdf(S,P,O)) :-
  174	update(Ss, S0, S),
  175	update(Ps, P0, P),
  176	update(Os, O0, O).
  177
  178update(From->To, From, To) :- !.
  179update(Value, Value, Value).
  180
  181
  182%%	transaction_context(-Term) is det.
  183%
  184%	Context to pass with an RDF transaction.   Note that we pass the
  185%	user. We don't need this for simple additions, but we do need it
  186%	to track deletions.
  187
  188transaction_context(Context) :-
  189	(   rdfh_session(Session)
  190	->  Context = [session(Session)]
  191	;   Context = []
  192	).
  193
  194%%	rdfh_session(-Session) is semidet.
  195%
  196%	Session is a (ground) identifier for the current session.
  197
  198rdfh_session(Session) :-
  199	rdfh_hook(session(Session)), !.
  200rdfh_session(Session) :-
  201	catch(http_session_id(Session), _, fail).
  202
  203
  204%%	rdfh_user(-URI) is det.
  205%
  206%	Get user-id of current session.
  207%	
  208%	@tbd	Make hookable, so we can use the SeRQL user/openid hooks
  209
  210rdfh_user(User) :-
  211	rdfh_hook(user(User)), !.
  212rdfh_user(OpenId) :-
  213	http_session_data(openid(OpenId)).
  214
  215%%	rdfh_time(-Time:integer) is det.
  216%
  217%	Get time stamp as integer.  Second resolution is enough, and
  218%	avoids rounding problems associated with floats.
  219
  220rdfh_time(Seconds) :-
  221	get_time(Now),
  222	Seconds is round(Now).
  223
  224
  225		 /*******************************
  226		 *	 EXAMINE HISTORY	*
  227		 *******************************/
  228
  229%%	rdfh_triple_transaction(+Triple:rdf(S,P,O), -Transaction) is nondet.
  230%
  231%	True if the (partial) Triple is modified in Transaction.
  232
  233rdfh_triple_transaction(rdf(S,P,O), Transaction) :-
  234	rdf(S,P,O,DB:Time),
  235	After is Time - 1,
  236	rdfh_db_transaction(DB, after(After), Transaction),
  237	rdfh_transaction_member(assert(S,P,O,Time), Transaction).
  238
  239%%	rdfh_db_transaction(?DB, +Condition, ?Transaction) is nondet.
  240%
  241%	True if Transaction satisfying  Condition   was  executed on DB.
  242%	Condition is one of:
  243%	
  244%	  * true
  245%	  Always true, returns all transactions.
  246%	  * id(Id)
  247%	  Specifies the identifier of the transaction.  Only makes sense
  248%	  if DB is specified as transaction identifiers are local to each
  249%	  DB.
  250%	  * after(Time)
  251%	  True if transaction is executed at or after Time.
  252%	  
  253%	  @tbd	More conditions (e.g. before(Time)).
  254
  255rdfh_db_transaction(DB, true, Transaction) :- !,
  256	rdf_journal_file(DB, Journal),
  257	journal_transaction(Journal, Transaction).
  258rdfh_db_transaction(DB, id(Id), Transaction) :- !,
  259	must_be(atom, DB),
  260	rdf_journal_file(DB, Journal),
  261	open_journal(Journal, Fd),
  262	call_cleanup((seek_journal(Fd, id(Id)),
  263		      read_transaction(Fd, Transaction)),
  264		     close(Fd)).
  265rdfh_db_transaction(DB, Condition, Transaction) :- !,
  266	valid_condition(Condition),
  267	rdf_journal_file(DB, Journal),
  268	open_journal(Journal, Fd),
  269	seek_journal(Fd, Condition),
  270	stream_transaction(Fd, Transaction).
  271
  272valid_condition(Var) :-
  273	var(Var), !,
  274	instantiation_error(Var).
  275valid_condition(after(Time)) :- !,
  276	must_be(number, Time).
  277valid_condition(Cond) :-
  278	type_error(condition, Cond).
  279
  280%%	open_journal(+File, -Stream) is det.
  281%
  282%	Open a journal file.  Journal files are always UTF-8 encoded.
  283
  284open_journal(JournalFile, Fd) :-
  285	open(JournalFile, read, Fd, [encoding(utf8)]).
  286
  287%%	journal_transaction(+JournalFile, ?Transaction) is nondet.
  288%
  289%	True if Transaction is a transaction in JournalFile,
  290
  291journal_transaction(JournalFile, Transaction) :-
  292	open_journal(JournalFile, Fd),
  293	stream_transaction(Fd, Transaction).
  294
  295stream_transaction(JFD, Transaction) :-
  296	call_cleanup(read_transaction(JFD, Transaction), close(JFD)).
  297
  298read_transaction(In, Transaction) :-
  299	repeat,
  300	   read(In, T0),
  301	(   T0 == end_of_file
  302	->  !, fail
  303	;   transaction(T0, In, T),	% transaction/3 is not steadfast
  304	    T = Transaction
  305	).
  306
  307transaction(begin(Id, Nest, Time, Msg), In,
  308	    rdf_transaction(Id, Nest, Time, Msg, Actions, Others)) :- !,
  309	read(In, T2),
  310	read_transaction_actions(T2, Id, In, Actions, Others).
  311transaction(start(_), _, _) :- !, fail.	% Open journal
  312transaction(end(_), _, _) :- !, fail.   % Close journal
  313transaction(Action, _, Action).		% Action outside transaction?
  314
  315read_transaction_actions(end(Id, _, Others), Id, _, [], Others) :- !.
  316read_transaction_actions(end_of_file, _, _, [], []) :- !. % TBD: Incomplete transaction (error)
  317read_transaction_actions(Action, Id, In, Actions, Others) :-
  318	ignore_in_transaction(Action), !,
  319	read(In, T2),
  320	read_transaction_actions(T2, Id, In, Actions, Others).
  321read_transaction_actions(Action, Id, In, [Action|Actions], Others) :-
  322	read(In, T2),
  323	read_transaction_actions(T2, Id, In, Actions, Others).
  324
  325ignore_in_transaction(start(_)).
  326ignore_in_transaction(end(_)).
  327ignore_in_transaction(begin(_,_,_,_)).
  328ignore_in_transaction(end(_,_,_)).
  329
  330
  331%%	seek_journal(+Fd:stream, +Spec) is semidet.
  332%
  333%	See an open journal descriptor to the start of a transaction
  334%	specified by Spec.  Spec is one of:
  335%	
  336%	  * after(Time)
  337%	  First transaction at or after Time.  Fails if there are no
  338%	  transactions after time.
  339%	  * id(Id)
  340%	  Start of transaction labeled with given Id.  Fails if there
  341%	  is no transaction labeled Id.
  342%	
  343%	The implementation relies on the incrementing identifier numbers
  344%	and time-stamps.
  345
  346seek_journal(Fd, Spec) :-
  347	stream_property(Fd, file_name(File)),
  348	size_file(File, Size),
  349	Here is Size//2,
  350	Last = last(-),
  351	(   is_after_spec(Spec)
  352	->  (   bsearch_journal(Fd, 0, Here, Size, Spec, Last)
  353	    ->	true
  354	    ;	arg(1, Last, StartOfTerm),
  355		StartOfTerm \== (-),
  356		seek(Fd, StartOfTerm, bof, _)
  357	    )
  358	;   bsearch_journal(Fd, 0, Here, Size, Spec, Last)
  359	).
  360
  361is_after_spec(after(_Time)).
  362
  363%%	bsearch_journal(+Fd, +Start, +Here, +End, +Spec, !Last) is semidet.
  364%
  365%	Perform a binary search in the journal opened as Fd.
  366
  367bsearch_journal(Fd, Start, Here, End, Spec, Last) :-
  368	start_of_transaction(Fd, Here, StartOfTerm, Begin), !,
  369	compare_transaction(Spec, Begin, Diff),
  370	(   Diff == (=)
  371	->  seek(Fd, StartOfTerm, bof, _)
  372	;   Diff == (<)
  373	->  NewHere is Start+(Here-Start)//2,
  374	    NewHere < Here,
  375	    nb_setarg(1, Last, StartOfTerm),
  376	    bsearch_journal(Fd, Start, NewHere, Here, Spec, Last)
  377	;   NewHere is StartOfTerm+(End-StartOfTerm)//2,
  378	    NewHere > StartOfTerm,
  379	    bsearch_journal(Fd, StartOfTerm, NewHere, End, Spec, Last)
  380	).
  381bsearch_journal(Fd, Start, Here, _End, Spec, Last) :-
  382	NewHere is Start+(Here-Start)//2,
  383	NewHere	< Here,
  384	bsearch_journal(Fd, Start, NewHere, Here, Spec, Last).
  385
  386compare_transaction(id(Id), begin(Id2,_,_,_), Diff) :- !,
  387	compare(Diff, Id, Id2).
  388compare_transaction(after(Time), begin(_,_,T,_), Diff) :- !,
  389	compare(Diff, Time, T).
  390
  391%%	start_of_transaction(+Fd, +From, -Start, -Term) is semidet.
  392%
  393%	Term is the start  term  of   the  first  transaction after byte
  394%	position From. Fails if no transaction can be found after From.
  395
  396start_of_transaction(Fd, From, Start, Term) :-
  397	seek(Fd, From, bof, _),
  398	skip(Fd, 10),
  399	repeat,
  400	    seek(Fd, 0, current, Start),
  401	    read(Fd, Term),
  402	    (	transaction_start(Term)
  403	    ->	!
  404	    ;	Term == end_of_file
  405	    ->	!, fail
  406	    ;	fail
  407	    ).
  408
  409transaction_start(begin(_Id,_Nest,_Time,_Message)).
  410
  411%%	rdfh_transaction_member(Action, Transaction) is nondet.
  412%
  413%	True if Action is an action in Transaction.
  414
  415rdfh_transaction_member(Action, Transaction) :-
  416	rdf_transaction_actions(Transaction, Actions),
  417	member(Action, Actions)