View source with raw comments or as raw
    1/*  $Id$
    2
    3    Part of SWI-Prolog
    4
    5    Author:        Jan Wielemaker
    6    E-mail:        jan@swi.psy.uva.nl
    7    WWW:           http://www.swi-prolog.org
    8    Copyright (C): 1985-2004, University of Amsterdam
    9
   10    This program is free software; you can redistribute it and/or
   11    modify it under the terms of the GNU General Public License
   12    as published by the Free Software Foundation; either version 2
   13    of the License, or (at your option) any later version.
   14
   15    This program is distributed in the hope that it will be useful,
   16    but WITHOUT ANY WARRANTY; without even the implied warranty of
   17    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   18    GNU General Public License for more details.
   19
   20    You should have received a copy of the GNU Lesser General Public
   21    License along with this library; if not, write to the Free Software
   22    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
   23
   24    As a special exception, if you link this library with other files,
   25    compiled with a Free Software compiler, to produce an executable, this
   26    library does not by itself cause the resulting executable to be covered
   27    by the GNU General Public License. This exception does not however
   28    invalidate any other reasons why the executable file might be covered by
   29    the GNU General Public License.
   30*/
   31
   32:- module(serql,
   33	  [ serql_query/2,		% +Query, -Result
   34	    serql_query/3,		% +Query, -Result, +Options
   35	    serql_compile/3,		% +Query, -Compiled, +Options
   36	    serql_run/2			% +Compiled, -Reply
   37	  ]).   38:- use_module(library(semweb/rdf_db)).   39:- use_module(library(semweb/rdf_optimise)).   40:- use_module(library(lists)).   41:- use_module(library(option)).   42:- use_module(library(debug)).   43:- use_module(library(settings)).   44:- use_module(rdfql_util).   45:- include(entailment(load)).
   46
   47:- meta_predicate
   48	select_results(+,-,0).   49
   50/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
   51A Prolog path expression is a conjunction of rdf/3 statements. Parts may
   52be wrapped in opt/1 to indicate they are   optional  and nodes may be of
   53the form set(List) to indicate a conjunction of distinct values.
   54- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
%!  serql_query(+Query, -Reply) is nondet.
%!  serql_query(+Query, -Reply, +Options) is nondet.
%
%   Compile Query using serql_compile/3 and enumerate its answers using
%   serql_run/2.  Query is SeRQL query text.  Reply is, similar to the
%   ODBC interface, a term row(Col1, Col2, ...) for SELECT statements
%   or a term rdf(S,P,O) for CONSTRUCT statements.  Answers are
%   returned one-by-one on backtracking.
%
%   serql_query/2 passes the option list [entailment(rdf)].

serql_query(Query, Reply) :-
    serql_query(Query, Reply, [entailment(rdf)]).

serql_query(Query, Reply, Options) :-
    serql_compile(Query, Plan, Options),
    serql_run(Plan, Reply).
%!  serql_compile(+Query, -Compiled, +Options)
%
%   Compile a SeRQL query, returning the result in Compiled, a term
%   serql_query(Goal, ReplyTemplate, Module).  Options:
%
%     * entailment(Entailment)
%       Entailment module to use.
%     * type(-Type)
%       Return one of select(VarNames) or construct.
%
%   For compatibility, Options may also be a plain atom (other than
%   []), which is then taken to be the entailment.

serql_compile(Text, Compiled, Entailment) :-
    atom(Entailment),
    Entailment \== [],
    !,                                  % compatibility
    serql_compile(Text, Compiled, [entailment(Entailment)]).
serql_compile(Text, serql_query(Goal, ReplyTemplate, Module), Options) :-
    serql_parse(Text, Parsed),
    compile(Parsed, Goal, ReplyTemplate, Module, Options).
   90
   91compile(select(Row0, VarNames, Path, Where, Distinct, Limit, Offset),
   92	select(Final, Solutions),
   93	Row,
   94	Module,
   95	Options) :-
   96	option(entailment(Entailment), Options, rdfs),
   97	entailment_module(Entailment, Module),
   98	mk_solutions(Distinct, Limit, Offset, Solutions),
   99	set_type(select(VarNames), Options),
  100	where_constraints(Where, Annotations),
  101	serql_compile_path(Path, select, Goal),
  102	remove_annotations(Annotations, where),
  103	projection_functions(Row0, Row, Select),
  104	(   setting(cliopatria:optimise_query, Def),
  105	    option(optimise(Opt), Options, Def),
  106	    Opt == true
  107	->  rdf_optimise((Goal,Where,Select), Optimised)
  108	;   Optimised = (Goal,Where,Select)
  109	),
  110	serql_select_bind_null(Optimised, Final),
  111	debug(serql(compiled), '~@',
  112	            [ portray_clause((q(Row) :- Final))
  113		    ]).
  114compile(construct(RPath, Path, Where, Distinct, Limit, Offset),
  115	construct(Final, Solutions),
  116	RDF,
  117	Module,
  118	Options) :-
  119	option(entailment(Entailment), Options, rdfs),
  120	entailment_module(Entailment, Module),
  121	mk_solutions(Distinct, Limit, Offset, Solutions),
  122	set_type(construct, Options),
  123	where_constraints(Where, Annotations),
  124	serql_compile_path(Path, construct, Goal),
  125	remove_annotations(Annotations, where),
  126	statements(RPath, Statements),
  127	entailment_module(Entailment, Module),
  128	(   setting(cliopatria:optimise_query, Def),
  129	    option(optimise(Opt), Options, Def),
  130	    Opt == true
  131	->  rdf_optimise((Goal,Where), Optimised)
  132	;   Optimised = (Goal,Where)
  133	),
  134	Final = (Optimised, serql_member_statement(RDF, Statements)),
  135	debug(serql(compiled), '~@',
  136	            [ portray_clause((q(RDF) :- Final))
  137		    ]).
%!  mk_solutions(+Distinct, +Limit, +Offset, -Term)
%
%   Create a solutions-selecting term compatible to SPARQL.  Term is
%   solutions(unsorted, Limit, Offset), wrapped in distinct/1 if
%   Distinct is the atom `distinct`.

mk_solutions(Distinct, Limit, Offset, Term) :-
    (   Distinct = distinct
    ->  Term = distinct(solutions(unsorted, Limit, Offset))
    ;   Term = solutions(unsorted, Limit, Offset)
    ).
%!  set_type(+Type, +Options)
%
%   Fill option type(X).  If Options holds a term type(T), T is unified
%   with Type.  If that unification fails, a type_error is raised that
%   names the expected and the found query type (functor names only).
%   Succeeds silently if Options has no type/1 option.

set_type(Actual, Options) :-
    (   memberchk(type(Requested), Options)
    ->  (   Requested = Actual
        ->  true
        ;   functor(Requested, Expected, _),
            functor(Actual, Found, _),
            throw(error(type_error(query_type(Expected), Found), _))
        )
    ;   true
    ).
%!  serql_run(+Term, -Result)
%
%   Run a query compiled by serql_compile/3.  Term is
%   serql_query(Plan, Reply, Module); the goal inside Plan is executed
%   in Module through select_results/3.

serql_run(serql_query(Plan, Reply, Module), Reply) :-
    serql_run(Plan, Reply, Module).

serql_run(select(Goal, Spec), Reply, Module) :-
    select_results(Spec, Reply, Module:Goal).
serql_run(construct(Goal, Spec), Reply, Module) :-
    select_results(Spec, Reply, Module:Goal).
%!  select_results(+Spec, -Reply, :Goal)
%
%   Apply ordering and limits on result-set.  Spec is
%   solutions(Order, Limit, Offset), optionally wrapped in distinct/1.
%   The work is delegated to select_results/6, called with `distinct`
%   or `all` as first argument.

select_results(Spec, Reply, Goal) :-
    (   Spec = distinct(solutions(Order, Limit, Offset))
    ->  select_results(distinct, Offset, Limit, Order, Reply, Goal)
    ;   Spec = solutions(Order, Limit, Offset),
        select_results(all, Offset, Limit, Order, Reply, Goal)
    ).
  180
  181
  182		 /*******************************
  183		 *	     COMPILER		*
  184		 *******************************/
%!  serql_compile_path(+PathExpr, +Type, -PrologGoal)
%
%   Compile a SeRQL path expression into a plain Prolog goal.  Type is
%   one of `select` or `construct`.  Clause order matters: node sets
%   are expanded first, then reified statements used as subject or
%   object, then conjunctions and optional parts, and finally plain
%   rdf/3 triples, which are decorated with the goals derived from the
%   `where` annotations of their variables.  Anything else is passed
%   unchanged.

serql_compile_path(rdf(Subj, Pred, Obj), Type, Goal) :-
    set(Subj, Members),
    !,
    make_set_subj_conj(Members, [], Pred, Obj, Type, Goal).
serql_compile_path(rdf(Subj, Pred, Obj), Type, Goal) :-
    set(Obj, Members),
    !,
    make_set_obj_conj(Members, [], Subj, Pred, Type, Goal).
serql_compile_path(rdf(Subj0, Pred, Obj), Type, Goal) :-
    reified(Subj0, Subj, ReifyGoal),
    !,
    serql_compile_path(rdf(Subj, Pred, Obj), Type, TripleGoal),
    Goal = (TripleGoal, ReifyGoal).
serql_compile_path(rdf(Subj, Pred, Obj0), Type, Goal) :-
    reified(Obj0, Obj, ReifyGoal),
    !,
    serql_compile_path(rdf(Subj, Pred, Obj), Type, TripleGoal),
    Goal = (TripleGoal, ReifyGoal).
serql_compile_path((Left0, Right0), Type, (Left, Right)) :-
    !,
    serql_compile_path(Left0, Type, Left),
    serql_compile_path(Right0, Type, Right).
serql_compile_path(optional(Id, Path), construct,
                   (Goal *-> Id=true ; Id=false)) :-
    !,                                  % Id records whether the part matched
    serql_compile_path(Path, construct, Goal).
serql_compile_path(optional(_, Path), select, (Goal *-> true ; true)) :-
    !,
    serql_compile_path(Path, select, Goal).
serql_compile_path(rdf(Subj, Pred, Obj0), _, Goal) :-
    !,
    resource_annotations(Subj, SubjGoal),
    resource_annotations(Pred, PredGoal),
    object_annotations(Obj0, Obj, ObjGoal),
    clean_conj((SubjGoal, PredGoal, rdf(Subj, Pred, Obj), ObjGoal), Goal).
serql_compile_path(Goal, _, Goal).
  218
  219reified(0, _, _) :-			% catch variables
  220	!, fail.
  221reified(rdf(S,P,O), StatementId,
  222	(   rdf(StatementId, Type, Statement),
  223	    rdf(StatementId, Subject, S),
  224	    rdf(StatementId, Predicate, P),
  225	    rdf(StatementId, Object, O)
  226	)) :-
  227	rdf_equal(Type, rdf:type),
  228	rdf_equal(Subject, rdf:subject),
  229	rdf_equal(Predicate, rdf:predicate),
  230	rdf_equal(Object, rdf:object),
  231	rdf_equal(Statement, rdf:'Statement').
  232
  233
  234
  235make_set_subj_conj([], _, _, _, _, true).	% should not happen
  236make_set_subj_conj([Last], [], P, O, Type, Goal) :- !,
  237	serql_compile_path(rdf(Last, P, O), Type, Goal).
  238make_set_subj_conj([Last], Diff, P, O, Type, (Goal, Diffs)) :- !,
  239	serql_compile_path(rdf(Last, P, O), Type, Goal),
  240	make_diff(Diff, Last, Diffs).
  241make_set_subj_conj([H|T], Diff, P, O, Type, (Goal, Diffs, More)) :- !,
  242	serql_compile_path(rdf(H, P, O), Type, Goal),
  243	make_diff(Diff, H, Diffs),
  244	make_set_subj_conj(T, [H|Diff], P, O, Type, More).
  245
  246
  247make_set_obj_conj([], _, _, _, _, true).	% should not happen
  248make_set_obj_conj([Last], [], S, P, Type, Goal) :- !,
  249	serql_compile_path(rdf(S, P, Last), Type, Goal).
  250make_set_obj_conj([Last], Diff, S, P, Type, (Goal, Diffs)) :- !,
  251	serql_compile_path(rdf(S, P, Last), Type, Goal),
  252	make_diff(Diff, Last, Diffs).
  253make_set_obj_conj([H|T], Diff, S, P, Type, (Goal, Diffs, More)) :- !,
  254	serql_compile_path(rdf(S, P, H), Type, Goal),
  255	make_diff(Diff, H, Diffs),
  256	make_set_obj_conj(T, [H|Diff], S, P, Type, More).
  257
  258
  259make_diff([], _, true).
  260make_diff([Last], To, (Last \== To)) :- !.
  261make_diff([H|T], To, (H \== To, More)) :-
  262	make_diff(T, To, More).
%!  statements(+Graph, -ListOfTriples)
%
%   Extract a plain list of triples from a CONSTRUCT path-expression.
%   Optional parts of the tree are represented as
%   optional(Bool, ListOfTriples).  Using CONSTRUCT * (i.e. when the
%   executed path is the result path) the goal generated by the
%   compiler will unify Bool with true or false.  See also
%   member_statement/2.
%
%   Node sets are expanded into one triple per member.  This includes
%   the case where only the object is a set: without that clause such
%   a path would produce a triple whose object is the term set(List).

statements(Graph, Statements) :-
    phrase(statements(Graph), Statements).

statements(rdf(S,P,O)) -->
    { set(S, Subjects) },
    !,
    subj_statements(Subjects, P, O).
statements(rdf(S,P,O)) -->
    { set(O, Objects) },
    !,
    obj_statements(Objects, S, P).
statements(rdf(S,P0,O)) -->
    !,
    {   nonvar(P0),
        map_builtin(P0, P)              % serql:direct* --> plain RDFS/RDF
    ->  true
    ;   P = P0
    },
    [ rdf(S,P,O) ].
statements((A,B)) -->
    !,
    statements(A),
    statements(B).
statements(optional(Id, A)) -->
    !,
    { phrase(statements(A), OptionalStatements) },
    [ optional(Id, OptionalStatements) ].
statements(_) -->
    [].
  295
  296term_expansion(map_builtin(B0, P0), map_builtin(B, P)) :-
  297	rdf_global_id(B0, B),
  298	rdf_global_id(P0, P).
  299
  300map_builtin(serql:directSubClassOf,    rdfs:subClassOf).
  301map_builtin(serql:directSubPropertyOf, rdfs:subPropertyOf).
  302map_builtin(serql:directType,          rdf:type).
  303
  304
  305subj_statements([], _, _) -->
  306	[].
  307subj_statements([H|T], P, O) -->
  308	(   { set(O, Set) }
  309	->  obj_statements(Set, H, P)
  310	;   [ rdf(H, P, O) ]
  311	),
  312	subj_statements(T, P, O).
  313
  314obj_statements([], _, _) -->
  315	[].
  316obj_statements([H|T], S, P) -->
  317	[ rdf(S, P, H) ],
  318	obj_statements(T, S, P).
  319
  320
  321set(Node, Set) :-
  322	nonvar(Node),
  323	Node = set(Set).
  324
  325
  326		 /*******************************
  327		 *	 SELECT FUNCTIONS	*
  328		 *******************************/
  329
  330projection_functions(Row0, Row, Map) :-
  331	functor(Row0, Functor, Arity),
  332	functor(Row, Functor, Arity),
  333	projection_functions(0, Arity, Row0, Row, true, Map).
  334
  335projection_functions(Arity, Arity, _, _, Map, Map) :- !.
  336projection_functions(I0, Arity, Row0, Row, Map0, Map) :-
  337	I is I0 + 1,
  338	arg(I, Row0, A0),
  339	(   var(A0)
  340	->  arg(I, Row, A0),
  341	    projection_functions(I, Arity, Row0, Row, Map0, Map)
  342	;   arg(I, Row, A),
  343	    add_conj(Map0, serql_eval(A0, A), Map1),
  344	    projection_functions(I, Arity, Row0, Row, Map1, Map)
  345	).
  346
  347add_conj(true, X, X) :- !.
  348add_conj(C0, G, (C0,G)).
  349
  350
  351		 /*******************************
  352		 *	WHERE CONSTRAINTS	*
  353		 *******************************/
  354
  355/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  356The idea of this step  is  to   turn  where  clauses into constraints on
  357variables.
  358
  359Supported annotations (in standard order of terms):
  360
  361	any
  362	literal
  363	resource
  364	eq(Value)
  365	like(Pattern)
  366- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
%!  where_constraints(+Goal, -Annotations)
%
%   Annotations is the list of variables of Goal that received a
%   `where` attribute.  Each annotation is either a plain annotation
%   or a term or(ListOfAlternatives).  The latter is used if different
%   paths through the control-structure yield different annotations.
%   Fails if where_constraint_list/2 has no solution for Goal.

where_constraints(Goal, Annotated) :-
    bagof(Alternative,
          where_constraint_list(Goal, Alternative),
          Alternatives0),
    sort_lol(Alternatives0, Alternatives),
    join_alt_annots(Alternatives, Annotated).
%!  where_constraint_list(+Goal, -Annotations)
%
%   Interpret Goal, making annotations on the variables.  Annotations
%   is a list of Var = Annotation terms.  Backtracking yields
%   alternative annotations due to choicepoints in Goal.

where_constraint_list(Goal, Annotations) :-
    phrase(where_constraints(Goal), AttributedVars),
    attrs_to_terms(AttributedVars, Annotations).
  388
  389
  390where_constraints((A,B)) --> !,
  391	where_constraints(A),
  392	where_constraints(B).
  393where_constraints((A;B)) --> !,
  394	(   where_constraints(A)
  395	;   where_constraints(B)
  396	).
  397where_constraints(serql_compare(like, Var, Pattern)) --> !,
  398	constrain(Var, like(Pattern)).
  399where_constraints(serql_compare(=, Var, Value)) --> !,
  400	constrain(Var, eq(Value)).
  401where_constraints(rdf_is_literal(V)) --> !,
  402	constrain(V, literal).
  403where_constraints(rdf_is_resource(V)) --> !,
  404	constrain(V, resource).
  405where_constraints(rdf(S,P,_)) --> !,
  406	constrain(S, resource),
  407	constrain(P, resource).
  408where_constraints(_) -->
  409	[].
  410
  411constrain(Var, Cond) -->
  412	{ var(Var) }, !,
  413	(   { get_attr(Var, where, C0) }
  414	->  { put_attr(Var, where, (Cond, C0)) },
  415	    []
  416	;   { put_attr(Var, where, Cond)
  417	    },
  418	    [ Var ]
  419	).
  420constrain(label(X), Cond) --> !,
  421	constrain(X, (literal, Cond)).
  422constrain(lang(X), Cond) --> !,
  423	constrain(X, (literal, Cond)).
  424constrain(datatype(X), Cond) --> !,
  425	constrain(X, (literal, Cond)).
  426constrain(_, _) -->
  427	[].
%!  join_alt_annots(+ListOfAnnotLists, -AnnotatedVars)
%
%   ListOfAnnotLists is a list of alternative annotations due to
%   choicepoints.  Each annotation list represents annotations in the
%   form Var = Annotation.  AnnotatedVars is a list of variables with
%   attributes representing their annotations.
%
%   Variables are processed smallest first.  A variable for which the
%   alternatives allow no conclusion (see empty_annotations/1) gets no
%   attribute and is left out of AnnotatedVars.

join_alt_annots(LoL, Annotated) :-
    (   smallest_var(LoL, Var)
    ->  var_annotations(Var, LoL, Remaining, Annotations0),
        sort(Annotations0, Annotations),        % remove duplicates
        (   empty_annotations(Annotations)
        ->  join_alt_annots(Remaining, Annotated)
        ;   put_annotations(Annotations, Var),
            Annotated = [Var|Rest],
            join_alt_annots(Remaining, Rest)
        )
    ;   Annotated = [],
        assertion(maplist(=([]), LoL))          % only empty lists may remain
    ).
%!  normalise_annotation(+A0, -A)
%
%   Create a normalised version of an annotation for easy processing.
%   Currently only deals with annotations that are a conjunction: the
%   conjuncts are flattened, sorted in standard order of terms with
%   duplicates removed and joined into a right-nested conjunction.

normalise_annotation(Annot0, Annot) :-
    conj_to_list(Annot0, Conjuncts0, []),
    sort(Conjuncts0, Conjuncts),
    list_do_conj(Conjuncts, Annot).

%   conj_to_list(+Conj, -List, ?Tail): flatten Conj as difference list.

conj_to_list((Left, Right), List, Tail) :-
    !,
    conj_to_list(Left, List, Middle),
    conj_to_list(Right, Middle, Tail).
conj_to_list(Single, [Single|Tail], Tail).

%   list_do_conj(+List, -Conj): inverse of the above; [] maps to `any`.

list_do_conj([], any).
list_do_conj([Only], Only) :-
    !.
list_do_conj([First|Rest], (First, Conj)) :-
    list_do_conj(Rest, Conj).
%!  empty_annotations(+List)
%
%   True if there is no sensible conclusion we can draw using the
%   annotations found, i.e. List is empty or contains `any`.  This is
%   often the case if multiple paths in a disjunction do not deal with
%   all variables.  Note that this is not necessarily the end of the
%   story.  We could rewrite
%
%       A,(C1;C2) into (A,C1);(A,C2)
%
%   And apply optimisation on both branches.

empty_annotations(List) :-
    (   List = []
    ->  true
    ;   memberchk(any, List)
    ).
  487
  488put_annotations([], _).
  489put_annotations([One], Var) :- !,
  490	put_attr(Var, where, One).
  491put_annotations(More, Var) :-
  492	put_attr(Var, where, or(More)).
%!  smallest_var(+ListOfList, -Smallest)
%
%   Get the smallest (in standard order of terms) annotated variable.
%   Only the first Var = Annotation of each list is inspected.  Fails
%   if all lists are empty.

smallest_var([First|Rest], Smallest) :-
    (   First = [Var=_|_]
    ->  smallest_var(Rest, Var, Smallest)
    ;   First == [],
        smallest_var(Rest, Smallest)
    ).

%   smallest_var(+ListOfList, +Smallest0, -Smallest)

smallest_var([], Smallest, Smallest).
smallest_var([[Var=_|_]|Rest], Smallest0, Smallest) :-
    !,
    (   Var @< Smallest0
    ->  Smallest1 = Var
    ;   Smallest1 = Smallest0
    ),
    smallest_var(Rest, Smallest1, Smallest).
smallest_var([[]|Rest], Smallest0, Smallest) :-
    smallest_var(Rest, Smallest0, Smallest).
  510
  511smallest(A, B, S) :-
  512	(   A @< B
  513	->  S = A
  514	;   S = B
  515	).
%!  var_annotations(+Var, +LoL0, -LoL, -Annotations)
%
%   Get all Annotation for Var.  Note that the annotation is either
%   the head of the list or not in the list.  A list that starts with
%   Var = A contributes A and loses that element (it is dropped
%   completely if it becomes empty); any other list is kept as is and
%   contributes `any`.
%
%   Var is an unbound variable, so the head of each list must be
%   compared using ==/2.  Plain unification would alias Var with the
%   first variable of every list and the `any` case would never be
%   reached.

var_annotations(_, [], [], []) :-
    !.
var_annotations(Var, [[Var0=A|TA0]|TL0], LoL, [A|TA]) :-
    Var0 == Var,
    !,
    (   TA0 == []
    ->  LoL = TL
    ;   LoL = [TA0|TL]
    ),
    var_annotations(Var, TL0, TL, TA).
var_annotations(Var, [A0|TL0], [A0|TL], [any|A]) :-
    var_annotations(Var, TL0, TL, A).
  531
  532
  533where:attr_unify_hook(_,_) :- fail.
  534where:attr_portray_hook(Val, _Var) :-
  535	print(Val).
%!  attrs_to_terms(+AttrsVars, -List)
%
%   Convert X{where=A} into X=A terms, removing the attribute from X
%   and normalising A using normalise_annotation/2.  Without this we
%   cannot use bagof/3 and maintain the variables.  Not sure this is a
%   bug in bagof or not.

attrs_to_terms([], []).
attrs_to_terms([Var|Vars], [Var=Annot|Terms]) :-
    get_attr(Var, where, Annot0),
    del_attr(Var, where),
    normalise_annotation(Annot0, Annot),
    attrs_to_terms(Vars, Terms).
%!  sort_lol(+ListOfList, -ListOfSortedLists)
%
%   Sort each of the member lists using sort/2 (standard order of
%   terms, duplicates removed).

sort_lol(Lists, SortedLists) :-
    maplist(sort, Lists, SortedLists).
%!  remove_annotations(+List, +Attr)
%
%   Delete the attribute Attr from each variable in List.

remove_annotations([], _).
remove_annotations([Var|Vars], Attr) :-
    del_attr(Var, Attr),
    remove_annotations(Vars, Attr).
%!  object_annotations(+In, -Out, -Goal)
%
%   Exploit the `where` annotation on the object of a triple.  If In
%   is annotated with (literal, like(Pattern)), Out is the object
%   pattern literal(like(Pattern), L) and Goal unifies In with
%   literal(L).  In all other cases Out is In and Goal is `true`.

object_annotations(In, Out, Goal) :-
    (   get_attr(In, where, Annotation),
        object_annot(Annotation, In, Out, Goal)
    ->  true
    ;   Out = In,
        Goal = true
    ).

object_annot((literal, like(Pattern)), Var,
             literal(like(Pattern), Lit), Var = literal(Lit)).
%!  resource_annotations(+R, -Goal)
%
%   Exploit the `where` annotation on the subject or predicate of a
%   triple.  An equality with a variable or with a query constant is
%   executed right away by removing the attribute and unifying R; a
%   disjunction of equalities with query constants becomes the goal
%   member(R, Constants).  In all other cases Goal is `true`.

resource_annotations(R, Goal) :-
    (   get_attr(R, where, Annotation),
        resource_annot(Annotation, R, Goal)
    ->  true
    ;   Goal = true
    ).

resource_annot(eq(Other), R, true) :-   % where A = B
    var(Other),
    !,
    del_attr(R, where),
    R = Other.
resource_annot(eq(query(Constant)), R, true) :-
    !,
    del_attr(R, where),
    R = Constant.
resource_annot(or(Alternatives), R, Goal) :-
    eq_list(Alternatives, Constants),
    !,
    Goal = member(R, Constants).

%   eq_list(+Annotations, -Constants): all annotations are eq(query(C)).

eq_list([], []).
eq_list([eq(query(Constant))|Annots], [Constant|Constants]) :-
    eq_list(Annots, Constants).
%!  clean_conj(+Goal0, -Goal)
%
%   Remove redundant `true` statements from a conjunction.  Both sides
%   of every ','/2 are cleaned, so that a `true` is removed wherever
%   it appears, e.g. (a, true, b, true) becomes (a, b).  The nesting
%   of the remaining goals is preserved.  An unbound goal is returned
%   as is instead of being unified with a conjunction.

clean_conj(Goal, Goal) :-
    var(Goal),
    !.
clean_conj((Left0, Right0), Goal) :-
    !,
    clean_conj(Left0, Left),
    clean_conj(Right0, Right),
    (   Left == true
    ->  Goal = Right
    ;   Right == true
    ->  Goal = Left
    ;   Goal = (Left, Right)
    ).
clean_conj(Goal, Goal).
  608
  609		 /*******************************
  610		 *	      PARSER		*
  611		 *******************************/
%!  serql_parse(+Input, -ParseTree)
%
%   Parse the SeRQL statement Input into a Prolog representation.
%   Input is a list of character codes or an atomic value that is
%   converted to codes.  After tokenising and parsing, variable names
%   are replaced by Prolog variables and URIs are expanded.
%
%   Throws syntax_error(unknown) if any of these steps fails and
%   type_error(text, Input) if Input is neither a list nor atomic.

serql_parse(Codes, ParseTree) :-
    is_list(Codes),
    !,
    (   phrase(tokens(Tokens), Codes),
        phrase(query(Parsed, NameSpaces), Tokens),
        expand_vars(Parsed, WithVars),
        expand_uris(WithVars, NameSpaces, ParseTree)
    ->  true
    ;   syntax_error(unknown)
    ).
serql_parse(Text, ParseTree) :-
    atomic(Text),
    !,
    atom_codes(Text, Codes),
    serql_parse(Codes, ParseTree).
serql_parse(Input, _) :-
    throw(error(type_error(text, Input), _)).
  632
  633
  634		 /*******************************
  635		 *	       ERRORS		*
  636		 *******************************/
  637
  638syntax_error(What) :-
  639	throw(error(syntax_error(What),
  640		    context(_, 'in SeRQL query'))).
  641
  642
  643		 /*******************************
  644		 *	     NAMESPACES		*
  645		 *******************************/
  646
  647expand_uris(Var, _, Var) :-
  648	var(Var), !.
  649expand_uris(uri(URI), _, URI) :- !.		% <!foo:bar>
  650expand_uris(uri(NS, URI0), Map, URI) :- !,	% foo:bar
  651	(   memberchk(NS=Prefix, Map)
  652	->  true
  653	;   ns(NS, Prefix)
  654	->  true
  655	;   throw(error(existence_error(namespace, NS), _))
  656	),
  657	atom_concat(Prefix, URI0, URI).
  658expand_uris(old_uri(NS, URI0), Map, URI) :- !,	% <foo:bar>
  659	(   (   memberchk(NS=Prefix, Map)
  660	    ;   ns(NS, Prefix)
  661	    )
  662	->  atom_concat(Prefix, URI0, URI)
  663	;   concat_atom([NS, :, URI0], URI)
  664	).
  665expand_uris(Q0, Map, Q) :-
  666	compound(Q0), !,
  667	functor(Q0, Name, Arity),
  668	functor(Q, Name, Arity),
  669	expand_uris(0, Arity, Q0, Map, Q).
  670expand_uris(Q, _, Q).
  671
  672expand_uris(Arity, Arity, _, _, _) :- !.
  673expand_uris(I0, Arity, Q0, Map, Q) :-
  674	I is I0 + 1,
  675	arg(I, Q0, A0),
  676	arg(I, Q, A),
  677	expand_uris(A0, Map, A),
  678	expand_uris(I, Arity, Q0, Map, Q).
%!  ns(?Id, ?URI)
%
%   Translate between namespace id and URI.  If the setting
%   cliopatria:rdf_db_namespaces is true, we share the namespace
%   declarations with the RDF store (rdf_db:ns/2).  Otherwise the
%   built-in table serql_ns/2 is used.

ns(Id, URI) :-
    (   setting(cliopatria:rdf_db_namespaces, true)
    ->  rdf_db:ns(Id, URI)
    ;   serql_ns(Id, URI)
    ).
  692
  693serql_ns(rdf,  'http://www.w3.org/1999/02/22-rdf-syntax-ns#').
  694serql_ns(rdfs, 'http://www.w3.org/2000/01/rdf-schema#').
  695serql_ns(owl,  'http://www.w3.org/2002/7/owl#').
  696serql_ns(xsd,  'http://www.w3.org/2001/XMLSchema#'). % Wrong in SeRQL docs!
  697serql_ns(serql,'http://rdf4j.org/schema/serql#').
  698
  699
  700		 /*******************************
  701		 *	     VARIABLES		*
  702		 *******************************/
  703
  704%	TBD: Check that projection variables actually appear in the
  705%	query!
  706
  707expand_vars(select(*, Path0, Where0, Distinct, Limit, Offset),
  708	    select(Row, VNames, Path, Where, Distinct, Limit, Offset)) :- !,
  709	var_names(Path0-Where0, Path-Where, VarNames),
  710	vars(VarNames, Vars, Names),
  711	Row =.. [row | Vars],
  712	VNames =.. [names|Names].
  713expand_vars(select(Projection, Path0, Where0, Distinct, Limit, Offset),
  714	    select(Row, VNames, Path, Where, Distinct, Limit, Offset)) :- !,
  715	var_names(x(Projection,Path0,Where0), x(Vars,Path,Where), _VarNames),
  716	Row =.. [row | Vars],
  717	proj_names(Projection, Names),
  718	VNames =.. [names|Names].
  719expand_vars(construct(*, Path0, Where0, Distinct, Limit, Offset),
  720	    construct(Path, Path, Where, Distinct, Limit, Offset)) :- !,
  721	var_names(x(Path0,Where0), x(Path,Where), _VarNames).
  722expand_vars(construct(Ret0, Path0, Where0, Distinct, Limit, Offset),
  723	    construct(Ret, Path, Where, Distinct, Limit, Offset)) :- !,
  724	var_names(x(Ret0,Path0,Where0), x(Ret,Path,Where), _VarNames).
  725
  726
  727var_names(var(-(V)), V, _) :- !.		% bnodes, mapped from {}
  728var_names(var(Name), Var, Map) :-
  729	member(Name=Var, Map), !.
  730var_names(Q0, Q, Map) :-
  731	compound(Q0), !,
  732	functor(Q0, Name, Arity),
  733	functor(Q, Name, Arity),
  734	var_names(0, Arity, Q0, Q, Map).
  735var_names(Q, Q, _).
  736
  737var_names(Arity, Arity, _, _, _) :- !.
  738var_names(I0, Arity, Q0, Q, Map) :-
  739	I is I0 + 1,
  740	arg(I, Q0, A0),
  741	arg(I, Q, A),
  742	var_names(A0, A, Map),
  743	var_names(I, Arity, Q0, Q, Map).
  744
  745vars([], [], []) :- !.			% also closes list!
  746vars([Name=Var|T0], [Var|TV], [Name|TN]) :-
  747	vars(T0, TV, TN).
  748
  749proj_names([], []).
  750proj_names([var(Var)|T0], [Var|T]) :- !,
  751	proj_names(T0, T).
  752proj_names([_|T0], [-|T]) :-
  753	proj_names(T0, T).
  754
  755
  756		 /*******************************
  757		 *	 ERROR LOCATIONS	*
  758		 *******************************/
  759
  760syntax_error(What, In, []) :-
  761	throw(error(syntax_error(What),
  762		    context(_, left(In)))).
  763
  764add_error_location(error(syntax_error(What),
  765			 context(_, left(After))),
  766		   Tokens) :-
  767	append(Before, After, Tokens),
  768	length(Before, BL),
  769	(   BL =< 5
  770	->  BC = Before
  771	;   length(BC0, 5),
  772	    append(_, BC0, Before),
  773	    BC = ['...'|BC0]
  774	),
  775	length(After, AL),
  776	(   AL =< 5
  777	->  AC = After
  778	;   length(AC0, 5),
  779	    append(AC0, _, After),
  780	    append(AC0, ['...'], AC)
  781	),
  782	append(BC, ['**here**'|AC], ContextTokens0),
  783	maplist(token_to_atom, ContextTokens0, ContextTokens),
  784	concat_atom(ContextTokens, ' ', Context),
  785	throw(error(syntax_error(What),
  786		    context(serql_parse/2, Context))).
  787
  788token_to_atom(Token, Token) :-
  789	atom(Token), !.
  790token_to_atom(id(X), X) :- !.
  791token_to_atom(string(X), X) :- !.
  792token_to_atom(uri(URI), X) :- !,
  793	concat_atom([<, URI, >], X).
  794token_to_atom(uri(NS,Local), X) :- !,
  795	concat_atom([NS, Local], :, X).
  796token_to_atom(old_uri(NS,Local), X) :- !,
  797	concat_atom([<, NS, :, Local, >], X).
  798token_to_atom(cmp(X), X) :- !.
  799token_to_atom(rest(X), X) :- !.
  800token_to_atom(Token, Atom) :-
  801	term_to_atom(Token, Atom).
  802
  803query(Query, NameSpaces, In, Out) :-
  804	catch(compilation_unit(Query, NameSpaces, In, Out),
  805	      E,
  806	      add_error_location(E, In)).
  807
  808must_see(Token) -->
  809	[Token], !.
  810must_see(Token) -->
  811	syntax_error(expected(Token)).
  812
  813must_see(Token, _) -->
  814	[Token], !.
  815must_see(_, UserName) -->
  816	syntax_error(expected(UserName)).
  817
  818
  819		 /*******************************
  820		 *	 HIGH LEVEL PARSER	*
  821		 *******************************/
  822
  823compilation_unit(Query, NameSpaces) -->
  824	query(Query),
  825	namespace_list(NameSpaces).
%!  namespace_list(-NSList:list)// is det.
%
%   Parse the optional `using namespace` declarations.
%
%   @arg NSList  List of Prefix=URI for each defined namespace.

namespace_list([First|More]) -->
    [ using ],
    !,
    must_see(namespace),
    !,
    must_see_namespace(First),
    namespaces(More).
namespace_list([]) -->
    [].
  837
  838must_see_namespace(Decl) -->
  839	namespace(Decl), !.
  840must_see_namespace(_) -->
  841	syntax_error(expected(namespace_declaration)).
  842
  843namespace(NS=URI) -->
  844	must_see(id(NS), identifier),
  845	must_see(cmp(=), =),
  846	namespace_uri(URI).
  847
  848namespace_uri(URI) -->
  849	[ uri(URI) ], !.
  850namespace_uri(URI) -->
  851	[ old_uri(Protocol, Local) ], !, % New style <foo:bar>
  852	{ concat_atom([Protocol, :, Local], URI)
  853	}.
  854namespace_uri(_) -->
  855	syntax_error(expected(absolute_uri)).
  856
  857namespaces([H|T]) -->
  858	[ ',' ], !,
  859	must_see_namespace(H),
  860	namespaces(T).
  861namespaces([]) -->
  862	[].
  863
  864query(select(Projection, Path, Where, Distinct, Limit, Offset)) -->
  865	[ select ], !,
  866	distinct(Distinct),
  867	projection(Projection),
  868	must_see(from), path_expr_list(Path),
  869	query_tail(Where, Limit, Offset).
  870query(construct(Construct, Path, Where, Distinct, Limit, Offset)) -->
  871	[ construct ], !,
  872	distinct(Distinct),
  873	construct_clause(Construct),
  874	must_see(from), path_expr_list(Path),
  875	query_tail(Where, Limit, Offset).
  876query(_) -->
  877	syntax_error(no_select_or_construct).
  878
  879distinct(distinct) -->
  880	[ distinct ], !.
  881distinct(false) -->
  882	[].
  883
  884query_tail(Where, Limit, Offset) -->
  885	(   [ where ]
  886	->  (   boolean_query(Where)
  887	    ->	[]
  888	    ;	syntax_error(illegal_where_clause)
  889	    )
  890	;   {Where = true}
  891	),
  892	(   [ limit ]
  893	->  (   pos_int(Limit)
  894	    ->	[]
  895	    ;	syntax_error(illegal_limit)
  896	    )
  897	;   {Limit = inf}
  898	),
  899	(   [ offset ]
  900	->  (   pos_int(Offset)
  901	    ->	[]
  902	    ;	syntax_error(illegal_offset)
  903	    )
  904	;   {Offset = 0}
  905	).
  906
  907projection(*) -->
  908	[ * ], !.
  909projection([H|T]) -->
  910	var_or_value(H), !,
  911	var_or_value_list(T).
  912projection(_) -->
  913	syntax_error(expected(projection)).
  914
  915construct_clause(*) -->
  916	[ * ], !.
  917construct_clause(Path) -->
  918	path_expr_list(Path), !.
  919construct_clause(_) -->
  920	syntax_error(expected(construct_clause)).
  921
  922path_expr_list(Expr) -->
  923	must_see_path_expr(E0),
  924	(   [ ',' ]
  925	->  path_expr_list(Es),
  926	    { Expr = (E0, Es) }
  927	;   { Expr = E0 }
  928	).
  929
  930must_see_path_expr(E) -->
  931	path_expr(E), !.
  932must_see_path_expr(_) -->
  933	syntax_error(expected(path_expression)).
  934
  935path_expr(optional(_, Path)) -->
  936	[ '[' ], !, path_expr_list(Path), must_see(']').
  937path_expr(Expr) -->
  938	path_expr0(Expr).
  939
  940path_expr0(Expr) -->
  941	path_expr_head(Head),
  942	(   (   [ ';' ]
  943	    ->	{ arg(1, Head, H) }
  944	    ;	{ arg(3, Head, H) }
  945	    ),
  946	    path_expr_tail(H, Tail)
  947	->  { Expr = (Head, Tail)
  948	    }
  949	;   { Expr = Head }
  950	).
  951
  952
  953path_expr_head(rdf(S, P, O)) -->
  954	must_see_node(S), must_see_edge(P), must_see_node(O).
  955
  956path_expr_tail(S, Expr) -->
  957	[ '[' ], path_expr_tail0(S, Expr1), [ ']' ],
  958	{ Expr0 = optional(_, Expr1) },
  959	(   [ ';' ]
  960	->  path_expr_tail(S, Tail),
  961	    { Expr = (Expr0, Tail) }
  962	;   { Expr = Expr0 }
  963	).
  964path_expr_tail(S, Expr) -->
  965	path_expr_tail0(S, Expr).
  966
  967%	path_expr_tail0 <=> Edge Node ((";")? Path_expr_tail)?
  968
  969path_expr_tail0(S, Expr) -->
  970	edge(P), must_see_node(O),
  971	{ Statement = rdf(S, P, O) },
  972	(   (   [ ';' ]
  973	    ->  path_expr_tail(S, Tail)
  974	    ;	path_expr_tail(O, Tail)
  975	    )
  976	->  { Expr = (Statement, Tail) }
  977	;   { Expr = Statement }
  978	).
  979
  980must_see_edge(Edge) -->
  981	edge(Edge), !.
  982must_see_edge(_) -->
  983	syntax_error(expected(edge)).
  984
  985edge(var(Var)) -->
  986	[ id(Var) ], !.
  987edge(uri(URI)) -->
  988	[ uri(URI) ], !.		% <!foo:bar>
  989edge(uri(NS, URI)) -->
  990	[ uri(NS, URI) ], !.		% foo:bar
  991edge(old_uri(NS, URI)) -->
  992	[ old_uri(NS, URI) ], !.	% <foo:bar>
  993
  994must_see_node(Node) -->
  995	node(Node), !.
  996must_see_node(_) -->
  997	syntax_error(expected(node)).
  998
  999node(Node) -->
 1000	[ '{' ], node_elem(E0), !, node_elem_list(Es), [ '}' ],
 1001	(   {Es == []}
 1002	->  {Node = E0}
 1003	;   {Node = set([E0|Es])}
 1004	).
 1005node(var(-(_))) -->			% the _ is the variable that will
 1006	[ '{', '}' ].			% be shared
 1007
 1008node_elem_list([H|T]) -->
 1009	[ ',' ], !,
 1010	must_see_node_elem(H),
 1011	node_elem_list(T).
 1012node_elem_list([]) -->
 1013	[].
 1014
 1015must_see_node_elem(Elem) -->
 1016	node_elem(Elem), !.
 1017must_see_node_elem(_) -->
 1018	syntax_error(expected(node_element)).
 1019
 1020node_elem(Elem) -->
 1021	(   var(Elem)
 1022	;   uri(Elem)
 1023	;   literal(Elem)
 1024	;   reified_stat(Elem)
 1025	), !.
 1026
 1027reified_stat(rdf(S,P,O)) -->
 1028	node(S), must_see_edge(P), must_see_node(O).
 1029
 1030
 1031		 /*******************************
 1032		 *	      WHERE ...		*
 1033		 *******************************/
 1034
 1035boolean_query(Query) -->
 1036	and_expr(And),
 1037	(   [ or ],
 1038	    boolean_query(Or)
 1039	->  {Query = (And ; Or)}
 1040	;   {Query = And}
 1041	).
 1042
 1043and_expr(Query) -->
 1044	boolean_query0(Q0),
 1045	(   [ and ],
 1046	    and_expr(And)
 1047	->  {Query = (Q0, And)}
 1048	;   {Query = Q0}
 1049	).
 1050
 1051boolean_query0(Query) -->
 1052	[ '(' ], !, boolean_query(Query), must_see(')').
 1053boolean_query0(true) -->
 1054	[ true ], !.
 1055boolean_query0(fail) -->
 1056	[ false ], !.
 1057boolean_query0(\+(Q)) -->
 1058	[ not ], !, boolean_query0(Q).
 1059boolean_query0(serql_compare(Cmp, L, R)) -->
 1060	var_or_query_value(L),
 1061	[ cmp(Cmp) ], !,
 1062	var_or_query_value(R).
 1063boolean_query0(serql_compare(like, Var, String)) -->
 1064	var_or_value(Var),		% must be var?
 1065	[ like ], !, must_see_string(String).
 1066boolean_query0(rdf_is_literal(V)) -->
 1067	[ isliteral, '(' ], !, var(V), must_see(')').
 1068boolean_query0(rdf_is_resource(V)) -->
 1069	[ isresource, '(' ], !, var(V), must_see(')').
 1070boolean_query0(_) -->
 1071	syntax_error(expected(boolean_test)).
 1072
 1073must_see_string(String) -->
 1074	[ string(String) ], !.
 1075must_see_string(_) -->
 1076	syntax_error(expected(string)).
 1077
 1078var_or_value_list([H|T]) -->
 1079	[ ',' ], !,
 1080	must_see_var_or_value(H),
 1081	var_or_value_list(T).
 1082var_or_value_list([]) -->
 1083	[].
 1084
 1085must_see_var_or_value(X) -->
 1086	var_or_value(X), !.
 1087must_see_var_or_value(_) -->
 1088	syntax_error(expected(var_or_value)).
 1089
 1090var_or_value(X) -->
 1091	var(X), !.
 1092var_or_value(X) -->
 1093	value(X).
 1094
 1095var_or_query_value(X) -->
 1096	(   literal_value(Value)
 1097	->  { X = query(Value)
 1098	    }
 1099	;   var_or_value(X)
 1100	).
 1101
 1102var(var(Var)) -->
 1103	[ id(Var) ], !.
 1104
 1105value(URI) -->
 1106	uri(URI).
 1107value('$null$') -->
 1108	[ null ].
 1109value(Literal) -->
 1110	literal(Literal), !.
 1111value(datatype(var(Var))) -->
 1112	[ datatype, '(', id(Var), ')' ].
 1113value(lang(var(Var))) -->
 1114	[ lang, '(', id(Var), ')' ].
 1115value(label(var(Var))) -->
 1116	[ label, '(', id(Var), ')' ].
 1117
 1118uri(uri(URI)) --> [uri(URI)].
 1119uri(uri(NS, URI)) --> [uri(NS, URI)].
 1120uri(old_uri(NS, URI)) --> [old_uri(NS, URI)].
 1121
 1122literal(Literal) -->
 1123	literal_value(Value),
 1124	{ Literal = literal(Value) }.
 1125
 1126literal_value(Lit) -->
 1127	[ string(String) ],
 1128	(   [@, id(Lang)]
 1129	->  { Lit = lang(Lang, String) }
 1130	;   [^^, URI]
 1131	->  { Lit = type(URI, String) }
 1132	;   { Lit = String }
 1133	).
 1134
 1135pos_int(I) -->
 1136	[ int(I) ], { I >= 0 }.		% bit weird not to have >0, but this
 1137					% is the Sesame spec
 1138
 1139
 1140		 /*******************************
 1141		 *	    TOKENISER		*
 1142		 *******************************/
 1143
 1144tokens([H|T]) -->
 1145	blank,
 1146	token(H), !,
 1147	tokens(T).
 1148tokens([]) -->
 1149	blank.
 1150
 1151token(uri(URI)) -->			% Old style absolute URI
 1152	"<!", uri_codes(Codes), ">",
 1153	{ atom_codes(URI, Codes)
 1154	}.
 1155token(old_uri(NS, Local)) -->		% Old style local, new style absolute
 1156	"<", identifier(NS), ":", uri_codes(Codes), ">",
 1157	{ atom_codes(Local, Codes)
 1158	}.
 1159token(string(String)) -->
 1160	"\"", string_codes(Codes), "\"",
 1161	{ atom_codes(String, Codes)
 1162	}.
 1163token(Token) -->
 1164	identifier(Id), !,
 1165	(   ":", identifier(Local)	% new style URI
 1166	->  { Token = uri(Id, Local)
 1167	    }
 1168	;   {   downcase_atom(Id, Keyword),
 1169		serql_keyword(Keyword)
 1170	    ->  Token = Keyword
 1171	    ;   Token = id(Id)
 1172	    }
 1173	).
 1174token(int(Int)) -->
 1175	digit(D0), !,
 1176	digits(Digits),
 1177	{ number_codes(Int, [D0|Digits])
 1178	}.
 1179token(cmp(Cmp)) -->
 1180	cmp(Cmp), !.
 1181token(^^) -->
 1182	"^^", !.
 1183token(Char) -->
 1184	[C],
 1185	{ single(C),
 1186	  char_code(Char, C)
 1187	}.
 1188token(rest(Rest), In, []) :-		% catch syntax errors.
 1189	In \== [],
 1190	atom_codes(Rest, In).
 1191
 1192
%	single(?Code)
%
%	True if Code is the character code of a punctuation character
%	that forms a token on its own.  Note that token//1 tries cmp//1
%	before it consults this table, so a '=' in the input is returned
%	as cmp(=) rather than through the entry below.

single(0'*).
single(0'=).
single(0'().
single(0')).
single(0'{).
single(0'}).
single(0'[).
single(0']).
single(0'@).
single(0',).
single(0';).
 1204
%	cmp(?Op)//
%
%	Returns Prolog comparison operators from the SeRQL ones.
%
%	The two-character operators are listed before the one-character
%	ones.  token//1 commits to the first clause that matches, so
%	this order is what makes "<=" and ">=" read as a single operator
%	instead of "<" or ">" followed by "=".

cmp(=<) --> "<=".
cmp(\=) --> "!=".
cmp(>=) --> ">=".
cmp(=)  --> "=".
cmp(<)  --> "<".
cmp(>)  --> ">".
%	uri_codes(-Codes)//
%
%	Get a URI string.  Takes the longest prefix of the input that
%	consists of codes accepted by uri_code/1; this may be empty.
%	Does not check for otherwise valid syntax.  This could be done
%	using library(url).

uri_codes([Code|Codes]) -->
	[Code],
	{ uri_code(Code) }, !,
	uri_codes(Codes).
uri_codes([]) -->
	[].
 1229
 1230uri_code(C) :-
 1231	code_type(C, csym), !.
 1232uri_code(0'$).
 1233uri_code(0'-).
 1234uri_code(0'@).
 1235uri_code(0'&).
 1236uri_code(0'+).
 1237uri_code(0'.).
 1238uri_code(0'/).
 1239uri_code(0'?).
 1240uri_code(0'#).
 1241uri_code(0'=).
 1242uri_code(0':).
 1243uri_code(0'~).				% officially not
 1244uri_code(0';).
 1245uri_code(0'{).
 1246uri_code(0'}).
%	string_codes(-Codes)//
%
%	Chars between "...", Can contain \" and \\
%
%	Reads up to, but not including, the closing double quote.  A
%	backslash followed by a double quote or by another backslash is
%	an escape sequence: only the second character is added to Codes.
%	A backslash followed by any other character is kept as is.

string_codes([Code|Codes]) -->
	"\\", [Code],			% escape sequences start with a backslash
	{ Code == 0'\\ ; Code == 0'" }, !,
	string_codes(Codes).
string_codes([]) -->
	peek(0'").			% leave the closing quote for token//1
string_codes([Code|Codes]) -->
	[Code],
	string_codes(Codes).
%	identifier(-Id)//
%
%	An SeRQL identifier must start with a letter or an underscore
%	('_') and can be followed by zero or more letters, numbers,
%	underscores, dashes ('-') or dots ('.').  Id is returned as an
%	atom.

identifier(Id) -->
	[First],
	{ code_type(First, csymf) },
	id_chars(Rest),
	{ atom_codes(Id, [First|Rest]) }.
 1276
 1277id_chars([C0|Cs]) -->
 1278	[C0],
 1279	{ code_type(C0, csym)
 1280	; C0 == 0'.
 1281	; C0 == 0'-
 1282	}, !,
 1283	id_chars(Cs).
 1284id_chars([]) -->
 1285	[].
 1286
 1287digit(D) -->
 1288	[D],
 1289	{ code_type(D, digit) }.
 1290
 1291digits([D0|Ds]) -->
 1292	digit(D0), !,
 1293	digits(Ds).
 1294digits([]) -->
 1295	[].
 1296
 1297blank -->
 1298	[C],
 1299	{ code_type(C, space) }, !,
 1300	blank.
 1301blank -->
 1302	[].
%	serql_keyword(?Keyword)
%
%	True if Keyword is the lowercase version of a keyword.  The
%	tokeniser maps each identifier to lower case before looking it
%	up here, so keywords are recognised regardless of case.

serql_keyword(select).
serql_keyword(construct).
serql_keyword(from).
serql_keyword(where).
serql_keyword(using).
serql_keyword(namespace).
serql_keyword(true).
serql_keyword(false).
serql_keyword(not).
serql_keyword(and).
serql_keyword(or).
serql_keyword(like).
serql_keyword(label).
serql_keyword(lang).
serql_keyword(datatype).
serql_keyword(null).
serql_keyword(isresource).
serql_keyword(isliteral).
serql_keyword(sort).
serql_keyword(in).
serql_keyword(union).
serql_keyword(intersect).
serql_keyword(minus).
serql_keyword(exists).
serql_keyword(forall).
serql_keyword(distinct).		% SPEC: not in grammar
serql_keyword(limit).			% SPEC: not in grammar
serql_keyword(offset).			% SPEC: not in grammar
 1336
 1337		 /*******************************
 1338		 *	     DCG BASICS		*
 1339		 *******************************/
 1340
 1341peek(C, L, L) :-
 1342	L = [C|_].
 1343
 1344
 1345		 /*******************************
 1346		 *    HUMAN READABLE MESSAGES	*
 1347		 *******************************/
 1348
 1349:- multifile
 1350	prolog:message/3. 1351
 1352prolog:message(error(syntax_error(What),
 1353		     context(serql_parse/2, Location))) -->
 1354	[ 'Syntax error in SeRQL query: ' ],
 1355	explain(What), [ ' at **here** in', nl, nl],
 1356	['~w'-[Location] ].
 1357
 1358explain(expected(X)) -->
 1359	[ '"~w" expected'-[X] ]