rdfql_util.pl

View source with raw comments or as raw

    1/*  Part of SWI-Prolog
    2
    3    Author:        Jan Wielemaker
    4    E-mail:        J.Wielemaker@vu.nl
    5    WWW:           http://www.swi-prolog.org
    6    Copyright (C): 2004-2014, University of Amsterdam
    7			      VU University Amsterdam
    8
    9    This program is free software; you can redistribute it and/or
   10    modify it under the terms of the GNU General Public License
   11    as published by the Free Software Foundation; either version 2
   12    of the License, or (at your option) any later version.
   13
   14    This program is distributed in the hope that it will be useful,
   15    but WITHOUT ANY WARRANTY; without even the implied warranty of
   16    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   17    GNU General Public License for more details.
   18
   19    You should have received a copy of the GNU Lesser General Public
   20    License along with this library; if not, write to the Free Software
   21    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
   22
   23    As a special exception, if you link this library with other files,
   24    compiled with a Free Software compiler, to produce an executable, this
   25    library does not by itself cause the resulting executable to be covered
   26    by the GNU General Public License. This exception does not however
   27    invalidate any other reasons why the executable file might be covered by
   28    the GNU General Public License.
   29*/
   30
   31:- module(rdfql_util,
   32	  [ select_results/6,		% +Distinct, +Offset, +Limit,
   33					% :SortBy, -Result, :Goal
   34	    select_results/9,		% +Distinct, +Group, +Having, +Agg,
   35					% +Offset, +Limit,
   36					% :SortBy, -Result, :Goal
   37	    entailment_module/2		% +Entailment, -Module
   38	  ]).   39:- use_module(library(nb_set)).   40:- use_module(library(semweb/rdf_db)).   41:- use_module(library(lists)).   42:- use_module(library(pairs)).   43:- use_module(library(apply)).   44:- use_module(library(xsdp_types)).   45:- use_module(sparql_runtime).   46
   47:- meta_predicate
   48	select_results(+, +, 0, +, +, +, +, -, 0),
   49	select_results(+, +, +, +, -, 0),
   50	select_results(+, +, +, -, 0).

select_results(+Distinct, +Offset, +Limit, +SortBy, -Result, :Goal): Calls select_results/8 using Group=[] and Having=true.

   56select_results(Distinct, Offset, Limit, SortBy, Result, Goal) :-
   57	select_results(Distinct, [], true, [],
   58		       Offset, Limit, SortBy, Result, Goal).

select_results(+Distinct, +Group, +Having, +Aggregates, +Offset, +Limit, +SortBy, -Result, :Goal) is nondet

Select results for the template Result on backtracking over Goal.

Arguments:

`Distinct`	- Iff 'distinct', only consider distinct solutions
`Group`	- is a list of variables on which to group the results. These are the only variables that can be used in the HAVING filter and final projection.
`Having`	- is a constraint (similar to `FILTER`) to filter grouped results.
`Aggregates`	- List of `aggregate(Function, Var)`
`Offset`	- Skip the first `Offset` results. `Offset` is applied after `Distinct` and `SortBy`
`Limit`	- Only return the first `Limit` results. `Limit` is applied after `Distinct`, `SortBy` and `Offset`. The value 'inf' returns all values.
`SortBy`	- Either 'unsorted' or a term `order_by(Cols)`, where each Col in Cols is a term `ascending(Expr)` or `descending(Expr)`.

To be done

- Group, Having and Aggregate are currently ignored.

   96:- discontiguous select_results/9.   97
   98/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
   991. ORDERED RESULTS WITHOUT AGGREGATES
  100- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
  101
  102select_results(Distinct, [], _:true, [], Offset, Limit,
  103	       order_by(Cols), Result, Goal) :-
  104	exclude(ground, Cols, SortCols), SortCols \== [], !,
  105	group_order(SortCols, GroupedCols),
  106	reverse(GroupedCols, RevGroupedCols),
  107	maplist(sort_key_goal, RevGroupedCols, GroupedKeys, KeyGenList),
  108	list_conj(KeyGenList, KeyGenGoal),
  109	sort_template(GroupedKeys, Result, Template),
  110	findall(Template, (Goal,rdfql_util:KeyGenGoal), Results0),
  111	(   Distinct == distinct
  112	->  sort(Results0, Results1)
  113	;   Results1 = Results0
  114	),
  115	order_by(RevGroupedCols, Results1, Results2),
  116	apply_offset(Offset, Results2, Results3),
  117	apply_limit(Limit, Results3, Results),
  118	member(Result, Results).

group_order(+Cols, -GroupedCols) is det: Group ordering expressions by the same ordering direction. E.g., [ascending(X), ascending(Y), descending(Z)] becomes [ascending([X,Y]), descending([Z])]

  126group_order([], []).
  127group_order([Col|CT0], [Grouped|GT]) :-
  128	Col =.. [Dir,Expr],
  129	Grouped =.. [Dir,[Expr|Exprs]],
  130	same_order(Dir, CT0, CT, Exprs),
  131	group_order(CT, GT).
  132
  133same_order(Dir, [Col|CT0], CT, [E0|ET]) :-
  134	Col =.. [Dir,E0], !,
  135	same_order(Dir, CT0, CT, ET).
  136same_order(_, CT, CT, []).
  137
  138list_conj([], true) :- !.
  139list_conj([G], G) :- !.
  140list_conj([G|T0], (G,T)) :-
  141	list_conj(T0, T).

order_by(+Cols, +Results0, -Results) is det

Order the results. Cols is a list of ascending(Var) or descending(Var). Note that the sorting is done with the least importing (right most) order declaration first and relies on the fact that keysort/2 is stable wrt to ordering the values.

To be done: - For DESC sorting, we need to reverse, but not the order inside the groups. We'd need a reverse keysort/2.

  154order_by([], Results, Results).
  155order_by([ascending(_)|T], Results0, Results) :- !,
  156	keysort(Results0, Results1),
  157	pairs_values(Results1, Results2),
  158	order_by(T, Results2, Results).
  159order_by([descending(_)|T], Results0, Results) :-
  160	keysort(Results0, AscSorted),
  161	group_pairs_by_key(AscSorted, Grouped),
  162	reverse(Grouped, DescSortedKeyedGroups),
  163	pairs_values(DescSortedKeyedGroups, DescSortedGroups),
  164	append(DescSortedGroups, DescSorted),
  165	order_by(T, DescSorted, Results).

sort_key_goal(+ColGroup, -KeyGroup, -Translate)

  170sort_key_goal(ColGroup, KeyGroup, Translate) :-
  171	arg(1, ColGroup, Exprs),
  172	sort_expr_goal(Exprs, KeyGroup, Translate).
  173
  174sort_expr_goal([], [], true).
  175sort_expr_goal([V], [K], sort_key(V,K)) :- !.
  176sort_expr_goal([V|TV], [K|TK], (sort_key(V,K),G)) :-
  177	sort_expr_goal(TV, TK, G).
  178
  179sort_template([], Result, Result).
  180sort_template([H0|T], Result, H-Template) :-
  181	simplify_sort_key(H0, H),
  182	sort_template(T, Result, Template).
  183
  184simplify_sort_key([One], One) :- !.
  185simplify_sort_key(List, Term) :-
  186	Term =.. [v|List].

sort_key(+Result, -Key) is det

Determine the sort-key from a result according to the SPARQL standard:

undefined/null
blank nodes
IRIs
RDF Literals
a. Numbers are mapped to their value space b. Other literals are mapped to their plain literal. Note that this works fine for some types:
- xsd:date and variants
- xsd:boolean

bug: - This is not good enough. Literals must be compared in their value-space. This requires some study.; - Result is a SPARQL expression.

  207:- public sort_key/2.			% Goal created by sort_key_goal/3.
  208
  209sort_key(Var, Var) :- var(Var), !.
  210sort_key(literal(L), sk(4, V)) :- !,
  211	simple_literal(L, V).
  212sort_key(IRI, sk(N, IRI)) :-
  213	(   rdf_is_bnode(IRI)
  214	->  N = 2
  215	;   N = 3
  216	).
  217
  218simple_literal(lang(_Lang, SL), SL) :- !.
  219simple_literal(type(Type, SL), Number) :-
  220	xsdp_numeric_uri(Type, _),
  221	atom_number(SL, Number), !.
  222simple_literal(type(_Type, SL), SL) :- !.
  223simple_literal(SL, SL).
  224
  225
  226/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  2272. UNORDERED RESULTS WITHOUT AGGREGATES
  228- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
  229
  230select_results(Distinct, [], _:true, [], Offset, Limit,
  231	       _Unsorted, Result, Goal) :- !,
  232	select_results(Distinct, Offset, Limit, Result, Goal).

select_results(+Distinct, +Offset, +Limit, -Result, :Goal): Unsorted version. In this case we can avoid first collecting all results.

  239select_results(distinct, Offset, Limit, Result, Goal) :- !,
  240	term_variables(Result, Vars),
  241	V =.. [v|Vars],
  242	empty_nb_set(Set),
  243	Counter = count(0),
  244	Goal,
  245	   add_nb_set(V, Set, New),
  246	   New == true,
  247	   apply_limit(Counter, Offset, Limit, Last),
  248	   ( Last == true -> ! ; true ).
  249select_results(_, 0, inf, _, G) :- !,
  250	G.
  251select_results(_, Offset, Limit, _, G) :- !,
  252	Counter = count(0),
  253	G,
  254	    apply_limit(Counter, Offset, Limit, Last),
  255	    ( Last == true -> ! ; true ).
  256
  257apply_limit(_, 0, inf, false) :- !.
  258apply_limit(Counter, Offset, Limit, Last) :-
  259	arg(1, Counter, N0),
  260	N is N0 + 1,
  261	nb_setarg(1, Counter, N),
  262	N > Offset,
  263	(   Limit \== inf,
  264	    N =:= Offset+Limit
  265	->  Last = true
  266	;   Last = false
  267	).

apply_offset(+Offset, +AllSolutions, -OffsetSolutions) is det

  271apply_offset(N, [_|T], List) :-
  272	N > 0, !,
  273	N2 is N - 1,
  274	apply_offset(N2, T, List).
  275apply_offset(_, List, List).

apply_limit(+Limit, +AllSolutions, -LimitSolutions) is det

  279apply_limit(inf, List, List) :- !.
  280apply_limit(Limit, All, List) :-
  281	limit(Limit, All, List).
  282
  283limit(N, [H|T0], [H|T]) :-
  284	N > 0, !,
  285	N2 is N - 1,
  286	limit(N2, T0, T).
  287limit(_, _, []).
  288
  289
  290/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  2913. AGGREGATE SUPPORT
  292
  293To support aggregation, we have to collect   results and group them into
  294sets that have equal values for the  variables that appear in Group. For
  295each group, we must:
  296
  297  1. Evaluate the Aggregate functions
  298  2. Evaluate the Having constraing and drop sets for which Having fails.
  299
  300Note that the projection (=Result) and   Having constraints can only use
  301variables Group and aggregate functions. This   implies  that we need to
  302track the grouped variables as well  as   variables  that  are needed to
  303compute the aggregates, but _no_ more.
  304
  305Q: Can we call select_results/9 recursively  with the iteration over the
  306groups to get OrderBy, Offset and Limit working?
  307
  308Q: When do we need to apply Distinct? Before or after grouping?
  309
  310Note that we do not need to do anything with the result term because the
  311output variables are already shared with it.
  312- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
  313
  314select_results(_Distinct, [], Having, AggregateEval, _Offset, _Limit,
  315	       _Order, _Result, Goal) :- !,
  316	aggregate_vars(AggregateEval, Aggregates, AggVars, Eval),
  317	AV =.. [a|AggVars],
  318	findall(AV, Goal, Group),
  319	aggregate(Group, Aggregates),
  320	call(Eval),
  321	call(Having).
  322select_results(_Distinct, Group, Having, AggregateEval, Offset, Limit,
  323	       _Order, _Result, Goal) :-
  324	aggregate_vars(AggregateEval, Aggregates, AggVars, Eval),
  325	GV =.. [v|Group],
  326	AV =.. [a|AggVars],
  327	findall(GV-AV, Goal, Pairs),
  328	keysort(Pairs, SortedPairs),
  329	group_pairs_by_key(SortedPairs, Groups0),
  330	apply_offset(Offset, Groups0, Groups1),
  331	apply_limit(Limit, Groups1, Groups),
  332	member(GV-G, Groups),
  333	aggregate(G, Aggregates),
  334	call(Eval),
  335	call(Having).

aggregate(+Group, +Aggregates)

  339aggregate([AVT|Group], Aggregates) :- !,
  340	AVT =.. [a|AV0],
  341	maplist(aggregate_setup, AV0, State0),
  342	aggregate_steps(Group, State0, State),
  343	maplist(aggregate_bind, Aggregates, State).
  344aggregate([], Aggregates) :-
  345	maplist(empty_aggregate, Aggregates).
  346
  347aggregate_setup(count(X), Count) :-
  348	aggregate_step(count(X), 0, Count).
  349aggregate_setup(distinct(X, _Op), Set) :-
  350	( is_null(X) -> Set = [] ; Set = [X] ).
  351aggregate_setup(sum(X0), X) :-
  352	sparql_eval_raw(X0, X).
  353aggregate_setup(min(X0), X) :-
  354	sparql_eval_raw(X0, X).
  355aggregate_setup(max(X0), X) :-
  356	sparql_eval_raw(X0, X).
  357aggregate_setup(avg(X0), X-1) :-
  358	sparql_eval_raw(X0, X).
  359aggregate_setup(sample(X), X).
  360aggregate_setup(group_concat(Expr,_), [X]) :-
  361	group_concat_value(Expr, X).
  362
  363aggregate_steps([], State, State).
  364aggregate_steps([HT|T], State0, State) :-
  365	HT =.. [a|H],
  366	maplist(aggregate_step, H, State0, State1),
  367	aggregate_steps(T, State1, State).
  368
  369aggregate_step(count(X), Count0, Count) :-
  370	( is_null(X) -> Count = Count0 ; Count is Count0 + 1 ).
  371aggregate_step(distinct(X, _Op), S0, S) :-
  372	( is_null(X) -> S = S0 ; S = [X|S0] ).
  373aggregate_step(sum(X), Sum0, Sum) :-
  374	sparql_eval_raw(X+Sum0, Sum).
  375aggregate_step(min(X), Min0, Min) :-
  376	sparql_eval_raw(min(X, Min0), Min).
  377aggregate_step(max(X), Min0, Min) :-
  378	sparql_eval_raw(max(X, Min0), Min).
  379aggregate_step(avg(X), Sum0-Count0, Sum-Count) :-
  380	sparql_eval_raw(X+Sum0, Sum),
  381	Count is Count0+1.
  382aggregate_step(sample(X), S0, S) :-
  383	(   is_null(S0)
  384	->  S = X
  385	;   S = S0
  386	).
  387aggregate_step(group_concat(Expr, _), S0, [X|S0]) :-
  388	group_concat_value(Expr, X).

aggregate_bind(+Aggregation, +State) is det

To be done: - : bind to error if the function does not evaluate?

  394aggregate_bind(aggregate(Func, Var), State) :-
  395	aggregate_bind(Func, Var, State).
  396
  397aggregate_bind(count(_), Count, Count0) :-
  398	rdf_equal(xsd:integer, XSDInt),
  399	sparql_eval(numeric(XSDInt, Count0), Count).
  400aggregate_bind(sum(_), Sum, Sum0) :-
  401	bind_number(Sum0, Sum).
  402aggregate_bind(min(_), Min, Min0) :-
  403	bind_number(Min0, Min).
  404aggregate_bind(max(_), Max, Max0) :-
  405	bind_number(Max0, Max).
  406aggregate_bind(avg(_), Avg, Sum-Count) :-
  407	rdf_equal(xsd:integer, XSDInt),
  408	sparql_eval(Sum/numeric(XSDInt, Count), Avg).
  409aggregate_bind(sample(_), Sample, Sample).
  410aggregate_bind(group_concat(Expr, literal(Sep)), literal(Concat), Parts0) :-
  411	(   functor(Expr, distinct, 1)
  412	->  sort(Parts0, Parts)
  413	;   Parts = Parts0
  414	),
  415	maplist(text_of, Parts, Texts),
  416	atomic_list_concat(Texts, Sep, Concat).
  417aggregate_bind(distinct(_, Op), Value, Set) :-
  418	sort(Set, Distinct),
  419	aggregate_distinct(Op, Distinct, Value).

aggregate_distinct(+Operation, +Set, -Value)

  423aggregate_distinct(count, Set, Value) :-
  424	rdf_equal(xsd:integer, IntType),
  425	length(Set, Len),
  426	bind_number(numeric(IntType, Len), Value).
  427aggregate_distinct(sum, Set, Sum) :-
  428	rdf_equal(xsd:integer, IntType),
  429	foldl(add, Set, number(IntType, 0), Sum0),
  430	bind_number(Sum0, Sum).
  431aggregate_distinct(avg, Set, Avg) :-
  432	aggregate_distinct(sum, Set, Sum),
  433	length(Set, Count),
  434	rdf_equal(xsd:integer, XSDInt),
  435	sparql_eval(Sum/numeric(XSDInt, Count), Avg).
  436
  437add(X, Sum0, Sum) :-
  438	sparql_eval_raw(X+Sum0, Sum).
  439
  440text_of(Expr, Atom) :-
  441	sparql_eval_raw(Expr, V0),
  442	raw_text(V0, Atom).
  443
  444raw_text(string(X), X).
  445raw_text(simple_literal(X), X).
  446raw_text(lang(_Lang, SL), SL).		% allow lang qualified strings in
  447					% group_concat
  448
  449bind_number(V0, V) :-
  450	(   V0 = numeric(_, _)
  451	->  sparql_eval(V0, V)
  452	;   V = '$null$'
  453	).
  454
  455group_concat_value(Expr, Value) :-
  456	(   functor(Expr, distinct, 1)
  457	->  arg(1, Expr, Value)
  458	;   Value = Expr
  459	).
  460
  461:- rdf_meta empty_aggregate(t).  462
  463empty_aggregate(aggregate(count(_), literal(type(xsd:integer, '0')))).
  464empty_aggregate(aggregate(sum(_), literal(type(xsd:integer, '0')))).
  465empty_aggregate(aggregate(distinct(_,count), literal(type(xsd:integer, '0')))).
  466empty_aggregate(aggregate(group_concat(_,_), literal(''))).

aggregate_vars(+AggregateEval, -Aggregates, -Template, -Query) is det

Arguments:

`AggregateEval`	- is a combination of `aggregate(Expr, Var)` and queries that depend on aggregation results and must therefore be executed after computing the aggregates.%
`Aggregates`	- is a list of `aggregate(Expr, Var)`, where Expr is a plain aggregate function over a number of variables and Var is shared with the place where the aggregate is used.

  479aggregate_vars([], [], [], true).
  480aggregate_vars([aggregate(Term0, Into)|T],
  481	       [aggregate(Term,  Into)|AT], [Term|Templ], Q) :- !,
  482	x_distinct(Term0, Term),
  483	aggregate_vars(T, AT, Templ, Q).
  484aggregate_vars([Q0|T], Agg, Templ, Q) :-
  485	aggregate_vars(T, Agg, Templ, Q1),
  486	mkconj(Q1, Q0, Q).
  487
  488mkconj(true, Q, Q) :- !.
  489mkconj(Q, true, Q) :- !.
  490mkconj(A, B, (A,B)).
  491
  492x_distinct(Term0, Term) :-
  493	arg(1, Term0, Spec),
  494	compound(Spec),
  495	Spec = distinct(_),
  496	distinct_x(Term0, Term), !.
  497x_distinct(Term, Term).
  498
  499distinct_x(count(distinct(X)), distinct(X, count)).
  500distinct_x(count(sum(X)), distinct(X, sum)).
  501distinct_x(count(avg(X)), distinct(X, avg)).
  502
  503is_null(X) :-
  504	(   var(X)
  505	->  true
  506	;   X == '$null$'
  507	).
  508
  509		 /*******************************
  510		 *	     ENTAILMENT		*
  511		 *******************************/

entailment_module(+Entailment, -Module): Find the Prolog module implementing the entailment rules for a semantic web language.

  518entailment_module(Entailment, Module) :-
  519	cliopatria:entailment(Entailment, Module), !.
  520entailment_module(Entailment, _) :-
  521	throw(error(existence_error(entailment, Entailment), _)).
  522
  523:- multifile
  524	cliopatria:entailment/2.