rdf_optimise.pl

View source with formatted comments or as raw
    1/*  Part of SWI-Prolog
    2
    3    Author:        Jan Wielemaker
    4    E-mail:        J.Wielemaker@vu.nl
    5    WWW:           http://www.swi-prolog.org
    6    Copyright (C): 1985-2012, University of Amsterdam
    7			      VU University Amsterdam
    8
    9    This program is free software; you can redistribute it and/or
   10    modify it under the terms of the GNU General Public License
   11    as published by the Free Software Foundation; either version 2
   12    of the License, or (at your option) any later version.
   13
   14    This program is distributed in the hope that it will be useful,
   15    but WITHOUT ANY WARRANTY; without even the implied warranty of
   16    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   17    GNU General Public License for more details.
   18
   19    You should have received a copy of the GNU Lesser General Public
   20    License along with this library; if not, write to the Free Software
   21    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
   22
   23    As a special exception, if you link this library with other files,
   24    compiled with a Free Software compiler, to produce an executable, this
   25    library does not by itself cause the resulting executable to be covered
   26    by the GNU General Public License. This exception does not however
   27    invalidate any other reasons why the executable file might be covered by
   28    the GNU General Public License.
   29*/
   30
   31:- module(rdf_optimise,
   32	  [ rdf_optimise/1,		% :Query
   33	    rdf_optimise/2,		% +Query, -Optimised
   34	    rdf_optimise/4,		% +Query, -Optimised, -Space, -Time
   35	    rdf_complexity/3,		% :Goal, -SpaceEstimate, -TimeEstimate
   36	    serql_select_bind_null/2	% +Goal, -WithBind
   37	  ]).   38:- use_module(library(semweb/rdf_db)).   39:- use_module(library(debug)).   40:- use_module(library(lists)).   41:- use_module(library(pairs)).   42:- use_module(library(ordsets)).   43:- use_module(library(ugraphs)).   44
   45:- meta_predicate
   46	rdf_optimise(0).   47
   48/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
   49Queries  as  returned  by  serql_compile_path/2    consists  of  a  path
   50expression which is compiled to a conjunction  of calls to rdf/3 and the
   51translation of the WHERE clause acting as an additional filter.
   52
   53Optimisation of a query basically means moving conditions into the rdf/3
   54calls where possible, moving other conditions   as  early as possibly in
   55the graph-matching and reordering graph matching calls (rdf/3) to reduce
   56non-determinism.
   57
   58Reordering
   59----------
   60
   61Reordering of graph expressions  is   required  to  reduce backtracking.
   62Roughly I see three approaches:
   63
   64	* Learning
   65	Create permutations of the query and make them run under time
   66	constraints.  Try to learn patterns that work (fast).
   67
   68	* Use statistics
   69	Given the number of solutions on a certain partially instantiated
   70	rdf/3 call (and the required execution time), reorganise them to
   71	minimalise the cost.
   72
   73	* Use constraint solving
   74	Instead of trying to solve an rdf/3 call, create a constraint from
   75	it and only try to solve it if there is more information.  This
   76	is especially attractive if some form of high-level reasoning
   77	from the language entailment rules can be applied or set-theory
   78	is a possibility.
   79
   80After experiments with using constraint solving,   this  module now uses
   81planning based on statistical information provided by the rdf_db module.
   82This  algorithm  reaches  optimal  performance  under  quite  reasonable
   83assumptions  while  the  planning  overhead   is  very  reasonable.  The
   84algorithm is described in "An  optimised   Semantic  Web  query language
   85implementation        in        Prolog",          available         from
   86http://www.swi-prolog.org/download/publications/ICLP05-SeRQL.pdf
   87
   88NOTES:
   89
   90	* SeRQL LIKE works on resources *and* literals.  Do we want this?
   91	  See http://rdf4j.org/forum/mvnforum/viewthread?thread=275
   92- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
   93
   94:- multifile
   95	user:goal_expansion/2.   96
   97user:goal_expansion(rdf_complexity(G0, C), rdf_complexity(G, C)) :-
   98	expand_goal(G0, G).
   99user:goal_expansion(rdf_optimise(G0, O), rdf_optimise(G, O)) :-
  100	expand_goal(G0, G).
  101user:goal_expansion(rdf_optimise(G0, O, C, E), rdf_optimise(G, O, C, E)) :-
  102	expand_goal(G0, G).
  103
  104
  105/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  106Plan (conjunctions)
  107
  108	* Generate permutations and costs.  Select cheapest
  109	* The above moves tests right after the RDF call filling
  110	  its input argument.  As a last optimisation some of these
  111	  searches may be integrated in the rdf/3 call to help using
  112	  indexing of the RDF database.
  113
  114complexity/2 needs to update the order of clauses inside meta calls
  115(notably optional path expressions).
  116- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
  117
  118%%	rdf_optimise(:Goal) is nondet.
  119%
  120%	Optimise Goal and execute the result. Semantically equivalent to
  121%	call/1.
  122%
  123%	@tbd	Module handling is not correct.
  124
  125rdf_optimise(Module:Goal) :-
  126	rdf_optimise(Goal, Optimised),
  127	call(Module:Optimised).
  128
  129
  130%%	rdf_optimise(+Goal, -Optimized) is det.
  131%
  132%	Goal is a Prolog control-structure with   calls to the rdf_db.pl
  133%	and  SeRQL  runtime  predicates.  Optimized  is  a  semantically
  134%	equivalent goal, obtained by re-ordering   conjunctions  in Goal
  135%	and  processing  sub-queries  that  do    not   share  variables
  136%	independently.
  137
  138rdf_optimise(Conj, Optimised) :-
  139	rdf_optimise(Conj, Optimised, _, _).
  140
  141rdf_optimise(Conj, Optimised, Space, Time) :-
  142	debug(rdf_optimise, '*** OPTIMIZING ***~n', []),
  143	dbg_portray_body(Conj),
  144	term_variables(Conj, Vars),
  145	rdf_complexity(Conj, Conj, S0, E0),
  146	State = state(Vars-Conj, S0, E0, 1),
  147	debug(rdf_optimise, 'C0 = ~w~n', [E0]),
  148	(   reorder(Conj, Perm),
  149	    (	debugging(rdf_optimise(all))
  150	    ->	dbg_portray_body(Perm)
  151	    ;	true
  152	    ),
  153	    rdf_complexity(Perm, Perm1, S, C),
  154	    debug(rdf_optimise(all), '--> space=~w, time=~w~n', [S, C]),
  155
  156	    arg(4, State, N),
  157	    N2 is N + 1,
  158	    nb_setarg(4, State, N2),
  159
  160	    (	arg(3, State, C0),
  161		C < C0
  162	    ->	debug(rdf_optimise,
  163		      'BETTER ONE [~D]: --> space=~w, time=~w~n', [N, S, C]),
  164		dbg_portray_body(Perm1),
  165	        nb_setarg(3, State, C),
  166		nb_setarg(2, State, S),
  167		nb_setarg(1, State, Vars-Perm1)
  168	    ;	true
  169	    ),
  170	    fail
  171	;   arg(1, State, Vars-Optimised),
  172	    arg(2, State, Space),
  173	    arg(3, State, Time),
  174	    debug(rdf_optimise, '  --> optimised: s/t = ~w/~w --> ~w/~w~n',
  175		  [S0, E0, Space, Time]),
  176	    dbg_portray_body(Optimised)
  177	), !.
  178optimise_order(Conj, Conj, -1, -1) :-
  179	debug(rdf_optimise, 'Failed to optimise:~n', []),
  180	dbg_portray_body(Conj).
  181
  182
  183		 /*******************************
  184		 *	     REORDERING		*
  185		 *******************************/
  186
  187%%	reorder(Goal, Reordered)
  188%
  189%	Assuming Goal is  a  conjunction,   create  permutations  of its
  190%	order. Instead of blindly generating   permutations  however, we
  191%	get an arbitrary element of  the   conjunction  to the front and
  192%	compute subgraphs of goals connected   through variables _after_
  193%	executing the first goal.
  194
  195reorder(Goal, Reordered) :-
  196	State = bindings([]),
  197	conj_to_list(Goal, Conj0),
  198	reorder_conj(Conj0, State, Conj1),
  199	list_to_conj(Conj1, Reordered),
  200	arg(1, State, Bindings),
  201	unbind(Bindings).
  202
  203reorder_conj([One], _, [One]) :- !.
  204reorder_conj(List, State, Perm) :-
  205	group_by_cut(List, Before, Cut, After), !,
  206	reorder_conj(Before, State, PermBefore),
  207	bind_args(Before, State),		% this part is done
  208	reorder_conj(After, State, PermAfter),
  209	append(PermBefore, [Cut|PermAfter], Perm).
  210reorder_conj(List, State, Perm) :-
  211	group_by_optional(List, Normal, Optional), !,
  212	reorder_conj(Normal, State, PermNormal),
  213	bind_args(Normal, State),		% this part is done
  214	reorder_conj(Optional, State, PermOptional),
  215	append(PermNormal, PermOptional, Perm).
  216reorder_conj(List, State, [goal(_,independent(SubPerms),_)]) :-
  217	make_subgraphs(List, SubGraphs),
  218	SubGraphs \= [_], !,
  219	reorder_subgraph_conjs(SubGraphs, State, SubPerms).
  220reorder_conj(List, State, [Prefix|Perm]) :-
  221	select(Prefix, List, Rest),
  222	bind_args(Prefix, State),
  223	make_subgraphs(Rest, SubGraphs),
  224	(   SubGraphs = [SubGraph]
  225	->  reorder_conj2(SubGraph, State, Perm)
  226	;   Perm = [goal(_,independent(SubPerms),_)],
  227	    reorder_subgraph_conjs(SubGraphs, State, SubPerms)
  228	).
  229
  230
  231%%	reorder_subgraph_conjs(SubGraphs, -ReorderedSubGraphs)
  232%
  233%	Reorder  the  individual  subgraphs.  As    we  know  these  are
  234%	independent there is no need to  order the subgraphs themselves.
  235%	we also know they are  fully   connected,  and no longer contain
  236%	cuts, so we use the simplified  version of reorder_conj/2 called
  237%	reorder_conj2/2.
  238
  239reorder_subgraph_conjs([], _, []).
  240reorder_subgraph_conjs([H0|T0], State, [H|T]) :-
  241	reorder_conj2(H0, State, H1),
  242	list_to_conj(H1, H),
  243	reorder_subgraph_conjs(T0, State, T).
  244
  245reorder_conj2([One], _, [One]) :- !.
  246reorder_conj2(List, State, [Prefix|Perm]) :-
  247	select(Prefix, List, Rest),
  248	bind_args(Prefix, State),
  249	make_subgraphs(Rest, SubGraphs),
  250	(   SubGraphs = [SubGraph]
  251	->  reorder_conj2(SubGraph, State, Perm)
  252	;   Perm = [goal(_,independent(SubPerms),_)],
  253	    reorder_subgraph_conjs(SubGraphs, State, SubPerms)
  254	).
  255
  256%%	group_by_cut(+List, -BeforeCut, -Cut, -AfterCut)
  257%
  258%	Split the conjunction over a  cut   (!)  as  we cannot guarantee
  259%	proper behaviour when moving goals to the other side of a cut.
  260
  261group_by_cut(Goals, Before, Cut, After) :-
  262	Cut = goal(_, !, _),
  263	append(Before, [Cut|After], Goals), !.
  264
  265
  266%%	group_by_optional(+List, -NotOptional, -Optional)
  267%
  268%	Split the conjunction in optional and non-optional part as we
  269%	always want the optional part to happen last.
  270
  271group_by_optional(List, Normal, Optional) :-
  272	split_optional(List, Normal, Optional),
  273	Normal \== [],
  274	Optional \== [].
  275
  276split_optional([], [], []).
  277split_optional([H|T0], Normal, Optional) :-
  278	(   optional(H)
  279	->  Optional = [H|T],
  280	    split_optional(T0, Normal, T)
  281	;   Normal = [H|T],
  282	    split_optional(T0, T, Optional)
  283	).
  284
  285optional(G) :-
  286	goal(G, (_*->_;_)).
  287
  288%%	bind_args(Goal, !State)
  289%
  290%	Bind all arguments in  Goal  or   list  of  goals.  Assumes that
  291%	executing a goal grounds all its arguments.   Only  the goal A =
  292%	literal(B), generated by optimising where constraints is handled
  293%	special.
  294%
  295%	State is a term bindings(List)  that is destructively maintained
  296%	by instantiate/4.
  297
  298bind_args([], _) :- !.
  299bind_args([H|T], State) :- !,
  300	bind_args(H, State),
  301	bind_args(T, State).
  302bind_args(H, State) :-
  303	goal(H, A=literal(B)),
  304	var(A), var(B), !,
  305	(   instantiated(A, I),
  306	    I \== (-)
  307	->  instantiate(B, _, I, State)
  308	;   instantiated(B, I),
  309	    I \== (-)
  310	->  instantiate(A, _, I, State)
  311	;   true
  312	).
  313bind_args(Goal, State) :-
  314	vars(Goal, Vars),
  315	ground_vars(Vars, State).
  316
  317ground_vars([], _).
  318ground_vars([H|T], State) :-
  319	instantiate(H, _, r, State),
  320	ground_vars(T, State).
  321
  322
  323%%	make_subgraphs(+Goals, -SubGraphs)
  324%
  325%	Create a list of connected subgraphs   from  Goals, assuming the
  326%	variables in the assoc Grounded have been bound.
  327%
  328%	@param Goals is a list goal(Id, Goal, Vars).
  329
  330make_subgraphs([Goal], [[Goal]]) :- !.
  331make_subgraphs([G1,G2], Graphs) :- !,
  332	unbound_vars(G1, V1),
  333	unbound_vars(G2, V2),
  334	(   ord_intersect(V1, V2)
  335	->  Graphs = [[G1,G2]]
  336	;   Graphs = [[G1],[G2]]
  337	).
  338make_subgraphs(Goals, SubGraphs) :-
  339	map_list_to_pairs(unbound_vars, Goals, UnBoundKeyed),
  340	connected_pairs(UnBoundKeyed, Edges),
  341	vertices_edges_to_ugraph(Goals, Edges, UGraph),
  342	connected_vertices(UGraph, SubGraphs).
  343
  344connected_pairs([], []).
  345connected_pairs([H|T], Edges) :-
  346	connected_pairs(T, H, Edges, EdgeTail),
  347	connected_pairs(T, EdgeTail).
  348
  349connected_pairs([], _, Edges, Edges).
  350connected_pairs([H|T], To, Edges, EdgeTail) :-
  351	(   connected(H, To, GH, GT)
  352	->  Edges = [GH-GT,GT-GH|Edges1],
  353	    connected_pairs(T, To, Edges1, EdgeTail)
  354	;   connected_pairs(T, To, Edges, EdgeTail)
  355	).
  356
  357connected(V1-G1, V2-G2, G1, G2) :-
  358	ord_intersect(V1, V2).
  359
  360connected_vertices([], []) :- !.
  361connected_vertices(UGraph, [Set1|Sets]) :-
  362	UGraph = [V1-_|_],
  363	reachable(V1, UGraph, Set1),
  364	del_vertices(UGraph, Set1, UGraph2),
  365	connected_vertices(UGraph2, Sets).
  366
  367
  368%%	unbound_vars(+Goal, -Vars) is det.
  369%
  370%	True when Vars is an ordered set of unbound variables in Goal.
  371
  372unbound_vars(Goal, Vars) :-
  373	vars(Goal, AllVars),
  374	unbound(AllVars, Vars0),
  375	sort(Vars0, Vars).
  376
  377unbound([], []).
  378unbound([H|T0], [H|T]) :-
  379	instantiated(H, -), !,
  380	unbound(T0, T).
  381unbound([_|T0], T) :-
  382	unbound(T0, T).
  383
  384%%	conj_to_list(+Conj, -List)
  385%
  386%	Translate a conjunction into a list of elements of the format
  387%
  388%%		goal(Id, Goal, Vars)
  389%
  390%	Where Id is a goal identifier, Goal  is the goal itself and Vars
  391%	is a list of variables inside the  Goal. Variables are sorted to
  392%	the standard order of terms.
  393
  394conj_to_list(Conj, List) :-
  395	phrase(conj_to_list(Conj, 1, _), List).
  396
  397conj_to_list((A,B), N0, N) --> !,
  398	conj_to_list(A, N0, N1),
  399	conj_to_list(B, N1, N).
  400conj_to_list(true, N, N) --> !,
  401	[].
  402conj_to_list(G, N0, N) -->
  403	{ term_variables(G, Vars0),
  404	  sort(Vars0, Vars),
  405	  N is N0 + 1
  406	},
  407	[ goal(N0, G, Vars)
  408	].
  409
  410
  411%%	list_to_conj(+List, -Conj)
  412
  413
  414list_to_conj([], true).
  415list_to_conj([goal(_,G,_)], G) :- !.
  416list_to_conj([goal(_,G,_)|T0], (G,T)) :-
  417	list_to_conj(T0, T).
  418
  419
  420%%	id(G, Id) is det.
  421%%	goal(G, Goal) is det.
  422%%	vars(G, Vars) is det.
  423%
  424%	Extract fields from the goal structure.
  425
  426%id(goal(Id, _, _),     Id).
  427goal(goal(_, Goal, _), Goal).
  428vars(goal(_, _, Vars), Vars).
  429
  430
  431		 /*******************************
  432		 *	       BINDING		*
  433		 *******************************/
  434
  435/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  436Keep track of binding status of a   variable.  There are basically three
  437ways. One is to use a seperate   (assoc) table. Alternatively we can use
  438attributed variables and delete the attributes   at the end, and finally
  439we can use normal variables and recreate  the original goal by unbinding
  440them again.
  441
  442Using assocs requires us to pass  these   things  around  and involves a
  443log(N) complexity lookup. Using real terms  has the disadvantage that to
  444unbind we have to copy the term, thus  loosing bindings it may have with
  445the environment.  Using attributes suffers neither of these problems and
  446its only drawback is relying on non-standard Prolog features.
  447
  448Note that the remainder of the  algorithm   uses  sets  organised to the
  449standard order of terms.  As  putting   attributes  does  not change the
  450identity of global stack variables and goals are global stack terms this
  451is guaranteed.
  452- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
  453
  454%%	instantiate_obj(+Arg, -Old, +New, !Bindings) is det.
  455%
  456%	Called to change the state of an   RDF object after running some
  457%	goal. Old is the current instantiation.  After executing the RDF
  458%	Goal the new instantiation is New (typically =b=). Bindings is a
  459%	term  bindings(List),  which  is    updated   using  destructive
  460%	assignment. List is the list of all  variables to which we added
  461%	attributes.
  462
  463instantiate_obj(Arg, Old, New, Bindings) :-
  464	(   var(Arg)
  465	;   ground(Arg)
  466	), !,
  467	instantiate(Arg, Old, New, Bindings).
  468instantiate_obj(literal(Pattern, Var), +(Pattern), New, Bindings) :-
  469	instantiate(Var, _, New, Bindings).
  470
  471instantiate(Var, Old, New, Bindings) :-
  472	instantiated(Var, Old),
  473	(   Old == (-)
  474	->  put_attr(Var, instantiated, New),
  475	    arg(1, Bindings, B0),
  476	    setarg(1, Bindings, [Var|B0])
  477	;   true
  478	).
  479
  480instantiated(Term, How) :-
  481	(   nonvar(Term)
  482	->  How = +(+)
  483	;   get_attr(Term, instantiated, H)
  484	->  How = +(H)
  485	;   How = -
  486	).
  487
  488uninstantiate(Term, How) :-
  489	(   get_attr(Term, instantiated, How)
  490	->  del_attr(Term, instantiated)
  491	;   true
  492	).
  493
  494%%	instantiate_unify(A, B, State) is det.
  495%
  496%	We encounter a plain unification. Ideally, we determine the most
  497%	specific binding and propagate this and execute the unification.
  498%	The latter however must be undone  if we evaluate a disjunction.
  499%	For now, the code is somewhat   simplified  and we merely decide
  500%	that if one  side  is  instantiated,   the  other  will  be too,
  501%	disregarding the actual value we might know.
  502
  503instantiate_unify(A, B, State) :-
  504	instantiated(B, +(_)), !,
  505	instantiate(A, _, b, State).
  506instantiate_unify(A, B, State) :-
  507	instantiated(A, +(_)), !,
  508	instantiate(B, _, b, State).
  509instantiate_unify(_, _, _).
  510
  511
  512%%	attr_unify_hook(+Attribute, +Value)
  513%
  514%	For now, the attribute unify hook   only  allows unifying with a
  515%	variable  with  the  same  attribute.    This   deals  with  the
  516%	unification that takes place in rdf_optimise/3 for the variables
  517%	of the saved copy.
  518
  519instantiated:attr_unify_hook(Attr, Value) :-
  520	get_attr(Value, instantiated, Attr).
  521
  522instantiated:attr_portray_hook(Value, _Var) :-
  523	write(+(Value)).
  524
  525%%	instantiate_term(+Term)
  526%
  527%	Instantiate all unbound variables
  528
  529instantiate_term(Term, How) :-
  530	compound(Term), !,
  531	functor(Term, _, Arity),
  532	instantiate_args(Arity, Term, How).
  533instantiate_term(Term, How) :-
  534	(   var(Term),
  535	    \+ get_attr(Term, instantiated, _)
  536	->  put_attr(Term, instantiated, How)
  537	;   true
  538	).
  539
  540instantiate_args(0, _, _) :- !.
  541instantiate_args(N, Term, How) :-
  542	arg(N, Term, A),
  543	instantiate_term(A, How),
  544	N2 is N - 1,
  545	instantiate_args(N2, Term, How).
  546
  547
  548%%	uninstantiate_term(+Term, +How)
  549%
  550%	Remove all skolem instantiations from Term.
  551
  552uninstantiate_term(Term, How) :-
  553	compound(Term), !,
  554	functor(Term, _, Arity),
  555	uninstantiate_args(Arity, Term, How).
  556uninstantiate_term(Term, How) :-
  557	uninstantiate(Term, How).
  558
  559uninstantiate_args(0, _, _) :- !.
  560uninstantiate_args(N, Term, How) :-
  561	arg(N, Term, A),
  562	uninstantiate_term(A, How),
  563	N2 is N - 1,
  564	uninstantiate_args(N2, Term, How).
  565
  566
  567%%	delete_instantiated(+List, -Unbound)
  568%
  569%	Delete all elements of List that are not instantiated (i.e. var
  570%	and without an instantiated attribute).
  571
  572delete_instantiated([], []).
  573delete_instantiated([H|T0], L) :-
  574	(   instantiated(H, -)
  575	->  L = [H|T],
  576	    delete_instantiated(T0, T)
  577	;   delete_instantiated(T0, L)
  578	).
  579
  580
  581		 /*******************************
  582		 *	    COMPLEXITY		*
  583		 *******************************/
  584
  585%%	rdf_complexity(+GoalIn, -GoalOut, -SpaceEstimate, -TimeEstimate)
  586%
  587%	Provide an estimate  for  the  size   of  the  search  space for
  588%	executing Goal. For this we  estimate   the  branching factor of
  589%	each subgoal in the conjunction. If   the  branching factors are
  590%	B0, B1, ... then the total complexity estimate is
  591%
  592%		E = 1 + B0 + B0*B1 + B0*B1*B2, ...
  593%
  594%	Non-RDF calls are supposed  to  be   boolean  tests  that can be
  595%	executed at the first opportunity all arguments are bound by RDF
  596%	calls. They have a probability of   failure, reducing the search
  597%	space. Using the above formula, any   number  lower than 1 moves
  598%	the test as far as possible to the head of the conjunction.
  599%
  600%	If GoalIn and GoalOut are the same   the  system will not try to
  601%	optimize local conjunctions.
  602%
  603%	ISSUES: control structures ;, if-then-else, etc.
  604
  605rdf_complexity(Goal, SpaceEstimate, TimeEstimate) :-
  606	rdf_complexity(Goal, Goal, SpaceEstimate, TimeEstimate).
  607
  608rdf_complexity(Goal0, Goal, Space, Time) :-
  609	State = bindings([]),
  610	complexity(Goal0, Goal, State, 1, Space, 0, Time),
  611	arg(1, State, Bindings),
  612	unbind(Bindings).
  613
  614unbind([]).
  615unbind([H|T]) :-
  616	del_attr(H, instantiated),
  617	unbind(T).
  618
  619%	complexity(:GoalIn, -GoalOut,
  620%		   +State,
  621%		   +SpaceIn, -SpaceOut,
  622%		   +CountIn, -CountOut)
  623%
  624%	Compute the complexity of Goal.  Vars   is  an assoc holding the
  625%	variables bound earlier in the conjunction. Space keeps the size
  626%	of the search space and Count is   the  cummulative count of the
  627%	costs for exploring the search space  espressed in the number of
  628%	nodes that will be visited.
  629%
  630%	The (G*->_=true;_=false) clause deals with   the  code generated
  631%	from optional graph specs as provided by SeRQL.
  632
  633complexity((A0,B0), (A,B), State, Sz0, Sz, C0, C) :- !,
  634	complexity(A0, A, State, Sz0, Sz1, C0, C1),
  635	complexity(B0, B, State, Sz1, Sz, C1, C).
  636complexity((G0*->True;False),
  637	   ( G*->True;False), State, Sz0, Sz, C0, C) :- !,
  638	(   var(G)
  639	->  optimise_order(G0, G, Sz1, C1),
  640	    Sz is Sz0 * Sz1,
  641	    C is C0+Sz0*C1
  642	;   complexity(G, G, State, Sz0, Sz, C0, C)
  643	).
  644complexity((If0->Then0;Else0),		% dubious
  645	   ( If->Then; Else), State, Sz0, Sz, C0, C) :- !,
  646	(   var(If)
  647	->  optimise_order(If0,   If,   Sz1, C1),
  648	    optimise_order(Then0, Then, Sz2, C2),
  649	    optimise_order(Else0, Else, Sz3, C3),
  650	    Sz is max(Sz0 * Sz1 * Sz2, Sz0 * Sz3),
  651	    C  is C0 + max(Sz0*C1+Sz0*Sz1*C2, Sz0*C1+Sz0*Sz1*C3)
  652	;   complexity(If,   If,   State, Sz0, Sz1, C0, C1),
  653	    complexity(Then, Then, State, Sz1, Sz2, C1, C2),
  654	    complexity(Else, Else, State, Sz0, Sz3, C0, C3),
  655	    Sz is max(Sz2, Sz3),
  656	    C  is max(C2, C3)
  657	).
  658complexity((A0;B0), (A;B), State, Sz0, Sz, C0, C) :- !,
  659	(   var(A)
  660	->  optimise_order(A0, A, _, _),	% First try the cheap one?
  661	    optimise_order(B0, B, _, _)
  662	;   A = A0,
  663	    B = B0
  664	),
  665	complexity(A, A, State, Sz0, SzA, C0, CA),
  666	complexity(B, B, State, Sz0, SzB, C0, CB),
  667	Sz is SzA + SzB,
  668	C is CA + CB.
  669complexity(sparql_group(G0), sparql_group(G), State, Sz0, Sz, C0, C) :- !,
  670	(   var(G)
  671	->  rdf_optimise(G0, G, Sz1, C1),
  672	    Sz is Sz0 * Sz1,
  673	    C is C0+Sz0*C1
  674	;   complexity(G, G, State, Sz0, Sz, C0, C)
  675	).
  676complexity(sparql_group(G0, OV, IV),
  677	   sparql_group(G, OV, IV),
  678	   State, Sz0, Sz, C0, C) :- !,
  679	(   var(G)
  680	->  rdf_optimise(G0, G, Sz1, C1),
  681	    Sz is Sz0 * Sz1,
  682	    C is C0+Sz0*C1
  683	;   complexity(G, G, State, Sz0, Sz, C0, C)
  684	).
  685complexity(rdfql_carthesian(Bags),
  686	   rdfql_carthesian(Bags), State, Sz0, Sz, C0, C) :- !,
  687	carth_complexity(Bags, State, Sz0, Sz, C0, 0, C).
  688complexity(independent(Goals0), Goal, State, Sz0, Sz, C0, C) :- !,
  689	independent_complexity(Goals0, Goal, State, Sz0, Sz, C0, 0, C).
  690complexity(Goal, Goal, State, Sz0, Sz, C0, C) :-
  691	Goal = member(Var, List), !,	% List is list of resources
  692	instantiate(Var, _, b, State),
  693	length(List, Branch),
  694	Sz is Sz0 * Branch,
  695	C is C0 + Sz0*0.2 + Sz*0.2.
  696complexity(Goal, Goal, State, Sz, Sz, C0, C) :-
  697	Goal = (A=B), !,
  698	instantiate_unify(A, B, State),
  699	C is C0 + 0.2.
  700complexity(Goal, Goal, State, Sz, Sz, C0, C) :-
  701	Goal = (Var=literal(V)), !,
  702	instantiated(V, +(_)),
  703	instantiate(Var, _, b, State),
  704	C is C0 + 0.2.
  705complexity(Goal, Goal, State, Sz0, Sz, C0, C) :-
  706	rdf_db_goal(Goal, S, P, O), !,
  707	instantiate(S, SI, b, State),
  708	instantiate(P, PI, b, State),
  709	instantiate_obj(O, OI, b, State),
  710	complexity0(SI, PI, OI, P, Goal, SetUp, PerAlt, Branch),
  711	Sz is Sz0 * Branch,
  712	C is C0 + Sz0*SetUp + Sz*PerAlt,
  713	debug(rdf_optimise(complexity), 'Complexity ~p: (~w) ~w --> ~w',
  714	      [i(SI,PI,OI), Goal, Branch, C]).
  715complexity(sparql_eval(E,V), sparql_eval(E,V), _, Sz0, Sz, C0, C) :- !,
  716	term_variables(E, Vars),
  717	all_bound(Vars),
  718	Sz is Sz0,			% probability of failure
  719	C is C0 + Sz*20.		% Sz * CostOfEval
  720complexity(sparql_true(E), sparql_true(E), _, Sz0, Sz, C0, C) :- !,
  721	term_variables(E, Vars),
  722	all_bound(Vars),
  723	Sz is Sz0 * 0.5,		% probability of failure
  724	C is C0 + Sz0*20.		% Sz * CostOfEval
  725complexity(G, G, _, Sz0, Sz, C0, C) :-	% non-rdf tests
  726	term_variables(G, Vars),
  727	all_bound(Vars),
  728	Sz is Sz0 * 0.5,		% probability of failure
  729	C is C0 + Sz0.			% Sz * CostOfTest
  730
  731all_bound([]).
  732all_bound([H|T]) :-
  733	instantiated(H, +(_)),
  734	all_bound(T).
  735
  736%%	carth_complexity(+Bags, +State,
  737%%			 +Size0, -Size,
  738%%			 +Time0, +TimeSum0, -TimeSum) is det.
  739%
  740%	Compute the time and space efficiency of the carthesian product.
  741%	the total cost in time is the sum  of the costs of all branches,
  742%	The search space at the end is still the same product.
  743
  744carth_complexity([], _, Sz, Sz, _, C, C).
  745carth_complexity([bag(_,G)|T], State,
  746		 Sz0, Sz,
  747		 C0, Csum0, Csumz) :-
  748	complexity(G, G, State, Sz0, Sz1, C0, C1),
  749	Csum1 is Csum0 + C1,
  750	carth_complexity(T, State, Sz1, Sz, C0, Csum1, Csumz).
  751
  752
  753%%	independent_complexity(+GoalsIn, -Goals, +State,
  754%%			       +Size0, -Size,
  755%%			       +Time0, +TimeSum0, -TimeSum) is det.
  756%
  757%	Compute the complexity of an independent conjunction.
  758%
  759%	@param Goals is a list of g(Goal, Branch, Cost), where Branch is
  760%	the number of solutions expected fromGoal and Cost is the
  761%	estimated CPU time.
  762
  763independent_complexity(GoalsIn, Goal, State,
  764		       Size0, Size,
  765		       Time0, TimeSum0, TimeSum) :-
  766	indep_complexity(GoalsIn, Goals1, State,
  767			 Size0, Size,
  768			 Time0, TimeSum0, TimeSum),
  769	keysort(Goals1, ByCost),
  770	pairs_values(ByCost, Goals2),
  771	simplify_carthesian(Goals2, Goal).
  772
  773indep_complexity([], [], _, Sz, Sz, _, C, C).
  774indep_complexity([G0|GT0], [SzG-bag(Vars,G,SzG,CG)|GT], State,
  775		       Sz0, Sz,
  776		       C0, Csum0, Csumz) :-
  777	complexity(G0, G, State, Sz0, Sz1, C0, C1),
  778	term_variables(G, VList),
  779	Vars =.. [v|VList],
  780	(   Sz0 =:= 0
  781	->  SzG = 0,
  782	    CG = 0
  783	;   SzG is Sz1/Sz0,
  784	    CG is (C1-C0)/Sz0
  785	),
  786	Csum1 is Csum0 + C1,
  787	indep_complexity(GT0, GT, State, Sz1, Sz, C0, Csum1, Csumz).
  788
  789%%	simplify_carthesian(+Bags, -Goal) is det.
  790%
  791%	Peer of the leading little branching   goals from the carthesian
  792%	handler. If there is a bag of one   left,  turn it into a normal
  793%	goal.
  794
  795simplify_carthesian([], true).
  796simplify_carthesian([bag(_,Goal,_Branch,_Cost)], Goal) :- !.
  797simplify_carthesian([bag(_,Goal,Branch,_Cost)|Bags], Final) :-
  798	(   Branch < 1.5
  799	->  !, Final = (Goal, Final1),
  800	    simplify_carthesian(Bags, Final1)
  801	;   Bags == []
  802	->  !, Final = Goal
  803	).
  804simplify_carthesian(Bags, rdfql_carthesian(Bags)).
  805
  806
  807%%	complexity0(+SI, +PI, +OI, +P, +G, -Setup, -PerAlt, -Branch).
  808%
  809%	SI,PI,OI describe the instantiation pattern.  P is the predicate
  810%	and G is the actual goal. Complexity   is unified to an estimate
  811%	of the size of the search space and therefore an estimate of the
  812%	execution time required to prove the goal.
  813%
  814%	Literal `like' matches come  out   as  +(like(Pattern)). We must
  815%	estimate the percentage of the literals that match this pattern.
  816%	Suppose the factor is 1,000. This means the branching is reduced
  817%	by 1,000, but finding each solution  is   slow  as it requires a
  818%	linear scan. It is faster than going  all the way back to Prolog
  819%	backtracking however, so we estimate a   factor  10 (TBD: verify
  820%	that number).
  821%
  822%	ISSUES: rdf_has/3 vs rdf_reachable/3.
  823
  824complexity0(+(_),+(_),+(_), _, _, 1, 0, 1) :- !.
  825complexity0(+(b),+(+),-, P, G, 1, 1, B) :- !,
  826	subj_branch_factor(G, B, Prop),
  827	rdf_predicate_property(P, Prop).
  828complexity0(-,+(+),+(b), P, G, 1, 1, B) :- !,
  829	obj_branch_factor(G, B, Prop),
  830	rdf_predicate_property(P, Prop).
  831complexity0(+(b), -, -, _, _, 1, 1, B) :- !,
  832	rdf_statistics(triples(Total)),
  833	subject_count(Subjs),
  834	(   Total == 0
  835	->  B = 0
  836	;   B is Total/Subjs
  837	).
  838complexity0(_,_,+(like(Pat)),_, G, Factor, Factor, B) :- !,
  839	rdf_estimate_complexity(G, B0),
  840	pattern_filter(Pat, Factor0),
  841	Factor is max(1, min(B0, Factor0)/10),
  842	B is B0/Factor.
  843complexity0(_,_,_, _, G, 1, 1, B) :-
  844	rdf_estimate_complexity(G, B).
  845
  846:- if(rdf_statistics(subjects(_))).  847subject_count(Count) :-				% RDF-DB 2.x
  848	rdf_statistics(subjects(Count)).
  849:- else.  850subject_count(Count) :-				% RDF-DB 3.x
  851	rdf_statistics(resources(Count)).
  852:- endif.  853
  854:- multifile
  855        subj_branch_factor/3,
  856        obj_branch_factor/3.  857
  858subj_branch_factor(rdf(_,_,_),           X, rdf_subject_branch_factor(X)).
  859subj_branch_factor(rdf_has(_,_,_),       X, rdfs_subject_branch_factor(X)).
  860subj_branch_factor(rdf_reachable(_,_,_), X, rdfs_subject_branch_factor(X)).
  861
  862obj_branch_factor(rdf(_,_,_),            X, rdf_object_branch_factor(X)).
  863obj_branch_factor(rdf_has(_,_,_),        X, rdfs_object_branch_factor(X)).
  864obj_branch_factor(rdf_reachable(_,_,_),  X, rdfs_object_branch_factor(X)).
  865
  866
  867%%	rdf_db_goal(+Goal, -Subject, -Predicate, -Object)
  868%
  869%	True if Goal is a pure (logical)   predicate on the RDF database
  870%	involving the given  Subject,  Predicate   and  Object.  Defined
  871%	multifile, allowing the  optimiser   to  understand user-defined
  872%	rdf/3 like predicates.
  873%
  874%	@tbd	Allow specifying different costs and branching factors
  875
  876:- multifile
  877	rdf_db_goal/4.  878
  879rdf_db_goal(rdf(S,P,O),			S,P,O).
  880rdf_db_goal(rdf_has(S,P,O),		S,P,O).
  881rdf_db_goal(rdf_reachable(S,P,O),	S,P,O).
  882rdf_db_goal(rdf(S,P,O, _DB),		S,P,O). % TBD: less hits
  883
  884%%	pattern_filter(+Like, -Factor)
  885%
  886%	Estimate the efficiency of a pattern. This is a bit hard, as
  887%	characters are not independent.
  888
  889pattern_filter(Like, Factor) :-
  890	atom_codes(Like, Codes),
  891	pattern_factor(Codes, 1, Factor).
  892
  893pattern_factor([], F, F).
  894pattern_factor([0'*|T], F0, F) :- !,
  895	pattern_factor(T, F0, F).
  896pattern_factor([_|T], F0, F) :-
  897	F1 is F0*10,
  898	pattern_factor(T, F1, F).
  899
  900
  901%%	rdf_estimate_complexity(+Goal, -Complexity)
  902%
  903%	Estimate the branching factor introduced by  Goal. This uses the
  904%	rdf_db  statistics  of  the  hash  chains  which  are  based  on
  905%	exploiting the RDFS subPropertyOf reasoning.
  906%
  907%	In addition, rdf_reachable/3 introduces its own complexity which
  908%	must be estimate using the branching factor of the relation.
  909
  910rdf_estimate_complexity(G, C) :-
  911	rdf_db_goal(G, S, P0, O),
  912	map_predicate(P0, P),
  913	rdf_estimate_complexity(S, P, O, C).
  914
  915map(map_predicate(_,_)).
  916map(map_predicate(_,_):-_).
  917
  918term_expansion(In, Out) :-
  919	map(In), !,
  920	rdf_global_term(In, Out).
  921
  922map_predicate(X, X) :-
  923	var(X), !.
  924map_predicate(serql:directSubClassOf,    rdfs:subClassOf) :- !.
  925map_predicate(serql:directType,          rdf:type) :- !.
  926map_predicate(serql:directSubPropertyOf, rdfs:subPropertyOf) :- !.
  927map_predicate(X, X).
  928
  929
  930		 /*******************************
  931		 *     INSTANTIATE OPTIONAL	*
  932		 *******************************/
  933
  934/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  935In  SELECT  queries,  optional  parts  of   the  path  expression  leave
  936uninstantiated variables. These must be bound to  '$null$' to be able to
  937do correct merging for  DISTINCT.  The  naive   way  to  do  this  is to
  938instantiate all variables at the end  of   the  query.  On large selects
  939(i.e. involving many variables) this appears to be quite costly.  Doing
  940the job early, as in
  941
  942	(   Goal
  943	*-> true
  944	;   bind_null(VarsInGoal)
  945	)
  946
  947is not correct as well, as VarsInGoal may  be involved in other parts of
  948the code either before or after the optional path expression. So we need
  949to:
  950
  951	* Do abtract execution and find the bindings done before arriving
  952	  at Goal.
  953	* continue the execution, and watch for new bindings to these
  954	  variables.  If we find a binding for the second time, remove
  955	  it from the first and make a conditional binding for it.
  956
  957If we bind an argument unconditionally, we place an attribute 'b'. If it
  958is conditionally bound, we place an attribute   c(Set), where Set is the
  959set in which it was bound or plain  'c' if it was conditionally bound in
  960multiple places.
  961
  962TBD:	disjunctions and other control structures.
  963	queries that do not bind (such as SPARQL bound(X))
  964- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
  965
  966serql_select_bind_null(Goal0, Goal) :-
  967	State = bindings([]),
  968	select_bind_null(Goal0, Goal1, State),
  969	arg(1, State, Bindings),
  970	c_unbind(Bindings, Left),
  971	(   Left == []
  972	->  Goal = Goal1
  973	;   Goal = (Goal1, rdfql_cond_bind_null(Left))
  974	).
  975
  976c_unbind([], []).
  977c_unbind([H|T0], L) :-
  978	(   get_attr(H, instantiated, c)
  979	->  L = [H|T]
  980	;   L = T
  981	),
  982	del_attr(H, instantiated),
  983	c_unbind(T0, T).
  984
  985
  986select_bind_null((A0,B0), (A,B), State) :- !,
  987	select_bind_null(A0, A, State),
  988	select_bind_null(B0, B, State).
  989select_bind_null((G0 *-> true; true),
  990		 ( G *-> true; Bind),
  991		 State) :- !,
  992	arg(1, State, B0),
  993	select_bind_null(G0, G, State),
  994	arg(1, State, B),
  995	Bind = rdfql_bind_null(Vars),
  996	c_bindings(B, B0, c(Bind), Vars).
  997select_bind_null(rdfql_carthesian(List0),
  998		 rdfql_carthesian(List), State) :- !,
  999	select_carth_bind_null(List0, List, State).
 1000select_bind_null(sparql_group(A0), sparql_group(A), State) :- !,
 1001	select_bind_null(A0, A, State).
 1002select_bind_null(sparql_group(A0,Outer,Inner),
 1003		 sparql_group(A, Outer,Inner),
 1004		 State) :- !,
 1005	select_bind_null(A0, A, State),
 1006	term_variables(Outer, Vars),
 1007	c_bind(Vars, State).
 1008select_bind_null(Goal, Goal, State) :-
 1009	term_variables(Goal, Vars),
 1010	c_bind(Vars, State).
 1011
 1012%%	c_bindings(+AtEnd, +AtStart, +CVars, -Vars)
 1013%
 1014%	The  variables  of  the    difference-list   AtEnd..AtStart  are
 1015%	conditionally bound. Tag each such  variable with CVars.
 1016%
 1017%	@param CVars	Term c(Vars), where Vars are the other variables
 1018%			that have the same conditional binding.
 1019
 1020c_bindings(B, B0, _, Vars)  :-
 1021	B == B0, !,
 1022	Vars = [].
 1023c_bindings([H|T0], B0, Attr, [H|Vars]) :-
 1024	get_attr(H, instantiated, I),
 1025	is_instantiated(I), !,
 1026	put_attr(H, instantiated, Attr),
 1027	c_bindings(T0, B0, Attr, Vars).
 1028c_bindings([_|T0], B0, Attr, Vars) :-
 1029	c_bindings(T0, B0, Attr, Vars).
 1030
 1031
 1032is_instantiated(b).		% unconditionally bound
 1033is_instantiated(c(_)).		% bound either by call or rdfql_bind_null/1
 1034
 1035
 1036%%	c_bind(+Vars, +State)
 1037%
 1038%	Do  unconditional  binding  of   Vars.   Var    may   be   in  a
 1039%	rdfql_bind_null/1 set. In that case, delete it from the set, and
 1040%	set the class to 'c' to make a conditional binding at the end.
 1041
 1042c_bind([], _).
 1043c_bind([H|T], State) :-
 1044	(   get_attr(H, instantiated, I)
 1045	->  (   I == b			% already unconditionally bound
 1046	    ->	true
 1047	    ;	I = c(Set)
 1048	    ->	arg(1, Set, Vars0),
 1049		del_var(H, Vars0, Vars),
 1050		setarg(1, Set, Vars),
 1051		put_attr(H, instantiated, c)
 1052	    ;	I == c
 1053	    ->	true
 1054	    )
 1055	;   put_attr(H, instantiated, b),
 1056	    arg(1, State, B0),
 1057	    setarg(1, State, [H|B0])
 1058	),
 1059	c_bind(T, State).
 1060
 1061del_var(H, [X|T0], T) :-
 1062	(   H == X
 1063	->  T = T0
 1064	;   T = [X|T1],
 1065	    del_var(H, T0, T1)
 1066	).
 1067
 1068select_carth_bind_null([], [], _).
 1069select_carth_bind_null([bag(Vars, G0)|T0], [bag(Vars, G)|T], State) :-
 1070	select_bind_null(G0, G, State),
 1071	select_carth_bind_null(T0, T, State).
 1072
 1073
 1074		 /*******************************
 1075		 *	  DEBUG SUPPORT		*
 1076		 *******************************/
 1077
 1078dbg_portray_body(G) :-
 1079	debugging(rdf_optimise), !,
 1080	portray_body(G).
 1081dbg_portray_body(_).
 1082
 1083portray_body(G) :-
 1084	(   pp_instantiate_term(G),
 1085	    debug(_, '~@',
 1086		  [ portray_clause(current_output, (<> :- G), [module(sparql_runtime)])
 1087		  ]),
 1088	    fail
 1089	;   true
 1090	).
 1091
 1092pp_instantiate_term(Term) :-
 1093	compound(Term), !,
 1094	functor(Term, _, Arity),
 1095	pp_instantiate_args(Arity, Term).
 1096pp_instantiate_term(Term) :-
 1097	var(Term),
 1098	get_attr(Term, instantiated, H), !,
 1099	del_attr(Term, instantiated),
 1100	Term = +(H).
 1101pp_instantiate_term(_).
 1102
 1103pp_instantiate_args(0, _) :- !.
 1104pp_instantiate_args(N, Term) :-
 1105	arg(N, Term, A),
 1106	pp_instantiate_term(A),
 1107	N2 is N - 1,
 1108	pp_instantiate_args(N2, Term).
 1109
 1110
 1111		 /*******************************
 1112		 *	      SANDBOX		*
 1113		 *******************************/
 1114
 1115:- multifile sandbox:safe_meta_predicate/1. 1116
 1117sandbox:safe_meta_predicate(rdf_optimise:rdf_optimise/1)