/*  Part of ClioPatria SeRQL and SPARQL server

    Author:        Jan Wielemaker
    E-mail:        J.Wielemaker@cs.vu.nl
    WWW:           http://www.swi-prolog.org
    Copyright (C): 2007-2010, University of Amsterdam,
			      VU University Amsterdam

    This program is free software; you can redistribute it and/or
    modify it under the terms of the GNU General Public License
    as published by the Free Software Foundation; either version 2
    of the License, or (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public
    License along with this library; if not, write to the Free Software
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA

    As a special exception, if you link this library with other files,
    compiled with a Free Software compiler, to produce an executable, this
    library does not by itself cause the resulting executable to be covered
    by the GNU General Public License. This exception does not however
    invalidate any other reasons why the executable file might be covered by
    the GNU General Public License.
*/

:- module(rdf_abstract,
	  [ merge_sameas_graph/3,	% +GraphIn, -GraphOut, +Options
	    bagify_graph/4,		% +GraphIn, -GraphOut, -Bags, +Options
	    abstract_graph/3,		% +GraphIn, -GraphOut, +Options
	    minimise_graph/2,		% +GraphIn, -GraphOut

	    graph_resources/2,		% +Graph, -Resources
	    graph_resources/4		% +Graph, -Resources, -Predicates, -Types
	  ]).
:- use_module(library(semweb/rdf_db)).
:- use_module(library(semweb/rdfs)).
:- use_module(library(assoc)).
:- use_module(library(option)).
:- use_module(library(pairs)).
:- use_module(library(ordsets)).
:- use_module(library(debug)).
:- use_module(library(apply)).
:- use_module(library(lists)).
:- use_module(library(settings)).

/** <module> Abstract RDF graphs

The task of this module is to do some simple manipulations on RDF graphs
represented as lists of rdf(S,P,O).  Supported operations:

	* merge_sameas_graph(+GraphIn, -GraphOut, +Options)
	Merge nodes by owl:sameAs

	* bagify_graph(+GraphIn, -GraphOut, -Bags, +Options)
	Bagify a graph, returning a new graph holding bags of resources
	playing a similar role in the graph.

	* abstract_graph(+GraphIn, -GraphOut, +Options)
	Abstract nodes or edges using rdf:type, rdfs:subClassOf and/or
	rdfs:subPropertyOf
*/

%%	merge_sameas_graph(GraphIn, GraphOut, +Options) is det.
%
%	Collapse nodes in GraphIn that are   related through an identity
%	mapping.  By  default,  owl:sameAs  is  the  identity  relation.
%	Options defines:
%
%	    * predicate(-PredOrList)
%	    Use an alternate or list of predicates that are to be
%	    treated as identity relations.
%
%	    * sameas_mapped(-Assoc)
%	    Assoc from resources to the resource it was mapped to.

:- rdf_meta
	merge_sameas_graph(+, -, t).

merge_sameas_graph(GraphIn, GraphOut, Options) :-
	sameas_spec(Options, SameAs),
	sameas_map(GraphIn, SameAs, Assoc),		% R->EqSet
	(   empty_assoc(Assoc)
	->  GraphOut = GraphIn,
	    empty_assoc(EqMap)
	;   assoc_to_list(Assoc, List),
	    pairs_values(List, EqSets),
	    sort(EqSets, UniqueEqSets),
	    map_list_to_pairs(rdf_representative, UniqueEqSets, Keyed),	% Repr-EqSet
	    representer_map(Keyed, EqMap),
	    map_graph(GraphIn, EqMap, GraphOut),
	    (	debugging(abstract)
	    ->	length(GraphIn, Before),
		length(GraphOut, After),
		debug(abstract, 'owl:sameAs reduction: ~D --> ~D edges', [Before, After])
	    ;	true
	    )
	),
	option(sameas_mapped(EqMap), Options, _).

sameas_spec(Options, SameAs) :-
	rdf_equal(owl:sameAs, OwlSameAs),
	option(predicate(SameAs0), Options, OwlSameAs),
	(   is_list(SameAs0)
	->  SameAs = SameAs0
	;   SameAs = [SameAs0]
	).

%%	sameas_map(+Graph, +SameAs, -Map:assoc) is det.
%
%	Create an assoc with R->Set, where   Set contains an ordered set
%	of resources equivalent to R.

sameas_map(Graph, SameAs, Assoc) :-
	empty_assoc(Assoc0),
	sameas_map(Graph, SameAs, Assoc0, Assoc).

sameas_map([], _, Assoc, Assoc).
sameas_map([rdf(S, P, O)|T], SameAs, Assoc0, Assoc) :-
	same_as(P, SameAs),
	S \== O, !,
	(   get_assoc(S, Assoc0, SetS)
	->  (   get_assoc(O, Assoc0, SetO)
	    ->	ord_union(SetO, SetS, Set)
	    ;	ord_union([O], SetS, Set)
	    )
	;   (   get_assoc(O, Assoc0, SetO)
	    ->	ord_union([S], SetO, Set)
	    ;   sort([S,O], Set)
	    )
	),
	putall(Set, Assoc0, Set, Assoc1),
	sameas_map(T, SameAs, Assoc1, Assoc).
sameas_map([_|T], SameAs, Assoc0, Assoc) :-
	sameas_map(T, SameAs, Assoc0, Assoc).

putall([], Assoc, _, Assoc).
putall([H|T], Assoc0, Value, Assoc) :-
	put_assoc(H, Assoc0, Value, Assoc1),
	putall(T, Assoc1, Value, Assoc).


%%	same_as(+Predicate:resource, +SameAs:list) is semidet.
%
%	True if Predicate expresses a same-as mapping.

same_as(P, Super) :-
	member(S, Super),
	rdfs_subproperty_of(P, S), !.


%%	representer_map(+List:list(Repr-Set), -Assoc) is det.
%
%	Assoc maps all elements of Set to its representer.

representer_map(Keyed, EqMap) :-
	empty_assoc(Assoc0),
	representer_map(Keyed, Assoc0, EqMap).

representer_map([], Assoc, Assoc).
representer_map([R-Set|T], Assoc0, Assoc) :-
	putall(Set, Assoc0, R, Assoc1),
	representer_map(T, Assoc1, Assoc).


		 /*******************************
		 *	       BAGIFY		*
		 *******************************/

%%	bagify_graph(+GraphIn, -GraphOut, -Bags, +Options) is det.
%
%	If a graph contains multiple objects of the same type (class) in
%	the same location in the graph (i.e.   all  links are the same),
%	create a *bag*. The bag is   represented by a generated resource
%	of type rdf:Bag and the RDF for the   bags  is put in Bags. I.e.
%	appending GraphOut and Bags provides a proper RDF model. Options
%	provides additional abstraction properties.  In particular:
%
%	    * class(+Class)
%	    Try to bundle objects under Class rather than their
%	    rdf:type.  Multiple of these options may be defined
%
%	    * property(+Property)
%	    Consider predicates that are an rdfs:subPropertyOf
%	    Property the same relations.
%
%	    * bagify_literals(+Bool)
%	    If =true= (default), also try to put literals into a
%	    bag.  Works well to collapse non-preferred labels.
%
%	@tbd Handle the property option

:- rdf_meta
	bagify_graph(+, -, -, t).

bagify_graph(GraphIn, GraphOut, Bags, Options) :-
	canonise_options(Options, Options1),
	partition_options(class, Options1, ClassOptions, Options2),
	graph_node_edges(GraphIn, AssocNodesToEdges, Options2),
	assoc_to_list(AssocNodesToEdges, NodesToEdges),
	pairs_keys(NodesToEdges, Nodes),
	group_resources_by_class(Nodes, ByClass, ClassOptions),
	resource_bags(ByClass, NodesToEdges, RawBags),
	(   debugging(abstract)
	->  length(RawBags, Len),
	    maplist(length, RawBags, BagLens),
	    sumlist(BagLens, ObjCount),
	    debug(abstract, 'Created ~D bags holding ~D objects', [Len, ObjCount])
	;   true
	),
	assign_bagids(RawBags, IDBags),
	representer_map(IDBags, Assoc),
	map_graph(GraphIn, Assoc, GraphOut0),
	merge_properties(GraphOut0, GraphOut, Options2),
	make_rdf_graphs(IDBags, Bags).

partition_options(Name, Options, WithName, WithoutName) :-
	partition(option_name(Name), Options, WithName, WithoutName).

option_name(Name, Option) :-
	functor(Option, Name, 1).

%%	canonise_options(+OptionsIn, -OptionsOut) is det.
%
%	Rewrite option list from possible Name=Value to Name(Value)

canonise_options(In, Out) :-
	memberchk(_=_, In), !,		% speedup a bit if already ok.
	canonise_options2(In, Out).
canonise_options(Options, Options).

canonise_options2([], []).
canonise_options2([Name=Value|T0], [H|T]) :- !,
	H =.. [Name,Value],
	canonise_options2(T0, T).
canonise_options2([H|T0], [H|T]) :- !,
	canonise_options2(T0, T).


%%	group_resources_by_class(+Resources, -ByClass, +Options) is det.
%
%	ByClass is a list of lists of  resources that belong to the same
%	class. First step we process the classes specified in Options.

group_resources_by_class([], [], _) :- !.
group_resources_by_class(Resources, ByClass, Options) :-
	select_option(class(Class), Options, Options1), !,
	(   partition(has_class(sub_class, Class), Resources, InClass, NotInClass),
	    InClass \== []
	->  ByClass = [InClass|ByClass1],
	    group_resources_by_class(NotInClass, ByClass1, Options1)
	;   group_resources_by_class(Resources, ByClass, Options1)
	).
group_resources_by_class([H|T0], [[H|S]|T], Options) :-
	class_of(H, exact, Class),
	partition(has_class(exact, Class), T0, S, T1),
	group_resources_by_class(T1, T, Options).

%%	has_class(+Match, +Class, +Node) is semidet.

has_class(Match, Class, Node) :-
	class_of(Node, Match, Class).

%%	class_of(+Node, +Match, -Class) is det.
%%	class_of(+Node, +Match, +Class) is semidet.

class_of(Node, sub_class, Class) :- !,
	rdfs_individual_of(Node, Class), !.
class_of(literal(_), exact, Literal) :- !,
	rdf_equal(Literal, rdfs:'Literal').
class_of(R, exact, Class) :-
	rdf_has(R, rdf:type, Class), !.
class_of(_, exact, Class) :-
	rdf_equal(Class, rdfs:'Resource').


%%	resource_bags(+ByClass:list(list(resource)),
%%		      +NodeToEdges:list(node-list(edges)),
%%		      -RawBags:list(list(resource))) is det.
%
%	Find bags of resources that have the same connections.

resource_bags(ByClass, NodeToEdges, Bags) :-
	phrase(resource_bags(ByClass, NodeToEdges), Bags).

resource_bags([], _) -->
	[].
resource_bags([ByClassH|ByClassT], NodeToEdges) -->
	{ sort(ByClassH, SortedNodes),
	  ord_subkeys(SortedNodes, NodeToEdges, SubNodeToEdges),
	  same_edges(SubNodeToEdges, Bags)
	},
	Bags,
	resource_bags(ByClassT, NodeToEdges).

%%	ord_subkeys(+Keys, +Pairs, -SubPairs) is det.
%
%	SubPairs is the sublist of Pairs with a key in Keys.
%
%	@param Keys	Sorted list of keys
%	@param Pairs	Key-sorted pair-list
%	@param SubPairs	Key-sorted pair-list

ord_subkeys([], _, []).
ord_subkeys([K|KT], [P|PT], Pairs) :-
	P = PK-_,
	compare(Diff, K, PK),
	ord_subkeys(Diff, K, KT, P, PT, Pairs).

ord_subkeys(=, _, KT, P, PT, [P|Pairs]) :- !,
	ord_subkeys(KT, PT, Pairs).
ord_subkeys(<, _, [K|KT], P, PT, Pairs) :-
	P = PK-_,
	compare(Diff, K, PK),
	ord_subkeys(Diff, K, KT, P, PT, Pairs).
ord_subkeys(>, K, KT, _, [P|PT], Pairs) :-
	P = PK-_,
	compare(Diff, K, PK),
	ord_subkeys(Diff, K, KT, P, PT, Pairs).


%%	same_edges(+NodeToEdges:list(node-edges), -Bags:list(list)) is det.
%
%	Bags is a list of lists of resources (nodes) that share the same
%	(abstracted) edges with the rest of the graph.

same_edges(NodeToEdges, Bags) :-
	transpose_pairs(NodeToEdges, ByEdges),		% list(edges-node)
	keysort(ByEdges, Sorted),
	group_pairs_by_key(Sorted, Grouped),
	pairs_values(Grouped, AllBySameEdge),
	include(longer_than_one, AllBySameEdge, Bags).

longer_than_one([_,_|_]).

%%	graph_node_edges(+Graph, -NodeEdges:assoc, +Options) is det.
%
%	NodeEdges is an assoc from resource to a sorted list of involved
%	triples. Only subject and objects are considered.
%
%	Processes =bagify_literals= and =property= options

graph_node_edges(Graph, Assoc, Options) :-
	option(bagify_literals(LitToo), Options, true),
	property_map(Options, Map0),
	empty_assoc(Assoc0),
	graph_node_edges(Graph, LitToo, Map0, Assoc0, Assoc1),
	map_assoc(sort, Assoc1, Assoc).

graph_node_edges([], _, _, Assoc, Assoc).
graph_node_edges([rdf(S,P,O)|T], LitToo, Map, Assoc0, Assoc) :-
	abstract_property(P, Map, SP, Map1),
	add_assoc(S, Assoc0, rdf(-, SP, O), Assoc1),
	(   (atom(O) ; LitToo == true )
	->  add_assoc(O, Assoc1, rdf(S, SP, -), Assoc2)
	;   Assoc2 = Assoc1
	),
	graph_node_edges(T, LitToo, Map1, Assoc2, Assoc).

add_assoc(Key, Assoc0, Value, Assoc) :-
	get_assoc(Key, Assoc0, Old, Assoc, [Value|Old]), !.
add_assoc(Key, Assoc0, Value, Assoc) :-
	put_assoc(Key, Assoc0, [Value], Assoc).


%%	property_map(+Options, -Map:assoc(P-Super))
%
%	Process the options, creating a map that replaces a property by
%	its registered super.

property_map(Options, Map) :-
	empty_assoc(Map0),
	property_map(Options, Map0, Map).

property_map([], Map, Map).
property_map([property(P)|T], Map0, Map) :- !,
	(   rdfs_subproperty_of(P, Super),
	    get_assoc(Super, Map0, Root)
	->  put_assoc(P, Map0, Root, Map1)
	;   put_assoc(P, Map0, P, Map1)
	),
	property_map(T, Map1, Map).
property_map([_|T], Map0, Map) :-
	property_map(T, Map0, Map).

%%	abstract_property(+P0, +Map0, -P, -Map) is det.
%
%	Find the abstract property for some property P.

abstract_property(P0, Map0, P, Map) :-
	get_assoc(P0, Map0, P), !,
	Map = Map0.
abstract_property(P, Map0, Root, Map) :-
	rdfs_subproperty_of(P, Super),
	get_assoc(Super, Map0, Root), !,
	debug(abstract(property), 'Mapped ~p --> ~p', [P, Root]),
	put_assoc(P, Map0, Root, Map).
abstract_property(P, Map, P, Map).


%%	assign_bagids(+Bags:list(bag), -IDBags:list(id-bag)).
%
%	Assign bag identifiers to the each bag in Bags.

assign_bagids(Bags, IDBags) :-
	assign_bagids(Bags, 1, IDBags).

assign_bagids([], _, []).
assign_bagids([H|T0], I, [Id-H|T]) :-
	atom_concat('_:bag_', I, Id),
	I2 is I + 1,
	assign_bagids(T0, I2, T).


%%	make_rdf_graphs(+IDBags, -RDFBags) is det.
%
%	Translate BagID-Members into an RDF graph.

:- rdf_meta
	statement(r,r,o,?,?).			% statement//3

make_rdf_graphs(IDBags, RDFBags) :-
	phrase(make_rdf_graphs(IDBags), RDFBags).

make_rdf_graphs([]) -->
	[].
make_rdf_graphs([ID-Members|T]) -->
	statement(ID, rdf:type, rdf:'Bag'),
	bag_members(Members, 0, ID),
	make_rdf_graphs(T).

bag_members([], _, _) -->
	[].
bag_members([H|T], I, ID) -->
	{ I2 is I + 1,
	  atom_concat('_:', I, P)
	},
	statement(ID, P, H),
	bag_members(T, I2, ID).

statement(S, P, O) -->
	[ rdf(S, P, O) ].


		 /*******************************
		 *	 MERGE PROPERTIES	*
		 *******************************/

%%	merge_properties(+GraphIn, -GraphOut, +Options) is det.
%
%	Merge equivalent properties joining the same nodes.  They are
%	replaced by their common ancestors.
%
%	@param GraphIn	List of rdf(S,P,O)
%	@param GraphOut List of rdf(S,P,O)
%	@param Options  Option list (unused)

merge_properties([], [], _).
merge_properties([rdf(S,P,O)|GraphIn], GraphOut, Options) :-
	memberchk(rdf(S,_,O), GraphIn), !,
	partition(same_so(S,O), GraphIn, Same, Rest),
	maplist(pred, Same, Preds),
	sort([P|Preds], UniquePreds),
	common_ancestor_forest(sub_property_of, UniquePreds, Forest),
	pairs_keys(Forest, Roots),
	debug(abstract, 'Merged ~p --> ~p', [UniquePreds, Roots]),
	mk_p_triples(Roots, S, O, GraphOut, Out2),
	merge_properties(Rest, Out2, Options).
merge_properties([Triple|GraphIn], [Triple|GraphOut], Options) :-
	merge_properties(GraphIn, GraphOut, Options).

same_so(S, O, rdf(S, _, O)).
pred(rdf(_,P,_), P).

mk_p_triples([], _, _) --> [].
mk_p_triples([P|T], S, O) -->
	[rdf(S,P,O)],
	mk_p_triples(T, S, O).

sub_property_of(P, Super) :-
	rdf_has(P, rdfs:subPropertyOf, Super).


%%	common_ancestor_forest(:Pred, +Objects, -Forest) is det.
%
%	Forest is a minimal set  of   minimal  spanning  trees with real
%	branching (more than one child per   node) covering all Objects.
%	The partial ordering is defined   by  the non-deterministic goal
%	call(Pred, +Node, -Parent).
%
%		* Build up a graph represented as Node->Children and
%		a list of roots.  The initial list of roots is Objects.
%		The graph is built using breath-first search to minimize
%		depth.
%
%		* Once we have all roots, we delete all branches that
%		have only a single child.
%
%	@param	Forest	is a list of trees.  Each tree is represented
%		as Root-Children, where Children is a possibly
%		empty list if sub-trees.
%
%	@tbd	First prune dead-ends?
%
%		==
%		rdf_db:rdf_global_term([ulan:assisted_by, ulan:cousin_of], In),
%		gtrace,
%		rdf_abstract:common_ancestor_forest(sub_property_of, In, Out).
%		==

:- meta_predicate
	common_ancestor_forest(2, +, -).

common_ancestor_forest(Pred, Objects, Forest) :-
	strip_module(Pred, M, P),
	sort(Objects, Objects1),
	keys_to_assoc(Objects1, target*[], Nodes0),
	ancestor_tree(Objects1, M:P, Nodes0, Nodes, Roots),
	prune_forest(Nodes, Roots, Forest),
	debug(common_ancestor, 'Ancestors of ~p: ~p', [Objects1, Forest]).

%%	keys_to_assoc(+Keys:list, +Value, -Assoc) is det.
%
%	True if Assoc is an assoc where each Key maps to Value.

keys_to_assoc(Keys, Value, Assoc) :-
	empty_assoc(Assoc0),
	keys_to_assoc(Keys, Assoc0, Value, Assoc).

keys_to_assoc([], Assoc, _, Assoc).
keys_to_assoc([H|T], Assoc0, Value, Assoc) :-
	put_assoc(H, Assoc0, Value, Assoc1),
	keys_to_assoc(T, Assoc1, Value, Assoc).


ancestor_tree(Objects, Pred, Nodes0, Nodes, Roots) :-
	ancestor_tree(Objects, [], Objects, Pred, Nodes0, Nodes, Roots).

%%	ancestor_tree(+Open, +Closed, +Targets, :Pred, +NodesIn,
%%		      -NodesOut, -Roots) is det.
%
%	Explore the ancestor graph one more step. This is the main loop
%	looking for a spanning tree.  We are done if
%
%		* There is only one open node left and no closed ones.
%		We found the single common root.
%
%		* No open nodes are left.  We have a set of closed roots
%		which form our starting points.  We still have to figure
%		out the minimal set of these, as some of the trees may
%		overlap others.
%
%		* We have an open node covering all targets. This is the
%		lowest one as we used breath-first expansion.  This step
%		is too expensive.

ancestor_tree([One], [], _, _, Nodes, Nodes, [One]) :- !.
ancestor_tree([], Closed, _, _, Nodes, Nodes, Closed) :- !.
ancestor_tree(Open, _, Objects, _, Nodes, Nodes, [One]) :-
	member(One, Open),
	tree_covers(One, Nodes, Objects), !.
ancestor_tree(Open, Closed, Objects, Pred, Nodes0, Nodes, Roots) :-
	expand_ancestor_tree(Open, NewOpen, NewClosed, Closed, Nodes0, Nodes1, Pred),
	ancestor_tree(NewOpen, NewClosed, Objects, Pred, Nodes1, Nodes, Roots).


%%	expand_ancestor_tree(+Open0, -Open,
%%			     +Closed0, -Closed,
%%			     +Nodes0, -Nodes,
%%			     :Pred)
%
%	Expand the explored graph with one level. Open are the currently
%	open nodes. Closed  are  the  nodes   that  have  no  parent and
%	therefore are roots.
%
%	@param Nodes	is an assoc R->(State*list(Child))

expand_ancestor_tree([], [], Closed, Closed, Nodes, Nodes, _).
expand_ancestor_tree([H|T], Open, Closed0, Closed, Nodes0, Nodes, Pred) :-
	setof(Parent, call(Pred, H, Parent), Parents), !,
	add_parents(Parents, H, Open, OpenT, Nodes0, Nodes1),
	expand_ancestor_tree(T, OpenT, Closed0, Closed, Nodes1, Nodes, Pred).
expand_ancestor_tree([H|T], Open, [H|ClosedT], Closed, Nodes0, Nodes, Pred) :-
	expand_ancestor_tree(T, Open, ClosedT, Closed, Nodes0, Nodes, Pred).


%%	add_parents(+Parents:list, +Child, -NR, +NRT, +Nodes0, -Nodes)
%
%	Add links Parent->Child to the tree  Nodes0. The difference list
%	NR\NRT contains Parents added new to the tree.

add_parents([], _, NP, NP, Nodes, Nodes).
add_parents([H|T], Child, NP, NPT, Nodes0, Nodes) :-
	in_tree(Child, H, Nodes0), !,
	add_parents(T, Child, NP, NPT, Nodes0, Nodes).
add_parents([H|T], Child, NP, NPT, Nodes0, Nodes) :-
	get_assoc(H,
		  Nodes0, State*Children,
		  Nodes1, State*[Child|Children]), !,
	add_parents(T, Child, NP, NPT, Nodes1, Nodes).
add_parents([H|T], Child, [H|NP], NPT, Nodes0, Nodes) :-
	put_assoc(H, Nodes0, node*[Child], Nodes1),
	add_parents(T, Child, NP, NPT, Nodes1, Nodes).


%%	in_tree(?Node, +Root, +Nodes) is nondet.
%
%	True if Node appears in the tree below Root.

in_tree(Node, Node, _).
in_tree(Node, Root, Nodes) :-
	get_assoc(Root, Nodes, _State*Children),
	member(Child, Children),
	in_tree(Node, Child, Nodes).


%%	prune_forest(+Nodes, +Roots, -MinimalForest) is det.
%
%	MinimalForest is the minimal forest overlapping all targets.
%
%	@tbd Currently doesn't remove unnecessary trees.

prune_forest(Nodes, Roots, Forest) :-
	maplist(prune_root(Nodes), Roots, Roots1),
	sort(Roots1, Roots2),
	maplist(prune_ancestor_tree(Nodes), Roots2, Forest0),
	sort(Forest0, Forest).

%%	prune_root(+Nodes, +Root0, -Root) is det.
%
%	Prune the parts of the search tree   that  ended up nowhere. The
%	first real branch is where  we  find   a  solution  or there are
%	multiple parents. This avoids  doing   double  work  pruning the
%	trees itself.

prune_root(Nodes, Root0, Root) :-
	get_assoc(Root0, Nodes, node*[One]), !,
	prune_root(Nodes, One, Root).
prune_root(_, Root, Root).

%%	prune_ancestor_tree(Nodes, Root, Tree) is det.
%
%	Tree is a pruned hierarchy from Root using the branching paths of
%	Nodes.

prune_ancestor_tree(Nodes, Root, Tree) :-
	get_assoc(Root, Nodes, Value),
	(   Value = node*[One]
	->  prune_ancestor_tree(Nodes, One, Tree)
	;   Tree = (Root-Children),
	    Value = _*Children0,
	    maplist(prune_ancestor_tree(Nodes), Children0, Children)
	).

%%	tree_covers(+Root, +Nodes, -Targets:list) is det.
%
%	True if Targets is the sorted  list   of  targets covered by the
%	tree for which Root is the root.

tree_covers(Root, Nodes, Targets) :-
	phrase(tree_covers(Root, Nodes), Targets0),
	sort(Targets0, Targets).

tree_covers(Root, Nodes) -->
	{ get_assoc(Root, Nodes, State*Children) },
	(   {State == target}
	->  [Root]
	;   []
	),
	tree_covers_list(Children, Nodes).

tree_covers_list([], _) -->
	[].
tree_covers_list([H|T], Nodes) -->
	tree_covers(H, Nodes),
	tree_covers_list(T, Nodes).


		 /*******************************
		 *	    PRIMITIVES		*
		 *******************************/

%%	map_graph(+GraphIn, +Map:assoc, -GraphOut) is det.
%
%	Map a graph to a new graph  by   mapping  all  fields of the RDF
%	statements over Map. Then delete   duplicates from the resulting
%	graph as well as rdf(S,P,S) links that did not appear before the
%	mapping.
%
%	@tbd	Should we look inside literals for mapped types?  That
%		would be consistent with abstract_graph/3.

map_graph(GraphIn, Map, GraphOut) :-
	phrase(map_triples(GraphIn, Map), Graph2),
	sort(Graph2, GraphOut).

map_triples([], _) -->
	[].
map_triples([H0|T0], Map) -->
	map_triple(H0, Map),
	map_triples(T0, Map).

map_triple(rdf(S0,P0,O0), Map) -->
	{ map_resource(S0, Map, S),
	  map_resource(P0, Map, P),
	  map_object(O0, Map, O)
	},
	(   { S == O, S0 \== O0 }
	->  []
	;   [ rdf(S,P,O) ]
	).

map_resource(N0, Map, N) :-
	get_assoc(N0, Map, N), !.
map_resource(N, _, N).

map_object(O0, Map, O) :-
	get_assoc(O0, Map, O), !.
map_object(literal(type(T0, V)), Map, L) :-
	get_assoc(T0, Map, T), !,
	L = literal(type(T, V)).
map_object(O, _, O).


%%	map_graph(+GraphIn, +Map:assoc, -GraphOut, -AbstractMap) is det.
%
%	Map a graph to a new graph  by   mapping  all  fields of the RDF
%	statements over Map. The nodes in these  graphs are terms of the
%	form Abstract-list(concrete).
%
%	@param AbstractMap assoc Abstract -> ordset(concrete)

map_graph(GraphIn, Map, GraphOut, AbstractMap) :-
	map_graph(GraphIn, Map, GraphOut),
	assoc_to_list(Map, ConcAbstr),	% Concrete->Abstract
	graph_nodes(GraphIn, AllConcrete),
	pairs_keys_intersection(ConcAbstr, AllConcrete, UsedConcAbstr),
	transpose_pairs(UsedConcAbstr, AbstrConc),
	group_pairs_by_key(AbstrConc, Grouped),
	list_to_assoc(Grouped, AbstractMap).


%%	pairs_keys_intersection(+Pairs, +Keys, -PairsInKeys) is det.
%
%	True if PairsInKeys is a subset  of   Pairs  whose key appear in
%	Keys. Pairs must be key-sorted and Keys must be sorted.  E.g.
%
%	==
%	?- pairs_keys_intersection([a-1,b-2,c-3], [a,c], X).
%	X = [a-1,c-3]
%	==

pairs_keys_intersection(Pairs, [K], Int) :- !,	% One key: happens quite often
	find_one_key(Pairs, K, Int).
pairs_keys_intersection([P1|TP], [K1|TK], Int) :- !,
	compare_pair_key(Diff, P1, K1),
	pairs_keys_isect(Diff, P1, TP, K1, TK, Int).
pairs_keys_intersection(_, _, []).

pairs_keys_isect(<, _, [P1|TP], K1, TK, Int) :- !,
	compare_pair_key(Diff, P1, K1),
	pairs_keys_isect(Diff, P1, TP, K1, TK, Int).
pairs_keys_isect(=, P, [P1|TP], K1, TK, [P|Int]) :- !,
	compare_pair_key(Diff, P1, K1),
	pairs_keys_isect(Diff, P1, TP, K1, TK, Int).
pairs_keys_isect(>, P1, TP, _, [K1|TK], Int) :- !,
	compare_pair_key(Diff, P1, K1),
	pairs_keys_isect(Diff, P1, TP, K1, TK, Int).
pairs_keys_isect(=, P, _, _, _, [P]) :- !.
pairs_keys_isect(_, _, _, _, _, []).

compare_pair_key(Order, K1-_, K2) :- !,
	compare(Order, K1, K2).

find_one_key([], _, []).
find_one_key([K0-V|T0], K, List) :-
	(   K0 == K
	->  List = [k0-V|T],
	    find_one_key(T0, K, T)
	;   find_one_key(T0, K, List)
	).


%%	map_to_bagged_graph(+GraphIn, +Map, -GraphOut, -Bags) is det.
%
%	GraphOut is a graph between  objects   and  bags, using the most
%	specific common ancestor for representing properties.

map_to_bagged_graph(GraphIn, Map, GraphOut, Bags) :-
	map_graph(GraphIn, Map, AbstractGraph, AbstractMap),
%	assertion(map_assoc(is_ordset, AbstractMap)),
	empty_assoc(Nodes),
	rdf_to_paired_graph(GraphIn, PairGraph),
	phrase(bagify_triples(AbstractGraph, PairGraph, AbstractMap,
			      Nodes, Bags, []),
	       GraphOut).

bagify_triples([], _, _, _, Bags, Bags) --> [].
bagify_triples([rdf(S0,_P,O0)|T], PairGraph, Map, Nodes, Bags, BagsT) -->
	{ bagify_resource(S0, S, Map, Nodes, Nodes1, Bags, BagsT0),
	  bagify_object(O0, O, Map, Nodes1, Nodes2, BagsT0, BagsT1),

					% normal properties
	  used_properties(S0, O0, PairGraph, Map, PList),
	  common_ancestor_forest(sub_property_of, PList, Forest),
	  debug(used_properties, 'Forest = ~p', [Forest]),
	  pairs_keys(Forest, PRoots),
					% inverse properties
	  used_properties(O0, S0, PairGraph, Map, IPList),
	  common_ancestor_forest(sub_property_of, IPList, IForest),
	  debug(used_properties, 'IForest = ~p', [IForest]),
	  pairs_keys(IForest, IPRoots)
	},
	mk_p_triples(PRoots, S, O),
	mk_p_triples(IPRoots, O, S),
	bagify_triples(T, PairGraph, Map, Nodes2, BagsT1, BagsT).


bagify_resource(R0, R, _Map, Nodes, Nodes) -->
	{ get_assoc(R0, Nodes, R) }, !.
bagify_resource(R0, BagID, Map, Nodes0, Nodes) -->
	{ get_assoc(R0, Map, Set), Set = [_,_|_], !,
	  atom_concat('_:rbag_', R0, BagID),
	  put_assoc(R0, Nodes0, BagID, Nodes)
	},
	make_rdf_graphs([BagID-Set]).
bagify_resource(R0, One, Map, Nodes, Nodes) -->
	{ get_assoc(R0, Map, [One]) }, !.
bagify_resource(R, R, _, Nodes, Nodes) --> [].

bagify_object(R0, R, Map, Nodes0, Nodes) -->
	bagify_resource(R0, R, Map, Nodes0, Nodes).


%%	rdf_to_paired_graph(+GraphIn, -PairedGraph) is det.
%
%	@param GraphIn		Graph as list(rdf(S,P,O))
%	@param PairedGraph	Graph as list(S-list(O-P)), where the
%				pair lists are key-sorted,

rdf_to_paired_graph(Triples, Pairs) :-
	subject_pairs(Triples, Pairs0),
	keysort(Pairs0, Pairs1),
	group_pairs_by_key(Pairs1, Pairs2),
	maplist(keysort_values, Pairs2, Pairs).

subject_pairs([], []).
subject_pairs([rdf(S,P,O)|T0], [S-(O-P)|T]) :-
	subject_pairs(T0, T).

keysort_values(K-V0, K-V) :-
	keysort(V0, V).


%%	used_properties(+S0, +O0, +GraphIn, +AbstractMap, -PredList) is det.
%
%	Find properties actually used between two   bags.  S0 and O0 are
%	the subject and object from the   abstract graph.
%
%	@param GraphIn	original concrete graph represented as pairs.
%			See rdf_to_paired_graph/2.
%	@param AbstractMap Assoc Abstract->Concrete, where Concrete is
%			an ordset of resources.

used_properties(S0, O0, GraphIn, Map, PList) :-
	get_assoc(S0, Map, SList),
	get_assoc(O0, Map, OList),
	pairs_keys_intersection(GraphIn, SList, Intersection),
	pairs_values(Intersection, OPList0),
	append(OPList0, OPList1),
	keysort(OPList1, OPList),
	pairs_keys_intersection(OPList, OList, IntPList),
	pairs_values(IntPList, PListDupl),
	sort(PListDupl, PList),
	debug(used_properties, '  --> ~p', [PList]).


%%	graph_resources(+Graph, -Resources:list(atom)) is det.
%
%	Resources is a sorted list  of   unique  resources  appearing in
%	Graph. All resources are in Resources,   regardless  of the role
%	played in the graph: node, edge (predicate)  or type for a typed
%	literal.
%
%	@see graph_resources/4 distinguishes the role of the resources.

graph_resources(Graph, Resources) :-
	graph_resources(Graph, R, P, P, T, T, [], _, _),
	sort(R, Resources).

%%	graph_nodes(+Graph, -Nodes) is det.
%
%	Nodes is a sorted list of   all resources and literals appearing
%	in Graph.
%
%	@tbd	Better name

graph_nodes(Graph, Nodes) :-
	graph_resources(Graph, Nodes0, P, P, L, _, _, L, []),
	sort(Nodes0, Nodes).


%%	graph_resources(+Graph,
%%			-Resources:list(atom),
%%			-Predicates:list(atom),
%%			-Types:list(atom)) is det.
%
%	Resources is a sorted list  of   unique  resources  appearing in
%	Graph as subject or object of a  triple. Predicates is a list of
%	all unique predicates in Graph and Types is a list of all unique
%	literal types in Graph.

graph_resources(Graph, Resources, Preds, Types) :-
	graph_resources(Graph, R, [], P, [], T, [], _, _),
	sort(R, Resources),
	sort(P, Preds),
	sort(T, Types).

graph_resources([], R, R, P, P, T, T, L, L).
graph_resources([rdf(S,P,O)|T], [S|RT0], RT, [P|PTl0], PTl, Tl0, Tl, L0, L) :-
	object_resources(O, RT0, RT1, Tl0, Tl1, L0, L1),
	graph_resources(T, RT1, RT, PTl0, PTl, Tl1, Tl, L1, L).


object_resources(O, R0, R, T0, T, L0, L) :-
	(   atom(O)
	->  R0 = [O|R], T0 = T, L0 = L
	;   O = literal(Val)
	->  R0 = R, L0 = [O|L],
	    (	Val = type(Type, _)
	    ->	T0 = [Type|T]
	    ;	T0 = T
	    )
	;   assertion(fail)
	).


		 /*******************************
		 *	      ABSTRACT		*
		 *******************************/

%%	abstract_graph(+GraphIn, -GraphOut, +Options) is det.
%
%	Unify GraphOut with  an  abstracted   version  of  GraphIn.  The
%	abstraction is carried out triple-by-triple.   Note  there is no
%	need to abstract all triples to the   same  level. We do however
%	need to map nodes in the graph consistently. I.e. if we abstract
%	the object of rdf(s,p,o), we must abstract the subject of rdf(o,
%	p2, o2) to the same resource.
%
%	If we want to do incremental growing   we  must keep track which
%	nodes where mapped to which resources.  Option?
%
%	We must also decide on the abstraction   level  for a node. This
%	can be based on the weight  in   the  search graph, the involved
%	properties and focus such  as  location   and  time.  Should  we
%	express this focus in the weight?
%
%	Options:
%
%	    * map_in(?Map)
%	    If present, this is the initial resource abstraction map.
%	    * map_out(-Map)
%	    Provide access to the final resource abstraction map.
%	    * bags(-Bags)
%	    If provided, _bagify_ the graph, returning the triples that
%	    define the bags in Bags. The full graph is created by
%	    appending Bags to GraphOut.
%	    * merge_concepts_with_super(+Boolean)
%	    If =true= (default), merge nodes of one is a super-concept
%	    of another.

abstract_graph(GraphIn, GraphOut, Options) :-
	map_in(Options, MapIn),
	graph_resources(GraphIn, Nodes, NT, Edges, [], _T0, _TT, NT, []),
	node_map(Nodes, MapIn, Map2, Options),
	edge_map(Edges, Map2, MapOut),
	map_out(Options, MapOut),
	(   option(bags(Bags), Options)
	->  map_to_bagged_graph(GraphIn, MapOut, GraphOut, Bags)
	;   map_graph(GraphIn, MapOut, GraphOut)
	).

map_in(Options, Map) :-
	option(map_in(Map), Options, Map),
	(var(Map) -> empty_assoc(Map) ; true).

map_out(Options, Map) :-
	option(map_out(Map), Options, _).

%%	node_map(+Nodes, +Map0, -Map, +Options) is det.
%
%	Create the abstraction map  for  the   nodes  of  the  graph. It
%	consists of two steps:
%
%	    1. Map all instances to their class, except for concepts
%	    2. If some instances are mapped to class A and others to
%	       class B, where A is a super-class of B, map all instances
%	       to class A.

node_map(Nodes, Map0, Map, Options) :-
	concepts_of(Nodes, Map0, Map1, _NewConcepts),
	(   option(merge_concepts_with_super(true), Options, true)
	->  assoc_to_values(Map1, Concepts),
	    sort(Concepts, Unique),
	    identity_map(Unique, SuperMap0),
	    find_broaders(Unique, SuperMap0, SuperMap1),
	    deref_map(SuperMap1, SuperMap),
	    map_assoc(map_over(SuperMap), Map1, Map)
	;   Map = Map1
	).

map_over(Map, V0, V) :-
	(   get_assoc(V0, Map, V1)
	->  V = V1
	;   V = V0
	).

concepts_of([], Map, Map, []).
concepts_of([R|T], Map0, Map, New) :-
	get_assoc(R, Map0, _), !,
	concepts_of(T, Map0, Map, New).
concepts_of([R|T], Map0, Map, [C|New]) :-
	concept_of(R, C),
	put_assoc(R, Map0, C, Map1),
	concepts_of(T, Map1, Map, New).

%%	identity_map(+List, -Map) is det.
%%	find_broaders(+List, +Map0, -Map) is det.
%%	deref_map(+Map0, -Map) is det.

identity_map(List, Map) :-
	map_list_to_pairs(=, List, Pairs),
	list_to_assoc(Pairs, Map).

find_broaders([], Map, Map).
find_broaders([C|T], Map0, Map) :-
	broader(C, Super),
	get_assoc(Super, Map0, SuperSuper), !,
	debug(rdf_abstract, 'Mapped ~p to super concept ~p', [C, SuperSuper]),
	put_assoc(C, Map0, SuperSuper, Map1),
	find_broaders(T, Map1, Map).
find_broaders([_|T], Map0, Map) :-
	find_broaders(T, Map0, Map).


deref_map(Map0, Map) :-
	findall(KV, mapped_kv(KV, Map0), Pairs),
	deref(Pairs, NewPairs),
	list_to_assoc(NewPairs, Map).

mapped_kv(K-V, Assoc) :-
	gen_assoc(K, Assoc, V),
	K \== V.

%%	deref(+Pairs0, NewPairs) is det.
%
%	Dereference chains V1-V2, V2-V3  into   V1-V3,  V2-V3. Note that
%	Pairs0 may contain cycles, in which case  all the members of the
%	cycle  are  replaced  by  the    representative  as  defined  by
%	rdf_representative/2.

deref(Pairs, NewPairs) :-
	list_to_assoc(Pairs, Assoc),
	deref(Pairs, Assoc, NewPairs).

deref([], _, []).
deref([K-V0|T0], Map, [K-V|T]) :-
	deref2(V0, Map, [V0], EqSet, V),
	(   EqSet == []
	->  deref(T0, Map, T)
	;   rdf_representative(EqSet, V),
	    deref_cycle(T0, EqSet, V, Cycle, T1),
	    append(Cycle, T2, T),
	    deref(T1, Map, T2)
	).

deref2(V0, Map, Visited, EqSet, V) :-
	get_assoc(V0, Map, V1), !,
	(   memberchk(V1, Visited)
	->  EqSet = Visited
	;   deref2(V1, Map, [V1|Visited], EqSet, V)
	).
deref2(V, _, _, [], V).

deref_cycle([], _, _, [], []).
deref_cycle([K-V0|T0], EqSet, V, [K-V|CT], Rest) :-
	memberchk(V0, EqSet), !,
	deref_cycle(T0, EqSet, V, CT, Rest).
deref_cycle([H|T0], EqSet, V, CT, [H|RT]) :-
	deref_cycle(T0, EqSet, V, CT, RT).


%%	edge_map(+Edges, +MapIn, -MapOut) is det.

edge_map([], Map, Map).
edge_map([R|T], Map0, Map) :-
	get_assoc(R, Map0, _), !,
	edge_map(T, Map0, Map).
%edge_map([R|T], Map0, Map) :-
%	iface_abstract_predicate(R, C),
%	put_assoc(R, Map0, C, Map1),
%	edge_map(T, Map1, Map).

%%	concept_of(+Resource, -Concept) is det.
%
%	True if Concept is the concept Resource belongs to.  If Resource
%	is a concept itself, Concept is Resource.
%
%	@tbd	Make thesaurus concept classes a subclass of skos:Class.
%	@tbd	Put in a reusable place, merge with kwd_search.pl

concept_of(O, O) :-
	rdfs_individual_of(O, skos:'Concept'), !.
concept_of(O, C) :-
	rdf_has(O, rdf:type, C), !.
concept_of(O, O).

%%	broader(+Term, -Broader) is nondet.
%
%	True if Broader is a broader term according to the SKOS schema.
%
%	@tbd Deal with owl:sameAs (and skos:exactMatch)

broader(Term, Broader) :-
	rdf_reachable(Term, skos:broader, Broader),
	Broader \== Term.

%%	rdf_representative(+Resources:list, -Representative:atom) is det.
%
%	Representative is the most popular   resource from the non-empty
%	list  Resources.  The  preferred   representative  is  currently
%	defined as the resource with the   highest  number of associated
%	edges.
%
%	@tbd	Think about the function.  Use sum of logs or sum of sqrt?

rdf_representative(List, Representative) :-
	(   exclude(rdf_is_bnode, List, NonBNodes),
	    NonBNodes \== []
	->  representative(NonBNodes, Representative)
	;   representative(List, Representative)
	).

representative([H], Representative) :- !,
	Representative = H.
representative([H|T], Representative) :-
	fan_in_out(H, Fan0),
	best(T, Fan0, H, Representative).

best([], _, R, R).
best([H|T], S0, R0, R) :-
	fan_in_out(H, S1),
	(   S1 > S0
	->  best(T, S1, H, R)
	;   best(T, S0, R0, R)
	).

fan_in_out(R, Fan) :-
	count(rdf(R, _, _), 100, FanOut),
	count(rdf(_, _, R), 100, FanIn),
	Fan is FanOut + FanIn.


		 /*******************************
		 *	      SIMPLIFY		*
		 *******************************/

%%	minimise_graph(+GraphIn, -GraphOut) is det.
%
%	Remove redudant triples from  a   graph.  Redundant  triples are
%	defined as:
%
%		* Super-properties of another property
%		* Inverse
%		* Symetric
%		* Entailed transitive
%
%	@tbd Implement entailed transitive

minimise_graph(RDF0, RDF) :-
	partition(object_triple, RDF0, ObjRDF, LitRDF),
	map_list_to_pairs(os_rdf, ObjRDF, Pairs),
	group_pairs_by_key(Pairs, Grouped),
	maplist(key_remove_reduntant_relations, Grouped, MinGroups),
	append([LitRDF|MinGroups], RDF).

object_triple(rdf(_,_,O)) :-
	atom(O).

os_rdf(rdf(S,_,O), (A+B)) :-
	(   S @< O
	->  A = S, B = O
	;   A = O, B = S
	).

key_remove_reduntant_relations(_-Rs0, Rs) :-
	remove_reduntant_relations(Rs0, Rs).

remove_reduntant_relations([R], [R]) :- !.
remove_reduntant_relations(List0, List) :-
	select(rdf(S,P1,O), List0, List1),
	select(rdf(S,P2,O), List1, List2),
	rdfs_subproperty_of(P1, P2), !,
	remove_reduntant_relations([rdf(S,P1,O)|List2], List).
remove_reduntant_relations(List0, List) :-
	select(rdf(S,P,O), List0, List1),
	select(rdf(O,P,S), List1, List2),
	rdfs_individual_of(P, owl:'SymmetricProperty'), !,
	remove_reduntant_relations([rdf(S,P,O)|List2], List).
remove_reduntant_relations(List0, List) :-
	select(rdf(S,P1,O), List0, List1),
	select(rdf(O,P2,S), List1, List2),
	rdf_has(P1, owl:inverseOf, P2), !,
	remove_reduntant_relations([rdf(S,P2,O)|List2], List).
remove_reduntant_relations(List, List).


		 /*******************************
		 *		UTIL		*
		 *******************************/

:- meta_predicate
	count(:, +, -).

count(G, Max, Count) :-
        C = c(0),
        (   G,
            arg(1, C, C0),
            C1 is C0+1,
            nb_setarg(1, C, C1),
            C1 == Max
        ->  Count = Max
        ;   arg(1, C, Count)
        ).