View source with raw comments or as raw
    1/*  Part of ClioPatria SeRQL and SPARQL server
    2
    3    Author:        Jan Wielemaker
    4    E-mail:        J.Wielemaker@cs.vu.nl
    5    WWW:           http://www.swi-prolog.org
    6    Copyright (C): 2007-2010, University of Amsterdam,
    7			      VU University Amsterdam
    8
    9    This program is free software; you can redistribute it and/or
   10    modify it under the terms of the GNU General Public License
   11    as published by the Free Software Foundation; either version 2
   12    of the License, or (at your option) any later version.
   13
   14    This program is distributed in the hope that it will be useful,
   15    but WITHOUT ANY WARRANTY; without even the implied warranty of
   16    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   17    GNU General Public License for more details.
   18
   19    You should have received a copy of the GNU General Public
   20    License along with this library; if not, write to the Free Software
   21    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
   22
   23    As a special exception, if you link this library with other files,
   24    compiled with a Free Software compiler, to produce an executable, this
   25    library does not by itself cause the resulting executable to be covered
   26    by the GNU General Public License. This exception does not however
   27    invalidate any other reasons why the executable file might be covered by
   28    the GNU General Public License.
   29*/
   30
   31:- module(rdf_abstract,
   32	  [ merge_sameas_graph/3,	% +GraphIn, -GraphOut, +Options
   33	    bagify_graph/4,		% +GraphIn, -GraphOut, -Bags, +Options
   34	    abstract_graph/3,		% +GraphIn, -GraphOut, +Options
   35	    minimise_graph/2,		% +GraphIn, -GraphOut
   36
   37	    graph_resources/2,		% +Graph, -Resources
   38	    graph_resources/4		% +Graph, -Resources, -Predicates, -Types
   39	  ]).   40:- use_module(library(semweb/rdf_db)).   41:- use_module(library(semweb/rdfs)).   42:- use_module(library(assoc)).   43:- use_module(library(option)).   44:- use_module(library(pairs)).   45:- use_module(library(ordsets)).   46:- use_module(library(debug)).   47:- use_module(library(apply)).   48:- use_module(library(lists)).   49:- use_module(library(settings)).

Abstract RDF graphs

The task of this module is to do some simple manipulations on RDF graphs represented as lists of rdf(S,P,O). Supported operations:

merge_sameas_graph(+GraphIn, -GraphOut, +Options)
Merge nodes by owl:sameAs
bagify_graph(+GraphIn, -GraphOut, -Bags, +Options)
Bagify a graph, returning a new graph holding bags of resources playing a similar role in the graph.
abstract_graph(+GraphIn, -GraphOut, +Options)
Abstract nodes or edges using rdf:type, rdfs:subClassOf and/or rdfs:subPropertyOf */
 merge_sameas_graph(GraphIn, GraphOut, +Options) is det
Collapse nodes in GraphIn that are related through an identity mapping. By default, owl:sameAs is the identity relation. Options defines:
predicate(-PredOrList)
Use an alternate or list of predicates that are to be treated as identity relations.
sameas_mapped(-Assoc)
Assoc from resources to the resource it was mapped to.
   81:- rdf_meta
   82	merge_sameas_graph(+, -, t).   83
   84merge_sameas_graph(GraphIn, GraphOut, Options) :-
   85	sameas_spec(Options, SameAs),
   86	sameas_map(GraphIn, SameAs, Assoc),		% R->EqSet
   87	(   empty_assoc(Assoc)
   88	->  GraphOut = GraphIn,
   89	    empty_assoc(EqMap)
   90	;   assoc_to_list(Assoc, List),
   91	    pairs_values(List, EqSets),
   92	    sort(EqSets, UniqueEqSets),
   93	    map_list_to_pairs(rdf_representative, UniqueEqSets, Keyed),	% Repr-EqSet
   94	    representer_map(Keyed, EqMap),
   95	    map_graph(GraphIn, EqMap, GraphOut),
   96	    (	debugging(abstract)
   97	    ->	length(GraphIn, Before),
   98		length(GraphOut, After),
   99		debug(abstract, 'owl:sameAs reduction: ~D --> ~D edges', [Before, After])
  100	    ;	true
  101	    )
  102	),
  103	option(sameas_mapped(EqMap), Options, _).
  104
  105sameas_spec(Options, SameAs) :-
  106	rdf_equal(owl:sameAs, OwlSameAs),
  107	option(predicate(SameAs0), Options, OwlSameAs),
  108	(   is_list(SameAs0)
  109	->  SameAs = SameAs0
  110	;   SameAs = [SameAs0]
  111	).
 sameas_map(+Graph, +SameAs, -Map:assoc) is det
Create an assoc with R->Set, where Set contains an ordered set of resources equivalent to R.
  118sameas_map(Graph, SameAs, Assoc) :-
  119	empty_assoc(Assoc0),
  120	sameas_map(Graph, SameAs, Assoc0, Assoc).
  121
  122sameas_map([], _, Assoc, Assoc).
  123sameas_map([rdf(S, P, O)|T], SameAs, Assoc0, Assoc) :-
  124	same_as(P, SameAs),
  125	S \== O, !,
  126	(   get_assoc(S, Assoc0, SetS)
  127	->  (   get_assoc(O, Assoc0, SetO)
  128	    ->	ord_union(SetO, SetS, Set)
  129	    ;	ord_union([O], SetS, Set)
  130	    )
  131	;   (   get_assoc(O, Assoc0, SetO)
  132	    ->	ord_union([S], SetO, Set)
  133	    ;   sort([S,O], Set)
  134	    )
  135	),
  136	putall(Set, Assoc0, Set, Assoc1),
  137	sameas_map(T, SameAs, Assoc1, Assoc).
  138sameas_map([_|T], SameAs, Assoc0, Assoc) :-
  139	sameas_map(T, SameAs, Assoc0, Assoc).
  140
  141putall([], Assoc, _, Assoc).
  142putall([H|T], Assoc0, Value, Assoc) :-
  143	put_assoc(H, Assoc0, Value, Assoc1),
  144	putall(T, Assoc1, Value, Assoc).
 same_as(+Predicate:resource, +SameAs:list) is semidet
True if Predicate expresses a same-as mapping.
  151same_as(P, Super) :-
  152	member(S, Super),
  153	rdfs_subproperty_of(P, S), !.
 representer_map(+List:list(Repr-Set), -Assoc) is det
Assoc maps all elements of Set to its representer.
  160representer_map(Keyed, EqMap) :-
  161	empty_assoc(Assoc0),
  162	representer_map(Keyed, Assoc0, EqMap).
  163
  164representer_map([], Assoc, Assoc).
  165representer_map([R-Set|T], Assoc0, Assoc) :-
  166	putall(Set, Assoc0, R, Assoc1),
  167	representer_map(T, Assoc1, Assoc).
  168
  169
  170		 /*******************************
  171		 *	       BAGIFY		*
  172		 *******************************/
 bagify_graph(+GraphIn, -GraphOut, -Bags, +Options) is det
If a graph contains multiple objects of the same type (class) in the same location in the graph (i.e. all links are the same), create a bag. The bag is represented by a generated resource of type rdf:Bag and the RDF for the bags is put in Bags. I.e. appending GraphOut and Bags provides a proper RDF model. Options provides additional abstraction properties. In particular:
class(+Class)
Try to bundle objects under Class rather than their rdf:type. Multiple of these options may be defined
property(+Property)
Consider predicates that are an rdfs:subPropertyOf Property the same relations.
bagify_literals(+Bool)
If true (default), also try to put literals into a bag. Works well to collapse non-preferred labels.
To be done
- Handle the property option
  197:- rdf_meta
  198	bagify_graph(+, -, -, t).  199
  200bagify_graph(GraphIn, GraphOut, Bags, Options) :-
  201	canonise_options(Options, Options1),
  202	partition_options(class, Options1, ClassOptions, Options2),
  203	graph_node_edges(GraphIn, AssocNodesToEdges, Options2),
  204	assoc_to_list(AssocNodesToEdges, NodesToEdges),
  205	pairs_keys(NodesToEdges, Nodes),
  206	group_resources_by_class(Nodes, ByClass, ClassOptions),
  207	resource_bags(ByClass, NodesToEdges, RawBags),
  208	(   debugging(abstract)
  209	->  length(RawBags, Len),
  210	    maplist(length, RawBags, BagLens),
  211	    sumlist(BagLens, ObjCount),
  212	    debug(abstract, 'Created ~D bags holding ~D objects', [Len, ObjCount])
  213	;   true
  214	),
  215	assign_bagids(RawBags, IDBags),
  216	representer_map(IDBags, Assoc),
  217	map_graph(GraphIn, Assoc, GraphOut0),
  218	merge_properties(GraphOut0, GraphOut, Options2),
  219	make_rdf_graphs(IDBags, Bags).
  220
  221partition_options(Name, Options, WithName, WithoutName) :-
  222	partition(option_name(Name), Options, WithName, WithoutName).
  223
  224option_name(Name, Option) :-
  225	functor(Option, Name, 1).
 canonise_options(+OptionsIn, -OptionsOut) is det
Rewrite option list from possible Name=Value to Name(Value)
  231canonise_options(In, Out) :-
  232	memberchk(_=_, In), !,		% speedup a bit if already ok.
  233	canonise_options2(In, Out).
  234canonise_options(Options, Options).
  235
  236canonise_options2([], []).
  237canonise_options2([Name=Value|T0], [H|T]) :- !,
  238	H =.. [Name,Value],
  239	canonise_options2(T0, T).
  240canonise_options2([H|T0], [H|T]) :- !,
  241	canonise_options2(T0, T).
 group_resources_by_class(+Resources, -ByClass, +Options) is det
ByClass is a list of lists of resources that belong to the same class. First step we process the classes specified in Options.
  251group_resources_by_class([], [], _) :- !.
  252group_resources_by_class(Resources, ByClass, Options) :-
  253	select_option(class(Class), Options, Options1), !,
  254	(   partition(has_class(sub_class, Class), Resources, InClass, NotInClass),
  255	    InClass \== []
  256	->  ByClass = [InClass|ByClass1],
  257	    group_resources_by_class(NotInClass, ByClass1, Options1)
  258	;   group_resources_by_class(Resources, ByClass, Options1)
  259	).
  260group_resources_by_class([H|T0], [[H|S]|T], Options) :-
  261	class_of(H, exact, Class),
  262	partition(has_class(exact, Class), T0, S, T1),
  263	group_resources_by_class(T1, T, Options).
 has_class(+Match, +Class, +Node) is semidet
  267has_class(Match, Class, Node) :-
  268	class_of(Node, Match, Class).
 class_of(+Node, +Match, -Class) is det
class_of(+Node, +Match, +Class) is semidet
  273class_of(Node, sub_class, Class) :- !,
  274	rdfs_individual_of(Node, Class), !.
  275class_of(literal(_), exact, Literal) :- !,
  276	rdf_equal(Literal, rdfs:'Literal').
  277class_of(R, exact, Class) :-
  278	rdf_has(R, rdf:type, Class), !.
  279class_of(_, exact, Class) :-
  280	rdf_equal(Class, rdfs:'Resource').
 resource_bags(+ByClass:list(list(resource)), +NodeToEdges:list(node-list(edges)), -RawBags:list(list(resource))) is det
Find bags of resources that have the same connections.
  289resource_bags(ByClass, NodeToEdges, Bags) :-
  290	phrase(resource_bags(ByClass, NodeToEdges), Bags).
  291
  292resource_bags([], _) -->
  293	[].
  294resource_bags([ByClassH|ByClassT], NodeToEdges) -->
  295	{ sort(ByClassH, SortedNodes),
  296	  ord_subkeys(SortedNodes, NodeToEdges, SubNodeToEdges),
  297	  same_edges(SubNodeToEdges, Bags)
  298	},
  299	Bags,
  300	resource_bags(ByClassT, NodeToEdges).
 ord_subkeys(+Keys, +Pairs, -SubPairs) is det
SubPairs is the sublist of Pairs with a key in Keys.
Arguments:
Keys- Sorted list of keys
Pairs- Key-sorted pair-list
SubPairs- Key-sorted pair-list
  310ord_subkeys([], _, []).
  311ord_subkeys([K|KT], [P|PT], Pairs) :-
  312	P = PK-_,
  313	compare(Diff, K, PK),
  314	ord_subkeys(Diff, K, KT, P, PT, Pairs).
  315
  316ord_subkeys(=, _, KT, P, PT, [P|Pairs]) :- !,
  317	ord_subkeys(KT, PT, Pairs).
  318ord_subkeys(<, _, [K|KT], P, PT, Pairs) :-
  319	P = PK-_,
  320	compare(Diff, K, PK),
  321	ord_subkeys(Diff, K, KT, P, PT, Pairs).
  322ord_subkeys(>, K, KT, _, [P|PT], Pairs) :-
  323	P = PK-_,
  324	compare(Diff, K, PK),
  325	ord_subkeys(Diff, K, KT, P, PT, Pairs).
 same_edges(+NodeToEdges:list(node-edges), -Bags:list(list)) is det
Bags is a list of lists of resources (nodes) that share the same (abstracted) edges with the rest of the graph.
  333same_edges(NodeToEdges, Bags) :-
  334	transpose_pairs(NodeToEdges, ByEdges),		% list(edges-node)
  335	keysort(ByEdges, Sorted),
  336	group_pairs_by_key(Sorted, Grouped),
  337	pairs_values(Grouped, AllBySameEdge),
  338	include(longer_than_one, AllBySameEdge, Bags).
  339
  340longer_than_one([_,_|_]).
 graph_node_edges(+Graph, -NodeEdges:assoc, +Options) is det
NodeEdges is an assoc from resource to a sorted list of involved triples. Only subject and objects are considered.

Processes bagify_literals and property options

  349graph_node_edges(Graph, Assoc, Options) :-
  350	option(bagify_literals(LitToo), Options, true),
  351	property_map(Options, Map0),
  352	empty_assoc(Assoc0),
  353	graph_node_edges(Graph, LitToo, Map0, Assoc0, Assoc1),
  354	map_assoc(sort, Assoc1, Assoc).
  355
  356graph_node_edges([], _, _, Assoc, Assoc).
  357graph_node_edges([rdf(S,P,O)|T], LitToo, Map, Assoc0, Assoc) :-
  358	abstract_property(P, Map, SP, Map1),
  359	add_assoc(S, Assoc0, rdf(-, SP, O), Assoc1),
  360	(   (atom(O) ; LitToo == true )
  361	->  add_assoc(O, Assoc1, rdf(S, SP, -), Assoc2)
  362	;   Assoc2 = Assoc1
  363	),
  364	graph_node_edges(T, LitToo, Map1, Assoc2, Assoc).
  365
  366add_assoc(Key, Assoc0, Value, Assoc) :-
  367	get_assoc(Key, Assoc0, Old, Assoc, [Value|Old]), !.
  368add_assoc(Key, Assoc0, Value, Assoc) :-
  369	put_assoc(Key, Assoc0, [Value], Assoc).
 property_map(+Options, -Map:assoc(P-Super))
Process the options, creating a map that replaces a property by its registered super.
  377property_map(Options, Map) :-
  378	empty_assoc(Map0),
  379	property_map(Options, Map0, Map).
  380
  381property_map([], Map, Map).
  382property_map([property(P)|T], Map0, Map) :- !,
  383	(   rdfs_subproperty_of(P, Super),
  384	    get_assoc(Super, Map0, Root)
  385	->  put_assoc(P, Map0, Root, Map1)
  386	;   put_assoc(P, Map0, P, Map1)
  387	),
  388	property_map(T, Map1, Map).
  389property_map([_|T], Map0, Map) :-
  390	property_map(T, Map0, Map).
 abstract_property(+P0, +Map0, -P, -Map) is det
Find the abstract property for some property P.
  396abstract_property(P0, Map0, P, Map) :-
  397	get_assoc(P0, Map0, P), !,
  398	Map = Map0.
  399abstract_property(P, Map0, Root, Map) :-
  400	rdfs_subproperty_of(P, Super),
  401	get_assoc(Super, Map0, Root), !,
  402	debug(abstract(property), 'Mapped ~p --> ~p', [P, Root]),
  403	put_assoc(P, Map0, Root, Map).
  404abstract_property(P, Map, P, Map).
 assign_bagids(+Bags:list(bag), -IDBags:list(id-bag))
Assign bag identifiers to the each bag in Bags.
  411assign_bagids(Bags, IDBags) :-
  412	assign_bagids(Bags, 1, IDBags).
  413
  414assign_bagids([], _, []).
  415assign_bagids([H|T0], I, [Id-H|T]) :-
  416	atom_concat('_:bag_', I, Id),
  417	I2 is I + 1,
  418	assign_bagids(T0, I2, T).
 make_rdf_graphs(+IDBags, -RDFBags) is det
Translate BagID-Members into an RDF graph.
  425:- rdf_meta
  426	statement(r,r,o,?,?).			% statement//3
  427
  428make_rdf_graphs(IDBags, RDFBags) :-
  429	phrase(make_rdf_graphs(IDBags), RDFBags).
  430
  431make_rdf_graphs([]) -->
  432	[].
  433make_rdf_graphs([ID-Members|T]) -->
  434	statement(ID, rdf:type, rdf:'Bag'),
  435	bag_members(Members, 0, ID),
  436	make_rdf_graphs(T).
  437
  438bag_members([], _, _) -->
  439	[].
  440bag_members([H|T], I, ID) -->
  441	{ I2 is I + 1,
  442	  atom_concat('_:', I, P)
  443	},
  444	statement(ID, P, H),
  445	bag_members(T, I2, ID).
  446
  447statement(S, P, O) -->
  448	[ rdf(S, P, O) ].
  449
  450
  451
  452		 /*******************************
  453		 *	 MERGE PROPERTIES	*
  454		 *******************************/
 merge_properties(+GraphIn, -GraphOut, +Options) is det
Merge equivalent properties joining the same nodes. They are replaced by their common ancestors.
Arguments:
GraphIn- List of rdf(S,P,O)
GraphOut- List of rdf(S,P,O)
Options- Option list (unused)
  465merge_properties([], [], _).
  466merge_properties([rdf(S,P,O)|GraphIn], GraphOut, Options) :-
  467	memberchk(rdf(S,_,O), GraphIn), !,
  468	partition(same_so(S,O), GraphIn, Same, Rest),
  469	maplist(pred, Same, Preds),
  470	sort([P|Preds], UniquePreds),
  471	common_ancestor_forest(sub_property_of, UniquePreds, Forest),
  472	pairs_keys(Forest, Roots),
  473	debug(abstract, 'Merged ~p --> ~p', [UniquePreds, Roots]),
  474	mk_p_triples(Roots, S, O, GraphOut, Out2),
  475	merge_properties(Rest, Out2, Options).
  476merge_properties([Triple|GraphIn], [Triple|GraphOut], Options) :-
  477	merge_properties(GraphIn, GraphOut, Options).
  478
  479same_so(S, O, rdf(S, _, O)).
  480pred(rdf(_,P,_), P).
  481
  482mk_p_triples([], _, _) --> [].
  483mk_p_triples([P|T], S, O) -->
  484	[rdf(S,P,O)],
  485	mk_p_triples(T, S, O).
  486
  487sub_property_of(P, Super) :-
  488	rdf_has(P, rdfs:subPropertyOf, Super).
 common_ancestor_forest(:Pred, +Objects, -Forest) is det
Forest is a minimal set of minimal spanning trees with real branching (more than one child per node) covering all Objects. The partial ordering is defined by the non-deterministic goal call(Pred, +Node, -Parent).
Arguments:
Forest- is a list of trees. Each tree is represented as Root-Children, where Children is a possibly empty list if sub-trees.
To be done
- First prune dead-ends?
rdf_db:rdf_global_term([ulan:assisted_by, ulan:cousin_of], In),
gtrace,
rdf_abstract:common_ancestor_forest(sub_property_of, In, Out).
  518:- meta_predicate
  519	common_ancestor_forest(2, +, -).  520
  521common_ancestor_forest(Pred, Objects, Forest) :-
  522	strip_module(Pred, M, P),
  523	sort(Objects, Objects1),
  524	keys_to_assoc(Objects1, target*[], Nodes0),
  525	ancestor_tree(Objects1, M:P, Nodes0, Nodes, Roots),
  526	prune_forest(Nodes, Roots, Forest),
  527	debug(common_ancestor, 'Ancestors of ~p: ~p', [Objects1, Forest]).
 keys_to_assoc(+Keys:list, +Value, -Assoc) is det
True if Assoc is an assoc where each Key maps to Value.
  533keys_to_assoc(Keys, Value, Assoc) :-
  534	empty_assoc(Assoc0),
  535	keys_to_assoc(Keys, Assoc0, Value, Assoc).
  536
  537keys_to_assoc([], Assoc, _, Assoc).
  538keys_to_assoc([H|T], Assoc0, Value, Assoc) :-
  539	put_assoc(H, Assoc0, Value, Assoc1),
  540	keys_to_assoc(T, Assoc1, Value, Assoc).
  541
  542
  543ancestor_tree(Objects, Pred, Nodes0, Nodes, Roots) :-
  544	ancestor_tree(Objects, [], Objects, Pred, Nodes0, Nodes, Roots).
 ancestor_tree(+Open, +Closed, +Targets, :Pred, +NodesIn, -NodesOut, -Roots) is det
Explore the ancestor graph one more step. This is the main loop looking for a spanning tree. We are done if
  564ancestor_tree([One], [], _, _, Nodes, Nodes, [One]) :- !.
  565ancestor_tree([], Closed, _, _, Nodes, Nodes, Closed) :- !.
  566ancestor_tree(Open, _, Objects, _, Nodes, Nodes, [One]) :-
  567	member(One, Open),
  568	tree_covers(One, Nodes, Objects), !.
  569ancestor_tree(Open, Closed, Objects, Pred, Nodes0, Nodes, Roots) :-
  570	expand_ancestor_tree(Open, NewOpen, NewClosed, Closed, Nodes0, Nodes1, Pred),
  571	ancestor_tree(NewOpen, NewClosed, Objects, Pred, Nodes1, Nodes, Roots).
 expand_ancestor_tree(+Open0, -Open, +Closed0, -Closed, +Nodes0, -Nodes, :Pred)
Expand the explored graph with one level. Open are the currently open nodes. Closed are the nodes that have no parent and therefore are roots.
Arguments:
Nodes- is an assoc R->(State*list(Child))
  585expand_ancestor_tree([], [], Closed, Closed, Nodes, Nodes, _).
  586expand_ancestor_tree([H|T], Open, Closed0, Closed, Nodes0, Nodes, Pred) :-
  587	setof(Parent, call(Pred, H, Parent), Parents), !,
  588	add_parents(Parents, H, Open, OpenT, Nodes0, Nodes1),
  589	expand_ancestor_tree(T, OpenT, Closed0, Closed, Nodes1, Nodes, Pred).
  590expand_ancestor_tree([H|T], Open, [H|ClosedT], Closed, Nodes0, Nodes, Pred) :-
  591	expand_ancestor_tree(T, Open, ClosedT, Closed, Nodes0, Nodes, Pred).
 add_parents(+Parents:list, +Child, -NR, +NRT, +Nodes0, -Nodes)
Add links Parent->Child to the tree Nodes0. The difference list NR\NRT contains Parents added new to the tree.
  599add_parents([], _, NP, NP, Nodes, Nodes).
  600add_parents([H|T], Child, NP, NPT, Nodes0, Nodes) :-
  601	in_tree(Child, H, Nodes0), !,
  602	add_parents(T, Child, NP, NPT, Nodes0, Nodes).
  603add_parents([H|T], Child, NP, NPT, Nodes0, Nodes) :-
  604	get_assoc(H,
  605		  Nodes0, State*Children,
  606		  Nodes1, State*[Child|Children]), !,
  607	add_parents(T, Child, NP, NPT, Nodes1, Nodes).
  608add_parents([H|T], Child, [H|NP], NPT, Nodes0, Nodes) :-
  609	put_assoc(H, Nodes0, node*[Child], Nodes1),
  610	add_parents(T, Child, NP, NPT, Nodes1, Nodes).
 in_tree(?Node, +Root, +Nodes) is nondet
True if Node appears in the tree below Root.
  617in_tree(Node, Node, _).
  618in_tree(Node, Root, Nodes) :-
  619	get_assoc(Root, Nodes, _State*Children),
  620	member(Child, Children),
  621	in_tree(Node, Child, Nodes).
 prune_forest(+Nodes, +Roots, -MinimalForest) is det
MinimalForest is the minimal forest overlapping all targets.
To be done
- Currently doesn't remove unnecessary trees.
  630prune_forest(Nodes, Roots, Forest) :-
  631	maplist(prune_root(Nodes), Roots, Roots1),
  632	sort(Roots1, Roots2),
  633	maplist(prune_ancestor_tree(Nodes), Roots2, Forest0),
  634	sort(Forest0, Forest).
 prune_root(+Nodes, +Root0, -Root) is det
Prune the parts of the search tree that ended up nowhere. The first real branch is where we find a solution or there are multiple parents. This avoids doing double work pruning the trees itself.
  643prune_root(Nodes, Root0, Root) :-
  644	get_assoc(Root0, Nodes, node*[One]), !,
  645	prune_root(Nodes, One, Root).
  646prune_root(_, Root, Root).
 prune_ancestor_tree(Nodes, Root, Tree) is det
Tree is a pruned hierarchy from Root using the branching paths of Nodes.
  653prune_ancestor_tree(Nodes, Root, Tree) :-
  654	get_assoc(Root, Nodes, Value),
  655	(   Value = node*[One]
  656	->  prune_ancestor_tree(Nodes, One, Tree)
  657	;   Tree = (Root-Children),
  658	    Value = _*Children0,
  659	    maplist(prune_ancestor_tree(Nodes), Children0, Children)
  660	).
 tree_covers(+Root, +Nodes, -Targets:list) is det
True if Targets is the sorted list of targets covered by the tree for which Root is the root.
  667tree_covers(Root, Nodes, Targets) :-
  668	phrase(tree_covers(Root, Nodes), Targets0),
  669	sort(Targets0, Targets).
  670
  671tree_covers(Root, Nodes) -->
  672	{ get_assoc(Root, Nodes, State*Children) },
  673	(   {State == target}
  674	->  [Root]
  675	;   []
  676	),
  677	tree_covers_list(Children, Nodes).
  678
  679tree_covers_list([], _) -->
  680	[].
  681tree_covers_list([H|T], Nodes) -->
  682	tree_covers(H, Nodes),
  683	tree_covers_list(T, Nodes).
  684
  685
  686		 /*******************************
  687		 *	    PRIMITIVES		*
  688		 *******************************/
 map_graph(+GraphIn, +Map:assoc, -GraphOut) is det
Map a graph to a new graph by mapping all fields of the RDF statements over Map. Then delete duplicates from the resulting graph as well as rdf(S,P,S) links that did not appear before the mapping.
To be done
- Should we look inside literals for mapped types? That would be consistent with abstract_graph/3.
  700map_graph(GraphIn, Map, GraphOut) :-
  701	phrase(map_triples(GraphIn, Map), Graph2),
  702	sort(Graph2, GraphOut).
  703
  704map_triples([], _) -->
  705	[].
  706map_triples([H0|T0], Map) -->
  707	map_triple(H0, Map),
  708	map_triples(T0, Map).
  709
  710map_triple(rdf(S0,P0,O0), Map) -->
  711	{ map_resource(S0, Map, S),
  712	  map_resource(P0, Map, P),
  713	  map_object(O0, Map, O)
  714	},
  715	(   { S == O, S0 \== O0 }
  716	->  []
  717	;   [ rdf(S,P,O) ]
  718	).
  719
  720map_resource(N0, Map, N) :-
  721	get_assoc(N0, Map, N), !.
  722map_resource(N, _, N).
  723
  724map_object(O0, Map, O) :-
  725	get_assoc(O0, Map, O), !.
  726map_object(literal(type(T0, V)), Map, L) :-
  727	get_assoc(T0, Map, T), !,
  728	L = literal(type(T, V)).
  729map_object(O, _, O).
 map_graph(+GraphIn, +Map:assoc, -GraphOut, -AbstractMap) is det
Map a graph to a new graph by mapping all fields of the RDF statements over Map. The nodes in these graphs are terms of the form Abstract-list(concrete).
Arguments:
AbstractMap- assoc Abstract -> ordset(concrete)
  740map_graph(GraphIn, Map, GraphOut, AbstractMap) :-
  741	map_graph(GraphIn, Map, GraphOut),
  742	assoc_to_list(Map, ConcAbstr),	% Concrete->Abstract
  743	graph_nodes(GraphIn, AllConcrete),
  744	pairs_keys_intersection(ConcAbstr, AllConcrete, UsedConcAbstr),
  745	transpose_pairs(UsedConcAbstr, AbstrConc),
  746	group_pairs_by_key(AbstrConc, Grouped),
  747	list_to_assoc(Grouped, AbstractMap).
 pairs_keys_intersection(+Pairs, +Keys, -PairsInKeys) is det
True if PairsInKeys is a subset of Pairs whose key appear in Keys. Pairs must be key-sorted and Keys must be sorted. E.g.
?- pairs_keys_intersection([a-1,b-2,c-3], [a,c], X).
X = [a-1,c-3]
  760pairs_keys_intersection(Pairs, [K], Int) :- !,	% One key: happens quite often
  761	find_one_key(Pairs, K, Int).
  762pairs_keys_intersection([P1|TP], [K1|TK], Int) :- !,
  763	compare_pair_key(Diff, P1, K1),
  764	pairs_keys_isect(Diff, P1, TP, K1, TK, Int).
  765pairs_keys_intersection(_, _, []).
  766
  767pairs_keys_isect(<, _, [P1|TP], K1, TK, Int) :- !,
  768	compare_pair_key(Diff, P1, K1),
  769	pairs_keys_isect(Diff, P1, TP, K1, TK, Int).
  770pairs_keys_isect(=, P, [P1|TP], K1, TK, [P|Int]) :- !,
  771	compare_pair_key(Diff, P1, K1),
  772	pairs_keys_isect(Diff, P1, TP, K1, TK, Int).
  773pairs_keys_isect(>, P1, TP, _, [K1|TK], Int) :- !,
  774	compare_pair_key(Diff, P1, K1),
  775	pairs_keys_isect(Diff, P1, TP, K1, TK, Int).
  776pairs_keys_isect(=, P, _, _, _, [P]) :- !.
  777pairs_keys_isect(_, _, _, _, _, []).
  778
  779compare_pair_key(Order, K1-_, K2) :- !,
  780	compare(Order, K1, K2).
  781
  782find_one_key([], _, []).
  783find_one_key([K0-V|T0], K, List) :-
  784	(   K0 == K
  785	->  List = [k0-V|T],
  786	    find_one_key(T0, K, T)
  787	;   find_one_key(T0, K, List)
  788	).
 map_to_bagged_graph(+GraphIn, +Map, -GraphOut, -Bags) is det
GraphOut is a graph between objects and bags, using the most specific common ancestor for representing properties.
  796map_to_bagged_graph(GraphIn, Map, GraphOut, Bags) :-
  797	map_graph(GraphIn, Map, AbstractGraph, AbstractMap),
  798%	assertion(map_assoc(is_ordset, AbstractMap)),
  799	empty_assoc(Nodes),
  800	rdf_to_paired_graph(GraphIn, PairGraph),
  801	phrase(bagify_triples(AbstractGraph, PairGraph, AbstractMap,
  802			      Nodes, Bags, []),
  803	       GraphOut).
  804
  805bagify_triples([], _, _, _, Bags, Bags) --> [].
  806bagify_triples([rdf(S0,_P,O0)|T], PairGraph, Map, Nodes, Bags, BagsT) -->
  807	{ bagify_resource(S0, S, Map, Nodes, Nodes1, Bags, BagsT0),
  808	  bagify_object(O0, O, Map, Nodes1, Nodes2, BagsT0, BagsT1),
  809
  810					% normal properties
  811	  used_properties(S0, O0, PairGraph, Map, PList),
  812	  common_ancestor_forest(sub_property_of, PList, Forest),
  813	  debug(used_properties, 'Forest = ~p', [Forest]),
  814	  pairs_keys(Forest, PRoots),
  815					% inverse properties
  816	  used_properties(O0, S0, PairGraph, Map, IPList),
  817	  common_ancestor_forest(sub_property_of, IPList, IForest),
  818	  debug(used_properties, 'IForest = ~p', [IForest]),
  819	  pairs_keys(IForest, IPRoots)
  820	},
  821	mk_p_triples(PRoots, S, O),
  822	mk_p_triples(IPRoots, O, S),
  823	bagify_triples(T, PairGraph, Map, Nodes2, BagsT1, BagsT).
  824
  825
  826bagify_resource(R0, R, _Map, Nodes, Nodes) -->
  827	{ get_assoc(R0, Nodes, R) }, !.
  828bagify_resource(R0, BagID, Map, Nodes0, Nodes) -->
  829	{ get_assoc(R0, Map, Set), Set = [_,_|_], !,
  830	  atom_concat('_:rbag_', R0, BagID),
  831	  put_assoc(R0, Nodes0, BagID, Nodes)
  832	},
  833	make_rdf_graphs([BagID-Set]).
  834bagify_resource(R0, One, Map, Nodes, Nodes) -->
  835	{ get_assoc(R0, Map, [One]) }, !.
  836bagify_resource(R, R, _, Nodes, Nodes) --> [].
  837
  838bagify_object(R0, R, Map, Nodes0, Nodes) -->
  839	bagify_resource(R0, R, Map, Nodes0, Nodes).
 rdf_to_paired_graph(+GraphIn, -PairedGraph) is det
Arguments:
GraphIn- Graph as list(rdf(S,P,O))
PairedGraph- Graph as list(S-list(O-P)), where the pair lists are key-sorted,
  848rdf_to_paired_graph(Triples, Pairs) :-
  849	subject_pairs(Triples, Pairs0),
  850	keysort(Pairs0, Pairs1),
  851	group_pairs_by_key(Pairs1, Pairs2),
  852	maplist(keysort_values, Pairs2, Pairs).
  853
  854subject_pairs([], []).
  855subject_pairs([rdf(S,P,O)|T0], [S-(O-P)|T]) :-
  856	subject_pairs(T0, T).
  857
  858keysort_values(K-V0, K-V) :-
  859	keysort(V0, V).
 used_properties(+S0, +O0, +GraphIn, +AbstractMap, -PredList) is det
Find properties actually used between two bags. S0 and O0 are the subject and object from the abstract graph.
Arguments:
GraphIn- original concrete graph represented as pairs. See rdf_to_paired_graph/2.
AbstractMap- Assoc Abstract->Concrete, where Concrete is an ordset of resources.
  872used_properties(S0, O0, GraphIn, Map, PList) :-
  873	get_assoc(S0, Map, SList),
  874	get_assoc(O0, Map, OList),
  875	pairs_keys_intersection(GraphIn, SList, Intersection),
  876	pairs_values(Intersection, OPList0),
  877	append(OPList0, OPList1),
  878	keysort(OPList1, OPList),
  879	pairs_keys_intersection(OPList, OList, IntPList),
  880	pairs_values(IntPList, PListDupl),
  881	sort(PListDupl, PList),
  882	debug(used_properties, '  --> ~p', [PList]).
 graph_resources(+Graph, -Resources:list(atom)) is det
Resources is a sorted list of unique resources appearing in Graph. All resources are in Resources, regardless of the role played in the graph: node, edge (predicate) or type for a typed literal.
See also
- graph_resources/4 distinguishes the role of the resources.
  894graph_resources(Graph, Resources) :-
  895	graph_resources(Graph, R, P, P, T, T, [], _, _),
  896	sort(R, Resources).
 graph_nodes(+Graph, -Nodes) is det
Nodes is a sorted list of all resources and literals appearing in Graph.
To be done
- Better name
  905graph_nodes(Graph, Nodes) :-
  906	graph_resources(Graph, Nodes0, P, P, L, _, _, L, []),
  907	sort(Nodes0, Nodes).
 graph_resources(+Graph, -Resources:list(atom), -Predicates:list(atom), -Types:list(atom)) is det
Resources is a sorted list of unique resources appearing in Graph as subject or object of a triple. Predicates is a list of all unique predicates in Graph and Types is a list of all unique literal types in Graph.
  920graph_resources(Graph, Resources, Preds, Types) :-
  921	graph_resources(Graph, R, [], P, [], T, [], _, _),
  922	sort(R, Resources),
  923	sort(P, Preds),
  924	sort(T, Types).
  925
  926graph_resources([], R, R, P, P, T, T, L, L).
  927graph_resources([rdf(S,P,O)|T], [S|RT0], RT, [P|PTl0], PTl, Tl0, Tl, L0, L) :-
  928	object_resources(O, RT0, RT1, Tl0, Tl1, L0, L1),
  929	graph_resources(T, RT1, RT, PTl0, PTl, Tl1, Tl, L1, L).
  930
  931
  932object_resources(O, R0, R, T0, T, L0, L) :-
  933	(   atom(O)
  934	->  R0 = [O|R], T0 = T, L0 = L
  935	;   O = literal(Val)
  936	->  R0 = R, L0 = [O|L],
  937	    (	Val = type(Type, _)
  938	    ->	T0 = [Type|T]
  939	    ;	T0 = T
  940	    )
  941	;   assertion(fail)
  942	).
  943
  944
  945		 /*******************************
  946		 *	      ABSTRACT		*
  947		 *******************************/
 abstract_graph(+GraphIn, -GraphOut, +Options) is det
Unify GraphOut with an abstracted version of GraphIn. The abstraction is carried out triple-by-triple. Note there is no need to abstract all triples to the same level. We do however need to map nodes in the graph consistently. I.e. if we abstract the object of rdf(s,p,o), we must abstract the subject of rdf(o, p2, o2) to the same resource.

If we want to do incremental growing we must keep track which nodes where mapped to which resources. Option?

We must also decide on the abstraction level for a node. This can be based on the weight in the search graph, the involved properties and focus such as location and time. Should we express this focus in the weight?

Options:

map_in(?Map)
If present, this is the initial resource abstraction map.
map_out(-Map)
Provide access to the final resource abstraction map.
bags(-Bags)
If provided, bagify the graph, returning the triples that define the bags in Bags. The full graph is created by appending Bags to GraphOut.
merge_concepts_with_super(+Boolean)
If true (default), merge nodes of one is a super-concept of another.
  980abstract_graph(GraphIn, GraphOut, Options) :-
  981	map_in(Options, MapIn),
  982	graph_resources(GraphIn, Nodes, NT, Edges, [], _T0, _TT, NT, []),
  983	node_map(Nodes, MapIn, Map2, Options),
  984	edge_map(Edges, Map2, MapOut),
  985	map_out(Options, MapOut),
  986	(   option(bags(Bags), Options)
  987	->  map_to_bagged_graph(GraphIn, MapOut, GraphOut, Bags)
  988	;   map_graph(GraphIn, MapOut, GraphOut)
  989	).
  990
  991map_in(Options, Map) :-
  992	option(map_in(Map), Options, Map),
  993	(var(Map) -> empty_assoc(Map) ; true).
  994
  995map_out(Options, Map) :-
  996	option(map_out(Map), Options, _).
 node_map(+Nodes, +Map0, -Map, +Options) is det
Create the abstraction map for the nodes of the graph. It consists of two steps:
  1. Map all instances to their class, except for concepts
  2. If some instances are mapped to class A and others to class B, where A is a super-class of B, map all instances to class A.
 1008node_map(Nodes, Map0, Map, Options) :-
 1009	concepts_of(Nodes, Map0, Map1, _NewConcepts),
 1010	(   option(merge_concepts_with_super(true), Options, true)
 1011	->  assoc_to_values(Map1, Concepts),
 1012	    sort(Concepts, Unique),
 1013	    identity_map(Unique, SuperMap0),
 1014	    find_broaders(Unique, SuperMap0, SuperMap1),
 1015	    deref_map(SuperMap1, SuperMap),
 1016	    map_assoc(map_over(SuperMap), Map1, Map)
 1017	;   Map = Map1
 1018	).
 1019
 1020map_over(Map, V0, V) :-
 1021	(   get_assoc(V0, Map, V1)
 1022	->  V = V1
 1023	;   V = V0
 1024	).
 1025
 1026concepts_of([], Map, Map, []).
 1027concepts_of([R|T], Map0, Map, New) :-
 1028	get_assoc(R, Map0, _), !,
 1029	concepts_of(T, Map0, Map, New).
 1030concepts_of([R|T], Map0, Map, [C|New]) :-
 1031	concept_of(R, C),
 1032	put_assoc(R, Map0, C, Map1),
 1033	concepts_of(T, Map1, Map, New).
 identity_map(+List, -Map) is det
 find_broaders(+List, +Map0, -Map) is det
 deref_map(+Map0, -Map) is det
 1039identity_map(List, Map) :-
 1040	map_list_to_pairs(=, List, Pairs),
 1041	list_to_assoc(Pairs, Map).
 1042
 1043find_broaders([], Map, Map).
 1044find_broaders([C|T], Map0, Map) :-
 1045	broader(C, Super),
 1046	get_assoc(Super, Map0, SuperSuper), !,
 1047	debug(rdf_abstract, 'Mapped ~p to super concept ~p', [C, SuperSuper]),
 1048	put_assoc(C, Map0, SuperSuper, Map1),
 1049	find_broaders(T, Map1, Map).
 1050find_broaders([_|T], Map0, Map) :-
 1051	find_broaders(T, Map0, Map).
 1052
 1053
 1054deref_map(Map0, Map) :-
 1055	findall(KV, mapped_kv(KV, Map0), Pairs),
 1056	deref(Pairs, NewPairs),
 1057	list_to_assoc(NewPairs, Map).
 1058
 1059mapped_kv(K-V, Assoc) :-
 1060	gen_assoc(K, Assoc, V),
 1061	K \== V.
 deref(+Pairs0, NewPairs) is det
Dereference chains V1-V2, V2-V3 into V1-V3, V2-V3. Note that Pairs0 may contain cycles, in which case all the members of the cycle are replaced by the representative as defined by rdf_representative/2.
 1070deref(Pairs, NewPairs) :-
 1071	list_to_assoc(Pairs, Assoc),
 1072	deref(Pairs, Assoc, NewPairs).
 1073
 1074deref([], _, []).
 1075deref([K-V0|T0], Map, [K-V|T]) :-
 1076	deref2(V0, Map, [V0], EqSet, V),
 1077	(   EqSet == []
 1078	->  deref(T0, Map, T)
 1079	;   rdf_representative(EqSet, V),
 1080	    deref_cycle(T0, EqSet, V, Cycle, T1),
 1081	    append(Cycle, T2, T),
 1082	    deref(T1, Map, T2)
 1083	).
 1084
 1085deref2(V0, Map, Visited, EqSet, V) :-
 1086	get_assoc(V0, Map, V1), !,
 1087	(   memberchk(V1, Visited)
 1088	->  EqSet = Visited
 1089	;   deref2(V1, Map, [V1|Visited], EqSet, V)
 1090	).
 1091deref2(V, _, _, [], V).
 1092
 1093deref_cycle([], _, _, [], []).
 1094deref_cycle([K-V0|T0], EqSet, V, [K-V|CT], Rest) :-
 1095	memberchk(V0, EqSet), !,
 1096	deref_cycle(T0, EqSet, V, CT, Rest).
 1097deref_cycle([H|T0], EqSet, V, CT, [H|RT]) :-
 1098	deref_cycle(T0, EqSet, V, CT, RT).
 edge_map(+Edges, +MapIn, -MapOut) is det
 1103edge_map([], Map, Map).
 1104edge_map([R|T], Map0, Map) :-
 1105	get_assoc(R, Map0, _), !,
 1106	edge_map(T, Map0, Map).
 1107%edge_map([R|T], Map0, Map) :-
 1108%	iface_abstract_predicate(R, C),
 1109%	put_assoc(R, Map0, C, Map1),
 1110%	edge_map(T, Map1, Map).
 concept_of(+Resource, -Concept) is det
True if Concept is the concept Resource belongs to. If Resource is a concept itself, Concept is Resource.
To be done
- Make thesaurus concept classes a subclass of skos:Class.
- Put in a reusable place, merge with kwd_search.pl
 1120concept_of(O, O) :-
 1121	rdfs_individual_of(O, skos:'Concept'), !.
 1122concept_of(O, C) :-
 1123	rdf_has(O, rdf:type, C), !.
 1124concept_of(O, O).
 broader(+Term, -Broader) is nondet
True if Broader is a broader term according to the SKOS schema.
To be done
- Deal with owl:sameAs (and skos:exactMatch)
 1132broader(Term, Broader) :-
 1133	rdf_reachable(Term, skos:broader, Broader),
 1134	Broader \== Term.
 rdf_representative(+Resources:list, -Representative:atom) is det
Representative is the most popular resource from the non-empty list Resources. The preferred representative is currently defined as the resource with the highest number of associated edges.
To be done
- Think about the function. Use sum of logs or sum of sqrt?
 1145rdf_representative(List, Representative) :-
 1146	(   exclude(rdf_is_bnode, List, NonBNodes),
 1147	    NonBNodes \== []
 1148	->  representative(NonBNodes, Representative)
 1149	;   representative(List, Representative)
 1150	).
 1151
 1152representative([H], Representative) :- !,
 1153	Representative = H.
 1154representative([H|T], Representative) :-
 1155	fan_in_out(H, Fan0),
 1156	best(T, Fan0, H, Representative).
 1157
 1158best([], _, R, R).
 1159best([H|T], S0, R0, R) :-
 1160	fan_in_out(H, S1),
 1161	(   S1 > S0
 1162	->  best(T, S1, H, R)
 1163	;   best(T, S0, R0, R)
 1164	).
 1165
 1166fan_in_out(R, Fan) :-
 1167	count(rdf(R, _, _), 100, FanOut),
 1168	count(rdf(_, _, R), 100, FanIn),
 1169	Fan is FanOut + FanIn.
 1170
 1171
 1172		 /*******************************
 1173		 *	      SIMPLIFY		*
 1174		 *******************************/
 minimise_graph(+GraphIn, -GraphOut) is det
Remove redudant triples from a graph. Redundant triples are defined as:
To be done
- Implement entailed transitive
 1188minimise_graph(RDF0, RDF) :-
 1189	partition(object_triple, RDF0, ObjRDF, LitRDF),
 1190	map_list_to_pairs(os_rdf, ObjRDF, Pairs),
 1191	group_pairs_by_key(Pairs, Grouped),
 1192	maplist(key_remove_reduntant_relations, Grouped, MinGroups),
 1193	append([LitRDF|MinGroups], RDF).
 1194
 1195object_triple(rdf(_,_,O)) :-
 1196	atom(O).
 1197
 1198os_rdf(rdf(S,_,O), (A+B)) :-
 1199	(   S @< O
 1200	->  A = S, B = O
 1201	;   A = O, B = S
 1202	).
 1203
 1204key_remove_reduntant_relations(_-Rs0, Rs) :-
 1205	remove_reduntant_relations(Rs0, Rs).
 1206
 1207remove_reduntant_relations([R], [R]) :- !.
 1208remove_reduntant_relations(List0, List) :-
 1209	select(rdf(S,P1,O), List0, List1),
 1210	select(rdf(S,P2,O), List1, List2),
 1211	rdfs_subproperty_of(P1, P2), !,
 1212	remove_reduntant_relations([rdf(S,P1,O)|List2], List).
 1213remove_reduntant_relations(List0, List) :-
 1214	select(rdf(S,P,O), List0, List1),
 1215	select(rdf(O,P,S), List1, List2),
 1216	rdfs_individual_of(P, owl:'SymmetricProperty'), !,
 1217	remove_reduntant_relations([rdf(S,P,O)|List2], List).
 1218remove_reduntant_relations(List0, List) :-
 1219	select(rdf(S,P1,O), List0, List1),
 1220	select(rdf(O,P2,S), List1, List2),
 1221	rdf_has(P1, owl:inverseOf, P2), !,
 1222	remove_reduntant_relations([rdf(S,P2,O)|List2], List).
 1223remove_reduntant_relations(List, List).
 1224
 1225
 1226		 /*******************************
 1227		 *		UTIL		*
 1228		 *******************************/
 1229
 1230:- meta_predicate
 1231	count(:, +, -). 1232
 1233count(G, Max, Count) :-
 1234        C = c(0),
 1235        (   G,
 1236            arg(1, C, C0),
 1237            C1 is C0+1,
 1238            nb_setarg(1, C, C1),
 1239            C1 == Max
 1240        ->  Count = Max
 1241        ;   arg(1, C, Count)
 1242        )