View source with formatted comments or as raw
    1/*  Part of SWI-Prolog
    2
    3    Author:        Jan Wielemaker
    4    E-mail:        J.Wielemaker@vu.nl
    5    WWW:           http://www.swi-prolog.org
    6    Copyright (c)  2010-2014, University of Amsterdam
    7                              VU University Amsterdam
    8    All rights reserved.
    9
   10    Redistribution and use in source and binary forms, with or without
   11    modification, are permitted provided that the following conditions
   12    are met:
   13
   14    1. Redistributions of source code must retain the above copyright
   15       notice, this list of conditions and the following disclaimer.
   16
   17    2. Redistributions in binary form must reproduce the above copyright
   18       notice, this list of conditions and the following disclaimer in
   19       the documentation and/or other materials provided with the
   20       distribution.
   21
   22    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   23    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   24    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
   25    FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
   26    COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
   27    INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
   28    BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
   29    LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
   30    CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   31    LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
   32    ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
   33    POSSIBILITY OF SUCH DAMAGE.
   34*/
   35
   36:- module(rdf_library,
   37          [ rdf_attach_library/1,       % +Dir
   38            rdf_load_library/1,         % +Ontology
   39            rdf_load_library/2,         % +Ontology, +Options
   40            rdf_list_library/0,
   41            rdf_list_library/1,         % +Ontology
   42            rdf_list_library/2,         % +Ontology, +Options
   43            rdf_library_source/2,       % +Ontology, -SourceURL
   44            rdf_library_index/2,        % ?Id, ?Facet
   45            rdf_current_manifest/1      % -Manifest
   46          ]).   47:- use_module(library(semweb/rdf_db)).   48:- use_module(library(semweb/turtle)).   49:- use_module(library(rdf)).   50:- use_module(library(lists)).   51:- use_module(library(option)).   52:- use_module(library(debug)).   53:- use_module(library(error)).   54:- use_module(library(pairs)).   55:- use_module(library(date)).   56:- use_module(library(uri)).   57:- use_module(library(http/http_open)).   58:- use_module(library(thread)).   59:- use_module(library(apply)).   60:- use_module(library(solution_sequences)).   61
   62:- predicate_options(rdf_list_library/2, 2,
   63                     [ indent(atom),
   64                       show_graph(boolean),
   65                       show_source(boolean),
   66                       show_virtual(boolean)
   67                     ]).   68:- predicate_options(rdf_load_library/2, 2,
   69                     [ concurrent(positive_integer),
   70                       import(boolean),
   71                       load(boolean),
   72                       base_uri(atom),
   73                       claimed_source(atom),
   74                       not_found(oneof([error,warning,silent]))
   75                     ]).   76
   77/** <module> RDF Library Manager
   78
   79This module manages an ontology library. Such   a  library consists of a
   80directory with manifest files named =|Manifest.rdf|= or =|Manifest.ttl|=
   81(Turtle). The manifest files define ontologies  appearing in the library
   82as well as namespace mnemonics and dependencies.
   83
   84The typical usage scenario is
   85
   86==
   87?- rdf_attach_library('/some/directory').
   88?- rdf_load_library(my_ontology).
   89==
   90
   91@tbd    Add caching info
   92@tbd    Allow using Manifests on HTTP servers
   93@author Jan Wielemaker
   94*/
   95
   96:- rdf_register_ns(lib,  'http://www.swi-prolog.org/rdf/library/').   97:- rdf_register_ns(void, 'http://rdfs.org/ns/void#').   98:- rdf_register_ns(vann, 'http://purl.org/vocab/vann/').   99
  100:- dynamic
  101    manifest/2,                     % Path, Time
  102    library_db/3.                   % Name, URL, Facets
  103
  104%       Force compile-time namespace expansion
  105
  106:- rdf_meta
  107    edge(+, r,r,o).  108
  109                 /*******************************
  110                 *            LOADING           *
  111                 *******************************/
  112
  113%!  rdf_load_library(+Id) is det.
  114%!  rdf_load_library(+Id, +Options) is det.
  115%
  116%   Load ontologies from the  library.  A   library  must  first  be
  117%   attached using rdf_attach_library/1.  Defined Options are:
  118%
  119%           * import(Bool)
  120%           If =true= (default), also load ontologies that are
  121%           explicitely imported.
  122%
  123%           * base_uri(URI)
  124%           BaseURI used for loading RDF.  Local definitions in
  125%           ontologies overrule this option.
  126%
  127%           * claimed_source(URL)
  128%           URL from which we claim to have loaded the data.
  129%
  130%           * not_found(+Level)
  131%           The system does a pre-check for the existence of
  132%           all references RDF databases.  If Level is =error=
  133%           it reports missing databases as an error and fails.
  134%           If =warning= it prints them, but continues.  If
  135%           =silent=, no checks are preformed.  Default is =error=.
  136%
  137%           * concurrent(Threads)
  138%           Perform the load concurrently using N threads.  If not
  139%           specified, the number is determined by
  140%           guess_concurrency/2.
  141%
  142%           * load(+Bool)
  143%           If =false=, to all the preparation, but do not execute
  144%           the actual loading.  See also rdf_list_library/2.
  145
  146rdf_load_library(Id) :-
  147    rdf_load_library(Id, []).
  148
  149rdf_load_library(Id, Options) :-
  150    cleaned_load_commands(Id, Cmds, Options),
  151    (   option(concurrent(Threads), Options)
  152    ->  true
  153    ;   guess_concurrency(Cmds, Threads)
  154    ),
  155    length(Cmds, NSources),
  156    print_message(informational, rdf(loading(NSources, Threads))),
  157    (   option(load(true), Options, true)
  158    ->  concurrent(Threads, Cmds, [])
  159    ;   true
  160    ).
  161
  162%!  rdf_library_source(+Id, -Source) is nondet.
  163%
  164%   True of Source is the URL that is  part of the given library Id.
  165%   This predicate finds all indirect   dependencies.  It does _not_
  166%   check whether the source exists or is valid.
  167%
  168%   @see uri_file_name/2 for converting file:// URLs to a filename.
  169
  170rdf_library_source(Id, Source) :-
  171    cleaned_load_commands(Id, Cmds,
  172                          [ import(true),
  173                            not_found(silent)
  174                          ]),
  175    member(rdf_load(Source, _), Cmds).
  176
  177
  178cleaned_load_commands(Id, Cmds, Options) :-
  179    load_commands(Id, Options, Pairs),
  180    pairs_values(Pairs, Commands),
  181    list_to_set(Commands, Cmds2),
  182    delete_virtual(Cmds2, Cmds3),
  183    find_conflicts(Cmds3),
  184    check_existence(Cmds3, Cmds, Options).
  185
  186delete_virtual([], []).
  187delete_virtual([virtual(_)|T0], T) :-
  188    !,
  189    delete_virtual(T0, T).
  190delete_virtual([H|T0], [H|T]) :-
  191    delete_virtual(T0, T).
  192
  193
  194%!  find_conflicts(+LoadCommands) is semidet.
  195%
  196%   Find possibly conflicting options for loading the same source
  197
  198find_conflicts(Commands) :-
  199    no_source_with_different_options(Commands),
  200    no_sources_in_same_graph(Commands).
  201
  202%!  no_source_with_different_options(+Commands) is semidet.
  203%
  204%   True if there are not multiple calls to load the same graph, but
  205%   with  different  load-options.  Prints  a    warning  and  fails
  206%   otherwise.
  207
  208no_source_with_different_options(Commands) :-
  209    sort(Commands, Cmds),
  210    conflicts(Cmds, Conflicts),
  211    report_conflicts(Conflicts),
  212    Conflicts == [].
  213
  214conflicts([], []).
  215conflicts([C1, C2|T0], [C1-C2|T]) :-
  216    conflict(C1, C2),
  217    !,
  218    conflicts([C2|T0], T).
  219conflicts([_|T0], T) :-
  220    conflicts(T0, T).
  221
  222conflict(rdf_load(Src, Options1), rdf_load(Src, Options2)) :-
  223    sort(Options1, S1),
  224    sort(Options2, S2),
  225    S1 \== S2.
  226
  227report_conflicts([]).
  228report_conflicts([C1-C2|T]) :-
  229    print_message(warning, rdf(load_conflict(C1,C2))),
  230    report_conflicts(T).
  231
  232%!  no_sources_in_same_graph(+Commands) is semidet.
  233%
  234%   True if there are not two load   commands  referring to the same
  235%   graph.
  236
  237no_sources_in_same_graph(Commands) :-
  238    map_list_to_pairs(command_graph, Commands, Keyed),
  239    keysort(Keyed, KeySorted),
  240    group_pairs_by_key(KeySorted, SourcesByGraph),
  241    (   member(Graph-Sources, SourcesByGraph),
  242        Sources = [_,_|_]
  243    ->  forall(( member(Graph-Sources, SourcesByGraph),
  244                 Sources = [_,_|_]
  245               ),
  246               print_message(error,
  247                             rdf(multiple_source_for_graph(Graph, Sources)))),
  248        fail
  249    ;   true
  250    ).
  251
  252command_graph(rdf_load(_, Options), Graph) :-
  253    option(graph(Graph), Options),
  254    !.
  255command_graph(rdf_load(URL, _), URL) :- !.
  256command_graph(_, _).                    % Other command.  Each variable it its own key
  257
  258
  259%!  check_existence(+CommandsIn, -Commands, +Options) is det.
  260%
  261%   Report existence errors. Fail if at   least  one source does not
  262%   exist. and the not_found level is not =silent=.
  263%
  264%   @error existence_error(urls, ListOfUrls)
  265
  266check_existence(CommandsIn, Commands, Options) :-
  267    option(not_found(Level), Options, error),
  268    must_be(oneof([error,warning,silent]), Level),
  269    (   Level == silent
  270    ->  Commands = CommandsIn
  271    ;   missing_urls(CommandsIn, Commands, Missing),
  272        (   Missing == []
  273        ->  true
  274        ;   Level == warning
  275        ->  report_missing(Missing, Level)
  276        ;   existence_error(urls, Missing)
  277        )
  278    ).
  279
  280
  281missing_urls([], [], []).
  282missing_urls([H|T0], Cmds, Missing) :-
  283    H = rdf_load(URL, _),
  284    (   catch(exists_url(URL, _Ext), error(existence_error(_,_), _), fail)
  285    ->  Cmds = [H|T],
  286        missing_urls(T0, T, Missing)
  287    ;   Missing = [URL|T],
  288        missing_urls(T0, Cmds, T)
  289    ).
  290
  291report_missing([], _).
  292report_missing([H|T], Level) :-
  293    print_message(Level, error(existence_error(url, H), _)),
  294    report_missing(T, Level).
  295
  296%!  guess_concurrency(+Commands, -Threads) is det.
  297%
  298%   How much concurrency to use? Set to   the  number of CPUs if all
  299%   input comes from  files  or  5   if  network  based  loading  is
  300%   demanded.
  301
  302guess_concurrency(Commands, Threads) :-
  303    count_uris(Commands, FileURLs, OtherURLs),
  304    (   FileURLs > 0
  305    ->  (   current_prolog_flag(cpu_count, CPUs)
  306        ->  true
  307        ;   CPUs = 1
  308        ),
  309        FileThreads is min(FileURLs, CPUs)
  310    ;   FileThreads = 0
  311    ),
  312    (   OtherURLs > 0
  313    ->  OtherThreads is min(5, OtherURLs)
  314    ;   OtherThreads = 0
  315    ),
  316    Threads is FileThreads + OtherThreads.
  317
  318count_uris([], 0, 0).
  319count_uris([rdf_load(URL, _)|T], F, NF) :-
  320    count_uris(T, F0, NF0),
  321    (   web_url(URL)
  322    ->  NF is NF0 + 1,
  323        F = F0
  324    ;   F is F0 + 1,
  325        NF = NF0
  326    ).
  327
  328
  329%!  load_commands(+Id, +Options, -Pairs:list(Level-Command)) is det.
  330%
  331%   Commands are the RDF commands to execute for rdf_load_library/2.
  332%   Splitting  in  command  collection  and   execution  allows  for
  333%   concurrent execution as well  as   forward  checking of possible
  334%   problems.
  335%
  336%   @tbd    Fix poor style; avoid assert/retract.
  337
  338:- thread_local
  339    command/2.  340
  341load_commands(Id, Options, Commands) :-
  342    retractall(command(_,_)),
  343    rdf_update_library_index,
  344    dry_load(Id, 1, Options),
  345    findall(Level-Cmd, retract(command(Level, Cmd)), Commands).
  346
  347dry_load(Id, Level, Options) :-
  348    (   library(Id, File, Facets)
  349    ->  merge_base_uri(Facets, Options, Options1),
  350        merge_source(Facets, Options1, Options2),
  351        merge_blanks(Facets, Options2, Options3),
  352        merge_format(Facets, Options3, Options4),
  353        (   \+ memberchk(virtual, Facets)
  354        ->  load_options(Options4, File, RdfOptions),
  355            assert(command(Level, rdf_load(File, RdfOptions)))
  356        ;   assert(command(Level, virtual(File)))
  357        ),
  358        (   option(import(true), Options, true)
  359        ->  Level1 is Level + 1,
  360            forall(member(imports(Type, Import), Facets),
  361                   import(Import, Level1, [type(Type)|Options4]))
  362        ;   true
  363        )
  364    ;   existence_error(ontology, Id)
  365    ).
  366
  367merge_base_uri(Facets, Options0, Options) :-
  368    (   option(base_uri(Base), Facets)
  369    ->  exclude(name_option(base_uri), Options0, Options1),
  370        Options = [base_uri(Base)|Options1]
  371    ;   Options = Options0
  372    ).
  373
  374merge_source(Facets, Options0, Options) :-
  375    (   option(claimed_source(Base), Facets)
  376    ->  exclude(name_option(claimed_source), Options0, Options1),
  377        Options = [claimed_source(Base)|Options1]
  378    ;   Options = Options0
  379    ).
  380
  381merge_blanks(Facets, Options0, Options) :-
  382    (   option(blank_nodes(Share), Facets)
  383    ->  exclude(name_option(blank_nodes), Options0, Options1),
  384        Options = [blank_nodes(Share)|Options1]
  385    ;   Options = Options0
  386    ).
  387
  388merge_format(Facets, Options0, Options) :-
  389    (   option(format(Format), Facets)
  390    ->  exclude(name_option(format), Options0, Options1),
  391        Options = [format(Format)|Options1]
  392    ;   Options = Options0
  393    ).
  394
  395name_option(Name, Term) :-
  396    functor(Term, Name, 1).
  397
  398load_options(Options, File, RDFOptions) :-
  399    findall(O, load_option(Options, File, O), RDFOptions).
  400
  401load_option(Options, File, graph(Source)) :-
  402    option(claimed_source(Source0), Options),
  403    (   sub_atom(Source0, _, _, 0, /)
  404    ->  file_base_name(File, Base),
  405        atom_concat(Source0, Base, Source)
  406    ;   atom_concat(Source, #, Source0)
  407    ->  true
  408    ).
  409load_option(Options, File, base_uri(BaseURI)) :-
  410    option(base_uri(Base0), Options),
  411    sub_atom(/, _, _, 0, Base0),
  412    atom_concat(Base0, File, BaseURI).
  413load_option(Options, _File, blank_nodes(Share)) :-
  414    option(blank_nodes(Share), Options).
  415load_option(Options, _File, format(Format)) :-
  416    option(format(Format), Options).
  417
  418%!  import(+URL, +Level, +Options) is det.
  419
  420import(Path, Level, Options) :-
  421    option(type(data_dump), Options),
  422    !,
  423    load_options(Options, Path, RdfOptions),
  424    assert(command(Level, rdf_load(Path, RdfOptions))).
  425import(Path, Level, Options) :-
  426    (   (   library(Id, Path, _)
  427        ->  true
  428        ;   manifest_for_path(Path, Manifest),
  429            catch(exists_url(Manifest, _Ext), _, fail)
  430        ->  process_manifest(Manifest),
  431            library(Id, Path, _)
  432        )
  433    ->  dry_load(Id, Level, Options)
  434    ;   load_options(Options, Path, RdfOptions),
  435        assert(command(Level, rdf_load(Path, RdfOptions)))
  436    ).
  437
  438manifest_for_path(URL, Manifest) :-
  439    file_directory_name(URL, Parent),
  440    manifest_file(Base),
  441    rdf_extension(Ext),
  442    atomic_list_concat([Parent, /, Base, '.', Ext], Manifest).
  443
  444%!  rdf_list_library(+Id) is det.
  445%!  rdf_list_library(+Id, +Options) is det.
  446%
  447%   Print library dependency tree to the terminal.  Options include
  448%   options for rdf_load_library/2 and
  449%
  450%           * show_source(+Boolean)
  451%           If =true= (default), show location we are loading
  452%
  453%           * show_graph(+Boolean)
  454%           If =true= (default =false=), show name of graph
  455%
  456%           * show_virtual(+Boolean)
  457%           If =false= (default =true=), do not show virtual
  458%           repositories.
  459%
  460%           * indent(Atom)
  461%           Atom repeated for indentation levels
  462
  463rdf_list_library(Id) :-
  464    rdf_list_library(Id, []).
  465rdf_list_library(Id, Options) :-
  466    load_commands(Id, Options, Commands),
  467    maplist(print_load(Options), Commands).
  468
  469print_load(Options, _Level-virtual(_)) :-
  470    option(show_virtual(false), Options),
  471    !.
  472print_load(Options, Level-Command) :-
  473    option(indent(Indent), Options, '. '),
  474    forall(between(2, Level, _), format(Indent)),
  475    print_command(Command, Options),
  476    format('~N').
  477
  478print_command(virtual(URL), _Options) :-
  479    format('<~w>', [URL]).
  480print_command(rdf_load(URL), Options) :-
  481    print_command(rdf_load(URL, []), Options).
  482print_command(rdf_load(URL, RDFOptions), Options) :-
  483    (   option(show_source(true), Options, true)
  484    ->  format('~w', [URL]),
  485        (   option(blank_nodes(noshare), RDFOptions)
  486        ->  format(' <not shared>')
  487        ;   true
  488        ),
  489        (   exists_url(URL, Ext)
  490        ->  (   Ext == ''
  491            ->  true
  492            ;   format('[.~w]', [Ext])
  493            )
  494        ;   format(' [NOT FOUND]')
  495        )
  496    ;   true
  497    ),
  498    (   option(show_graph(true), Options, false),
  499        option(graph(Base), RDFOptions)
  500    ->  format('~N\tSource: ~w', [Base])
  501    ;   true
  502    ).
  503
  504exists_url(URL, Ext) :-
  505    uri_file_name(URL, Path),
  506    !,
  507    add_storage_extension(Path, Ext, PathEx),
  508    access_file(PathEx, read),
  509    !.
  510exists_url(URL, Ext) :-
  511    uri_components(URL, Components),
  512    uri_data(scheme, Components, Scheme),
  513    atom(Scheme),
  514    url_scheme(Scheme),
  515    add_storage_extension(URL, Ext, URLEx),
  516    catch(http_open(URLEx, Stream, [ method(head) ]), _, fail),
  517    !,
  518    close(Stream).
  519
  520:- multifile
  521    rdf_db:rdf_storage_encoding/2.  522
  523add_storage_extension(File, '', File).
  524add_storage_extension(File, Ext, FileEx) :-
  525    rdf_db:rdf_storage_encoding(Ext, _Format),
  526    \+ file_name_extension(_, Ext, File),
  527    file_name_extension(File, Ext, FileEx).
  528
  529url_scheme(http).
  530url_scheme(https).
  531
  532
  533%!  rdf_list_library
  534%
  535%   Prints known RDF library identifiers to current output.
  536
  537rdf_list_library :-
  538    rdf_update_library_index,
  539    (   rdf_library_index(Id, title(TitleLiteral)),
  540        plain_string(TitleLiteral, Title),
  541        format('~w ~t~20|~w', [Id, Title]),
  542        (   rdf_library_index(Id, version(Version))
  543        ->  format(' (version ~w)', [Version])
  544        ;   true
  545        ),
  546        nl,
  547        fail
  548    ;   true
  549    ).
  550
  551plain_string(String, String) :-
  552    atomic(String),
  553    !.
  554plain_string(lang(en, String), String) :- !.
  555plain_string(lang(_, String), String) :- !.
  556plain_string(type(_, String), String) :- !.
  557
  558%!  rdf_library_index(?Id, ?Facet) is nondet.
  559%
  560%   Query the content of the library.  Defined facets are:
  561%
  562%           * source(URL)
  563%           Location from which to load the ontology
  564%
  565%           * title(Atom)
  566%           Title used for the ontology
  567%
  568%           * comment(Atom)
  569%           Additional comments for the ontology
  570%
  571%           * version(Atom)
  572%           Version information on the ontology
  573%
  574%           * imports(Type, URL)
  575%           URLs needed by this ontology. May succeed multiple
  576%           times.  Type is one of =ontology=, =schema= or =instances=.
  577%
  578%           * base_uri(BaseURI)
  579%           Base URI to use when loading documents. If BaseURI
  580%           ends in =|/|=, the actual filename is attached.
  581%
  582%           * claimed_source(Source)
  583%           URL from which we claim to have loaded the RDF. If
  584%           Source ends in =|/|=, the actual filename is
  585%           attached.
  586%
  587%           * blank_nodes(Share)
  588%           Defines how equivalent blank nodes are handled, where
  589%           Share is one of =share= or =noshare=.  Default is to
  590%           share.
  591%
  592%           * format(Format)
  593%           Format of the resource.  Can be used to overrule
  594%           if the format as derived from the HTTP content type
  595%           is wrong.
  596%
  597%           * provides_ns(URL)
  598%           Ontology provides definitions in the namespace URL.
  599%           The formal definition of this is troublesome, but in
  600%           practice it means the ontology has triples whose
  601%           subjects are in the given namespace.
  602%
  603%           * uses_ns(URL)
  604%           The ontology depends on the given namespace.  Normally
  605%           means it contains triples that have predicates or
  606%           objects in the given namespace.
  607%
  608%           * manifest(URL)
  609%           URL of the manifest in which this ontology is defined.
  610%
  611%           * virtual
  612%           Entry is virtual (cannot be loaded)
  613
  614rdf_library_index(Id, Facet) :-
  615    library(Id, Path, Facets),
  616    (   Facet = source(Path)
  617    ;   member(Facet, Facets)
  618    ).
  619
  620
  621                 /*******************************
  622                 *      MANIFEST PROCESSING     *
  623                 *******************************/
  624
  625%!  rdf_attach_library(+Source)
  626%
  627%   Attach manifest from Source.  Source is one of
  628%
  629%           * URL
  630%           Load single manifest from this URL
  631%           * File
  632%           Load single manifest from this file
  633%           * Directory
  634%           Scan all subdirectories and load all =|Manifest.ttl|= or
  635%           =|Manifest.rdf|= found.  If Directory is a path-alias
  636%           (e.g., ontology(.)), _all_ referenced directories are
  637%           scanned for manifest files.
  638%
  639%   Encountered namespaces are registered   using rdf_register_ns/2.
  640%   Encountered ontologies are added to the index. If a manifest was
  641%   already loaded it will be reloaded  if the modification time has
  642%   changed.
  643
  644rdf_attach_library(URL) :-
  645    atom(URL),
  646    uri_is_global(URL),
  647    \+ is_absolute_file_name(URL),   % avoid interpreting C: as a schema
  648    !,
  649    process_manifest(URL).
  650rdf_attach_library(File) :-
  651    absolute_file_name(File, Path,
  652                       [ extensions([rdf,ttl]),
  653                         access(read),
  654                         file_errors(fail)
  655                       ]),
  656    !,
  657    process_manifest(Path).
  658rdf_attach_library(Dir) :-
  659    forall(absolute_file_name(Dir, Path,
  660                              [ file_type(directory),
  661                                access(read),
  662                                solutions(all)
  663                              ]),
  664           attach_dir(Path, [])).
  665
  666
  667%!  rdf_update_library_index
  668%
  669%   Reload all Manifest files.
  670
  671rdf_update_library_index :-
  672    forall(manifest(Location, _Time),
  673           process_manifest(Location)).
  674
  675attach_dir(Path, Visited) :-
  676    memberchk(Path, Visited),
  677    !.
  678attach_dir(Path, Visited) :-
  679    atom_concat(Path, '/*', Pattern),
  680    expand_file_name(Pattern, Members),
  681    (   manifest_file(MBase),
  682        rdf_extension(Ext),
  683        atomic_list_concat([Path, /, MBase, '.', Ext], Manifest),
  684        exists_file(Manifest)
  685    ->  process_manifest(Manifest)
  686    ;   print_message(silent, rdf(no_manifest(Path)))
  687    ),
  688    (   member(Dir, Members),
  689        exists_directory(Dir),
  690        file_base_name(Dir, Base),
  691        \+ hidden_base(Base),
  692        attach_dir(Dir, [Path|Visited]),
  693        fail ; true
  694    ).
  695
  696hidden_base('CVS').
  697hidden_base('cvs').                     % Windows
  698
  699%!  process_manifest(+Location) is det.
  700%
  701%   Process a manifest file, registering  encountered namespaces and
  702%   creating clauses for library/3. No op if manifest was loaded and
  703%   not changed. Removes old data if the manifest was changed.
  704%
  705%   @param  Location is either a path name or a URL.
  706
  707process_manifest(Source) :-
  708    (   web_url(Source)
  709    ->  uri_normalized(Source, Manifest)
  710    ;   uri_file_name(Source, Manifest0)
  711    ->  absolute_file_name(Manifest0, ManifestFile),
  712        uri_file_name(Manifest, ManifestFile)
  713    ;   absolute_file_name(Source, ManifestFile),
  714        uri_file_name(Manifest, ManifestFile)
  715    ),                              % Manifest is a canonical URI
  716    source_time(Manifest, MT),
  717    (   manifest(Manifest, Time),
  718        (   MT =< Time
  719        ->  !
  720        ;   retractall(manifest(Manifest, Time)),
  721            library_db(Id, URL, Facets),
  722            memberchk(manifest(Manifest), Facets),
  723            retractall(library_db(Id, URL, Facets)),
  724            fail
  725        )
  726    ;   read_triples(Manifest, Triples),
  727        process_triples(Manifest, Triples),
  728        print_message(informational, rdf(manifest(loaded, Manifest))),
  729        assert(manifest(Manifest, MT))
  730    ).
  731
  732process_triples(Manifest, Triples) :-
  733    findall(ns(Mnemonic, NameSpace),
  734            extract_namespace(Triples, Mnemonic, NameSpace),
  735            NameSpaces),
  736    findall(Ontology,
  737            extract_ontology(Triples, Ontology),
  738            Ontologies),
  739    maplist(define_namespace, NameSpaces),
  740    maplist(assert_ontology(Manifest), Ontologies).
  741
  742%!  extract_namespace(+Triples, -Mnemonic, -NameSpace)
  743%
  744%   True if Mnemonic is an abbreviation of NameSpace.
  745
  746extract_namespace(Triples, Mnemonic, Namespace) :-
  747    edge(Triples, Decl, lib:mnemonic, literal(Mnemonic)),
  748    edge(Triples, Decl, lib:namespace, Namespace).
  749extract_namespace(Triples, Mnemonic, Namespace) :-
  750    edge(Triples, Decl, vann:preferredNamespacePrefix, literal(Mnemonic)),
  751    edge(Triples, Decl, vann:preferredNamespaceUri, literal(Namespace)).
  752
  753%!  extract_ontology(+Triples, -Ontology) is nondet.
  754%
  755%   Extract definition of an ontology
  756
  757extract_ontology(Triples, library(Name, URL, Options)) :-
  758    distinct(URL, ontology(Triples, URL)),
  759    file_base_name(URL, BaseName),
  760    file_name_extension(Name, _, BaseName),
  761    findall(Facet, facet(Triples, URL, Facet), Options0),
  762    sort(Options0, Options1),
  763    keep_specialized_facets(Options1, Options).
  764
  765ontology(Triples, URL) :-
  766    edge(Triples, URL, rdf:type, Type),
  767    ontology_type(Type).
  768
  769keep_specialized_facets(All, Special) :-
  770    exclude(more_general(All), All, Special).
  771
  772more_general(All, Facet) :-
  773    generalized(Facet, Special),
  774    memberchk(Special, All).
  775
  776generalized(imports(ontology, Path), imports(Other, Path)) :-
  777    dif(Other, ontology).
  778
  779ontology_type(X) :-
  780    (   rdf_equal(X, lib:'Ontology')
  781    ;   rdf_equal(X, lib:'Schema')
  782    ;   rdf_equal(X, lib:'Instances')
  783    ;   rdf_equal(X, void:'Dataset')
  784    ;   rdf_equal(X, void:'Linkset')
  785    ).
  786
  787%!  facet(+Triples, +File, -Facet) is nondet.
  788%
  789%   Enumerate facets about File from   Triples. Facets are described
  790%   with rdf_library_index/2.
  791
  792facet(Triples, File, title(Title)) :-
  793    edge(Triples, File, dcterms:title, literal(Title)).
  794facet(Triples, File, version(Version)) :-
  795    edge(Triples, File, owl:versionInfo, literal(Version)).
  796facet(Triples, File, comment(Comment)) :-
  797    edge(Triples, File, rdfs:comment, literal(Comment)).
  798facet(Triples, File, base_uri(BaseURI)) :-
  799    edge(Triples, File, lib:baseURI, BaseURI).
  800facet(Triples, File, claimed_source(Source)) :-
  801    edge(Triples, File, lib:source, Source).
  802facet(Triples, File, format(Format)) :-
  803    edge(Triples, File, lib:format, literal(Format)).
  804facet(Triples, File, blank_nodes(Mode)) :-
  805    edge(Triples, File, lib:blankNodes, literal(Mode)),
  806    must_be(oneof([share,noshare]), Mode).
  807facet(Triples, File, imports(ontology, Path)) :-
  808    edge(Triples, File, owl:imports, Path).
  809facet(Triples, File, imports(schema, Path)) :-
  810    edge(Triples, File, lib:schema, Path).
  811facet(Triples, File, imports(instances, Path)) :-
  812    edge(Triples, File, lib:instances, Path).
  813facet(Triples, File, imports(subset, Path)) :-
  814    edge(Triples, File, void:subset, Path).
  815facet(Triples, File, imports(data_dump, Path)) :-
  816    edge(Triples, File, void:dataDump, Path).
  817facet(Triples, File, provides_ns(NS)) :-
  818    edge(Triples, File, lib:providesNamespace, NSDecl),
  819    edge(Triples, NSDecl, lib:namespace, NS).
  820facet(Triples, File, uses_ns(NS)) :-
  821    edge(Triples, File, lib:usesNamespace, NSDecl),
  822    edge(Triples, NSDecl, lib:namespace, NS).
  823facet(Triples, File, virtual) :-
  824    (   edge(Triples, File, rdf:type, lib:'Virtual')
  825    ;   edge(Triples, File, rdf:type, void:'Dataset')
  826    ;   edge(Triples, File, rdf:type, void:'Linkset')
  827    ) -> true.
  828
  829%!  edge(+Triples, ?S, ?P, ?O) is nondet.
  830%
  831%   Like rdf_has/3 over a list of Triples.
  832
  833edge(Triples, S, P, O) :-
  834    nonvar(P),
  835    !,
  836    sub_p(SubP, P),
  837    member(rdf(S,SubP,O), Triples).
  838edge(Triples, S, P, O) :-
  839    member(rdf(S,SubP,O), Triples),
  840    sub_p(SubP, P).
  841
  842sub_p(P, P).
  843sub_p(Sub, P) :-
  844    (   nonvar(Sub)
  845    ->  sub_property_of(Sub, Sub1),
  846        sub_p(Sub1, P)
  847    ;   sub_property_of(Sub1, P),
  848        sub_p(Sub, Sub1)
  849    ).
  850
  851:- rdf_meta
  852    sub_property_of(r,r).  853
  854sub_property_of(void:subset,         owl:imports).
  855sub_property_of(dcterms:description, rdfs:comment).
  856sub_property_of(void:dataDump,       owl:imports).
  857sub_property_of(dc:title,            dcterms:title).
  858
  859%!  source_time(+Source, -Modified) is semidet.
  860%
  861%   Modified is the last modification time of Source.
  862%
  863%   @error  existence_error(Type, Source).
  864
  865source_time(URL, Modified) :-
  866    web_url(URL),
  867    !,
  868    http_open(URL, Stream,
  869              [ header(last_modified, Date),
  870                method(head)
  871              ]),
  872    close(Stream),
  873    Date \== '',
  874    parse_time(Date, Modified).
  875source_time(URL, Modified) :-
  876    uri_file_name(URL, File),
  877    !,
  878    time_file(File, Modified).
  879source_time(File, Modified) :-
  880    time_file(File, Modified).
  881
  882web_url(URL) :-
  883    sub_atom(URL, 0, _, _, 'http://').
  884
  885
  886%!  read_triples(+URL, -Triples) is det.
  887%
  888%   Read RDF/XML or Turtle file into a list of triples.
  889
  890read_triples(FileURL, Triples) :-
  891    uri_file_name(FileURL, File),
  892    !,
  893    (   file_name_extension(_, rdf, File)
  894    ->  load_rdf(File, Triples)
  895    ;   rdf_load_turtle(File, Triples, [])
  896    ).
  897read_triples(HTTPURL, Triples) :-
  898    file_name_extension(_, Ext, HTTPURL),
  899    setup_call_cleanup(
  900        http_open(HTTPURL, In, []),
  901        stream_triples(In, Ext, Triples),
  902        close(In)).
  903
  904stream_triples(Stream, rdf, Triples) :-
  905    load_rdf(stream(Stream), Triples).
  906stream_triples(Stream, ttl, Triples) :-
  907    rdf_load_turtle(stream(Stream), Triples, []).
  908
  909
  910manifest_file('void').                  % make order optional?
  911manifest_file('Manifest').
  912manifest_file('manifest').
  913
%   rdf_extension(?Ext)
%
%   File name extensions enumerated for RDF documents: `ttl` first,
%   then `rdf`.
%   NOTE(review): presumably combined with manifest_file/1 when a
%   directory is searched for its manifest -- the caller is not part
%   of this section; confirm.

rdf_extension(ttl).
rdf_extension(rdf).
  916
  917
  918%!  assert_ontology(+Manifest, +Term:library(Name, File, Facets)) is det.
  919%
  920%   Add ontology to our library.
  921%
  922%   @tbd    Proper behaviour of re-definition?
  923
  924assert_ontology(Manifest, Term) :-
  925    Term = library(Name, URL, Facets),
  926    (   library(Name, _URL2, Facets2)
  927    ->  memberchk(manifest(Manifest2), Facets2),
  928        print_message(warning, rdf(redefined(Manifest, Name, Manifest2)))
  929    ;   true
  930    ),
  931    assert(library_db(Name, URL,
  932                   [ manifest(Manifest)
  933                   | Facets
  934                   ])).
  935
  936
  937%!  library(?Id, ?URL, ?Facets)
  938%
  939%   Access DB for library information.
  940
  941library(Id, URL, Facets) :-
  942    nonvar(URL),
  943    normalize_url(URL, CanonicalURL),
  944    library_db(Id, CanonicalURL, Facets).
  945library(Id, URL, Facets) :-
  946    library_db(Id, URL, Facets).
  947
  948%!  normalize_url(+URL, -Normalized)
  949%
  950%   Like uri_normalized/2, but we  also   need  (platform dependent)
  951%   filename canonization.
  952
  953normalize_url(URL, CanonicalURL) :-
  954    uri_file_name(URL, File),
  955    !,
  956    absolute_file_name(File, CanFile),
  957    uri_file_name(CanonicalURL, CanFile).
  958normalize_url(URL, CanonicalURL) :-
  959    uri_normalized(URL, CanonicalURL).
  960
  961%!  define_namespace(NS:ns(Mnemonic, Namespace)) is det.
  962%
  963%   Add namespace declaration for Mnemonic.
  964
  965define_namespace(ns(Mnemonic, Namespace)) :-
  966    debug(rdf_library, 'Adding NS ~w = ~q', [Mnemonic, Namespace]),
  967    rdf_register_ns(Mnemonic, Namespace,
  968                    [
  969                        ]).
  970
  971%!  rdf_current_manifest(-URL) is nondet.
  972%
  973%   True if URL is the URL of a currently loaded manifest file.
  974
  975rdf_current_manifest(URL) :-
  976    manifest(URL, _Time).
  977
  978
  979
  980                 /*******************************
  981                 *            MESSAGES          *
  982                 *******************************/
  983
  984:- multifile
  985    prolog:message/3.  986
  987prolog:message(rdf(no_manifest(Path))) -->
  988    [ 'Directory ~w has no Manifest.{ttl,rdf} file'-[Path] ].
  989prolog:message(rdf(redefined(Manifest, Name, Manifest2))) -->
  990    [ '~w: Ontology ~w already defined in ~w'-
  991      [Manifest, Name, Manifest2]
  992    ].
  993prolog:message(rdf(manifest(loaded, Manifest))) -->
  994    [ 'Loaded RDF manifest ~w'-[Manifest]
  995    ].
  996prolog:message(rdf(load_conflict(C1, C2))) -->
  997    [ 'Conflicting loads: ~p <-> ~p'-[C1, C2] ].
  998prolog:message(rdf(multiple_source_for_graph(Graph, Sources))) -->
  999    [ 'Multiple sources for graph ~p:'-[Graph] ],
 1000    sources(Sources).
 1001prolog:message(rdf(loading(Files, Threads))) -->
 1002    [ 'Loading ~D files using ~D threads ...'-[Files, Threads] ].
 1003
 1004sources([]) --> [].
 1005sources([rdf_load(From, _Options)|T]) -->
 1006    [ nl, '\t~p'-[From] ],
 1007    sources(T)