View source with formatted comments or as raw
    1/*  Part of SWI-Prolog
    2
    3    Author:        Jan Wielemaker
    4    E-mail:        J.Wielemaker@vu.nl
    5    WWW:           http://www.swi-prolog.org
    6    Copyright (c)  2010-2013, University of Amsterdam
    7    All rights reserved.
    8
    9    Redistribution and use in source and binary forms, with or without
   10    modification, are permitted provided that the following conditions
   11    are met:
   12
   13    1. Redistributions of source code must retain the above copyright
   14       notice, this list of conditions and the following disclaimer.
   15
   16    2. Redistributions in binary form must reproduce the above copyright
   17       notice, this list of conditions and the following disclaimer in
   18       the documentation and/or other materials provided with the
   19       distribution.
   20
   21    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   22    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   23    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
   24    FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
   25    COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
   26    INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
   27    BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
   28    LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
   29    CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   30    LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
   31    ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
   32    POSSIBILITY OF SUCH DAMAGE.
   33*/
   34
   % Module header: exported predicates, library dependencies and the
   % option declaration for rdf_start_file/2.
   :- module(rdf_triple,
             [ rdf_triples/2,              % +Parsed, -Triples
               rdf_triples/3,              % +Parsed, -Triples, +Tail
               rdf_reset_ids/0,            % Reset gensym id's
               rdf_start_file/2,           % +Options, -Cleanup
               rdf_end_file/1,             % +Cleanup
               anon_prefix/1               % Prefix for anonymous resources
             ]).
   :- use_module(library(gensym)).         % gensym/2, reset_gensym/1
   :- use_module(library(option)).         % option/2, option/3
   :- use_module(library(uri)).            % iri_normalized/2
   :- use_module(rdf_parser).              % presumably provides rdf_name_space/1 -- confirm

   % Options accepted by the first argument of rdf_start_file/2.
   :- predicate_options(rdf_start_file/2, 1,
                        [ base_uri(atom),
                          blank_nodes(oneof([share,noshare]))
                        ]).

   53/** <module> Create triples from intermediate representation
   54
Convert the output of xml_to_rdf/3  from   library(rdf)  into  a list of
triples of the format described   below. The intermediate representation
should be regarded as a proprietary representation.
   58
   59        rdf(Subject, Predicate, Object).
   60
   61Where `Subject' is
   62
   63        * Atom
   64        The subject is a resource
   65
   66        * each(URI)
   67        URI is the URI of an RDF Bag
   68
   69        * prefix(Pattern)
   70        Pattern is the prefix of a fully qualified Subject URI
   71
   72And `Predicate' is
   73
   74        * Atom
   75        The predicate is always a resource
   76
   77And `Object' is
   78
   79        * Atom
   80        URI of Object resource
   81
   82        * literal(Value)
   83        Literal value (Either a single atom or parsed XML data)
   84*/
   85
   %!  rdf_triples(+Term, -Triples) is det.
   %!  rdf_triples(+Term, -Triples, +Tail) is det.
   %
   %   Translate Term, as produced by the RDF parser, into a list of
   %   rdf/3 triples.  rdf_triples/3 is the difference-list version:
   %   Triples is the generated list ending in Tail.  rdf_triples/2
   %   closes the list with [].
   %
   %   Term is either a single parsed description or a (possibly
   %   nested) list of them, so both one description at a time (e.g.
   %   when library(sgml) is used in `call-back' mode) and the whole
   %   content-list of an RDF element can be handled.  The identifier
   %   of the individual descriptions is not returned.

   rdf_triples(Parsed, Triples) :-
       rdf_triples(Parsed, Triples, []).

   rdf_triples([]) -->                     % end of a content-list
       !, [].
   rdf_triples([First|Rest]) -->           % content-list: translate in order
       !,
       rdf_triples(First),
       rdf_triples(Rest).
   rdf_triples(Description) -->            % single parsed object
       triples(Description, _MainId).
  111
  %!  triples(+Term, -Id)// is semidet.
  %
  %   DCG processing one element of the output of xml_to_rdf/3 and
  %   emitting rdf/3 terms. Id is unified to the identifier of the main
  %   description. Only description/3 and unparsed/1 terms are handled;
  %   any other term makes the rule fail, which property//6 relies on
  %   to detect objects that are not inlined descriptions.

  % Blank node sharing: a description without about/id (About unbound)
  % that is identical in type and properties to one seen before reuses
  % that subject and emits no triples. Otherwise a fresh id is
  % generated, the description is translated and then remembered.
  triples(description(Type, About, Props), Subject) -->
      { var(About),
        share_blank_nodes(true)
      },
      !,
      (   { shared_description(description(Type, Props), Subject)
          }
      ->  []
      ;   { make_id('_:Description', Id)
          },
          triples(description(Type, about(Id), Props), Subject),
          { assert_shared_description(description(Type, Props), Subject)
          }
      ).
  % Untyped description: only the properties produce triples.
  triples(description(description, IdAbout, Props), Subject) -->
      !,
      { description_id(IdAbout, Subject)
      },
      properties(Props, Subject).
  % Typed node: the type is emitted as an additional rdf:type property.
  triples(description(TypeURI, IdAbout, Props), Subject) -->
      { description_id(IdAbout, Subject)
      },
      properties([ rdf:type = TypeURI
                 | Props
                 ], Subject).
  % Content the parser could not interpret: report it, emit nothing and
  % return a generated `_:Error' id.
  triples(unparsed(Data), Id) -->
      { make_id('_:Error', Id),
        print_message(error, rdf(unparsed(Data)))
      },
      [].
  147
  148
  149                 /*******************************
  150                 *          DESCRIPTIONS        *
  151                 *******************************/
  152
  % Per-thread bookkeeping for the descriptions of the current file.
  :- thread_local
      node_id/2,                      % nodeID --> ID
      unique_id/1.                    % known rdf:ID

  %!  rdf_reset_node_ids is det.
  %
  %   Forget all rdf:ID values seen and all nodeID to blank node
  %   mappings made by description_id/2.

  rdf_reset_node_ids :-
      retractall(unique_id(_)),
      retractall(node_id(_, _)).
  160
  %!  description_id(+IdAbout, -Subject) is semidet.
  %
  %   Translate the identification part of a parsed description into
  %   the Subject used in the generated triples.

  % No identification at all: generate a blank node id.
  description_id(Id, Id) :-
      var(Id),
      !,
      make_id('_:Description', Id).
  description_id(about(Id), Id).
  % rdf:ID must be unique; a second definition is reported as an error
  % but the id is still used as subject.
  description_id(id(Id), Id) :-
      (   unique_id(Id)
      ->  print_message(error, rdf(redefined_id(Id)))
      ;   assert(unique_id(Id))
      ).
  % each(_) and prefix(_) are passed on unchanged.
  description_id(each(Id), each(Id)).
  description_id(prefix(Id), prefix(Id)).
  % nodeID: the same NodeID always maps to the same generated id until
  % rdf_reset_node_ids/0 clears the table.
  description_id(node(NodeID), Id) :-
      (   node_id(NodeID, Id)
      ->  true
      ;   make_id('_:Node', Id),
          assert(node_id(NodeID, Id))
      ).
  179
  %!  properties(+Props, +Subject)// is semidet.
  %
  %   Emit the triples for all properties of Subject.  The helper
  %   properties//5 threads the rdf:li counter (starting at 1) and the
  %   bag of statement ids (starting and ending as []) through the
  %   individual property//6 calls.

  properties(Props, Subject) -->
      properties(Props, 1, [], [], Subject).

  properties([], _, Bag, Bag, _) --> [].
  properties([Prop|Props], N0, Bag0, Bag, Subject) -->
      property(Prop, N0, N1, Bag0, Bag1, Subject),
      properties(Props, N1, Bag1, Bag, Subject).
  188
  %!  property(+Property, +N, -NN, ?BagH, ?BagT, +Subject)// is det.
  %
  %   Generate triples for {Subject,  Pred,   Object}.  Also generates
  %   triples for Object if necessary.
  %
  %   @param Property One of
  %
  %           * Pred = Object
  %           Used for normal statements
  %           * id(Id, Pred = Object)
  %           Used for reified statements
  %   @param N    Index to use if Pred is rdf:li
  %   @param NN   Index for the next property, see li_pred/4
  %   @param BagH Passed to statement//6 together with BagT; called
  %               through properties//2 both are always []

  % The clauses are tried in order: first try to translate Object as an
  % inlined description, then as a collection, and finally use it as a
  % plain object.
  property(Pred0 = Object, N, NN, BagH, BagT, Subject) --> % inlined object
      triples(Object, Id),
      !,
      { li_pred(Pred0, Pred, N, NN)
      },
      statement(Subject, Pred, Id, _, BagH, BagT).
  property(Pred0 = collection(Elems), N, NN, BagH, BagT, Subject) -->
      !,
      { li_pred(Pred0, Pred, N, NN)
      },
      statement(Subject, Pred, Object, _Id, BagH, BagT),
      collection(Elems, Object).
  property(Pred0 = Object, N, NN, BagH, BagT, Subject) -->
      !,
      { li_pred(Pred0, Pred, N, NN)
      },
      statement(Subject, Pred, Object, _Id, BagH, BagT).
  % Same three cases for statements that carry an explicit Id and are
  % therefore reified by statement//6.
  property(id(Id, Pred0 = Object), N, NN, BagH, BagT, Subject) -->
      triples(Object, ObjectId),
      !,
      { li_pred(Pred0, Pred, N, NN)
      },
      statement(Subject, Pred, ObjectId, Id, BagH, BagT).
  property(id(Id, Pred0 = collection(Elems)), N, NN, BagH, BagT, Subject) -->
      !,
      { li_pred(Pred0, Pred, N, NN)
      },
      statement(Subject, Pred, Object, Id, BagH, BagT),
      collection(Elems, Object).
  property(id(Id, Pred0 = Object), N, NN, BagH, BagT, Subject) -->
      { li_pred(Pred0, Pred, N, NN)
      },
      statement(Subject, Pred, Object, Id, BagH, BagT).
  234
  %!  statement(+Subject, +Pred, +Object, ?Id, +BagH, -BagT)// is det.
  %
  %   Add a statement to the model. If nonvar(Id), we reify the
  %   statement using the given Id.
  %
  %   If BagH is a non-empty list, its head is unified with Id, an
  %   unbound Id is filled by statement_id/1 and BagT is the remainder.
  %   Otherwise BagT = BagH. When called through properties//2, BagH is
  %   always [], so only statements with an explicit Id are reified.

  statement(Subject, Pred, Object, Id, BagH, BagT) -->
      rdf(Subject, Pred, Object),
      {   BagH = [Id|BagT]
      ->  statement_id(Id)
      ;   BagT = BagH
      },
      (   { nonvar(Id)
          }
      ->  rdf(Id, rdf:type, rdf:'Statement'),  % the four reification triples
          rdf(Id, rdf:subject, Subject),
          rdf(Id, rdf:predicate, Pred),
          rdf(Id, rdf:object, Object)
      ;   []
      ).
  254
  255
  %!  statement_id(?Id) is det.
  %
  %   Ensure Id is bound: an Id that is already given is left alone,
  %   an unbound one is filled with a fresh `_:Statement' identifier.

  statement_id(Id) :-
      (   var(Id)
      ->  make_id('_:Statement', Id)
      ;   true
      ).
  261
  %!  li_pred(+Pred0, -Pred, +Nth, -NextNth) is det.
  %
  %   Replace rdf:li by the numbered membership property rdf:'_<Nth>'
  %   and advance the counter.  Any other predicate is returned
  %   unchanged and leaves the counter alone.

  li_pred(rdf:li, rdf:Member, Nth, Next) :-
      !,
      atom_concat('_', Nth, Member),
      Next is Nth + 1.
  li_pred(Pred, Pred, Nth, Nth).
  271
  %!  collection(+Elems, -Id)// is semidet.
  %
  %   Emit the rdf:first/rdf:rest chain for the parsed elements Elems.
  %   Id is the identifier of the list: the expansion of rdf:nil for
  %   the empty collection, otherwise a generated `_:List' node. Each
  %   element is translated by triples//2 before its cell is emitted.

  collection([], Nil) -->
      { global_ref(rdf:nil, Nil) }.
  collection([Elem|Elems], ListId) -->
      triples(Elem, ElemId),
      { make_id('_:List', ListId) },
      rdf(ListId, rdf:type, rdf:'List'),
      rdf(ListId, rdf:first, ElemId),
      rdf(ListId, rdf:rest, RestId),      % RestId is bound by the recursion
      collection(Elems, RestId).
  288
  289
  %!  rdf(+Subject0, +Pred0, ?Object0)// is semidet.
  %
  %   Emit a single rdf/3 term after expanding NS:Local terms in the
  %   subject and predicate with global_ref/2 and in the object with
  %   global_obj/2.

  rdf(Subject0, Pred0, Object0) -->
      { global_ref(Subject0, Subject),
        global_ref(Pred0, Pred),
        global_obj(Object0, Object)
      },
      [ rdf(Subject, Pred, Object) ].
  296
  297
  %!  global_ref(+In, -Out) is semidet.
  %
  %   Expand a NS:Local term to a single atom.  If NS is the atom
  %   `rdf' and rdf_name_space/1 succeeds, its answer is used as
  %   prefix.  Otherwise NS itself is the prefix and the result is
  %   passed through iri_normalized/2.  Anything that is not NS:Local,
  %   including an unbound variable, is returned as is.

  global_ref(In, Out) :-
      nonvar(In),
      In = NS:Local,
      !,
      (   NS == rdf,
          rdf_name_space(RDF)
      ->  atom_concat(RDF, Local, Out)
      ;   atom_concat(NS, Local, Joined),
          iri_normalized(Joined, Out)
      ).
  global_ref(Term, Term).
  309
  %!  global_obj(?Local, -Global) is semidet.
  %
  %   Object version of global_ref/2. The clauses are tried in order.

  % Object not yet known (e.g. a collection id bound later): keep the
  % variable shared.
  global_obj(V, V) :-
      var(V),
      !.
  % Typed literal: only the datatype is expanded, the value is kept.
  global_obj(literal(type(Local, X)), literal(type(Global, X))) :-
      !,
      global_ref(Local, Global).
  % Any other literal is passed unchanged.
  global_obj(literal(X), literal(X)) :- !.
  % Resource: expand like a subject or predicate.
  global_obj(Local, Global) :-
      global_ref(Local, Global).
  319
  320
  321                 /*******************************
  322                 *             SHARING          *
  323                 *******************************/
  324
  % Per-thread state for sharing identical blank-node descriptions.
  :- thread_local
      shared_description/3,           % +Hash, +Term, -Subject
      share_blank_nodes/1,            % Boolean
      shared_nodes/1.                 % counter

  %!  reset_shared_descriptions is det.
  %
  %   Drop all remembered descriptions and the counter of shared
  %   nodes.  The share_blank_nodes/1 setting is not touched.

  reset_shared_descriptions :-
      retractall(shared_nodes(_)),
      retractall(shared_description(_, _, _)).
  333
  %!  shared_description(+Term, -Subject) is nondet.
  %
  %   True if Term was stored before by assert_shared_description/2
  %   with Subject.  Candidates are located through the term_hash/2
  %   value of Term.  Each answer increments the shared_nodes/1
  %   counter, which starts at 1 for the first hit.

  shared_description(Term, Subject) :-
      term_hash(Term, Hash),
      shared_description(Hash, Term, Subject),
      (   retract(shared_nodes(Count0))
      ->  Count is Count0 + 1
      ;   Count = 1
      ),
      assertz(shared_nodes(Count)).
  342
  343
  %!  assert_shared_description(+Term, +Subject) is det.
  %
  %   Remember that the description Term was emitted with Subject,
  %   stored under the term_hash/2 value of Term so that
  %   shared_description/2 can find it again.

  assert_shared_description(Term, Subject) :-
      term_hash(Term, Key),
      assertz(shared_description(Key, Term, Subject)).
  347
  348
  349                 /*******************************
  350                 *            START/END         *
  351                 *******************************/
  352
  %!  rdf_start_file(+Options, -Cleanup) is det.
  %
  %   Prepare for translating a file: clear the node-id and shared
  %   description tables left by a previous run and install the
  %   settings requested by the options blank_nodes(Share) and
  %   base_uri(URI).  Cleanup is a goal that removes these settings
  %   again; pass it to rdf_end_file/1.

  rdf_start_file(Options, Cleanup) :-
      reset_shared_descriptions,          % play safe: drop stale state
      rdf_reset_node_ids,
      set_bnode_sharing(Options, UndoSharing),
      set_anon_prefix(Options, UndoPrefix),
      add_cleanup(UndoSharing, UndoPrefix, Cleanup).
  363
  %!  rdf_end_file(:Cleanup) is det.
  %
  %   Finish the translation of an RDF file: clear the node-id table,
  %   report how many blank nodes were shared (if any), clear the
  %   shared description table and finally run Cleanup as returned by
  %   rdf_start_file/2.

  rdf_end_file(Cleanup) :-
      rdf_reset_node_ids,
      (   shared_nodes(Count)
      ->  print_message(informational, rdf(shared_blank_nodes(Count)))
      ;   true
      ),
      reset_shared_descriptions,
      call(Cleanup).
  376
  %!  set_bnode_sharing(+Options, -Cleanup) is det.
  %
  %   Process the option blank_nodes(Share), default `noshare'.  For
  %   `share', share_blank_nodes(true) is asserted and Cleanup is the
  %   erase/1 goal removing that clause.  For `noshare' nothing is
  %   changed and Cleanup is `true'.
  %
  %   @error domain_error(share, Share) for any other value.

  set_bnode_sharing(Options, Cleanup) :-
      option(blank_nodes(Mode), Options, noshare),
      (   Mode == noshare
      ->  Cleanup = true
      ;   Mode == share
      ->  assert(share_blank_nodes(true), Ref),
          Cleanup = erase(Ref)
      ;   throw(error(domain_error(share, Mode), _))
      ).
  386
  %!  set_anon_prefix(+Options, -Cleanup) is det.
  %
  %   Process the option base_uri(BaseURI).  If it is given and bound,
  %   a clause for anon_prefix/1 is added in front: '_:' if BaseURI is
  %   [], otherwise '_:<BaseURI>#'.  Cleanup is the erase/1 goal that
  %   removes the clause.  Without a usable option nothing is changed
  %   and Cleanup is `true'.

  set_anon_prefix(Options, Cleanup) :-
      (   option(base_uri(BaseURI), Options),
          nonvar(BaseURI)
      ->  (   BaseURI == []
          ->  Prefix = '_:'
          ;   atomic_list_concat(['_:', BaseURI, '#'], Prefix)
          ),
          asserta(anon_prefix(Prefix), Ref),
          Cleanup = erase(Ref)
      ;   Cleanup = true
      ).
  397
  %!  add_cleanup(+Cleanup1, +Cleanup2, -Cleanup) is det.
  %
  %   Combine two cleanup goals into one. A goal that is `true' is
  %   dropped; otherwise Cleanup is the conjunction (Cleanup1, Cleanup2).

  add_cleanup(true, X, X) :- !.
  add_cleanup(X, true, X) :- !.
  add_cleanup(X, Y, (X, Y)).
  401
  402
  403                 /*******************************
  404                 *             UTIL             *
  405                 *******************************/
  406
  %!  anon_prefix(-Prefix) is semidet.
  %
  %   If defined, it is the prefix used to generate a blank node.

  :- thread_local
      anon_prefix/1.

  %!  make_id(+For, -ID) is det.
  %
  %   Generate a new identifier using gensym/2.  The gensym base is
  %   For, preceded by the first anon_prefix/1 value if one is
  %   defined.

  make_id(For, ID) :-
      (   anon_prefix(Prefix)
      ->  atom_concat(Prefix, For, Base)
      ;   Base = For
      ),
      gensym(Base, ID).
  421
  %!  anon_base(?Base) is nondet.
  %
  %   Enumerates the gensym bases passed to make_id/2 in this module.
  %   rdf_reset_ids/0 resets the counter of each of them.  '_:Error'
  %   is the base used for unparsed content; it must be listed here,
  %   otherwise its counter would survive a reset and generated error
  %   ids would differ between otherwise identical runs.

  anon_base('_:Description').
  anon_base('_:Statement').
  anon_base('_:List').
  anon_base('_:Node').
  anon_base('_:Error').
  426
  427%!  rdf_reset_ids is det.
  428%
  429%   Utility predicate to reset the gensym counters for the various
  430%   generated identifiers.  This simplifies debugging and matching
  431%   output with the stored desired output (see rdf_test.pl).
  432
  433rdf_reset_ids :-
  434    anon_prefix(Prefix),
  435    !,
  436    (   anon_base(Base),
  437        atom_concat(Prefix, Base, X),
  438        reset_gensym(X),
  439        fail
  440    ;   true
  441    ).
  442rdf_reset_ids :-
  443    (   anon_base(Base),
  444        reset_gensym(Base),
  445        fail
  446    ;   true
  447    )