View source with formatted comments or as raw
    1/*  Part of SWI-Prolog
    2
    3    Author:        Jan Wielemaker
    4    E-mail:        J.Wielemaker@vu.nl
    5    WWW:           http://www.swi-prolog.org
    6    Copyright (c)  2010-2015, University of Amsterdam
    7    All rights reserved.
    8
    9    Redistribution and use in source and binary forms, with or without
   10    modification, are permitted provided that the following conditions
   11    are met:
   12
   13    1. Redistributions of source code must retain the above copyright
   14       notice, this list of conditions and the following disclaimer.
   15
   16    2. Redistributions in binary form must reproduce the above copyright
   17       notice, this list of conditions and the following disclaimer in
   18       the documentation and/or other materials provided with the
   19       distribution.
   20
   21    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   22    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   23    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
   24    FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
   25    COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
   26    INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
   27    BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
   28    LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
   29    CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   30    LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
   31    ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
   32    POSSIBILITY OF SUCH DAMAGE.
   33*/
   34
   35:- module(rdf,
   36          [ load_rdf/2,                 % +File, -Triples
   37            load_rdf/3,                 % +File, -Triples, :Options
   38            xml_to_rdf/3,               % +XML, -Triples, +Options
   39            process_rdf/3               % +File, :OnTriples, :Options
   40          ]).   41
   42:- meta_predicate
   43    load_rdf(+, -, :),
   44    process_rdf(+, :, :).   45
   46:- use_module(library(sgml)).           % Basic XML loading
   47:- use_module(library(option)).         % option/3
   48:- use_module(library(lists)).   49:- use_module(rdf_parser).              % Basic parser
   50:- use_module(rdf_triple).              % Generate triples
   51
   52%!  load_rdf(+File, -Triples) is det.
   53%!  load_rdf(+File, -Triples, :Options) is det.
   54%
   55%   Parse an XML file holding an RDF term into a list of RDF triples.
   56%   see rdf_triple.pl for a definition of the output format. Options:
   57%
   58%           * base_uri(+URI)
   59%           URI to use as base
   60%
   61%           * expand_foreach(+Bool)
   62%           Apply each(Container, Pred, Object) on the members of
   63%           Container
   64%
   65%           * namespaces(-Namespaces:list(NS=URL))
   66%           Return list of namespaces declared using xmlns:NS=URL in
   67%           the document.  This can be used to update the namespace
   68%           list with rdf_register_ns/2.
   69%
   70%   @see    Use process_rdf/3 for processing large documents in
   71%           _|call-back|_ style.
   72
   73load_rdf(File, Triples) :-
   74    load_rdf(File, Triples, []).
   75
   76load_rdf(File, Triples, M:Options0) :-
   77    entity_options(Options0, EntOptions, Options1),
   78    meta_options(load_meta_option, M:Options1, Options),
   79    init_ns_collect(Options, NSList),
   80    load_structure(File,
   81                   [ RDFElement
   82                   ],
   83                   [ dialect(xmlns),
   84                     space(sgml),
   85                     call(xmlns, rdf:on_xmlns)
   86                   | EntOptions
   87                   ]),
   88    rdf_start_file(Options, Cleanup),
   89    call_cleanup(xml_to_rdf(RDFElement, Triples0, Options),
   90                 rdf_end_file(Cleanup)),
   91    exit_ns_collect(NSList),
   92    post_process(Options, Triples0, Triples).
   93
   94entity_options([], [], []).
   95entity_options([H|T0], Entities, Rest) :-
   96    (   H = entity(_,_)
   97    ->  Entities = [H|ET],
   98        entity_options(T0, ET, Rest)
   99    ;   Rest = [H|RT],
  100        entity_options(T0, Entities, RT)
  101    ).
  102
  103load_meta_option(convert_typed_literal).
  104
  105%!  xml_to_rdf(+XML, -Triples, +Options)
  106
  107xml_to_rdf(XML, Triples, Options) :-
  108    is_list(Options),
  109    !,
  110    make_rdf_state(Options, State, _),
  111    xml_to_plrdf(XML, RDF, State),
  112    rdf_triples(RDF, Triples).
  113xml_to_rdf(XML, BaseURI, Triples) :-
  114    atom(BaseURI),
  115    !,
  116    xml_to_rdf(XML, Triples, [base_uri(BaseURI)]).
  117
  118
  119                 /*******************************
  120                 *       POST-PROCESSING        *
  121                 *******************************/
  122
  123post_process([], Triples, Triples).
  124post_process([expand_foreach(true)|T], Triples0, Triples) :-
  125    !,
  126    expand_each(Triples0, Triples1),
  127    post_process(T, Triples1, Triples).
  128post_process([_|T], Triples0, Triples) :-
  129    !,
  130    post_process(T, Triples0, Triples).
  131
  132
  133                 /*******************************
  134                 *            EXPAND            *
  135                 *******************************/
  136
  137expand_each(Triples0, Triples) :-
  138    select(rdf(each(Container), Pred, Object),
  139           Triples0, Triples1),
  140    !,
  141    each_triples(Triples1, Container, Pred, Object, Triples2),
  142    expand_each(Triples2, Triples).
  143expand_each(Triples, Triples).
  144
  145each_triples([], _, _, _, []).
  146each_triples([H0|T0], Container, P, O,
  147             [H0, rdf(S,P,O)|T]) :-
  148    H0 = rdf(Container, rdf:A, S),
  149    member_attribute(A),
  150    !,
  151    each_triples(T0, Container, P, O, T).
  152each_triples([H|T0], Container, P, O, [H|T]) :-
  153    each_triples(T0, Container, P, O, T).
  154
  155member_attribute(A) :-
  156    sub_atom(A, 0, _, _, '_').      % must check number?
  157
  158
  159                 /*******************************
  160                 *           BIG FILES          *
  161                 *******************************/
  162
  163%!  process_rdf(+Input, :OnObject, :Options)
  164%
  165%   Process RDF from Input. Input is either an atom or a term of the
  166%   format stream(Handle). For each   encountered  description, call
  167%   OnObject(+Triples) to handle the  triples   resulting  from  the
  168%   description. Defined Options are:
  169%
  170%           * base_uri(+URI)
  171%           Determines the reference URI.
  172%
  173%           * db(DB)
  174%           When loading from a stream, the source is taken from
  175%           this option or -if non-existent- from base_uri.
  176%
  177%           * lang(LanguageID)
  178%           Set initial language (as xml:lang)
  179%
  180%           * convert_typed_literal(:Convertor)
  181%           Call Convertor(+Type, +Content, -RDFObject) to create
  182%           a triple rdf(S, P, RDFObject) instead of rdf(S, P,
  183%           literal(type(Type, Content)).
  184%
  185%           *  namespaces(-Namespaces:list(NS=URL))
  186%           Return list of namespaces declared using xmlns:NS=URL in
  187%           the document.  This can be used to update the namespace
  188%           list with rdf_register_ns/2.
  189%
  190%           * entity(Name, Value)
  191%           Overrule entity values found in the file
  192%
  193%           * embedded(Boolean)
  194%           If =true=, do not give warnings if rdf:RDF is embedded
  195%           in other XML data.
  196
  197process_rdf(File, OnObject, M:Options0) :-
  198    is_list(Options0),
  199    !,
  200    entity_options(Options0, EntOptions, Options1),
  201    meta_options(load_meta_option, M:Options1, Options2),
  202    option(base_uri(BaseURI), Options2, ''),
  203    rdf_start_file(Options2, Cleanup),
  204    strip_module(OnObject, Module, Pred),
  205    b_setval(rdf_object_handler, Module:Pred),
  206    nb_setval(rdf_options, Options2),
  207    nb_setval(rdf_state, -),
  208    init_ns_collect(Options2, NSList),
  209    (   File = stream(In)
  210    ->  Source = BaseURI
  211    ;   is_stream(File)
  212    ->  In = File,
  213        option(graph(Source), Options2, BaseURI)
  214    ;   open(File, read, In, [type(binary)]),
  215        Close = In,
  216        Source = File
  217    ),
  218    new_sgml_parser(Parser, [dtd(DTD)]),
  219    def_entities(EntOptions, DTD),
  220    (   Source \== []
  221    ->  set_sgml_parser(Parser, file(Source))
  222    ;   true
  223    ),
  224    set_sgml_parser(Parser, dialect(xmlns)),
  225    set_sgml_parser(Parser, space(sgml)),
  226    do_process_rdf(Parser, In, NSList, Close, Cleanup, Options2).
  227process_rdf(File, BaseURI, OnObject) :-
  228    process_rdf(File, OnObject, [base_uri(BaseURI)]).
  229
  230def_entities([], _).
  231def_entities([entity(Name, Value)|T], DTD) :-
  232    !,
  233    def_entity(DTD, Name, Value),
  234    def_entities(T, DTD).
  235def_entities([_|T0], DTD) :-
  236    def_entities(T0, DTD).
  237
  238def_entity(DTD, Name, Value) :-
  239    open_dtd(DTD, [], Stream),
  240    xml_quote_attribute(Value, QValue),
  241    format(Stream, '<!ENTITY ~w "~w">~n', [Name, QValue]),
  242    close(Stream).
  243
  244
  245do_process_rdf(Parser, In, NSList, Close, Cleanup, Options) :-
  246    call_cleanup((   sgml_parse(Parser,
  247                                [ source(In),
  248                                  call(begin, on_begin),
  249                                  call(xmlns, on_xmlns)
  250                                | Options
  251                                ]),
  252                     exit_ns_collect(NSList)
  253                 ),
  254                 cleanup_process(Close, Cleanup, Parser)).
  255
  256cleanup_process(In, Cleanup, Parser) :-
  257    (   var(In)
  258    ->  true
  259    ;   close(In)
  260    ),
  261    free_sgml_parser(Parser),
  262    nb_delete(rdf_options),
  263    nb_delete(rdf_object_handler),
  264    nb_delete(rdf_state),
  265    nb_delete(rdf_nslist),
  266    rdf_end_file(Cleanup).
  267
  268on_begin(NS:'RDF', Attr, _) :-
  269    rdf_name_space(NS),
  270    !,
  271    nb_getval(rdf_options, Options),
  272    make_rdf_state(Options, State0, _),
  273    rdf_modify_state(Attr, State0, State),
  274    nb_setval(rdf_state, State).
  275on_begin(Tag, Attr, Parser) :-
  276    nb_getval(rdf_state, State),
  277    (   State == (-)
  278    ->  nb_getval(rdf_options, RdfOptions),
  279        (   memberchk(embedded(true), RdfOptions)
  280        ->  true
  281        ;   print_message(warning, rdf(unexpected(Tag, Parser)))
  282        )
  283    ;   get_sgml_parser(Parser, line(Start)),
  284        get_sgml_parser(Parser, file(File)),
  285        sgml_parse(Parser,
  286                   [ document(Content),
  287                     parse(content)
  288                   ]),
  289        b_getval(rdf_object_handler, OnTriples),
  290        element_to_plrdf(element(Tag, Attr, Content), Objects, State),
  291        rdf_triples(Objects, Triples),
  292        call(OnTriples, Triples, File:Start)
  293    ).
  294
  295%!  on_xmlns(+NS, +URL, +Parser)
  296%
  297%   Build up the list of   encountered xmlns:NS=URL declarations. We
  298%   use  destructive  assignment  here   as    an   alternative   to
  299%   assert/retract, ensuring thread-safety and better performance.
  300
  301on_xmlns(NS, URL, _Parser) :-
  302    (   nb_getval(rdf_nslist, List),
  303        List = list(L0)
  304    ->  nb_linkarg(1, List, [NS=URL|L0])
  305    ;   true
  306    ).
  307
  308init_ns_collect(Options, NSList) :-
  309    (   option(namespaces(NSList), Options, -),
  310        NSList \== (-)
  311    ->  nb_setval(rdf_nslist, list([]))
  312    ;   nb_setval(rdf_nslist, -),
  313        NSList = (-)
  314    ).
  315
  316exit_ns_collect(NSList) :-
  317    (   NSList == (-)
  318    ->  true
  319    ;   nb_getval(rdf_nslist, list(NSList))
  320    ).
  321
  322
  323
  324                 /*******************************
  325                 *            MESSAGES          *
  326                 *******************************/
  327
  328:- multifile
  329    prolog:message/3.  330
  331%       Catch messages.  sgml/4 is generated by the SGML2PL binding.
  332
  333prolog:message(rdf(unparsed(Data))) -->
  334    { phrase(unparse_xml(Data), XML)
  335    },
  336    [ 'RDF: Failed to interpret "~s"'-[XML] ].
  337prolog:message(rdf(shared_blank_nodes(N))) -->
  338    [ 'RDF: Shared ~D blank nodes'-[N] ].
  339prolog:message(rdf(not_a_name(Name))) -->
  340    [ 'RDF: argument to rdf:ID is not an XML name: ~p'-[Name] ].
  341prolog:message(rdf(redefined_id(Id))) -->
  342    [ 'RDF: rdf:ID ~p: multiple definitions'-[Id] ].
  343prolog:message(rdf(unexpected(Tag, Parser))) -->
  344    { get_sgml_parser(Parser, file(File)),
  345      get_sgml_parser(Parser, line(Line))
  346    },
  347    [ 'RDF: ~w:~d: Unexpected element ~w'-[File, Line, Tag] ].
  348
  349
  350                 /*******************************
  351                 *          XML-TO-TEXT         *
  352                 *******************************/
  353
  354unparse_xml([]) -->
  355    !,
  356    [].
  357unparse_xml([H|T]) -->
  358    !,
  359    unparse_xml(H),
  360    unparse_xml(T).
  361unparse_xml(Atom) -->
  362    { atom(Atom)
  363    },
  364    !,
  365    atom(Atom).
  366unparse_xml(element(Name, Attr, Content)) -->
  367    "<",
  368    identifier(Name),
  369    attributes(Attr),
  370    (   { Content == []
  371        }
  372    ->  "/>"
  373    ;   ">",
  374        unparse_xml(Content)
  375    ).
  376
  377attributes([]) -->
  378    [].
  379attributes([H|T]) -->
  380    attribute(H),
  381    attributes(T).
  382
  383attribute(Name=Value) -->
  384    " ",
  385    identifier(Name),
  386    "=",
  387    value(Value).
  388
  389identifier(NS:Local) -->
  390    !,
  391    "{", atom(NS), "}",
  392    atom(Local).
  393identifier(Local) -->
  394    atom(Local).
  395
  396atom(Atom, Text, Rest) :-
  397    atom_codes(Atom, Chars),
  398    append(Chars, Rest, Text).
  399
  400value(Value) -->
  401    { atom_codes(Value, Chars)
  402    },
  403    "\"",
  404    quoted(Chars),
  405    "\"".
  406
  407quoted([]) -->
  408    [].
  409quoted([H|T]) -->
  410    quote(H),
  411    !,
  412    quoted(T).
  413
  414quote(0'<) --> "&lt;".
  415quote(0'>) --> "&gt;".
  416quote(0'") --> "&quot;".
  417quote(0'&) --> "&amp;".
  418quote(X)   --> [X].
  419
  420
  421                 /*******************************
  422                 *             XREF             *
  423                 *******************************/
  424
  425:- multifile prolog:meta_goal/2.  426prolog:meta_goal(process_rdf(_,G,_), [G+2])