View source with formatted comments or as raw
    1/*  Part of SWI-Prolog
    2
    3    Author:        Jan Wielemaker
    4    E-mail:        J.Wielemaker@vu.nl
    5    WWW:           http://www.swi-prolog.org
    6    Copyright (c)  2010-2013, University of Amsterdam
    7    All rights reserved.
    8
    9    Redistribution and use in source and binary forms, with or without
   10    modification, are permitted provided that the following conditions
   11    are met:
   12
   13    1. Redistributions of source code must retain the above copyright
   14       notice, this list of conditions and the following disclaimer.
   15
   16    2. Redistributions in binary form must reproduce the above copyright
   17       notice, this list of conditions and the following disclaimer in
   18       the documentation and/or other materials provided with the
   19       distribution.
   20
   21    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   22    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   23    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
   24    FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
   25    COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
   26    INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
   27    BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
   28    LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
   29    CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   30    LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
   31    ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
   32    POSSIBILITY OF SUCH DAMAGE.
   33*/
   34
   35:- module(rdf_parser,
   36          [ xml_to_plrdf/3,             % +XMLTerm, -RDFTerm, +State
   37            element_to_plrdf/3,         % +ContentList, -RDFTerm, +State
   38            make_rdf_state/3,           % +Options, -State, -RestOptions
   39            rdf_modify_state/3,         % +XMLAttrs, +State0, -State
   40            rdf_name_space/1
   41          ]).   42:- use_module(rewrite).   43:- use_module(library(sgml)).           % xml_name/1
   44:- use_module(library(lists)).   45:- use_module(library(uri)).   46:- use_module(library(record)).   47
   48:- op(500, fx, \?).                     % Optional (attrs)
   49
   50term_expansion(F, T) :- rew_term_expansion(F, T).
   51goal_expansion(F, T) :- rew_goal_expansion(F, T).
   52
   53goal_expansion(attrs(Attrs, List), Goal) :-
   54    translate_attrs(List, Attrs, Goal).
   55
   56translate_attrs(Var, Attrs, rewrite(Var, Attrs)) :-
   57    var(Var),
   58    !.
   59translate_attrs([], _, true) :- !.
   60translate_attrs([H], Attrs, Goal) :-
   61    !,
   62    (   var(H)
   63    ->  Goal = rewrite(H, Attrs)
   64    ;   H = \?Optional
   65    ->  Goal = (   member(A, Attrs),
   66                   OptRewrite
   67               ->  true
   68               ;   true
   69               ),
   70        expand_goal(rewrite(\Optional, A), OptRewrite)
   71    ;   Goal = (   member(A, Attrs),
   72                   Rewrite
   73               ->  true
   74               ),
   75        expand_goal(rewrite(H, A), Rewrite)
   76    ).
   77translate_attrs([H|T], Attrs0, (G0, G1)) :-
   78    !,
   79    (   var(H)
   80    ->  G0 = rewrite(H, Attrs0),
   81        Attrs1 = Attrs0
   82    ;   H = \?Optional
   83    ->  G0 = (   select(A, Attrs0, Attrs1),
   84                 OptRewrite
   85             ->  true
   86             ;   Attrs1 = Attrs0
   87             ),
   88        expand_goal(rewrite(\Optional, A), OptRewrite)
   89    ;   G0 = (   select(A, Attrs0, Attrs1),
   90                 Rewrite
   91             ),
   92        expand_goal(rewrite(H, A), Rewrite)
   93    ),
   94    translate_attrs(T, Attrs1, G1).
   95translate_attrs(Rule, Attrs, Goal) :-
   96    expand_goal(rewrite(Rule, Attrs), Goal).
   97
   98
   99:- multifile rdf_name_space/1.  100:- dynamic   rdf_name_space/1.  101
  102%!  rdf_name_space(?URL) is nondet.
  103%
  104%   True if URL must be handled  as rdf: Determines special handling
  105%   of rdf:about, rdf:resource, etc.
  106
  107
  108rdf_name_space('http://www.w3.org/1999/02/22-rdf-syntax-ns#').
  109rdf_name_space('http://www.w3.org/TR/REC-rdf-syntax').
  110
  111
  112:- record
  113    rdf_state(base_uri='',
  114              lang='',
  115              ignore_lang=false,
  116              convert_typed_literal).  117
  118
  119%!  xml_to_plrdf(+RDFElementOrObject, -RDFTerm, +State)
  120%
  121%   Translate an XML (using namespaces)  term   into  an Prolog term
  122%   representing the RDF data.  This  term   can  then  be  fed into
  123%   rdf_triples/[2,3] to create a list of   RDF triples. State is an
  124%   instance of an rdf_state record.
  125
  126xml_to_plrdf(Element, RDF, State) :-
  127    (   is_list(Element)
  128    ->  rewrite(\xml_content_objects(RDF, State), Element)
  129    ;   rewrite(\xml_objects(RDF, State), Element)
  130    ).
  131
  132%!  element_to_plrdf(+DOM, -RDFTerm, +State)
  133%
  134%   Rewrite a single XML element.
  135
  136element_to_plrdf(Element, RDF, State) :-
  137    rewrite(\nodeElementList(RDF, State), [Element]).
  138
  139xml_objects(Objects, Options0) ::=
  140        E0,
  141        { modify_state(E0, Options0, E, Options), !,
  142          rewrite(\xml_objects(Objects, Options), E)
  143        }.
  144xml_objects(Objects, Options) ::=
  145        element((\rdf('RDF'), !),
  146                _,
  147                \nodeElementList(Objects, Options)),
  148        !.
  149xml_objects(Objects, Options) ::=
  150        element(_, _, \xml_content_objects(Objects, Options)).
  151
  152xml_content_objects([], _) ::=
  153        [].
  154xml_content_objects([H|T], Options) ::=
  155        [ \xml_objects(H, Options)
  156        | \xml_content_objects(T, Options)
  157        ].
  158
  159
  160nodeElementList([], _Options) ::=
  161        [], !.
  162nodeElementList(L, Options) ::=
  163        [ (\ws, !)
  164        | \nodeElementList(L, Options)
  165        ].
  166nodeElementList([H|T], Options) ::=
  167        [ \nodeElementOrError(H, Options)
  168        | \nodeElementList(T, Options)
  169        ].
  170
  171nodeElementOrError(H, Options) ::=
  172        \nodeElement(H, Options), !.
  173nodeElementOrError(unparsed(Data), _Options) ::=
  174        Data.
  175
  176nodeElement(description(Type, About, Properties), Options) ::=
  177        \description(Type, About, Properties, Options).
  178
  179
  180                 /*******************************
  181                 *          DESCRIPTION         *
  182                 *******************************/
  183
  184description(Type, About, Properties, Options0) ::=
  185        E0,
  186        { modify_state(E0, Options0, E, Options), !,
  187          rewrite(\description(Type, About, Properties, Options), E)
  188        }.
  189description(description, About, Properties, Options) ::=
  190        element(\rdf('Description'),
  191                \attrs([ \?idAboutAttr(About, Options)
  192                       | \propAttrs(PropAttrs, Options)
  193                       ]),
  194                \propertyElts(PropElts, Options)),
  195        { !, append(PropAttrs, PropElts, Properties)
  196        }.
  197description(Type, About, Properties, Options) ::=
  198        element(\name_uri(Type, Options),
  199                \attrs([ \?idAboutAttr(About, Options)
  200                       | \propAttrs(PropAttrs, Options)
  201                       ]),
  202                \propertyElts(PropElts, Options)),
  203        { append(PropAttrs, PropElts, Properties)
  204        }.
  205
  206propAttrs([], _) ::=
  207        [], !.
  208propAttrs([H|T], Options) ::=
  209        [ \propAttr(H, Options)
  210        | \propAttrs(T, Options)
  211        ].
  212
  213propAttr(rdf:type = URI, Options) ::=
  214        \rdf_or_unqualified(type) = \value_uri(URI, Options), !.
  215propAttr(Name = Literal, Options) ::=
  216        Name = Value,
  217        { mkliteral(Value, Literal, Options)
  218        }.
  219
  220propertyElts([], _) ::=
  221        [], !.
  222propertyElts(Elts, Options) ::=
  223        [ (\ws, !)
  224        | \propertyElts(Elts, Options)
  225        ].
  226propertyElts([H|T], Options) ::=
  227        [ \propertyElt(H, Options)
  228        | \propertyElts(T, Options)
  229        ].
  230
  231propertyElt(E, Options) ::=
  232        \propertyElt(Id, Name, Value, Options),
  233        { mkprop(Name, Value, Prop),
  234          (   var(Id)
  235          ->  E = Prop
  236          ;   E = id(Id, Prop)
  237          )
  238        }.
  239
  240mkprop(NS:Local, Value, rdf:Local = Value) :-
  241    rdf_name_space(NS),
  242    !.
  243mkprop(Name, Value, Name = Value).
  244
  245
  246propertyElt(Id, Name, Value, Options0) ::=
  247        E0,
  248        { modify_state(E0, Options0, E, Options), !,
  249          rewrite(\propertyElt(Id, Name, Value, Options), E)
  250        }.
  251propertyElt(Id, Name, Value, Options) ::=
  252        \literalPropertyElt(Id, Name, Value, Options), !.
  253propertyElt(_, Name, Literal, Options) ::=
  254        element(Name,
  255                \attrs([ \parseLiteral
  256                       ]),
  257                Content),
  258        { !,
  259          literal_value(Content, Literal, Options)
  260        }.
  261propertyElt(Id, Name, collection(Elements), Options) ::=
  262        element(Name,
  263                \attrs([ \parseCollection,
  264                         \?idAttr(Id, Options)
  265                       ]),
  266                \nodeElementList(Elements, Options)).
  267                                        % 5.14 emptyPropertyElt
  268propertyElt(Id, Name, Value, Options) ::=
  269        element(Name, A, \all_ws),
  270        { !,
  271          rewrite(\emptyPropertyElt(Id, Value, Options), A)
  272        }.
  273
  274propertyElt(_, Name, description(description, Id, Properties), Options) ::=
  275        element(Name,
  276                \attrs([ \parseResource,
  277                         \?idAboutAttr(Id, Options)
  278                       ]),
  279                \propertyElts(Properties, Options)),
  280        !.
  281propertyElt(Id, Name, Literal, Options) ::=
  282        element(Name,
  283                \attrs([ \?idAttr(Id, Options)
  284                       ]),
  285                [ Value ]),
  286        { atom(Value), !,
  287          mkliteral(Value, Literal, Options)
  288        }.
  289propertyElt(Id, Name, Value, Options) ::=
  290        element(Name,
  291                \attrs([ \?idAttr(Id, Options)
  292                       ]),
  293                \an_rdf_object(Value, Options)), !.
  294propertyElt(Id, Name, unparsed(Value), Options) ::=
  295        element(Name,
  296                \attrs([ \?idAttr(Id, Options)
  297                       ]),
  298                Value).
  299
  300literalPropertyElt(Id, Name, Literal, Options) ::=
  301        element(Name,
  302                \attrs([ \typeAttr(Type, Options),
  303                         \?idAttr(Id, Options)
  304                       ]),
  305                Content),
  306        { typed_literal(Type, Content, Literal, Options)
  307        }.
  308
  309emptyPropertyElt(Id, Literal, Options) ::=
  310        \attrs([ \?idAttr(Id, Options),
  311                 \?parseLiteral
  312               | \noMoreAttrs
  313               ]),
  314        { !,
  315          mkliteral('', Literal, Options)
  316        }.
  317emptyPropertyElt(Id,
  318                 description(description, About, Properties),
  319                 Options) ::=
  320        \attrs([ \?idAttr(Id, Options),
  321                 \?aboutResourceEmptyElt(About, Options),
  322                 \?parseResource
  323               | \propAttrs(Properties, Options)
  324               ]), !.
  325
  326aboutResourceEmptyElt(about(URI), Options) ::=
  327        \resourceAttr(URI, Options), !.
  328aboutResourceEmptyElt(node(URI), _Options) ::=
  329        \nodeIDAttr(URI).
  330
  331%!  literal_value(+In, -Value, +Options)
  332%
  333%   Create the literal value for rdf:parseType="Literal" attributes.
  334%   The content is the Prolog XML DOM tree for the literal.
  335%
  336%   @tbd    Note that the specs demand a canonical textual representation
  337%           of the XML data as a Unicode string.  For now the user can
  338%           achieve this using the convert_typed_literal hook.
  339
  340literal_value(Value, literal(type(rdf:'XMLLiteral', Value)), _).
  341
  342%!  mkliteral(+Atom, -Object, +Options)
  343%
  344%   Translate attribute value Atom into an RDF object using the
  345%   lang(Lang) option from Options.
  346
  347mkliteral(Text, literal(Val), Options) :-
  348    atom(Text),
  349    (   rdf_state_lang(Options, Lang),
  350        Lang \== ''
  351    ->  Val = lang(Lang, Text)
  352    ;   Val = Text
  353    ).
  354
  355%!  typed_literal(+Type, +Content, -Literal, +Options)
  356%
  357%   Handle a literal attribute with rdf:datatype=Type qualifier. NB:
  358%   possibly  it  is  faster  to  use  a  global  variable  for  the
  359%   conversion hook.
  360
  361typed_literal(Type, Content, literal(Object), Options) :-
  362    rdf_state_convert_typed_literal(Options, Convert),
  363    nonvar(Convert),
  364    !,
  365    (   catch(call(Convert, Type, Content, Object), E, true)
  366    ->  (   var(E)
  367        ->  true
  368        ;   Object = E
  369        )
  370    ;   Object = error(cannot_convert(Type, Content), _)
  371    ).
  372typed_literal(Type, [], literal(type(Type, '')), _Options) :- !.
  373typed_literal(Type, [Text], literal(type(Type, Text)), _Options) :- !.
  374typed_literal(Type, Content, literal(type(Type, Content)), _Options).
  375
  376
  377idAboutAttr(id(Id), Options) ::=
  378        \idAttr(Id, Options), !.
  379idAboutAttr(about(About), Options) ::=
  380        \aboutAttr(About, Options), !.
  381idAboutAttr(node(About), _Options) ::=
  382        \nodeIDAttr(About), !.
  383
  384%!  an_rdf_object(-Object, +OptionsURI)
  385%
  386%   Deals with an object, but there may be spaces around.  I'm still
  387%   not sure where to deal with these.  Best is to ask the XML parser
  388%   to get rid of them, So most likely this code will change if this
  389%   happens.
  390
  391an_rdf_object(Object, Options) ::=
  392        [ \nodeElement(Object, Options)
  393        ], !.
  394an_rdf_object(Object, Options) ::=
  395        [ (\ws, !)
  396        | \an_rdf_object(Object, Options)
  397        ].
  398an_rdf_object(Object, Options) ::=
  399        [ \nodeElement(Object, Options),
  400          \ws
  401        ], !.
  402
  403ws ::=
  404        A,
  405        { atom(A),
  406          atom_chars(A, Chars),
  407          all_blank(Chars), !
  408        }.
  409ws ::=
  410        pi(_).
  411
  412all_ws ::=
  413        [], !.
  414all_ws ::=
  415        [\ws | \all_ws].
  416
  417all_blank([]).
  418all_blank([H|T]) :-
  419    char_type(H, space),            % SWI-Prolog specific
  420    all_blank(T).
  421
  422
  423                 /*******************************
  424                 *         RDF ATTRIBUTES       *
  425                 *******************************/
  426
  427idAttr(Id, Options) ::=
  428        \rdf_or_unqualified('ID') = \uniqueid(Id, Options).
  429
  430aboutAttr(About, Options) ::=
  431        \rdf_or_unqualified(about) = \value_uri(About, Options).
  432
  433nodeIDAttr(About) ::=
  434        \rdf_or_unqualified(nodeID) = About.
  435
  436resourceAttr(URI, Options) ::=
  437        \rdf_or_unqualified(resource) = \value_uri(URI, Options).
  438
  439typeAttr(Type, Options) ::=
  440        \rdf_or_unqualified(datatype) = \value_uri(Type, Options).
  441
  442name_uri(URI, Options) ::=
  443        NS:Local,
  444        {   !, atom_concat(NS, Local, A),
  445            rewrite(\value_uri(URI, Options), A)
  446        }.
  447name_uri(URI, Options) ::=
  448        \value_uri(URI, Options).
  449
  450value_uri(URI, Options) ::=
  451        A,
  452        {   rdf_state_base_uri(Options, Base),
  453            (   Base \== []
  454            ->  iri_normalized(A, Base, URI)
  455            ;   URI = A
  456            )
  457        }.
  458
  459uniqueid(Id, Options) ::=
  460        A,
  461        {   unique_xml_name(A, HashID),
  462            make_globalid(HashID, Options, Id)
  463        }.
  464
  465unique_xml_name(Name, HashID) :-
  466    atom_concat(#, Name, HashID),
  467    (   xml_name(Name)
  468    ->  true
  469    ;   print_message(warning, rdf(not_a_name(Name)))
  470    ).
  471
  472make_globalid(In, Options, Id) :-
  473    rdf_state_base_uri(Options, Base),
  474    iri_normalized(In, Base, Id).
  475
  476parseLiteral    ::= \rdf_or_unqualified(parseType) = 'Literal'.
  477parseResource   ::= \rdf_or_unqualified(parseType) = 'Resource'.
  478parseCollection ::= \rdf_or_unqualified(parseType) = 'Collection'.
  479
  480
  481                 /*******************************
  482                 *           PRIMITIVES         *
  483                 *******************************/
  484
  485rdf(Tag) ::=
  486        NS:Tag,
  487        { rdf_name_space(NS), !
  488        }.
  489
  490rdf_or_unqualified(Tag) ::=
  491        Tag.
  492rdf_or_unqualified(Tag) ::=
  493        NS:Tag,
  494        { rdf_name_space(NS), !
  495        }.
  496
  497
  498                 /*******************************
  499                 *             BASICS           *
  500                 *******************************/
  501
  502/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  503This code is translated by the  goal_expansion/2   rule  at the start of
  504this file. We leave the original code for reference.
  505
  506attrs(Bag) ::=
  507        L0,
  508        { do_attrs(Bag, L0)
  509        }.
  510
  511do_attrs([], _) :- !.
  512do_attrs([\?H|T], L0) :- !,             % optional
  513        (   select(X, L0, L),
  514            rewrite(\H, X)
  515        ->  true
  516        ;   L = L0
  517        ),
  518        do_attrs(T, L).
  519do_attrs([H|T], L0) :-
  520        select(X, L0, L),
  521        rewrite(H, X), !,
  522        do_attrs(T, L).
  523do_attrs(C, L) :-
  524        rewrite(C, L).
  525- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
  526
  527%       \noMoreAttrs
  528%
  529%       Check attribute-list is empty.  Reserved xml: attributes are
  530%       excluded from this test.
  531
  532noMoreAttrs ::=
  533        [], !.
  534noMoreAttrs ::=
  535        [ xml:_=_
  536        | \noMoreAttrs
  537        ].
  538
  539%!  modify_state(+Element0, +Options0, -Element, -Options) is semidet.
  540%
  541%   If Element0 contains xml:base = Base, strip it from the
  542%   attributes list and update base_uri(_) in the Options
  543%
  544%   It Element0 contains xml:lang = Lang, strip it from the
  545%   attributes list and update lang(_) in the Options
  546%
  547%   Remove all xmlns=_, xmlns:_=_ and xml:_=_.  Only succeed
  548%   if something changed.
  549
  550modify_state(element(Name, Attrs0, Content), Options0,
  551             element(Name, Attrs,  Content), Options) :-
  552    modify_a_state(Attrs0, Options0, Attrs, Options),
  553    Attrs0 \== Attrs.
  554
  555rdf_modify_state(Attributes, State0, State) :-
  556    modify_a_state(Attributes, State0, _, State).
  557
  558
  559modify_a_state([], Options, [], Options).
  560modify_a_state([Name=Value|T0], Options0, T, Options) :-
  561    modify_a(Name, Value, Options0, Options1),
  562    !,
  563    modify_a_state(T0, Options1, T, Options).
  564modify_a_state([H|T0], Options0, [H|T], Options) :-
  565    modify_a_state(T0, Options0, T, Options).
  566
  567
  568modify_a(xml:base, Base1, Options0, Options) :-
  569    !,
  570    rdf_state_base_uri(Options0, Base0),
  571    remove_fragment(Base1, Base2),
  572    iri_normalized(Base2, Base0, Base),
  573    set_base_uri_of_rdf_state(Base, Options0, Options).
  574modify_a(xml:lang, Lang, Options0, Options) :-
  575    !,
  576    rdf_state_ignore_lang(Options0, false),
  577    !,
  578    set_lang_of_rdf_state(Lang, Options0, Options).
  579modify_a(xmlns, _, Options, Options).
  580modify_a(xmlns:_, _, Options, Options).
  581modify_a(xml:_, _, Options, Options).
  582
  583
  584%!  remove_fragment(+URI, -WithoutFragment)
  585%
  586%   When handling xml:base, we must delete the possible fragment.
  587
  588remove_fragment(URI, Plain) :-
  589    sub_atom(URI, B, _, _, #),
  590    !,
  591    sub_atom(URI, 0, B, _, Plain).
  592remove_fragment(URI, URI).
  593
  594
  595                 /*******************************
  596                 *     HELP PCE-EMACS A BIT     *
  597                 *******************************/
  598
  599:- multifile
  600    emacs_prolog_colours:term_colours/2,
  601    emacs_prolog_colours:goal_classification/2.  602
  603expand(c(X), _, X) :- !.
  604expand(In,   Pattern, Colours) :-
  605    compound(In),
  606    !,
  607    In =.. [F|Args],
  608    expand_list(Args, PatternArgs, ColourArgs),
  609    Pattern =.. [F|PatternArgs],
  610    Colours = functor(F) - ColourArgs.
  611expand(X, X, classify).
  612
  613expand_list([], [], []).
  614expand_list([H|T], [PH|PT], [CH|CT]) :-
  615    expand(H, PH, CH),
  616    expand_list(T, PT, CT).
  617
  618:- discontiguous
  619    term_expansion/2.  620
  621term_expansion(term_colours(C),
  622               emacs_prolog_colours:term_colours(Pattern, Colours)) :-
  623    expand(C, Pattern, Colours).
  624
  625term_colours((c(head(+(1))) ::= c(match), {c(body)})).
  626term_colours((c(head(+(1))) ::= c(match))).
  627
  628emacs_prolog_colours:goal_classification(\_, expanded).
  629
  630:- dynamic
  631    prolog:meta_goal/2.  632:- multifile
  633    prolog:meta_goal/2,
  634    prolog:called_by/2.  635
  636prolog:meta_goal(rewrite(A, _), [A]).
  637prolog:meta_goal(\A,            [A+1]).
  638
  639prolog:called_by(attrs(Attrs, _Term), Called) :-
  640    findall(G+1, sub_term(\?G, Attrs), Called, Tail),
  641    findall(G+1, sub_term(\G, Attrs), Tail)