View source with formatted comments or as raw
    1/*  Part of SWI-Prolog
    2
    3    Author:        Jan Wielemaker and Matt Lilley
    4    E-mail:        J.Wielemaker@vu.nl
    5    WWW:           http://www.swi-prolog.org
    6    Copyright (c)  2016, CWI Amsterdam
    7    All rights reserved.
    8
    9    Redistribution and use in source and binary forms, with or without
   10    modification, are permitted provided that the following conditions
   11    are met:
   12
   13    1. Redistributions of source code must retain the above copyright
   14       notice, this list of conditions and the following disclaimer.
   15
   16    2. Redistributions in binary form must reproduce the above copyright
   17       notice, this list of conditions and the following disclaimer in
   18       the documentation and/or other materials provided with the
   19       distribution.
   20
   21    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   22    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   23    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
   24    FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
   25    COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
   26    INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
   27    BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
   28    LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
   29    CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   30    LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
   31    ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
   32    POSSIBILITY OF SUCH DAMAGE.
   33*/
   34
   35:- module(c14n2,
   36          [ xml_write_canonical/3       % +Stream, +Term, +Options
   37          ]).   38:- use_module(library(error)).   39:- use_module(library(option)).   40:- use_module(library(sgml_write)).   41:- use_module(library(dicts)).   42:- use_module(library(ordsets)).   43:- use_module(library(apply)).   44:- use_module(library(lists)).   45
   46/** <module> C14n2 canonical XML documents
   47
   48C14n2 specifies a _canonical_ XML document.   This library writes such a
   49document from an XML DOM as returned by   the  XML (or SGML) parser. The
   50process takes two steps:
   51
   52  - Normalise the DOM
   53  - Call xml_write/3 with appropriate flags
   54*/
   55
   56%!  xml_write_canonical(+Stream, +DOM, +Options) is det.
   57%
   58%   Write an XML DOM using the   canonical conventions as defined by
   59%   C14n2. Namespace declarations in the   canonical document depend
   60%   on the original namespace  declarations.   For  this  reason the
   61%   input document must be parsed   (see load_structure/3) using the
   62%   dialect `xmlns` and the option keep_prefix(true).
   63
   64xml_write_canonical(Stream, DOM, Options) :-
   65    option(method(Method), Options, 'http://www.w3.org/TR/2001/REC-xml-c14n-20010315'),
   66    xml_canonical_dom(DOM, CDOM, xml{in_ns:ns{}, out_ns:ns{}, is_root:true, method:Method}),
   67    xml_write(Stream, CDOM,
   68              [ header(false),
   69                layout(false),
   70                net(false)
   71              ]).
   72
   73xml_canonical_dom(Var, _, _) :-
   74    var(Var),
   75    !,
   76    instantiation_error(Var).
   77xml_canonical_dom(DOM, CDOM, Options) :-
   78    is_list(DOM),
   79    !,
   80    xml_canonical_list(DOM, CDOM, Options).
   81xml_canonical_dom(element( Name,  Attrs,  Content),
   82                  element(CName, CAttrs, CContent),
   83                  Options) :-
   84    !,
   85    InNS0  = Options.in_ns,
   86    OutNS0 = Options.out_ns,
   87    Method = Options.method,
   88    take_ns(Attrs, Method, Name, Attrs1, InNS0, InNS),
   89    partition(has_ns, Attrs1, AttrsWithNS0, AttrsSans0),
   90    sort(1, @<, AttrsWithNS0, AttrsWithNS1),
   91    sort(1, @<, AttrsSans0, AttrsSans),
   92    put_elemns(Name, CName, InNS, OutNS0, OutNS1, KillDefault),
   93    put_ns_attrs(AttrsWithNS1, AttrsWithNS, InNS, OutNS1, OutNS),
   94    ns_attrs(OutNS0, OutNS, NSAttrs),
   95    (  Options.is_root == true ->
   96           (  select(xmlns=DefaultNamespace, NSAttrs, NSAttrs0)
   97              % If there is a default namespace, it must come first, and I dont think sort/4 can sort on two keys at once
   98           -> findall(xmlns:NS=URI, member(xmlns:NS=URI, Attrs), RootNSAttrs, NSAttrs0),
   99              sort(2, @=<, RootNSAttrs, RootNSAttrs0),
  100              RootNSAttrs1 = [xmlns=DefaultNamespace|RootNSAttrs0]
  101           ;  Method == 'http://www.w3.org/2001/10/xml-exc-c14n#'
  102           -> RootNSAttrs1 = NSAttrs
  103           ;  findall(xmlns:NS=URI, member(xmlns:NS=URI, Attrs), RootNSAttrs, NSAttrs),
  104              sort(2, @<, RootNSAttrs, RootNSAttrs1)
  105           ),
  106           append([KillDefault, RootNSAttrs1, AttrsSans, AttrsWithNS], CAttrs)
  107    ;  append([KillDefault, NSAttrs, AttrsSans, AttrsWithNS], CAttrs)
  108    ),
  109    must_be(list, Content),
  110    xml_canonical_list(Content, CContent,
  111                       Options.put(_{in_ns:InNS, out_ns:OutNS, is_root:false})).
  112xml_canonical_dom(CDATA, CDATA, _) :-
  113    atomic(CDATA).
  114
  115has_ns(_NS:_Name=_Value).
  116
  117xml_canonical_list([], [], _).
  118xml_canonical_list([H0|T0], [H|T], Options) :-
  119    xml_canonical_dom(H0, H, Options),
  120    xml_canonical_list(T0, T, Options).
  121
  122take_ns([], _, _, [], NSList, NSList).
  123take_ns([H|T0], Method, Name, T, NSList0, NSList) :-
  124    xml_ns(H, NS, URL),
  125    !,
  126    (  include_ns(Name, Method, NS, URL)
  127    -> take_ns(T0, Method, Name, T, NSList0.put(NS, URL), NSList)
  128    ;  take_ns(T0, Method, Name, T, NSList0, NSList)
  129    ).
  130take_ns([H|T0], Method, Name, [H|T], NSList0, NSList) :-
  131    take_ns(T0, Method, Name, T, NSList0, NSList).
  132
  133include_ns(ns(Prefix, URI):_, 'http://www.w3.org/2001/10/xml-exc-c14n#', Prefix, URI):- !.
  134include_ns(_, 'http://www.w3.org/TR/2001/REC-xml-c14n-20010315', _, _):- !.
  135
  136
  137put_ns_attrs([], [], _, OutNS, OutNS).
  138put_ns_attrs([Name=Value|T0], [CName=Value|T], InNS, OutNS0, OutNS) :-
  139    put_ns(Name, CName, InNS, OutNS0, OutNS1),
  140    put_ns_attrs(T0, T, InNS, OutNS1, OutNS).
  141
  142put_elemns(Name, Name, _InNS, OutNS0, OutNS1, [xmlns='']) :-
  143    atom(Name),
  144    dict_pairs(OutNS0, _, Pairs),
  145    memberchk(URL-'', Pairs),
  146    !,
  147    del_dict(URL, OutNS0, '', OutNS1).
  148put_elemns(Name, CName, InNS, OutNS0, OutNS, []) :-
  149    put_ns(Name, CName, InNS, OutNS0, OutNS).
  150
  151put_ns(ns(NS, URL):Name, CName, _InNS, OutNS, OutNS) :-
  152    get_dict(URL, OutNS, NS),
  153    !,
  154    make_cname(NS:Name, CName).
  155put_ns(ns(NS, URL):Name, CName, _InNS, OutNS0, OutNS) :-
  156    !,
  157    make_cname(NS:Name, CName),
  158    OutNS = OutNS0.put(URL, NS).
  159put_ns(URL:Name, CName, _InNS, OutNS, OutNS) :-
  160    get_dict(URL, OutNS, NS),
  161    !,
  162    make_cname(NS:Name, CName).
  163put_ns(URL:Name, CName, InNS, OutNS0, OutNS) :-
  164    dict_pairs(InNS, _, Pairs),
  165    memberchk(NS-URL, Pairs),
  166    !,
  167    make_cname(NS:Name, CName),
  168    OutNS = OutNS0.put(URL, NS).
  169put_ns(Name, Name, _, OutNS, OutNS).
  170
  171ns_attrs(OutNS, OutNS, []) :- !.
  172ns_attrs(OutNS0, OutNS, NSAttrs) :-
  173    !,
  174    dict_keys(OutNS, URLs),
  175    dict_keys(OutNS0, URLs0),
  176    ord_subtract(URLs, URLs0, NewURLs),
  177    maplist(ns_attr(OutNS), NewURLs, NSAttrs0),
  178    sort(NSAttrs0, NSAttrs).
  179
  180ns_attr(Dict, URL, NSAttr) :-
  181    ns_simplify(xmlns:Dict.URL=URL, NSAttr).
  182
  183ns_simplify(xmlns:''=URL, xmlns=URL) :- !.
  184ns_simplify(xmlns:NS=URL, XMLNS=URL) :-
  185    make_cname(xmlns:NS, XMLNS).
  186
  187xml_ns(xmlns=URL, '', URL) :- !.
  188xml_ns(xmlns:NS=URL, NS, URL) :- !.
  189xml_ns(Name=URL, NS, URL) :-
  190    atom(Name),
  191    atom_concat('xmlns:', NS, Name).
  192
  193make_cname('':Name, Name) :- !.
  194make_cname(NS:Name, CName) :-
  195    atomic_list_concat([NS,Name], :, CName)