View source with formatted comments or as raw
    1/*  Part of SWI-Prolog
    2
    3    Author:        Jan Wielemaker & Richard O'Keefe
    4    E-mail:        J.Wielemaker@vu.nl
    5    WWW:           http://www.swi-prolog.org
    6    Copyright (c)  2004-2016, University of Amsterdam
    7                              VU University Amsterdam
    8    All rights reserved.
    9
   10    Redistribution and use in source and binary forms, with or without
   11    modification, are permitted provided that the following conditions
   12    are met:
   13
   14    1. Redistributions of source code must retain the above copyright
   15       notice, this list of conditions and the following disclaimer.
   16
   17    2. Redistributions in binary form must reproduce the above copyright
   18       notice, this list of conditions and the following disclaimer in
   19       the documentation and/or other materials provided with the
   20       distribution.
   21
   22    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   23    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   24    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
   25    FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
   26    COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
   27    INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
   28    BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
   29    LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
   30    CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   31    LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
   32    ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
   33    POSSIBILITY OF SUCH DAMAGE.
   34*/
   35
   36:- module(sgml_write,
   37          [ html_write/2,               %          +Data, +Options
   38            html_write/3,               % +Stream, +Data, +Options
   39            sgml_write/2,               %          +Data, +Options
   40            sgml_write/3,               % +Stream, +Data, +Options
   41            xml_write/2,                %          +Data, +Options
   42            xml_write/3                 % +Stream, +Data, +Options
   43          ]).   44:- use_module(library(lists)).   45:- use_module(library(sgml)).   46:- use_module(library(assoc)).   47:- use_module(library(option)).   48:- use_module(library(error)).   49
   50:- predicate_options(xml_write/2, 2, [pass_to(xml_write/3, 3)]).   51:- predicate_options(xml_write/3, 3,
   52                     [ dtd(any),
   53                       doctype(atom),
   54                       public(atom),
   55                       system(atom),
   56                       header(boolean),
   57                       nsmap(list),
   58                       indent(nonneg),
   59                       layout(boolean),
   60                       net(boolean),
   61                       cleanns(boolean)
   62                     ]).   63
   64:- multifile
   65    xmlns/2.                        % NS, URI
   66
   67/** <module> XML/SGML writer module
   68
   69This library provides the inverse functionality   of  the sgml.pl parser
   70library, writing XML, SGML and HTML documents from the parsed output. It
   71is intended to allow rewriting in a  different dialect or encoding or to
   72perform document transformation in Prolog on the parsed representation.
   73
   74The current implementation is  particularly   keen  on getting character
   75encoding and the use of character  entities   right.  Some work has been
   76done providing layout, but space handling in   XML  and SGML make this a
   77very hazardous area.
   78
   79The Prolog-based low-level character and  escape   handling  is the real
   80bottleneck in this library and will probably be   moved  to C in a later
   81stage.
   82
   83@see    library(http/html_write) provides a high-level library for
   84        emitting HTML and XHTML.
   85*/
   86
   87%!  xml_write(+Data, +Options) is det.
   88%!  sgml_write(+Data, +Options) is det.
   89%!  html_write(+Data, +Options) is det.
   90%!  xml_write(+Stream, +Data, +Options) is det.
   91%!  sgml_write(+Stream, +Data, +Options) is det.
   92%!  html_write(+Stream, +Data, +Options) is det.
   93%
   94%   Write a term as created by the SGML/XML parser to a stream in
   95%   SGML or XML format.  Options:
   96%
   97%           * cleanns(Bool)
   98%           If `true` (default), remove duplicate `xmlns`
   99%           attributes.
  100%           * dtd(DTD)
  101%           The DTD.  This is needed for SGML documents that contain
  102%           elements with content model EMPTY.  Characters which may
  103%           not be written directly in the Stream's encoding will be
  104%           written using character data entities from the DTD if at
  105%           all possible, otherwise as numeric character references.
  106%           Note that the DTD will NOT be written out at all; as yet
  107%           there is no way to write out an internal subset,  though
  108%           it would not be hard to add one.
  109%
  110%           * doctype(DocType)
  111%           Document type for the SGML document type declaration.
  112%           If omitted it is taken from the root element.  There is
  113%           never any point in having this be disagree with the
  114%           root element.  A <!DOCTYPE> declaration will be written
  115%           if and only if at least one of doctype(_), public(_), or
  116%           system(_) is provided in Options.
  117%
  118%           * public(PubId)
  119%           The public identifier to be written in the <!DOCTYPE> line.
  120%
  121%           * system(SysId)
  122%           The system identifier to be written in the <!DOCTYPE> line.
  123%
  124%           * header(Bool)
  125%           If Bool is 'false', do not emit the <xml ...> header
  126%           line.  (xml_write/3 only)
  127%
  128%           * nsmap(Map:list(Id=URI))
  129%           When emitting embedded XML, assume these namespaces
  130%           are already defined from the environment.  (xml_write/3
  131%           only).
  132%
  133%           * indent(Indent)
  134%           Indentation of the document (for embedding)
  135%
  136%           * layout(Bool)
  137%           Emit/do not emit layout characters to make output
  138%           readable.
  139%
  140%           * net(Bool)
  141%           Use/do not use Null End Tags.
  142%           For XML, this applies only to empty elements, so you get
  143%
  144%           ==
  145%               <foo/>      (default, net(true))
  146%               <foo></foo> (net(false))
  147%           ==
  148%
  149%           For SGML, this applies to empty elements, so you get
  150%
  151%           ==
  152%               <foo>       (if foo is declared to be EMPTY in the DTD)
  153%               <foo></foo> (default, net(false))
  154%               <foo//      (net(true))
  155%           ==
  156%
  157%           and also to elements with character content not containing /
  158%
  159%           ==
  160%               <b>xxx</b>  (default, net(false))
  161%               <b/xxx/     (net(true)).
  162%           ==
  163%
  164%   Note that if the stream is UTF-8, the system will write special
  165%   characters as UTF-8 sequences, while if it is ISO Latin-1 it
  166%   will use (character) entities if there is a DTD that provides
  167%   them, otherwise it will use numeric character references.
  168
  169xml_write(Data, Options) :-
  170    current_output(Stream),
  171    xml_write(Stream, Data, Options).
  172
  173xml_write(Stream0, Data, Options) :-
  174    fix_user_stream(Stream0, Stream),
  175    (   stream_property(Stream, encoding(text))
  176    ->  set_stream(Stream, encoding(utf8)),
  177        call_cleanup(xml_write(Stream, Data, Options),
  178                     set_stream(Stream, encoding(text)))
  179    ;   new_state(xml, State),
  180        init_state(Options, State),
  181        get_state(State, nsmap, NSMap),
  182        add_missing_namespaces(Data, NSMap, Data1),
  183        emit_xml_encoding(Stream, Options),
  184        emit_doctype(Options, Data, Stream),
  185        write_initial_indent(State, Stream),
  186        emit(Data1, Stream, State)
  187    ).
  188
  189
  190sgml_write(Data, Options) :-
  191    current_output(Stream),
  192    sgml_write(Stream, Data, Options).
  193
  194sgml_write(Stream0, Data, Options) :-
  195    fix_user_stream(Stream0, Stream),
  196    (   stream_property(Stream, encoding(text))
  197    ->  set_stream(Stream, encoding(utf8)),
  198        call_cleanup(sgml_write(Stream, Data, Options),
  199                     set_stream(Stream, encoding(text)))
  200    ;   new_state(sgml, State),
  201        init_state(Options, State),
  202        write_initial_indent(State, Stream),
  203        emit_doctype(Options, Data, Stream),
  204        emit(Data, Stream, State)
  205    ).
  206
  207
  208html_write(Data, Options) :-
  209    current_output(Stream),
  210    html_write(Stream, Data, Options).
  211
  212html_write(Stream, Data, Options) :-
  213    sgml_write(Stream, Data,
  214               [ dtd(html)
  215               | Options
  216               ]).
  217
  218fix_user_stream(user, user_output) :- !.
  219fix_user_stream(Stream, Stream).
  220
  221
  222init_state([], _).
  223init_state([H|T], State) :-
  224    update_state(H, State),
  225    init_state(T, State).
  226
  227update_state(dtd(DTD), State) :-
  228    !,
  229    (   atom(DTD)
  230    ->  dtd(DTD, DTDObj)
  231    ;   DTDObj = DTD
  232    ),
  233    set_state(State, dtd, DTDObj),
  234    dtd_character_entities(DTDObj, EntityMap),
  235    set_state(State, entity_map, EntityMap).
  236update_state(nsmap(Map), State) :-
  237    !,
  238    set_state(State, nsmap, Map).
  239update_state(cleanns(Bool), State) :-
  240    !,
  241    must_be(boolean, Bool),
  242    set_state(State, cleanns, Bool).
  243update_state(indent(Indent), State) :-
  244    !,
  245    must_be(integer, Indent),
  246    set_state(State, indent, Indent).
  247update_state(layout(Bool), State) :-
  248    !,
  249    must_be(boolean, Bool),
  250    set_state(State, layout, Bool).
  251update_state(doctype(_), _) :- !.
  252update_state(public(_),  _) :- !.
  253update_state(system(_),  _) :- !.
  254update_state(net(Bool), State) :-
  255    !,
  256    must_be(boolean, Bool),
  257    set_state(State, net, Bool).
  258update_state(header(Bool), _) :-
  259    !,
  260    must_be(boolean, Bool).
  261update_state(Option, _) :-
  262    domain_error(xml_write_option, Option).
  263
  264%       emit_xml_encoding(+Stream, +Options)
  265%
  266%       Emit the XML fileheader with   encoding information. Setting the
  267%       right encoding on the output stream  must be done before calling
  268%       xml_write/3.
  269
  270emit_xml_encoding(Out, Options) :-
  271    option(header(Hdr), Options, true),
  272    Hdr == true,
  273    !,
  274    stream_property(Out, encoding(Encoding)),
  275    (   (   Encoding == utf8
  276        ;   Encoding == wchar_t
  277        )
  278    ->  format(Out, '<?xml version="1.0" encoding="UTF-8"?>~n~n', [])
  279    ;   Encoding == iso_latin_1
  280    ->  format(Out, '<?xml version="1.0" encoding="ISO-8859-1"?>~n~n', [])
  281    ;   domain_error(xml_encoding, Encoding)
  282    ).
  283emit_xml_encoding(_, _).
  284
  285
  286%!  emit_doctype(+Options, +Data, +Stream)
  287%
  288%   Emit the document-type declaration.
  289%   There is a problem with the first clause if we are emitting SGML:
  290%   the SGML DTDs for HTML up to HTML 4 *do not allow* any 'version'
  291%   attribute; so the only time this is useful is when it is illegal!
  292
  293emit_doctype(_Options, Data, Out) :-
  294    (   Data = [_|_], memberchk(element(html,Att,_), Data)
  295    ;   Data = element(html,Att,_)
  296    ),
  297    memberchk(version=Version, Att),
  298    !,
  299    format(Out, '<!DOCTYPE HTML PUBLIC "~w">~n~n', [Version]).
  300emit_doctype(Options, Data, Out) :-
  301    (   memberchk(public(PubId), Options) -> true
  302    ;   PubId = (-)
  303    ),
  304    (   memberchk(system(SysId), Options) -> true
  305    ;   SysId = (-)
  306    ),
  307    \+ (PubId == (-),
  308        SysId == (-),
  309        \+ memberchk(doctype(_), Options)
  310    ),
  311    (   Data = element(DocType,_,_)
  312    ;   Data = [_|_], memberchk(element(DocType,_,_), Data)
  313    ;   memberchk(doctype(DocType), Options)
  314    ),
  315    !,
  316    write_doctype(Out, DocType, PubId, SysId).
  317emit_doctype(_, _, _).
  318
  319write_doctype(Out, DocType, -, -) :-
  320    !,
  321    format(Out, '<!DOCTYPE ~w []>~n~n', [DocType]).
  322write_doctype(Out, DocType, -, SysId) :-
  323    !,
  324    format(Out, '<!DOCTYPE ~w SYSTEM "~w">~n~n', [DocType,SysId]).
  325write_doctype(Out, DocType, PubId, -) :-
  326    !,
  327    format(Out, '<!DOCTYPE ~w PUBLIC "~w">~n~n', [DocType,PubId]).
  328write_doctype(Out, DocType, PubId, SysId) :-
  329    format(Out, '<!DOCTYPE ~w PUBLIC "~w" "~w">~n~n', [DocType,PubId,SysId]).
  330
  331
  332%!  emit(+Element, +Out, +State, +Options)
  333%
  334%   Emit a single element
  335
  336emit(Var, _, _) :-
  337    var(Var),
  338    !,
  339    instantiation_error(Var).
  340emit([], _, _) :- !.
  341emit([H|T], Out, State) :-
  342    !,
  343    emit(H, Out, State),
  344    emit(T, Out, State).
  345emit(CDATA, Out, State) :-
  346    atomic(CDATA),
  347    !,
  348    sgml_write_content(Out, CDATA, State).
  349emit(Element, Out, State) :-
  350    \+ \+ emit_element(Element, Out, State).
  351
  352emit_element(pi(PI), Out, State) :-
  353    !,
  354    get_state(State, entity_map, EntityMap),
  355    write(Out, <?),
  356    write_quoted(Out, PI, '', EntityMap),
  357    (   get_state(State, dialect, xml) ->
  358        write(Out, ?>)
  359    ;   write(Out, >)
  360    ).
  361emit_element(element(Name, Attributes, Content), Out, State) :-
  362    !,
  363    must_be(list, Attributes),
  364    must_be(list, Content),
  365    (   get_state(State, dialect, xml)
  366    ->  update_nsmap(Attributes, CleanAttrs, State),
  367        (   get_state(State, cleanns, true)
  368        ->  WriteAttrs = CleanAttrs
  369        ;   WriteAttrs = Attributes
  370        )
  371    ;   WriteAttrs = Attributes
  372    ),
  373    att_length(WriteAttrs, State, Alen),
  374    (   Alen > 60,
  375        get_state(State, layout, true)
  376    ->  Sep = nl,
  377        AttIndent = 4
  378    ;   Sep = sp,
  379        AttIndent = 0
  380    ),
  381    put_char(Out, '<'),
  382    emit_name(Name, Out, State),
  383    (   AttIndent > 0
  384    ->  \+ \+ ( inc_indent(State, AttIndent),
  385                attributes(WriteAttrs, Sep, Out, State)
  386              )
  387    ;   attributes(WriteAttrs, Sep, Out, State)
  388    ),
  389    content(Content, Out, Name, State).
  390emit_element(E, _, _) :-
  391    type_error(xml_dom, E).
  392
  393attributes([], _, _, _).
  394attributes([H|T], Sep, Out, State) :-
  395    (   Sep == nl
  396    ->  write_indent(State, Out)
  397    ;   put_char(Out, ' ')
  398    ),
  399    attribute(H, Out, State),
  400    attributes(T, Sep, Out, State).
  401
  402attribute(Name=Value, Out, State) :-
  403    emit_name(Name, Out, State),
  404    put_char(Out, =),
  405    sgml_write_attribute(Out, Value, State).
  406
  407att_length(Atts, State, Len) :-
  408    att_length(Atts, State, 0, Len).
  409
  410att_length([], _, Len, Len).
  411att_length([A0|T], State, Len0, Len) :-
  412    alen(A0, State, AL),
  413    Len1 is Len0 + 1 + AL,
  414    att_length(T, State, Len1, Len).
  415
  416alen(ns(NS, _URI):Name=Value, _State, Len) :-
  417    !,
  418    atom_length(Value, AL),
  419    vlen(Name, NL),
  420    atom_length(NS, NsL),
  421    Len is AL+NL+NsL+3.
  422alen(URI:Name=Value, State, Len) :-
  423    !,
  424    atom_length(Value, AL),
  425    vlen(Name, NL),
  426    get_state(State, nsmap, Nsmap),
  427    (   memberchk(NS=URI, Nsmap)
  428    ->  atom_length(NS, NsL)
  429    ;   atom_length(URI, NsL)
  430    ),
  431    Len is AL+NL+NsL+3.
  432alen(Name=Value, _, Len) :-
  433    atom_length(Name, NL),
  434    vlen(Value, AL),
  435    Len is AL+NL+3.
  436
  437vlen(Value, Len) :-
  438    is_list(Value),
  439    !,
  440    vlen_list(Value, 0, Len).
  441vlen(Value, Len) :-
  442    atom_length(Value, Len).
  443
  444vlen_list([], L, L).
  445vlen_list([H|T], L0, L) :-
  446    atom_length(H, HL),
  447    (   L0 == 0
  448    ->  L1 is L0 + HL
  449    ;   L1 is L0 + HL + 1
  450    ),
  451    vlen_list(T, L1, L).
  452
  453
  454emit_name(Name, Out, _) :-
  455    atom(Name),
  456    !,
  457    write(Out, Name).
  458emit_name(ns(NS,_URI):Name, Out, _State) :-
  459    !,
  460    (  NS == ''
  461    -> write(Out, Name)
  462    ;  format(Out, '~w:~w', [NS, Name])
  463    ).
  464emit_name(URI:Name, Out, State) :-
  465    get_state(State, nsmap, NSMap),
  466    memberchk(NS=URI, NSMap),
  467    !,
  468    (   NS == []
  469    ->  write(Out, Name)
  470    ;   format(Out, '~w:~w', [NS, Name])
  471    ).
  472emit_name(Term, Out, _) :-              % error?
  473    write(Out, Term).
  474
  475%!  update_nsmap(+Attributes, -Attributes1, !State)
  476%
  477%   Modify the nsmap of State to reflect modifications due to xmlns
  478%   arguments.
  479%
  480%   @arg    Attributes1 is a copy of Attributes with all redundant
  481%           namespace attributes deleted.
  482
  483update_nsmap(Attributes, Attributes1, State) :-
  484    get_state(State, nsmap, Map0),
  485    update_nsmap(Attributes, Attributes1, Map0, Map),
  486    set_state(State, nsmap, Map).
  487
  488update_nsmap([], [], Map, Map).
  489update_nsmap([xmlns:NS=URI|T], Attrs, Map0, Map) :-
  490    !,
  491    (   memberchk(NS=URI, Map0)
  492    ->  update_nsmap(T, Attrs, Map0, Map)
  493    ;   set_nsmap(NS, URI, Map0, Map1),
  494        Attrs = [xmlns:NS=URI|Attrs1],
  495        update_nsmap(T, Attrs1, Map1, Map)
  496    ).
  497update_nsmap([xmlns=URI|T], Attrs, Map0, Map) :-
  498    !,
  499    (   memberchk([]=URI, Map0)
  500    ->  update_nsmap(T, Attrs, Map0, Map)
  501    ;   set_nsmap([], URI, Map0, Map1),
  502        Attrs = [xmlns=URI|Attrs1],
  503        update_nsmap(T, Attrs1, Map1, Map)
  504    ).
  505update_nsmap([H|T0], [H|T], Map0, Map) :-
  506    !,
  507    update_nsmap(T0, T, Map0, Map).
  508
  509set_nsmap(NS, URI, Map0, Map) :-
  510    select(NS=_, Map0, Map1),
  511    !,
  512    Map = [NS=URI|Map1].
  513set_nsmap(NS, URI, Map, [NS=URI|Map]).
  514
  515
  516%!  content(+Content, +Out, +Element, +State, +Options)
  517%
  518%   Emit the content part of a structure  as well as the termination
  519%   for the content. For empty content   we have three versions: XML
  520%   style '/>', SGML declared EMPTY element (nothing) or normal SGML
  521%   element (we must close with the same element name).
  522
  523content([], Out, Element, State) :-    % empty element
  524    !,
  525    (   get_state(State, net, true)
  526    ->  (   get_state(State, dialect, xml) ->
  527            write(Out, />)
  528        ;   empty_element(State, Element) ->
  529            write(Out, >)
  530        ;   write(Out, //)
  531        )
  532    ;/* get_state(State, net, false) */
  533        write(Out, >),
  534        (   get_state(State, dialect, sgml),
  535            empty_element(State, Element)
  536        ->  true
  537        ;   emit_close(Element, Out, State)
  538        )
  539    ).
  540content([CDATA], Out, Element, State) :-
  541    atomic(CDATA),
  542    !,
  543    (   get_state(State, dialect, sgml),
  544        get_state(State, net, true),
  545        \+ sub_atom(CDATA, _, _, _, /),
  546        write_length(CDATA, Len, []),
  547        Len < 20
  548    ->  write(Out, /),
  549        sgml_write_content(Out, CDATA, State),
  550        write(Out, /)
  551    ;   verbatim_element(Element, State)
  552    ->  write(Out, >),
  553        write(Out, CDATA),
  554        emit_close(Element, Out, State)
  555    ;/* XML or not NET */
  556            write(Out, >),
  557        sgml_write_content(Out, CDATA, State),
  558        emit_close(Element, Out, State)
  559    ).
  560content(Content, Out, Element, State) :-
  561    get_state(State, layout, true),
  562    /* If xml:space='preserve' is present, */
  563        /* we MUST NOT tamper with white space at all. */
  564        \+ (Element = element(_,Atts,_),
  565        memberchk('xml:space'=preserve, Atts)
  566    ),
  567    element_content(Content, Elements),
  568    !,
  569    format(Out, >, []),
  570    \+ \+ (
  571        inc_indent(State),
  572        write_element_content(Elements, Out, State)
  573    ),
  574    write_indent(State, Out),
  575    emit_close(Element, Out, State).
  576content(Content, Out, Element, State) :-
  577    format(Out, >, []),
  578    write_mixed_content(Content, Out, Element, State),
  579    emit_close(Element, Out, State).
  580
  581verbatim_element(Element, State) :-
  582    verbatim_element(Element),
  583    get_state(State, dtd, DTD),
  584    DTD \== (-),
  585    dtd_property(DTD, doctype(html)).
  586
  587verbatim_element(script).
  588verbatim_element(style).
  589
  590emit_close(Element, Out, State) :-
  591    write(Out, '</'),
  592    emit_name(Element, Out, State),
  593    write(Out, '>').
  594
  595
  596write_mixed_content([], _, _, _).
  597write_mixed_content([H|T], Out, Element, State) :-
  598    write_mixed_content_element(H, Out, State),
  599    write_mixed_content(T, Out, Element, State).
  600
  601write_mixed_content_element(H, Out, State) :-
  602    (   atom(H)
  603    ->  sgml_write_content(Out, H, State)
  604    ;   string(H)
  605    ->  sgml_write_content(Out, H, State)
  606    ;   functor(H, element, 3)
  607    ->  emit(H, Out, State)
  608    ;   functor(H, pi, 1)
  609    ->  emit(H, Out, State)
  610    ;   var(H)
  611    ->  instantiation_error(H)
  612    ;   H = sdata(Data)             % cannot be written without entity!
  613    ->  print_message(warning, sgml_write(sdata_as_cdata(Data))),
  614        sgml_write_content(Out, Data, State)
  615    ;   type_error(sgml_content, H)
  616    ).
  617
  618
  619element_content([], []).
  620element_content([element(Name,Atts,C)|T0], [element(Name,Atts,C)|T]) :-
  621    !,
  622    element_content(T0, T).
  623element_content([Blank|T0], T) :-
  624    atom(Blank),
  625    atom_codes(Blank, Codes),
  626    all_blanks(Codes),
  627    element_content(T0, T).
  628
  629all_blanks([]).
  630all_blanks([H|T]) :-
  631    code_type(H, space),
  632    all_blanks(T).
  633
  634write_element_content([], _, _).
  635write_element_content([H|T], Out, State) :-
  636    write_indent(State, Out),
  637    emit(H, Out, State),
  638    write_element_content(T, Out, State).
  639
  640
  641                 /*******************************
  642                 *           NAMESPACES         *
  643                 *******************************/
  644
  645%!  add_missing_namespaces(+DOM0, +NsMap, -DOM)
  646%
  647%   Add xmlns:NS=URI definitions to the toplevel element(s) to
  648%   deal with missing namespaces.
  649
  650add_missing_namespaces([], _, []) :- !.
  651add_missing_namespaces([H0|T0], Def, [H|T]) :-
  652    !,
  653    add_missing_namespaces(H0, Def, H),
  654    add_missing_namespaces(T0, Def, T).
  655add_missing_namespaces(Elem0, Def, Elem) :-
  656    Elem0 = element(Name, Atts0, Content),
  657    !,
  658    missing_namespaces(Elem0, Def, Missing),
  659    (   Missing == []
  660    ->  Elem = Elem0
  661    ;   add_missing_ns(Missing, Atts0, Atts),
  662        Elem = element(Name, Atts, Content)
  663    ).
  664add_missing_namespaces(DOM, _, DOM).    % CDATA, etc.
  665
  666add_missing_ns([], Atts, Atts).
  667add_missing_ns([H|T], Atts0, Atts) :-
  668    generate_ns(H, NS),
  669    add_missing_ns(T, [xmlns:NS=H|Atts0], Atts).
  670
  671%!  generate_ns(+URI, -NS) is det.
  672%
  673%   Generate a namespace (NS) identifier for URI.
  674
  675generate_ns(URI, NS) :-
  676    xmlns(NS, URI),
  677    !.
  678generate_ns(URI, NS) :-
  679    default_ns(URI, NS),
  680    !.
  681generate_ns(_, NS) :-
  682    gensym(xns, NS).
  683
  684%!  xmlns(?NS, ?URI) is nondet.
  685%
  686%   Hook to define human readable  abbreviations for XML namespaces.
  687%   xml_write/3 tries these locations:
  688%
  689%     1. This hook
  690%     2. Defaults (see below)
  691%     3. rdf_db:ns/2 for RDF-DB integration
  692%
  693%   Default XML namespaces are:
  694%
  695%     | xsi    | http://www.w3.org/2001/XMLSchema-instance |
  696%     | xs     | http://www.w3.org/2001/XMLSchema          |
  697%     | xhtml  | http://www.w3.org/1999/xhtml              |
  698%     | soap11 | http://schemas.xmlsoap.org/soap/envelope/ |
  699%     | soap12 | http://www.w3.org/2003/05/soap-envelope   |
  700%
  701%   @see xml_write/2, rdf_register_ns/2.
  702
  703:- multifile
  704    rdf_db:ns/2.  705
  706default_ns('http://www.w3.org/2001/XMLSchema-instance', xsi).
  707default_ns('http://www.w3.org/2001/XMLSchema',          xs).
  708default_ns('http://www.w3.org/1999/xhtml',              xhtml).
  709default_ns('http://schemas.xmlsoap.org/soap/envelope/', soap11).
  710default_ns('http://www.w3.org/2003/05/soap-envelope',   soap12).
  711default_ns(URI, NS) :-
  712    rdf_db:ns(NS, URI).
  713
  714%!  missing_namespaces(+DOM, +NSMap, -Missing)
  715%
  716%   Return a list of URIs appearing in DOM that are not covered
  717%   by xmlns definitions.
  718
  719missing_namespaces(DOM, Defined, Missing) :-
  720    missing_namespaces(DOM, Defined, [], Missing).
  721
  722missing_namespaces([], _, L, L) :- !.
  723missing_namespaces([H|T], Def, L0, L) :-
  724    !,
  725    missing_namespaces(H, Def, L0, L1),
  726    missing_namespaces(T, Def, L1, L).
  727missing_namespaces(element(Name, Atts, Content), Def, L0, L) :-
  728    !,
  729    update_nsmap(Atts, _, Def, Def1),
  730    missing_ns(Name, Def1, L0, L1),
  731    missing_att_ns(Atts, Def1, L1, L2),
  732    missing_namespaces(Content, Def1, L2, L).
  733missing_namespaces(_, _, L, L).
  734
  735missing_att_ns([], _, M, M).
  736missing_att_ns([Name=_|T], Def, M0, M) :-
  737    missing_ns(Name, Def, M0, M1),
  738    missing_att_ns(T, Def, M1, M).
  739
  740missing_ns(ns(NS, URI):_, Def, M0, M) :-
  741    !,
  742    (  memberchk(NS=URI, Def)
  743    -> M = M0
  744    ;  NS == ''
  745    -> M = M0
  746    ;  M = [URI|M0]
  747    ).
  748missing_ns(URI:_, Def, M0, M) :-
  749    !,
  750    (   (   memberchk(_=URI, Def)
  751        ;   memberchk(URI, M0)
  752        ;   URI = xml               % predefined ones
  753        ;   URI = xmlns
  754        )
  755    ->  M = M0
  756    ;   M = [URI|M0]
  757    ).
  758missing_ns(_, _, M, M).
  759
  760                 /*******************************
  761                 *         QUOTED WRITE         *
  762                 *******************************/
  763
  764sgml_write_attribute(Out, Values, State) :-
  765    is_list(Values),
  766    !,
  767    get_state(State, entity_map, EntityMap),
  768    put_char(Out, '"'),
  769    write_quoted_list(Values, Out, '"<&\r\n\t', EntityMap),
  770    put_char(Out, '"').
  771sgml_write_attribute(Out, Value, State) :-
  772    is_text(Value),
  773    !,
  774    get_state(State, entity_map, EntityMap),
  775    put_char(Out, '"'),
  776    write_quoted(Out, Value, '"<&\r\n\t', EntityMap),
  777    put_char(Out, '"').
  778sgml_write_attribute(Out, Value, _State) :-
  779    number(Value),
  780    !,
  781    format(Out, '"~w"', [Value]).
  782sgml_write_attribute(_, Value, _) :-
  783    type_error(sgml_attribute_value, Value).
  784
  785write_quoted_list([], _, _, _).
  786write_quoted_list([H|T], Out, Escape, EntityMap) :-
  787    write_quoted(Out, H, Escape, EntityMap),
  788    (   T == []
  789    ->  true
  790    ;   put_char(Out, ' '),
  791        write_quoted_list(T, Out, Escape, EntityMap)
  792    ).
  793
  794
  795sgml_write_content(Out, Value, State) :-
  796    is_text(Value),
  797    !,
  798    get_state(State, entity_map, EntityMap),
  799    write_quoted(Out, Value, '<&>\r', EntityMap).
  800sgml_write_content(Out, Value, _) :-
  801    write(Out, Value).
  802
  803is_text(Value) :- atom(Value), !.
  804is_text(Value) :- string(Value), !.
  805
  806write_quoted(Out, Atom, Escape, EntityMap) :-
  807    atom(Atom),
  808    !,
  809    atom_codes(Atom, Codes),
  810    writeq(Codes, Out, Escape, EntityMap).
  811write_quoted(Out, String, Escape, EntityMap) :-
  812    string(String),
  813    !,
  814    string_codes(String, Codes),
  815    writeq(Codes, Out, Escape, EntityMap).
  816write_quoted(_, String, _, _) :-
  817    type_error(atom_or_string, String).
  818
  819
  820%!  writeq(+Text:codes, +Out:stream, +Escape:atom, +Escape:assoc) is det.
  821
  822writeq([], _, _, _).
  823writeq([H|T], Out, Escape, EntityMap) :-
  824    (   char_code(HC, H),
  825        sub_atom(Escape, _, _, _, HC)
  826    ->  write_entity(H, Out, EntityMap)
  827    ;   H >= 256
  828    ->  (   stream_property(Out, encoding(Enc)),
  829            unicode_encoding(Enc)
  830        ->  put_code(Out, H)
  831        ;   write_entity(H, Out, EntityMap)
  832        )
  833    ;   put_code(Out, H)
  834    ),
  835    writeq(T, Out, Escape, EntityMap).
  836
  837unicode_encoding(utf8).
  838unicode_encoding(wchar_t).
  839unicode_encoding(unicode_le).
  840unicode_encoding(unicode_be).
  841
  842write_entity(Code, Out, EntityMap) :-
  843    (   get_assoc(Code, EntityMap, EntityName)
  844    ->  format(Out, '&~w;', [EntityName])
  845    ;   format(Out, '&#x~16R;', [Code])
  846    ).
  847
  848
  849                 /*******************************
  850                 *          INDENTATION         *
  851                 *******************************/
  852
  853write_initial_indent(State, Out) :-
  854    (   get_state(State, indent, Indent),
  855        Indent > 0
  856    ->  emit_indent(Indent, Out)
  857    ;   true
  858    ).
  859
  860write_indent(State, _) :-
  861    get_state(State, layout, false),
  862    !.
  863write_indent(State, Out) :-
  864    get_state(State, indent, Indent),
  865    emit_indent(Indent, Out).
  866
  867emit_indent(Indent, Out) :-
  868    Tabs is Indent // 8,
  869    Spaces is Indent mod 8,
  870    format(Out, '~N', []),
  871    write_n(Tabs, '\t', Out),
  872    write_n(Spaces, ' ', Out).
  873
  874write_n(N, Char, Out) :-
  875    (   N > 0
  876    ->  put_char(Out, Char),
  877        N2 is N - 1,
  878        write_n(N2, Char, Out)
  879    ;   true
  880    ).
  881
  882inc_indent(State) :-
  883    inc_indent(State, 2).
  884
  885inc_indent(State, Inc) :-
  886    state(indent, Arg),
  887    arg(Arg, State, I0),
  888    I is I0 + Inc,
  889    setarg(Arg, State, I).
  890
  891
  892                 /*******************************
  893                 *         DTD HANDLING         *
  894                 *******************************/
  895
  896%!  empty_element(+State, +Element)
  897%
  898%   True if Element is declared  with   EMPTY  content in the (SGML)
  899%   DTD.
  900
  901empty_element(State, Element) :-
  902    get_state(State, dtd, DTD),
  903    DTD \== (-),
  904    dtd_property(DTD, element(Element, _, empty)).
  905
  906%!  dtd_character_entities(+DTD, -Map)
  907%
  908%   Return an assoc mapping character entities   to their name. Note
  909%   that the entity representation is a bit dubious. Entities should
  910%   allow for a wide-character version and avoid the &#..; trick.
  911
  912dtd_character_entities(DTD, Map) :-
  913    empty_assoc(Empty),
  914    dtd_property(DTD, entities(Entities)),
  915    fill_entity_map(Entities, DTD, Empty, Map).
  916
  917fill_entity_map([], _, Map, Map).
  918fill_entity_map([H|T], DTD, Map0, Map) :-
  919    (   dtd_property(DTD, entity(H, CharEntity)),
  920        atom(CharEntity),
  921        (   sub_atom(CharEntity, 0, _, _, '&#'),
  922            sub_atom(CharEntity, _, _, 0, ';')
  923        ->  sub_atom(CharEntity, 2, _, 1, Name),
  924            atom_number(Name, Code)
  925        ;   atom_length(CharEntity, 1),
  926            char_code(CharEntity, Code)
  927        )
  928    ->  put_assoc(Code, Map0, H, Map1),
  929        fill_entity_map(T, DTD, Map1, Map)
  930    ;   fill_entity_map(T, DTD, Map0, Map)
  931    ).
  932
  933
  934
  935                 /*******************************
  936                 *            FIELDS            *
  937                 *******************************/
  938
  939state(indent,     1).                   % current indentation
  940state(layout,     2).                   % use layout (true/false)
  941state(dtd,        3).                   % DTD for entity names
  942state(entity_map, 4).                   % compiled entity-map
  943state(dialect,    5).                   % xml/sgml
  944state(nsmap,      6).                   % defined namespaces
  945state(net,        7).                   % Should null end-tags be used?
  946state(cleanns,    8).                   % Remove duplicate xmlns declarations
  947
  948new_state(Dialect,
  949    state(
  950        0,              % indent
  951        true,           % layout
  952        -,              % DTD
  953        EntityMap,      % entity_map
  954        Dialect,        % dialect
  955        [],             % NS=Full map
  956        Net,            % Null End-Tags?
  957        true            % Remove duplicate xmlns declarations
  958    )) :-
  959    (   Dialect == sgml
  960    ->  Net = false,
  961        empty_assoc(EntityMap)
  962    ;   Net = true,
  963        xml_entities(EntityMap)
  964    ).
  965
  966get_state(State, Field, Value) :-
  967    state(Field, Arg),
  968    arg(Arg, State, Value).
  969
  970set_state(State, Field, Value) :-
  971    state(Field, Arg),
  972    setarg(Arg, State, Value).
  973
  974term_expansion(xml_entities(map),
  975               xml_entities(Map)) :-
  976    list_to_assoc([ 0'< - lt,
  977                    0'& - amp,
  978                    0'> - gt,
  979                    0'\' - apos,
  980                    0'\" - quot
  981                  ], Map).
  982xml_entities(map).
  983
  984                 /*******************************
  985                 *            MESSAGES          *
  986                 *******************************/
  987
  988:- multifile
  989    prolog:message/3.  990
  991prolog:message(sgml_write(sdata_as_cdata(Data))) -->
  992    [ 'SGML-write: emitting SDATA as CDATA: "~p"'-[Data] ]