View source with raw comments or as raw
    1/*  Part of SWI-Prolog
    2
    3    Author:        Jan Wielemaker
    4    E-mail:        J.Wielemaker@vu.nl
    5    WWW:           http://www.swi-prolog.org
    6    Copyright (c)  2000-2016, University of Amsterdam
    7                              VU University Amsterdam
    8    All rights reserved.
    9
   10    Redistribution and use in source and binary forms, with or without
   11    modification, are permitted provided that the following conditions
   12    are met:
   13
   14    1. Redistributions of source code must retain the above copyright
   15       notice, this list of conditions and the following disclaimer.
   16
   17    2. Redistributions in binary form must reproduce the above copyright
   18       notice, this list of conditions and the following disclaimer in
   19       the documentation and/or other materials provided with the
   20       distribution.
   21
   22    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   23    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   24    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
   25    FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
   26    COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
   27    INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
   28    BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
   29    LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
   30    CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   31    LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
   32    ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
   33    POSSIBILITY OF SUCH DAMAGE.
   34*/
   35
   36:- module(sgml,
   37          [ load_html/3,                % +Input, -DOM, +Options
   38            load_xml/3,                 % +Input, -DOM, +Options
   39            load_sgml/3,                % +Input, -DOM, +Options
   40
   41            load_sgml_file/2,           % +File, -ListOfContent
   42            load_xml_file/2,            % +File, -ListOfContent
   43            load_html_file/2,           % +File, -Document
   44
   45            load_structure/3,           % +File, -Term, +Options
   46
   47            load_dtd/2,                 % +DTD, +File
   48            load_dtd/3,                 % +DTD, +File, +Options
   49            dtd/2,                      % +Type, -DTD
   50            dtd_property/2,             % +DTD, ?Property
   51
   52            new_dtd/2,                  % +Doctype, -DTD
   53            free_dtd/1,                 % +DTD
   54            open_dtd/3,                 % +DTD, +Options, -Stream
   55
   56            new_sgml_parser/2,          % -Parser, +Options
   57            free_sgml_parser/1,         % +Parser
   58            set_sgml_parser/2,          % +Parser, +Options
   59            get_sgml_parser/2,          % +Parser, +Options
   60            sgml_parse/2,               % +Parser, +Options
   61
   62            sgml_register_catalog_file/2, % +File, +StartOrEnd
   63
   64            xml_quote_attribute/3,      % +In, -Quoted, +Encoding
   65            xml_quote_cdata/3,          % +In, -Quoted, +Encoding
   66            xml_quote_attribute/2,      % +In, -Quoted
   67            xml_quote_cdata/2,          % +In, -Quoted
   68            xml_name/1,                 % +In
   69            xml_name/2,                 % +In, +Encoding
   70
   71            xsd_number_string/2,        % ?Number, ?String
   72            xsd_time_string/3,          % ?Term, ?Type, ?String
   73
   74            xml_basechar/1,             % +Code
   75            xml_ideographic/1,          % +Code
   76            xml_combining_char/1,       % +Code
   77            xml_digit/1,                % +Code
   78            xml_extender/1,             % +Code
   79
   80            iri_xml_namespace/2,        % +IRI, -Namespace
   81            iri_xml_namespace/3,        % +IRI, -Namespace, -LocalName
   82            xml_is_dom/1                % +Term
   83          ]).   84:- use_module(library(lists)).   85:- use_module(library(option)).   86:- use_module(library(error)).   87:- use_module(library(iostream)).   88
   89:- meta_predicate
   90    load_structure(+, -, :),
   91    load_html(+, -, :),
   92    load_xml(+, -, :),
   93    load_sgml(+, -, :).   94
   95:- predicate_options(load_structure/3, 3,
   96                     [ charpos(integer),
   97                       defaults(boolean),
   98                       dialect(oneof([html,html4,html5,sgml,xhtml,xhtml5,xml,xmlns])),
   99                       doctype(atom),
  100                       dtd(any),
  101                       encoding(oneof(['iso-8859-1', 'utf-8', 'us-ascii'])),
  102                       entity(atom,atom),
  103                       keep_prefix(boolean),
  104                       file(atom),
  105                       line(integer),
  106                       offset(integer),
  107                       number(oneof([token,integer])),
  108                       qualify_attributes(boolean),
  109                       shorttag(boolean),
  110                       case_sensitive_attributes(boolean),
  111                       case_preserving_attributes(boolean),
  112                       system_entities(boolean),
  113                       max_memory(integer),
  114                       space(oneof([sgml,preserve,default,remove])),
  115                       xmlns(atom),
  116                       xmlns(atom,atom),
  117                       pass_to(sgml_parse/2, 2)
  118                     ]).  119:- predicate_options(load_html/3, 3,
  120                     [ pass_to(load_structure/3, 3)
  121                     ]).  122:- predicate_options(load_xml/3, 3,
  123                     [ pass_to(load_structure/3, 3)
  124                     ]).  125:- predicate_options(load_sgml/3, 3,
  126                     [ pass_to(load_structure/3, 3)
  127                     ]).  128:- predicate_options(load_dtd/3, 3,
  129                     [ dialect(oneof([sgml,xml,xmlns])),
  130                       pass_to(open/4, 4)
  131                     ]).  132:- predicate_options(sgml_parse/2, 2,
  133                     [ call(oneof([begin,end,cdata,pi,decl,error,xmlns,urlns]),
  134                            callable),
  135                       content_length(integer),
  136                       document(-any),
  137                       max_errors(integer),
  138                       parse(oneof([file,element,content,declaration,input])),
  139                       source(any),
  140                       syntax_errors(oneof([quiet,print,style])),
  141                       xml_no_ns(oneof([error,quiet]))
  142                     ]).  143:- predicate_options(new_sgml_parser/2, 2,
  144                     [ dtd(any)
  145                     ]).

SGML, XML and HTML parser

This library allows you to parse SGML, XML and HTML data into a Prolog data structure. The library defines several families of predicates:

High-level predicates
Most users will only use load_html/3, load_xml/3 or load_sgml/3 to parse arbitrary input into a DOM structure. These predicates all call load_structure/3, which provides more options and may be used for processing non-standard documents.

The DOM structure can be used by library(xpath) to extract information from the document.

The low-level parser
The actual parser is written in C and consists of two parts: one for processing DTD (Document Type Definitions) and one for parsing data. The data can either be parsed to a Prolog (DOM) term or the parser can perform callbacks for the DOM events.
Utility predicates
Finally, this library provides prmitives for classifying characters and strings according to the XML specification such as xml_name/1 to verify whether an atom is a valid XML name (identifier). It also provides primitives to quote attributes and CDATA elements. */
  175:- multifile user:file_search_path/2.  176:- dynamic   user:file_search_path/2.  177
  178user:file_search_path(dtd, '.').
  179user:file_search_path(dtd, swi('library/DTD')).
  180
  181sgml_register_catalog_file(File, Location) :-
  182    prolog_to_os_filename(File, OsFile),
  183    '_sgml_register_catalog_file'(OsFile, Location).
  184
  185:- use_foreign_library(foreign(sgml2pl)).  186
  187register_catalog(Base) :-
  188    absolute_file_name(dtd(Base),
  189                           [ extensions([soc]),
  190                             access(read),
  191                             file_errors(fail)
  192                           ],
  193                           SocFile),
  194    sgml_register_catalog_file(SocFile, end).
  195
  196:- initialization
  197    ignore(register_catalog('HTML4')).  198
  199
  200                 /*******************************
  201                 *         DTD HANDLING         *
  202                 *******************************/
  203
  204/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  205Note that concurrent access to DTD objects  is not allowed, and hence we
  206will allocate and destroy them in each   thread.  Possibibly it would be
  207nicer to find out why  concurrent  access   to  DTD's  is  flawed. It is
  208diagnosed to mess with the entity resolution by Fabien Todescato.
  209- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
  210
  211:- thread_local
  212    current_dtd/2.  213:- volatile
  214    current_dtd/2.  215:- thread_local
  216    registered_cleanup/0.  217:- volatile
  218    registered_cleanup/0.  219
  220:- multifile
  221    dtd_alias/2.  222
  223:- create_prolog_flag(html_dialect, html5, [type(atom)]).  224
  225dtd_alias(html4, 'HTML4').
  226dtd_alias(html5, 'HTML5').
  227dtd_alias(html,  DTD) :-
  228    current_prolog_flag(html_dialect, Dialect),
  229    dtd_alias(Dialect, DTD).
 dtd(+Type, -DTD) is det
DTD is a DTD object created from the file dtd(Type). Loaded DTD objects are cached. Note that DTD objects may not be shared between threads. Therefore, dtd/2 maintains the pool of DTD objects using a thread_local predicate. DTD objects are destroyed if a thread terminates.
Errors
- existence_error(source_sink, dtd(Type))
  241dtd(Type, DTD) :-
  242    current_dtd(Type, DTD),
  243    !.
  244dtd(Type, DTD) :-
  245    new_dtd(Type, DTD),
  246    (   dtd_alias(Type, Base)
  247    ->  true
  248    ;   Base = Type
  249    ),
  250    absolute_file_name(dtd(Base),
  251                       [ extensions([dtd]),
  252                         access(read)
  253                       ], DtdFile),
  254    load_dtd(DTD, DtdFile),
  255    register_cleanup,
  256    asserta(current_dtd(Type, DTD)).
 load_dtd(+DTD, +DtdFile, +Options)
Load DtdFile into a DTD. Defined options are:
dialect(+Dialect)
Dialect to use (xml, xmlns, sgml)
encoding(+Encoding)
Encoding of DTD file
Arguments:
DTD- is a fresh DTD object, normally created using new_dtd/1.
  271load_dtd(DTD, DtdFile) :-
  272    load_dtd(DTD, DtdFile, []).
  273load_dtd(DTD, DtdFile, Options) :-
  274    sgml_open_options(sgml:Options, OpenOptions, sgml:DTDOptions),
  275    setup_call_cleanup(
  276        open_dtd(DTD, DTDOptions, DtdOut),
  277        setup_call_cleanup(
  278            open(DtdFile, read, DtdIn, OpenOptions),
  279            copy_stream_data(DtdIn, DtdOut),
  280            close(DtdIn)),
  281        close(DtdOut)).
  282
  283split_dtd_options([], [], []).
  284split_dtd_options([H|T], [H|TD], S) :-
  285    dtd_option(H),
  286    !,
  287    split_dtd_options(T, TD, S).
  288split_dtd_options([H|T], TD, [H|S]) :-
  289    split_dtd_options(T, TD, S).
  290
  291dtd_option(dialect(_)).
 destroy_dtds
Destroy DTDs cached by this thread as they will become unreachable anyway.
  299destroy_dtds :-
  300    (   current_dtd(_Type, DTD),
  301        free_dtd(DTD),
  302        fail
  303    ;   true
  304    ).
 register_cleanup
Register cleanup of DTDs created for this thread.
  310register_cleanup :-
  311    registered_cleanup,
  312    !.
  313register_cleanup :-
  314    catch(thread_at_exit(destroy_dtds), _, true),
  315    assert(registered_cleanup).
  316
  317
  318                 /*******************************
  319                 *          EXAMINE DTD         *
  320                 *******************************/
  321
  322prop(doctype(_), _).
  323prop(elements(_), _).
  324prop(entities(_), _).
  325prop(notations(_), _).
  326prop(entity(E, _), DTD) :-
  327    (   nonvar(E)
  328    ->  true
  329    ;   '$dtd_property'(DTD, entities(EL)),
  330        member(E, EL)
  331    ).
  332prop(element(E, _, _), DTD) :-
  333    (   nonvar(E)
  334    ->  true
  335    ;   '$dtd_property'(DTD, elements(EL)),
  336        member(E, EL)
  337    ).
  338prop(attributes(E, _), DTD) :-
  339    (   nonvar(E)
  340    ->  true
  341    ;   '$dtd_property'(DTD, elements(EL)),
  342        member(E, EL)
  343    ).
  344prop(attribute(E, A, _, _), DTD) :-
  345    (   nonvar(E)
  346    ->  true
  347    ;   '$dtd_property'(DTD, elements(EL)),
  348        member(E, EL)
  349    ),
  350    (   nonvar(A)
  351    ->  true
  352    ;   '$dtd_property'(DTD, attributes(E, AL)),
  353        member(A, AL)
  354    ).
  355prop(notation(N, _), DTD) :-
  356    (   nonvar(N)
  357    ->  true
  358    ;   '$dtd_property'(DTD, notations(NL)),
  359        member(N, NL)
  360    ).
  361
  362dtd_property(DTD, Prop) :-
  363    prop(Prop, DTD),
  364    '$dtd_property'(DTD, Prop).
  365
  366
  367                 /*******************************
  368                 *             SGML             *
  369                 *******************************/
 load_structure(+Source, -ListOfContent, :Options) is det
Parse Source and return the resulting structure in ListOfContent. Source is handed to open_any/5, which allows for processing an extensible set of input sources.

A proper XML document contains only a single toplevel element whose name matches the document type. Nevertheless, a list is returned for consistency with the representation of element content.

The encoding(+Encoding) option is treated special for compatibility reasons:

  393load_structure(Spec, DOM, Options) :-
  394    sgml_open_options(Options, OpenOptions, SGMLOptions),
  395    setup_call_cleanup(
  396        open_any(Spec, read, In, Close, OpenOptions),
  397        load_structure_from_stream(In, DOM, SGMLOptions),
  398        close_any(Close)).
  399
  400sgml_open_options(Options, OpenOptions, SGMLOptions) :-
  401    Options = M:Plain,
  402    (   select_option(encoding(Encoding), Plain, NoEnc)
  403    ->  (   sgml_encoding(Encoding)
  404        ->  merge_options(NoEnc, [type(binary)], OpenOptions),
  405            SGMLOptions = Options
  406        ;   OpenOptions = Plain,
  407            SGMLOptions = M:NoEnc
  408        )
  409    ;   merge_options(Plain, [type(binary)], OpenOptions),
  410        SGMLOptions = Options
  411    ).
  412
  413sgml_encoding(Enc) :-
  414    downcase_atom(Enc, Enc1),
  415    sgml_encoding_l(Enc1).
  416
  417sgml_encoding_l('iso-8859-1').
  418sgml_encoding_l('us-ascii').
  419sgml_encoding_l('utf-8').
  420sgml_encoding_l('utf8').
  421sgml_encoding_l('iso_latin_1').
  422sgml_encoding_l('ascii').
  423
  424load_structure_from_stream(In, Term, M:Options) :-
  425    !,
  426    (   select_option(dtd(DTD), Options, Options1)
  427    ->  ExplicitDTD = true
  428    ;   ExplicitDTD = false,
  429        Options1 = Options
  430    ),
  431    move_front(Options1, dialect(_), Options2), % dialect sets defaults
  432    setup_call_cleanup(
  433        new_sgml_parser(Parser,
  434                        [ dtd(DTD)
  435                        ]),
  436        parse(Parser, M:Options2, TermRead, In),
  437        free_sgml_parser(Parser)),
  438    (   ExplicitDTD == true
  439    ->  (   DTD = dtd(_, DocType),
  440            dtd_property(DTD, doctype(DocType))
  441        ->  true
  442        ;   true
  443        )
  444    ;   free_dtd(DTD)
  445    ),
  446    Term = TermRead.
  447
  448move_front(Options0, Opt, Options) :-
  449    selectchk(Opt, Options0, Options1),
  450    !,
  451    Options = [Opt|Options1].
  452move_front(Options, _, Options).
  453
  454
  455parse(Parser, M:Options, Document, In) :-
  456    set_parser_options(Options, Parser, In, Options1),
  457    parser_meta_options(Options1, M, Options2),
  458    set_input_location(Parser, In),
  459    sgml_parse(Parser,
  460               [ document(Document),
  461                 source(In)
  462               | Options2
  463               ]).
  464
  465set_parser_options([], _, _, []).
  466set_parser_options([H|T], Parser, In, Rest) :-
  467    (   set_parser_option(H, Parser, In)
  468    ->  set_parser_options(T, Parser, In, Rest)
  469    ;   Rest = [H|R2],
  470        set_parser_options(T, Parser, In, R2)
  471    ).
  472
  473set_parser_option(Var, _Parser, _In) :-
  474    var(Var),
  475    !,
  476    instantiation_error(Var).
  477set_parser_option(Option, Parser, _) :-
  478    def_entity(Option, Parser),
  479    !.
  480set_parser_option(offset(Offset), _Parser, In) :-
  481    !,
  482    seek(In, Offset, bof, _).
  483set_parser_option(Option, Parser, _In) :-
  484    parser_option(Option),
  485    !,
  486    set_sgml_parser(Parser, Option).
  487set_parser_option(Name=Value, Parser, In) :-
  488    Option =.. [Name,Value],
  489    set_parser_option(Option, Parser, In).
  490
  491
  492parser_option(dialect(_)).
  493parser_option(shorttag(_)).
  494parser_option(case_sensitive_attributes(_)).
  495parser_option(case_preserving_attributes(_)).
  496parser_option(system_entities(_)).
  497parser_option(max_memory(_)).
  498parser_option(file(_)).
  499parser_option(line(_)).
  500parser_option(space(_)).
  501parser_option(number(_)).
  502parser_option(defaults(_)).
  503parser_option(doctype(_)).
  504parser_option(qualify_attributes(_)).
  505parser_option(encoding(_)).
  506parser_option(keep_prefix(_)).
  507
  508
  509def_entity(entity(Name, Value), Parser) :-
  510    get_sgml_parser(Parser, dtd(DTD)),
  511    xml_quote_attribute(Value, QValue),
  512    setup_call_cleanup(open_dtd(DTD, [], Stream),
  513                       format(Stream, '<!ENTITY ~w "~w">~n',
  514                              [Name, QValue]),
  515                       close(Stream)).
  516def_entity(xmlns(URI), Parser) :-
  517    set_sgml_parser(Parser, xmlns(URI)).
  518def_entity(xmlns(NS, URI), Parser) :-
  519    set_sgml_parser(Parser, xmlns(NS, URI)).
 parser_meta_options(+Options0, +Module, -Options)
Qualify meta-calling options to the parser.
  525parser_meta_options([], _, []).
  526parser_meta_options([call(When, Closure)|T0], M, [call(When, M:Closure)|T]) :-
  527    !,
  528    parser_meta_options(T0, M, T).
  529parser_meta_options([H|T0], M, [H|T]) :-
  530    parser_meta_options(T0, M, T).
 set_input_location(+Parser, +In:stream) is det
Set the input location if this was not set explicitly
  537set_input_location(Parser, _In) :-
  538    get_sgml_parser(Parser, file(_)),
  539    !.
  540set_input_location(Parser, In) :-
  541    stream_property(In, file_name(File)),
  542    !,
  543    set_sgml_parser(Parser, file(File)),
  544    stream_property(In, position(Pos)),
  545    set_sgml_parser(Parser, position(Pos)).
  546set_input_location(_, _).
  547
  548                 /*******************************
  549                 *           UTILITIES          *
  550                 *******************************/
 load_sgml_file(+File, -DOM) is det
Load SGML from File and unify the resulting DOM structure with DOM.
deprecated
- New code should use load_sgml/3.
  559load_sgml_file(File, Term) :-
  560    load_sgml(File, Term, []).
 load_xml_file(+File, -DOM) is det
Load XML from File and unify the resulting DOM structure with DOM.
deprecated
- New code should use load_xml/3.
  569load_xml_file(File, Term) :-
  570    load_xml(File, Term, []).
 load_html_file(+File, -DOM) is det
Load HTML from File and unify the resulting DOM structure with DOM.
deprecated
- New code should use load_html/3.
  579load_html_file(File, DOM) :-
  580    load_html(File, DOM, []).
 load_html(+Input, -DOM, +Options) is det
Load HTML text from Input and unify the resulting DOM structure with DOM. Options are passed to load_structure/3, after adding the following default options:
dtd(DTD)
Pass the DTD for HTML as obtained using dtd(html, DTD).
dialect(Dialect)
Current dialect from the Prolog flag html_dialect
max_errors(-1)
syntax_errors(quiet)
Most HTML encountered in the wild contains errors. Even in the context of errors, the resulting DOM term is often a reasonable guess at the intend of the author.

You may also want to use the library(http/http_open) to support loading from HTTP and HTTPS URLs. For example:

:- use_module(library(http/http_open)).
:- use_module(library(sgml)).

load_html_url(URL, DOM) :-
    load_html(URL, DOM, []).
  609load_html(File, Term, M:Options) :-
  610    current_prolog_flag(html_dialect, Dialect),
  611    dtd(Dialect, DTD),
  612    merge_options(Options,
  613                  [ dtd(DTD),
  614                    dialect(Dialect),
  615                    max_errors(-1),
  616                    syntax_errors(quiet)
  617                  ], Options1),
  618    load_structure(File, Term, M:Options1).
 load_xml(+Input, -DOM, +Options) is det
Load XML text from Input and unify the resulting DOM structure with DOM. Options are passed to load_structure/3, after adding the following default options:
  628load_xml(Input, DOM, M:Options) :-
  629    merge_options(Options,
  630                  [ dialect(xml)
  631                  ], Options1),
  632    load_structure(Input, DOM, M:Options1).
 load_sgml(+Input, -DOM, +Options) is det
Load SGML text from Input and unify the resulting DOM structure with DOM. Options are passed to load_structure/3, after adding the following default options:
  642load_sgml(Input, DOM, M:Options) :-
  643    merge_options(Options,
  644                  [ dialect(sgml)
  645                  ], Options1),
  646    load_structure(Input, DOM, M:Options1).
  647
  648
  649
  650                 /*******************************
  651                 *            ENCODING          *
  652                 *******************************/
 xml_quote_attribute(+In, -Quoted) is det
 xml_quote_cdata(+In, -Quoted) is det
Backward compatibility for versions that allow to specify encoding. All characters that cannot fit the encoding are mapped to XML character entities (&#dd;). Using ASCII is the safest value.
  662xml_quote_attribute(In, Quoted) :-
  663    xml_quote_attribute(In, Quoted, ascii).
  664
  665xml_quote_cdata(In, Quoted) :-
  666    xml_quote_cdata(In, Quoted, ascii).
 xml_name(+Atom) is semidet
True if Atom is a valid XML name.
  672xml_name(In) :-
  673    xml_name(In, ascii).
  674
  675
  676                 /*******************************
  677                 *    XML CHARACTER CLASSES     *
  678                 *******************************/
 xml_basechar(+CodeOrChar) is semidet
 xml_ideographic(+CodeOrChar) is semidet
 xml_combining_char(+CodeOrChar) is semidet
 xml_digit(+CodeOrChar) is semidet
 xml_extender(+CodeOrChar) is semidet
XML character classification predicates. Each of these predicates accept both a character (one-character atom) and a code (integer).
See also
- http://www.w3.org/TR/2006/REC-xml-20060816
  693                 /*******************************
  694                 *         TYPE CHECKING        *
  695                 *******************************/
 xml_is_dom(@Term) is semidet
True if term statisfies the structure as returned by load_structure/3 and friends.
  702xml_is_dom(0) :- !, fail.               % catch variables
  703xml_is_dom(List) :-
  704    is_list(List),
  705    !,
  706    xml_is_content_list(List).
  707xml_is_dom(Term) :-
  708    xml_is_element(Term).
  709
  710xml_is_content_list([]).
  711xml_is_content_list([H|T]) :-
  712    xml_is_content(H),
  713    xml_is_content_list(T).
  714
  715xml_is_content(0) :- !, fail.
  716xml_is_content(pi(Pi)) :-
  717    !,
  718    atom(Pi).
  719xml_is_content(CDATA) :-
  720    atom(CDATA),
  721    !.
  722xml_is_content(CDATA) :-
  723    string(CDATA),
  724    !.
  725xml_is_content(Term) :-
  726    xml_is_element(Term).
  727
  728xml_is_element(element(Name, Attributes, Content)) :-
  729    dom_name(Name),
  730    dom_attributes(Attributes),
  731    xml_is_content_list(Content).
  732
  733dom_name(NS:Local) :-
  734    atom(NS),
  735    atom(Local),
  736    !.
  737dom_name(Local) :-
  738    atom(Local).
  739
  740dom_attributes(0) :- !, fail.
  741dom_attributes([]).
  742dom_attributes([H|T]) :-
  743    dom_attribute(H),
  744    dom_attributes(T).
  745
  746dom_attribute(Name=Value) :-
  747    dom_name(Name),
  748    atomic(Value).
  749
  750
  751                 /*******************************
  752                 *            MESSAGES          *
  753                 *******************************/
  754:- multifile
  755    prolog:message/3.  756
  757%       Catch messages.  sgml/4 is generated by the SGML2PL binding.
  758
  759prolog:message(sgml(Parser, File, Line, Message)) -->
  760    { get_sgml_parser(Parser, dialect(Dialect))
  761    },
  762    [ 'SGML2PL(~w): ~w:~w: ~w'-[Dialect, File, Line, Message] ].
  763
  764
  765                 /*******************************
  766                 *         XREF SUPPORT         *
  767                 *******************************/
  768
  769:- multifile
  770    prolog:called_by/2.  771
  772prolog:called_by(sgml_parse(_, Options), Called) :-
  773    findall(Meta, meta_call_term(_, Meta, Options), Called).
  774
  775meta_call_term(T, G+N, Options) :-
  776    T = call(Event, G),
  777    pmember(T, Options),
  778    call_params(Event, Term),
  779    functor(Term, _, N).
  780
  781pmember(X, List) :-                     % member for partial lists
  782    nonvar(List),
  783    List = [H|T],
  784    (   X = H
  785    ;   pmember(X, T)
  786    ).
  787
  788call_params(begin, begin(tag,attributes,parser)).
  789call_params(end,   end(tag,parser)).
  790call_params(cdata, cdata(cdata,parser)).
  791call_params(pi,    pi(cdata,parser)).
  792call_params(decl,  decl(cdata,parser)).
  793call_params(error, error(severity,message,parser)).
  794call_params(xmlns, xmlns(namespace,url,parser)).
  795call_params(urlns, urlns(url,url,parser)).
  796
  797                 /*******************************
  798                 *           SANDBOX            *
  799                 *******************************/
  800
  801:- multifile
  802    sandbox:safe_primitive/1,
  803    sandbox:safe_meta_predicate/1.  804
  805sandbox:safe_meta_predicate(sgml:load_structure/3).
  806sandbox:safe_primitive(sgml:dtd(Dialect, _)) :-
  807    dtd_alias(Dialect, _).
  808sandbox:safe_primitive(sgml:xml_quote_attribute(_,_,_)).
  809sandbox:safe_primitive(sgml:xml_quote_cdata(_,_,_)).
  810sandbox:safe_primitive(sgml:xml_name(_,_)).
  811sandbox:safe_primitive(sgml:xml_basechar(_)).
  812sandbox:safe_primitive(sgml:xml_ideographic(_)).
  813sandbox:safe_primitive(sgml:xml_combining_char(_)).
  814sandbox:safe_primitive(sgml:xml_digit(_)).
  815sandbox:safe_primitive(sgml:xml_extender(_)).
  816sandbox:safe_primitive(sgml:iri_xml_namespace(_,_,_)).
  817sandbox:safe_primitive(sgml:xsd_number_string(_,_)).
  818sandbox:safe_primitive(sgml:xsd_time_string(_,_,_))