View source with formatted comments or as raw
    1/*  Part of SWI-Prolog
    2
    3    Author:        Jan Wielemaker
    4    E-mail:        J.Wielemaker@vu.nl
    5    WWW:           http://www.swi-prolog.org
    6    Copyright (c)  2000-2016, University of Amsterdam
    7                              VU University Amsterdam
    8    All rights reserved.
    9
   10    Redistribution and use in source and binary forms, with or without
   11    modification, are permitted provided that the following conditions
   12    are met:
   13
   14    1. Redistributions of source code must retain the above copyright
   15       notice, this list of conditions and the following disclaimer.
   16
   17    2. Redistributions in binary form must reproduce the above copyright
   18       notice, this list of conditions and the following disclaimer in
   19       the documentation and/or other materials provided with the
   20       distribution.
   21
   22    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   23    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   24    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
   25    FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
   26    COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
   27    INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
   28    BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
   29    LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
   30    CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   31    LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
   32    ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
   33    POSSIBILITY OF SUCH DAMAGE.
   34*/
   35
   36:- module(sgml,
   37          [ load_html/3,                % +Input, -DOM, +Options
   38            load_xml/3,                 % +Input, -DOM, +Options
   39            load_sgml/3,                % +Input, -DOM, +Options
   40
   41            load_sgml_file/2,           % +File, -ListOfContent
   42            load_xml_file/2,            % +File, -ListOfContent
   43            load_html_file/2,           % +File, -Document
   44
   45            load_structure/3,           % +File, -Term, +Options
   46
   47            load_dtd/2,                 % +DTD, +File
   48            load_dtd/3,                 % +DTD, +File, +Options
   49            dtd/2,                      % +Type, -DTD
   50            dtd_property/2,             % +DTD, ?Property
   51
   52            new_dtd/2,                  % +Doctype, -DTD
   53            free_dtd/1,                 % +DTD
   54            open_dtd/3,                 % +DTD, +Options, -Stream
   55
   56            new_sgml_parser/2,          % -Parser, +Options
   57            free_sgml_parser/1,         % +Parser
   58            set_sgml_parser/2,          % +Parser, +Options
   59            get_sgml_parser/2,          % +Parser, +Options
   60            sgml_parse/2,               % +Parser, +Options
   61
   62            sgml_register_catalog_file/2, % +File, +StartOrEnd
   63
   64            xml_quote_attribute/3,      % +In, -Quoted, +Encoding
   65            xml_quote_cdata/3,          % +In, -Quoted, +Encoding
   66            xml_quote_attribute/2,      % +In, -Quoted
   67            xml_quote_cdata/2,          % +In, -Quoted
   68            xml_name/1,                 % +In
   69            xml_name/2,                 % +In, +Encoding
   70
   71            xsd_number_string/2,        % ?Number, ?String
   72            xsd_time_string/3,          % ?Term, ?Type, ?String
   73
   74            xml_basechar/1,             % +Code
   75            xml_ideographic/1,          % +Code
   76            xml_combining_char/1,       % +Code
   77            xml_digit/1,                % +Code
   78            xml_extender/1,             % +Code
   79
   80            iri_xml_namespace/2,        % +IRI, -Namespace
   81            iri_xml_namespace/3,        % +IRI, -Namespace, -LocalName
   82            xml_is_dom/1                % +Term
   83          ]).   84:- use_module(library(lists)).   85:- use_module(library(option)).   86:- use_module(library(error)).   87:- use_module(library(iostream)).   88
   89:- meta_predicate
   90    load_structure(+, -, :),
   91    load_html(+, -, :),
   92    load_xml(+, -, :),
   93    load_sgml(+, -, :).   94
   95:- predicate_options(load_structure/3, 3,
   96                     [ charpos(integer),
   97                       defaults(boolean),
   98                       dialect(oneof([html,html4,html5,sgml,xhtml,xhtml5,xml,xmlns])),
   99                       doctype(atom),
  100                       dtd(any),
  101                       encoding(oneof(['iso-8859-1', 'utf-8', 'us-ascii'])),
  102                       entity(atom,atom),
  103                       keep_prefix(boolean),
  104                       file(atom),
  105                       line(integer),
  106                       offset(integer),
  107                       number(oneof([token,integer])),
  108                       qualify_attributes(boolean),
  109                       shorttag(boolean),
  110                       case_sensitive_attributes(boolean),
  111                       case_preserving_attributes(boolean),
  112                       system_entities(boolean),
  113                       max_memory(integer),
  114                       space(oneof([sgml,preserve,default,remove])),
  115                       xmlns(atom),
  116                       xmlns(atom,atom),
  117                       pass_to(sgml_parse/2, 2)
  118                     ]).  119:- predicate_options(load_html/3, 3,
  120                     [ pass_to(load_structure/3, 3)
  121                     ]).  122:- predicate_options(load_xml/3, 3,
  123                     [ pass_to(load_structure/3, 3)
  124                     ]).  125:- predicate_options(load_sgml/3, 3,
  126                     [ pass_to(load_structure/3, 3)
  127                     ]).  128:- predicate_options(load_dtd/3, 3,
  129                     [ dialect(oneof([sgml,xml,xmlns])),
  130                       pass_to(open/4, 4)
  131                     ]).  132:- predicate_options(sgml_parse/2, 2,
  133                     [ call(oneof([begin,end,cdata,pi,decl,error,xmlns,urlns]),
  134                            callable),
  135                       content_length(integer),
  136                       document(-any),
  137                       max_errors(integer),
  138                       parse(oneof([file,element,content,declaration,input])),
  139                       source(any),
  140                       syntax_errors(oneof([quiet,print,style])),
  141                       xml_no_ns(oneof([error,quiet]))
  142                     ]).  143:- predicate_options(new_sgml_parser/2, 2,
  144                     [ dtd(any)
  145                     ]).  146
  147
  148/** <module> SGML, XML and HTML parser
  149
  150This library allows you to parse SGML, XML   and HTML data into a Prolog
  151data structure. The library defines several families of predicates:
  152
  153  $ High-level predicates :
  154  Most users will only use load_html/3, load_xml/3 or load_sgml/3 to
  155  parse arbitrary input into a _DOM_ structure.  These predicates all
  156  call load_structure/3, which provides more options and may be
  157  used for processing non-standard documents.
  158
  159  The DOM structure can be used by library(xpath) to extract information
  160  from the document.
  161
  162  $ The low-level parser :
  163  The actual parser is written in C and consists of two parts: one for
  164  processing DTD (Document Type Definitions) and one for parsing data.
  165  The data can either be parsed to a Prolog (_DOM_) term or the parser
  166  can perform callbacks for the DOM _events_.
  167
  168  $ Utility predicates :
  169  Finally, this library provides prmitives for classifying characters
  170  and strings according to the XML specification such as xml_name/1 to
  171  verify whether an atom is a valid XML name (identifier).  It also
  172  provides primitives to quote attributes and CDATA elements.
  173*/
  174
  175:- multifile user:file_search_path/2.  176:- dynamic   user:file_search_path/2.  177
  178user:file_search_path(dtd, '.').
  179user:file_search_path(dtd, swi('library/DTD')).
  180
  181sgml_register_catalog_file(File, Location) :-
  182    prolog_to_os_filename(File, OsFile),
  183    '_sgml_register_catalog_file'(OsFile, Location).
  184
  185:- use_foreign_library(foreign(sgml2pl)).  186
  187register_catalog(Base) :-
  188    absolute_file_name(dtd(Base),
  189                           [ extensions([soc]),
  190                             access(read),
  191                             file_errors(fail)
  192                           ],
  193                           SocFile),
  194    sgml_register_catalog_file(SocFile, end).
  195
  196:- initialization
  197    ignore(register_catalog('HTML4')).  198
  199
  200                 /*******************************
  201                 *         DTD HANDLING         *
  202                 *******************************/
  203
  204/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  205Note that concurrent access to DTD objects  is not allowed, and hence we
  206will allocate and destroy them in each   thread.  Possibibly it would be
  207nicer to find out why  concurrent  access   to  DTD's  is  flawed. It is
  208diagnosed to mess with the entity resolution by Fabien Todescato.
  209- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
  210
  211:- thread_local
  212    current_dtd/2.  213:- volatile
  214    current_dtd/2.  215:- thread_local
  216    registered_cleanup/0.  217:- volatile
  218    registered_cleanup/0.  219
  220:- multifile
  221    dtd_alias/2.  222
  223:- create_prolog_flag(html_dialect, html5, [type(atom)]).  224
  225dtd_alias(html4, 'HTML4').
  226dtd_alias(html5, 'HTML5').
  227dtd_alias(html,  DTD) :-
  228    current_prolog_flag(html_dialect, Dialect),
  229    dtd_alias(Dialect, DTD).
  230
  231%!  dtd(+Type, -DTD) is det.
  232%
  233%   DTD is a DTD object created from  the file dtd(Type). Loaded DTD
  234%   objects are cached. Note that  DTD   objects  may  not be shared
  235%   between threads. Therefore, dtd/2  maintains   the  pool  of DTD
  236%   objects  using  a  thread_local  predicate.    DTD  objects  are
  237%   destroyed if a thread terminates.
  238%
  239%   @error existence_error(source_sink, dtd(Type))
  240
  241dtd(Type, DTD) :-
  242    current_dtd(Type, DTD),
  243    !.
  244dtd(Type, DTD) :-
  245    new_dtd(Type, DTD),
  246    (   dtd_alias(Type, Base)
  247    ->  true
  248    ;   Base = Type
  249    ),
  250    absolute_file_name(dtd(Base),
  251                       [ extensions([dtd]),
  252                         access(read)
  253                       ], DtdFile),
  254    load_dtd(DTD, DtdFile),
  255    register_cleanup,
  256    asserta(current_dtd(Type, DTD)).
  257
  258%!  load_dtd(+DTD, +DtdFile, +Options)
  259%
  260%   Load DtdFile into a DTD.  Defined options are:
  261%
  262%           * dialect(+Dialect)
  263%           Dialect to use (xml, xmlns, sgml)
  264%
  265%           * encoding(+Encoding)
  266%           Encoding of DTD file
  267%
  268%   @param  DTD is a fresh DTD object, normally created using
  269%           new_dtd/1.
  270
  271load_dtd(DTD, DtdFile) :-
  272    load_dtd(DTD, DtdFile, []).
  273load_dtd(DTD, DtdFile, Options) :-
  274    sgml_open_options(sgml:Options, OpenOptions, sgml:DTDOptions),
  275    setup_call_cleanup(
  276        open_dtd(DTD, DTDOptions, DtdOut),
  277        setup_call_cleanup(
  278            open(DtdFile, read, DtdIn, OpenOptions),
  279            copy_stream_data(DtdIn, DtdOut),
  280            close(DtdIn)),
  281        close(DtdOut)).
  282
  283split_dtd_options([], [], []).
  284split_dtd_options([H|T], [H|TD], S) :-
  285    dtd_option(H),
  286    !,
  287    split_dtd_options(T, TD, S).
  288split_dtd_options([H|T], TD, [H|S]) :-
  289    split_dtd_options(T, TD, S).
  290
  291dtd_option(dialect(_)).
  292
  293
  294%!  destroy_dtds
  295%
  296%   Destroy  DTDs  cached  by  this  thread   as  they  will  become
  297%   unreachable anyway.
  298
  299destroy_dtds :-
  300    (   current_dtd(_Type, DTD),
  301        free_dtd(DTD),
  302        fail
  303    ;   true
  304    ).
  305
  306%!  register_cleanup
  307%
  308%   Register cleanup of DTDs created for this thread.
  309
  310register_cleanup :-
  311    registered_cleanup,
  312    !.
  313register_cleanup :-
  314    catch(thread_at_exit(destroy_dtds), _, true),
  315    assert(registered_cleanup).
  316
  317
  318                 /*******************************
  319                 *          EXAMINE DTD         *
  320                 *******************************/
  321
  322prop(doctype(_), _).
  323prop(elements(_), _).
  324prop(entities(_), _).
  325prop(notations(_), _).
  326prop(entity(E, _), DTD) :-
  327    (   nonvar(E)
  328    ->  true
  329    ;   '$dtd_property'(DTD, entities(EL)),
  330        member(E, EL)
  331    ).
  332prop(element(E, _, _), DTD) :-
  333    (   nonvar(E)
  334    ->  true
  335    ;   '$dtd_property'(DTD, elements(EL)),
  336        member(E, EL)
  337    ).
  338prop(attributes(E, _), DTD) :-
  339    (   nonvar(E)
  340    ->  true
  341    ;   '$dtd_property'(DTD, elements(EL)),
  342        member(E, EL)
  343    ).
  344prop(attribute(E, A, _, _), DTD) :-
  345    (   nonvar(E)
  346    ->  true
  347    ;   '$dtd_property'(DTD, elements(EL)),
  348        member(E, EL)
  349    ),
  350    (   nonvar(A)
  351    ->  true
  352    ;   '$dtd_property'(DTD, attributes(E, AL)),
  353        member(A, AL)
  354    ).
  355prop(notation(N, _), DTD) :-
  356    (   nonvar(N)
  357    ->  true
  358    ;   '$dtd_property'(DTD, notations(NL)),
  359        member(N, NL)
  360    ).
  361
  362dtd_property(DTD, Prop) :-
  363    prop(Prop, DTD),
  364    '$dtd_property'(DTD, Prop).
  365
  366
  367                 /*******************************
  368                 *             SGML             *
  369                 *******************************/
  370
  371%!  load_structure(+Source, -ListOfContent, :Options) is det.
  372%
  373%   Parse   Source   and   return   the   resulting   structure   in
  374%   ListOfContent. Source is handed to  open_any/5, which allows for
  375%   processing an extensible set of input sources.
  376%
  377%   A proper XML document contains only   a  single toplevel element
  378%   whose name matches the document type.   Nevertheless,  a list is
  379%   returned for consistency with  the   representation  of  element
  380%   content.
  381%
  382%   The  encoding(+Encoding)  option   is    treated   special   for
  383%   compatibility reasons:
  384%
  385%     - If `Encoding` is one of =iso-8859-1=, =us-ascii= or =utf-8=,
  386%       the stream is opened in binary mode and the option is passed
  387%       to the SGML parser.
  388%     - If `Encoding` is present, but not one of the above, the
  389%       stream is opened in text mode using the given encoding.
  390%     - Otherwise (no `Encoding`), the stream is opened in binary
  391%       mode and doing the correct decoding is left to the parser.
  392
  393load_structure(Spec, DOM, Options) :-
  394    sgml_open_options(Options, OpenOptions, SGMLOptions),
  395    setup_call_cleanup(
  396        open_any(Spec, read, In, Close, OpenOptions),
  397        load_structure_from_stream(In, DOM, SGMLOptions),
  398        close_any(Close)).
  399
  400sgml_open_options(Options, OpenOptions, SGMLOptions) :-
  401    Options = M:Plain,
  402    (   select_option(encoding(Encoding), Plain, NoEnc)
  403    ->  (   sgml_encoding(Encoding)
  404        ->  merge_options(NoEnc, [type(binary)], OpenOptions),
  405            SGMLOptions = Options
  406        ;   OpenOptions = Plain,
  407            SGMLOptions = M:NoEnc
  408        )
  409    ;   merge_options(Plain, [type(binary)], OpenOptions),
  410        SGMLOptions = Options
  411    ).
  412
  413sgml_encoding(Enc) :-
  414    downcase_atom(Enc, Enc1),
  415    sgml_encoding_l(Enc1).
  416
  417sgml_encoding_l('iso-8859-1').
  418sgml_encoding_l('us-ascii').
  419sgml_encoding_l('utf-8').
  420sgml_encoding_l('utf8').
  421sgml_encoding_l('iso_latin_1').
  422sgml_encoding_l('ascii').
  423
  424load_structure_from_stream(In, Term, M:Options) :-
  425    !,
  426    (   select_option(dtd(DTD), Options, Options1)
  427    ->  ExplicitDTD = true
  428    ;   ExplicitDTD = false,
  429        Options1 = Options
  430    ),
  431    move_front(Options1, dialect(_), Options2), % dialect sets defaults
  432    setup_call_cleanup(
  433        new_sgml_parser(Parser,
  434                        [ dtd(DTD)
  435                        ]),
  436        parse(Parser, M:Options2, TermRead, In),
  437        free_sgml_parser(Parser)),
  438    (   ExplicitDTD == true
  439    ->  (   DTD = dtd(_, DocType),
  440            dtd_property(DTD, doctype(DocType))
  441        ->  true
  442        ;   true
  443        )
  444    ;   free_dtd(DTD)
  445    ),
  446    Term = TermRead.
  447
  448move_front(Options0, Opt, Options) :-
  449    selectchk(Opt, Options0, Options1),
  450    !,
  451    Options = [Opt|Options1].
  452move_front(Options, _, Options).
  453
  454
  455parse(Parser, M:Options, Document, In) :-
  456    set_parser_options(Options, Parser, In, Options1),
  457    parser_meta_options(Options1, M, Options2),
  458    set_input_location(Parser, In),
  459    sgml_parse(Parser,
  460               [ document(Document),
  461                 source(In)
  462               | Options2
  463               ]).
  464
  465set_parser_options([], _, _, []).
  466set_parser_options([H|T], Parser, In, Rest) :-
  467    (   set_parser_option(H, Parser, In)
  468    ->  set_parser_options(T, Parser, In, Rest)
  469    ;   Rest = [H|R2],
  470        set_parser_options(T, Parser, In, R2)
  471    ).
  472
  473set_parser_option(Var, _Parser, _In) :-
  474    var(Var),
  475    !,
  476    instantiation_error(Var).
  477set_parser_option(Option, Parser, _) :-
  478    def_entity(Option, Parser),
  479    !.
  480set_parser_option(offset(Offset), _Parser, In) :-
  481    !,
  482    seek(In, Offset, bof, _).
  483set_parser_option(Option, Parser, _In) :-
  484    parser_option(Option),
  485    !,
  486    set_sgml_parser(Parser, Option).
  487set_parser_option(Name=Value, Parser, In) :-
  488    Option =.. [Name,Value],
  489    set_parser_option(Option, Parser, In).
  490
  491
  492parser_option(dialect(_)).
  493parser_option(shorttag(_)).
  494parser_option(case_sensitive_attributes(_)).
  495parser_option(case_preserving_attributes(_)).
  496parser_option(system_entities(_)).
  497parser_option(max_memory(_)).
  498parser_option(file(_)).
  499parser_option(line(_)).
  500parser_option(space(_)).
  501parser_option(number(_)).
  502parser_option(defaults(_)).
  503parser_option(doctype(_)).
  504parser_option(qualify_attributes(_)).
  505parser_option(encoding(_)).
  506parser_option(keep_prefix(_)).
  507
  508
  509def_entity(entity(Name, Value), Parser) :-
  510    get_sgml_parser(Parser, dtd(DTD)),
  511    xml_quote_attribute(Value, QValue),
  512    setup_call_cleanup(open_dtd(DTD, [], Stream),
  513                       format(Stream, '<!ENTITY ~w "~w">~n',
  514                              [Name, QValue]),
  515                       close(Stream)).
  516def_entity(xmlns(URI), Parser) :-
  517    set_sgml_parser(Parser, xmlns(URI)).
  518def_entity(xmlns(NS, URI), Parser) :-
  519    set_sgml_parser(Parser, xmlns(NS, URI)).
  520
  521%!  parser_meta_options(+Options0, +Module, -Options)
  522%
  523%   Qualify meta-calling options to the parser.
  524
  525parser_meta_options([], _, []).
  526parser_meta_options([call(When, Closure)|T0], M, [call(When, M:Closure)|T]) :-
  527    !,
  528    parser_meta_options(T0, M, T).
  529parser_meta_options([H|T0], M, [H|T]) :-
  530    parser_meta_options(T0, M, T).
  531
  532
  533%!  set_input_location(+Parser, +In:stream) is det.
  534%
  535%   Set the input location if this was not set explicitly
  536
  537set_input_location(Parser, _In) :-
  538    get_sgml_parser(Parser, file(_)),
  539    !.
  540set_input_location(Parser, In) :-
  541    stream_property(In, file_name(File)),
  542    !,
  543    set_sgml_parser(Parser, file(File)),
  544    stream_property(In, position(Pos)),
  545    set_sgml_parser(Parser, position(Pos)).
  546set_input_location(_, _).
  547
  548                 /*******************************
  549                 *           UTILITIES          *
  550                 *******************************/
  551
  552%!  load_sgml_file(+File, -DOM) is det.
  553%
  554%   Load SGML from File and unify   the resulting DOM structure with
  555%   DOM.
  556%
  557%   @deprecated     New code should use load_sgml/3.
  558
  559load_sgml_file(File, Term) :-
  560    load_sgml(File, Term, []).
  561
  562%!  load_xml_file(+File, -DOM) is det.
  563%
  564%   Load XML from File and unify   the  resulting DOM structure with
  565%   DOM.
  566%
  567%   @deprecated     New code should use load_xml/3.
  568
  569load_xml_file(File, Term) :-
  570    load_xml(File, Term, []).
  571
  572%!  load_html_file(+File, -DOM) is det.
  573%
  574%   Load HTML from File and unify   the resulting DOM structure with
  575%   DOM.
  576%
  577%   @deprecated     New code should use load_html/3.
  578
  579load_html_file(File, DOM) :-
  580    load_html(File, DOM, []).
  581
  582%!  load_html(+Input, -DOM, +Options) is det.
  583%
  584%   Load HTML text from Input and  unify the resulting DOM structure
  585%   with DOM. Options are passed   to load_structure/3, after adding
  586%   the following default options:
  587%
  588%     - dtd(DTD)
  589%     Pass the DTD for HTML as obtained using dtd(html, DTD).
  590%     - dialect(Dialect)
  591%     Current dialect from the Prolog flag =html_dialect=
  592%     - max_errors(-1)
  593%     - syntax_errors(quiet)
  594%     Most HTML encountered in the wild contains errors. Even in the
  595%     context of errors, the resulting DOM term is often a
  596%     reasonable guess at the intend of the author.
  597%
  598%   You may also want to use  the library(http/http_open) to support
  599%   loading from HTTP and HTTPS URLs. For example:
  600%
  601%   ==
  602%   :- use_module(library(http/http_open)).
  603%   :- use_module(library(sgml)).
  604%
  605%   load_html_url(URL, DOM) :-
  606%       load_html(URL, DOM, []).
  607%   ==
  608
  609load_html(File, Term, M:Options) :-
  610    current_prolog_flag(html_dialect, Dialect),
  611    dtd(Dialect, DTD),
  612    merge_options(Options,
  613                  [ dtd(DTD),
  614                    dialect(Dialect),
  615                    max_errors(-1),
  616                    syntax_errors(quiet)
  617                  ], Options1),
  618    load_structure(File, Term, M:Options1).
  619
  620%!  load_xml(+Input, -DOM, +Options) is det.
  621%
  622%   Load XML text from Input and   unify the resulting DOM structure
  623%   with DOM. Options are passed   to load_structure/3, after adding
  624%   the following default options:
  625%
  626%     - dialect(xml)
  627
  628load_xml(Input, DOM, M:Options) :-
  629    merge_options(Options,
  630                  [ dialect(xml)
  631                  ], Options1),
  632    load_structure(Input, DOM, M:Options1).
  633
  634%!  load_sgml(+Input, -DOM, +Options) is det.
  635%
  636%   Load SGML text from Input and  unify the resulting DOM structure
  637%   with DOM. Options are passed   to load_structure/3, after adding
  638%   the following default options:
  639%
  640%     - dialect(sgml)
  641
  642load_sgml(Input, DOM, M:Options) :-
  643    merge_options(Options,
  644                  [ dialect(sgml)
  645                  ], Options1),
  646    load_structure(Input, DOM, M:Options1).
  647
  648
  649
  650                 /*******************************
  651                 *            ENCODING          *
  652                 *******************************/
  653
  654%!  xml_quote_attribute(+In, -Quoted) is det.
  655%!  xml_quote_cdata(+In, -Quoted) is det.
  656%
  657%   Backward  compatibility  for  versions  that  allow  to  specify
  658%   encoding. All characters that cannot fit the encoding are mapped
  659%   to XML character entities (&#dd;).  Using   ASCII  is the safest
  660%   value.
  661
  662xml_quote_attribute(In, Quoted) :-
  663    xml_quote_attribute(In, Quoted, ascii).
  664
  665xml_quote_cdata(In, Quoted) :-
  666    xml_quote_cdata(In, Quoted, ascii).
  667
  668%!  xml_name(+Atom) is semidet.
  669%
  670%   True if Atom is a valid XML name.
  671
  672xml_name(In) :-
  673    xml_name(In, ascii).
  674
  675
  676                 /*******************************
  677                 *    XML CHARACTER CLASSES     *
  678                 *******************************/
  679
  680%!  xml_basechar(+CodeOrChar) is semidet.
  681%!  xml_ideographic(+CodeOrChar) is semidet.
  682%!  xml_combining_char(+CodeOrChar) is semidet.
  683%!  xml_digit(+CodeOrChar) is semidet.
  684%!  xml_extender(+CodeOrChar) is semidet.
  685%
  686%   XML  character  classification   predicates.    Each   of  these
  687%   predicates accept both a character   (one-character  atom) and a
  688%   code (integer).
  689%
  690%   @see http://www.w3.org/TR/2006/REC-xml-20060816
  691
  692
  693                 /*******************************
  694                 *         TYPE CHECKING        *
  695                 *******************************/
  696
  697%!  xml_is_dom(@Term) is semidet.
  698%
  699%   True  if  term  statisfies   the    structure   as  returned  by
  700%   load_structure/3 and friends.
  701
  702xml_is_dom(0) :- !, fail.               % catch variables
  703xml_is_dom(List) :-
  704    is_list(List),
  705    !,
  706    xml_is_content_list(List).
  707xml_is_dom(Term) :-
  708    xml_is_element(Term).
  709
  710xml_is_content_list([]).
  711xml_is_content_list([H|T]) :-
  712    xml_is_content(H),
  713    xml_is_content_list(T).
  714
  715xml_is_content(0) :- !, fail.
  716xml_is_content(pi(Pi)) :-
  717    !,
  718    atom(Pi).
  719xml_is_content(CDATA) :-
  720    atom(CDATA),
  721    !.
  722xml_is_content(CDATA) :-
  723    string(CDATA),
  724    !.
  725xml_is_content(Term) :-
  726    xml_is_element(Term).
  727
  728xml_is_element(element(Name, Attributes, Content)) :-
  729    dom_name(Name),
  730    dom_attributes(Attributes),
  731    xml_is_content_list(Content).
  732
  733dom_name(NS:Local) :-
  734    atom(NS),
  735    atom(Local),
  736    !.
  737dom_name(Local) :-
  738    atom(Local).
  739
  740dom_attributes(0) :- !, fail.
  741dom_attributes([]).
  742dom_attributes([H|T]) :-
  743    dom_attribute(H),
  744    dom_attributes(T).
  745
  746dom_attribute(Name=Value) :-
  747    dom_name(Name),
  748    atomic(Value).
  749
  750
  751                 /*******************************
  752                 *            MESSAGES          *
  753                 *******************************/
  754:- multifile
  755    prolog:message/3.  756
  757%       Catch messages.  sgml/4 is generated by the SGML2PL binding.
  758
  759prolog:message(sgml(Parser, File, Line, Message)) -->
  760    { get_sgml_parser(Parser, dialect(Dialect))
  761    },
  762    [ 'SGML2PL(~w): ~w:~w: ~w'-[Dialect, File, Line, Message] ].
  763
  764
  765                 /*******************************
  766                 *         XREF SUPPORT         *
  767                 *******************************/
  768
  769:- multifile
  770    prolog:called_by/2.  771
  772prolog:called_by(sgml_parse(_, Options), Called) :-
  773    findall(Meta, meta_call_term(_, Meta, Options), Called).
  774
  775meta_call_term(T, G+N, Options) :-
  776    T = call(Event, G),
  777    pmember(T, Options),
  778    call_params(Event, Term),
  779    functor(Term, _, N).
  780
  781pmember(X, List) :-                     % member for partial lists
  782    nonvar(List),
  783    List = [H|T],
  784    (   X = H
  785    ;   pmember(X, T)
  786    ).
  787
  788call_params(begin, begin(tag,attributes,parser)).
  789call_params(end,   end(tag,parser)).
  790call_params(cdata, cdata(cdata,parser)).
  791call_params(pi,    pi(cdata,parser)).
  792call_params(decl,  decl(cdata,parser)).
  793call_params(error, error(severity,message,parser)).
  794call_params(xmlns, xmlns(namespace,url,parser)).
  795call_params(urlns, urlns(url,url,parser)).
  796
  797                 /*******************************
  798                 *           SANDBOX            *
  799                 *******************************/
  800
  801:- multifile
  802    sandbox:safe_primitive/1,
  803    sandbox:safe_meta_predicate/1.  804
  805sandbox:safe_meta_predicate(sgml:load_structure/3).
  806sandbox:safe_primitive(sgml:dtd(Dialect, _)) :-
  807    dtd_alias(Dialect, _).
  808sandbox:safe_primitive(sgml:xml_quote_attribute(_,_,_)).
  809sandbox:safe_primitive(sgml:xml_quote_cdata(_,_,_)).
  810sandbox:safe_primitive(sgml:xml_name(_,_)).
  811sandbox:safe_primitive(sgml:xml_basechar(_)).
  812sandbox:safe_primitive(sgml:xml_ideographic(_)).
  813sandbox:safe_primitive(sgml:xml_combining_char(_)).
  814sandbox:safe_primitive(sgml:xml_digit(_)).
  815sandbox:safe_primitive(sgml:xml_extender(_)).
  816sandbox:safe_primitive(sgml:iri_xml_namespace(_,_,_)).
  817sandbox:safe_primitive(sgml:xsd_number_string(_,_)).
  818sandbox:safe_primitive(sgml:xsd_time_string(_,_,_))