1/* Part of SWI-Prolog 2 3 Author: Jan Wielemaker & Richard O'Keefe 4 E-mail: J.Wielemaker@vu.nl 5 WWW: http://www.swi-prolog.org 6 Copyright (c) 2004-2016, University of Amsterdam 7 VU University Amsterdam 8 All rights reserved. 9 10 Redistribution and use in source and binary forms, with or without 11 modification, are permitted provided that the following conditions 12 are met: 13 14 1. Redistributions of source code must retain the above copyright 15 notice, this list of conditions and the following disclaimer. 16 17 2. Redistributions in binary form must reproduce the above copyright 18 notice, this list of conditions and the following disclaimer in 19 the documentation and/or other materials provided with the 20 distribution. 21 22 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 23 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 24 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 25 FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 26 COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 27 INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 28 BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 29 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 30 CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 31 LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN 32 ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 33 POSSIBILITY OF SUCH DAMAGE. 34*/ 35 36:- module(sgml_write, 37 [ html_write/2, % +Data, +Options 38 html_write/3, % +Stream, +Data, +Options 39 sgml_write/2, % +Data, +Options 40 sgml_write/3, % +Stream, +Data, +Options 41 xml_write/2, % +Data, +Options 42 xml_write/3 % +Stream, +Data, +Options 43 ]). 44:- use_module(library(lists)). 45:- use_module(library(sgml)). 46:- use_module(library(assoc)). 47:- use_module(library(option)). 48:- use_module(library(error)). 49 50:- predicate_options(xml_write/2, 2, [pass_to(xml_write/3, 3)]). 51:- predicate_options(xml_write/3, 3, 52 [ dtd(any), 53 doctype(atom), 54 public(atom), 55 system(atom), 56 header(boolean), 57 nsmap(list), 58 indent(nonneg), 59 layout(boolean), 60 net(boolean), 61 cleanns(boolean) 62 ]). 63 64:- multifile 65 xmlns/2. % NS, URI
true
(default), remove duplicate xmlns
attributes.doctype(_)
, public(_)
, or
system(_)
is provided in Options.<foo/> (default, net(true)) <foo></foo> (net(false))
For SGML, this applies to empty elements, so you get
<foo> (if foo is declared to be EMPTY in the DTD) <foo></foo> (default, net(false)) <foo// (net(true))
and also to elements with character content not containing /
<b>xxx</b> (default, net(false)) <b/xxx/ (net(true)).
Note that if the stream is UTF-8, the system will write special characters as UTF-8 sequences, while if it is ISO Latin-1 it will use (character) entities if there is a DTD that provides them, otherwise it will use numeric character references.
169xml_write(Data, Options) :- 170 current_output(Stream), 171 xml_write(Stream, Data, Options). 172 173xml_write(Stream0, Data, Options) :- 174 fix_user_stream(Stream0, Stream), 175 ( stream_property(Stream, encoding(text)) 176 -> set_stream(Stream, encoding(utf8)), 177 call_cleanup(xml_write(Stream, Data, Options), 178 set_stream(Stream, encoding(text))) 179 ; new_state(xml, State), 180 init_state(Options, State), 181 get_state(State, nsmap, NSMap), 182 add_missing_namespaces(Data, NSMap, Data1), 183 emit_xml_encoding(Stream, Options), 184 emit_doctype(Options, Data, Stream), 185 write_initial_indent(State, Stream), 186 emit(Data1, Stream, State) 187 ). 188 189 190sgml_write(Data, Options) :- 191 current_output(Stream), 192 sgml_write(Stream, Data, Options). 193 194sgml_write(Stream0, Data, Options) :- 195 fix_user_stream(Stream0, Stream), 196 ( stream_property(Stream, encoding(text)) 197 -> set_stream(Stream, encoding(utf8)), 198 call_cleanup(sgml_write(Stream, Data, Options), 199 set_stream(Stream, encoding(text))) 200 ; new_state(sgml, State), 201 init_state(Options, State), 202 write_initial_indent(State, Stream), 203 emit_doctype(Options, Data, Stream), 204 emit(Data, Stream, State) 205 ). 206 207 208html_write(Data, Options) :- 209 current_output(Stream), 210 html_write(Stream, Data, Options). 211 212html_write(Stream, Data, Options) :- 213 sgml_write(Stream, Data, 214 [ dtd(html) 215 | Options 216 ]). 217 218fix_user_stream(user, user_output) :- !. 219fix_user_stream(Stream, Stream). 220 221 222init_state([], _). 223init_state([H|T], State) :- 224 update_state(H, State), 225 init_state(T, State). 226 227update_state(dtd(DTD), State) :- 228 !, 229 ( atom(DTD) 230 -> dtd(DTD, DTDObj) 231 ; DTDObj = DTD 232 ), 233 set_state(State, dtd, DTDObj), 234 dtd_character_entities(DTDObj, EntityMap), 235 set_state(State, entity_map, EntityMap). 236update_state(nsmap(Map), State) :- 237 !, 238 set_state(State, nsmap, Map). 239update_state(cleanns(Bool), State) :- 240 !, 241 must_be(boolean, Bool), 242 set_state(State, cleanns, Bool). 243update_state(indent(Indent), State) :- 244 !, 245 must_be(integer, Indent), 246 set_state(State, indent, Indent). 247update_state(layout(Bool), State) :- 248 !, 249 must_be(boolean, Bool), 250 set_state(State, layout, Bool). 251update_state(doctype(_), _) :- !. 252update_state(public(_), _) :- !. 253update_state(system(_), _) :- !. 254update_state(net(Bool), State) :- 255 !, 256 must_be(boolean, Bool), 257 set_state(State, net, Bool). 258update_state(header(Bool), _) :- 259 !, 260 must_be(boolean, Bool). 261update_state(Option, _) :- 262 domain_error(xml_write_option, Option). 263 264% emit_xml_encoding(+Stream, +Options) 265% 266% Emit the XML fileheader with encoding information. Setting the 267% right encoding on the output stream must be done before calling 268% xml_write/3. 269 270emit_xml_encoding(Out, Options) :- 271 option(header(Hdr), Options, true), 272 Hdr == true, 273 !, 274 stream_property(Out, encoding(Encoding)), 275 ( ( Encoding == utf8 276 ; Encoding == wchar_t 277 ) 278 -> format(Out, '<?xml version="1.0" encoding="UTF-8"?>~n~n', []) 279 ; Encoding == iso_latin_1 280 -> format(Out, '<?xml version="1.0" encoding="ISO-8859-1"?>~n~n', []) 281 ; domain_error(xml_encoding, Encoding) 282 ). 283emit_xml_encoding(_, _).
293emit_doctype(_Options, Data, Out) :- 294 ( Data = [_|_], memberchk(element(html,Att,_), Data) 295 ; Data = element(html,Att,_) 296 ), 297 memberchk(version=Version, Att), 298 !, 299 format(Out, '<!DOCTYPE HTML PUBLIC "~w">~n~n', [Version]). 300emit_doctype(Options, Data, Out) :- 301 ( memberchk(public(PubId), Options) -> true 302 ; PubId = (-) 303 ), 304 ( memberchk(system(SysId), Options) -> true 305 ; SysId = (-) 306 ), 307 \+ (PubId == (-), 308 SysId == (-), 309 \+ memberchk(doctype(_), Options) 310 ), 311 ( Data = element(DocType,_,_) 312 ; Data = [_|_], memberchk(element(DocType,_,_), Data) 313 ; memberchk(doctype(DocType), Options) 314 ), 315 !, 316 write_doctype(Out, DocType, PubId, SysId). 317emit_doctype(_, _, _). 318 319write_doctype(Out, DocType, -, -) :- 320 !, 321 format(Out, '<!DOCTYPE ~w []>~n~n', [DocType]). 322write_doctype(Out, DocType, -, SysId) :- 323 !, 324 format(Out, '<!DOCTYPE ~w SYSTEM "~w">~n~n', [DocType,SysId]). 325write_doctype(Out, DocType, PubId, -) :- 326 !, 327 format(Out, '<!DOCTYPE ~w PUBLIC "~w">~n~n', [DocType,PubId]). 328write_doctype(Out, DocType, PubId, SysId) :- 329 format(Out, '<!DOCTYPE ~w PUBLIC "~w" "~w">~n~n', [DocType,PubId,SysId]).
336emit(Var, _, _) :- 337 var(Var), 338 !, 339 instantiation_error(Var). 340emit([], _, _) :- !. 341emit([H|T], Out, State) :- 342 !, 343 emit(H, Out, State), 344 emit(T, Out, State). 345emit(CDATA, Out, State) :- 346 atomic(CDATA), 347 !, 348 sgml_write_content(Out, CDATA, State). 349emit(Element, Out, State) :- 350 \+ \+ emit_element(Element, Out, State). 351 352emit_element(pi(PI), Out, State) :- 353 !, 354 get_state(State, entity_map, EntityMap), 355 write(Out, <?), 356 write_quoted(Out, PI, '', EntityMap), 357 ( get_state(State, dialect, xml) -> 358 write(Out, ?>) 359 ; write(Out, >) 360 ). 361emit_element(element(Name, Attributes, Content), Out, State) :- 362 !, 363 must_be(list, Attributes), 364 must_be(list, Content), 365 ( get_state(State, dialect, xml) 366 -> update_nsmap(Attributes, CleanAttrs, State), 367 ( get_state(State, cleanns, true) 368 -> WriteAttrs = CleanAttrs 369 ; WriteAttrs = Attributes 370 ) 371 ; WriteAttrs = Attributes 372 ), 373 att_length(WriteAttrs, State, Alen), 374 ( Alen > 60, 375 get_state(State, layout, true) 376 -> Sep = nl, 377 AttIndent = 4 378 ; Sep = sp, 379 AttIndent = 0 380 ), 381 put_char(Out, '<'), 382 emit_name(Name, Out, State), 383 ( AttIndent > 0 384 -> \+ \+ ( inc_indent(State, AttIndent), 385 attributes(WriteAttrs, Sep, Out, State) 386 ) 387 ; attributes(WriteAttrs, Sep, Out, State) 388 ), 389 content(Content, Out, Name, State). 390emit_element(E, _, _) :- 391 type_error(xml_dom, E). 392 393attributes([], _, _, _). 394attributes([H|T], Sep, Out, State) :- 395 ( Sep == nl 396 -> write_indent(State, Out) 397 ; put_char(Out, ' ') 398 ), 399 attribute(H, Out, State), 400 attributes(T, Sep, Out, State). 401 402attribute(Name=Value, Out, State) :- 403 emit_name(Name, Out, State), 404 put_char(Out, =), 405 sgml_write_attribute(Out, Value, State). 406 407att_length(Atts, State, Len) :- 408 att_length(Atts, State, 0, Len). 409 410att_length([], _, Len, Len). 411att_length([A0|T], State, Len0, Len) :- 412 alen(A0, State, AL), 413 Len1 is Len0 + 1 + AL, 414 att_length(T, State, Len1, Len). 415 416alen(ns(NS, _URI):Name=Value, _State, Len) :- 417 !, 418 atom_length(Value, AL), 419 vlen(Name, NL), 420 atom_length(NS, NsL), 421 Len is AL+NL+NsL+3. 422alen(URI:Name=Value, State, Len) :- 423 !, 424 atom_length(Value, AL), 425 vlen(Name, NL), 426 get_state(State, nsmap, Nsmap), 427 ( memberchk(NS=URI, Nsmap) 428 -> atom_length(NS, NsL) 429 ; atom_length(URI, NsL) 430 ), 431 Len is AL+NL+NsL+3. 432alen(Name=Value, _, Len) :- 433 atom_length(Name, NL), 434 vlen(Value, AL), 435 Len is AL+NL+3. 436 437vlen(Value, Len) :- 438 is_list(Value), 439 !, 440 vlen_list(Value, 0, Len). 441vlen(Value, Len) :- 442 atom_length(Value, Len). 443 444vlen_list([], L, L). 445vlen_list([H|T], L0, L) :- 446 atom_length(H, HL), 447 ( L0 == 0 448 -> L1 is L0 + HL 449 ; L1 is L0 + HL + 1 450 ), 451 vlen_list(T, L1, L). 452 453 454emit_name(Name, Out, _) :- 455 atom(Name), 456 !, 457 write(Out, Name). 458emit_name(ns(NS,_URI):Name, Out, _State) :- 459 !, 460 ( NS == '' 461 -> write(Out, Name) 462 ; format(Out, '~w:~w', [NS, Name]) 463 ). 464emit_name(URI:Name, Out, State) :- 465 get_state(State, nsmap, NSMap), 466 memberchk(NS=URI, NSMap), 467 !, 468 ( NS == [] 469 -> write(Out, Name) 470 ; format(Out, '~w:~w', [NS, Name]) 471 ). 472emit_name(Term, Out, _) :- % error? 473 write(Out, Term).
483update_nsmap(Attributes, Attributes1, State) :- 484 get_state(State, nsmap, Map0), 485 update_nsmap(Attributes, Attributes1, Map0, Map), 486 set_state(State, nsmap, Map). 487 488update_nsmap([], [], Map, Map). 489update_nsmap([xmlns:NS=URI|T], Attrs, Map0, Map) :- 490 !, 491 ( memberchk(NS=URI, Map0) 492 -> update_nsmap(T, Attrs, Map0, Map) 493 ; set_nsmap(NS, URI, Map0, Map1), 494 Attrs = [xmlns:NS=URI|Attrs1], 495 update_nsmap(T, Attrs1, Map1, Map) 496 ). 497update_nsmap([xmlns=URI|T], Attrs, Map0, Map) :- 498 !, 499 ( memberchk([]=URI, Map0) 500 -> update_nsmap(T, Attrs, Map0, Map) 501 ; set_nsmap([], URI, Map0, Map1), 502 Attrs = [xmlns=URI|Attrs1], 503 update_nsmap(T, Attrs1, Map1, Map) 504 ). 505update_nsmap([H|T0], [H|T], Map0, Map) :- 506 !, 507 update_nsmap(T0, T, Map0, Map). 508 509set_nsmap(NS, URI, Map0, Map) :- 510 select(NS=_, Map0, Map1), 511 !, 512 Map = [NS=URI|Map1]. 513set_nsmap(NS, URI, Map, [NS=URI|Map]).
523content([], Out, Element, State) :- % empty element 524 !, 525 ( get_state(State, net, true) 526 -> ( get_state(State, dialect, xml) -> 527 write(Out, />) 528 ; empty_element(State, Element) -> 529 write(Out, >) 530 ; write(Out, //) 531 ) 532 ;/* get_state(State, net, false) */ 533 write(Out, >), 534 ( get_state(State, dialect, sgml), 535 empty_element(State, Element) 536 -> true 537 ; emit_close(Element, Out, State) 538 ) 539 ). 540content([CDATA], Out, Element, State) :- 541 atomic(CDATA), 542 !, 543 ( get_state(State, dialect, sgml), 544 get_state(State, net, true), 545 \+ sub_atom(CDATA, _, _, _, /), 546 write_length(CDATA, Len, []), 547 Len < 20 548 -> write(Out, /), 549 sgml_write_content(Out, CDATA, State), 550 write(Out, /) 551 ; verbatim_element(Element, State) 552 -> write(Out, >), 553 write(Out, CDATA), 554 emit_close(Element, Out, State) 555 ;/* XML or not NET */ 556 write(Out, >), 557 sgml_write_content(Out, CDATA, State), 558 emit_close(Element, Out, State) 559 ). 560content(Content, Out, Element, State) :- 561 get_state(State, layout, true), 562 /* If xml:space='preserve' is present, */ 563 /* we MUST NOT tamper with white space at all. */ 564 \+ (Element = element(_,Atts,_), 565 memberchk('xml:space'=preserve, Atts) 566 ), 567 element_content(Content, Elements), 568 !, 569 format(Out, >, []), 570 \+ \+ ( 571 inc_indent(State), 572 write_element_content(Elements, Out, State) 573 ), 574 write_indent(State, Out), 575 emit_close(Element, Out, State). 576content(Content, Out, Element, State) :- 577 format(Out, >, []), 578 write_mixed_content(Content, Out, Element, State), 579 emit_close(Element, Out, State). 580 581verbatim_element(Element, State) :- 582 verbatim_element(Element), 583 get_state(State, dtd, DTD), 584 DTD \== (-), 585 dtd_property(DTD, doctype(html)). 586 587verbatim_element(script). 588verbatim_element(style). 589 590emit_close(Element, Out, State) :- 591 write(Out, '</'), 592 emit_name(Element, Out, State), 593 write(Out, '>'). 594 595 596write_mixed_content([], _, _, _). 597write_mixed_content([H|T], Out, Element, State) :- 598 write_mixed_content_element(H, Out, State), 599 write_mixed_content(T, Out, Element, State). 600 601write_mixed_content_element(H, Out, State) :- 602 ( atom(H) 603 -> sgml_write_content(Out, H, State) 604 ; string(H) 605 -> sgml_write_content(Out, H, State) 606 ; functor(H, element, 3) 607 -> emit(H, Out, State) 608 ; functor(H, pi, 1) 609 -> emit(H, Out, State) 610 ; var(H) 611 -> instantiation_error(H) 612 ; H = sdata(Data) % cannot be written without entity! 613 -> print_message(warning, sgml_write(sdata_as_cdata(Data))), 614 sgml_write_content(Out, Data, State) 615 ; type_error(sgml_content, H) 616 ). 617 618 619element_content([], []). 620element_content([element(Name,Atts,C)|T0], [element(Name,Atts,C)|T]) :- 621 !, 622 element_content(T0, T). 623element_content([Blank|T0], T) :- 624 atom(Blank), 625 atom_codes(Blank, Codes), 626 all_blanks(Codes), 627 element_content(T0, T). 628 629all_blanks([]). 630all_blanks([H|T]) :- 631 code_type(H, space), 632 all_blanks(T). 633 634write_element_content([], _, _). 635write_element_content([H|T], Out, State) :- 636 write_indent(State, Out), 637 emit(H, Out, State), 638 write_element_content(T, Out, State). 639 640 641 /******************************* 642 * NAMESPACES * 643 *******************************/
element(s)
to
deal with missing namespaces.650add_missing_namespaces([], _, []) :- !. 651add_missing_namespaces([H0|T0], Def, [H|T]) :- 652 !, 653 add_missing_namespaces(H0, Def, H), 654 add_missing_namespaces(T0, Def, T). 655add_missing_namespaces(Elem0, Def, Elem) :- 656 Elem0 = element(Name, Atts0, Content), 657 !, 658 missing_namespaces(Elem0, Def, Missing), 659 ( Missing == [] 660 -> Elem = Elem0 661 ; add_missing_ns(Missing, Atts0, Atts), 662 Elem = element(Name, Atts, Content) 663 ). 664add_missing_namespaces(DOM, _, DOM). % CDATA, etc. 665 666add_missing_ns([], Atts, Atts). 667add_missing_ns([H|T], Atts0, Atts) :- 668 generate_ns(H, NS), 669 add_missing_ns(T, [xmlns:NS=H|Atts0], Atts).
675generate_ns(URI, NS) :- 676 xmlns(NS, URI), 677 !. 678generate_ns(URI, NS) :- 679 default_ns(URI, NS), 680 !. 681generate_ns(_, NS) :- 682 gensym(xns, NS).
Default XML namespaces are:
703:- multifile 704 rdf_db:ns/2. 705 706default_ns('http://www.w3.org/2001/XMLSchema-instance', xsi). 707default_ns('http://www.w3.org/2001/XMLSchema', xs). 708default_ns('http://www.w3.org/1999/xhtml', xhtml). 709default_ns('http://schemas.xmlsoap.org/soap/envelope/', soap11). 710default_ns('http://www.w3.org/2003/05/soap-envelope', soap12). 711default_ns(URI, NS) :- 712 rdf_db:ns(NS, URI).
719missing_namespaces(DOM, Defined, Missing) :- 720 missing_namespaces(DOM, Defined, [], Missing). 721 722missing_namespaces([], _, L, L) :- !. 723missing_namespaces([H|T], Def, L0, L) :- 724 !, 725 missing_namespaces(H, Def, L0, L1), 726 missing_namespaces(T, Def, L1, L). 727missing_namespaces(element(Name, Atts, Content), Def, L0, L) :- 728 !, 729 update_nsmap(Atts, _, Def, Def1), 730 missing_ns(Name, Def1, L0, L1), 731 missing_att_ns(Atts, Def1, L1, L2), 732 missing_namespaces(Content, Def1, L2, L). 733missing_namespaces(_, _, L, L). 734 735missing_att_ns([], _, M, M). 736missing_att_ns([Name=_|T], Def, M0, M) :- 737 missing_ns(Name, Def, M0, M1), 738 missing_att_ns(T, Def, M1, M). 739 740missing_ns(ns(NS, URI):_, Def, M0, M) :- 741 !, 742 ( memberchk(NS=URI, Def) 743 -> M = M0 744 ; NS == '' 745 -> M = M0 746 ; M = [URI|M0] 747 ). 748missing_ns(URI:_, Def, M0, M) :- 749 !, 750 ( ( memberchk(_=URI, Def) 751 ; memberchk(URI, M0) 752 ; URI = xml % predefined ones 753 ; URI = xmlns 754 ) 755 -> M = M0 756 ; M = [URI|M0] 757 ). 758missing_ns(_, _, M, M). 759 760 /******************************* 761 * QUOTED WRITE * 762 *******************************/ 763 764sgml_write_attribute(Out, Values, State) :- 765 is_list(Values), 766 !, 767 get_state(State, entity_map, EntityMap), 768 put_char(Out, '"'), 769 write_quoted_list(Values, Out, '"<&\r\n\t', EntityMap), 770 put_char(Out, '"'). 771sgml_write_attribute(Out, Value, State) :- 772 is_text(Value), 773 !, 774 get_state(State, entity_map, EntityMap), 775 put_char(Out, '"'), 776 write_quoted(Out, Value, '"<&\r\n\t', EntityMap), 777 put_char(Out, '"'). 778sgml_write_attribute(Out, Value, _State) :- 779 number(Value), 780 !, 781 format(Out, '"~w"', [Value]). 782sgml_write_attribute(_, Value, _) :- 783 type_error(sgml_attribute_value, Value). 784 785write_quoted_list([], _, _, _). 786write_quoted_list([H|T], Out, Escape, EntityMap) :- 787 write_quoted(Out, H, Escape, EntityMap), 788 ( T == [] 789 -> true 790 ; put_char(Out, ' '), 791 write_quoted_list(T, Out, Escape, EntityMap) 792 ). 793 794 795sgml_write_content(Out, Value, State) :- 796 is_text(Value), 797 !, 798 get_state(State, entity_map, EntityMap), 799 write_quoted(Out, Value, '<&>\r', EntityMap). 800sgml_write_content(Out, Value, _) :- 801 write(Out, Value). 802 803is_text(Value) :- atom(Value), !. 804is_text(Value) :- string(Value), !. 805 806write_quoted(Out, Atom, Escape, EntityMap) :- 807 atom(Atom), 808 !, 809 atom_codes(Atom, Codes), 810 writeq(Codes, Out, Escape, EntityMap). 811write_quoted(Out, String, Escape, EntityMap) :- 812 string(String), 813 !, 814 string_codes(String, Codes), 815 writeq(Codes, Out, Escape, EntityMap). 816write_quoted(_, String, _, _) :- 817 type_error(atom_or_string, String).
822writeq([], _, _, _). 823writeq([H|T], Out, Escape, EntityMap) :- 824 ( char_code(HC, H), 825 sub_atom(Escape, _, _, _, HC) 826 -> write_entity(H, Out, EntityMap) 827 ; H >= 256 828 -> ( stream_property(Out, encoding(Enc)), 829 unicode_encoding(Enc) 830 -> put_code(Out, H) 831 ; write_entity(H, Out, EntityMap) 832 ) 833 ; put_code(Out, H) 834 ), 835 writeq(T, Out, Escape, EntityMap). 836 837unicode_encoding(utf8). 838unicode_encoding(wchar_t). 839unicode_encoding(unicode_le). 840unicode_encoding(unicode_be). 841 842write_entity(Code, Out, EntityMap) :- 843 ( get_assoc(Code, EntityMap, EntityName) 844 -> format(Out, '&~w;', [EntityName]) 845 ; format(Out, '&#x~16R;', [Code]) 846 ). 847 848 849 /******************************* 850 * INDENTATION * 851 *******************************/ 852 853write_initial_indent(State, Out) :- 854 ( get_state(State, indent, Indent), 855 Indent > 0 856 -> emit_indent(Indent, Out) 857 ; true 858 ). 859 860write_indent(State, _) :- 861 get_state(State, layout, false), 862 !. 863write_indent(State, Out) :- 864 get_state(State, indent, Indent), 865 emit_indent(Indent, Out). 866 867emit_indent(Indent, Out) :- 868 Tabs is Indent // 8, 869 Spaces is Indent mod 8, 870 format(Out, '~N', []), 871 write_n(Tabs, '\t', Out), 872 write_n(Spaces, ' ', Out). 873 874write_n(N, Char, Out) :- 875 ( N > 0 876 -> put_char(Out, Char), 877 N2 is N - 1, 878 write_n(N2, Char, Out) 879 ; true 880 ). 881 882inc_indent(State) :- 883 inc_indent(State, 2). 884 885inc_indent(State, Inc) :- 886 state(indent, Arg), 887 arg(Arg, State, I0), 888 I is I0 + Inc, 889 setarg(Arg, State, I). 890 891 892 /******************************* 893 * DTD HANDLING * 894 *******************************/
901empty_element(State, Element) :-
902 get_state(State, dtd, DTD),
903 DTD \== (-),
904 dtd_property(DTD, element(Element, _, empty)).
912dtd_character_entities(DTD, Map) :- 913 empty_assoc(Empty), 914 dtd_property(DTD, entities(Entities)), 915 fill_entity_map(Entities, DTD, Empty, Map). 916 917fill_entity_map([], _, Map, Map). 918fill_entity_map([H|T], DTD, Map0, Map) :- 919 ( dtd_property(DTD, entity(H, CharEntity)), 920 atom(CharEntity), 921 ( sub_atom(CharEntity, 0, _, _, '&#'), 922 sub_atom(CharEntity, _, _, 0, ';') 923 -> sub_atom(CharEntity, 2, _, 1, Name), 924 atom_number(Name, Code) 925 ; atom_length(CharEntity, 1), 926 char_code(CharEntity, Code) 927 ) 928 -> put_assoc(Code, Map0, H, Map1), 929 fill_entity_map(T, DTD, Map1, Map) 930 ; fill_entity_map(T, DTD, Map0, Map) 931 ). 932 933 934 935 /******************************* 936 * FIELDS * 937 *******************************/ 938 939state(indent, 1). % current indentation 940state(layout, 2). % use layout (true/false) 941state(dtd, 3). % DTD for entity names 942state(entity_map, 4). % compiled entity-map 943state(dialect, 5). % xml/sgml 944state(nsmap, 6). % defined namespaces 945state(net, 7). % Should null end-tags be used? 946state(cleanns, 8). % Remove duplicate xmlns declarations 947 948new_state(Dialect, 949 state( 950 0, % indent 951 true, % layout 952 -, % DTD 953 EntityMap, % entity_map 954 Dialect, % dialect 955 [], % NS=Full map 956 Net, % Null End-Tags? 957 true % Remove duplicate xmlns declarations 958 )) :- 959 ( Dialect == sgml 960 -> Net = false, 961 empty_assoc(EntityMap) 962 ; Net = true, 963 xml_entities(EntityMap) 964 ). 965 966get_state(State, Field, Value) :- 967 state(Field, Arg), 968 arg(Arg, State, Value). 969 970set_state(State, Field, Value) :- 971 state(Field, Arg), 972 setarg(Arg, State, Value). 973 974term_expansion(xml_entities(map), 975 xml_entities(Map)) :- 976 list_to_assoc([ 0'< - lt, 977 0'& - amp, 978 0'> - gt, 979 0'\' - apos, 980 0'\" - quot 981 ], Map). 982xml_entities(map). 983 984 /******************************* 985 * MESSAGES * 986 *******************************/ 987 988:- multifile 989 prolog:message/3. 990 991prologmessage(sgml_write(sdata_as_cdata(Data))) --> 992 [ 'SGML-write: emitting SDATA as CDATA: "~p"'-[Data] ]
XML/SGML writer module
This library provides the inverse functionality of the
sgml.pl
parser library, writing XML, SGML and HTML documents from the parsed output. It is intended to allow rewriting in a different dialect or encoding or to perform document transformation in Prolog on the parsed representation.The current implementation is particularly keen on getting character encoding and the use of character entities right. Some work has been done providing layout, but space handling in XML and SGML make this a very hazardous area.
The Prolog-based low-level character and escape handling is the real bottleneck in this library and will probably be moved to C in a later stage.