1/* Part of SWI-Prolog 2 3 Author: Jan Wielemaker 4 E-mail: J.Wielemaker@vu.nl 5 WWW: http://www.swi-prolog.org 6 Copyright (c) 2010-2013, University of Amsterdam 7 All rights reserved. 8 9 Redistribution and use in source and binary forms, with or without 10 modification, are permitted provided that the following conditions 11 are met: 12 13 1. Redistributions of source code must retain the above copyright 14 notice, this list of conditions and the following disclaimer. 15 16 2. Redistributions in binary form must reproduce the above copyright 17 notice, this list of conditions and the following disclaimer in 18 the documentation and/or other materials provided with the 19 distribution. 20 21 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 24 FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 25 COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 26 INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 27 BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 28 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 29 CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 30 LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN 31 ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 32 POSSIBILITY OF SUCH DAMAGE. 33*/ 34 35:- module(rdf_write, 36 [ rdf_write_xml/2 % +Stream, +Triples 37 ]). 38:- use_module(library('semweb/rdf_db')). 39:- use_module(library(lists)). 40:- use_module(library(sgml)). 41:- use_module(library(sgml_write)). 42:- use_module(library(assoc)). 43:- use_module(library(debug)).
67 /******************************* 68 * WRITE RDFXML * 69 *******************************/
75rdf_write_xml(Out, Triples) :- 76 sort(Triples, Unique), 77 rdf_write_header(Out, Unique), 78 node_id_map(Unique, AnonIDs), 79 rdf_write_triples(Unique, AnonIDs, Out), 80 rdf_write_footer(Out). 81 82 83 /******************************* 84 * HEADER/FOOTER * 85 *******************************/
92rdf_write_header(Out, Triples) :- 93 xml_encoding(Out, Enc, Encoding), 94 format(Out, '<?xml version=\'1.0\' encoding=\'~w\'?>~n', [Encoding]), 95 format(Out, '<!DOCTYPE rdf:RDF [', []), 96 used_namespaces(Triples, NSList), 97 ( member(Id, NSList), 98 ns(Id, NS), 99 xml_quote_attribute(NS, NSText0, Enc), 100 xml_escape_parameter_entity(NSText0, NSText), 101 format(Out, '~N <!ENTITY ~w \'~w\'>', [Id, NSText]), 102 fail 103 ; true 104 ), 105 format(Out, '~N]>~n~n', []), 106 format(Out, '<rdf:RDF', []), 107 ( member(Id, NSList), 108 format(Out, '~N xmlns:~w="&~w;"~n', [Id, Id]), 109 fail 110 ; true 111 ), 112 format(Out, '>~n', []). 113 114 115xml_encoding(Out, Enc, Encoding) :- 116 stream_property(Out, encoding(Enc)), 117 ( xml_encoding_name(Enc, Encoding) 118 -> true 119 ; throw(error(domain_error(rdf_encoding, Enc), _)) 120 ). 121 122xml_encoding_name(ascii, 'US-ASCII'). 123xml_encoding_name(iso_latin_1, 'ISO-8859-1'). 124xml_encoding_name(utf8, 'UTF-8').
130xml_escape_parameter_entity(In, Out) :- 131 sub_atom(In, _, _, _, '%'), 132 !, 133 atom_codes(In, Codes), 134 phrase(escape_parent(Codes), OutCodes), 135 atom_codes(Out, OutCodes). 136xml_escape_parameter_entity(In, In). 137 138escape_parent([]) --> []. 139escape_parent([H|T]) --> 140 ( { H == 37 } 141 -> "%" 142 ; [H] 143 ), 144 escape_parent(T).
151used_namespaces(Triples, NSList) :- 152 decl_used_predicate_ns(Triples), 153 resources(Triples, Resources), 154 empty_assoc(A0), 155 put_assoc(rdf, A0, *, A1), % needed for rdf:RDF 156 res_used_namespaces(Resources, _NoNS, A1, A), 157 assoc_to_keys(A, NSList). 158 159 160res_used_namespaces([], [], A, A). 161res_used_namespaces([Resource|T], NoNS, A0, A) :- 162 ns(NS, Full), 163 Full \== '', 164 atom_concat(Full, Local, Resource), 165 xml_name(Local), 166 !, 167 put_assoc(NS, A0, *, A1), 168 res_used_namespaces(T, NoNS, A1, A). 169res_used_namespaces([R|T0], [R|T], A0, A) :- 170 res_used_namespaces(T0, T, A0, A).
176resources(Triples, Resources) :- 177 phrase(resources(Triples), Raw), 178 sort(Raw, Resources). 179 180resources([]) --> 181 []. 182resources([rdf(S,P,O)|T]) --> 183 [S,P], 184 object_resources(O), 185 resources(T). 186 187object_resources(Atom) --> 188 { atom(Atom) }, 189 !, 190 [ Atom ]. 191object_resources(literal(type(Type, _))) --> 192 !, 193 [ Type ]. 194object_resources(_) --> 195 [].
202:- thread_local 203 predicate_ns/2. 204 205decl_used_predicate_ns(Triples) :- 206 retractall(predicate_ns(_,_)), 207 ( member(rdf(_,P,_), Triples), 208 decl_predicate_ns(P), 209 fail 210 ; true 211 ). 212 213decl_predicate_ns(Pred) :- 214 predicate_ns(Pred, _), 215 !. 216decl_predicate_ns(Pred) :- 217 rdf_global_id(NS:Local, Pred), 218 xml_name(Local), 219 !, 220 assert(predicate_ns(Pred, NS)). 221decl_predicate_ns(Pred) :- 222 is_bag_li_predicate(Pred), 223 !. 224decl_predicate_ns(Pred) :- 225 atom_codes(Pred, Codes), 226 append(NSCodes, LocalCodes, Codes), 227 xml_codes(LocalCodes), 228 !, 229 ( NSCodes \== [] 230 -> atom_codes(NS, NSCodes), 231 ( ns(Id, NS) 232 -> assert(predicate_ns(Pred, Id)) 233 ; between(1, infinite, N), 234 atom_concat(ns, N, Id), 235 \+ ns(Id, _) 236 -> rdf_register_ns(Id, NS), 237 print_message(informational, 238 rdf(using_namespace(Id, NS))) 239 ), 240 assert(predicate_ns(Pred, Id)) 241 ; assert(predicate_ns(Pred, -)) % no namespace used 242 ). 243 244xml_codes([]). 245xml_codes([H|T]) :- 246 xml_code(H), 247 xml_codes(T). 248 249xml_code(X) :- 250 code_type(X, csym), 251 !. 252xml_code(0'-). % ' 253 254 Out) (:- 256 format(Out, '</rdf:RDF>~n', []). 257 258 259 /******************************* 260 * ANONYMOUS IDS * 261 *******************************/
269node_id_map(Triples, IdMap) :- 270 anonymous_objects(Triples, Objs), 271 msort(Objs, Sorted), 272 empty_assoc(IdMap0), 273 nodeid_map(Sorted, 0, IdMap0, IdMap). 274 275anonymous_objects([], []). 276anonymous_objects([rdf(_,_,O)|T0], Anon) :- 277 rdf_is_bnode(O), 278 !, 279 Anon = [O|T], 280 anonymous_objects(T0, T). 281anonymous_objects([_|T0], T) :- 282 anonymous_objects(T0, T). 283 284nodeid_map([], _, Map, Map). 285nodeid_map([H,H|T0], Id, Map0, Map) :- 286 !, 287 remove_leading(H, T0, T), 288 atom_concat(bn, Id, NodeId), 289 put_assoc(H, Map0, NodeId, Map1), 290 Id2 is Id + 1, 291 nodeid_map(T, Id2, Map1, Map). 292nodeid_map([_|T], Id, Map0, Map) :- 293 nodeid_map(T, Id, Map0, Map). 294 295remove_leading(H, [H|T0], T) :- 296 !, 297 remove_leading(H, T0, T). 298remove_leading(_, T, T). 299 300 301 /******************************* 302 * TRIPLES * 303 *******************************/ 304 305rdf_write_triples(Triples, NodeIDs, Out) :- 306 rdf_write_triples(Triples, NodeIDs, Out, [], Anon), 307 rdf_write_anon(Anon, NodeIDs, Out, Anon). 308 309rdf_write_triples([], _, _, Anon, Anon). 310rdf_write_triples([H|T0], NodeIDs, Out, Anon0, Anon) :- 311 arg(1, H, S), 312 subject_triples(S, [H|T0], T, OnSubject), 313 ( rdf_is_bnode(S) 314 -> rdf_write_triples(T, NodeIDs, Out, [anon(S,_,OnSubject)|Anon0], Anon) 315 ; rdf_write_subject(OnSubject, S, NodeIDs, Out, Anon0), 316 rdf_write_triples(T, NodeIDs, Out, Anon0, Anon) 317 ). 318 319subject_triples(S, [H|T0], T, [H|M]) :- 320 arg(1, H, S), 321 !, 322 subject_triples(S, T0, T, M). 323subject_triples(_, T, T, []). 324 325 326rdf_write_anon([], _, _, _). 327rdf_write_anon([anon(Subject, Done, Triples)|T], NodeIDs, Out, Anon) :- 328 Done \== true, 329 !, 330 Done = true, 331 rdf_write_subject(Triples, Subject, NodeIDs, Out, Anon), 332 rdf_write_anon(T, NodeIDs, Out, Anon). 333rdf_write_anon([_|T], NodeIDs, Out, Anon) :- 334 rdf_write_anon(T, NodeIDs, Out, Anon). 335 336rdf_write_subject(Triples, Subject, NodeIDs, Out, Anon) :- 337 rdf_write_subject(Triples, Out, Subject, NodeIDs, -, 0, Anon), 338 !, 339 format(Out, '~n', []). 340rdf_write_subject(_, Subject, _, _, _) :- 341 throw(error(rdf_save_failed(Subject), 'Internal error')). 342 343rdf_write_subject(Triples, Out, Subject, NodeIDs, DefNS, Indent, Anon) :- 344 rdf_equal(rdf:type, RdfType), 345 select(rdf(_, RdfType,Type), Triples, Triples1), 346 \+ rdf_is_bnode(Type), 347 rdf_id(Type, DefNS, TypeId), 348 xml_is_name(TypeId), 349 !, 350 format(Out, '~*|<', [Indent]), 351 rdf_write_id(Out, TypeId), 352 save_about(Out, Subject, NodeIDs), 353 save_attributes(Triples1, DefNS, Out, NodeIDs, TypeId, Indent, Anon). 354rdf_write_subject(Triples, Out, Subject, NodeIDs, _DefNS, Indent, Anon) :- 355 format(Out, '~*|<rdf:Description', [Indent]), 356 save_about(Out, Subject, NodeIDs), 357 save_attributes(Triples, rdf, Out, NodeIDs, rdf:'Description', Indent, Anon). 358 359xml_is_name(_NS:Atom) :- 360 !, 361 xml_name(Atom). 362xml_is_name(Atom) :- 363 xml_name(Atom). 364 365save_about(Out, Subject, NodeIDs) :- 366 rdf_is_bnode(Subject), 367 !, 368 ( get_assoc(Subject, NodeIDs, NodeID) 369 -> format(Out,' rdf:nodeID="~w"', [NodeID]) 370 ; true 371 ). 372save_about(Out, Subject, _) :- 373 stream_property(Out, encoding(Encoding)), 374 rdf_value(Subject, QSubject, Encoding), 375 format(Out, ' rdf:about="~w"', [QSubject]), 376 !. 377save_about(_, _, _) :- 378 assertion(fail).
386save_attributes(Triples, DefNS, Out, NodeIDs, Element, Indent, Anon) :- 387 split_attributes(Triples, InTag, InBody), 388 SubIndent is Indent + 2, 389 save_attributes2(InTag, DefNS, tag, Out, NodeIDs, SubIndent, Anon), 390 ( InBody == [] 391 -> format(Out, '/>~n', []) 392 ; format(Out, '>~n', []), 393 save_attributes2(InBody, _, body, Out, NodeIDs, SubIndent, Anon), 394 format(Out, '~N~*|</~w>~n', [Indent, Element]) 395 ). 396 397% split_attributes(+Triples, -HeadAttrs, -BodyAttr) 398% 399% Split attribute (Name=Value) list into attributes for the head 400% and body. Attributes can only be in the head if they are literal 401% and appear only one time in the attribute list. 402 403split_attributes(Triples, HeadAttr, BodyAttr) :- 404 duplicate_attributes(Triples, Dupls, Singles), 405 simple_literal_attributes(Singles, HeadAttr, Rest), 406 append(Dupls, Rest, BodyAttr). 407 408% duplicate_attributes(+Attrs, -Duplicates, -Singles) 409% 410% Extract attributes that appear more than onces as we cannot 411% dublicate an attribute in the head according to the XML rules. 412 413duplicate_attributes([], [], []). 414duplicate_attributes([H|T], Dupls, Singles) :- 415 arg(2, H, Name), 416 named_attributes(Name, T, D, R), 417 D \== [], 418 append([H|D], Dupls2, Dupls), 419 !, 420 duplicate_attributes(R, Dupls2, Singles). 421duplicate_attributes([H|T], Dupls2, [H|Singles]) :- 422 duplicate_attributes(T, Dupls2, Singles). 423 424named_attributes(_, [], [], []) :- !. 425named_attributes(Name, [H|T], D, R) :- 426 ( arg(2, H, Name) 427 -> D = [H|DT], 428 named_attributes(Name, T, DT, R) 429 ; R = [H|RT], 430 named_attributes(Name, T, D, RT) 431 ). 432 433% simple_literal_attributes(+Attributes, -Inline, -Body) 434% 435% Split attributes for (literal) attributes to be used in the 436% begin-tag and ones that have to go into the body of the description. 437 438simple_literal_attributes([], [], []). 439simple_literal_attributes([H|TA], [H|TI], B) :- 440 in_tag_attribute(H), 441 !, 442 simple_literal_attributes(TA, TI, B). 443simple_literal_attributes([H|TA], I, [H|TB]) :- 444 simple_literal_attributes(TA, I, TB). 445 446in_tag_attribute(rdf(_,P,literal(Text))) :- 447 atom(Text), % may not have lang qualifier 448 atom_length(Text, Len), 449 Len < 60, 450 \+ is_bag_li_predicate(P). 451 452 453% save_attributes(+List, +DefNS, +TagOrBody, +Out, +NodeIDs, +Indent, +Anon) 454% 455% Save a list of attributes. 456 457save_attributes2([], _, _, _, _, _, _). 458save_attributes2([H|T], DefNS, Where, Out, NodeIDs, Indent, Anon) :- 459 save_attribute(Where, H, DefNS, Out, NodeIDs, Indent, Anon), 460 save_attributes2(T, DefNS, Where, Out, NodeIDs, Indent, Anon).
464save_attribute(tag, rdf(_, Name, literal(Value)), DefNS, Out, _, Indent, _Anon) :- 465 AttIndent is Indent + 2, 466 rdf_att_id(Name, DefNS, NameText), 467 stream_property(Out, encoding(Encoding)), 468 xml_quote_attribute(Value, QVal, Encoding), 469 format(Out, '~N~*|', [AttIndent]), 470 rdf_write_id(Out, NameText), 471 format(Out, '="~w"', [QVal]). 472save_attribute(body, rdf(_,Name,literal(Literal)), DefNS, Out, _, Indent, _) :- 473 !, 474 rdf_p_id(Name, DefNS, NameText), 475 format(Out, '~N~*|<', [Indent]), 476 rdf_write_id(Out, NameText), 477 ( Literal = lang(Lang, Value) 478 -> rdf_id(Lang, DefNS, LangText), 479 format(Out, ' xml:lang="~w">', [LangText]) 480 ; Literal = type(Type, Value) 481 -> ( rdf_equal(Type, rdf:'XMLLiteral') 482 -> write(Out, ' rdf:parseType="Literal">'), 483 Value = Literal 484 ; stream_property(Out, encoding(Encoding)), 485 rdf_value(Type, QVal, Encoding), 486 format(Out, ' rdf:datatype="~w">', [QVal]) 487 ) 488 ; atomic(Literal) 489 -> write(Out, '>'), 490 Value = Literal 491 ; write(Out, ' rdf:parseType="Literal">'), 492 Value = Literal 493 ), 494 save_attribute_value(Value, Out, Indent), 495 write(Out, '</'), rdf_write_id(Out, NameText), write(Out, '>'). 496save_attribute(body, rdf(_, Name, Value), DefNS, Out, NodeIDs, Indent, Anon) :- 497 rdf_is_bnode(Value), 498 !, 499 ( memberchk(anon(Value, Done, ValueTriples), Anon) 500 -> true 501 ; ValueTriples = [] 502 ), 503 rdf_p_id(Name, DefNS, NameText), 504 format(Out, '~N~*|<', [Indent]), 505 rdf_write_id(Out, NameText), 506 ( var(Done) 507 -> Done = true, 508 SubIndent is Indent + 2, 509 ( rdf_equal(RdfType, rdf:type), 510 rdf_equal(ListClass, rdf:'List'), 511 memberchk(rdf(_, RdfType, ListClass), ValueTriples) 512 -> format(Out, ' rdf:parseType="Collection">~n', []), 513 rdf_save_list(ValueTriples, Out, Value, NodeIDs, DefNS, SubIndent, Anon) 514 ; format(Out, '>~n', []), 515 rdf_write_subject(ValueTriples, Out, Value, NodeIDs, DefNS, SubIndent, Anon) 516 ), 517 format(Out, '~N~*|</', [Indent]), 518 rdf_write_id(Out, NameText), 519 format(Out, '>~n', []) 520 ; get_assoc(Value, NodeIDs, NodeID) 521 -> format(Out, ' rdf:nodeID="~w"/>', [NodeID]) 522 ; assertion(fail) 523 ). 524save_attribute(body, rdf(_, Name, Value), DefNS, Out, _, Indent, _Anon) :- 525 stream_property(Out, encoding(Encoding)), 526 rdf_value(Value, QVal, Encoding), 527 rdf_p_id(Name, DefNS, NameText), 528 format(Out, '~N~*|<', [Indent]), 529 rdf_write_id(Out, NameText), 530 format(Out, ' rdf:resource="~w"/>', [QVal]). 531 532save_attribute_value(Value, Out, _) :- % strings 533 atom(Value), 534 !, 535 stream_property(Out, encoding(Encoding)), 536 xml_quote_cdata(Value, QVal, Encoding), 537 write(Out, QVal). 538save_attribute_value(Value, Out, _) :- % numbers 539 number(Value), 540 !, 541 writeq(Out, Value). % quoted: preserve floats 542save_attribute_value(Value, Out, Indent) :- 543 xml_is_dom(Value), 544 !, 545 XMLIndent is Indent+2, 546 xml_write(Out, Value, 547 [ header(false), 548 indent(XMLIndent) 549 ]). 550save_attribute_value(Value, _Out, _) :- 551 throw(error(save_attribute_value(Value), _)). 552 553rdf_save_list(_, _, List, _, _, _, _) :- 554 rdf_equal(List, rdf:nil), 555 !. 556rdf_save_list(ListTriples, Out, List, NodeIDs, DefNS, Indent, Anon) :- 557 rdf_equal(RdfFirst, rdf:first), 558 memberchk(rdf(List, RdfFirst, First), ListTriples), 559 ( rdf_is_bnode(First), 560 memberchk(anon(First, true, FirstTriples), Anon) 561 -> nl(Out), 562 rdf_write_subject(FirstTriples, Out, First, NodeIDs, DefNS, Indent, Anon) 563 ; stream_property(Out, encoding(Encoding)), 564 rdf_value(First, QVal, Encoding), 565 format(Out, '~N~*|<rdf:Description about="~w"/>', 566 [Indent, QVal]) 567 ), 568 ( rdf_equal(RdfRest, rdf:rest), 569 memberchk(rdf(List, RdfRest, List2), ListTriples), 570 \+ rdf_equal(List2, rdf:nil), 571 memberchk(anon(List2, true, List2Triples), Anon) 572 -> rdf_save_list(List2Triples, Out, List2, NodeIDs, DefNS, Indent, Anon) 573 ; true 574 ).
582rdf_p_id(LI, _, 'rdf:li') :- 583 is_bag_li_predicate(LI), 584 !. 585rdf_p_id(Resource, DefNS, NSLocal) :- 586 rdf_id(Resource, DefNS, NSLocal).
593is_bag_li_predicate(Pred) :-
594 atom_concat('_:', AN, Pred),
595 catch(atom_number(AN, N), _, true), integer(N), N >= 0,
596 !.
604rdf_id(Id, NS, NS:Local) :- 605 ns(NS, Full), 606 Full \== '', 607 atom_concat(Full, Local, Id), 608 xml_name(Local), 609 !. 610rdf_id(Id, _, NS:Local) :- 611 ns(NS, Full), 612 Full \== '', 613 atom_concat(Full, Local, Id), 614 xml_name(Local), 615 !. 616rdf_id(Id, _, Id).
624rdf_write_id(Out, NS:Local) :- 625 !, 626 format(Out, '~w:~w', [NS, Local]). 627rdf_write_id(Out, Atom) :- 628 write(Out, Atom).
633rdf_att_id(Id, _, NS:Local) :- 634 ns(NS, Full), 635 Full \== '', 636 atom_concat(Full, Local, Id), 637 xml_name(Local), 638 !. 639rdf_att_id(Id, _, Id).
NOTE: the to_be_described/1 trick ensures entity rewrite in resources that start with 'http://t-d-b.org?'. This is a of a hack to save the artchive data in the MultimediaN project. We should use a more general mechanism.
654rdf_value(V, Text, Encoding) :- 655 to_be_described(Prefix), 656 atom_concat(Prefix, V1, V), 657 ns(NS, Full), 658 atom_concat(Full, Local, V1), 659 !, 660 xml_quote_attribute(Local, QLocal, Encoding), 661 atomic_list_concat([Prefix, '&', NS, (';'), QLocal], Text). 662rdf_value(V, Text, Encoding) :- 663 ns(NS, Full), 664 atom_concat(Full, Local, V), 665 !, 666 xml_quote_attribute(Local, QLocal, Encoding), 667 atomic_list_concat(['&', NS, (';'), QLocal], Text). 668rdf_value(V, Q, Encoding) :- 669 xml_quote_attribute(V, Q, Encoding). 670 671to_be_described('http://t-d-b.org?'). 672 673 674 /******************************* 675 * UTIL * 676 *******************************/ 677 678ns(Id, Full) :- 679 rdf_db:ns(Id, Full)
Write RDF/XML from a list of triples
This module writes an RDF/XML document from a list of triples of the format
rdf(Subject, Predicate, Object)
. It is primarily intended for communicating computed RDF model fragments to external programs using RDF/XML.When used from the HTTP library, use the following code: