View source with formatted comments or as raw
    1/*  Part of SWI-Prolog
    2
    3    Author:        Jan Wielemaker
    4    E-mail:        J.Wielemaker@vu.nl
    5    WWW:           http://www.swi-prolog.org
    6    Copyright (c)  2009-2017, University of Amsterdam
    7                              VU University Amsterdam
    8    All rights reserved.
    9
   10    Redistribution and use in source and binary forms, with or without
   11    modification, are permitted provided that the following conditions
   12    are met:
   13
   14    1. Redistributions of source code must retain the above copyright
   15       notice, this list of conditions and the following disclaimer.
   16
   17    2. Redistributions in binary form must reproduce the above copyright
   18       notice, this list of conditions and the following disclaimer in
   19       the documentation and/or other materials provided with the
   20       distribution.
   21
   22    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   23    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   24    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
   25    FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
   26    COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
   27    INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
   28    BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
   29    LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
   30    CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   31    LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
   32    ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
   33    POSSIBILITY OF SUCH DAMAGE.
   34*/
   35
   36:- module(rdf_turtle_write,
   37          [ rdf_save_turtle/2,                  % +File, +Options
   38            rdf_save_canonical_turtle/2,        % +File, +Options
   39            rdf_save_trig/2,                    % +File, +Options
   40            rdf_save_canonical_trig/2,          % +File, +Options
   41            rdf_save_ntriples/2                 % +File, +Options
   42          ]).   43:- use_module(library(semweb/rdf_db)).   44:- use_module(library(semweb/turtle), []). % we make calls to public preds here
   45:- use_module(library(option)).   46:- use_module(library(record)).   47:- use_module(library(error)).   48:- use_module(library(lists)).   49:- use_module(library(rbtrees)).   50:- use_module(library(apply)).   51:- use_module(library(url)).   52:- use_module(library(pairs)).   53:- use_module(library(debug)).   54:- use_module(library(sgml_write)).   55:- use_module(library(sgml)).   56
   57:- predicate_options(rdf_save_turtle/2, 2,
   58                     [ graph(atom),
   59                       base(atom),
   60                       encoding(oneof([utf8])),
   61                       indent(nonneg),
   62                       tab_distance(nonneg),
   63                       silent(boolean),
   64                       subject_white_lines(nonneg),
   65                       align_prefixes(boolean),
   66                       user_prefixes(boolean),
   67                       prefixes(list),
   68                       only_known_prefixes(boolean),
   69                       comment(boolean),
   70                       group(boolean),
   71                       inline_bnodes(boolean),
   72                       single_line_bnodes(boolean),
   73                       abbreviate_literals(boolean),
   74                       canonize_numbers(boolean),
   75                       canonical(boolean),
   76                       a(boolean),
   77                       expand(any)
   78                     ]).   79:- predicate_options(rdf_save_canonical_turtle/2, 2,
   80                     [ pass_to(rdf_save_turtle/2, 2)
   81                     ]).   82
   83/** <module> Turtle - Terse RDF Triple Language writer
   84
   85This module implements the Turtle  language   for  representing  the RDF
   86triple model as defined by Dave Beckett  from the Institute for Learning
   87and Research Technology University of Bristol in the document:
   88
   89  * http://www.w3.org/TeamSubmission/turtle/
   90  * http://www.w3.org/TeamSubmission/2008/SUBM-turtle-20080114/#sec-conformance
   91
   92The Turtle format is designed as an   RDF  serialization that is easy to
   93read and write by both machines and  humans. Due to the latter property,
   94this library goes a long way in trying to produce human-readable output.
   95
   96In addition to the  human-readable  format,   this  library  can write a
   97_canonical_ representation of RDF graphs.   The canonical representation
   98has the following properties:
   99
  100  * Equivalent graphs result in the same document.  Graphs are
  101  considered equivalent iff they contain the same _set_ of
  102  triples, regardless of the labeling of blank nodes in the
  103  graph.
  104
  105  * Changes to the graph are diff-friendly.  This means
  106
  107    - Prefixes are combined in the header and thus changes
  108    to the namespaces only result in changes in the header.
  109    - Blank nodes that are used only once (including collections)
  110    are written in-line with the object they belong to.
   111    - For other blank nodes we try to realise stable labeling that
  112    is based on property-values.
  113
  114@tbd    Low-level string output takes 28% of the time.  Move to C?
  115*/
  116
% tw_state: the writer state, declared through library(record), which
% generates make_tw_state/3 and the tw_state_<field>/2 and
% set_<field>_of_tw_state/3 accessors used throughout this file.  The
% fields up to `expand` mirror the documented options; the fields
% below "Private fields" are maintained by the writer itself.

:- record
    tw_state(graph,                 % Graph being saved
             graphs:list(atom),     % TriG: graphs being saved
             base,                  % The base-URI
             encoding=utf8,         % Desired encoding
             indent:nonneg=8,       % Indent for ; and ,-lists
             tab_distance:nonneg=8, % Tab distance
             silent:boolean=false,  % If true, do not print a message
             subject_white_lines:nonneg=1,% Extra lines between subjects
             a:boolean=true,        % Use 'a' for rdf:type
             align_prefixes:boolean=true,% Align prefix declarations
             prefixes:list,         % Prefixes provided by the user
             user_prefixes:boolean=true,% Use rdf_current_ns/2?
             only_known_prefixes:boolean=false,% Only use known prefixes
             comment:boolean=true,  % Write some comments into the file
             group:boolean=true,    % Group using ; and ,
             inline_bnodes:boolean=true, % Inline single-used bnodes
             single_line_bnodes:boolean=false, % No newline after ;
             abbreviate_literals:boolean=true, % Abbreviate known types
             canonize_numbers:boolean=false, % How to write numbers
             canonical:boolean=false, % Set to true by canonical_options/2
             expand:any=lookup,     % Access to the triples
                                    % Private fields
             bnode_id=0,            % Incrementing bnode-id
             nodeid_map,            % RBTree mapping NodeIDs to Refs
             bnode_hash,            % RBTree holding reuse-count of hashes
             subject_count=0,       % # subjects saved
             triple_count=0,        % # triples saved
             base_root,             % Root URL of base
             base_dir,              % Directory URL of base (see init_base/2)
             base_path,             % Path of base
             prefix_map).           % List of Prefix-Map
  149
  150
  151:- meta_predicate
  152    rdf_save_turtle(+, :),
  153    rdf_save_canonical_turtle(+, :),
  154    rdf_save_trig(+, :).  155
  156%!  rdf_save_turtle(+Out, :Options) is det.
  157%
  158%   Save an RDF graph as Turtle.  Options processed are:
  159%
  160%       * a(+Boolean)
  161%       If =true= (default), use =a= for the predicate =rdf:type=.
  162%       Otherwise use the full resource.
  163%       * align_prefixes(+Boolean)
  164%       Nicely align the @prefix declarations
  165%       * base(+Base)
  166%       Save relative to the given Base
  167%       * canonize_numbers(+Boolean)
  168%       If =true= (default =false=), emit numeric datatypes using
  169%       Prolog's write to achieve canonical output.
  170%       * comment(+Boolean)
   171%       If =true= (default), write some informative comments
  172%       between the output segments
  173%       * encoding(+Encoding)
  174%       Encoding used for the output stream.  Default is UTF-8.
  175%       * expand(:Goal)
  176%       Query an alternative graph-representation.  See below.
  177%       * indent(+Column)
  178%       Indentation for ; -lists.  `0' does not indent, but
  179%       writes on the same line.  Default is 8.
  180%       * graph(+Graph)
  181%       Save only the named graph
  182%       * group(+Boolean)
  183%       If =true= (default), using P-O and O-grouping.
  184%       * inline_bnodes(+Boolean)
  185%       if =true= (default), inline bnodes that are used once.
  186%       * abbreviate_literals(+Boolean)
  187%       if =true= (default), omit the type if allowed by turtle.
  188%       * only_known_prefixes(+Boolean)
  189%       Only use prefix notation for known prefixes.  Without, some
  190%       documents produce _huge_ amounts of prefixes.
  191%       * prefixes(+List)
  192%       If provided, uses exactly these prefixes.  List is a list
  193%       of prefix specifications, where each specification is either
  194%       a term _Prefix_-_URI_ or a prefix that is known to
  195%       rdf_current_prefix/2.
  196%       * silent(+Boolean)
  197%       If =true= (default =false=), do not print the final
  198%       informational message.
  199%       * single_line_bnodes(+Bool)
  200%       If =true= (default =false=), write [...] and (...) on a
  201%       single line.
  202%       * subject_white_lines(+Count)
  203%       Extra white lines to insert between statements about a
  204%       different subject.  Default is 1.
  205%       * tab_distance(+Tab)
  206%       Distance between tab-stops.  `0' forces the library to
  207%       use only spaces for layout.  Default is 8.
  208%       * user_prefixes(+Boolean)
  209%       If =true= (default), use prefixes from rdf_current_prefix/2.
  210%
  211%   The option =expand= allows  for   serializing  alternative graph
  212%   representations. It is called through   call/5,  where the first
  213%   argument is the expand-option, followed  by   S,P,O,G.  G is the
  214%   graph-option (which is by  default   a  variable).  This notably
  215%   allows for writing RDF graphs   represented  as rdf(S,P,O) using
  216%   the following code fragment:
  217%
  218%       ==
  219%       triple_in(RDF, S,P,O,_G) :-
  220%           member(rdf(S,P,O), RDF).
  221%
  222%           ...,
  223%           rdf_save_turtle(Out, [ expand(triple_in(RDF)) ]),
  224%       ==
  225%
  226%   @param  Out is one of stream(Stream), a stream handle, a file-URL
  227%           or an atom that denotes a filename.
  228
  229rdf_save_turtle(Spec, QOptions) :-
  230    meta_options(is_meta, QOptions, Options),
  231    thread_self(Me),
  232    thread_statistics(Me, cputime, T0),
  233    must_be(list, Options),
  234    make_tw_state(Options, State0, _Rest),
  235    init_base(State0, State1),
  236    init_prefix_map(State1, State),
  237    tw_state_encoding(State, Enc),
  238    setup_call_cleanup(
  239        open_output(Spec, Enc, Stream, Cleanup),
  240        ( tw_prefix_map(State, Stream),
  241          tw_graph(State, Stream)
  242        ),
  243        Cleanup),
  244    thread_statistics(Me, cputime, T1),
  245    Time is T1-T0,
  246    tw_state_triple_count(State, SavedTriples),
  247    tw_state_subject_count(State, SavedSubjects),
  248    (   tw_state_silent(State, true)
  249    ->  true
  250    ;   print_message(informational,
  251                      rdf(saved(Spec, Time, SavedSubjects, SavedTriples)))
  252    ).
  253
  254is_meta(expand).
  255
  256%!  rdf_save_canonical_turtle(+Spec, :Options) is det.
  257%
  258%   Save triples in  a  canonical  format.   This  is  the  same  as
  259%   rdf_save_turtle/2, but using different defaults. In particular:
  260%
  261%       * encoding(utf8),
  262%       * indent(0),
  263%       * tab_distance(0),
  264%       * subject_white_lines(1),
  265%       * align_prefixes(false),
  266%       * user_prefixes(false)
  267%       * comment(false),
  268%       * group(false),
  269%       * single_line_bnodes(true)
  270%
  271%   @tbd Work in progress. Notably blank-node handling is
  272%   incomplete.
  273
  274rdf_save_canonical_turtle(Spec, M:Options) :-
  275    canonical_options(CannonicalOptions, Options),
  276    rdf_save_turtle(Spec, M:CannonicalOptions).
  277
  278canonical_options([ encoding(utf8),
  279                    indent(0),
  280                    tab_distance(0),
  281                    subject_white_lines(1),
  282                    align_prefixes(false),
  283                    user_prefixes(false),
  284                    comment(false),
  285                    group(false),
  286                    single_line_bnodes(true),
  287                    canonical(true)
  288                  | Options
  289                  ],
  290                  Options).
  291
  292
  293%!  rdf_save_ntriples(+Spec, :Options) is det.
  294%
  295%   Save RDF using ntriples format. The  ntriples format is a subset
  296%   of Turtle, writing each triple fully qualified on its own line.
  297
  298rdf_save_ntriples(File, Options):-
  299    rdf_save_turtle(File,
  300                    [ comment(false),
  301                      encoding(utf8),
  302                      group(false),
  303                      prefixes([]),
  304                      subject_white_lines(0),
  305                      a(false),
  306                      inline_bnodes(false),
  307                      abbreviate_literals(false)
  308                    | Options
  309                    ]).
  310
  311
  312%!  rdf_save_trig(+Spec, :Options) is det.
  313%
  314%   Save multiple RDF graphs into a TriG  file. Options are the same
  315%   as   for   rdf_save_turtle/2.   rdf_save_trig/2    ignores   the
  316%   graph(+Graph)  option  and  instead   processes  one  additional
  317%   option:
  318%
  319%     - graphs(+ListOfGraphs)
  320%     List of graphs to save. When omitted, all graphs in the RDF
  321%     store are stored in the TriG file.
  322
  323rdf_save_trig(Spec, QOptions) :-
  324    meta_options(is_meta, QOptions, Options),
  325    thread_self(Me),
  326    thread_statistics(Me, cputime, T0),
  327    must_be(list, Options),
  328    make_tw_state(Options, State0, _Rest),
  329    init_base(State0, State1),
  330    trig_graphs(State1, Graphs),
  331    init_prefix_map(State1, Graphs, State2),
  332    tw_state_encoding(State2, Enc),
  333    setup_call_cleanup(
  334        open_output(Spec, Enc, Stream, Cleanup),
  335        ( tw_prefix_map(State2, Stream),
  336          tw_trig_graphs(Graphs, Stream, State2, State)
  337        ),
  338        Cleanup),
  339    thread_statistics(Me, cputime, T1),
  340    Time is T1-T0,
  341    tw_state_triple_count(State, SavedTriples),
  342    tw_state_subject_count(State, SavedSubjects),
  343    length(Graphs, SavedGraphs),
  344    (   tw_state_silent(State, true)
  345    ->  true
  346    ;   print_message(informational,
  347                      rdf(saved(Spec, Time, SavedSubjects, SavedTriples, SavedGraphs)))
  348    ).
  349
  350%!  rdf_save_canonical_trig(+Spec, :Options) is det.
  351%
  352%   Save     triples     in     a      canonical     format.     See
  353%   rdf_save_canonical_turtle/2 foir details.
  354
  355
  356rdf_save_canonical_trig(Spec, M:Options) :-
  357    canonical_options(CannonicalOptions, Options),
  358    rdf_save_trig(Spec, M:CannonicalOptions).
  359
  360tw_trig_graphs([], _, State, State).
  361tw_trig_graphs([H|T], Stream, State0, State) :-
  362    set_graph_of_tw_state(H, State0, State1),
  363    nl(Stream),
  364    tw_resource(H, State1, Stream),
  365    format(Stream, ' {~n', []),
  366    tw_graph(State1, Stream),
  367    format(Stream, '~N}~n', []),
  368    set_bnode_id_of_tw_state(0, State1, State2),
  369    set_nodeid_map_of_tw_state(_, State2, State3),
  370    set_bnode_hash_of_tw_state(_, State3, State4),
  371    tw_trig_graphs(T, Stream, State4, State).
  372
  373
  374%!  trig_graphs(+State, -Graphs) is det.
  375%
  376%   True when Graphs is the (sorted) list of graphs we must save. If
  377%   the _expand_ argument is used and   no  graphs are specified, it
  378%   enumerates all triples and extracts the graphs.
  379
  380trig_graphs(State, Graphs) :-
  381    tw_state_graphs(State, Graphs),
  382    (   nonvar(Graphs)
  383    ->  true
  384    ;   tw_state_expand(State, Expand),
  385        (   Expand == lookup
  386        ->  findall(G, rdf_graph(G), Graphs0)
  387        ;   findall(G, call(Expand,_S,_P,_O,G), Graphs0)
  388        ),
  389        sort(Graphs0, Graphs)
  390    ).
  391
  392
  393%!  open_output(+Spec, +Encoding, -Stream, -Cleanup) is det.
  394%
  395%   Open output Spec, returning a stream using Encoding.
  396%
  397%   @param  Cleanup is a goal that must be used to revert the side
  398%           effects of open_output/4.
  399
  400open_output(stream(Out), Encoding, Out, Cleanup) :-
  401    !,
  402    stream_property(Out, encoding(Old)),
  403    (   (   Old == Encoding
  404        ;   Old == wchar_t          % Internal encoding
  405        )
  406    ->  Cleanup = true
  407    ;   set_stream(Out, encoding(Encoding)),
  408        Cleanup = set_stream(Out, encoding(Old))
  409    ).
  410open_output(Stream, Encoding, Out, Cleanup) :-
  411    \+ atom(Stream),
  412    is_stream(Stream),
  413    !,
  414    open_output(stream(Stream), Encoding, Out, Cleanup).
  415open_output(Spec, Encoding, Out,
  416            close(Out)) :-
  417    out_to_file(Spec, File),
  418    open(File, write, Out, [encoding(Encoding)]).
  419
  420out_to_file(URL, File) :-
  421    atom(URL),
  422    file_name_to_url(File, URL),
  423    !.
  424out_to_file(File, File).
  425
  426
  427                 /*******************************
  428                 *            PREFIXES          *
  429                 *******************************/
  430
  431%!  init_prefix_map(+State, -State) is det.
  432%
  433%   Set  the  prefix_map  of  State.  The  prefix  map  is  list  of
  434%   Prefix-URI of prefixes to use for   emitting the graph requested
  435%   in State. If multiple prefixes are present   where  the one is a
  436%   prefix of the other, the longer one appears first in the list.
  437
  438init_prefix_map(State0, State) :-
  439    tw_state_prefixes(State0, Prefixes),
  440    nonvar(Prefixes),
  441    !,
  442    user_prefix_map(Prefixes, PrefixMap),
  443    set_prefix_map_of_tw_state(PrefixMap, State0, State).
  444init_prefix_map(State0, State) :-
  445    tw_state_graph(State0, Graph),
  446    graph_prefix_map(State0, Graph, PrefixMap),
  447    set_prefix_map_of_tw_state(PrefixMap, State0, State).
  448
  449init_prefix_map(State0, _Graphs, State) :-      % TriG version
  450    tw_state_prefixes(State0, Prefixes),
  451    nonvar(Prefixes),
  452    !,
  453    user_prefix_map(Prefixes, PrefixMap),
  454    set_prefix_map_of_tw_state(PrefixMap, State0, State).
  455init_prefix_map(State0, Graphs, State) :-       % TriG version
  456    maplist(graph_prefixes(State0), Graphs, NestedPrefixes),
  457    append(NestedPrefixes, Prefixes0),
  458    sort(Prefixes0, Prefixes),
  459    prefix_map(State0, Prefixes, PrefixMap),
  460    set_prefix_map_of_tw_state(PrefixMap, State0, State).
  461
  462graph_prefix_map(State, Graph, PrefixMap) :-
  463    graph_prefixes(State, Graph, Prefixes),
  464    prefix_map(State, Prefixes, PrefixMap).
  465
  466graph_prefixes(State0, Graph, Prefixes) :-
  467    tw_state_expand(State0, Expand),
  468    tw_state_only_known_prefixes(State0, OnlyKnown),
  469    rdf_graph_prefixes(Graph, Prefixes,
  470                       [ filter(turtle_prefix(OnlyKnown)),
  471                         expand(Expand),
  472                         min_count(2),
  473                         get_prefix(turtle:iri_turtle_prefix)
  474                       ]).
  475
  476prefix_map(State, Prefixes, PrefixMap) :-
  477    remove_base(State, Prefixes, Prefixes2),
  478    prefix_names(Prefixes2, State, Pairs),
  479    transpose_pairs(Pairs, URI_Abrevs),
  480    reverse(URI_Abrevs, RURI_Abrevs),
  481    flip_pairs(RURI_Abrevs, PrefixMap).
  482
  483%!  user_prefix_map(+Prefixes, -PrefixMap) is det.
  484%
  485%   Convert a list of prefix specifications   to  a list Prefix-URI,
  486%   longest URI first.
  487
  488user_prefix_map(Prefixes, PrefixMap) :-
  489    must_be(list, Prefixes),
  490    maplist(prefix_pair, Prefixes, Pairs),
  491    map_list_to_pairs(prefix_length, Pairs, LenPairs),
  492    sort(LenPairs, LenPairs1),
  493    pairs_values(LenPairs1, RevPrefixMap),
  494    reverse(RevPrefixMap, PrefixMap).
  495
  496prefix_pair(Prefix-URI, Prefix-URI) :-
  497    !,
  498    must_be(atom, Prefix),
  499    must_be(atom, URI).
  500prefix_pair(Prefix, Prefix-URI) :-
  501    must_be(atom, Prefix),
  502    (   rdf_current_prefix(Prefix, URI)
  503    ->  true
  504    ;   existence_error(prefix, Prefix)
  505    ).
  506
  507prefix_length(_-URI, Len) :- atom_length(URI, Len).
  508
  509%!  turtle_prefix(+OnlyKnown, +Where, +Prefix, +URI) is semidet.
  510%
  511%   Test whether we want  to  include   the  proposed  prefix in the
  512%   @prefix declaration.
  513
  514:- public turtle_prefix/4.              % called through rdf_graph_prefixes/3.
  515
  516turtle_prefix(true, _, Prefix, _) :-
  517    !,
  518    rdf_current_prefix(_, Prefix),
  519    !.
  520turtle_prefix(_, _, Prefix, URI) :-
  521    sub_atom(Prefix, _, 1, 0, Last),
  522    turtle_prefix_char(Last),
  523    atom_concat(Prefix, Local, URI),
  524    \+ sub_atom(Local, _, _, _, '.').
  525
  526turtle_prefix_char('#').
  527turtle_prefix_char('/').
  528
  529
  530remove_base(State, Prefixes, PrefixesNoBase) :-
  531    tw_state_base_dir(State, BaseDir),
  532    atom(BaseDir),
  533    !,
  534    delete(Prefixes, BaseDir, PrefixesNoBase).
  535remove_base(_State, Prefixes, Prefixes).
  536
  537flip_pairs([], []).
  538flip_pairs([Key-Val|Pairs], [Val-Key|Flipped]) :-
  539    flip_pairs(Pairs, Flipped).
  540
  541prefix_names(URIs, State, Prefixes) :-
  542    prefix_names(URIs, State, 1, Prefixes, []).
  543
  544prefix_names([], _, _, List, List) :- !.
  545prefix_names(URIs, State, Len, Prefixes, Tail) :-
  546    prefix_names(URIs, State, Len, Prefixes, PTail, Rest),
  547    Len1 is Len + 1,
  548    prefix_names(Rest, State, Len1, PTail, Tail).
  549
  550prefix_names(URIs, State, Len, Prefixes, PTail, Rest) :-
  551    map_list_to_pairs(propose_abbrev(State, Len), URIs, Pairs),
  552    !,
  553    keysort(Pairs, Sorted),
  554    unique(Sorted, Prefixes, PTail, Rest).
  555prefix_names(URIs, _, _, Prefixes, PTail, []) :-
  556    number_prefixes(URIs, 1, Prefixes, PTail).
  557
  558number_prefixes([], _, PL, PL).
  559number_prefixes([H|T0], N, [P-H|PL], T) :-
  560    atomic_concat(ns, N, P),
  561    succ(N, N1),
  562    number_prefixes(T0, N1, PL, T).
  563
  564unique([], L, L, []).
  565unique([A-U|T0], [A-U|T], L, Rest) :-
  566    T0 \= [A-_|_],
  567    !,
  568    unique(T0, T, L, Rest).
  569unique([A-U|T0], Prefixes, L, [U|Rest0]) :-
  570    strip_keys(T0, A, T1, Rest0, Rest),
  571    unique(T1, Prefixes, L, Rest).
  572
  573strip_keys([A-U|T0], A, T, [U|R0], R) :-
  574    !,
  575    strip_keys(T0, A, T, R0, R).
  576strip_keys(L, _, L, R, R).
  577
  578
  579%!  propose_abbrev(+State, +Len, +URI, -Abbrev) is multi.
  580%
  581%   Propose an abbreviation for URI.  Backtracking yields longer
  582%   ones.
  583
  584propose_abbrev(_, _, URI, Abbrev) :-
  585    well_known_ns(Abbrev, URI),
  586    !.
  587propose_abbrev(State, _, URI, Abbrev) :-
  588    tw_state_user_prefixes(State, true),
  589    rdf_current_prefix(Abbrev, URI),
  590    !.
  591propose_abbrev(_, Len, URI, Abbrev) :-
  592    namespace_parts(URI, Parts),
  593    include(abbrev_part, Parts, Names),
  594    reverse(Names, RevNames),
  595    length(Use, Len),
  596    append(Use, _, RevNames),
  597    atomic_list_concat(Use, -, Abbrev).
  598
  599abbrev_part(X) :-
  600    xml_name(X),
  601    \+ well_known_ns(X, _),
  602    \+ well_known_extension(X).
  603
  604well_known_ns(rdf,  'http://www.w3.org/1999/02/22-rdf-syntax-ns#').
  605well_known_ns(rdfs, 'http://www.w3.org/2000/01/rdf-schema#').
  606well_known_ns(owl,  'http://www.w3.org/2002/07/owl#').
  607well_known_ns(xsd,  'http://www.w3.org/2001/XMLSchema#').
  608well_known_ns(dc,   'http://purl.org/dc/elements/1.1/').
  609
  610well_known_extension(ttl).
  611well_known_extension(nt).
  612well_known_extension(n3).
  613well_known_extension(xml).
  614well_known_extension(rdf).
  615well_known_extension(owl).
  616
  617%!  namespace_parts(+URL, -Parts)
  618
  619namespace_parts(URL, Parts) :-
  620    atom_codes(URL, Codes),
  621    phrase(parts(Parts), Codes),
  622    !.
  623namespace_parts(URL, _) :-
  624    format(user_error, 'Couldn\'t split ~q~n', [URL]),
  625    fail.
  626
%   parts(-Parts)//
%
%   Grammar that splits a code list into atoms at runs of separator
%   characters (see sep_char//0).  Leading separators are skipped.

parts(List) --> sep2, parts2(List).

%   parts2(-Parts)//
%
%   Each part is a non-empty code sequence that is followed by a
%   separator run or by the end of the input.  string//1 tries the
%   shortest sequence first, so a part ends at the first point where
%   sep//0 matches.  The cut commits to that split.

parts2([H|T]) -->
    string(Codes),  {Codes \== []},
    sep,
    !,
    {atom_codes(H, Codes)},
    parts2(T).
parts2([]) --> [].

%   string(-Codes)//
%
%   Match any sequence of codes; shorter sequences are tried first.

string([]) --> [].
string([H|T]) --> [H], string(T).

%   sep//
%
%   One or more separator characters, or the end of the input.  The
%   end-of-input case is the plain clause sep([], []).

sep --> sep_char, sep2.
sep([], []).

%   sep2//
%
%   Zero or more separator characters, taking as many as possible.

sep2 --> sep_char, !, sep2.
sep2 --> [].

%   sep_char//
%
%   The characters at which a URL is split.

sep_char --> "/".
sep_char --> ":".
sep_char --> ".".
sep_char --> "?".
sep_char --> "#".
  651
  652
  653%!  init_base(+State0, -State) is det.
  654%
  655%   Initialise dealing with the base URI.  It sets two attributes of
  656%   the state: base_root and base_path.
  657
  658init_base(State0, State) :-
  659    tw_state_base(State0, BaseURI),
  660    atom(BaseURI),
  661    !,
  662    parse_url(BaseURI, Attributes),
  663    include(root_part, Attributes, RootAttrs),
  664    parse_url(BaseRoot, RootAttrs),
  665    memberchk(path(BasePath), Attributes),
  666    file_directory_name(BasePath, BaseDir),
  667    atomic_list_concat([BaseRoot, BaseDir, /], BaseDirURI),
  668    set_base_root_of_tw_state(BaseRoot, State0, State1),
  669    set_base_path_of_tw_state(BasePath, State1, State2),
  670    set_base_dir_of_tw_state(BaseDirURI, State2, State).
  671init_base(State, State).
  672
  673root_part(protocol(_)).
  674root_part(host(_)).
  675root_part(port(_)).
  676
  677
  678                 /*******************************
  679                 *              SAVE            *
  680                 *******************************/
  681
  682%!  tw_graph(+State, +Out) is det.
  683%
  684%   Write an RDF graph as Turtle data.
  685%
  686%   @tbd Write unconnected and multi-connected blank-nodes.
  687
  688tw_graph(State, Out) :-
  689    subjects(State, Subjects),
  690    length(Subjects, SubjectCount),
  691    inc_subject_count(State, SubjectCount),
  692    partition(rdf_is_bnode, Subjects, BNodes, ProperSubjects),
  693    maplist(pair_var, BNodes, Pairs),
  694    ord_list_to_rbtree(Pairs, BNTree),
  695    tw_state_nodeid_map(State, BNTree),
  696    (   ProperSubjects == []
  697    ->  true
  698    ;   length(ProperSubjects, PSCount),
  699        comment(State, 'Named toplevel resources (~D)', [PSCount], Out),
  700        tw_proper_subjects(ProperSubjects, State, Out)
  701    ),
  702    tw_bnodes(Pairs, State, Out).
  703
  704pair_var(BNode, BNode-_).
  705
  706tw_prefix_map(State, Out) :-
  707    tw_state_prefix_map(State, PrefixMap),
  708    tw_prefix_map(PrefixMap, State, Out).
  709
  710%!  tw_prefix_map(+PrefixMap, +State, +Out) is det.
  711%
  712%   Write the @base and @prefix declarations
  713
  714tw_prefix_map(PrefixMap, State, Out) :-
  715    tw_state_align_prefixes(State, true),
  716    !,
  717    longest_prefix(PrefixMap, 0, Length),
  718    PrefixCol is Length+10,
  719    tw_base(PrefixCol, State, Out),
  720    tw_prefix_map(PrefixMap, PrefixCol, State, Out).
  721tw_prefix_map(PrefixMap, State, Out) :-
  722    tw_base(0, State, Out),
  723    tw_prefix_map(PrefixMap, 0, State, Out).
  724
  725longest_prefix([], L, L).
  726longest_prefix([Prefix-_|T], L0, L) :-
  727    atom_length(Prefix, L1),
  728    L2 is max(L0, L1),
  729    longest_prefix(T, L2, L).
  730
  731
  732tw_base(Col, State, Out) :-
  733    tw_state_base(State, Base),
  734    atom(Base),
  735    !,
  736    format(Out, '@base ~t~*|', [Col]),
  737    turtle:turtle_write_uri(Out, Base),
  738    format(Out, ' .~n', []).
  739tw_base(_, _, _).
  740
  741
  742tw_prefix_map([], _, _, _).
  743tw_prefix_map([Prefix-URI|T], Col, State, Out) :-
  744    format(Out, '@prefix ~t~w: ~*|', [Prefix, Col]),
  745    tw_relative_uri(URI, State, Out),
  746    format(Out, ' .~n', []),
  747    (   T == []
  748    ->  true
  749    ;   tw_prefix_map(T, Col, State, Out)
  750    ).
  751
  752
  753%!  tw_proper_subjects(+Subjects, +State, +Out) is det.
  754%
  755%   Write the subjects that are not Bnodes.
  756
  757tw_proper_subjects([], _, _).
  758tw_proper_subjects([H|T], State, Out) :-
  759    separate_subjects(State, Out),
  760    tw_subject(H, H, State, Out),
  761    tw_proper_subjects(T, State, Out).
  762
  763
  764separate_subjects(State, Out) :-
  765    tw_state_subject_white_lines(State, ExtraLines),
  766    put_n(ExtraLines, '\n', Out).
  767
  768%!  tw_subject(+URI, +State, +Out) is det.
  769%
  770%   Write a toplevel non-bnode subject.
  771
  772tw_subject(URI, Ref, State, Out) :-
  773    subject_triples(URI, State, Pairs),
  774    length(Pairs, Count),
  775    inc_triple_count(State, Count),
  776    group_po(Pairs, Grouped),
  777    tw_subject_triples(Grouped, Ref, State, Out).
  778
  779group_po(Pairs, Grouped) :-
  780    group_pairs_by_key(Pairs, Grouped0),
  781    rdf_equal(rdf:type, RDFType),
  782    (   select(RDFType-Types, Grouped0, Grouped1)
  783    ->  Grouped = [RDFType-Types|Grouped1]
  784    ;   Grouped = Grouped0
  785    ).
  786
  787%!  tw_bnodes(+Pairs, +State, +Out) is det.
  788%
  789%   Write the Bnodes. Pairs is a list   URI-Ref, where Ref is one of
  790%   =written= if the Bnode is already written;   an integer if it is
  791%   used multiple times or a variable if   it  has not been written.
  792%   The order in which we deal with bnodes is defined as follows:
  793%
  794%       * First, write the bnodes that are not referenced at all
  795%       as toplevel bnodes using [ ... ] notation.
  796%
  797%       * Next, write the bnodes that need written as toplevel
  798%       nodes using the _:XX notation because they are referenced
  799%       multiple times in the graph. Continue this process until it
  800%       is exhausted.
  801
  802tw_bnodes(Pairs, State, Out) :-
  803    tw_top_bnodes(Pairs, State, Out, Rest1),
  804    tw_numbered_bnodes(Rest1, State, Out, 1, Rest2),
  805    tw_cyclic_bnodes(Rest2, State, Out, 0).
  806
  807
  808tw_numbered_bnodes([], _, _, _, []) :- !.
  809tw_numbered_bnodes(Pairs, State, Out, Level, Rest) :-
  810    multi_referenced(Pairs, RefPairs, Rest0),
  811    (   RefPairs == []
  812    ->  Rest = Rest0
  813    ;   length(RefPairs, Count),
  814        comment(State, 'Level ~D multi-referenced blank-nodes (~D)',
  815                [ Level, Count ], Out),
  816        tw_ref_bnodes(RefPairs, State, Out),
  817        Level1 is Level + 1,
  818        tw_numbered_bnodes(Rest0, State, Out, Level1, Rest)
  819    ).
  820
  821multi_referenced([], [], []).
  822multi_referenced([H|T], RefPairs, Rest) :-
  823    H = _-Ref,
  824    (   Ref == written
  825    ->  multi_referenced(T, RefPairs, Rest)
  826    ;   var(Ref)
  827    ->  Rest = [H|TR],
  828        multi_referenced(T, RefPairs, TR)
  829    ;   assertion(Ref = bnode(_)),
  830        RefPairs = [H|TRP],         % assigned reference
  831        multi_referenced(T, TRP, Rest)
  832    ).
  833
  834tw_ref_bnodes([], _, _).
  835tw_ref_bnodes([BNode-Ref|T], State, Out) :-
  836    separate_subjects(State, Out),
  837    tw_subject(BNode, Ref, State, Out),
  838    tw_ref_bnodes(T, State, Out).
  839
  840
  841%!  tw_top_bnodes(+Pairs, +State, +Out, -Rest)
  842%
  843%   Write the top bnodes: those that  do   not  appear  as an object
  844%   anywhere.
  845
  846tw_top_bnodes(Pairs, State, Out, Rest) :-
  847    unreferenced(Pairs, State, TopBNodes, Rest),
  848    (   TopBNodes == []
  849    ->  true
  850    ;   length(TopBNodes, Count),
  851        comment(State, 'Toplevel blank-nodes (~D)', [Count], Out),
  852        sort_bnodes(TopBNodes, SortedTopBNodes, State),
  853        tw_top_bnodes(SortedTopBNodes, State, Out)
  854    ).
  855
  856unreferenced([], _, [], []).
  857unreferenced([H|T], State, UnrefPairs, Rest) :-
  858    H = BNode-Ref,
  859    (   Ref == written
  860    ->  unreferenced(T, State, UnrefPairs, Rest)
  861    ;   var(Ref),
  862        object_link_count(BNode, State, 0)
  863    ->  UnrefPairs = [H|URT],
  864        unreferenced(T, State, URT, Rest)
  865    ;   Rest = [H|TR],
  866        unreferenced(T, State, UnrefPairs, TR)
  867    ).
  868
  869tw_top_bnodes([], _, _).
  870tw_top_bnodes([BNode-_|T], State, Out) :-
  871    tw_bnode(BNode, State, Out),
  872    tw_top_bnodes(T, State, Out).
  873
  874
  875tw_bnode(BNode, State, Out) :-
  876    subject_triples(BNode, State, Pairs),
  877    length(Pairs, Count),
  878    inc_triple_count(State, Count),
  879    (   tw_state_inline_bnodes(State, true)
  880    ->  tw_bnode_triples(Pairs, State, Out),
  881        format(Out, ' .~n', [])
  882    ;   next_bnode_id(State, BNode, Ref),
  883        tw_bnode_ntriples(Pairs, Ref, State, Out)
  884    ).
  885
  886tw_bnode_triples(Pairs, State, Out) :-
  887    group_po(Pairs, Grouped),
  888    (   tw_state_single_line_bnodes(State, true)
  889    ->  format(Out, '[ ', []),
  890        tw_triples(Grouped, -1, State, Out),
  891        format(Out, ' ]', [])
  892    ;   line_position(Out, Indent),
  893        format(Out, '[ ', []),
  894        line_position(Out, AIndent),
  895        tw_triples(Grouped, AIndent, State, Out),
  896        nl_indent(Out, State, Indent),
  897        format(Out, ']', [])
  898    ).
  899
  900tw_bnode_ntriples([], _, _, _).
  901tw_bnode_ntriples([P-O|T], Ref, State, Out) :-
  902    tw_bnode_ref(Ref, Out),
  903    format(Out, ' ', []),
  904    tw_predicate(P, State, Out),
  905    format(Out, ' ', []),
  906    tw_object(O, State, Out),
  907    format(Out, ' .~n', []),
  908    tw_bnode_ntriples(T, Ref, State, Out).
  909
  910
  911%!  tw_cyclic_bnodes(+Pairs, +BNode, +State, +Out, +Cycle)
  912%
  913%   The rest. These are groups of bnodes  that are reachable, but we
  914%   cannot find a starting point, neither from a named resource, nor
  915%   from an unlinked bnode. As long as we are not considering stable
  916%   canonical output, we can break the cycle at any point.
  917
  918tw_cyclic_bnodes([], _State, _Out, _) :- !.
  919tw_cyclic_bnodes(Pairs, State, Out, Cycle0) :-
  920    (   tw_state_canonical(State, true)
  921    ->  sort_bnode_pairs(Pairs, BNodes, State)
  922    ;   BNodes = Pairs
  923    ),
  924    succ(Cycle0, Cycle),
  925    BNodes = [BNode-Ref|_],
  926    next_bnode_id(State, BNode, Ref),
  927    comment(State, 'Breaking cycle ~D', [Cycle], Out),
  928    tw_numbered_bnodes(Pairs, State, Out, 1, Rest),
  929    tw_cyclic_bnodes(Rest, State, Out, Cycle).
  930
  931
  932%!  tw_subject_triples(+Grouped, +Subject, +State, +Out)
  933%
  934%   Save triples on Subject.  Combine groups of triples with the
  935%   same subject (;) and same subject+predicate (,).
  936%
  937%   @param  Subject is either a URI or an integer.  The latter is
  938%           used for writing a named bnode.
  939
  940tw_subject_triples([], _, _, _) :- !.
  941tw_subject_triples(Grouped, URI, State, Out) :-
  942    tw_state_group(State, false),
  943    !,
  944    tw_ungrouped_triples(Grouped, URI, State, Out).
  945tw_subject_triples(Grouped, URI, State, Out) :-
  946    tw_resource(URI, State, Out),
  947    (   tw_state_indent(State, Indent),
  948        Indent > 0
  949    ->  nl_indent(Out, State, Indent)
  950    ;   put_char(Out, ' '),
  951        line_position(Out, Indent)
  952    ),
  953    tw_triples(Grouped, Indent, State, Out),
  954    format(Out, ' .~n', []).
  955
  956%!  tw_ungrouped_triples(+Grouped, +URI, +State, +Out)
  957%
  958%   Write triples for subject URI as one line per triple.  Used
  959%   for canonical output.
  960
  961tw_ungrouped_triples([], _, _, _).
  962tw_ungrouped_triples([P-Vs|Groups], URI, State, Out) :-
  963    partition(rdf_is_bnode, Vs, BNVs, ProperVs),
  964    tw_ungrouped_values(ProperVs, P, URI, State, Out),
  965    sort_bnodes(BNVs, SortedBNVs, State),
  966    tw_ungrouped_values(SortedBNVs, P, URI, State, Out),
  967    tw_ungrouped_triples(Groups, URI, State, Out).
  968
  969tw_ungrouped_values([], _, _, _, _).
  970tw_ungrouped_values([V|T], P, URI, State, Out) :-
  971    tw_resource(URI, State, Out),
  972    put_char(Out, ' '),
  973    tw_predicate(P, State, Out),
  974    put_char(Out, ' '),
  975    tw_object(V, State, Out),
  976    format(Out, ' .~n', []),
  977    tw_ungrouped_values(T, P, URI, State, Out).
  978
  979
  980%!  tw_triples(+Groups, +Indent, +State, +Out) is det.
  981%
  982%   Triple writer that uses ; and ,- grouping
  983
  984tw_triples([P-Vs|MoreGroups], Indent, State, Out) :-
  985    tw_write_pvs(Vs, P, State, Out),
  986    (   MoreGroups == []
  987    ->  true
  988    ;   format(Out, ' ;', []),
  989        nl_indent(Out, State, Indent),
  990        tw_triples(MoreGroups, Indent, State, Out)
  991    ).
  992
  993tw_write_pvs(Values, P, State, Out) :-
  994    tw_predicate(P, State, Out),
  995    put_char(Out, ' '),
  996    line_position(Out, Indent),
  997    tw_write_vs(Values, Indent, State, Out).
  998
  999tw_predicate(P, State, Out) :-
 1000    (   rdf_equal(P, rdf:type),
 1001        tw_state_a(State, true)
 1002    ->  format(Out, 'a', [])
 1003    ;   tw_resource(P, State, Out)
 1004    ).
 1005
 1006tw_write_vs([H|T], Indent, State, Out) :-
 1007    tw_object(H, State, Out),
 1008    (   T == []
 1009    ->  true
 1010    ;   format(Out, ' ,', []),
 1011        nl_indent(Out, State, Indent),
 1012        tw_write_vs(T, Indent, State, Out)
 1013    ).
 1014
 1015%!  tw_object(+Value, +State, +Out) is det.
 1016%
 1017%   Write the object of a triple.
 1018
 1019tw_object(Value, State, Out) :-
 1020    rdf_is_bnode(Value),
 1021    !,
 1022    tw_bnode_object(Value, State, Out).
 1023tw_object(Value, State, Out) :-
 1024    atom(Value),
 1025    !,
 1026    tw_resource(Value, State, Out).
 1027tw_object(Literal, State, Out) :-
 1028    tw_literal(Literal, State, Out).
 1029
 1030%!  tw_bnode_object(+Value, +State, +Out) is det.
 1031%
 1032%   Write a Bnode value.  There are a number of cases:
 1033%
 1034%       * The BNode was already written.  Write the same ref.
 1035%       * The BNode is not shared.  Inline and set =written=
 1036%       * The BNode is shared.  Generate a NodeID and store it
 1037%       * The BNode is once as object: Generate a NodeID
 1038%       * The BNode is more than once object: Generate a NodeID
 1039%         and put in table.
 1040
 1041tw_bnode_object(BNode, State, Out) :-
 1042    tw_state_nodeid_map(State, BNTree),
 1043    rb_lookup(BNode, Ref, BNTree),
 1044    !,
 1045    (   var(Ref)
 1046    ->  (   tw_state_inline_bnodes(State, true),
 1047            tw_unshared_bnode(BNode, State, Out)
 1048        ->  Ref = written
 1049        ;   next_bnode_id(State, BNode, Ref),
 1050            tw_bnode_ref(Ref, Out)
 1051        )
 1052    ;   tw_bnode_ref(Ref, Out)
 1053    ).
 1054tw_bnode_object(BNode, State, Out) :-
 1055    object_link_count(BNode, State, N),
 1056    N > 1,
 1057    !,
 1058    tw_state_nodeid_map(State, BNTree0),
 1059    rb_insert(BNTree0, BNode, Ref, BNTree),
 1060    set_nodeid_map_of_tw_state(BNTree, State),
 1061    next_bnode_id(State, BNode, Ref),
 1062    tw_bnode_ref(Ref, Out).
 1063tw_bnode_object(BNode, State, Out) :-
 1064    next_bnode_id(State, BNode, Ref),
 1065    tw_bnode_ref(Ref, Out).
 1066
 1067tw_bnode_ref(bnode(Ref), Out) :-
 1068    (   integer(Ref)
 1069    ->  format(Out, '_:bn~w', [Ref])
 1070    ;   format(Out, '_:~w', [Ref])
 1071    ).
 1072
 1073%!  tw_unshared_bnode(+BNode, +State, +Out) is semidet.
 1074%
 1075%   Write a bnode if this is the only place it is used.
 1076
 1077tw_unshared_bnode(BNode, State, Out) :-
 1078    object_link_count(BNode, State, 1),
 1079    subject_triples(BNode, State, Pairs),
 1080    (   Pairs == []
 1081    ->  format(Out, '[]', [])
 1082    ;   pairs_unshared_collection(Pairs, State, Collection)
 1083    ->  (   Collection == []
 1084        ->  format(Out, '()', [])
 1085        ;   tw_state_nodeid_map(State, BNTree),
 1086            rb_lookup(BNode, written, BNTree),
 1087            length(Collection, NMembers),
 1088            Triples is 2*NMembers,
 1089            inc_triple_count(State, Triples),
 1090            (   tw_state_single_line_bnodes(State, true)
 1091            ->  format(Out, '( ', []),
 1092                tw_collection(Collection, -1, State, Out),
 1093                format(Out, ' )', [])
 1094            ;   line_position(Out, Indent),
 1095                format(Out, '( ', []),
 1096                line_position(Out, AIndent),
 1097                tw_collection(Collection, AIndent, State, Out),
 1098                nl_indent(Out, State, Indent),
 1099                format(Out, ')', [])
 1100            )
 1101        )
 1102    ;   tw_bnode_triples(Pairs, State, Out)
 1103    ).
 1104
 1105tw_collection([H|T], Indent, State, Out) :-
 1106    tw_object(H, State, Out),
 1107    (   T \== []
 1108    ->  nl_indent(Out, State, Indent),
 1109        tw_collection(T, Indent, State, Out)
 1110    ;   true
 1111    ).
 1112
 1113%!  unshared_collection(+URI, +State, -Members) is semidet.
 1114%
 1115%   True if URI denodes an RDF list that  is made up from bnodes, is
 1116%   linked exactly once  to  its  context   and  contains  no  extra
 1117%   triples.
 1118
 1119unshared_collection(C, _, []) :-
 1120    rdf_equal(C, rdf:nil),
 1121    !.
 1122unshared_collection(C, State, List) :-
 1123    rdf_is_bnode(C),
 1124    object_link_count(C, State, 1),
 1125    tw_state_nodeid_map(State, BNTree),
 1126    rb_lookup(C, written, BNTree),
 1127    subject_triples(C, State, Pairs),
 1128    pairs_unshared_collection(Pairs, State, List).
 1129
 1130pairs_unshared_collection(Pairs, State, [H|T]) :-
 1131    rdf_equal(rdf:first, RDFFirst),
 1132    rdf_equal(rdf:rest, RDFRest),
 1133    Pairs = [ RDFFirst-H,
 1134              RDFRest-Rest
 1135            | More
 1136            ],
 1137    (   More == []
 1138    ;   rdf_equal(rdf:type, RDFType),
 1139        rdf_equal(rdf:'List', RDFList),
 1140        More == [RDFType-RDFList]
 1141    ),
 1142    unshared_collection(Rest, State, T).
 1143
 1144
 1145%!  object_link_count(+BNode, +STate, -Count) is det.
 1146%
 1147%   Number of times BNode is used as an object in the graph
 1148
 1149object_link_count(BNode, State, Count) :-
 1150    tw_state_graph(State, Graph),
 1151    tw_state_expand(State, Expand),
 1152    findall(S-P, call(Expand,S,P,BNode,Graph), Pairs0),
 1153    sort(Pairs0, Pairs),            % remove duplicates
 1154    length(Pairs, Count).
 1155
 1156%!  nl_indent(+Out, +State, +Indent) is det.
 1157%
 1158%   Write a newline and indent to column Indent.
 1159
 1160nl_indent(Out, _, -1) :-
 1161    !,
 1162    put_char(Out, ' ').
 1163nl_indent(Out, State, Indent) :-
 1164    nl(Out),
 1165    tw_state_tab_distance(State, TD),
 1166    (   TD == 0
 1167    ->  tab(Out, Indent)
 1168    ;   Tabs is Indent//TD,
 1169        Spaces is Indent mod TD,
 1170        put_n(Tabs, '\t', Out),
 1171        put_n(Spaces, ' ', Out)
 1172    ).
 1173
 1174put_n(N, Char, Out) :-
 1175    N > 0,
 1176    !,
 1177    put_char(Out, Char),
 1178    N2 is N - 1,
 1179    put_n(N2, Char, Out).
 1180put_n(_, _, _).
 1181
 1182
 1183%!  subject_triples(+URI, +State, -Pairs) is det.
 1184%
 1185%   Pairs is a sorted list of P-O  pairs representing all triples on
 1186%   the subject URI.
 1187
 1188subject_triples(URI, State, Pairs) :-
 1189    tw_state_graph(State, Graph),
 1190    tw_state_expand(State, Expand),
 1191    findall(P-O, call(Expand, URI, P, O, Graph), Pairs0),
 1192    sort(Pairs0, Pairs).
 1193
 1194
 1195                 /*******************************
 1196                 *          GRAPH-LOGIC         *
 1197                 *******************************/
 1198
 1199%!  subjects(+State, -Subjects:ord_set) is det.
 1200%
 1201%   Subjects is a list of all subjects in the graph requested in
 1202%   State.
 1203
 1204subjects(State, Subjects) :-
 1205    tw_state_expand(State, Expand),
 1206    tw_state_graph(State, Graph),
 1207    (   Expand == lookup,
 1208        atom(Graph),
 1209        (   rdf_graph_property(Graph, triples(Count))
 1210        ->  true
 1211        ;   Count = 0                       % non-existing graph
 1212        ),
 1213        rdf_statistics(triples(Total)),
 1214        Count * 10 < Total
 1215    ->  findall(S, rdf(S,_,_,Graph), List),
 1216        sort(List, Subjects)
 1217    ;   Expand \== lookup
 1218    ->  findall(S, call(Expand, S,_,_,Graph), List),
 1219        sort(List, Subjects)
 1220    ;   findall(Subject, subject(State, Subject), AllSubjects),
 1221        sort(AllSubjects, Subjects)
 1222    ).
 1223
 1224
 1225subject(State, Subject) :-
 1226    tw_state_graph(State, Graph),
 1227    (   atom(Graph)
 1228    ->  rdf_resource(Subject),
 1229        (   rdf(Subject, _, _, Graph)
 1230        ->  true
 1231        )
 1232    ;   rdf_subject(Subject)
 1233    ).
 1234
 1235
 1236:- public lookup/4.                     % called from expand hook.
 1237
 1238lookup(S,P,O,G) :-
 1239    (   var(G)
 1240    ->  rdf(S,P,O)
 1241    ;   rdf(S,P,O,G)
 1242    ).
 1243
 1244
 1245                 /*******************************
 1246                 *        CANONICAL ORDERING    *
 1247                 *******************************/
 1248
 1249/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 1250This section deals with the two problems of canonical graphs:
 1251
 1252    * Keep blank nodes in the same order
 1253    * Assign stable names to blank nodes that we need to
 1254      give a name.  There are two cases: (1) a blank node is
 1255      used in more than one place and (2) a blank node series
 1256      is cyclic.
 1257- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
 1258
 1259%!  sort_bnodes(+BNodes, -Sorted, +State) is det.
 1260%
 1261%   Sort a list of blank nodes.
 1262
 1263sort_bnodes(BNodes, Sorted, _State) :-
 1264    sort(BNodes, Sorted).
 1265
 1266%!  sort_bnode_pairs(+Pairs, -Sorted, +State) is det.
 1267%
 1268%   Sort a list of Pairs BNode-Ref
 1269
 1270sort_bnode_pairs(Pairs, Sorted, _State) :-
 1271    sort(Pairs, Sorted).
 1272
 1273%!  bnode_to_term(+BNode, -Term, +State)
 1274%
 1275%   Term is a canonical representation of the graph formed by BNode.
 1276%   The transformation of a bnode is
 1277%
 1278%           bnode(p-[o1,o2,..], ..)
 1279%
 1280%   The arguments are alphabetically sorted on predicate (can't we
 1281%   leave the predicates out of them?) and the objects are
 1282%   alphabetically sorted.  How to sort multiple bnode values?
 1283
 1284
 1285%!  next_bnode_id(+State, +BNode, -Ref) is det.
 1286%
 1287%   Generate a node-id for BNode.   When writing non-canonically, we
 1288%   simply number the bnodes.  Otherwise  we   want  a  more  stable
 1289%   numbering. Our numbering is a hash of  the content of the bnode.
 1290%   It is not unlikely that we find muliple copies, and therefore we
 1291%   number the full id is bn_<hash>_<n>, <n> counting 0...
 1292
 1293next_bnode_id(State, _BNode, bnode(Ref)) :-
 1294    tw_state_canonical(State, false),
 1295    !,
 1296    tw_state_bnode_id(State, Ref0),
 1297    Ref is Ref0+1,
 1298    nb_set_bnode_id_of_tw_state(Ref, State).
 1299next_bnode_id(State, BNode, bnode(Ref)) :-
 1300    bnode_hash(BNode, Hash),
 1301    tw_state_bnode_hash(State, BNHash),
 1302    (   var(BNHash)
 1303    ->  rb_empty(BNHash)
 1304    ;   true
 1305    ),
 1306    (   rb_update(BNHash, Hash, C0, C, BNHash1)
 1307    ->  C is C0+1
 1308    ;   C = 0,
 1309        rb_insert(BNHash, Hash, C, BNHash1)
 1310    ),
 1311    set_bnode_hash_of_tw_state(BNHash1, State),
 1312    format(atom(Ref), 'bn_~w_~d', [Hash, C]).
 1313
 1314%!  bnode_hash(+BNode, -Hash) is det.
 1315%
 1316%   Hash is the hash-value for a bnode.
 1317%
 1318%   @tbd: Hash on content.
 1319
 1320bnode_hash(BNode, Hash) :-
 1321    term_hash(BNode, Hash).
 1322
 1323
 1324                 /*******************************
 1325                 *           PRIMITIVES         *
 1326                 *******************************/
 1327
 1328%!  tw_resource(+Resource, +State, +Out) is det.
 1329%
 1330%   Write a resource
 1331
 1332tw_resource(BNodeID, _, Out) :-
 1333    BNodeID = bnode(_),
 1334    !,
 1335    tw_bnode_ref(BNodeID, Out).
 1336tw_resource(Resource, State, Out) :-
 1337    tw_state_prefix_map(State, PrefixMap),
 1338    member(Prefix-Full, PrefixMap),
 1339    atom_concat(Full, Name, Resource),
 1340    (   turtle:turtle_pn_local(Name)
 1341    ->  true
 1342    ;   Name == ''
 1343    ),
 1344    !,
 1345    format(Out, '~w:', [Prefix]),
 1346    turtle:turtle_write_pn_local(Out, Name).
 1347tw_resource(Resource, State, Out) :-
 1348    tw_relative_uri(Resource, State, Out).
 1349
 1350
 1351tw_relative_uri(Resource, State, Out) :-
 1352    tw_state_base_root(State, Root),
 1353    atom(Root),
 1354    atom_concat(Root, ResPath, Resource),
 1355    sub_atom(ResPath, 0, _, _, /),
 1356    tw_state_base_path(State, BasePath),
 1357    relative_path(ResPath, BasePath, RelPath),
 1358    !,
 1359    turtle:turtle_write_uri(Out, RelPath).
 1360tw_relative_uri(Resource, _, Out) :-
 1361    turtle:turtle_write_uri(Out, Resource).
 1362
 1363relative_path(Path, RelTo, RelPath) :-
 1364    atomic_list_concat(PL, /, Path),
 1365    atomic_list_concat(RL, /, RelTo),
 1366    delete_common_prefix(PL, RL, PL1, PL2),
 1367    to_dot_dot(PL2, DotDot, PL1),
 1368    atomic_list_concat(DotDot, /, RelPath).
 1369
 1370delete_common_prefix([H|T01], [H|T02], T1, T2) :-
 1371    !,
 1372    delete_common_prefix(T01, T02, T1, T2).
 1373delete_common_prefix(T1, T2, T1, T2).
 1374
 1375to_dot_dot([], Tail, Tail).
 1376to_dot_dot([_], Tail, Tail) :- !.
 1377to_dot_dot([_|T0], ['..'|T], Tail) :-
 1378    to_dot_dot(T0, T, Tail).
 1379
 1380
 1381%!  tw_literal(+Literal, +State, +Out) is det.
 1382%
 1383%   Write a literal value to the stream Out.
 1384
 1385tw_literal(literal(type(Type, Value)), State, Out) :-
 1386    !,
 1387    tw_typed_literal(Type, Value, State, Out).
 1388tw_literal(literal(lang(Lang, Value)), State, Out) :-
 1389    !,
 1390    tw_quoted_string(Value, State, Out),
 1391    downcase_atom(Lang, TurtleLang),        % Turtle lang = [a-z]+('-'[a-z0-9]+)*
 1392    format(Out, '@~w', [TurtleLang]).
 1393tw_literal(literal(Value), State, Out) :-
 1394    atom(Value),
 1395    !,
 1396    rdf_equal(xsd:string, TypeString),
 1397    tw_typed_literal(TypeString, Value, State, Out).
 1398                                                % Add types automatically
 1399tw_literal(literal(Value), State, Out) :-
 1400    integer(Value),
 1401    !,
 1402    rdf_equal(Type, xsd:integer),
 1403    tw_typed_literal(Type, Value, State, Out).
 1404tw_literal(literal(Value), State, Out) :-
 1405    float(Value),
 1406    !,
 1407    rdf_equal(Type, xsd:double),
 1408    tw_typed_literal(Type, Value, State, Out).
 1409tw_literal(literal(Value), State, Out) :-
 1410    xml_is_dom(Value),
 1411    !,
 1412    rdf_equal(Type, rdf:'XMLLiteral'),
 1413    tw_typed_literal(Type, Value, State, Out).
 1414tw_literal(Literal, _State, _Out) :-
 1415    type_error(rdf_literal, Literal).
 1416
 1417
 1418tw_typed_literal(Type, Value, State, Out) :-
 1419    tw_state_abbreviate_literals(State, true),
 1420    tw_abbreviated_literal(Type, Value, State, Out),
 1421    !.
 1422tw_typed_literal(Type, Value, State, Out) :-
 1423    (atom(Value) ; string(Value)),
 1424    !,
 1425    tw_quoted_string(Value, State, Out),
 1426    write(Out, '^^'),
 1427    tw_resource(Type, State, Out).
 1428tw_typed_literal(Type, Value, State, Out) :-
 1429    rdf_equal(Type, rdf:'XMLLiteral'),
 1430    !,
 1431    with_output_to(string(Tmp),
 1432                   xml_write(Value, [header(false)])),
 1433    tw_quoted_string(Tmp, State, Out),
 1434    write(Out, '^^'),
 1435    tw_resource(Type, State, Out).
 1436tw_typed_literal(Type, Value, State, Out) :-
 1437    format(string(Tmp), '~q', [Value]),
 1438    tw_quoted_string(Tmp, State, Out),
 1439    write(Out, '^^'),
 1440    tw_resource(Type, State, Out).
 1441
 1442
 1443%!  tw_abbreviated_literal(+Type, +Value, +State, +Out) is semidet.
 1444%
 1445%   Turtle abbreviated typed literals.
 1446%
 1447%   @tbd:   Deal with canonical forms (or is this a task of the
 1448%           RDF parser?
 1449%   @tbd:   What if the value is not in the lexical space of the type?
 1450
 1451term_expansion((tw_abbreviated_literal(NS:Local, Value, State, Out) :- Body),
 1452               (tw_abbreviated_literal(Type, Value, State, Out) :- Body)) :-
 1453    atom(NS),
 1454    rdf_global_id(NS:Local, Type).
 1455
 1456tw_abbreviated_literal(xsd:integer, Value, State, Out) :-
 1457    (   tw_state_canonize_numbers(State, false)
 1458    ->  write(Out, Value)
 1459    ;   atom_number(Value, Int),
 1460        format(Out, '~d', [Int])
 1461    ).
 1462tw_abbreviated_literal(xsd:double, Value, State, Out) :-
 1463    (   tw_state_canonize_numbers(State, false)
 1464    ->  write(Out, Value)
 1465    ;   ValueF is float(Value),
 1466        xsd_number_string(ValueF, FloatS),
 1467        format(Out, '~s', [FloatS])
 1468    ).
 1469tw_abbreviated_literal(xsd:string, Value, State, Out) :-
 1470    tw_quoted_string(Value, State, Out).
 1471tw_abbreviated_literal(xsd:decimal, Value, _, Out) :-
 1472    format(Out, '~w', [Value]).
 1473tw_abbreviated_literal(xsd:boolean, Value, _, Out) :-
 1474    format(Out, '~w', [Value]).
 1475
 1476
 1477%!  tw_quoted_string(+Atom, +State, +Out) is det.
 1478%
 1479%   Write  Atom  to  Out  as  a  quoted  string.  We  only  use  the
 1480%   single-"..." representation.
 1481
 1482tw_quoted_string(Atom, _, Out) :-
 1483    turtle:turtle_write_quoted_string(Out, Atom).
 1484
 1485
 1486                 /*******************************
 1487                 *             COMMENT          *
 1488                 *******************************/
 1489
 1490comment(State, Format, Args, Out) :-
 1491    tw_state_comment(State, true),
 1492    !,
 1493    format(Out, '~n# ', []),
 1494    format(Out, Format, Args),
 1495    format(Out, '~n', []).
 1496comment(_, _, _, _).
 1497
 1498
 1499
 1500                 /*******************************
 1501                 *           STATISTICS         *
 1502                 *******************************/
 1503
 1504inc_triple_count(State, Count) :-
 1505    tw_state_triple_count(State, C0),
 1506    C1 is C0+Count,
 1507    nb_set_triple_count_of_tw_state(C1, State).
 1508
 1509inc_subject_count(State, Count) :-
 1510    tw_state_subject_count(State, C0),
 1511    C1 is C0+Count,
 1512    nb_set_subject_count_of_tw_state(C1, State).
 1513
 1514:- multifile
 1515    prolog:message//1. 1516
 1517prolog:message(rdf(saved(File, Time, SavedSubjects, SavedTriples))) -->
 1518    [ 'Saved ~D triples about ~D subjects into '-[SavedTriples, SavedSubjects] ],
 1519    rdf_output(File),
 1520    [ ' (~3f sec)'-[Time] ].
 1521prolog:message(rdf(saved(File, Time, SavedSubjects, SavedTriples,
 1522                         SavedGraphs))) -->
 1523    [ 'Saved ~D graphs, ~D triples about ~D subjects into '-
 1524      [SavedGraphs, SavedTriples, SavedSubjects] ],
 1525    rdf_output(File),
 1526    [ ' (~3f sec)'-[Time] ].
 1527
 1528rdf_output(StreamSpec) -->
 1529    { (   StreamSpec = stream(Stream)
 1530      ->  true
 1531      ;   Stream = StreamSpec
 1532      ),
 1533      is_stream(Stream),
 1534      stream_property(Stream, file_name(File))
 1535    },
 1536    !,
 1537    [ '~p'-[File] ].
 1538rdf_output(File) -->
 1539    [ '~p'-[File] ]