View source with formatted comments or as raw
    1/*  Part of SWI-Prolog
    2
    3    Author:        Jan Wielemaker
    4    E-mail:        J.Wielemaker@vu.nl
    5    WWW:           http://www.swi-prolog.org
    6    Copyright (c)  2013-2015, University of Amsterdam
    7                              VU University Amsterdam
    8    All rights reserved.
    9
   10    Redistribution and use in source and binary forms, with or without
   11    modification, are permitted provided that the following conditions
   12    are met:
   13
   14    1. Redistributions of source code must retain the above copyright
   15       notice, this list of conditions and the following disclaimer.
   16
   17    2. Redistributions in binary form must reproduce the above copyright
   18       notice, this list of conditions and the following disclaimer in
   19       the documentation and/or other materials provided with the
   20       distribution.
   21
   22    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   23    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   24    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
   25    FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
   26    COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
   27    INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
   28    BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
   29    LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
   30    CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   31    LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
   32    ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
   33    POSSIBILITY OF SUCH DAMAGE.
   34*/
   35
   36:- module(turtle,
   37          [ rdf_load_turtle/3,                  % +Input, -Triples, +Options
   38            rdf_read_turtle/3,                  % +Input, -Triples, +Options
   39            rdf_process_turtle/3,               % +Input, :OnObject, +Options
   40                                                % re-exports
   41            rdf_save_turtle/2,                  % +File, +Options
   42            rdf_save_canonical_turtle/2,        % +File, +Options
   43            rdf_save_trig/2,                    % +File, +Options
   44            rdf_save_canonical_trig/2,          % +File, +Options
   45            rdf_save_ntriples/2                 % +File, +Options
   46          ]).   47:- use_module(library(option)).   48:- use_module(library(semweb/rdf_db)).   49:- use_module(library(semweb/rdf_turtle_write)). % re-exports
   50:- use_module(library(uri)).   51:- use_module(library(http/http_open)).   52
   53:- meta_predicate
   54    rdf_process_turtle(+,2,+).   55
   56:- predicate_options(rdf_load_turtle/3, 3,
   57                     [pass_to(rdf_read_turtle/3, 3)]).   58:- predicate_options(rdf_process_turtle/3, 3,
   59                     [ anon_prefix(atom),
   60                       base_uri(atom),
   61                       base_used(-atom),
   62                       db(atom),
   63                       error_count(-integer),
   64                       namespaces(-list),
   65                       on_error(oneof([warning,error])),
   66                       prefixes(-list),
   67                       resources(oneof([uri,iri]))
   68                     ]).   69:- predicate_options(rdf_read_turtle/3, 3,
   70                     [ anon_prefix(atom),
   71                       base_uri(atom),
   72                       base_used(-atom),
   73                       db(atom),
   74                       error_count(-integer),
   75                       namespaces(-list),
   76                       on_error(oneof([warning,error])),
   77                       prefixes(-list),
   78                       resources(oneof([uri,iri]))
   79                     ]).   80
   81:- use_foreign_library(foreign(turtle)).   82:- public                               % used by the writer
   83    turtle_pn_local/1,
   84    turtle_write_quoted_string/2,
   85    turtle_write_uri/2.   86
   87/** <module> Turtle: Terse RDF Triple Language
   88
   89This module implements the Turtle  language   for  representing  the RDF
   90triple model as defined by Dave Beckett  from the Institute for Learning
   91and Research Technology University of Bristol  and later standardized by
   92the W3C RDF working group.
   93
   94This module acts as a plugin to   rdf_load/2,  for processing files with
   95one of the extensions =|.ttl|= or =|.n3|=.
   96
   97@see    http://www.w3.org/TR/turtle/ (used W3C Recommendation 25
   98        February 2014)
   99*/
  100
  101%!  rdf_read_turtle(+Input, -Triples, +Options)
  102%
  103%   Read a stream or file into a set of triples or quadruples (if
  104%   faced with TRiG input) of the format
  105%
  106%           rdf(Subject, Predicate, Object [, Graph])
  107%
  108%   The representation is consistent with the SWI-Prolog RDF/XML
  109%   and ntriples parsers.  Provided options are:
  110%
  111%           * base_uri(+BaseURI)
  112%           Initial base URI.  Defaults to file://<file> for loading
  113%           files.
  114%
  115%           * anon_prefix(+Prefix)
  116%           Blank nodes are generated as <Prefix>1, <Prefix>2, etc.
  117%           If Prefix is not an atom blank nodes are generated as
  118%           node(1), node(2), ...
  119%
  120%           * format(+Format)
  121%           One of =auto= (default), =turtle= or =trig=.  The
  122%           auto mode switches to TRiG format of there is a
  123%           =|{|= before the first triple.  Finally, of the
  124%           format is explicitly stated as =turtle= and the
  125%           file appears to be a TRiG file, a warning is printed
  126%           and the data is loaded while ignoring the graphs.
  127%
  128%           * resources(URIorIRI)
  129%           Officially, Turtle resources are IRIs.  Quite a
  130%           few applications however send URIs.  By default we
  131%           do URI->IRI mapping because this rarely causes errors.
  132%           To force strictly conforming mode, pass =iri=.
  133%
  134%           * prefixes(-Pairs)
  135%           Return encountered prefix declarations as a
  136%           list of Alias-URI
  137%
  138%           * namespaces(-Pairs)
  139%           Same as prefixes(Pairs).  Compatibility to rdf_load/2.
  140%
  141%           * base_used(-Base)
  142%           Base URI used for processing the data.  Unified to
  143%           [] if there is no base-uri.
  144%
  145%           * on_error(+ErrorMode)
  146%           In =warning= (default), print the error and continue
  147%           parsing the remainder of the file.  If =error=, abort
  148%           with an exception on the first error encountered.
  149%
  150%           * error_count(-Count)
  151%           If on_error(warning) is active, this option cane be
  152%           used to retrieve the number of generated errors.
  153%
  154%   @param  Input is one of stream(Stream), atom(Atom), a =http=,
  155%           =https= or =file= url or a filename specification as
  156%           accepted by absolute_file_name/3.
  157
  158rdf_read_turtle(In, Triples, Options) :-
  159    base_uri(In, BaseURI, Options),
  160    setup_call_cleanup(
  161        ( open_input(In, Stream, Close),
  162          create_turtle_parser(Parser, Stream,
  163                               [ base_uri(BaseURI)
  164                               | Options
  165                               ])
  166        ),
  167        ( turtle_parse(Parser, Triples,
  168                       [ parse(document)
  169                       | Options
  170                       ]),
  171          post_options(Parser, Options)
  172        ),
  173        ( destroy_turtle_parser(Parser),
  174          call(Close)
  175        )).
  176
  177%!  rdf_load_turtle(+Input, -Triples, +Options)
  178%
  179%   @deprecated Use rdf_read_turtle/3
  180
  181rdf_load_turtle(Input, Triples, Options) :-
  182    rdf_read_turtle(Input, Triples, Options).
  183
  184
  185%!  rdf_process_turtle(+Input, :OnObject, +Options) is det.
  186%
  187%   Streaming  Turtle  parser.  The  predicate  rdf_process_turtle/3
  188%   processes Turtle data from Input, calling   OnObject with a list
  189%   of triples for every Turtle _statement_ found in Input. OnObject
  190%   is  called  as  below,  where  `ListOfTriples`   is  a  list  of
  191%   rdf(S,P,O) terms for a normal Turtle  file or rdf(S,P,O,G) terms
  192%   if the =GRAPH= keyword is used to  associate a set of triples in
  193%   the document with  a  particular   graph.  The  `Graph` argument
  194%   provides the default graph for storing the triples and _Line_ is
  195%   the line number where the statement started.
  196%
  197%     ==
  198%     call(OnObject, ListOfTriples, Graph:Line)
  199%     ==
  200%
  201%   This predicate supports the same Options as rdf_load_turtle/3.
  202%
  203%   Errors encountered are sent to  print_message/2, after which the
  204%   parser tries to recover and parse the remainder of the data.
  205%
  206%   @see  This  predicate  is  normally    used  by  load_rdf/2  for
  207%   processing RDF data.
  208
  209rdf_process_turtle(In, OnObject, Options) :-
  210    base_uri(In, BaseURI, Options),
  211    option(graph(Graph), Options, BaseURI),
  212    setup_call_cleanup(
  213        ( open_input(In, Stream, Close),
  214          create_turtle_parser(Parser, Stream, Options)
  215        ),
  216        ( process_turtle(Parser, Stream, OnObject, Graph,
  217                         [ parse(statement)
  218                         ]),
  219          post_options(Parser, Options)
  220        ),
  221        ( destroy_turtle_parser(Parser),
  222          call(Close)
  223        )).
  224
  225post_options(Parser, Options) :-
  226    prefix_option(Parser, Options),
  227    namespace_option(Parser, Options),
  228    base_option(Parser, Options),
  229    error_option(Parser, Options).
  230
  231prefix_option(Parser, Options) :-
  232    (   option(prefixes(Pairs), Options)
  233    ->  turtle_prefixes(Parser, Pairs)
  234    ;   true
  235    ).
  236namespace_option(Parser, Options) :-
  237    (   option(namespaces(Pairs), Options)
  238    ->  turtle_prefixes(Parser, Pairs)
  239    ;   true
  240    ).
  241base_option(Parser, Options) :-
  242    (   option(base_used(Base), Options)
  243    ->  turtle_base(Parser, Base)
  244    ;   true
  245    ).
  246error_option(Parser, Options) :-
  247    (   option(error_count(Count), Options)
  248    ->  turtle_error_count(Parser, Count)
  249    ;   true
  250    ).
  251
  252
  253process_turtle(_Parser, Stream, _OnObject, _Graph, _Options) :-
  254    at_end_of_stream(Stream),
  255    !.
  256process_turtle(Parser, Stream, OnObject, Graph, Options) :-
  257    stream_pair(Stream, In, _),
  258    line_count(In, LineNo),
  259    turtle_parse(Parser, Triples,
  260                 [ parse(statement)
  261                 | Options
  262                 ]),
  263    call(OnObject, Triples, Graph:LineNo),
  264    process_turtle(Parser, Stream, OnObject, Graph, Options).
  265
  266
  267%!  open_input(+Input, -Stream, -Close) is det.
  268%
  269%   Open given input.
  270%
  271%   @param  Close goal to undo the open action
  272%   @tbd    Synchronize with input handling of rdf_db.pl.
  273%   @error  existence_error, permission_error
  274
  275open_input(stream(Stream), Stream, Close) :-
  276    !,
  277    stream_property(Stream, encoding(Old)),
  278    (   unicode_encoding(Old)
  279    ->  Close = true
  280    ;   set_stream(Stream, encoding(utf8)),
  281        Close = set_stream(Stream, encoding(Old))
  282    ).
  283open_input(Stream, Stream, Close) :-
  284    is_stream(Stream),
  285    !,
  286    open_input(stream(Stream), Stream, Close).
  287open_input(atom(Atom), Stream, close(Stream)) :-
  288    !,
  289    atom_to_memory_file(Atom, MF),
  290    open_memory_file(MF, read, Stream, [free_on_close(true)]).
  291open_input(URL, Stream, close(Stream)) :-
  292    (   sub_atom(URL, 0, _, _, 'http://')
  293    ;   sub_atom(URL, 0, _, _, 'https://')
  294    ),
  295    !,
  296    http_open(URL, Stream, []),
  297    set_stream(Stream, encoding(utf8)).
  298open_input(URL, Stream, close(Stream)) :-
  299    uri_file_name(URL, Path),
  300    !,
  301    open(Path, read, Stream, [encoding(utf8)]).
  302open_input(File, Stream, close(Stream)) :-
  303    absolute_file_name(File, Path,
  304                       [ access(read),
  305                         extensions([ttl, ''])
  306                       ]),
  307    open(Path, read, Stream, [encoding(utf8)]).
  308
  309unicode_encoding(utf8).
  310unicode_encoding(wchar_t).
  311unicode_encoding(unicode_be).
  312unicode_encoding(unicode_le).
  313
  314%!  base_uri(+Input, -BaseURI, +Options)
  315%
  316%   Determine the base uri to use for processing.
  317
  318base_uri(_Input, BaseURI, Options) :-
  319    option(base_uri(BaseURI), Options),
  320    !.
  321base_uri(_Input, BaseURI, Options) :-
  322    option(graph(BaseURI), Options),
  323    !.
  324base_uri(stream(Input), BaseURI, _Options) :-
  325    stream_property(Input, file_name(Name)),
  326    !,
  327    name_uri(Name, BaseURI).
  328base_uri(Stream, BaseURI, Options) :-
  329    is_stream(Stream),
  330    !,
  331    base_uri(stream(Stream), BaseURI, Options).
  332base_uri(Name, BaseURI, _Options) :-
  333    atom(Name),
  334    !,
  335    name_uri(Name, BaseURI).
  336base_uri(_, 'http://www.example.com/', _).
  337
  338name_uri(Name, BaseURI) :-
  339    uri_is_global(Name),
  340    !,
  341    uri_normalized(Name, BaseURI).
  342name_uri(Name, BaseURI) :-
  343    uri_file_name(BaseURI, Name).
  344
  345
  346                 /*******************************
  347                 *          WRITE SUPPORT       *
  348                 *******************************/
  349
  350%!  turtle_pn_local(+Atom:atom) is semidet.
  351%
  352%   True if Atom is a  valid   Turtle  _PN_LOCAL_ name. The PN_LOCAL
  353%   name is what can follow the : in  a resource. In the new Turtle,
  354%   this can be anything and this   function becomes meaningless. In
  355%   the old turtle, PN_LOCAL is defined   similar (but not equal) to
  356%   an XML name. This predicate  is   used  by  rdf_save_turtle/2 to
  357%   write files such that can be read by old parsers.
  358%
  359%   @see xml_name/2.
  360
  361%!  turtle_write_quoted_string(+Out, +Value, ?WriteLong) is det.
  362%
  363%   Write Value (an atom)  as  a   valid  Turtle  string.  WriteLong
  364%   determines wether the string is written   as a _short_ or _long_
  365%   string.  It takes the following values:
  366%
  367%     * true
  368%     Use Turtle's long string syntax. Embeded newlines and
  369%     single or double quotes are are emitted verbatim.
  370%     * false
  371%     Use Turtle's short string syntax.
  372%     * Var
  373%     If WriteLong is unbound, this predicate uses long syntax
  374%     if newlines appear in the string and short otherwise.  WriteLong
  375%     is unified with the decision taken.
  376
  377%!  turtle_write_quoted_string(+Out, +Value) is det.
  378%
  379%   Same as turtle_write_quoted_string(Out, Value, false), writing a
  380%   string with only a single =|"|=.   Embedded newlines are escapes
  381%   as =|\n|=.
  382
  383turtle_write_quoted_string(Out, Text) :-
  384    turtle_write_quoted_string(Out, Text, false).
  385
  386%!  turtle_write_uri(+Out, +Value) is det.
  387%
  388%   Write a URI as =|<...>|=
  389
  390
  391                 /*******************************
  392                 *          RDF-DB HOOK         *
  393                 *******************************/
  394
  395:- multifile
  396    rdf_db:rdf_load_stream/3,
  397    rdf_db:rdf_file_type/2.  398
  399%!  rdf_db:rdf_load_stream(+Format, +Stream, :Options)
  400%
  401%   (Turtle clauses)
  402
  403rdf_db:rdf_load_stream(turtle, Stream, Options) :-
  404    load_turtle_stream(Stream, Options).
  405rdf_db:rdf_load_stream(trig, Stream, Options) :-
  406    load_turtle_stream(Stream, Options).
  407
  408load_turtle_stream(Stream, _Module:Options) :-
  409    rdf_db:graph(Options, Graph),
  410    atom_concat('_:', Graph, BNodePrefix),
  411    rdf_transaction((  rdf_process_turtle(Stream, assert_triples,
  412                                          [ anon_prefix(BNodePrefix)
  413                                          | Options
  414                                          ]),
  415                       rdf_set_graph(Graph, modified(false))
  416                    ),
  417                    parse(Graph)).
  418
  419assert_triples([], _).
  420assert_triples([H|T], Location) :-
  421    assert_triple(H, Location),
  422    assert_triples(T, Location).
  423
  424assert_triple(rdf(S,P,O), Location) :-
  425    rdf_assert(S,P,O,Location).
  426assert_triple(rdf(S,P,O,G), _) :-
  427    rdf_assert(S,P,O,G).
  428
  429
  430rdf_db:rdf_file_type(ttl,  turtle).
  431rdf_db:rdf_file_type(n3,   turtle).     % not really, but good enough
  432rdf_db:rdf_file_type(trig, trig).
  433
  434
  435                 /*******************************
  436                 *             MESSAGES         *
  437                 *******************************/
  438
  439:- multifile prolog:error_message//1.  440
  441prolog:error_message(existence_error(turtle_prefix, '')) -->
  442    [ 'Turtle empty prefix (:) is not defined' ]