View source with raw comments or as raw
    1/*  Part of SWI-Prolog
    2
    3    Author:        Jan Wielemaker
    4    E-mail:        J.Wielemaker@vu.nl
    5    WWW:           http://www.swi-prolog.org
    6    Copyright (c)  2013-2015, VU University Amsterdam
    7    All rights reserved.
    8
    9    Redistribution and use in source and binary forms, with or without
   10    modification, are permitted provided that the following conditions
   11    are met:
   12
   13    1. Redistributions of source code must retain the above copyright
   14       notice, this list of conditions and the following disclaimer.
   15
   16    2. Redistributions in binary form must reproduce the above copyright
   17       notice, this list of conditions and the following disclaimer in
   18       the documentation and/or other materials provided with the
   19       distribution.
   20
   21    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   22    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   23    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
   24    FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
   25    COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
   26    INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
   27    BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
   28    LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
   29    CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   30    LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
   31    ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
   32    POSSIBILITY OF SUCH DAMAGE.
   33*/
   34
   35:- module(rdf_ntriples,
   36          [ rdf_read_ntriples/3,        % +Input, -Triples, +Options
   37            rdf_read_nquads/3,          % +Input, -Quads, +Options
   38            rdf_process_ntriples/3,     % +Input, :CallBack, +Options
   39
   40            read_ntriple/2,             % +Stream, -Triple
   41            read_nquad/2,               % +Stream, -Quad
   42            read_ntuple/2               % +Stream, -TripleOrQuad
   43          ]).   44:- use_module(library(record)).   45:- use_module(library(uri)).   46:- use_module(library(option)).   47:- use_module(library(http/http_open)).   48:- use_module(library(semweb/rdf_db)).   49:- use_foreign_library(foreign(ntriples)).

Process files in the RDF N-Triples format

The library(semweb/rdf_ntriples) provides a fast reader for the RDF N-Triples and N-Quads format. N-Triples is a simple format, originally used to support the W3C RDF test suites. The current format has been extended and is a subset of the Turtle format (see library(semweb/turtle)).

The API of this library is almost identical to library(semweb/turtle). This module provides a plugin into rdf_load/2, making this predicate support the format ntriples and nquads.

See also
- http://www.w3.org/TR/n-triples/
To be done
- Sync with RDF 1.1. specification. */
   67:- predicate_options(rdf_read_ntriples/3, 3,
   68                     [ anon_prefix(any), % atom or node(_)
   69                       base_uri(atom),
   70                       error_count(-integer),
   71                       on_error(oneof([warning,error]))
   72                     ]).   73:- predicate_options(rdf_read_nquads/3, 3,
   74                     [ anon_prefix(any), % atom or node(_)
   75                       base_uri(atom),
   76                       error_count(-integer),
   77                       on_error(oneof([warning,error])),
   78                       graph(atom)
   79                     ]).   80:- predicate_options(rdf_process_ntriples/3, 3,
   81                     [ graph(atom),
   82                       pass_to(rdf_read_ntriples/3, 3)
   83                     ]).   84
   85:- meta_predicate
   86    rdf_process_ntriples(+,2,+).
 read_ntriple(+Stream, -Triple) is det
Read the next triple from Stream as Triple. Stream must have UTF-8 encoding.
Arguments:
Triple- is a term triple(Subject,Predicate,Object). Arguments follow the normal conventions of the RDF libraries. NodeID elements are mapped to node(Id). If end-of-file is reached, Triple is unified with end_of_file.
Errors
- syntax_error(Message) on syntax errors
 read_nquad(+Stream, -Quad) is det
Read the next quad from Stream as Quad. Stream must have UTF-8 encoding.
Arguments:
Quad- is a term quad(Subject,Predicate,Object,Graph). Arguments follow the normal conventions of the RDF libraries. NodeID elements are mapped to node(Id). If end-of-file is reached, Quad is unified with end_of_file.
Errors
- syntax_error(Message) on syntax errors
 read_ntuple(+Stream, -Tuple) is det
Read the next triple or quad from Stream as Tuple. Tuple is one of the terms below. See read_ntriple/2 and read_nquad/2 for details.
  122:- record nt_state(anon_prefix,
  123               graph,
  124               on_error:oneof([warning,error])=warning,
  125               format:oneof([ntriples,nquads]),
  126               error_count=0).
 rdf_read_ntriples(+Input, -Triples, +Options) is det
 rdf_read_nquads(+Input, -Quads, +Options) is det
True when Triples/Quads is a list of triples/quads from Input. Options:
anon_prefix(+AtomOrNode)
Prefix nodeIDs with this atom. If AtomOrNode is the term node(_), bnodes are returned as node(Id).
base_uri(+Atom)
Defines the default anon_prefix as _:<baseuri>_
on_error(Action)
One of warning (default) or error
error_count(-Count)
If on_error is warning, unify Count with th number of errors.
graph(+Graph)
For rdf_read_nquads/3, this defines the graph associated to triples loaded from the input. For rdf_read_ntriples/3 this opion is ignored.
Arguments:
Triples- is a list of rdf(Subject, Predicate, Object)
Quads- is a list of rdf(Subject, Predicate, Object, Graph)
  153rdf_read_ntriples(Input, Triples, Options) :-
  154    rdf_read_ntuples(Input, Triples, [format(ntriples)|Options]).
  155
  156rdf_read_nquads(Input, Triples, Options) :-
  157    rdf_read_ntuples(Input, Triples, [format(nquads)|Options]).
  158
  159
  160rdf_read_ntuples(Input, Triples, Options) :-
  161    setup_call_cleanup(
  162        open_input(Input, Stream, Close),
  163        (   init_state(Input, Options, State0),
  164            read_ntuples(Stream, Triples, State0, State)
  165        ),
  166        Close),
  167    option(error_count(Count), Options, _),
  168    nt_state_error_count(State, Count).
 rdf_process_ntriples(+Input, :CallBack, +Options)
Call-back interface, compatible with the other triple readers. In addition to the options from rdf_read_ntriples/3, this processes the option graph(Graph).
Arguments:
CallBack- is called as call(CallBack, Triples, Graph), where Triples is a list holding a single rdf(S,P,O) triple. Graph is passed from the graph option and unbound if this option is omitted.
  181rdf_process_ntriples(Input, CallBack, Options) :-
  182    setup_call_cleanup(
  183        open_input(Input, Stream, Close),
  184        (   init_state(Input, Options, State0),
  185            process_ntriple(Stream, CallBack, State0, State)
  186        ),
  187        Close),
  188    option(error_count(Count), Options, _),
  189    nt_state_error_count(State, Count).
 read_ntuples(+Stream, -Triples, +State0, -State)
  194read_ntuples(Stream, Triples, State0, State) :-
  195    read_ntuple(Stream, Triple0, State0, State1),
  196    (   Triple0 == end_of_file
  197    ->  Triples = [],
  198        State = State1
  199    ;   map_nodes(Triple0, Triple, State1, State2),
  200        Triples = [Triple|More],
  201        read_ntuples(Stream, More, State2, State)
  202    ).
 process_ntriple(+Stream, :CallBack, +State0, -State)
  206process_ntriple(Stream, CallBack, State0, State) :-
  207    read_ntuple(Stream, Triple0, State0, State1),
  208    (   Triple0 == end_of_file
  209    ->  State = State1
  210    ;   map_nodes(Triple0, Triple, State1, State2),
  211        nt_state_graph(State2, Graph),
  212        call(CallBack, [Triple], Graph),
  213        process_ntriple(Stream, CallBack, State2, State)
  214    ).
 read_ntuple(+Stream, -Tuple, +State0, -State) is det
True when Tuple is the next triple on Stream. May increment the error count on State.
  221read_ntuple(Stream, Triple, State0, State) :-
  222    nt_state_on_error(State0, error),
  223    !,
  224    read_ntuple(Stream, Triple, State0),
  225    State = State0.
  226read_ntuple(Stream, Triple, State0, State) :-
  227    catch(read_ntuple(Stream, Triple, State0), E, true),
  228    (   var(E)
  229    ->  State = State0
  230    ;   print_message(warning, E),
  231        nt_state_error_count(State0, EC0),
  232        EC is EC0+1,
  233        set_error_count_of_nt_state(EC, State0, State1),
  234        read_ntuple(Stream, Triple, State1, State)
  235    ).
  236
  237read_ntuple(Stream, Triple, State0) :-
  238    nt_state_format(State0, Format),
  239    format_read_ntuple(Format, Stream, Triple, State0).
  240
  241format_read_ntuple(ntriples, Stream, Triple, _) :-
  242    !,
  243    read_ntriple(Stream, Triple).
  244format_read_ntuple(nquads, Stream, Quad, State) :-
  245    !,
  246    read_ntuple(Stream, Tuple),
  247    to_quad(Tuple, Quad, State).
  248
  249to_quad(Quad, Quad, _) :-
  250    functor(Quad, quad, 4),
  251    !.
  252to_quad(triple(S,P,O), quad(S,P,O,Graph), State) :-
  253    nt_state_graph(State, Graph).
  254to_quad(end_of_file, end_of_file, _).
  255
  256
  257map_nodes(triple(S0,P0,O0), rdf(S,P,O), State0, State) :-
  258    map_node(S0, S, State0, State1),
  259    map_node(P0, P, State1, State2),
  260    map_node(O0, O, State2, State).
  261map_nodes(quad(S0,P0,O0,G0), rdf(S,P,O,G), State0, State) :-
  262    map_node(S0, S, State0, State1),
  263    map_node(P0, P, State1, State2),
  264    map_node(O0, O, State2, State3),
  265    map_node(G0, G, State3, State).
  266
  267map_node(node(NodeId), BNode, State, State) :-
  268    nt_state_anon_prefix(State, Prefix),
  269    atom(Prefix),
  270    !,
  271    atom_concat(Prefix, NodeId, BNode).
  272map_node(Node, Node, State, State).
 open_input(+Input, -Stream, -Close) is det
Open input for reading ntriples. The default encoding is UTF-8. If the input has a different encoding, Input must be a stream with the correct encoding and the stream type must be text.
  281open_input(stream(Stream), Stream, Close) :-
  282    !,
  283    (   stream_property(Stream, type(binary))
  284    ->  set_stream(Stream, encoding(utf8)),
  285        Close = set_stream(Stream, type(binary))
  286    ;   stream_property(Stream, encoding(Old)),
  287        (   n3_encoding(Old)
  288        ->  true
  289        ;   domain_error(ntriples_encoding, Old)
  290        ),
  291        Close = true
  292    ).
  293open_input(Stream, Stream, Close) :-
  294    is_stream(Stream),
  295    !,
  296    open_input(stream(Stream), Stream, Close).
  297open_input(atom(Atom), Stream, close(Stream)) :-
  298    !,
  299    atom_to_memory_file(Atom, MF),
  300    open_memory_file(MF, read, Stream, [free_on_close(true)]).
  301open_input(URL, Stream, close(Stream)) :-
  302    (   sub_atom(URL, 0, _, _, 'http://')
  303    ;   sub_atom(URL, 0, _, _, 'https://')
  304    ),
  305    !,
  306    http_open(URL, Stream, []),
  307    set_stream(Stream, encoding(utf8)).
  308open_input(URL, Stream, close(Stream)) :-
  309    uri_file_name(URL, Path),
  310    !,
  311    open(Path, read, Stream, [encoding(utf8)]).
  312open_input(File, Stream, close(Stream)) :-
  313    absolute_file_name(File, Path,
  314                       [ access(read),
  315                         extensions(['', nt, ntriples])
  316                       ]),
  317    open(Path, read, Stream, [encoding(utf8)]).
  318
  319n3_encoding(octet).
  320n3_encoding(ascii).
  321n3_encoding(iso_latin_1).
  322n3_encoding(utf8).
  323n3_encoding(text).
 init_state(+Input, +Options, -State) is det
  327init_state(In, Options, State) :-
  328    (   option(base_uri(BaseURI), Options)
  329    ->  true
  330    ;   In = stream(_)
  331    ->  BaseURI = []
  332    ;   is_stream(In)
  333    ->  BaseURI = []
  334    ;   In = atom(_)
  335    ->  BaseURI = []
  336    ;   uri_is_global(In),
  337        \+ is_absolute_file_name(In)        % Avoid C:Path in Windows
  338    ->  uri_normalized(In, BaseURI)
  339    ;   uri_file_name(BaseURI, In)
  340    ),
  341    (   option(anon_prefix(Prefix), Options)
  342    ->  true
  343    ;   BaseURI == []
  344    ->  Prefix = '_:genid'
  345    ;   atom_concat('_:', BaseURI, Prefix)
  346    ),
  347    option(on_error(OnError), Options, warning),
  348    % If the format is not set explicitly we assume N-Triples.
  349    % The format option _must_ be set before make_nt_state/2.
  350    option(format(Format), Options, ntriples),
  351    rdf_db:graph(Options, Graph),
  352    (   var(Graph)
  353    ->  Graph = user
  354    ;   true
  355    ),
  356    make_nt_state([ anon_prefix(Prefix),
  357                    on_error(OnError),
  358                    format(Format),
  359                    graph(Graph)
  360                  ], State).
  361
  362
  363                 /*******************************
  364                 *          RDF-DB HOOK         *
  365                 *******************************/
  366
  367:- multifile
  368    rdf_db:rdf_load_stream/3,
  369    rdf_db:rdf_file_type/2.
 rdf_db:rdf_load_stream(+Format, +Stream, :Options) is semidet
Plugin rule that supports loading the ntriples and nquads formats.
  376rdf_db:rdf_load_stream(ntriples, Stream, _Module:Options) :-
  377    rdf_db:graph(Options, Graph),
  378    rdf_transaction((  rdf_process_ntriples(Stream, assert_tuples, Options),
  379                       rdf_set_graph(Graph, modified(false))
  380                    ),
  381                    parse(Graph)).
  382rdf_db:rdf_load_stream(nquads, Stream, _Module:Options) :-
  383    rdf_db:graph(Options, Graph),
  384    (   var(Graph)
  385    ->  Graph = user
  386    ;   true
  387    ),
  388    rdf_transaction((  rdf_process_ntriples(Stream, assert_tuples, Options),
  389                       rdf_set_graph(Graph, modified(false))
  390                    ),
  391                    parse(Graph)).
  392
  393assert_tuples([], _).
  394assert_tuples([H|T], Graph) :-
  395    assert_tuple(H, Graph),
  396    assert_tuples(T, Graph).
  397
  398assert_tuple(rdf(S,P,O), Graph) :-
  399    rdf_assert(S,P,O,Graph).
  400assert_tuple(rdf(S,P,O,Graph), _) :-
  401    rdf_assert(S,P,O,Graph).
 rdf_db:rdf_file_type(+Extension, -Format)
Bind the ntriples reader to files with the extensions nt, ntriples and nquads.
  409rdf_db:rdf_file_type(nt,       ntriples).
  410rdf_db:rdf_file_type(ntriples, ntriples).
  411rdf_db:rdf_file_type(nq,       nquads).
  412rdf_db:rdf_file_type(nquads,   nquads)