View source with formatted comments or as raw
    1/*  Part of SWI-Prolog
    2
    3    Author:        Jan Wielemaker
    4    E-mail:        J.Wielemaker@vu.nl
    5    WWW:           http://www.swi-prolog.org
    6    Copyright (c)  2013-2015, VU University Amsterdam
    7    All rights reserved.
    8
    9    Redistribution and use in source and binary forms, with or without
   10    modification, are permitted provided that the following conditions
   11    are met:
   12
   13    1. Redistributions of source code must retain the above copyright
   14       notice, this list of conditions and the following disclaimer.
   15
   16    2. Redistributions in binary form must reproduce the above copyright
   17       notice, this list of conditions and the following disclaimer in
   18       the documentation and/or other materials provided with the
   19       distribution.
   20
   21    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   22    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   23    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
   24    FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
   25    COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
   26    INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
   27    BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
   28    LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
   29    CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   30    LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
   31    ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
   32    POSSIBILITY OF SUCH DAMAGE.
   33*/
   34
   35:- module(rdf_ntriples,
   36          [ rdf_read_ntriples/3,        % +Input, -Triples, +Options
   37            rdf_read_nquads/3,          % +Input, -Quads, +Options
   38            rdf_process_ntriples/3,     % +Input, :CallBack, +Options
   39
   40            read_ntriple/2,             % +Stream, -Triple
   41            read_nquad/2,               % +Stream, -Quad
   42            read_ntuple/2               % +Stream, -TripleOrQuad
   43          ]).   44:- use_module(library(record)).   45:- use_module(library(uri)).   46:- use_module(library(option)).   47:- use_module(library(http/http_open)).   48:- use_module(library(semweb/rdf_db)).   49:- use_foreign_library(foreign(ntriples)).   50
   51/** <module> Process files in the RDF N-Triples format
   52
   53The library(semweb/rdf_ntriples) provides a  fast   reader  for  the RDF
   54N-Triples and N-Quads format. N-Triples is   a simple format, originally
   55used to support the W3C RDF  test   suites.  The current format has been
   56extended   and   is   a   subset    of     the    Turtle   format   (see
   57library(semweb/turtle)).
   58
   59The API of this library is   almost identical to library(semweb/turtle).
   60This module provides a plugin  into   rdf_load/2,  making this predicate
   61support the format =ntriples= and =nquads=.
   62
   63@see http://www.w3.org/TR/n-triples/
   64@tbd Sync with RDF 1.1. specification.
   65*/
   66
   67:- predicate_options(rdf_read_ntriples/3, 3,
   68                     [ anon_prefix(any), % atom or node(_)
   69                       base_uri(atom),
   70                       error_count(-integer),
   71                       on_error(oneof([warning,error]))
   72                     ]).   73:- predicate_options(rdf_read_nquads/3, 3,
   74                     [ anon_prefix(any), % atom or node(_)
   75                       base_uri(atom),
   76                       error_count(-integer),
   77                       on_error(oneof([warning,error])),
   78                       graph(atom)
   79                     ]).   80:- predicate_options(rdf_process_ntriples/3, 3,
   81                     [ graph(atom),
   82                       pass_to(rdf_read_ntriples/3, 3)
   83                     ]).   84
   85:- meta_predicate
   86    rdf_process_ntriples(+,2,+).   87
   88
   89%!  read_ntriple(+Stream, -Triple) is det.
   90%
   91%   Read the next triple from Stream as Triple. Stream must have UTF-8
   92%   encoding.
   93%
   94%   @param  Triple is a term triple(Subject,Predicate,Object).
   95%           Arguments follow the normal conventions of the RDF
   96%           libraries.  NodeID elements are mapped to node(Id).
   97%           If end-of-file is reached, Triple is unified with
   98%           =end_of_file=.
   99%   @error  syntax_error(Message) on syntax errors
  100
  101%!  read_nquad(+Stream, -Quad) is det.
  102%
  103%   Read the next quad from Stream as Quad.  Stream must have UTF-8
  104%   encoding.
  105%
  106%   @param  Quad is a term quad(Subject,Predicate,Object,Graph).
  107%           Arguments follow the normal conventions of the RDF
  108%           libraries.  NodeID elements are mapped to node(Id).
  109%           If end-of-file is reached, Quad is unified with
  110%           =end_of_file=.
  111%   @error  syntax_error(Message) on syntax errors
  112
  113%!  read_ntuple(+Stream, -Tuple) is det.
  114%
  115%   Read the next triple or quad from  Stream as Tuple. Tuple is one
  116%   of the terms below.  See   read_ntriple/2  and  read_nquad/2 for
  117%   details.
  118%
  119%     - triple(Subject,Predicate,Object)
  120%     - quad(Subject,Predicate,Object,Graph).
  121
  122:- record nt_state(anon_prefix,
  123               graph,
  124               on_error:oneof([warning,error])=warning,
  125               format:oneof([ntriples,nquads]),
  126               error_count=0).  127
  128
  129%!  rdf_read_ntriples(+Input, -Triples, +Options) is det.
  130%!  rdf_read_nquads(+Input, -Quads, +Options) is det.
  131%
  132%   True when Triples/Quads is a list   of triples/quads from Input.
  133%   Options:
  134%
  135%     * anon_prefix(+AtomOrNode)
  136%     Prefix nodeIDs with this atom.  If AtomOrNode is the term
  137%     node(_), bnodes are returned as node(Id).
  138%     * base_uri(+Atom)
  139%     Defines the default anon_prefix as _:<baseuri>_
  140%     * on_error(Action)
  141%     One of =warning= (default) or =error=
  142%     * error_count(-Count)
  143%     If =on_error= is =warning=, unify Count with th number of
  144%     errors.
  145%     * graph(+Graph)
  146%     For rdf_read_nquads/3, this defines the graph associated
  147%     to _triples_ loaded from the input.  For rdf_read_ntriples/3
  148%     this opion is ignored.
  149%
  150%   @arg Triples is a list of rdf(Subject, Predicate, Object)
  151%   @arg Quads is a list of rdf(Subject, Predicate, Object, Graph)
  152
  153rdf_read_ntriples(Input, Triples, Options) :-
  154    rdf_read_ntuples(Input, Triples, [format(ntriples)|Options]).
  155
  156rdf_read_nquads(Input, Triples, Options) :-
  157    rdf_read_ntuples(Input, Triples, [format(nquads)|Options]).
  158
  159
  160rdf_read_ntuples(Input, Triples, Options) :-
  161    setup_call_cleanup(
  162        open_input(Input, Stream, Close),
  163        (   init_state(Input, Options, State0),
  164            read_ntuples(Stream, Triples, State0, State)
  165        ),
  166        Close),
  167    option(error_count(Count), Options, _),
  168    nt_state_error_count(State, Count).
  169
  170%!  rdf_process_ntriples(+Input, :CallBack, +Options)
  171%
  172%   Call-back interface, compatible with the   other triple readers.
  173%   In  addition  to  the  options  from  rdf_read_ntriples/3,  this
  174%   processes the option graph(Graph).
  175%
  176%   @param  CallBack is called as call(CallBack, Triples, Graph),
  177%           where Triples is a list holding a single rdf(S,P,O)
  178%           triple.  Graph is passed from the =graph= option and
  179%           unbound if this option is omitted.
  180
  181rdf_process_ntriples(Input, CallBack, Options) :-
  182    setup_call_cleanup(
  183        open_input(Input, Stream, Close),
  184        (   init_state(Input, Options, State0),
  185            process_ntriple(Stream, CallBack, State0, State)
  186        ),
  187        Close),
  188    option(error_count(Count), Options, _),
  189    nt_state_error_count(State, Count).
  190
  191
  192%!  read_ntuples(+Stream, -Triples, +State0, -State)
  193
  194read_ntuples(Stream, Triples, State0, State) :-
  195    read_ntuple(Stream, Triple0, State0, State1),
  196    (   Triple0 == end_of_file
  197    ->  Triples = [],
  198        State = State1
  199    ;   map_nodes(Triple0, Triple, State1, State2),
  200        Triples = [Triple|More],
  201        read_ntuples(Stream, More, State2, State)
  202    ).
  203
  204%!  process_ntriple(+Stream, :CallBack, +State0, -State)
  205
  206process_ntriple(Stream, CallBack, State0, State) :-
  207    read_ntuple(Stream, Triple0, State0, State1),
  208    (   Triple0 == end_of_file
  209    ->  State = State1
  210    ;   map_nodes(Triple0, Triple, State1, State2),
  211        nt_state_graph(State2, Graph),
  212        call(CallBack, [Triple], Graph),
  213        process_ntriple(Stream, CallBack, State2, State)
  214    ).
  215
  216%!  read_ntuple(+Stream, -Tuple, +State0, -State) is det.
  217%
  218%   True when Tuple is the next triple on Stream. May increment
  219%   the error count on State.
  220
  221read_ntuple(Stream, Triple, State0, State) :-
  222    nt_state_on_error(State0, error),
  223    !,
  224    read_ntuple(Stream, Triple, State0),
  225    State = State0.
  226read_ntuple(Stream, Triple, State0, State) :-
  227    catch(read_ntuple(Stream, Triple, State0), E, true),
  228    (   var(E)
  229    ->  State = State0
  230    ;   print_message(warning, E),
  231        nt_state_error_count(State0, EC0),
  232        EC is EC0+1,
  233        set_error_count_of_nt_state(EC, State0, State1),
  234        read_ntuple(Stream, Triple, State1, State)
  235    ).
  236
  237read_ntuple(Stream, Triple, State0) :-
  238    nt_state_format(State0, Format),
  239    format_read_ntuple(Format, Stream, Triple, State0).
  240
  241format_read_ntuple(ntriples, Stream, Triple, _) :-
  242    !,
  243    read_ntriple(Stream, Triple).
  244format_read_ntuple(nquads, Stream, Quad, State) :-
  245    !,
  246    read_ntuple(Stream, Tuple),
  247    to_quad(Tuple, Quad, State).
  248
  249to_quad(Quad, Quad, _) :-
  250    functor(Quad, quad, 4),
  251    !.
  252to_quad(triple(S,P,O), quad(S,P,O,Graph), State) :-
  253    nt_state_graph(State, Graph).
  254to_quad(end_of_file, end_of_file, _).
  255
  256
  257map_nodes(triple(S0,P0,O0), rdf(S,P,O), State0, State) :-
  258    map_node(S0, S, State0, State1),
  259    map_node(P0, P, State1, State2),
  260    map_node(O0, O, State2, State).
  261map_nodes(quad(S0,P0,O0,G0), rdf(S,P,O,G), State0, State) :-
  262    map_node(S0, S, State0, State1),
  263    map_node(P0, P, State1, State2),
  264    map_node(O0, O, State2, State3),
  265    map_node(G0, G, State3, State).
  266
  267map_node(node(NodeId), BNode, State, State) :-
  268    nt_state_anon_prefix(State, Prefix),
  269    atom(Prefix),
  270    !,
  271    atom_concat(Prefix, NodeId, BNode).
  272map_node(Node, Node, State, State).
  273
  274
  275%!  open_input(+Input, -Stream, -Close) is det.
  276%
  277%   Open input for reading ntriples. The  default encoding is UTF-8.
  278%   If the input has a different encoding,   Input  must be a stream
  279%   with the correct encoding and the stream type must be =text=.
  280
  281open_input(stream(Stream), Stream, Close) :-
  282    !,
  283    (   stream_property(Stream, type(binary))
  284    ->  set_stream(Stream, encoding(utf8)),
  285        Close = set_stream(Stream, type(binary))
  286    ;   stream_property(Stream, encoding(Old)),
  287        (   n3_encoding(Old)
  288        ->  true
  289        ;   domain_error(ntriples_encoding, Old)
  290        ),
  291        Close = true
  292    ).
  293open_input(Stream, Stream, Close) :-
  294    is_stream(Stream),
  295    !,
  296    open_input(stream(Stream), Stream, Close).
  297open_input(atom(Atom), Stream, close(Stream)) :-
  298    !,
  299    atom_to_memory_file(Atom, MF),
  300    open_memory_file(MF, read, Stream, [free_on_close(true)]).
  301open_input(URL, Stream, close(Stream)) :-
  302    (   sub_atom(URL, 0, _, _, 'http://')
  303    ;   sub_atom(URL, 0, _, _, 'https://')
  304    ),
  305    !,
  306    http_open(URL, Stream, []),
  307    set_stream(Stream, encoding(utf8)).
  308open_input(URL, Stream, close(Stream)) :-
  309    uri_file_name(URL, Path),
  310    !,
  311    open(Path, read, Stream, [encoding(utf8)]).
  312open_input(File, Stream, close(Stream)) :-
  313    absolute_file_name(File, Path,
  314                       [ access(read),
  315                         extensions(['', nt, ntriples])
  316                       ]),
  317    open(Path, read, Stream, [encoding(utf8)]).
  318
  319n3_encoding(octet).
  320n3_encoding(ascii).
  321n3_encoding(iso_latin_1).
  322n3_encoding(utf8).
  323n3_encoding(text).
  324
  325%!  init_state(+Input, +Options, -State) is det.
  326
  327init_state(In, Options, State) :-
  328    (   option(base_uri(BaseURI), Options)
  329    ->  true
  330    ;   In = stream(_)
  331    ->  BaseURI = []
  332    ;   is_stream(In)
  333    ->  BaseURI = []
  334    ;   In = atom(_)
  335    ->  BaseURI = []
  336    ;   uri_is_global(In),
  337        \+ is_absolute_file_name(In)        % Avoid C:Path in Windows
  338    ->  uri_normalized(In, BaseURI)
  339    ;   uri_file_name(BaseURI, In)
  340    ),
  341    (   option(anon_prefix(Prefix), Options)
  342    ->  true
  343    ;   BaseURI == []
  344    ->  Prefix = '_:genid'
  345    ;   atom_concat('_:', BaseURI, Prefix)
  346    ),
  347    option(on_error(OnError), Options, warning),
  348    % If the format is not set explicitly we assume N-Triples.
  349    % The format option _must_ be set before make_nt_state/2.
  350    option(format(Format), Options, ntriples),
  351    rdf_db:graph(Options, Graph),
  352    (   var(Graph)
  353    ->  Graph = user
  354    ;   true
  355    ),
  356    make_nt_state([ anon_prefix(Prefix),
  357                    on_error(OnError),
  358                    format(Format),
  359                    graph(Graph)
  360                  ], State).
  361
  362
  363                 /*******************************
  364                 *          RDF-DB HOOK         *
  365                 *******************************/
  366
  367:- multifile
  368    rdf_db:rdf_load_stream/3,
  369    rdf_db:rdf_file_type/2.  370
  371%!  rdf_db:rdf_load_stream(+Format, +Stream, :Options) is semidet.
  372%
  373%   Plugin rule that supports loading   the  =ntriples= and =nquads=
  374%   formats.
  375
  376rdf_db:rdf_load_stream(ntriples, Stream, _Module:Options) :-
  377    rdf_db:graph(Options, Graph),
  378    rdf_transaction((  rdf_process_ntriples(Stream, assert_tuples, Options),
  379                       rdf_set_graph(Graph, modified(false))
  380                    ),
  381                    parse(Graph)).
  382rdf_db:rdf_load_stream(nquads, Stream, _Module:Options) :-
  383    rdf_db:graph(Options, Graph),
  384    (   var(Graph)
  385    ->  Graph = user
  386    ;   true
  387    ),
  388    rdf_transaction((  rdf_process_ntriples(Stream, assert_tuples, Options),
  389                       rdf_set_graph(Graph, modified(false))
  390                    ),
  391                    parse(Graph)).
  392
  393assert_tuples([], _).
  394assert_tuples([H|T], Graph) :-
  395    assert_tuple(H, Graph),
  396    assert_tuples(T, Graph).
  397
  398assert_tuple(rdf(S,P,O), Graph) :-
  399    rdf_assert(S,P,O,Graph).
  400assert_tuple(rdf(S,P,O,Graph), _) :-
  401    rdf_assert(S,P,O,Graph).
  402
  403
  404%!  rdf_db:rdf_file_type(+Extension, -Format)
  405%
  406%   Bind the ntriples reader to  files   with  the  extensions =nt=,
  407%   =ntriples= and =nquads=.
  408
  409rdf_db:rdf_file_type(nt,       ntriples).
  410rdf_db:rdf_file_type(ntriples, ntriples).
  411rdf_db:rdf_file_type(nq,       nquads).
  412rdf_db:rdf_file_type(nquads,   nquads)