View source with raw comments or as raw
    1/*  Part of SWI-Prolog
    2
    3    Author:        Jan Wielemaker
    4    E-mail:        J.Wielemaker@vu.nl
    5    WWW:           http://www.swi-prolog.org
    6    Copyright (c)  2010-2013, University of Amsterdam
    7    All rights reserved.
    8
    9    Redistribution and use in source and binary forms, with or without
   10    modification, are permitted provided that the following conditions
   11    are met:
   12
   13    1. Redistributions of source code must retain the above copyright
   14       notice, this list of conditions and the following disclaimer.
   15
   16    2. Redistributions in binary form must reproduce the above copyright
   17       notice, this list of conditions and the following disclaimer in
   18       the documentation and/or other materials provided with the
   19       distribution.
   20
   21    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   22    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   23    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
   24    FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
   25    COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
   26    INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
   27    BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
   28    LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
   29    CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   30    LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
   31    ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
   32    POSSIBILITY OF SUCH DAMAGE.
   33*/
   34
   35:- module(rdf_ntriples_old,
   36          [ load_rdf_ntriples/2,        % +File, -Triples
   37            rdf_ntriple_part/4          % +Field, -Value, <DCG>
   38          ]).

/** <module> RDF N-triples parser (obsolete)

This module parses n-triple files as defined by the W3C RDF working group in http://www.w3.org/TR/rdf-testcases/#ntriples. This format is a simplified version of the RDF N3 notation. It is used in the *.nt files that describe the normative outcome of the RDF test-cases.

The returned list terms are of the form

rdf(Subject, Predicate, Object)

where

Subject
is an atom or node(Id) for anonymous nodes
Predicate
is an atom
Object
is an atom, node(Id), literal(Atom) or xml(Atom)
@deprecated This library will shortly be replaced with a stub that calls library(semweb/rdf_ntriples).
*/
   65%       load_rdf_ntriples(+Source, -Triples)
   66%
   67%       Load a file or stream to a list of rdf(S,P,O) triples.
   68
   69load_rdf_ntriples(File, Triples) :-
   70    open_nt_file(File, In, Close),
   71    call_cleanup(stream_to_triples(In, Triples), Close).
   72
   73%       open_nt_file(+Input, -Stream, -Close)
   74%
   75%       Open Input, returning Stream and a goal to cleanup Stream if it
   76%       was opened.
   77
   78open_nt_file(stream(Stream), Stream, true) :- !.
   79open_nt_file(Stream, Stream, true) :-
   80    is_stream(Stream),
   81    !.
   82open_nt_file(Spec, Stream, close(Stream)) :-
   83    absolute_file_name(Spec,
   84                       [ access(read),
   85                         extensions([nt,''])
   86                       ], Path),
   87    open(Path, read, Stream).
   88
   89
   90%       rdf_ntriple_part(+Type, -Value, <DCG>)
   91%
   92%       Parse one of the fields of  an   ntriple.  This  is used for the
   93%       SWI-Prolog Sesame (www.openrdf.org) implementation   to  realise
   94%       /servlets/removeStatements. I do not think   public  use of this
   95%       predicate should be stimulated.
   96
   97rdf_ntriple_part(subject, Subject) -->
   98    subject(Subject).
   99rdf_ntriple_part(predicate, Predicate) -->
  100    predicate(Predicate).
  101rdf_ntriple_part(object, Object) -->
  102    predicate(Object).
  103
  104
  105%       stream_to_triples(+Stream, -ListOfTriples)
  106%
  107%       Read Stream, returning all its triples
  108
  109stream_to_triples(In, Triples) :-
  110    read_line_to_codes(In, Line),
  111    (   Line == end_of_file
  112    ->  Triples = []
  113    ;   phrase(line(Triples, Tail), Line),
  114        stream_to_triples(In, Tail)
  115    ).
  116
  117line(Triples, Tail) -->
  118    wss,
  119    (   comment
  120    ->  {Triples = Tail}
  121    ;   triple(Triple)
  122    ->  {Triples = [Triple|Tail]}
  123    ).
  124
  125comment -->
  126    "#",
  127    !,
  128    skip_rest.
  129comment -->
  130    end_of_input.
  131
  132triple(rdf(Subject, Predicate, Object)) -->
  133    subject(Subject), ws, wss,
  134    predicate(Predicate), ws, wss,
  135    object(Object), wss, ".", wss.
  136
  137subject(Subject) -->
  138    uniref(Subject),
  139    !.
  140subject(Subject) -->
  141    node_id(Subject).
  142
  143predicate(Predicate) -->
  144    uniref(Predicate).
  145
  146object(Object) -->
  147    uniref(Object),
  148    !.
  149object(Object) -->
  150    node_id(Object).
  151object(Object) -->
  152    literal(Object).
  153
  154
  155uniref(URI) -->
  156    "<",
  157    escaped_uri_codes(Codes),
  158    ">",
  159    !,
  160    { atom_codes(URI, Codes)
  161    }.
  162
  163node_id(node(Id)) -->                   % anonymous nodes
  164    "_:",
  165    name_start(C0),
  166    name_codes(Codes),
  167    { atom_codes(Id, [C0|Codes])
  168    }.
  169
  170literal(Literal) -->
  171    lang_string(Literal),
  172    !.
  173literal(Literal) -->
  174    xml_string(Literal).
  175
  176
  177%       name_start(-Code)
  178%       name_codes(-ListfCodes)
  179%
  180%       Parse identifier names
  181
  182name_start(C) -->
  183    [C],
  184    { code_type(C, alpha)
  185    }.
  186
  187name_codes([C|T]) -->
  188    [C],
  189    { code_type(C, alnum)
  190    },
  191    !,
  192    name_codes(T).
  193name_codes([]) -->
  194    [].
  195
  196
  197%       escaped_uri_codes(-CodeList)
  198%
  199%       Decode string holding %xx escaped characters.
  200
  201escaped_uri_codes([]) -->
  202    [].
  203escaped_uri_codes([C|T]) -->
  204    "%", [D0,D1],
  205    { code_type(D0, xdigit(V0)),
  206      code_type(D1, xdigit(V1)),
  207      !,
  208      C is V0<<4 + V1
  209    },
  210    escaped_uri_codes(T).
  211escaped_uri_codes([C|T]) -->
  212    "\\u", [D0,D1,D2,D3],
  213    !,
  214    { code_type(D0, xdigit(V0)),
  215      code_type(D1, xdigit(V1)),
  216      code_type(D2, xdigit(V2)),
  217      code_type(D3, xdigit(V3)),
  218      C is V0<<12 + V1<<8 + V2<<4 + V3
  219    },
  220    escaped_uri_codes(T).
  221escaped_uri_codes([C|T]) -->
  222    "\\U", [D0,D1,D2,D3,D4,D5,D6,D7],
  223    !,
  224    { code_type(D0, xdigit(V0)),
  225      code_type(D1, xdigit(V1)),
  226      code_type(D2, xdigit(V2)),
  227      code_type(D3, xdigit(V3)),
  228      code_type(D4, xdigit(V4)),
  229      code_type(D5, xdigit(V5)),
  230      code_type(D6, xdigit(V6)),
  231      code_type(D7, xdigit(V7)),
  232      C is V0<<28 + V1<<24 + V2<<20 + V3<<16 +
  233           V4<<12 + V5<<8 + V6<<4 + V7
  234    },
  235    escaped_uri_codes(T).
  236escaped_uri_codes([C|T]) -->
  237    [C],
  238    escaped_uri_codes(T).
  239
  240
  241%       lang_string()
  242%
  243%       Process a language string
  244
  245lang_string(String) -->
  246    "\"",
  247    string(Codes),
  248    "\"",
  249    !,
  250    { atom_codes(Atom, Codes)
  251    },
  252    (   langsep
  253    ->  language(Lang),
  254        { String = literal(lang(Lang, Atom))
  255        }
  256    ;   "^^"
  257    ->  uniref(Type),
  258        { String = literal(type(Type, Atom))
  259        }
  260    ;   { String = literal(Atom)
  261        }
  262    ).
  263
  264langsep -->
  265    "-".
  266langsep -->
  267    "@".
  268
  269%       xml_string(String)
  270%
  271%       Handle xml"..."
  272
  273xml_string(xml(String)) -->
  274    "xml\"",                        % really no whitespace?
  275    string(Codes),
  276    "\"",
  277    { atom_codes(String, Codes)
  278    }.
  279
  280string([]) -->
  281    [].
  282string([C0|T]) -->
  283    string_char(C0),
  284    string(T).
  285
  286string_char(0'\\) -->
  287    "\\\\".
  288string_char(0'") -->
  289    "\\\"".
  290string_char(10) -->
  291    "\\n".
  292string_char(13) -->
  293    "\\r".
  294string_char(9) -->
  295    "\\t".
  296string_char(C) -->
  297    "\\u",
  298    '4xdigits'(C).
  299string_char(C) -->
  300    "\\U",
  301    '4xdigits'(C0),
  302    '4xdigits'(C1),
  303    { C is C0<<16 + C1
  304    }.
  305string_char(C) -->
  306    [C].
  307
  308'4xdigits'(C) -->
  309    [C0,C1,C2,C3],
  310    { code_type(C0, xdigit(V0)),
  311      code_type(C1, xdigit(V1)),
  312      code_type(C2, xdigit(V2)),
  313      code_type(C3, xdigit(V3)),
  314
  315      C is V0<<12 + V1<<8 + V2<<4 + V3
  316    }.
  317
  318%       language(-Lang)
  319%
  320%       Return xml:lang language identifier.
  321
  322language(Lang) -->
  323    lang_code(C0),
  324    lang_codes(Codes),
  325    { atom_codes(Lang, [C0|Codes])
  326    }.
  327
  328lang_code(C) -->
  329    [C],
  330    { C \== 0'.,
  331      \+ code_type(C, white)
  332    }.
  333
  334lang_codes([C|T]) -->
  335    lang_code(C),
  336    !,
  337    lang_codes(T).
  338lang_codes([]) -->
  339    [].
  340
  341
  342                 /*******************************
  343                 *             BASICS           *
  344                 *******************************/
  345
  346skip_rest(_,[]).
  347
  348ws -->
  349    [C],
  350    { code_type(C, white)
  351    }.
  352
  353end_of_input([], []).
  354
  355
  356wss -->
  357    ws,
  358    !,
  359    wss.
  360wss -->
  361    []