View source with formatted comments or as raw
    1/*  Part of SWI-Prolog
    2
    3    Author:        Jan Wielemaker
    4    E-mail:        J.Wielemaker@vu.nl
    5    WWW:           http://www.swi-prolog.org
    6    Copyright (C): 2008-2016, University of Amsterdam
    7                              VU University Amsterdam
    8    All rights reserved.
    9
   10    Redistribution and use in source and binary forms, with or without
   11    modification, are permitted provided that the following conditions
   12    are met:
   13
   14    1. Redistributions of source code must retain the above copyright
   15       notice, this list of conditions and the following disclaimer.
   16
   17    2. Redistributions in binary form must reproduce the above copyright
   18       notice, this list of conditions and the following disclaimer in
   19       the documentation and/or other materials provided with the
   20       distribution.
   21
   22    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   23    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   24    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
   25    FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
   26    COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
   27    INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
   28    BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
   29    LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
   30    CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   31    LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
   32    ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
   33    POSSIBILITY OF SUCH DAMAGE.
   34*/
   35
   36:- module(pure_input,
   37          [ phrase_from_file/2,         % :Grammar, +File
   38            phrase_from_file/3,         % :Grammar, +File, +Options
   39            phrase_from_stream/2,       % :Grammar, +Stream
   40            stream_to_lazy_list/2,      % :Stream -List
   41
   42            syntax_error//1,            % +ErrorTerm
   43                                        % Low level interface
   44            lazy_list_location//1,      % -Location
   45            lazy_list_character_count//1 % -CharacterCount
   46          ]).   47:- use_module(library(error)).   48:- set_prolog_flag(generate_debug_info, false).   49
   50/** <module> Pure Input from files and streams
   51
   52This module is part of pio.pl,   dealing with _pure_ _input_: processing
   53input streams from the outside  world   using  pure  predicates, notably
   54grammar rules (DCG).  Using  pure   predicates  makes  non-deterministic
   55processing of input much simpler.
   56
   57Pure input uses attributed variables  to   read  input from the external
   58source into a list _|on demand|_. The   overhead of lazy reading is more
   59than compensated for by using block reads based on read_pending_codes/3.
   60
   61Ulrich Neumerkel came up with the idea to use coroutining for creating a
   62_lazy list_. His implementation  repositioned  the   file  to  deal with
   63re-reading  that  can  be  necessary    on   backtracking.  The  current
   64implementation uses destructive assignment together  with more low-level
   65attribute handling to realise pure input on any (buffered) stream.
   66
   67@tbd    Provide support for alternative input readers, e.g. reading
   68        terms, tokens, etc.
   69*/
   70
   71:- predicate_options(phrase_from_file/3, 3,
   72                     [ pass_to(system:open/4, 4)
   73                     ]).   74
   75%!  phrase_from_file(:Grammar, +File) is nondet.
   76%
   77%   Process the content of File  using   the  DCG  rule Grammar. The
   78%   space usage of this mechanism depends on   the length of the not
   79%   committed part of Grammar. Committed parts of the temporary list
   80%   are reclaimed by the  garbage  collector,   while  the  list  is
   81%   extended on demand due to  unification   of  the attributed tail
   82%   variable. Below is an example that counts  the number of times a
   83%   string appears in  a  file.   The  library  dcg/basics  provides
   84%   string//1 matching an arbitrary string   and  remainder//1 which
   85%   matches the remainder of the input without parsing.
   86%
   87%   ==
   88%   :- use_module(library(dcg/basics)).
   89%
   90%   file_contains(File, Pattern) :-
   91%           phrase_from_file(match(Pattern), File).
   92%
   93%   match(Pattern) -->
   94%           string(_),
   95%           string(Pattern),
   96%           remainder(_).
   97%
   98%   match_count(File, Pattern, Count) :-
   99%           aggregate_all(count, file_contains(File, Pattern), Count).
  100%   ==
  101%
  102%   This can be called as (note that   the  pattern must be a string
  103%   (code list)):
  104%
  105%   ==
  106%   ?- match_count('pure_input.pl', `file`, Count).
  107%   ==
  108
  109:- meta_predicate
  110    phrase_from_file(//, +),
  111    phrase_from_file(//, +, +),
  112    phrase_from_stream(//, +).  113
  114phrase_from_file(Grammar, File) :-
  115    phrase_from_file(Grammar, File, []).
  116
  117%!  phrase_from_file(:Grammar, +File, +Options) is nondet.
  118%
  119%   As phrase_from_file/2, providing additional Options. Options are
  120%   passed to open/4.
  121
  122phrase_from_file(Grammar, File, Options) :-
  123    setup_call_cleanup(
  124        open(File, read, In, Options),
  125        phrase_from_stream(Grammar, In),
  126        close(In)).
  127
  128%!  phrase_from_stream(:Grammer, +Stream)
  129%
  130%   Run Grammer against the character codes   on Stream. Stream must
  131%   be buffered.
  132
  133phrase_from_stream(Grammar, In) :-
  134    stream_to_lazy_list(In, List),
  135    phrase(Grammar, List).
  136
  137%!  syntax_error(+Error)//
  138%
  139%   Throw the syntax error Error  at   the  current  location of the
  140%   input. This predicate is designed to  be called from the handler
  141%   of phrase_from_file/3.
  142%
  143%   @throws error(syntax_error(Error), Location)
  144
  145syntax_error(Error) -->
  146    lazy_list_location(Location),
  147    { throw(error(syntax_error(Error), Location))
  148    }.
  149
  150%!  lazy_list_location(-Location)// is det.
  151%
  152%   Determine current (error) location in  a   lazy  list. True when
  153%   Location is an (error) location term that represents the current
  154%   location in the DCG list.
  155%
  156%   @arg    Location is a term file(Name, Line, LinePos, CharNo) or
  157%           stream(Stream, Line, LinePos, CharNo) if no file is
  158%           associated to the stream RestLazyList.  Finally, if the
  159%           Lazy list is fully materialized (ends in =|[]|=), Location
  160%           is unified with `end_of_file-CharCount`.
  161%   @see    lazy_list_character_count//1 only provides the character
  162%           count.
  163
  164lazy_list_location(Location, Here, Here) :-
  165    lazy_list_location(Here, Location).
  166
  167lazy_list_location(Here, Location) :-
  168    '$skip_list'(Skipped, Here, Tail),
  169    (   attvar(Tail)
  170    ->  get_attr(Tail, pure_input, State),
  171        State = lazy_input(Stream, PrevPos, Pos, _),
  172        Details = [Line, LinePos, CharNo],
  173        (   stream_property(Stream, file_name(File))
  174        ->  PosParts = [file, File|Details]
  175        ;   PosParts = [stream, Stream|Details]
  176        ),
  177        Location =.. PosParts,
  178        (   PrevPos == (-)                  % nothing is read.
  179        ->  Line = 1, LinePos = 0, CharNo = 0
  180        ;   stream_position_data(char_count, Pos, EndRecordCharNo),
  181            CharNo is EndRecordCharNo - Skipped,
  182            set_stream_position(Stream, PrevPos),
  183            stream_position_data(char_count, PrevPos, StartRecordCharNo),
  184            Skip is CharNo-StartRecordCharNo,
  185            forall(between(1, Skip, _), get_code(Stream, _)),
  186            stream_property(Stream, position(ErrorPos)),
  187            stream_position_data(line_count, ErrorPos, Line),
  188            stream_position_data(line_position, ErrorPos, LinePos)
  189        )
  190    ;   Tail == []
  191    ->  Location = end_of_file-Skipped
  192    ;   type_error(lazy_list, Here)
  193    ).
  194
  195
  196%!  lazy_list_character_count(-CharCount)//
  197%
  198%   True when CharCount is the current   character count in the Lazy
  199%   list. The character count is computed by finding the distance to
  200%   the next frozen tail of the lazy list. CharCount is one of:
  201%
  202%     - An integer
  203%     - A term end_of_file-Count
  204%
  205%   @see    lazy_list_location//1 provides full details of the location
  206%           for error reporting.
  207
  208lazy_list_character_count(Location, Here, Here) :-
  209    lazy_list_character_count(Here, Location).
  210
  211lazy_list_character_count(Here, CharNo) :-
  212    '$skip_list'(Skipped, Here, Tail),
  213    (   attvar(Tail)
  214    ->  get_attr(Tail, pure_input, State),
  215        arg(3, State, Pos),
  216        stream_position_data(char_count, Pos, EndRecordCharNo),
  217        CharNo is EndRecordCharNo - Skipped
  218    ;   Tail == []
  219    ->  CharNo = end_of_file-Skipped
  220    ;   type_error(lazy_list, Here)
  221    ).
  222
  223
  224%!  stream_to_lazy_list(+Stream, -List) is det.
  225%
  226%   Create a lazy list representing the   character codes in Stream.
  227%   List is a  partial  list  ending   in  an  attributed  variable.
  228%   Unifying this variable reads the next   block of data. The block
  229%   is stored with the attribute value such that there is no need to
  230%   re-read it.
  231%
  232%   @compat Unlike the previous version of this predicate this
  233%           version does not require a repositionable stream.  It
  234%           does require a buffer size of at least the maximum
  235%           number of bytes of a multi-byte sequence (6).
  236
  237stream_to_lazy_list(Stream, List) :-
  238    (   stream_property(Stream, buffer(false))
  239    ->  permission_error(create, lazy_list, Stream)
  240    ;   true
  241    ),
  242    stream_to_lazy_list(Stream, -, List).
  243
  244stream_to_lazy_list(Stream, PrevPos, List) :-
  245    stream_property(Stream, position(Pos)),
  246    put_attr(List, pure_input, lazy_input(Stream, PrevPos, Pos, _)).
  247
  248attr_unify_hook(State, Value) :-
  249    notrace(attr_unify_hook_ndebug(State, Value)).
  250
  251attr_unify_hook_ndebug(State, Value) :-
  252    State = lazy_input(Stream, _PrevPos, Pos, Read),
  253    (   var(Read)
  254    ->  fill_buffer(Stream),
  255        read_pending_codes(Stream, NewList, Tail),
  256        (   Tail == []
  257        ->  nb_setarg(4, State, []),
  258            Value = []
  259        ;   stream_to_lazy_list(Stream, Pos, Tail),
  260            nb_linkarg(4, State, NewList),
  261            Value = NewList
  262        )
  263    ;   Value = Read
  264    )