View source with raw comments or as raw
    1/*  Part of SWI-Prolog
    2
    3    Author:        Jan Wielemaker
    4    E-mail:        J.Wielemaker@vu.nl
    5    WWW:           http://www.swi-prolog.org
    6    Copyright (c)  2012-2016, University of Amsterdam
    7                              VU University Amsterdam
    8    All rights reserved.
    9
   10    Redistribution and use in source and binary forms, with or without
   11    modification, are permitted provided that the following conditions
   12    are met:
   13
   14    1. Redistributions of source code must retain the above copyright
   15       notice, this list of conditions and the following disclaimer.
   16
   17    2. Redistributions in binary form must reproduce the above copyright
   18       notice, this list of conditions and the following disclaimer in
   19       the documentation and/or other materials provided with the
   20       distribution.
   21
   22    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   23    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   24    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
   25    FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
   26    COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
   27    INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
   28    BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
   29    LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
   30    CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   31    LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
   32    ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
   33    POSSIBILITY OF SUCH DAMAGE.
   34*/
   35
   36:- module(dcg_basics,
   37	  [ white//0,			% <white inside line>
   38	    whites//0,			% <white inside line>*
   39	    blank//0,			% <blank>
   40	    blanks//0,			% <blank>*
   41	    nonblank//1,		% <nonblank>
   42	    nonblanks//1,		% <nonblank>* --> chars		(long)
   43	    blanks_to_nl//0,		% [space,tab,ret]*nl
   44	    string//1,			% <any>* -->chars		(short)
   45	    string_without//2,		% Exclude, -->chars		(long)
   46					% Characters
   47	    alpha_to_lower//1,		% Get lower|upper, return lower
   48					% Decimal numbers
   49	    digits//1,			% [0-9]* -->chars
   50	    digit//1,			% [0-9] --> char
   51	    integer//1,			% [+-][0-9]+ --> integer
   52	    float//1,			% [+-]?[0-9]+(.[0-9]*)?(e[+-]?[0-9]+)? --> float
   53	    number//1,			% integer | float
   54					% Hexadecimal numbers
   55	    xdigits//1,			% [0-9a-f]* --> 0-15*
   56	    xdigit//1,			% [0-9a-f] --> 0-15
   57	    xinteger//1,		% [0-9a-f]+ --> integer
   58
   59	    prolog_var_name//1,		% Read a Prolog variable name
   60
   61	    eos//0,			% Test end of input.
   62	    remainder//1,		% -List
   63
   64					% generation (TBD)
   65	    atom//1			% generate atom
   66	  ]).   67:- use_module(library(lists)).   68:- use_module(library(error)).

Various general DCG utilities

This library provides various commonly used DCG primitives acting on list of character codes. Character classification is based on code_type/2.

This module started its life as library(http/dcg_basics) to support the HTTP protocol. Since then, it was increasingly used in code that has no relation to HTTP and therefore this library was moved to the core library.

To be done
-
This is just a starting point. We need a comprehensive set of generally useful DCG primitives. */
 string_without(+EndCodes, -Codes)// is det
Take as many codes from the input until the next character code appears in the list EndCodes. The terminating code itself is left on the input. Typical use is to read upto a defined delimiter such as a newline or other reserved character. For example:
    ...,
    string_without("\n", RestOfLine)
Arguments:
EndCodes- is a list of character codes.
See also
- string//1.
  102string_without(End, Codes) -->
  103	{ string(End), !,
  104	  string_codes(End, EndCodes)
  105	},
  106	list_string_without(EndCodes, Codes).
  107string_without(End, Codes) -->
  108	list_string_without(End, Codes).
  109
  110list_string_without(Not, [C|T]) -->
  111	[C],
  112	{ \+ memberchk(C, Not)
  113	}, !,
  114	list_string_without(Not, T).
  115list_string_without(_, []) -->
  116	[].
 string(-Codes)// is nondet
Take as few as possible tokens from the input, taking one more each time on backtracking. This code is normally followed by a test for a delimiter. For example:
upto_colon(Atom) -->
        string(Codes), ":", !,
        { atom_codes(Atom, Codes) }.
See also
- string_without//2.
  132string([]) -->
  133	[].
  134string([H|T]) -->
  135	[H],
  136	string(T).
 blanks// is det
Skip zero or more white-space characters.
  142blanks -->
  143	blank, !,
  144	blanks.
  145blanks -->
  146	[].
 blank// is semidet
Take next space character from input. Space characters include newline.
See also
- white//0
  155blank -->
  156	[C],
  157	{ nonvar(C),
  158	  code_type(C, space)
  159	}.
 nonblanks(-Codes)// is det
Take all graph characters
  165nonblanks([H|T]) -->
  166	[H],
  167	{ code_type(H, graph)
  168	}, !,
  169	nonblanks(T).
  170nonblanks([]) -->
  171	[].
 nonblank(-Code)// is semidet
Code is the next non-blank (graph) character.
  177nonblank(H) -->
  178	[H],
  179	{ code_type(H, graph)
  180	}.
 blanks_to_nl// is semidet
Take a sequence of blank//0 codes if blanks are followed by a newline or end of the input.
  187blanks_to_nl -->
  188	"\n", !.
  189blanks_to_nl -->
  190	blank, !,
  191	blanks_to_nl.
  192blanks_to_nl -->
  193	eos.
 whites// is det
Skip white space inside a line.
See also
- blanks//0 also skips newlines.
  201whites -->
  202	white, !,
  203	whites.
  204whites -->
  205	[].
 white// is semidet
Take next white character from input. White characters do not include newline.
  212white -->
  213	[C],
  214	{ nonvar(C),
  215	  code_type(C, white)
  216	}.
  217
  218
  219		 /*******************************
  220		 *	 CHARACTER STUFF	*
  221		 *******************************/
 alpha_to_lower(?C)// is semidet
Read a letter (class alpha) and return it as a lowercase letter. If C is instantiated and the DCG list is already bound, C must be lower and matches both a lower and uppercase letter. If the output list is unbound, its first element is bound to C. For example:
?- alpha_to_lower(0'a, `AB`, R).
R = [66].
?- alpha_to_lower(C, `AB`, R).
C = 97, R = [66].
?- alpha_to_lower(0'a, L, R).
L = [97|R].
  240alpha_to_lower(L) -->
  241	[C],
  242	{   nonvar(C)
  243	->  code_type(C, alpha),
  244	    code_type(C, to_upper(L))
  245	;   L = C
  246	}.
  247
  248
  249		 /*******************************
  250		 *	      NUMBERS		*
  251		 *******************************/
 digits(?Chars)// is det
 digit(?Char)// is det
 integer(?Integer)// is det
Number processing. The predicate digits//1 matches a posibly empty set of digits, digit//1 processes a single digit and integer processes an optional sign followed by a non-empty sequence of digits into an integer.
  262digits([H|T]) -->
  263	digit(H), !,
  264	digits(T).
  265digits([]) -->
  266	[].
  267
  268digit(C) -->
  269	[C],
  270	{ code_type(C, digit)
  271	}.
  272
  273integer(I, Head, Tail) :-
  274	nonvar(I), !,
  275	format(codes(Head, Tail), '~d', [I]).
  276integer(I) -->
  277	int_codes(Codes),
  278	{ number_codes(I, Codes)
  279	}.
  280
  281int_codes([C,D0|D]) -->
  282	sign(C), !,
  283	digit(D0),
  284	digits(D).
  285int_codes([D0|D]) -->
  286	digit(D0),
  287	digits(D).
 float(?Float)// is det
Process a floating point number. The actual conversion is controlled by number_codes/2.
  295float(F, Head, Tail) :-
  296	float(F), !,
  297	with_output_to(codes(Head, Tail), write(F)).
  298float(F) -->
  299	number(F),
  300	{ float(F) }.
 number(+Number)// is det
number(-Number)// is semidet
Generate extract a number. Handles both integers and floating point numbers.
  308number(N, Head, Tail) :-
  309	number(N), !,
  310	format(codes(Head, Tail), '~w', N).
  311number(N) -->
  312	int_codes(I),
  313	(   dot,
  314	    digit(DF0),
  315	    digits(DF)
  316	->  {F = [0'., DF0|DF]}
  317	;   {F = []}
  318	),
  319	(   exp
  320	->  int_codes(DI),
  321	    {E=[0'e|DI]}
  322	;   {E = []}
  323	),
  324	{ append([I, F, E], Codes),
  325	  number_codes(N, Codes)
  326	}.
  327
  328sign(0'-) --> "-".
  329sign(0'+) --> "+".
  330
  331dot --> ".".
  332
  333exp --> "e".
  334exp --> "E".
  335
  336		 /*******************************
  337		 *	    HEX NUMBERS		*
  338		 *******************************/
 xinteger(+Integer)// is det
xinteger(-Integer)// is semidet
Generate or extract an integer from a sequence of hexadecimal digits.
  346xinteger(Val, Head, Tail) :-
  347	integer(Val),
  348	format(codes(Head, Tail), '~16r', [Val]).
  349xinteger(Val) -->
  350	xdigit(D0),
  351	xdigits(D),
  352	{ mkval([D0|D], 16, Val)
  353	}.
 xdigit(-Weight)// is semidet
True if the next code is a hexdecimal digit with Weight. Weight is between 0 and 15.
  360xdigit(D) -->
  361	[C],
  362	{ code_type(C, xdigit(D))
  363	}.
 xdigits(-WeightList)// is det
List of weights of a sequence of hexadecimal codes. WeightList may be empty.
  370xdigits([D0|D]) -->
  371	xdigit(D0), !,
  372	xdigits(D).
  373xdigits([]) -->
  374	[].
  375
  376mkval([W0|Weights], Base, Val) :-
  377	mkval(Weights, Base, W0, Val).
  378
  379mkval([], _, W, W).
  380mkval([H|T], Base, W0, W) :-
  381	W1 is W0*Base+H,
  382	mkval(T, Base, W1, W).
  383
  384
  385		 /*******************************
  386		 *	   END-OF-STRING	*
  387		 *******************************/
 eos//
Matches end-of-input. The implementation behaves as the following portable implementation:
eos --> call(eos_).
eos_([], []).
To be done
- This is a difficult concept and violates the context free property of DCGs. Explain the exact problems.
  402eos([], []).
 remainder(-List)//
Unify List with the remainder of the input.
  408remainder(List, List, []).
  409
  410
  411		 /*******************************
  412		 *	   PROLOG SYNTAX		*
  413		 *******************************/
 prolog_var_name(-Name:atom)// is semidet
Matches a Prolog variable name. Primarily intended to deal with quasi quotations that embed Prolog variables.
  420prolog_var_name(Name) -->
  421	[C0], { code_type(C0, prolog_var_start) }, !,
  422	prolog_id_cont(CL),
  423	{ atom_codes(Name, [C0|CL]) }.
  424
  425prolog_id_cont([H|T]) -->
  426	[H], { code_type(H, prolog_identifier_continue) }, !,
  427	prolog_id_cont(T).
  428prolog_id_cont([]) --> "".
  429
  430
  431		 /*******************************
  432		 *	     GENERATION		*
  433		 *******************************/
 atom(++Atom)// is det
Generate codes of Atom. Current implementation uses write/1, dealing with any Prolog term. Atom must be ground though.
  440atom(Atom, Head, Tail) :-
  441	must_be(ground, Atom),
  442	format(codes(Head, Tail), '~w', [Atom])