View source with formatted comments or as raw
    1/*  Part of SWI-Prolog
    2
    3    Author:        Jan Wielemaker
    4    E-mail:        J.Wielemaker@vu.nl
    5    WWW:           http://www.swi-prolog.org
    6    Copyright (c)  2012-2016, University of Amsterdam
    7                              VU University Amsterdam
    8    All rights reserved.
    9
   10    Redistribution and use in source and binary forms, with or without
   11    modification, are permitted provided that the following conditions
   12    are met:
   13
   14    1. Redistributions of source code must retain the above copyright
   15       notice, this list of conditions and the following disclaimer.
   16
   17    2. Redistributions in binary form must reproduce the above copyright
   18       notice, this list of conditions and the following disclaimer in
   19       the documentation and/or other materials provided with the
   20       distribution.
   21
   22    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   23    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   24    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
   25    FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
   26    COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
   27    INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
   28    BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
   29    LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
   30    CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   31    LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
   32    ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
   33    POSSIBILITY OF SUCH DAMAGE.
   34*/
   35
   36:- module(dcg_basics,
   37	  [ white//0,			% <white inside line>
   38	    whites//0,			% <white inside line>*
   39	    blank//0,			% <blank>
   40	    blanks//0,			% <blank>*
   41	    nonblank//1,		% <nonblank>
   42	    nonblanks//1,		% <nonblank>* --> chars		(long)
   43	    blanks_to_nl//0,		% [space,tab,ret]*nl
   44	    string//1,			% <any>* -->chars		(short)
   45	    string_without//2,		% Exclude, -->chars		(long)
   46					% Characters
   47	    alpha_to_lower//1,		% Get lower|upper, return lower
   48					% Decimal numbers
   49	    digits//1,			% [0-9]* -->chars
   50	    digit//1,			% [0-9] --> char
   51	    integer//1,			% [+-][0-9]+ --> integer
   52	    float//1,			% [+-]?[0-9]+(.[0-9]*)?(e[+-]?[0-9]+)? --> float
   53	    number//1,			% integer | float
   54					% Hexadecimal numbers
   55	    xdigits//1,			% [0-9a-f]* --> 0-15*
   56	    xdigit//1,			% [0-9a-f] --> 0-15
   57	    xinteger//1,		% [0-9a-f]+ --> integer
   58
   59	    prolog_var_name//1,		% Read a Prolog variable name
   60
   61	    eos//0,			% Test end of input.
   62	    remainder//1,		% -List
   63
   64					% generation (TBD)
   65	    atom//1			% generate atom
   66	  ]).   67:- use_module(library(lists)).   68:- use_module(library(error)).   69
   70
   71/** <module> Various general DCG utilities
   72
   73This library provides various commonly  used   DCG  primitives acting on
   74list  of  character  *codes*.  Character   classification  is  based  on
   75code_type/2.
   76
   77This module started its life as  library(http/dcg_basics) to support the
   78HTTP protocol. Since then, it was increasingly  used in code that has no
   79relation to HTTP and therefore  this  library   was  moved  to  the core
   80library.
   81
   82@tbd	This is just a starting point. We need a comprehensive set of
   83	generally useful DCG primitives.
   84*/
   85
   86%%	string_without(+EndCodes, -Codes)// is det.
   87%
   88%	Take as many codes from the input  until the next character code
   89%	appears in the list EndCodes.  The   terminating  code itself is
   90%	left on the input.  Typical  use  is   to  read  upto  a defined
   91%	delimiter such as a newline  or   other  reserved character. For
   92%	example:
   93%
   94%	    ==
   95%	        ...,
   96%	        string_without("\n", RestOfLine)
   97%	    ==
   98%
   99%	@arg EndCodes is a list of character codes.
  100%	@see string//1.
  101
  102string_without(End, Codes) -->
  103	{ string(End), !,
  104	  string_codes(End, EndCodes)
  105	},
  106	list_string_without(EndCodes, Codes).
  107string_without(End, Codes) -->
  108	list_string_without(End, Codes).
  109
  110list_string_without(Not, [C|T]) -->
  111	[C],
  112	{ \+ memberchk(C, Not)
  113	}, !,
  114	list_string_without(Not, T).
  115list_string_without(_, []) -->
  116	[].
  117
  118%%	string(-Codes)// is nondet.
  119%
  120%	Take as few as possible tokens from the input, taking one more
  121%	each time on backtracking. This code is normally followed by a
  122%	test for a delimiter.  For example:
  123%
  124%	==
  125%	upto_colon(Atom) -->
  126%		string(Codes), ":", !,
  127%		{ atom_codes(Atom, Codes) }.
  128%	==
  129%
  130%	@see string_without//2.
  131
  132string([]) -->
  133	[].
  134string([H|T]) -->
  135	[H],
  136	string(T).
  137
  138%%	blanks// is det.
  139%
  140%	Skip zero or more white-space characters.
  141
  142blanks -->
  143	blank, !,
  144	blanks.
  145blanks -->
  146	[].
  147
  148%%	blank// is semidet.
  149%
  150%	Take next =space= character from input. Space characters include
  151%	newline.
  152%
  153%	@see white//0
  154
  155blank -->
  156	[C],
  157	{ nonvar(C),
  158	  code_type(C, space)
  159	}.
  160
  161%%	nonblanks(-Codes)// is det.
  162%
  163%	Take all =graph= characters
  164
  165nonblanks([H|T]) -->
  166	[H],
  167	{ code_type(H, graph)
  168	}, !,
  169	nonblanks(T).
  170nonblanks([]) -->
  171	[].
  172
  173%%	nonblank(-Code)// is semidet.
  174%
  175%	Code is the next non-blank (=graph=) character.
  176
  177nonblank(H) -->
  178	[H],
  179	{ code_type(H, graph)
  180	}.
  181
  182%%	blanks_to_nl// is semidet.
  183%
  184%	Take a sequence of blank//0 codes if blanks are followed by a
  185%	newline or end of the input.
  186
  187blanks_to_nl -->
  188	"\n", !.
  189blanks_to_nl -->
  190	blank, !,
  191	blanks_to_nl.
  192blanks_to_nl -->
  193	eos.
  194
  195%%	whites// is det.
  196%
  197%	Skip white space _inside_ a line.
  198%
  199%	@see blanks//0 also skips newlines.
  200
  201whites -->
  202	white, !,
  203	whites.
  204whites -->
  205	[].
  206
  207%%	white// is semidet.
  208%
  209%	Take next =white= character from input. White characters do
  210%	_not_ include newline.
  211
  212white -->
  213	[C],
  214	{ nonvar(C),
  215	  code_type(C, white)
  216	}.
  217
  218
  219		 /*******************************
  220		 *	 CHARACTER STUFF	*
  221		 *******************************/
  222
  223%%	alpha_to_lower(?C)// is semidet.
  224%
  225%	Read a letter (class  =alpha=)  and   return  it  as a lowercase
  226%	letter. If C is instantiated and the  DCG list is already bound,
  227%	C must be =lower= and matches both a lower and uppercase letter.
  228%	If the output list is unbound, its first element is bound to C.
  229%	For example:
  230%
  231%	  ==
  232%	  ?- alpha_to_lower(0'a, `AB`, R).
  233%	  R = [66].
  234%	  ?- alpha_to_lower(C, `AB`, R).
  235%	  C = 97, R = [66].
  236%	  ?- alpha_to_lower(0'a, L, R).
  237%	  L = [97|R].
  238%	  ==
  239
  240alpha_to_lower(L) -->
  241	[C],
  242	{   nonvar(C)
  243	->  code_type(C, alpha),
  244	    code_type(C, to_upper(L))
  245	;   L = C
  246	}.
  247
  248
  249		 /*******************************
  250		 *	      NUMBERS		*
  251		 *******************************/
  252
  253%%	digits(?Chars)// is det.
  254%%	digit(?Char)// is det.
  255%%	integer(?Integer)// is det.
  256%
  257%	Number processing. The predicate  digits//1   matches  a posibly
  258%	empty set of digits,  digit//1  processes   a  single  digit and
  259%	integer processes an  optional  sign   followed  by  a non-empty
  260%	sequence of digits into an integer.
  261
  262digits([H|T]) -->
  263	digit(H), !,
  264	digits(T).
  265digits([]) -->
  266	[].
  267
  268digit(C) -->
  269	[C],
  270	{ code_type(C, digit)
  271	}.
  272
  273integer(I, Head, Tail) :-
  274	nonvar(I), !,
  275	format(codes(Head, Tail), '~d', [I]).
  276integer(I) -->
  277	int_codes(Codes),
  278	{ number_codes(I, Codes)
  279	}.
  280
  281int_codes([C,D0|D]) -->
  282	sign(C), !,
  283	digit(D0),
  284	digits(D).
  285int_codes([D0|D]) -->
  286	digit(D0),
  287	digits(D).
  288
  289
  290%%	float(?Float)// is det.
  291%
  292%	Process a floating  point  number.   The  actual  conversion  is
  293%	controlled by number_codes/2.
  294
  295float(F, Head, Tail) :-
  296	float(F), !,
  297	with_output_to(codes(Head, Tail), write(F)).
  298float(F) -->
  299	number(F),
  300	{ float(F) }.
  301
  302%%	number(+Number)// is det.
  303%%	number(-Number)// is semidet.
  304%
  305%	Generate extract a number. Handles   both  integers and floating
  306%	point numbers.
  307
  308number(N, Head, Tail) :-
  309	number(N), !,
  310	format(codes(Head, Tail), '~w', N).
  311number(N) -->
  312	int_codes(I),
  313	(   dot,
  314	    digit(DF0),
  315	    digits(DF)
  316	->  {F = [0'., DF0|DF]}
  317	;   {F = []}
  318	),
  319	(   exp
  320	->  int_codes(DI),
  321	    {E=[0'e|DI]}
  322	;   {E = []}
  323	),
  324	{ append([I, F, E], Codes),
  325	  number_codes(N, Codes)
  326	}.
  327
  328sign(0'-) --> "-".
  329sign(0'+) --> "+".
  330
  331dot --> ".".
  332
  333exp --> "e".
  334exp --> "E".
  335
  336		 /*******************************
  337		 *	    HEX NUMBERS		*
  338		 *******************************/
  339
  340%%	xinteger(+Integer)// is det.
  341%%	xinteger(-Integer)// is semidet.
  342%
  343%	Generate or extract an integer from   a  sequence of hexadecimal
  344%	digits.
  345
  346xinteger(Val, Head, Tail) :-
  347	integer(Val),
  348	format(codes(Head, Tail), '~16r', [Val]).
  349xinteger(Val) -->
  350	xdigit(D0),
  351	xdigits(D),
  352	{ mkval([D0|D], 16, Val)
  353	}.
  354
  355%%	xdigit(-Weight)// is semidet.
  356%
  357%	True if the next code is a  hexdecimal digit with Weight. Weight
  358%	is between 0 and 15.
  359
  360xdigit(D) -->
  361	[C],
  362	{ code_type(C, xdigit(D))
  363	}.
  364
  365%%	xdigits(-WeightList)// is det.
  366%
  367%	List of weights of a sequence of hexadecimal codes.  WeightList
  368%	may be empty.
  369
  370xdigits([D0|D]) -->
  371	xdigit(D0), !,
  372	xdigits(D).
  373xdigits([]) -->
  374	[].
  375
  376mkval([W0|Weights], Base, Val) :-
  377	mkval(Weights, Base, W0, Val).
  378
  379mkval([], _, W, W).
  380mkval([H|T], Base, W0, W) :-
  381	W1 is W0*Base+H,
  382	mkval(T, Base, W1, W).
  383
  384
  385		 /*******************************
  386		 *	   END-OF-STRING	*
  387		 *******************************/
  388
  389%%	eos//
  390%
  391%	Matches  end-of-input.  The  implementation    behaves   as  the
  392%	following portable implementation:
  393%
  394%	  ==
  395%	  eos --> call(eos_).
  396%	  eos_([], []).
  397%	  ==
  398%
  399%	@tbd	This is a difficult concept and violates the _context free_
  400%		property of DCGs.  Explain the exact problems.
  401
  402eos([], []).
  403
  404%%	remainder(-List)//
  405%
  406%	Unify List with the remainder of the input.
  407
  408remainder(List, List, []).
  409
  410
  411		 /*******************************
  412		 *	   PROLOG SYNTAX		*
  413		 *******************************/
  414
  415%%	prolog_var_name(-Name:atom)// is semidet.
  416%
  417%	Matches a Prolog variable name. Primarily  intended to deal with
  418%	quasi quotations that embed Prolog variables.
  419
  420prolog_var_name(Name) -->
  421	[C0], { code_type(C0, prolog_var_start) }, !,
  422	prolog_id_cont(CL),
  423	{ atom_codes(Name, [C0|CL]) }.
  424
  425prolog_id_cont([H|T]) -->
  426	[H], { code_type(H, prolog_identifier_continue) }, !,
  427	prolog_id_cont(T).
  428prolog_id_cont([]) --> "".
  429
  430
  431		 /*******************************
  432		 *	     GENERATION		*
  433		 *******************************/
  434
  435%%	atom(++Atom)// is det.
  436%
  437%	Generate codes of Atom.  Current implementation uses write/1,
  438%	dealing with any Prolog term.  Atom must be ground though.
  439
  440atom(Atom, Head, Tail) :-
  441	must_be(ground, Atom),
  442	format(codes(Head, Tail), '~w', [Atom])