/*  Part of SWI-Prolog

    Author:        Jan Wielemaker
    E-mail:        J.Wielemaker@vu.nl
    WWW:           http://www.swi-prolog.org
    Copyright (c)  2009-2017, VU University Amsterdam
    All rights reserved.

    Redistribution and use in source and binary forms, with or without
    modification, are permitted provided that the following conditions
    are met:

    1. Redistributions of source code must retain the above copyright
       notice, this list of conditions and the following disclaimer.

    2. Redistributions in binary form must reproduce the above copyright
       notice, this list of conditions and the following disclaimer in
       the documentation and/or other materials provided with the
       distribution.

    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
    FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
    COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
    INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
    BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
    LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
    CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
    LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
    ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
    POSSIBILITY OF SUCH DAMAGE.
*/

:- module(csv,
          [ csv//1,                     % +Rows
            csv//2,                     % +Rows, +Options

            csv_read_file/2,            % +File, -Data
            csv_read_file/3,            % +File, -Data, +Options
            csv_read_file_row/3,        % +File, -Row, +Options
            csv_read_row/3,             % +Stream, -Row, +CompiledOptions
            csv_options/2,              % -Compiled, +Options

            csv_write_file/2,           % +File, +Data
            csv_write_file/3,           % +File, +Data, +Options
            csv_write_stream/3          % +Stream, +Data, +Options
          ]).
:- use_module(library(record)).
:- use_module(library(error)).
:- use_module(library(pure_input)).
:- use_module(library(debug)).
:- use_module(library(option)).

/** <module> Process CSV (Comma-Separated Values) data

This library parses and generates CSV data. CSV data is represented in
Prolog as a list of rows. Each row is a compound term, where all rows
have the same name and arity.
*/
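% Illustrative example of the row representation described above (the
% query and its data are hypothetical, not part of the library): parsing
% two comma-separated records yields a list of row/2 terms, with numeric
% fields converted by default.
%
%   ?- phrase(csv(Rows), `name,age\njan,27\n`).
%   Rows = [row(name, age), row(jan, 27)].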
:- predicate_options(csv//2, 2,
                     [ separator(nonneg),       % must be a character code
                       strip(boolean),
                       ignore_quotes(boolean),
                       convert(boolean),
                       case(oneof([down,preserve,up])),
                       functor(atom),
                       arity(-nonneg),          % actually ?nonneg
                       match_arity(boolean)
                     ]).
:- predicate_options(csv_read_file/3, 3,
                     [ pass_to(csv//2, 2),
                       pass_to(phrase_from_file/3, 3)
                     ]).
:- predicate_options(csv_read_file_row/3, 3,
                     [ pass_to(csv//2, 2),
                       pass_to(open/4, 4)
                     ]).
:- predicate_options(csv_write_file/3, 3,
                     [ pass_to(csv//2, 2),
                       pass_to(open/4, 4)
                     ]).
:- predicate_options(csv_write_stream/3, 3,
                     [ pass_to(csv//2, 2)
                     ]).


:- record
    csv_options(separator:integer=0',,
                strip:boolean=false,
                ignore_quotes:boolean=false,
                convert:boolean=true,
                case:oneof([down,preserve,up])=preserve,
                functor:atom=row,
                arity:integer,
                match_arity:boolean=true).
%!  csv_read_file(+File, -Rows) is det.
%!  csv_read_file(+File, -Rows, +Options) is det.
%
%   Read a CSV file into a list of rows.  Each row is a Prolog term
%   with the same arity.  Options is handed to csv//2; remaining
%   options are processed by phrase_from_file/3.  The default
%   separator depends on the file name extension and is a tab (\t)
%   for .tsv files and a comma otherwise.
%
%   Suppose we want to create a predicate table/6 from a CSV file
%   that we know contains 6 fields per record.  This can be done
%   using the code below.  Without the option arity(6), this would
%   generate a predicate table/N, where N is the number of fields
%   per record in the data.
%
%     ==
%     ?- csv_read_file(File, Rows, [functor(table), arity(6)]),
%        maplist(assert, Rows).
%     ==
csv_read_file(File, Rows) :-
    csv_read_file(File, Rows, []).

csv_read_file(File, Rows, Options) :-
    default_separator(File, Options, Options1),
    make_csv_options(Options1, Record, RestOptions),
    phrase_from_file(csv_roptions(Rows, Record), File, RestOptions).


default_separator(File, Options0, Options) :-
    (   option(separator(_), Options0)
    ->  Options = Options0
    ;   file_name_extension(_, Ext0, File),
        downcase_atom(Ext0, Ext),
        ext_separator(Ext, Sep)
    ->  Options = [separator(Sep)|Options0]
    ;   Options = Options0
    ).

ext_separator(csv, 0',).
ext_separator(tsv, 0'\t).
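% A minimal sketch of the table/6 pattern documented above, assuming a
% file whose records all have six fields.  load_table/1 is a
% hypothetical helper, not part of this library.
load_table(File) :-
    csv_read_file(File, Rows, [functor(table), arity(6)]),
    maplist(assertz, Rows).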
%!  csv(?Rows)// is det.
%!  csv(?Rows, +Options)// is det.
%
%   Prolog DCG to read and write CSV data.  Options:
%
%     * separator(+Code)
%     The field separator.  Must be a character code; the default is
%     the comma.  For example, separator(0';) parses a semicolon
%     separated file.
%
%     * ignore_quotes(+Boolean)
%     If true (default false), treat double quotes as a normal
%     character.
%
%     * strip(+Boolean)
%     If true (default false), strip leading and trailing blank
%     space.  RFC 4180 says that blank space is part of the data.
%
%     * convert(+Boolean)
%     If true (default), use name/2 on the field data.  This
%     translates the field into a number if possible.
%
%     * case(+Action)
%     If down, downcase atomic values; if up, upcase them; if
%     preserve (default), do not change the case.
%
%     * functor(+Atom)
%     Functor used for creating row terms.  Default is row.
%
%     * arity(?Arity)
%     Number of fields in each row.  Raises
%     domain_error(row_arity(Expected), Found) if a row is found
%     with a different arity.
%
%     * match_arity(+Boolean)
%     If false (default true), do not reject CSV files where lines
%     provide a varying number of fields (columns).  This can be a
%     work-around for processing some incorrect CSV files.
%
%   An illustrative query using these options is shown after the
%   parsing code below.

csv(Rows) -->
    csv(Rows, []).

csv(Rows, Options) -->
    { make_csv_options(Options, Record, _) },
    csv_roptions(Rows, Record).

csv_roptions(Rows, Record) -->
    { ground(Rows) },
    !,
    emit_csv(Rows, Record).
csv_roptions(Rows, Record) -->
    csv_data(Rows, Record).

csv_data([], _) -->
    eof,
    !.
csv_data([Row|More], Options) -->
    row(Row, Options),
    !,
    { debug(csv, 'Row: ~p', [Row]) },
    csv_data(More, Options).

eof([], []).

row(Row, Options) -->
    fields(Fields, Options),
    { csv_options_functor(Options, Functor),
      Row =.. [Functor|Fields],
      functor(Row, _, Arity),
      check_arity(Options, Arity)
    }.

check_arity(Options, Arity) :-
    csv_options_arity(Options, Arity),
    !.
check_arity(Options, _) :-
    csv_options_match_arity(Options, false),
    !.
check_arity(Options, Arity) :-
    csv_options_arity(Options, Expected),
    domain_error(row_arity(Expected), Arity).

fields([F|T], Options) -->
    field(F, Options),
    (   separator(Options)
    ->  fields(T, Options)
    ;   end_of_record
    ->  { T = [] }
    ).

field(Value, Options) -->
    "\"",
    { csv_options_ignore_quotes(Options, false) },
    !,
    string_codes(Codes),
    { make_value(Codes, Value, Options) }.
field(Value, Options) -->
    { csv_options_strip(Options, true) },
    !,
    stripped_field(Value, Options).
field(Value, Options) -->
    { csv_options_separator(Options, Sep) },
    field_codes(Codes, Sep),
    { make_value(Codes, Value, Options) }.


stripped_field(Value, Options) -->
    ws,
    (   "\"",
        { csv_options_ignore_quotes(Options, false) }
    ->  string_codes(Codes),
        ws
    ;   { csv_options_separator(Options, Sep) },
        field_codes(Codes0, Sep),
        { strip_trailing_ws(Codes0, Codes) }
    ),
    { make_value(Codes, Value, Options) }.

ws --> " ", !, ws.
ws --> "\t", !, ws.
ws --> "".

strip_trailing_ws(List, Stripped) :-
    append(Stripped, WS, List),
    all_ws(WS).

all_ws([]).
all_ws([32|T]) :- all_ws(T).
all_ws([9|T]) :- all_ws(T).
%!  string_codes(-Codes)//
%
%   Process the content of a double-quoted field in which an embedded
%   double quote is escaped by doubling it.  Consumes the terminating
%   double quote.

string_codes(List) -->
    [H],
    (   { H == 0'" }
    ->  (   "\""
        ->  { List = [H|T] },
            string_codes(T)
        ;   { List = [] }
        )
    ;   { List = [H|T] },
        string_codes(T)
    ).

%   field_codes//2 collects a raw field up to the separator or end of
%   record, leaving the terminator in the input (\r\n is normalised
%   to \n).

field_codes([], Sep), [Sep] --> [Sep], !.
field_codes([], _), "\n" --> "\r\n", !.
field_codes([], _), "\n" --> "\n", !.
field_codes([H|T], Sep) --> [H], !, field_codes(T, Sep).
field_codes([], _) --> [].              % unterminated last record
%!  make_value(+Codes, -Value, +Options) is det.
%
%   Convert a list of codes into a Prolog value, subject to the
%   convert and case options.

make_value(Codes, Value, Options) :-
    csv_options_convert(Options, Convert),
    csv_options_case(Options, Case),
    make_value(Convert, Case, Codes, Value).

make_value(true, preserve, Codes, Value) :-
    !,
    name(Value, Codes).
make_value(true, Case, Codes, Value) :-
    !,
    (   number_string(Value, Codes)
    ->  true
    ;   make_value(false, Case, Codes, Value)
    ).
make_value(false, preserve, Codes, Value) :-
    !,
    atom_codes(Value, Codes).
make_value(false, down, Codes, Value) :-
    !,
    string_codes(String, Codes),
    downcase_atom(String, Value).
make_value(false, up, Codes, Value) :-
    string_codes(String, Codes),
    upcase_atom(String, Value).

separator(Options) -->
    { csv_options_separator(Options, Sep) },
    [Sep].

end_of_record --> "\n".                 % Unix files
end_of_record --> "\r\n".               % DOS files
end_of_record --> "\r".                 % MacOS files
end_of_record --> eof.                  % unterminated last record
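% Illustrative query for csv//2 (a hypothetical example, not part of the
% library): parse a semicolon-separated record with stripping enabled,
% using the options documented above.
%
%   ?- phrase(csv(Rows, [separator(0';), strip(true)]), ` a ; b ;3\n`).
%   Rows = [row(a, b, 3)].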
%!  csv_read_file_row(+File, -Row, +Options) is nondet.
%
%   True when Row is a row in File.  Rows are returned one by one on
%   backtracking.  In addition to the options of csv_read_file/3,
%   this predicate processes the option:
%
%     * line(-Line)
%     Line is unified with the line number from which Row was read.
csv_read_file_row(File, Row, Options) :-
    default_separator(File, Options, Options1),
    make_csv_options(Options1, RecordOptions, Options2),
    select_option(line(Line), Options2, RestOptions, _),
    setup_call_cleanup(
        open(File, read, Stream, RestOptions),
        csv_read_stream_row(Stream, Row, Line, RecordOptions),
        close(Stream)).

csv_read_stream_row(Stream, Row, Line, Options) :-
    between(1, infinite, Line),
    (   csv_read_row(Stream, Row0, Options),
        Row0 \== end_of_file
    ->  Row = Row0
    ;   !,
        fail
    ).
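% A minimal sketch (not part of the library): print every row of a file
% together with the line number obtained through the line(Line) option
% described above.  print_rows_with_lines/1 is a hypothetical helper.
print_rows_with_lines(File) :-
    forall(csv_read_file_row(File, Row, [line(Line)]),
           format('~d: ~p~n', [Line, Row])).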
%!  csv_read_row(+Stream, -Row, +CompiledOptions) is det.
%
%   Read the next CSV record from Stream and unify the result with
%   Row.  CompiledOptions is created from the options of csv//2
%   using csv_options/2.  Row is unified with end_of_file upon
%   reaching the end of the input.

csv_read_row(Stream, Row, _Record) :-
    at_end_of_stream(Stream),
    !,
    Row = end_of_file.
csv_read_row(Stream, Row, Record) :-
    read_lines_to_codes(Stream, Codes),
    phrase(row(Row0, Record), Codes),
    !,
    Row = Row0.

read_lines_to_codes(Stream, Codes) :-
    read_line_to_codes(Stream, Codes, Tail),
    (   Tail == []
    ->  true
    ;   Tail = []
    ;   read_lines_to_codes(Stream, Tail)
    ).
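% A minimal sketch (not part of the library) of the intended use of
% csv_read_row/3: compile the options once with csv_options/2 (defined
% below) and read records until end_of_file.  process_csv_stream/1 and
% handle_row/1 are hypothetical helpers.
process_csv_stream(Stream) :-
    csv_options(Compiled, [functor(item)]),
    repeat,
    csv_read_row(Stream, Row, Compiled),
    (   Row == end_of_file
    ->  !
    ;   handle_row(Row),
        fail
    ).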
%!  csv_options(-Compiled, +Options) is det.
%
%   Compiled is the compiled (record) representation of the csv//2
%   options in Options, as required by csv_read_row/3.

csv_options(Compiled, Options) :-
    make_csv_options(Options, Compiled, _Ignored).


                 /*******************************
                 *            OUTPUT            *
                 *******************************/
%!  csv_write_file(+File, +Data) is det.
%!  csv_write_file(+File, +Data, +Options) is det.
%
%   Write a list of Prolog terms to a CSV file.  Options are given to
%   csv//2; remaining options are given to open/4.  The default
%   separator depends on the file name extension and is a tab (\t)
%   for .tsv files and a comma otherwise.

csv_write_file(File, Data) :-
    csv_write_file(File, Data, []).

csv_write_file(File, Data, Options) :-
    must_be(list, Data),
    default_separator(File, Options, Options1),
    make_csv_options(Options1, Record, RestOptions),
    phrase(emit_csv(Data, Record), String),
    setup_call_cleanup(
        open(File, write, Out, RestOptions),
        format(Out, '~s', [String]),
        close(Out)).


emit_csv([], _) --> [].
emit_csv([H|T], Options) -->
    emit_row(H, Options), "\r\n",       % RFC 4180 demands \r\n
    emit_csv(T, Options).

emit_row(Row, Options) -->
    { Row =.. [_|Fields] },
    emit_fields(Fields, Options).

emit_fields([H|T], Options) -->
    emit_field(H, Options),
    (   { T == [] }
    ->  []
    ;   { csv_options_separator(Options, Sep) },
        [Sep],
        emit_fields(T, Options)
    ).

emit_field(H, Options) -->
    { (   atom(H)
      ->  atom_codes(H, Codes)
      ;   string(H)
      ->  string_codes(H, Codes)
      )
    },
    !,
    (   { needs_quotes(H, Options) }
    ->  "\"", emit_string(Codes), "\""
    ;   emit_codes(Codes)
    ).
emit_field([], _) -->
    !,
    { atom_codes('[]', Codes) },
    emit_codes(Codes).
emit_field(H, _) -->
    { number_codes(H, Codes) },
    emit_codes(Codes).

needs_quotes(Atom, _) :-
    sub_atom(Atom, _, _, _, '"'),
    !.
needs_quotes(Atom, _) :-
    sub_atom(Atom, _, _, _, '\n'),
    !.
needs_quotes(Atom, _) :-
    sub_atom(Atom, _, _, _, '\r'),
    !.
needs_quotes(Atom, Options) :-
    csv_options_separator(Options, Sep),
    char_code(Char, Sep),
    sub_atom(Atom, _, _, _, Char),
    !.

emit_string([]) --> "".
emit_string([0'"|T]) --> !, "\"\"", emit_string(T).
emit_string([H|T]) --> [H], emit_string(T).

emit_codes([]) --> "".
emit_codes([0'"|T]) --> !, "\"\"", emit_codes(T).
emit_codes([H|T]) --> [H], emit_codes(T).
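% A minimal sketch (not part of the library): write rows to a file with
% a hypothetical .tsv name, so the default-separator rule above selects
% the tab character.  save_scores/0 and its data are invented for
% illustration.
save_scores :-
    csv_write_file('scores.tsv',
                   [ row(name, score),
                     row(alice, 42),
                     row(bob, 37)
                   ], []).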
%!  csv_write_stream(+Stream, +Data, +Options) is det.
%
%   Write the rows in Data to Stream.  Unlike csv_write_file/3, this
%   can be used to emit rows incrementally.  The example below saves
%   all answers of the predicate data/3 to File.
%
%     ==
%     save_data(File) :-
%         setup_call_cleanup(
%             open(File, write, Out),
%             forall(data(C1,C2,C3),
%                    csv_write_stream(Out, [row(C1,C2,C3)], [])),
%             close(Out)).
%     ==
csv_write_stream(Stream, Data, Options) :-
    must_be(list, Data),
    make_csv_options(Options, Record, _),
    phrase(emit_csv(Data, Record), String),
    format(Stream, '~s', [String]).