1/* Part of SWI-Prolog 2 3 Author: Jan Wielemaker and Matt Lilley 4 E-mail: J.Wielemaker@cs.vu.nl 5 WWW: http://www.swi-prolog.org 6 Copyright (c) 2012-2016, VU University Amsterdam 7 All rights reserved. 8 9 Redistribution and use in source and binary forms, with or without 10 modification, are permitted provided that the following conditions 11 are met: 12 13 1. Redistributions of source code must retain the above copyright 14 notice, this list of conditions and the following disclaimer. 15 16 2. Redistributions in binary form must reproduce the above copyright 17 notice, this list of conditions and the following disclaimer in 18 the documentation and/or other materials provided with the 19 distribution. 20 21 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 24 FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 25 COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 26 INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 27 BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 28 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 29 CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 30 LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN 31 ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 32 POSSIBILITY OF SUCH DAMAGE. 33*/ 34 35:- module(archive, 36 [ archive_open/3, % +Stream, -Archive, +Options 37 archive_open/4, % +Stream, +Mode, -Archive, +Options 38 archive_create/3, % +OutputFile, +InputFileList, +Options 39 archive_close/1, % +Archive 40 archive_property/2, % +Archive, ?Property 41 archive_next_header/2, % +Archive, -Name 42 archive_open_entry/2, % +Archive, -EntryStream 43 archive_header_property/2, % +Archive, ?Property 44 archive_set_header_property/2, % +Archive, +Property 45 archive_extract/3, % +Archive, +Dir, +Options 46 47 archive_entries/2, % +Archive, -Entries 48 archive_data_stream/3 % +Archive, -DataStream, +Options 49 ]). 50:- use_module(library(error)). 51:- use_module(library(option)). 52:- use_module(library(filesex)). 53 54/** <module> Access several archive formats 55 56This library uses _libarchive_ to access a variety of archive formats. 57The following example lists the entries in an archive: 58 59 == 60 list_archive(File) :- 61 archive_open(File, Archive, []), 62 repeat, 63 ( archive_next_header(Archive, Path) 64 -> format('~w~n', [Path]), 65 fail 66 ; !, 67 archive_close(Archive) 68 ). 69 == 70 71@see http://code.google.com/p/libarchive/ 72*/ 73 74:- use_foreign_library(foreign(archive4pl)). 75 76archive_open(Stream, Archive, Options) :- 77 archive_open(Stream, read, Archive, Options). 78 79:- predicate_options(archive_open/4, 4, 80 [ close_parent(boolean), 81 filter(oneof([all,bzip2,compress,gzip,grzip,lrzip, 82 lzip,lzma,lzop,none,rpm,uu,xz])), 83 format(oneof([all,'7zip',ar,cab,cpio,empty,gnutar, 84 iso9660,lha,mtree,rar,raw,tar,xar,zip])) 85 ]). 86:- predicate_options(archive_create/3, 3, 87 [ directory(atom), 88 pass_to(archive_open/4, 4) 89 ]). 90 91%! archive_open(+Data, +Mode, -Archive, +Options) is det. 92% 93% Open the archive in Data and unify Archive with a handle to the 94% opened archive. Data is either a file or a stream that contains 95% a valid archive. Details are controlled by Options. Typically, 96% the option close_parent(true) is used to close stream if the 97% archive is closed using archive_close/1. For other options, the 98% defaults are typically fine. The option format(raw) must be used 99% to process compressed streams that do not contain explicit 100% entries (e.g., gzip'ed data) unambibuously. The =raw= format 101% creates a _pseudo archive_ holding a single member named =data=. 102% 103% * close_parent(+Boolean) 104% If this option is =true= (default =false=), Stream is closed 105% if archive_close/1 is called on Archive. 106% 107% * compression(+Compression) 108% Synomym for filter(Compression). Deprecated. 109% 110% * filter(+Filter) 111% Support the indicated filter. This option may be 112% used multiple times to support multiple filters. In read mode, 113% If no filter options are provided, =all= is assumed. In write 114% mode, none is assumed. 115% Supported values are =all=, =bzip2=, =compress=, =gzip=, 116% =grzip=, =lrzip=, =lzip=, =lzma=, =lzop=, =none=, =rpm=, =uu= 117% and =xz=. The value =all= is default for read, =none= for write. 118% 119% * format(+Format) 120% Support the indicated format. This option may be used 121% multiple times to support multiple formats in read mode. 122% In write mode, you must supply a single format. If no format 123% options are provided, =all= is assumed for read mode. Note that 124% =all= does *not* include =raw=. To open both archive 125% and non-archive files, _both_ format(all) and 126% format(raw) must be specified. Supported values are: =all=, 127% =7zip=, =ar=, =cab=, =cpio=, =empty=, =gnutar=, =iso9660=, 128% =lha=, =mtree=, =rar=, =raw=, =tar=, =xar= and =zip=. The 129% value =all= is default for read. 130% 131% Note that the actually supported compression types and formats 132% may vary depending on the version and installation options of 133% the underlying libarchive library. This predicate raises a 134% domain error if the (explicitly) requested format is not 135% supported. 136% 137% @error domain_error(filter, Filter) if the requested 138% filter is not supported. 139% @error domain_error(format, Format) if the requested 140% format type is not supported. 141 142archive_open(stream(Stream), Mode, Archive, Options) :- 143 !, 144 archive_open_stream(Stream, Mode, Archive, Options). 145archive_open(Stream, Mode, Archive, Options) :- 146 is_stream(Stream), 147 !, 148 archive_open_stream(Stream, Mode, Archive, Options). 149archive_open(File, Mode, Archive, Options) :- 150 open(File, Mode, Stream, [type(binary)]), 151 catch(archive_open_stream(Stream, Mode, Archive, [close_parent(true)|Options]), 152 E, (close(Stream, [force(true)]), throw(E))). 153 154 155%! archive_close(+Archive) is det. 156% 157% Close the archive. If close_parent(true) is specified, the 158% underlying stream is closed too. If there is an entry opened 159% with archive_open_entry/2, actually closing the archive is 160% delayed until the stream associated with the entry is closed. 161% This can be used to open a stream to an archive entry without 162% having to worry about closing the archive: 163% 164% == 165% archive_open_named(ArchiveFile, EntryName, Stream) :- 166% archive_open(ArchiveFile, Handle, []), 167% archive_next_header(Handle, Name), 168% archive_open_entry(Handle, Stream), 169% archive_close(Archive). 170% == 171 172 173%! archive_property(+Handle, ?Property) is nondet. 174% 175% True when Property is a property of the archive Handle. Defined 176% properties are: 177% 178% * filters(List) 179% True when the indicated filters are applied before reaching 180% the archive format. 181 182archive_property(Handle, Property) :- 183 defined_archive_property(Property), 184 Property =.. [Name,Value], 185 archive_property(Handle, Name, Value). 186 187defined_archive_property(filter(_)). 188 189 190%! archive_next_header(+Handle, -Name) is semidet. 191% 192% Forward to the next entry of the archive for which Name unifies 193% with the pathname of the entry. Fails silently if the name of 194% the archive is reached before success. Name is typically 195% specified if a single entry must be accessed and unbound 196% otherwise. The following example opens a Prolog stream to a 197% given archive entry. Note that _Stream_ must be closed using 198% close/1 and the archive must be closed using archive_close/1 199% after the data has been used. See also setup_call_cleanup/3. 200% 201% == 202% open_archive_entry(ArchiveFile, Entry, Stream) :- 203% open(ArchiveFile, read, In, [type(binary)]), 204% archive_open(In, Archive, [close_parent(true)]), 205% archive_next_header(Archive, Entry), 206% archive_open_entry(Archive, Stream). 207% == 208% 209% @error permission_error(next_header, archive, Handle) if a 210% previously opened entry is not closed. 211 212%! archive_open_entry(+Archive, -Stream) is det. 213% 214% Open the current entry as a stream. Stream must be closed. 215% If the stream is not closed before the next call to 216% archive_next_header/2, a permission error is raised. 217 218 219%! archive_set_header_property(+Archive, +Property) 220% 221% Set Property of the current header. Write-mode only. Defined 222% properties are: 223% 224% * filetype(-Type) 225% Type is one of =file=, =link=, =socket=, =character_device=, 226% =block_device=, =directory= or =fifo=. It appears that this 227% library can also return other values. These are returned as 228% an integer. 229% * mtime(-Time) 230% True when entry was last modified at time. 231% * size(-Bytes) 232% True when entry is Bytes long. 233% * link_target(-Target) 234% Target for a link. Currently only supported for symbolic 235% links. 236 237%! archive_header_property(+Archive, ?Property) 238% 239% True when Property is a property of the current header. Defined 240% properties are: 241% 242% * filetype(-Type) 243% Type is one of =file=, =link=, =socket=, =character_device=, 244% =block_device=, =directory= or =fifo=. It appears that this 245% library can also return other values. These are returned as 246% an integer. 247% * mtime(-Time) 248% True when entry was last modified at time. 249% * size(-Bytes) 250% True when entry is Bytes long. 251% * link_target(-Target) 252% Target for a link. Currently only supported for symbolic 253% links. 254% * format(-Format) 255% Provides the name of the archive format applicable to the 256% current entry. The returned value is the lowercase version 257% of the output of archive_format_name(). 258% * permissions(-Integer) 259% True when entry has the indicated permission mask. 260 261archive_header_property(Archive, Property) :- 262 ( nonvar(Property) 263 -> true 264 ; header_property(Property) 265 ), 266 archive_header_prop_(Archive, Property). 267 268header_property(filetype(_)). 269header_property(mtime(_)). 270header_property(size(_)). 271header_property(link_target(_)). 272header_property(format(_)). 273header_property(permissions(_)). 274 275 276%! archive_extract(+ArchiveFile, +Dir, +Options) 277% 278% Extract files from the given archive into Dir. Supported 279% options: 280% 281% * remove_prefix(+Prefix) 282% Strip Prefix from all entries before extracting 283% * exclude(+ListOfPatterns) 284% Ignore members that match one of the given patterns. 285% Patterns are handed to wildcard_match/2. 286% 287% @error existence_error(directory, Dir) if Dir does not exist 288% or is not a directory. 289% @error domain_error(path_prefix(Prefix), Path) if a path in 290% the archive does not start with Prefix 291% @tbd Add options 292 293archive_extract(Archive, Dir, Options) :- 294 ( exists_directory(Dir) 295 -> true 296 ; existence_error(directory, Dir) 297 ), 298 setup_call_cleanup( 299 archive_open(Archive, Handle, Options), 300 extract(Handle, Dir, Options), 301 archive_close(Handle)). 302 303extract(Archive, Dir, Options) :- 304 archive_next_header(Archive, Path), 305 !, 306 ( archive_header_property(Archive, filetype(file)), 307 \+ excluded(Path, Options) 308 -> archive_header_property(Archive, permissions(Perm)), 309 ( option(remove_prefix(Remove), Options) 310 -> ( atom_concat(Remove, ExtractPath, Path) 311 -> true 312 ; domain_error(path_prefix(Remove), Path) 313 ) 314 ; ExtractPath = Path 315 ), 316 directory_file_path(Dir, ExtractPath, Target), 317 file_directory_name(Target, FileDir), 318 make_directory_path(FileDir), 319 setup_call_cleanup( 320 archive_open_entry(Archive, In), 321 setup_call_cleanup( 322 open(Target, write, Out, [type(binary)]), 323 copy_stream_data(In, Out), 324 close(Out)), 325 close(In)), 326 set_permissions(Perm, Target) 327 ; true 328 ), 329 extract(Archive, Dir, Options). 330extract(_, _, _). 331 332excluded(Path, Options) :- 333 option(exclude(Patterns), Options), 334 split_string(Path, "/", "/", Parts), 335 member(Segment, Parts), 336 Segment \== "", 337 member(Pattern, Patterns), 338 wildcard_match(Pattern, Segment). 339 340 341%! set_permissions(+Perm:integer, +Target:atom) 342% 343% Restore the permissions. Currently only restores the executable 344% permission. 345 346set_permissions(Perm, Target) :- 347 Perm /\ 0o100 =\= 0, 348 !, 349 '$mark_executable'(Target). 350set_permissions(_, _). 351 352 353 /******************************* 354 * HIGH LEVEL PREDICATES * 355 *******************************/ 356 357%! archive_entries(+Archive, -Paths) is det. 358% 359% True when Paths is a list of pathnames appearing in Archive. 360 361archive_entries(Archive, Paths) :- 362 setup_call_cleanup( 363 archive_open(Archive, Handle, []), 364 contents(Handle, Paths), 365 archive_close(Handle)). 366 367contents(Handle, [Path|T]) :- 368 archive_next_header(Handle, Path), 369 !, 370 contents(Handle, T). 371contents(_, []). 372 373%! archive_data_stream(+Archive, -DataStream, +Options) is nondet. 374% 375% True when DataStream is a stream to a data object inside 376% Archive. This predicate transparently unpacks data inside 377% _possibly nested_ archives, e.g., a _tar_ file inside a _zip_ 378% file. It applies the appropriate decompression filters and thus 379% ensures that Prolog reads the plain data from DataStream. 380% DataStream must be closed after the content has been processed. 381% Backtracking opens the next member of the (nested) archive. This 382% predicate processes the following options: 383% 384% - meta_data(-Data:list(dict)) 385% If provided, Data is unified with a list of filters applied to 386% the (nested) archive to open the current DataStream. The first 387% element describes the outermost archive. Each Data dict 388% contains the header properties (archive_header_property/2) as 389% well as the keys: 390% 391% - filters(Filters:list(atom)) 392% Filter list as obtained from archive_property/2 393% - name(Atom) 394% Name of the entry. 395% 396% Non-archive files are handled as pseudo-archives that hold a 397% single stream. This is implemented by using archive_open/3 with 398% the options `[format(all),format(raw)]`. 399 400archive_data_stream(Archive, DataStream, Options) :- 401 option(meta_data(MetaData), Options, _), 402 archive_content(Archive, DataStream, MetaData, []). 403 404archive_content(Archive, Entry, [EntryMetadata|PipeMetadataTail], PipeMetadata2) :- 405 archive_property(Archive, filter(Filters)), 406 repeat, 407 ( archive_next_header(Archive, EntryName) 408 -> findall(EntryProperty, 409 archive_header_property(Archive, EntryProperty), 410 EntryProperties), 411 dict_create(EntryMetadata, archive_meta_data, 412 [ filters(Filters), 413 name(EntryName) 414 | EntryProperties 415 ]), 416 ( EntryMetadata.filetype == file 417 -> archive_open_entry(Archive, Entry0), 418 ( EntryName == data, 419 EntryMetadata.format == raw 420 -> % This is the last entry in this nested branch. 421 % We therefore close the choicepoint created by repeat/0. 422 % Not closing this choicepoint would cause 423 % archive_next_header/2 to throw an exception. 424 !, 425 PipeMetadataTail = PipeMetadata2, 426 Entry = Entry0 427 ; PipeMetadataTail = PipeMetadata1, 428 open_substream(Entry0, 429 Entry, 430 PipeMetadata1, 431 PipeMetadata2) 432 ) 433 ; fail 434 ) 435 ; !, 436 fail 437 ). 438 439open_substream(In, Entry, ArchiveMetadata, PipeTailMetadata) :- 440 setup_call_cleanup( 441 archive_open(stream(In), 442 Archive, 443 [ close_parent(true), 444 format(all), 445 format(raw) 446 ]), 447 archive_content(Archive, Entry, ArchiveMetadata, PipeTailMetadata), 448 archive_close(Archive)). 449 450 451%! archive_create(+OutputFile, +InputFiles, +Options) is det. 452% 453% Convenience predicate to create an archive in OutputFile with 454% data from a list of InputFiles and the given Options. 455% 456% Besides options supported by archive_open/4, the following 457% options are supported: 458% 459% * directory(+Directory) 460% Changes the directory before adding input files. If this is 461% specified, paths of input files must be relative to 462% Directory and archived files will not have Directory 463% as leading path. This is to simulate =|-C|= option of 464% the =tar= program. 465% 466% * format(+Format) 467% Write mode supports the following formats: `7zip`, `cpio`, 468% `gnutar`, `iso9660`, `xar` and `zip`. Note that a particular 469% installation may support only a subset of these, depending on 470% the configuration of `libarchive`. 471 472archive_create(OutputFile, InputFiles, Options) :- 473 must_be(list(text), InputFiles), 474 option(directory(BaseDir), Options, '.'), 475 setup_call_cleanup( 476 archive_open(OutputFile, write, Archive, Options), 477 archive_create_1(Archive, BaseDir, BaseDir, InputFiles, top), 478 archive_close(Archive)). 479 480archive_create_1(_, _, _, [], _) :- !. 481archive_create_1(Archive, Base, Current, ['.'|Files], sub) :- 482 !, 483 archive_create_1(Archive, Base, Current, Files, sub). 484archive_create_1(Archive, Base, Current, ['..'|Files], Where) :- 485 !, 486 archive_create_1(Archive, Base, Current, Files, Where). 487archive_create_1(Archive, Base, Current, [File|Files], Where) :- 488 directory_file_path(Current, File, Filename), 489 archive_create_2(Archive, Base, Filename), 490 archive_create_1(Archive, Base, Current, Files, Where). 491 492archive_create_2(Archive, Base, Directory) :- 493 exists_directory(Directory), 494 !, 495 entry_name(Base, Directory, Directory0), 496 archive_next_header(Archive, Directory0), 497 time_file(Directory, Time), 498 archive_set_header_property(Archive, mtime(Time)), 499 archive_set_header_property(Archive, filetype(directory)), 500 archive_open_entry(Archive, EntryStream), 501 close(EntryStream), 502 directory_files(Directory, Files), 503 archive_create_1(Archive, Base, Directory, Files, sub). 504archive_create_2(Archive, Base, Filename) :- 505 entry_name(Base, Filename, Filename0), 506 archive_next_header(Archive, Filename0), 507 size_file(Filename, Size), 508 time_file(Filename, Time), 509 archive_set_header_property(Archive, size(Size)), 510 archive_set_header_property(Archive, mtime(Time)), 511 setup_call_cleanup( 512 archive_open_entry(Archive, EntryStream), 513 setup_call_cleanup( 514 open(Filename, read, DataStream, [type(binary)]), 515 copy_stream_data(DataStream, EntryStream), 516 close(DataStream)), 517 close(EntryStream)). 518 519entry_name('.', Name, Name) :- !. 520entry_name(Base, Name, EntryName) :- 521 directory_file_path(Base, EntryName, Name)