View source with raw comments or as raw
    1:- module(rdf_cache,
    2          [ rdf_set_cache_options/1,    % +Options
    3            rdf_cache_file/3            % +URL, +RW, -File
    4          ]).    5:- use_module(library(error)).    6:- use_module(library(filesex)).

Cache RDF triples

The library library(semweb/rdf_cache) defines the caching strategy for triples sources. When using large RDF sources, caching triples greatly speedup loading RDF documents. The cache library implements two caching strategies that are controlled by rdf_set_cache_options/1.

Local caching This approach applies to files only. Triples are cached in a sub-directory of the directory holding the source. This directory is called .cache (_cache on Windows). If the cache option create_local_directory is true, a cache directory is created if posible.

Global caching This approach applies to all sources, except for unnamed streams. Triples are cached in directory defined by the cache option global_directory.

When loading an RDF file, the system scans the configured cache files unless cache(false) is specified as option to rdf_load/2 or caching is disabled. If caching is enabled but no cache exists, the system will try to create a cache file. First it will try to do this locally. On failure it will try to configured global cache. */

   32:- dynamic
   33    cache_option/1.   34
   35set_setfault_options :-
   36    assert(cache_option(enabled(true))),
   37    (   current_prolog_flag(windows, true)
   38    ->  assert(cache_option(local_directory('_cache')))
   39    ;   assert(cache_option(local_directory('.cache')))
   40    ).
   41
   42:- set_setfault_options.                % _only_ when loading!
 rdf_set_cache_options(+Options)
Change the cache policy. Provided options are:
   64rdf_set_cache_options([]) :- !.
   65rdf_set_cache_options([H|T]) :-
   66    !,
   67    rdf_set_cache_options(H),
   68    rdf_set_cache_options(T).
   69rdf_set_cache_options(Opt) :-
   70    functor(Opt, Name, Arity),
   71    arg(1, Opt, Value),
   72    (   cache_option(Name, Type)
   73    ->  must_be(Type, Value)
   74    ;   domain_error(cache_option, Opt)
   75    ),
   76    functor(Gen, Name, Arity),
   77    retractall(cache_option(Gen)),
   78    expand_option(Opt, EOpt),
   79    assert(cache_option(EOpt)).
   80
   81cache_option(enabled,                 boolean).
   82cache_option(local_directory,         atom).
   83cache_option(create_local_directory,  boolean).
   84cache_option(global_directory,        atom).
   85cache_option(create_global_directory, boolean).
   86
   87expand_option(global_directory(Local), global_directory(Global)) :-
   88    !,
   89    absolute_file_name(Local, Global).
   90expand_option(Opt, Opt).
 rdf_cache_file(+URL, +ReadWrite, -File) is semidet
File is the cache file for URL. If ReadWrite is read, it returns the name of an existing file. If write it returns where a new cache file can be overwritten or created.
   99rdf_cache_file(_URL, _, _File) :-
  100    cache_option(enabled(false)),
  101    !,
  102    fail.
  103rdf_cache_file(URL, read, File) :-
  104    !,
  105    (   atom_concat('file://', Path, URL),
  106        cache_option(local_directory(Local)),
  107        file_directory_name(Path, Dir),
  108        local_cache_file(URL, LocalFile),
  109        atomic_list_concat([Dir, Local, LocalFile], /, File)
  110    ;   cache_option(global_directory(Dir)),
  111        url_cache_file(URL, Dir, trp, read, File)
  112    ),
  113    access_file(File, read),
  114    !.
  115rdf_cache_file(URL, write, File) :-
  116    !,
  117    (   atom_concat('file://', Path, URL),
  118        cache_option(local_directory(Local)),
  119        file_directory_name(Path, Dir),
  120        (   cache_option(create_local_directory(true))
  121        ->  RWDir = write
  122        ;   RWDir = read
  123        ),
  124        ensure_dir(Dir, Local, RWDir, CacheDir),
  125        local_cache_file(URL, LocalFile),
  126        atomic_list_concat([CacheDir, LocalFile], /, File)
  127    ;   cache_option(global_directory(Dir)),
  128        ensure_global_cache(Dir),
  129        url_cache_file(URL, Dir, trp, write, File)
  130    ),
  131    access_file(File, write),
  132    !.
  133
  134
  135ensure_global_cache(Dir) :-
  136    exists_directory(Dir),
  137    !.
  138ensure_global_cache(Dir) :-
  139    cache_option(create_global_directory(true)),
  140    make_directory_path(Dir),
  141    print_message(informational, rdf(cache_created(Dir))).
  142
  143
  144                 /*******************************
  145                 *         LOCAL CACHE          *
  146                 *******************************/
 local_cache_file(+FileURL, -File) is det
Return the name of the cache file for FileURL. The name is the plain filename with the .trp extension. As the URL is a file URL, it is guaranteed to be a valid filename. Assumes the hosting OS can handle multiple exensions (.x.y) though. These days thats even true on Windows.
  156local_cache_file(URL, File) :-
  157    file_base_name(URL, Name),
  158    file_name_extension(Name, trp, File).
  159
  160
  161                 /*******************************
  162                 *         GLOBAL CACHE         *
  163                 *******************************/
 url_cache_file(+URL, +Dir, +Ext, +RW, -Path) is semidet
Determine location of cache-file for the given URL in Dir. If Ext is provided, the returned Path is ensured to have the specified extension.
Arguments:
RW- If read, no directories are created and the call fails if URL is not in the cache.
  174url_cache_file(URL, Dir, Ext, RW, Path) :-
  175    term_hash(URL, Hash0),
  176    Hash is Hash0 + 100000,         % make sure > 4 characters
  177    format(string(Hex), '~16r', [Hash]),
  178    sub_atom(Hex, _, 2, 0, L1),
  179    ensure_dir(Dir, L1, RW, Dir1),
  180    sub_atom(Hex, _, 2, 2, L2),
  181    ensure_dir(Dir1, L2, RW, Dir2),
  182    url_to_file(URL, File),
  183    ensure_ext(File, Ext, FileExt),
  184    atomic_list_concat([Dir2, /, FileExt], Path).
  185
  186ensure_dir(D0, Sub, RW, Dir) :-
  187    atomic_list_concat([D0, /, Sub], Dir),
  188    (   exists_directory(Dir)
  189    ->  true
  190    ;   RW == write
  191    ->  catch(make_directory(Dir), _, fail)
  192    ).
  193
  194ensure_ext(File, '', File) :- !.
  195ensure_ext(File, Ext, File) :-
  196    file_name_extension(_, Ext, File),
  197    !.
  198ensure_ext(File, Ext, FileExt) :-
  199    file_name_extension(File, Ext, FileExt).
 url_to_file(+URL, -File)
Convert a URL in something that fits in a file, i.e. avoiding / and :. We simply replace these by -. We could also use www_form_encode/2, but confusion when to replace as well as the fact that we loose the '.' (extension) makes this a less ideal choice. We could also consider base64 encoding of the name.
  209url_to_file(URL, File) :-
  210    atom_codes(URL, Codes),
  211    phrase(safe_file_name(Codes), FileCodes),
  212    atom_codes(File, FileCodes).
  213
  214safe_file_name([]) -->
  215    [].
  216safe_file_name([H|T]) -->
  217    replace(H),
  218    !,
  219    safe_file_name(T).
  220safe_file_name([H|T]) -->
  221    [H],
  222    safe_file_name(T).
 replace(+Code)//
Replace a character code that cannot safely be put in a filename. Should we use %XX?
  229replace(0'/)  --> "-".                  % directory separator
  230replace(0'\\) --> "-".                  % not allowed in Windows filename
  231replace(0':)  --> "-".                  % idem
  232replace(0'?)  --> "-".                  % idem
  233replace(0'*)  --> "-".                  % idem
  234
  235
  236                 /*******************************
  237                 *             MESSAGES         *
  238                 *******************************/
  239
  240:- multifile prolog:message/3.  241
  242prolog:message(rdf(cache_created(Dir))) -->
  243    [ 'Created RDF cache directory ~w'-[Dir] ]