View source with formatted comments or as raw
    1:- module(rdf_cache,
    2          [ rdf_set_cache_options/1,    % +Options
    3            rdf_cache_file/3            % +URL, +RW, -File
    4          ]).    5:- use_module(library(error)).    6:- use_module(library(filesex)).    7
    8/** <module> Cache RDF triples
    9
   10The library library(semweb/rdf_cache) defines the   caching strategy for
   11triples sources. When using large RDF   sources, caching triples greatly
   12speedup loading RDF documents. The cache  library implements two caching
   13strategies that are controlled by rdf_set_cache_options/1.
   14
   15*|Local caching|* This approach  applies  to   files  only.  Triples are
   16cached in a sub-directory of  the   directory  holding  the source. This
   17directory is called =|.cache|= (=|_cache|=  on   Windows).  If the cache
   18option =create_local_directory= is =true=, a  cache directory is created
   19if posible.
   20
   21*|Global caching|* This approach applies  to   all  sources,  except for
   22unnamed streams. Triples are cached in   directory  defined by the cache
   23option =global_directory=.
   24
   25When loading an RDF file, the system   scans  the configured cache files
   26unless cache(false) is specified as option   to rdf_load/2 or caching is
   27disabled. If caching is enabled but no cache exists, the system will try
   28to create a cache file. First it will try to do this locally. On failure
   29it will try to configured global cache.
   30*/
   31
   32:- dynamic
   33    cache_option/1.   34
   35set_setfault_options :-
   36    assert(cache_option(enabled(true))),
   37    (   current_prolog_flag(windows, true)
   38    ->  assert(cache_option(local_directory('_cache')))
   39    ;   assert(cache_option(local_directory('.cache')))
   40    ).
   41
   42:- set_setfault_options.                % _only_ when loading!
   43
   44%!  rdf_set_cache_options(+Options)
   45%
   46%   Change the cache policy.  Provided options are:
   47%
   48%     * enabled(Boolean)
   49%     If =true=, caching is enabled.
   50%
   51%     * local_directory(Name).
   52%     Plain name of local directory.  Default =|.cache|=
   53%     (=|_cache|= on Windows).
   54%
   55%     * create_local_directory(Bool)
   56%     If =true=, try to create local cache directories
   57%
   58%     * global_directory(Dir)
   59%     Writeable directory for storing cached parsed files.
   60%
   61%     * create_global_directory(Bool)
   62%     If =true=, try to create the global cache directory.
   63
   64rdf_set_cache_options([]) :- !.
   65rdf_set_cache_options([H|T]) :-
   66    !,
   67    rdf_set_cache_options(H),
   68    rdf_set_cache_options(T).
   69rdf_set_cache_options(Opt) :-
   70    functor(Opt, Name, Arity),
   71    arg(1, Opt, Value),
   72    (   cache_option(Name, Type)
   73    ->  must_be(Type, Value)
   74    ;   domain_error(cache_option, Opt)
   75    ),
   76    functor(Gen, Name, Arity),
   77    retractall(cache_option(Gen)),
   78    expand_option(Opt, EOpt),
   79    assert(cache_option(EOpt)).
   80
   81cache_option(enabled,                 boolean).
   82cache_option(local_directory,         atom).
   83cache_option(create_local_directory,  boolean).
   84cache_option(global_directory,        atom).
   85cache_option(create_global_directory, boolean).
   86
   87expand_option(global_directory(Local), global_directory(Global)) :-
   88    !,
   89    absolute_file_name(Local, Global).
   90expand_option(Opt, Opt).
   91
   92
   93%!  rdf_cache_file(+URL, +ReadWrite, -File) is semidet.
   94%
   95%   File is the cache file  for  URL.   If  ReadWrite  is =read=, it
   96%   returns the name of an existing file.  If =write= it returns
   97%   where a new cache file can be overwritten or created.
   98
   99rdf_cache_file(_URL, _, _File) :-
  100    cache_option(enabled(false)),
  101    !,
  102    fail.
  103rdf_cache_file(URL, read, File) :-
  104    !,
  105    (   atom_concat('file://', Path, URL),
  106        cache_option(local_directory(Local)),
  107        file_directory_name(Path, Dir),
  108        local_cache_file(URL, LocalFile),
  109        atomic_list_concat([Dir, Local, LocalFile], /, File)
  110    ;   cache_option(global_directory(Dir)),
  111        url_cache_file(URL, Dir, trp, read, File)
  112    ),
  113    access_file(File, read),
  114    !.
  115rdf_cache_file(URL, write, File) :-
  116    !,
  117    (   atom_concat('file://', Path, URL),
  118        cache_option(local_directory(Local)),
  119        file_directory_name(Path, Dir),
  120        (   cache_option(create_local_directory(true))
  121        ->  RWDir = write
  122        ;   RWDir = read
  123        ),
  124        ensure_dir(Dir, Local, RWDir, CacheDir),
  125        local_cache_file(URL, LocalFile),
  126        atomic_list_concat([CacheDir, LocalFile], /, File)
  127    ;   cache_option(global_directory(Dir)),
  128        ensure_global_cache(Dir),
  129        url_cache_file(URL, Dir, trp, write, File)
  130    ),
  131    access_file(File, write),
  132    !.
  133
  134
  135ensure_global_cache(Dir) :-
  136    exists_directory(Dir),
  137    !.
  138ensure_global_cache(Dir) :-
  139    cache_option(create_global_directory(true)),
  140    make_directory_path(Dir),
  141    print_message(informational, rdf(cache_created(Dir))).
  142
  143
  144                 /*******************************
  145                 *         LOCAL CACHE          *
  146                 *******************************/
  147
  148%!  local_cache_file(+FileURL, -File) is det.
  149%
  150%   Return the name of the cache file   for FileURL. The name is the
  151%   plain filename with the .trp extension.  As   the  URL is a file
  152%   URL, it is guaranteed  to  be   a  valid  filename.  Assumes the
  153%   hosting OS can handle  multiple   exensions  (=|.x.y|=)  though.
  154%   These days thats even true on Windows.
  155
  156local_cache_file(URL, File) :-
  157    file_base_name(URL, Name),
  158    file_name_extension(Name, trp, File).
  159
  160
  161                 /*******************************
  162                 *         GLOBAL CACHE         *
  163                 *******************************/
  164
  165%!  url_cache_file(+URL, +Dir, +Ext, +RW, -Path) is semidet.
  166%
  167%   Determine location of cache-file for the   given  URL in Dir. If
  168%   Ext is provided, the  returned  Path   is  ensured  to  have the
  169%   specified extension.
  170%
  171%   @param RW       If =read=, no directories are created and the call
  172%                   fails if URL is not in the cache.
  173
  174url_cache_file(URL, Dir, Ext, RW, Path) :-
  175    term_hash(URL, Hash0),
  176    Hash is Hash0 + 100000,         % make sure > 4 characters
  177    format(string(Hex), '~16r', [Hash]),
  178    sub_atom(Hex, _, 2, 0, L1),
  179    ensure_dir(Dir, L1, RW, Dir1),
  180    sub_atom(Hex, _, 2, 2, L2),
  181    ensure_dir(Dir1, L2, RW, Dir2),
  182    url_to_file(URL, File),
  183    ensure_ext(File, Ext, FileExt),
  184    atomic_list_concat([Dir2, /, FileExt], Path).
  185
  186ensure_dir(D0, Sub, RW, Dir) :-
  187    atomic_list_concat([D0, /, Sub], Dir),
  188    (   exists_directory(Dir)
  189    ->  true
  190    ;   RW == write
  191    ->  catch(make_directory(Dir), _, fail)
  192    ).
  193
  194ensure_ext(File, '', File) :- !.
  195ensure_ext(File, Ext, File) :-
  196    file_name_extension(_, Ext, File),
  197    !.
  198ensure_ext(File, Ext, FileExt) :-
  199    file_name_extension(File, Ext, FileExt).
  200
  201%!  url_to_file(+URL, -File)
  202%
  203%   Convert a URL in something that fits  in a file, i.e. avoiding /
  204%   and :. We  simply  replace  these  by   -.  We  could  also  use
  205%   www_form_encode/2, but confusion when to replace  as well as the
  206%   fact that we loose the '.' (extension)   makes this a less ideal
  207%   choice.  We could also consider base64 encoding of the name.
  208
  209url_to_file(URL, File) :-
  210    atom_codes(URL, Codes),
  211    phrase(safe_file_name(Codes), FileCodes),
  212    atom_codes(File, FileCodes).
  213
  214safe_file_name([]) -->
  215    [].
  216safe_file_name([H|T]) -->
  217    replace(H),
  218    !,
  219    safe_file_name(T).
  220safe_file_name([H|T]) -->
  221    [H],
  222    safe_file_name(T).
  223
  224%!  replace(+Code)//
  225%
  226%   Replace a character  code  that  cannot   safely  be  put  in  a
  227%   filename. Should we use %XX?
  228
  229replace(0'/)  --> "-".                  % directory separator
  230replace(0'\\) --> "-".                  % not allowed in Windows filename
  231replace(0':)  --> "-".                  % idem
  232replace(0'?)  --> "-".                  % idem
  233replace(0'*)  --> "-".                  % idem
  234
  235
  236                 /*******************************
  237                 *             MESSAGES         *
  238                 *******************************/
  239
  240:- multifile prolog:message/3.  241
  242prolog:message(rdf(cache_created(Dir))) -->
  243    [ 'Created RDF cache directory ~w'-[Dir] ]