View source with formatted comments or as raw
    1/*  Part of SWI-Prolog
    2
    3    Author:        Jan Wielemaker
    4    E-mail:        J.Wielemaker@vu.nl
    5    WWW:           http://www.swi-prolog.org
    6    Copyright (c)  2002-2016, University of Amsterdam
    7                              VU University Amsterdam
    8    All rights reserved.
    9
   10    Redistribution and use in source and binary forms, with or without
   11    modification, are permitted provided that the following conditions
   12    are met:
   13
   14    1. Redistributions of source code must retain the above copyright
   15       notice, this list of conditions and the following disclaimer.
   16
   17    2. Redistributions in binary form must reproduce the above copyright
   18       notice, this list of conditions and the following disclaimer in
   19       the documentation and/or other materials provided with the
   20       distribution.
   21
   22    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   23    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   24    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
   25    FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
   26    COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
   27    INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
   28    BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
   29    LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
   30    CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   31    LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
   32    ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
   33    POSSIBILITY OF SUCH DAMAGE.
   34*/
   35
   36:- module(http_open,
   37          [ http_open/3,                % +URL, -Stream, +Options
   38            http_set_authorization/2,   % +URL, +Authorization
   39            http_close_keep_alive/1     % +Address
   40          ]).   41:- use_module(library(uri)).   42:- use_module(library(readutil)).   43:- use_module(library(socket)).   44:- use_module(library(lists)).   45:- use_module(library(option)).   46:- use_module(library(error)).   47:- use_module(library(base64)).   48:- use_module(library(debug)).   49:- use_module(library(aggregate)).   50:- use_module(library(apply)).   51:- use_module(library(http/http_header), [http_parse_header/2]).   52:- use_module(library(http/http_stream)).   53
   54/** <module> HTTP client library
   55
   56This library defines http_open/3, which opens a  URL as a Prolog stream.
   57The functionality of the  library  can   be  extended  by  loading two
   58additional modules that act as plugins:
   59
   60    * library(http/http_ssl_plugin)
   61    Loading this library causes http_open/3 to handle HTTPS connections.
   62    Relevant options for SSL certificate handling are handed to
   63    ssl_context/3. This plugin is loaded automatically if the scheme
   64    `https` is requested using a default SSL context. See the plugin for
   65    additional information regarding security.
   66
   67    * library(http/http_cookie)
   68    Loading this library adds tracking cookies to http_open/3. Returned
   69    cookies are collected in the Prolog database and supplied for
   70    subsequent requests.
   71
   72Here is a simple example to fetch a web-page:
   73
   74  ==
   75  ?- http_open('http://www.google.com/search?q=prolog', In, []),
   76     copy_stream_data(In, user_output),
   77     close(In).
   78  <!doctype html><head><title>prolog - Google Search</title><script>
   79  ...
   80  ==
   81
   82The example below fetches the modification time of a web-page. Note that
   83Modified is '' (the empty atom)  if   the  web-server does not provide a
   84time-stamp for the resource. See also parse_time/2.
   85
   86  ==
   87  modified(URL, Stamp) :-
   88          http_open(URL, In,
   89                    [ method(head),
   90                      header(last_modified, Modified)
   91                    ]),
   92          close(In),
   93          Modified \== '',
   94          parse_time(Modified, Stamp).
   95  ==
   96
   97Then next example uses Google search. It exploits library(uri) to manage
   98URIs, library(sgml) to load  an  HTML   document  and  library(xpath) to
   99navigate the parsed HTML. Note that  you   may  need to adjust the XPath
  100queries if the data returned by Google changes.
  101
  102  ==
  103  :- use_module(library(http/http_open)).
  104  :- use_module(library(xpath)).
  105  :- use_module(library(sgml)).
  106  :- use_module(library(uri)).
  107
  108  google(For, Title, HREF) :-
  109          uri_encoded(query_value, For, Encoded),
  110          atom_concat('http://www.google.com/search?q=', Encoded, URL),
  111          http_open(URL, In, []),
  112          call_cleanup(
  113              load_html(In, DOM, []),
  114              close(In)),
  115          xpath(DOM, //h3(@class=r), Result),
  116          xpath(Result, //a(@href=HREF0, text), Title),
  117          uri_components(HREF0, Components),
  118          uri_data(search, Components, Query),
  119          uri_query_components(Query, Parts),
  120          memberchk(q=HREF, Parts).
  121  ==
  122
  123An example query is below:
  124
  125==
  126?- google(prolog, Title, HREF).
  127Title = 'SWI-Prolog',
  128HREF = 'http://www.swi-prolog.org/' ;
  129Title = 'Prolog - Wikipedia',
  130HREF = 'https://nl.wikipedia.org/wiki/Prolog' ;
  131Title = 'Prolog - Wikipedia, the free encyclopedia',
  132HREF = 'https://en.wikipedia.org/wiki/Prolog' ;
  133Title = 'Pro-Log is logistiek dienstverlener m.b.t. vervoer over water.',
  134HREF = 'http://www.pro-log.nl/' ;
  135Title = 'Learn Prolog Now!',
  136HREF = 'http://www.learnprolognow.org/' ;
  137Title = 'Free Online Version - Learn Prolog
  138...
  139==
  140
  141@see load_html/3 and xpath/3 can be used to parse and navigate HTML
  142     documents.
  143@see http_get/3 and http_post/4 provide an alternative interface that
  144     convert the reply depending on the =|Content-Type|= header.
  145*/
  146
  147:- multifile
  148    http:encoding_filter/3,           % +Encoding, +In0, -In
  149    http:current_transfer_encoding/1, % ?Encoding
  150    http:disable_encoding_filter/1,   % +ContentType
  151    http:http_protocol_hook/5,        % +Protocol, +Parts, +StreamPair,
  152                                      % -NewStreamPair, +Options
  153    http:open_options/2,              % +Parts, -Options
  154    http:write_cookies/3,             % +Out, +Parts, +Options
  155    http:update_cookies/3,            % +CookieLine, +Parts, +Options
  156    http:authenticate_client/2,       % +URL, +Action
  157    http:http_connection_over_proxy/6.  158
  159:- meta_predicate
  160    http_open(+,-,:).  161
  162:- predicate_options(http_open/3, 3,
  163                     [ authorization(compound),
  164                       final_url(-atom),
  165                       header(+atom, -atom),
  166                       headers(-list),
  167                       connection(+atom),
  168                       method(oneof([delete,get,put,head,post,patch,options])),
  169                       size(-integer),
  170                       status_code(-integer),
  171                       output(-stream),
  172                       timeout(number),
  173                       proxy(atom, integer),
  174                       proxy_authorization(compound),
  175                       bypass_proxy(boolean),
  176                       request_header(any),
  177                       user_agent(atom),
  178                       version(-compound),
  179        % The option below applies if library(http/http_header) is loaded
  180                       post(any),
  181        % The options below apply if library(http/http_ssl_plugin)) is loaded
  182                       pem_password_hook(callable),
  183                       cacert_file(atom),
  184                       cert_verify_hook(callable)
  185                     ]).  186
  187%!  user_agent(-Agent) is det.
  188%
  189%   Default value for =|User-Agent|=,  can   be  overruled using the
  190%   option user_agent(Agent) of http_open/3.
  191
  192user_agent('SWI-Prolog').
  193
  194%!  http_open(+URL, -Stream, +Options) is det.
  195%
  196%   Open the data at the HTTP  server   as  a  Prolog stream. URL is
  197%   either an atom  specifying  a  URL   or  a  list  representing a
  198%   broken-down  URL  as  specified  below.   After  this  predicate
  199%   succeeds the data can be read from Stream. After completion this
  200%   stream must be  closed  using   the  built-in  Prolog  predicate
  201%   close/1. Options provides additional options:
  202%
  203%     * authenticate(+Boolean)
  204%     If `false` (default `true`), do _not_ try to automatically
  205%     authenticate the client if a 401 (Unauthorized) status code
  206%     is received.
  207%
  208%     * authorization(+Term)
  209%     Send authorization. See also http_set_authorization/2. Supported
  210%     schemes:
  211%
  212%       - basic(+User, +Password)
  213%       HTTP Basic authentication.
  214%       - bearer(+Token)
  215%       HTTP Bearer authentication.
  216%       - digest(+User, +Password)
  217%       HTTP Digest authentication.  This option is only provided
  218%       if the plugin library(http/http_digest) is also loaded.
  219%
  220%     * connection(+Connection)
  221%     Specify the =Connection= header.  Default is =close=.  The
  222%     alternative is =|Keep-alive|=.  This maintains a pool of
  223%     available connections as determined by keep_connection/1.
  224%     The library(http/websockets) uses =|Keep-alive, Upgrade|=.
  225%     Keep-alive connections can be closed explicitly using
  226%     http_close_keep_alive/1. Keep-alive connections may
  227%     significantly improve repetitive requests on the same server,
  228%     especially if the IP route is long, HTTPS is used or the
  229%     connection uses a proxy.
  230%
  231%     * final_url(-FinalURL)
  232%     Unify FinalURL with the final   destination. This differs from
  233%     the  original  URL  if  the  returned  head  of  the  original
  234%     indicates an HTTP redirect (codes 301,  302 or 303). Without a
  235%     redirect, FinalURL is the same as URL if  URL is an atom, or a
  236%     URL constructed from the parts.
  237%
  238%     * header(Name, -AtomValue)
  239%     If provided, AtomValue is  unified  with   the  value  of  the
  240%     indicated  field  in  the  reply    header.  Name  is  matched
  241%     case-insensitive and the underscore  (_)   matches  the hyphen
  242%     (-). Multiple of these options  may   be  provided  to extract
  243%     multiple  header  fields.  If  the  header  is  not  available
  244%     AtomValue is unified to the empty atom ('').
  245%
  246%     * headers(-List)
  247%     If provided, List is unified with  a list of Name(Value) pairs
  248%     corresponding to fields in the reply   header.  Name and Value
  249%     follow the same conventions  used   by  the header(Name,Value)
  250%     option.
  251%
  252%     * method(+Method)
  253%     One of =get= (default), =head=, =delete=, =post=,   =put=   or
  254%     =patch=.
  255%     The  =head= message can be
  256%     used in combination with  the   header(Name,  Value) option to
  257%     access information on the resource   without actually fetching
  258%     the resource itself.  The  returned   stream  must  be  closed
  259%     immediately.
  260%
  261%     If post(Data) is provided, the default is =post=.
  262%
  263%     * size(-Size)
  264%     Size is unified with the   integer value of =|Content-Length|=
  265%     in the reply header.
  266%
  267%     * version(-Version)
  268%     Version is a _pair_ `Major-Minor`, where `Major` and `Minor`
  269%     are integers representing the HTTP version in the reply header.
  270%
  271%     * range(+Range)
  272%     Ask for partial content. Range   is  a term _|Unit(From,To)|_,
  273%     where `From` is an integer and `To`   is  either an integer or
  274%     the atom `end`. HTTP 1.1 only   supports Unit = `bytes`. E.g.,
  275%     to   ask   for    bytes    1000-1999,     use    the    option
  276%     range(bytes(1000,1999))
  277%
  278%     * redirect(+Boolean)
  279%     If `false` (default `true`), do _not_ automatically redirect
  280%     if a 3XX code is received.  Must be combined with
  281%     status_code(Code) and one of the header options to read the
  282%     redirect reply. In particular, without status_code(Code) a
  283%     redirect is mapped to an exception.
  284%
  285%     * status_code(-Code)
  286%     If this option is  present  and   Code  unifies  with the HTTP
  287%     status code, do *not* translate errors (4xx, 5xx) into an
  288%     exception. Instead, http_open/3 behaves as if 200 (success) is
  289%     returned, providing the application to read the error document
  290%     from the returned stream.
  291%
  292%     * output(-Out)
  293%     Unify the output stream with Out and do not close it. This can
  294%     be used to upgrade a connection.
  295%
  296%     * timeout(+Timeout)
  297%     If provided, set a timeout on   the stream using set_stream/2.
  298%     With this option if no new data arrives within Timeout seconds
  299%     the stream raises an exception.  Default   is  to wait forever
  300%     (=infinite=).
  301%
  302%     * post(+Data)
  303%     Issue a =POST= request on the HTTP server.  Data is
  304%     handed to http_post_data/3.
  305%
  306%     * proxy(+Host:Port)
  307%     Use an HTTP proxy to connect to the outside world.  See also
  308%     socket:proxy_for_url/3.  This option overrules the proxy
  309%     specification defined by socket:proxy_for_url/3.
  310%
  311%     * proxy(+Host, +Port)
  312%     Synonym for proxy(+Host:Port).  Deprecated.
  313%
  314%     * proxy_authorization(+Authorization)
  315%     Send authorization to the proxy.  Otherwise   the  same as the
  316%     =authorization= option.
  317%
  318%     * bypass_proxy(+Boolean)
  319%     If =true=, bypass proxy hooks.  Default is =false=.
  320%
  321%     * request_header(Name = Value)
  322%     Additional  name-value  parts  are  added   in  the  order  of
  323%     appearance to the HTTP request   header.  No interpretation is
  324%     done.
  325%
  326%     * max_redirect(+Max)
  327%     Sets the maximum length of a redirection chain.  This is needed
  328%     for some IRIs that redirect indefinitely to other IRIs without
  329%     looping (e.g., redirecting to IRIs with a random element in them).
  330%     Max must be either a non-negative integer or the atom `infinite`.
  331%     The default value is `10`.
  332%
  333%     * user_agent(+Agent)
  334%     Defines the value of the  =|User-Agent|=   field  of  the HTTP
  335%     header. Default is =SWI-Prolog=.
  336%
  337%   The hook http:open_options/2 can  be   used  to  provide default
  338%   options   based   on   the   broken-down     URL.   The   option
  339%   status_code(-Code)  is  particularly  useful   to  query  *REST*
  340%   interfaces that commonly return status   codes  other than `200`
  341%   that need to be be processed by the client code.
  342%
  343%   @param URL is either an atom or string (url) or a list of _parts_.
  344
  345%               When provided, this list may contain the fields
  346%               =scheme=, =user=, =password=, =host=, =port=, =path=
  347%               and either =query_string= (whose argument is an atom)
  348%               or =search= (whose argument is a list of
  349%               =|Name(Value)|= or =|Name=Value|= compound terms).
  350%               Only =host= is mandatory.  The example below opens the
  351%               URL =|http://www.example.com/my/path?q=Hello%20World&lang=en|=.
  352%               Note that values must *not* be quoted because the
  353%               library inserts the required quotes.
  354%
  355%               ==
  356%               http_open([ host('www.example.com'),
  357%                           path('/my/path'),
  358%                           search([ q='Hello world',
  359%                                    lang=en
  360%                                  ])
  361%                         ])
  362%               ==
  363%
  364%       @error existence_error(url, Id)
  365%       @see ssl_context/3 for SSL related options if
  366%       library(http/http_ssl_plugin) is loaded.
  367
  368:- multifile
  369    socket:proxy_for_url/3.           % +URL, +Host, -ProxyList
  370
  371http_open(URL, Stream, QOptions) :-
  372    meta_options(is_meta, QOptions, Options),
  373    (   atomic(URL)
  374    ->  parse_url_ex(URL, Parts)
  375    ;   Parts = URL
  376    ),
  377    autoload_https(Parts),
  378    add_authorization(Parts, Options, Options1),
  379    findall(HostOptions,
  380            http:open_options(Parts, HostOptions),
  381            AllHostOptions),
  382    foldl(merge_options_rev, AllHostOptions, Options1, Options2),
  383    (   option(bypass_proxy(true), Options)
  384    ->  try_http_proxy(direct, Parts, Stream, Options2)
  385    ;   term_variables(Options2, Vars2),
  386        findall(Result-Vars2,
  387                try_a_proxy(Parts, Result, Options2),
  388                ResultList),
  389        last(ResultList, Status-Vars2)
  390    ->  (   Status = true(_Proxy, Stream)
  391        ->  true
  392        ;   throw(error(proxy_error(tried(ResultList)), _))
  393        )
  394    ;   try_http_proxy(direct, Parts, Stream, Options2)
  395    ).
  396
  397try_a_proxy(Parts, Result, Options) :-
  398    parts_uri(Parts, AtomicURL),
  399    option(host(Host), Parts),
  400    (   (   option(proxy(ProxyHost:ProxyPort), Options)
  401        ;   is_list(Options),
  402            memberchk(proxy(ProxyHost,ProxyPort), Options)
  403        )
  404    ->  Proxy = proxy(ProxyHost, ProxyPort)
  405    ;   socket:proxy_for_url(AtomicURL, Host, Proxy)
  406    ),
  407    debug(http(proxy),
  408          'http_open: Connecting via ~w to ~w', [Proxy, AtomicURL]),
  409    (   catch(try_http_proxy(Proxy, Parts, Stream, Options), E, true)
  410    ->  (   var(E)
  411        ->  !, Result = true(Proxy, Stream)
  412        ;   Result = error(Proxy, E)
  413        )
  414    ;   Result = false(Proxy)
  415    ),
  416    debug(http(proxy), 'http_open: ~w: ~p', [Proxy, Result]).
  417
  418try_http_proxy(Method, Parts, Stream, Options0) :-
  419    option(host(Host), Parts),
  420    (   Method == direct
  421    ->  parts_request_uri(Parts, RequestURI)
  422    ;   parts_uri(Parts, RequestURI)
  423    ),
  424    select_option(visited(Visited0), Options0, OptionsV, []),
  425    Options = [visited([Parts|Visited0])|OptionsV],
  426    parts_scheme(Parts, Scheme),
  427    default_port(Scheme, DefPort),
  428    url_part(port(Port), Parts, DefPort),
  429    host_and_port(Host, DefPort, Port, HostPort),
  430    (   option(connection(Connection), Options0),
  431        keep_alive(Connection),
  432        get_from_pool(Host:Port, StreamPair),
  433        debug(http(connection), 'Trying Keep-alive to ~p using ~p',
  434              [ Host:Port, StreamPair ]),
  435        catch(send_rec_header(StreamPair, Stream, HostPort,
  436                              RequestURI, Parts, Options),
  437              error(E,_),
  438              keep_alive_error(E))
  439    ->  true
  440    ;   http:http_connection_over_proxy(Method, Parts, Host:Port,
  441                                        SocketStreamPair, Options, Options1),
  442        (   catch(http:http_protocol_hook(Scheme, Parts,
  443                                          SocketStreamPair,
  444                                          StreamPair, Options),
  445                  Error,
  446                  ( close(SocketStreamPair, [force(true)]),
  447                    throw(Error)))
  448        ->  true
  449        ;   StreamPair = SocketStreamPair
  450        ),
  451        send_rec_header(StreamPair, Stream, HostPort,
  452                        RequestURI, Parts, Options1)
  453    ),
  454    return_final_url(Options).
  455
  456http:http_connection_over_proxy(direct, _, Host:Port,
  457                                StreamPair, Options, Options) :-
  458    !,
  459    open_socket(Host:Port, StreamPair, Options).
  460http:http_connection_over_proxy(proxy(ProxyHost, ProxyPort), Parts, _,
  461                                StreamPair, Options, Options) :-
  462    \+ ( memberchk(scheme(Scheme), Parts),
  463         secure_scheme(Scheme)
  464       ),
  465    !,
  466    % We do not want any /more/ proxy after this
  467    open_socket(ProxyHost:ProxyPort, StreamPair,
  468                [bypass_proxy(true)|Options]).
  469http:http_connection_over_proxy(socks(SocksHost, SocksPort), _Parts, Host:Port,
  470                                StreamPair, Options, Options) :-
  471    !,
  472    tcp_connect(SocksHost:SocksPort, StreamPair, [bypass_proxy(true)]),
  473    catch(negotiate_socks_connection(Host:Port, StreamPair),
  474          Error,
  475          ( close(StreamPair, [force(true)]),
  476            throw(Error)
  477          )).
  478
  479
  480merge_options_rev(Old, New, Merged) :-
  481    merge_options(New, Old, Merged).
  482
  483is_meta(pem_password_hook).             % SSL plugin callbacks
  484is_meta(cert_verify_hook).
  485
  486
  487http:http_protocol_hook(http, _, StreamPair, StreamPair, _).
  488
  489default_port(https, 443) :- !.
  490default_port(wss,   443) :- !.
  491default_port(_,     80).
  492
  493host_and_port(Host, DefPort, DefPort, Host) :- !.
  494host_and_port(Host, _,       Port,    Host:Port).
  495
  496%!  autoload_https(+Parts) is det.
  497%
  498%   If the requested scheme is https or wss, load the HTTPS plugin.
  499
  500autoload_https(Parts) :-
  501    memberchk(scheme(S), Parts),
  502    secure_scheme(S),
  503    \+ clause(http:http_protocol_hook(S, _, StreamPair, StreamPair, _),_),
  504    exists_source(library(http/http_ssl_plugin)),
  505    !,
  506    use_module(library(http/http_ssl_plugin)).
  507autoload_https(_).
  508
  509secure_scheme(https).
  510secure_scheme(wss).
  511
  512%!  send_rec_header(+StreamPair, -Stream,
  513%!                  +Host, +RequestURI, +Parts, +Options) is det.
  514%
  515%   Send header to Out and process reply.  If there is an error or
  516%   failure, close In and Out and return the error or failure.
  517
  518send_rec_header(StreamPair, Stream, Host, RequestURI, Parts, Options) :-
  519    (   catch(guarded_send_rec_header(StreamPair, Stream,
  520                                      Host, RequestURI, Parts, Options),
  521              E, true)
  522    ->  (   var(E)
  523        ->  (   option(output(StreamPair), Options)
  524            ->  true
  525            ;   true
  526            )
  527        ;   close(StreamPair, [force(true)]),
  528            throw(E)
  529        )
  530    ;   close(StreamPair, [force(true)]),
  531        fail
  532    ).
  533
  534guarded_send_rec_header(StreamPair, Stream, Host, RequestURI, Parts, Options) :-
  535    user_agent(Agent, Options),
  536    method(Options, MNAME),
  537    http_version(Version),
  538    option(connection(Connection), Options, close),
  539    debug(http(send_request), "> ~w ~w HTTP/~w", [MNAME, RequestURI, Version]),
  540    debug(http(send_request), "> Host: ~w", [Host]),
  541    debug(http(send_request), "> User-Agent: ~w", [Agent]),
  542    debug(http(send_request), "> Connection: ~w", [Connection]),
  543    format(StreamPair,
  544           '~w ~w HTTP/~w\r\n\c
  545               Host: ~w\r\n\c
  546               User-Agent: ~w\r\n\c
  547               Connection: ~w\r\n',
  548           [MNAME, RequestURI, Version, Host, Agent, Connection]),
  549    parts_uri(Parts, URI),
  550    x_headers(Options, URI, StreamPair),
  551    write_cookies(StreamPair, Parts, Options),
  552    (   option(post(PostData), Options)
  553    ->  http_header:http_post_data(PostData, StreamPair, [])
  554    ;   format(StreamPair, '\r\n', [])
  555    ),
  556    flush_output(StreamPair),
  557                                    % read the reply header
  558    read_header(StreamPair, Parts, ReplyVersion, Code, Comment, Lines),
  559    update_cookies(Lines, Parts, Options),
  560    do_open(ReplyVersion, Code, Comment, Lines, Options, Parts, Host,
  561            StreamPair, Stream).
  562
  563
  564%!  http_version(-Version:atom) is det.
  565%
  566%   HTTP version we publish. We  can  only   use  1.1  if we support
  567%   chunked encoding.
  568
  569http_version('1.1') :-
  570    http:current_transfer_encoding(chunked),
  571    !.
  572http_version('1.0').
  573
  574method(Options, MNAME) :-
  575    option(post(_), Options),
  576    !,
  577    option(method(M), Options, post),
  578    (   map_method(M, MNAME0)
  579    ->  MNAME = MNAME0
  580    ;   domain_error(method, M)
  581    ).
  582method(Options, MNAME) :-
  583    option(method(M), Options, get),
  584    (   map_method(M, MNAME0)
  585    ->  MNAME = MNAME0
  586    ;   map_method(_, M)
  587    ->  MNAME = M
  588    ;   domain_error(method, M)
  589    ).
  590
  591map_method(delete,  'DELETE').
  592map_method(get,     'GET').
  593map_method(head,    'HEAD').
  594map_method(post,    'POST').
  595map_method(put,     'PUT').
  596map_method(patch,   'PATCH').
  597map_method(options, 'OPTIONS').
  598
  599%!  x_headers(+Options, +URI, +Out) is det.
  600%
  601%   Emit extra headers from   request_header(Name=Value)  options in
  602%   Options.
  603%
  604%   @tbd Use user/password fields
  605
  606x_headers(Options, URI, Out) :-
  607    x_headers_(Options, [url(URI)|Options], Out).
  608
  609x_headers_([], _, _).
  610x_headers_([H|T], Options, Out) :-
  611    x_header(H, Options, Out),
  612    x_headers_(T, Options, Out).
  613
  614x_header(request_header(Name=Value), _, Out) :-
  615    !,
  616    debug(http(send_request), "> ~w: ~w", [Name, Value]),
  617    format(Out, '~w: ~w\r\n', [Name, Value]).
  618x_header(proxy_authorization(ProxyAuthorization), Options, Out) :-
  619    !,
  620    auth_header(ProxyAuthorization, Options, 'Proxy-Authorization', Out).
  621x_header(authorization(Authorization), Options, Out) :-
  622    !,
  623    auth_header(Authorization, Options, 'Authorization', Out).
  624x_header(range(Spec), _, Out) :-
  625    !,
  626    Spec =.. [Unit, From, To],
  627    (   To == end
  628    ->  ToT = ''
  629    ;   must_be(integer, To),
  630        ToT = To
  631    ),
  632    debug(http(send_request), "> Range: ~w=~d-~w", [Unit, From, ToT]),
  633    format(Out, 'Range: ~w=~d-~w\r\n', [Unit, From, ToT]).
  634x_header(_, _, _).
  635
  636%!  auth_header(+AuthOption, +Options, +HeaderName, +Out)
  637
  638auth_header(basic(User, Password), _, Header, Out) :-
  639    !,
  640    format(codes(Codes), '~w:~w', [User, Password]),
  641    phrase(base64(Codes), Base64Codes),
  642    debug(http(send_request), "> ~w: Basic ~s", [Header, Base64Codes]),
  643    format(Out, '~w: Basic ~s\r\n', [Header, Base64Codes]).
  644auth_header(bearer(Token), _, Header, Out) :-
  645    !,
  646    debug(http(send_request), "> ~w: Bearer ~w", [Header,Token]),
  647    format(Out, '~w: Bearer ~w\r\n', [Header, Token]).
  648auth_header(Auth, Options, _, Out) :-
  649    option(url(URL), Options),
  650    add_method(Options, Options1),
  651    http:authenticate_client(URL, send_auth_header(Auth, Out, Options1)),
  652    !.
  653auth_header(Auth, _, _, _) :-
  654    domain_error(authorization, Auth).
  655
  656user_agent(Agent, Options) :-
  657    (   option(user_agent(Agent), Options)
  658    ->  true
  659    ;   user_agent(Agent)
  660    ).
  661
  662add_method(Options0, Options) :-
  663    option(method(_), Options0),
  664    !,
  665    Options = Options0.
  666add_method(Options0, Options) :-
  667    option(post(_), Options0),
  668    !,
  669    Options = [method(post)|Options0].
  670add_method(Options0, [method(get)|Options0]).
  671
  672
  673%!  do_open(+HTTPVersion, +HTTPStatusCode, +HTTPStatusComment, +Header,
  674%!          +Options, +Parts, +Host, +In, -FinalIn) is det.
  675%
  676%   Handle the HTTP status. If 200, we   are ok. If a redirect, redo
  677%   the open, returning a new stream. Else issue an error.
  678%
  679%   @error  existence_error(url, URL)
  680
  681                                        % Redirections
  682do_open(_, Code, _, Lines, Options0, Parts, _, In, Stream) :-
  683    redirect_code(Code),
  684    option(redirect(true), Options0, true),
  685    location(Lines, RequestURI),
  686    !,
  687    debug(http(redirect), 'http_open: redirecting to ~w', [RequestURI]),
  688    close(In),
  689    parts_uri(Parts, Base),
  690    uri_resolve(RequestURI, Base, Redirected),
  691    parse_url_ex(Redirected, RedirectedParts),
  692    (   redirect_limit_exceeded(Options0, Max)
  693    ->  format(atom(Comment), 'max_redirect (~w) limit exceeded', [Max]),
  694        throw(error(permission_error(redirect, http, Redirected),
  695                    context(_, Comment)))
  696    ;   redirect_loop(RedirectedParts, Options0)
  697    ->  throw(error(permission_error(redirect, http, Redirected),
  698                    context(_, 'Redirection loop')))
  699    ;   true
  700    ),
  701    redirect_options(Options0, Options),
  702    http_open(RedirectedParts, Stream, Options).
  703                                        % Need authentication
  704do_open(_Version, Code, _Comment, Lines, Options0, Parts, _Host, In0, Stream) :-
  705    authenticate_code(Code),
  706    option(authenticate(true), Options0, true),
  707    parts_uri(Parts, URI),
  708    parse_headers(Lines, Headers),
  709    http:authenticate_client(
  710             URI,
  711             auth_reponse(Headers, Options0, Options)),
  712    !,
  713    close(In0),
  714    http_open(Parts, Stream, Options).
  715                                        % Accepted codes
  716do_open(Version, Code, _, Lines, Options, Parts, Host, In0, In) :-
  717    (   option(status_code(Code), Options),
  718        Lines \== []
  719    ->  true
  720    ;   Code == 200
  721    ),
  722    !,
  723    parts_uri(Parts, URI),
  724    parse_headers(Lines, Headers),
  725    return_version(Options, Version),
  726    return_size(Options, Headers),
  727    return_fields(Options, Headers),
  728    return_headers(Options, Headers),
  729    consider_keep_alive(Lines, Parts, Host, In0, In1, Options),
  730    transfer_encoding_filter(Lines, In1, In),
  731                                    % properly re-initialise the stream
  732    set_stream(In, file_name(URI)),
  733    set_stream(In, record_position(true)).
  734do_open(_, _, _, [], Options, _, _, _, _) :-
  735    option(connection(Connection), Options),
  736    keep_alive(Connection),
  737    !,
  738    throw(error(keep_alive(closed),_)).
  739                                        % report anything else as error
  740do_open(_Version, Code, Comment, _,  _, Parts, _, _, _) :-
  741    parts_uri(Parts, URI),
  742    (   map_error_code(Code, Error)
  743    ->  Formal =.. [Error, url, URI]
  744    ;   Formal = existence_error(url, URI)
  745    ),
  746    throw(error(Formal, context(_, status(Code, Comment)))).
  747
  748
  749%!  redirect_limit_exceeded(+Options:list(compound), -Max:nonneg) is semidet.
  750%
  751%   True if we have exceeded the maximum redirection length (default 10).
  752
  753redirect_limit_exceeded(Options, Max) :-
  754    option(visited(Visited), Options, []),
  755    length(Visited, N),
  756    option(max_redirect(Max), Options, 10),
  757    (Max == infinite -> fail ; N > Max).
  758
  759
  760%!  redirect_loop(+Parts, +Options) is semidet.
  761%
  762%   True if we are in  a  redirection   loop.  Note  that some sites
  763%   redirect once to the same place using  cookies or similar, so we
  764%   allow for two tries. In fact,   we  should probably test whether
  765%   authorization or cookie headers have changed.
  766
  767redirect_loop(Parts, Options) :-
  768    option(visited(Visited), Options, []),
  769    include(==(Parts), Visited, Same),
  770    length(Same, Count),
  771    Count > 2.
  772
  773
  774%!  redirect_options(+Options0, -Options) is det.
  775%
  776%   A redirect from a POST should do a GET on the returned URI. This
  777%   means we must remove  the   method(post)  and post(Data) options
  778%   from the original option-list.
  779
  780redirect_options(Options0, Options) :-
  781    (   select_option(post(_), Options0, Options1)
  782    ->  true
  783    ;   Options1 = Options0
  784    ),
  785    (   select_option(method(Method), Options1, Options),
  786        \+ redirect_method(Method)
  787    ->  true
  788    ;   Options = Options1
  789    ).
  790
  791redirect_method(delete).
  792redirect_method(get).
  793redirect_method(head).
  794
  795
  796%!  map_error_code(+HTTPCode, -PrologError) is semidet.
  797%
  798%   Map HTTP error codes to Prolog errors.
  799%
  800%   @tbd    Many more maps. Unfortunately many have no sensible Prolog
  801%           counterpart.
  802
  803map_error_code(401, permission_error).
  804map_error_code(403, permission_error).
  805map_error_code(404, existence_error).
  806map_error_code(405, permission_error).
  807map_error_code(407, permission_error).
  808map_error_code(410, existence_error).
  809
  810redirect_code(301).                     % moved permanently
  811redirect_code(302).                     % moved temporary
  812redirect_code(303).                     % see also
  813
  814authenticate_code(401).
  815
  816%!  open_socket(+Address, -StreamPair, +Options) is det.
  817%
  818%   Create and connect a client socket to Address.  Options
  819%
  820%       * timeout(+Timeout)
  821%       Sets timeout on the stream, *after* connecting the
  822%       socket.
  823%
  824%   @tbd    Make timeout also work on tcp_connect/4.
  825%   @tbd    This is the same as do_connect/4 in http_client.pl
  826
  827open_socket(Address, StreamPair, Options) :-
  828    debug(http(open), 'http_open: Connecting to ~p ...', [Address]),
  829    tcp_connect(Address, StreamPair, Options),
  830    stream_pair(StreamPair, In, Out),
  831    debug(http(open), '\tok ~p ---> ~p', [In, Out]),
  832    set_stream(In, record_position(false)),
  833    (   option(timeout(Timeout), Options)
  834    ->  set_stream(In, timeout(Timeout))
  835    ;   true
  836    ).
  837
  838
  839return_version(Options, Major-Minor) :-
  840    option(version(Major-Minor), Options, _).
  841
  842return_size(Options, Headers) :-
  843    (   memberchk(content_length(Size), Headers)
  844    ->  option(size(Size), Options, _)
  845    ;   true
  846    ).
  847
  848return_fields([], _).
  849return_fields([header(Name, Value)|T], Headers) :-
  850    !,
  851    (   Term =.. [Name,Value],
  852        memberchk(Term, Headers)
  853    ->  true
  854    ;   Value = ''
  855    ),
  856    return_fields(T, Headers).
  857return_fields([_|T], Lines) :-
  858    return_fields(T, Lines).
  859
  860return_headers(Options, Headers) :-
  861    option(headers(Headers), Options, _).
  862
  863%!  parse_headers(+Lines, -Headers:list(compound)) is det.
  864%
  865%   Parse the header lines for   the  headers(-List) option. Invalid
  866%   header   lines   are   skipped,   printing   a   warning   using
  867%   pring_message/2.
  868
  869parse_headers([], []) :- !.
  870parse_headers([Line|Lines], Headers) :-
  871    catch(http_parse_header(Line, [Header]), Error, true),
  872    (   var(Error)
  873    ->  Headers = [Header|More]
  874    ;   print_message(warning, Error),
  875        Headers = More
  876    ),
  877    parse_headers(Lines, More).
  878
  879
  880%!  return_final_url(+Options) is semidet.
  881%
  882%   If Options contains final_url(URL), unify URL with the final
  883%   URL after redirections.
  884
  885return_final_url(Options) :-
  886    option(final_url(URL), Options),
  887    var(URL),
  888    !,
  889    option(visited([Parts|_]), Options),
  890    parts_uri(Parts, URL).
  891return_final_url(_).
  892
  893
  894%!  transfer_encoding_filter(+Lines, +In0, -In) is det.
  895%
  896%   Install filters depending on the transfer  encoding. If In0 is a
  897%   stream-pair, we close the output   side. If transfer-encoding is
  898%   not specified, the content-encoding is  interpreted as a synonym
  899%   for transfer-encoding, because many   servers incorrectly depend
  900%   on  this.  Exceptions  to  this   are  content-types  for  which
  901%   disable_encoding_filter/1 holds.
  902
  903transfer_encoding_filter(Lines, In0, In) :-
  904    transfer_encoding(Lines, Encoding),
  905    !,
  906    transfer_encoding_filter_(Encoding, In0, In).
  907transfer_encoding_filter(Lines, In0, In) :-
  908    content_encoding(Lines, Encoding),
  909    content_type(Lines, Type),
  910    \+ http:disable_encoding_filter(Type),
  911    !,
  912    transfer_encoding_filter_(Encoding, In0, In).
  913transfer_encoding_filter(_, In, In).
  914
  915transfer_encoding_filter_(Encoding, In0, In) :-
  916    stream_pair(In0, In1, Out),
  917    (   nonvar(Out)
  918    ->  close(Out)
  919    ;   true
  920    ),
  921    (   http:encoding_filter(Encoding, In1, In)
  922    ->  true
  923    ;   domain_error(http_encoding, Encoding)
  924    ).
  925
  926content_type(Lines, Type) :-
  927    member(Line, Lines),
  928    phrase(field('content-type'), Line, Rest),
  929    !,
  930    atom_codes(Type, Rest).
  931
  932%!  http:disable_encoding_filter(+ContentType) is semidet.
  933%
  934%   Do not use  the   =|Content-encoding|=  as =|Transfer-encoding|=
  935%   encoding for specific values of   ContentType. This predicate is
  936%   multifile and can thus be extended by the user.
  937
  938http:disable_encoding_filter('application/x-gzip').
  939http:disable_encoding_filter('application/x-tar').
  940http:disable_encoding_filter('x-world/x-vrml').
  941http:disable_encoding_filter('application/zip').
  942http:disable_encoding_filter('application/x-gzip').
  943http:disable_encoding_filter('application/x-zip-compressed').
  944http:disable_encoding_filter('application/x-compress').
  945http:disable_encoding_filter('application/x-compressed').
  946http:disable_encoding_filter('application/x-spoon').
  947
  948%!  transfer_encoding(+Lines, -Encoding) is semidet.
  949%
  950%   True if Encoding  is  the   value  of  the =|Transfer-encoding|=
  951%   header.
  952
  953transfer_encoding(Lines, Encoding) :-
  954    what_encoding(transfer_encoding, Lines, Encoding).
  955
  956what_encoding(What, Lines, Encoding) :-
  957    member(Line, Lines),
  958    phrase(encoding_(What, Debug), Line, Rest),
  959    !,
  960    atom_codes(Encoding, Rest),
  961    debug(http(What), '~w: ~p', [Debug, Rest]).
  962
  963encoding_(content_encoding, 'Content-encoding') -->
  964    field('content-encoding').
  965encoding_(transfer_encoding, 'Transfer-encoding') -->
  966    field('transfer-encoding').
  967
  968%!  content_encoding(+Lines, -Encoding) is semidet.
  969%
  970%   True if Encoding is the value of the =|Content-encoding|=
  971%   header.
  972
  973content_encoding(Lines, Encoding) :-
  974    what_encoding(content_encoding, Lines, Encoding).
  975
  976%!  read_header(+In:stream, +Parts, -Version, -Code:int,
  977%!  -Comment:atom, -Lines:list) is det.
  978%
  979%   Read the HTTP reply-header.  If the reply is completely empty
  980%   an existence error is thrown.  If the replied header is
  981%   otherwise invalid a 500 HTTP error is simulated, having the
  982%   comment =|Invalid reply header|=.
  983%
  984%   @param Parts    A list of compound terms that describe the
  985%                   parsed request URI.
  986%   @param Version  HTTP reply version as Major-Minor pair
  987%   @param Code     Numeric HTTP reply-code
  988%   @param Comment  Comment of reply-code as atom
  989%   @param Lines    Remaining header lines as code-lists.
  990%
  991%   @error existence_error(http_reply, Uri)
  992
  993read_header(In, Parts, Major-Minor, Code, Comment, Lines) :-
  994    read_line_to_codes(In, Line),
  995    (   Line == end_of_file
  996    ->  parts_uri(Parts, Uri),
  997        existence_error(http_reply,Uri)
  998    ;   true
  999    ),
 1000    Line \== end_of_file,
 1001    phrase(first_line(Major-Minor, Code, Comment), Line),
 1002    debug(http(open), 'HTTP/~d.~d ~w ~w', [Major, Minor, Code, Comment]),
 1003    read_line_to_codes(In, Line2),
 1004    rest_header(Line2, In, Lines),
 1005    !,
 1006    (   debugging(http(open))
 1007    ->  forall(member(HL, Lines),
 1008               debug(http(open), '~s', [HL]))
 1009    ;   true
 1010    ).
 1011read_header(_, _, 1-1, 500, 'Invalid reply header', []).
 1012
 1013rest_header([], _, []) :- !.            % blank line: end of header
 1014rest_header(L0, In, [L0|L]) :-
 1015    read_line_to_codes(In, L1),
 1016    rest_header(L1, In, L).
 1017
 1018%!  content_length(+Header, -Length:int) is semidet.
 1019%
 1020%   Find the Content-Length in an HTTP reply-header.
 1021
 1022content_length(Lines, Length) :-
 1023    member(Line, Lines),
 1024    phrase(content_length(Length0), Line),
 1025    !,
 1026    Length = Length0.
 1027
 1028location(Lines, RequestURI) :-
 1029    member(Line, Lines),
 1030    phrase(atom_field(location, RequestURI), Line),
 1031    !.
 1032
 1033connection(Lines, Connection) :-
 1034    member(Line, Lines),
 1035    phrase(atom_field(connection, Connection0), Line),
 1036    !,
 1037    Connection = Connection0.
 1038
 1039first_line(Major-Minor, Code, Comment) -->
 1040    "HTTP/", integer(Major), ".", integer(Minor),
 1041    skip_blanks,
 1042    integer(Code),
 1043    skip_blanks,
 1044    rest(Comment).
 1045
 1046atom_field(Name, Value) -->
 1047    field(Name),
 1048    rest(Value).
 1049
 1050content_length(Len) -->
 1051    field('content-length'),
 1052    integer(Len).
 1053
 1054field(Name) -->
 1055    { atom_codes(Name, Codes) },
 1056    field_codes(Codes).
 1057
 1058field_codes([]) -->
 1059    ":",
 1060    skip_blanks.
 1061field_codes([H|T]) -->
 1062    [C],
 1063    { match_header_char(H, C)
 1064    },
 1065    field_codes(T).
 1066
 1067match_header_char(C, C) :- !.
 1068match_header_char(C, U) :-
 1069    code_type(C, to_lower(U)),
 1070    !.
 1071match_header_char(0'_, 0'-).
 1072
 1073
 1074skip_blanks -->
 1075    [C],
 1076    { code_type(C, white)
 1077    },
 1078    !,
 1079    skip_blanks.
 1080skip_blanks -->
 1081    [].
 1082
 1083%!  integer(-Int)//
 1084%
 1085%   Read 1 or more digits and return as integer.
 1086
 1087integer(Code) -->
 1088    digit(D0),
 1089    digits(D),
 1090    { number_codes(Code, [D0|D])
 1091    }.
 1092
 1093digit(C) -->
 1094    [C],
 1095    { code_type(C, digit)
 1096    }.
 1097
 1098digits([D0|D]) -->
 1099    digit(D0),
 1100    !,
 1101    digits(D).
 1102digits([]) -->
 1103    [].
 1104
 1105%!  rest(-Atom:atom)//
 1106%
 1107%   Get rest of input as an atom.
 1108
 1109rest(Atom) --> call(rest_(Atom)).
 1110
 1111rest_(Atom, L, []) :-
 1112    atom_codes(Atom, L).
 1113
 1114
 1115                 /*******************************
 1116                 *   AUTHORIZATION MANAGEMENT   *
 1117                 *******************************/
 1118
 1119%!  http_set_authorization(+URL, +Authorization) is det.
 1120%
 1121%   Set user/password to supply with URLs   that have URL as prefix.
 1122%   If  Authorization  is  the   atom    =|-|=,   possibly   defined
 1123%   authorization is cleared.  For example:
 1124%
 1125%   ==
 1126%   ?- http_set_authorization('http://www.example.com/private/',
 1127%                             basic('John', 'Secret'))
 1128%   ==
 1129%
 1130%   @tbd    Move to a separate module, so http_get/3, etc. can use this
 1131%           too.
 1132
 1133:- dynamic
 1134    stored_authorization/2,
 1135    cached_authorization/2. 1136
 1137http_set_authorization(URL, Authorization) :-
 1138    must_be(atom, URL),
 1139    retractall(stored_authorization(URL, _)),
 1140    (   Authorization = (-)
 1141    ->  true
 1142    ;   check_authorization(Authorization),
 1143        assert(stored_authorization(URL, Authorization))
 1144    ),
 1145    retractall(cached_authorization(_,_)).
 1146
 1147check_authorization(Var) :-
 1148    var(Var),
 1149    !,
 1150    instantiation_error(Var).
 1151check_authorization(basic(User, Password)) :-
 1152    must_be(atom, User),
 1153    must_be(text, Password).
 1154check_authorization(digest(User, Password)) :-
 1155    must_be(atom, User),
 1156    must_be(text, Password).
 1157
 1158%!  authorization(+URL, -Authorization) is semidet.
 1159%
 1160%   True if Authorization must be supplied for URL.
 1161%
 1162%   @tbd    Cleanup cache if it gets too big.
 1163
 1164authorization(_, _) :-
 1165    \+ stored_authorization(_, _),
 1166    !,
 1167    fail.
 1168authorization(URL, Authorization) :-
 1169    cached_authorization(URL, Authorization),
 1170    !,
 1171    Authorization \== (-).
 1172authorization(URL, Authorization) :-
 1173    (   stored_authorization(Prefix, Authorization),
 1174        sub_atom(URL, 0, _, _, Prefix)
 1175    ->  assert(cached_authorization(URL, Authorization))
 1176    ;   assert(cached_authorization(URL, -)),
 1177        fail
 1178    ).
 1179
 1180add_authorization(_, Options, Options) :-
 1181    option(authorization(_), Options),
 1182    !.
 1183add_authorization(Parts, Options0, Options) :-
 1184    url_part(user(User), Parts),
 1185    url_part(password(Passwd), Parts),
 1186    Options = [authorization(basic(User,Passwd))|Options0].
 1187add_authorization(Parts, Options0, Options) :-
 1188    stored_authorization(_, _) ->   % quick test to avoid work
 1189    parts_uri(Parts, URL),
 1190    authorization(URL, Auth),
 1191    !,
 1192    Options = [authorization(Auth)|Options0].
 1193add_authorization(_, Options, Options).
 1194
 1195
 1196%!  parse_url_ex(+URL, -Parts)
 1197%
 1198%   Parts:  Scheme,  Host,  Port,    User:Password,  RequestURI  (no
 1199%   fragment).
 1200
 1201parse_url_ex(URL, [uri(URL)|Parts]) :-
 1202    uri_components(URL, Components),
 1203    phrase(components(Components), Parts),
 1204    (   option(host(_), Parts)
 1205    ->  true
 1206    ;   domain_error(url, URL)
 1207    ).
 1208
 1209components(Components) -->
 1210    uri_scheme(Components),
 1211    uri_authority(Components),
 1212    uri_request_uri(Components).
 1213
 1214uri_scheme(Components) -->
 1215    { uri_data(scheme, Components, Scheme), nonvar(Scheme) },
 1216    !,
 1217    [ scheme(Scheme)
 1218    ].
 1219uri_scheme(_) --> [].
 1220
 1221uri_authority(Components) -->
 1222    { uri_data(authority, Components, Auth), nonvar(Auth),
 1223      !,
 1224      uri_authority_components(Auth, Data)
 1225    },
 1226    [ authority(Auth) ],
 1227    auth_field(user, Data),
 1228    auth_field(password, Data),
 1229    auth_field(host, Data),
 1230    auth_field(port, Data).
 1231uri_authority(_) --> [].
 1232
 1233auth_field(Field, Data) -->
 1234    { uri_authority_data(Field, Data, EncValue), nonvar(EncValue),
 1235      !,
 1236      (   atom(EncValue)
 1237      ->  uri_encoded(query_value, Value, EncValue)
 1238      ;   Value = EncValue
 1239      ),
 1240      Part =.. [Field,Value]
 1241    },
 1242    [ Part ].
 1243auth_field(_, _) --> [].
 1244
 1245uri_request_uri(Components) -->
 1246    { uri_data(path, Components, Path0),
 1247      uri_data(search, Components, Search),
 1248      (   Path0 == ''
 1249      ->  Path = (/)
 1250      ;   Path = Path0
 1251      ),
 1252      uri_data(path, Components2, Path),
 1253      uri_data(search, Components2, Search),
 1254      uri_components(RequestURI, Components2)
 1255    },
 1256    [ request_uri(RequestURI)
 1257    ].
 1258
 1259%!  parts_scheme(+Parts, -Scheme) is det.
 1260%!  parts_uri(+Parts, -URI) is det.
 1261%!  parts_request_uri(+Parts, -RequestURI) is det.
 1262%!  parts_search(+Parts, -Search) is det.
 1263%!  parts_authority(+Parts, -Authority) is semidet.
 1264
 1265parts_scheme(Parts, Scheme) :-
 1266    url_part(scheme(Scheme), Parts),
 1267    !.
 1268parts_scheme(Parts, Scheme) :-          % compatibility with library(url)
 1269    url_part(protocol(Scheme), Parts),
 1270    !.
 1271parts_scheme(_, http).
 1272
 1273parts_authority(Parts, Auth) :-
 1274    url_part(authority(Auth), Parts),
 1275    !.
 1276parts_authority(Parts, Auth) :-
 1277    url_part(host(Host), Parts, _),
 1278    url_part(port(Port), Parts, _),
 1279    url_part(user(User), Parts, _),
 1280    url_part(password(Password), Parts, _),
 1281    uri_authority_components(Auth,
 1282                             uri_authority(User, Password, Host, Port)).
 1283
 1284parts_request_uri(Parts, RequestURI) :-
 1285    option(request_uri(RequestURI), Parts),
 1286    !.
 1287parts_request_uri(Parts, RequestURI) :-
 1288    url_part(path(Path), Parts, /),
 1289    ignore(parts_search(Parts, Search)),
 1290    uri_data(path, Data, Path),
 1291    uri_data(search, Data, Search),
 1292    uri_components(RequestURI, Data).
 1293
 1294parts_search(Parts, Search) :-
 1295    option(query_string(Search), Parts),
 1296    !.
 1297parts_search(Parts, Search) :-
 1298    option(search(Fields), Parts),
 1299    !,
 1300    uri_query_components(Search, Fields).
 1301
 1302
 1303parts_uri(Parts, URI) :-
 1304    option(uri(URI), Parts),
 1305    !.
 1306parts_uri(Parts, URI) :-
 1307    parts_scheme(Parts, Scheme),
 1308    ignore(parts_authority(Parts, Auth)),
 1309    parts_request_uri(Parts, RequestURI),
 1310    uri_components(RequestURI, Data),
 1311    uri_data(scheme, Data, Scheme),
 1312    uri_data(authority, Data, Auth),
 1313    uri_components(URI, Data).
 1314
 1315parts_port(Parts, Port) :-
 1316    parts_scheme(Parts, Scheme),
 1317    default_port(Scheme, DefPort),
 1318    url_part(port(Port), Parts, DefPort).
 1319
 1320url_part(Part, Parts) :-
 1321    Part =.. [Name,Value],
 1322    Gen =.. [Name,RawValue],
 1323    option(Gen, Parts),
 1324    !,
 1325    Value = RawValue.
 1326
 1327url_part(Part, Parts, Default) :-
 1328    Part =.. [Name,Value],
 1329    Gen =.. [Name,RawValue],
 1330    (   option(Gen, Parts)
 1331    ->  Value = RawValue
 1332    ;   Value = Default
 1333    ).
 1334
 1335
 1336                 /*******************************
 1337                 *            COOKIES           *
 1338                 *******************************/
 1339
 1340write_cookies(Out, Parts, Options) :-
 1341    http:write_cookies(Out, Parts, Options),
 1342    !.
 1343write_cookies(_, _, _).
 1344
 1345update_cookies(_, _, _) :-
 1346    predicate_property(http:update_cookies(_,_,_), number_of_clauses(0)),
 1347    !.
 1348update_cookies(Lines, Parts, Options) :-
 1349    (   member(Line, Lines),
 1350        phrase(atom_field('set_cookie', CookieData), Line),
 1351        http:update_cookies(CookieData, Parts, Options),
 1352        fail
 1353    ;   true
 1354    ).
 1355
 1356
 1357                 /*******************************
 1358                 *           OPEN ANY           *
 1359                 *******************************/
 1360
 1361:- multifile iostream:open_hook/6. 1362
 1363%!  iostream:open_hook(+Spec, +Mode, -Stream, -Close,
 1364%!                     +Options0, -Options) is semidet.
 1365%
 1366%   Hook implementation that makes  open_any/5   support  =http= and
 1367%   =https= URLs for `Mode == read`.
 1368
 1369iostream:open_hook(URL, read, Stream, Close, Options0, Options) :-
 1370    (atom(URL) -> true ; string(URL)),
 1371    uri_is_global(URL),
 1372    uri_components(URL, Components),
 1373    uri_data(scheme, Components, Scheme),
 1374    http_scheme(Scheme),
 1375    !,
 1376    Options = Options0,
 1377    Close = close(Stream),
 1378    http_open(URL, Stream, Options0).
 1379
 1380http_scheme(http).
 1381http_scheme(https).
 1382
 1383
 1384                 /*******************************
 1385                 *          KEEP-ALIVE          *
 1386                 *******************************/
 1387
 1388%!  consider_keep_alive(+HeaderLines, +Parts, +Host,
 1389%!                      +Stream0, -Stream,
 1390%!                      +Options) is det.
 1391
 1392consider_keep_alive(Lines, Parts, Host, StreamPair, In, Options) :-
 1393    option(connection(Asked), Options),
 1394    keep_alive(Asked),
 1395    connection(Lines, Given),
 1396    keep_alive(Given),
 1397    content_length(Lines, Bytes),
 1398    !,
 1399    stream_pair(StreamPair, In0, _),
 1400    connection_address(Host, Parts, HostPort),
 1401    debug(http(connection),
 1402          'Keep-alive to ~w (~D bytes)', [HostPort, Bytes]),
 1403    stream_range_open(In0, In,
 1404                      [ size(Bytes),
 1405                        onclose(keep_alive(StreamPair, HostPort))
 1406                      ]).
 1407consider_keep_alive(_, _, _, Stream, Stream, _).
 1408
 1409connection_address(Host, _, Host) :-
 1410    Host = _:_,
 1411    !.
 1412connection_address(Host, Parts, Host:Port) :-
 1413    parts_port(Parts, Port).
 1414
 1415keep_alive(keep_alive) :- !.
 1416keep_alive(Connection) :-
 1417    downcase_atom(Connection, 'keep-alive').
 1418
 1419:- public keep_alive/4. 1420
 1421keep_alive(StreamPair, Host, In, Left) :-
 1422    read_incomplete(In, Left),
 1423    add_to_pool(Host, StreamPair),
 1424    !.
 1425keep_alive(StreamPair, _, _, _) :-
 1426    close(StreamPair, [force(true)]).
 1427
 1428%!  read_incomplete(+In, +Left) is semidet.
 1429%
 1430%   If we have not all input from  a Keep-alive connection, read the
 1431%   remainder if it is short. Else, we fail and close the stream.
 1432
 1433read_incomplete(_, 0) :- !.
 1434read_incomplete(In, Left) :-
 1435    Left < 100,
 1436    !,
 1437    catch(setup_call_cleanup(
 1438              open_null_stream(Null),
 1439              copy_stream_data(In, Null, Left),
 1440              close(Null)),
 1441          _,
 1442          fail).
 1443
 1444:- dynamic
 1445    connection_pool/4,              % Hash, Address, Stream, Time
 1446    connection_gc_time/1. 1447
 1448add_to_pool(Address, StreamPair) :-
 1449    keep_connection(Address),
 1450    get_time(Now),
 1451    term_hash(Address, Hash),
 1452    assertz(connection_pool(Hash, Address, StreamPair, Now)).
 1453
 1454get_from_pool(Address, StreamPair) :-
 1455    term_hash(Address, Hash),
 1456    retract(connection_pool(Hash, Address, StreamPair, _)).
 1457
 1458%!  keep_connection(+Address) is semidet.
 1459%
 1460%   Succeeds if we want to keep   the  connection open. We currently
 1461%   keep a maximum of 10 connections  waiting   and  a  maximum of 2
 1462%   waiting for the same address. Connections   older than 2 seconds
 1463%   are closed.
 1464
 1465keep_connection(Address) :-
 1466    close_old_connections(2),
 1467    predicate_property(connection_pool(_,_,_,_), number_of_clauses(C)),
 1468    C =< 10,
 1469    term_hash(Address, Hash),
 1470    aggregate_all(count, connection_pool(Hash, Address, _, _), Count),
 1471    Count =< 2.
 1472
 1473close_old_connections(Timeout) :-
 1474    get_time(Now),
 1475    Before is Now - Timeout,
 1476    (   connection_gc_time(GC),
 1477        GC > Before
 1478    ->  true
 1479    ;   (   retractall(connection_gc_time(_)),
 1480            asserta(connection_gc_time(Now)),
 1481            connection_pool(Hash, Address, StreamPair, Added),
 1482            Added < Before,
 1483            retract(connection_pool(Hash, Address, StreamPair, Added)),
 1484            debug(http(connection),
 1485                  'Closing inactive keep-alive to ~p', [Address]),
 1486            close(StreamPair, [force(true)]),
 1487            fail
 1488        ;   true
 1489        )
 1490    ).
 1491
 1492
 1493%!  http_close_keep_alive(+Address) is det.
 1494%
 1495%   Close all keep-alive connections matching Address. Address is of
 1496%   the  form  Host:Port.  In  particular,  http_close_keep_alive(_)
 1497%   closes all currently known keep-alive connections.
 1498
 1499http_close_keep_alive(Address) :-
 1500    forall(get_from_pool(Address, StreamPair),
 1501           close(StreamPair, [force(true)])).
 1502
 1503%!  keep_alive_error(+Error)
 1504%
 1505%   Deal with an error from reusing  a keep-alive connection. If the
 1506%   error is due to an I/O error   or end-of-file, fail to backtrack
 1507%   over get_from_pool/2. Otherwise it is a   real error and we thus
 1508%   re-raise it.
 1509
 1510keep_alive_error(keep_alive(closed)) :-
 1511    !,
 1512    debug(http(connection), 'Keep-alive connection was closed', []),
 1513    fail.
 1514keep_alive_error(io_error(_,_)) :-
 1515    !,
 1516    debug(http(connection), 'IO error on Keep-alive connection', []),
 1517    fail.
 1518keep_alive_error(Error) :-
 1519    throw(Error).
 1520
 1521
 1522                 /*******************************
 1523                 *     HOOK DOCUMENTATION       *
 1524                 *******************************/
 1525
 1526%!  http:open_options(+Parts, -Options) is nondet.
 1527%
 1528%   This hook is used by the HTTP   client library to define default
 1529%   options based on the the broken-down request-URL.  The following
 1530%   example redirects all trafic, except for localhost over a proxy:
 1531%
 1532%       ==
 1533%       :- multifile
 1534%           http:open_options/2.
 1535%
 1536%       http:open_options(Parts, Options) :-
 1537%           option(host(Host), Parts),
 1538%           Host \== localhost,
 1539%           Options = [proxy('proxy.local', 3128)].
 1540%       ==
 1541%
 1542%   This hook may return multiple   solutions.  The returned options
 1543%   are  combined  using  merge_options/3  where  earlier  solutions
 1544%   overrule later solutions.
 1545
 1546%!  http:write_cookies(+Out, +Parts, +Options) is semidet.
 1547%
 1548%   Emit a =|Cookie:|= header for the  current connection. Out is an
 1549%   open stream to the HTTP server, Parts is the broken-down request
 1550%   (see uri_components/2) and Options is the list of options passed
 1551%   to http_open.  The predicate is called as if using ignore/1.
 1552%
 1553%   @see complements http:update_cookies/3.
 1554%   @see library(http/http_cookie) implements cookie handling on
 1555%   top of these hooks.
 1556
 1557%!  http:update_cookies(+CookieData, +Parts, +Options) is semidet.
 1558%
 1559%   Update the cookie database.  CookieData  is   the  value  of the
 1560%   =|Set-Cookie|= field, Parts is  the   broken-down  request  (see
 1561%   uri_components/2) and Options is the list   of options passed to
 1562%   http_open.
 1563%
 1564%   @see complements http:write_cookies
 1565%   @see library(http/http_cookies) implements cookie handling on
 1566%   top of these hooks.