1/* Part of SWI-Prolog 2 3 Author: Jan Wielemaker 4 E-mail: J.Wielemaker@vu.nl 5 WWW: http://www.swi-prolog.org 6 Copyright (c) 2000-2015, University of Amsterdam 7 All rights reserved. 8 9 Redistribution and use in source and binary forms, with or without 10 modification, are permitted provided that the following conditions 11 are met: 12 13 1. Redistributions of source code must retain the above copyright 14 notice, this list of conditions and the following disclaimer. 15 16 2. Redistributions in binary form must reproduce the above copyright 17 notice, this list of conditions and the following disclaimer in 18 the documentation and/or other materials provided with the 19 distribution. 20 21 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 24 FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 25 COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 26 INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 27 BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 28 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 29 CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 30 LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN 31 ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 32 POSSIBILITY OF SUCH DAMAGE. 33*/ 34 35:- module(url, 36 [ parse_url/2, % +URL, -Parts | -URL +Parts 37 parse_url/3, % +URL|URI, +BaseURL, -Parts 38 % -URL, +BaseURL, +Parts 39 is_absolute_url/1, % +URL 40 global_url/3, % +Local, +Base, -Global 41 http_location/2, % ?Parts, ?Location 42 www_form_encode/2, % Value <-> Encoded 43 parse_url_search/2, % Form-data <-> Form fields 44 45 url_iri/2, % ?URL, ?IRI 46 47 file_name_to_url/2, % ?FileName, ?URL 48 49 set_url_encoding/2 % ?Old, +New 50 ]). 51:- use_module(library(lists)). 52:- use_module(library(error)). 53:- use_module(library(utf8)).
74 /******************************* 75 * GLOBALISE * 76 *******************************/
84global_url(URL, BaseURL, Global) :-
85 ( is_absolute_url(URL),
86 \+ sub_atom(URL, _, _, _, '%') % may have escape, use general
87 -> Global = URL
88 ; sub_atom(URL, 0, _, _, '//')
89 -> parse_url(BaseURL, [], Attributes),
90 memberchk(protocol(Proto), Attributes),
91 atomic_list_concat([Proto, :, URL], Global)
92 ; sub_atom(URL, 0, _, _, #)
93 -> ( sub_atom(BaseURL, _, _, 0, #)
94 -> sub_atom(URL, 1, _, 0, NoHash),
95 atom_concat(BaseURL, NoHash, Global)
96 ; atom_concat(BaseURL, URL, Global)
97 )
98 ; parse_url(URL, BaseURL, Attributes)
99 -> phrase(curl(Attributes), Chars),
100 atom_codes(Global, Chars)
101 ; throw(error(syntax_error(illegal_url), URL))
102 ).
109is_absolute_url(URL) :- 110 sub_atom(URL, 0, _, _, 'http://'), 111 !. 112is_absolute_url(URL) :- 113 sub_atom(URL, 0, _, _, 'https://'), 114 !. 115is_absolute_url(URL) :- 116 sub_atom(URL, 0, _, _, 'ftp://'), 117 !. 118is_absolute_url(URL) :- 119 sub_atom(URL, 0, _, _, 'file://'), 120 !. 121is_absolute_url(URL) :- 122 atom_codes(URL, Codes), 123 phrase(absolute_url, Codes, _), 124 !. 125 126 127 /******************************* 128 * CREATE URL/URI * 129 *******************************/
<Action> <Location> HTTP/<version>
144http_location(Parts, Location) :- % Parts --> Location 145 nonvar(Parts), 146 !, 147 phrase(curi(Parts), String), 148 !, 149 atom_codes(Location, String). 150http_location(Parts, Location) :- % Location --> Parts 151 atom(Location), 152 !, 153 atom_codes(Location, Codes), 154 phrase(http_location(Parts), Codes). 155http_location(Parts, Codes) :- % LocationCodes --> Parts 156 is_list(Codes), 157 phrase(http_location(Parts), Codes). 158 159 160curl(A) --> 161 { memberchk(protocol(Protocol), A) 162 }, 163 !, 164 catomic(Protocol), 165 ":", 166 curl(Protocol, A). 167curl(A) --> 168 curl(http, A). 169 170curl(file, A) --> 171 !, 172 ( "//" 173 -> cpath(A) 174 ; cpath(A) 175 ). 176curl(_, A) --> 177 "//", 178 cuser(A), 179 chost(A), 180 cport(A), 181 cpath(A), 182 csearch(A), 183 cfragment(A). 184 185curi(A) --> 186 cpath(A), 187 csearch(A). 188 189cpath(A) --> 190 ( { memberchk(path(Path), A) } 191 -> { atom_codes(Path, Codes) }, 192 www_encode(Codes, [0'/, 0'+, 0':, 0',]) 193 ; "" 194 ). 195 196cuser(A) --> 197 ( { memberchk(user(User), A) } 198 -> { atom_codes(User, Codes) }, 199 www_encode(Codes, [0':]), 200 "@" 201 ; "" 202 ). 203 204chost(A) --> 205 ( { memberchk(host(Host), A) } 206 -> { atom_codes(Host, Codes) }, 207 www_encode(Codes, []) 208 ; "" 209 ). 210 211cport(A) --> 212 ( { memberchk(port(Port), A), Port \== 80 } 213 -> { number_codes(Port, Codes) }, 214 ":", 215 www_encode(Codes, []) 216 ; "" 217 ). 218 219 220catomic(A, In, Out) :- 221 atom_codes(A, Codes), 222 append(Codes, Out, In).
226csearch(A)--> 227 ( { memberchk(search(Parameters), A) } 228 -> csearch(Parameters, [0'?]) 229 ; [] 230 ). 231 232csearch([], _) --> 233 []. 234csearch([Parameter|Parameters], Sep) --> 235 !, 236 codes(Sep), 237 cparam(Parameter), 238 csearch(Parameters, [0'&]). 239 240cparam(Name=Value) --> 241 !, 242 cname(Name), 243 "=", 244 cvalue(Value). 245cparam(NameValue) --> % allow to feed Name(Value) 246 { compound(NameValue), 247 !, 248 NameValue =.. [Name,Value] 249 }, 250 cname(Name), 251 "=", 252 cvalue(Value). 253cparam(Name)--> 254 cname(Name). 255 256codes([]) --> []. 257codes([H|T]) --> [H], codes(T). 258 259cname(Atom) --> 260 { atom_codes(Atom, Codes) }, 261 www_encode(Codes, []).
268cvalue(Value) --> 269 { atomic(Value), 270 !, 271 atom_codes(Value, Codes) 272 }, 273 www_encode(Codes, []). 274cvalue(Codes) --> 275 { must_be(codes, Codes) 276 }, 277 www_encode(Codes, []).
282cfragment(A) --> 283 { memberchk(fragment(Frag), A), 284 !, 285 atom_codes(Frag, Codes) 286 }, 287 "#", 288 www_encode(Codes, []). 289cfragment(_) --> 290 "". 291 292 293 /******************************* 294 * PARSING * 295 *******************************/
url:
, an
identifier separated from the remainder of the URL using :.
parse_url/2 assumes the http
protocol if no protocol is
specified and the URL can be parsed as a valid HTTP url. In
addition to the RFC-1738 specified protocols, the file
protocol is supported as well.ftp
, http
and file
protocols. If no path appears, the
library generates the path /
.?
, normally used to transfer data from HTML forms that
use the GET
protocol. In the URL it consists of a
www-form-encoded list of Name=Value pairs. This is mapped to
a list of Prolog Name=Value terms with decoded names and
values.#
character.The example below illustrates all of this for an HTTP URL.
?- parse_url('http://www.xyz.org/hello?msg=Hello+World%21#x', P). P = [ protocol(http), host('www.xyz.org'), fragment(x), search([ msg = 'Hello World!' ]), path('/hello') ]
By instantiating the parts-list this predicate can be used to create a URL.
356parse_url(URL, Attributes) :- 357 nonvar(URL), 358 !, 359 atom_codes(URL, Codes), 360 phrase(url(Attributes), Codes). 361parse_url(URL, Attributes) :- 362 phrase(curl(Attributes), Codes), 363 !, 364 atom_codes(URL, Codes).
371parse_url(URL, BaseURL, Attributes) :- 372 nonvar(URL), 373 !, 374 atom_codes(URL, Codes), 375 ( phrase(absolute_url, Codes, _) 376 -> phrase(url(Attributes), Codes) 377 ; ( atomic(BaseURL) 378 -> parse_url(BaseURL, BaseA0) 379 ; BaseA0 = BaseURL 380 ), 381 select(path(BasePath), BaseA0, BaseA1), 382 delete(BaseA1, search(_), BaseA2), 383 delete(BaseA2, fragment(_), BaseA3), 384 phrase(relative_uri(URIA0), Codes), 385 select(path(LocalPath), URIA0, URIA1), 386 !, 387 globalise_path(LocalPath, BasePath, Path), 388 append(BaseA3, [path(Path)|URIA1], Attributes) 389 ). 390parse_url(URL, BaseURL, Attributes) :- 391 parse_url(BaseURL, BaseAttributes), 392 memberchk(path(BasePath), BaseAttributes), 393 ( memberchk(path(LocalPath), Attributes) 394 -> globalise_path(LocalPath, BasePath, Path) 395 ; Path = BasePath 396 ), 397 append([path(Path)|Attributes], BaseAttributes, GlobalAttributes), 398 phrase(curl(GlobalAttributes), Chars), 399 atom_codes(URL, Chars).
409globalise_path(LocalPath, _, LocalPath) :- 410 sub_atom(LocalPath, 0, _, _, /), 411 !. 412globalise_path(LocalPath, _, LocalPath) :- 413 is_absolute_file_name(LocalPath), 414 !. 415globalise_path(Local, Base, Path) :- 416 base_dir(Base, BaseDir), 417 make_path(BaseDir, Local, Path). 418 419base_dir(BasePath, BaseDir) :- 420 ( atom_concat(BaseDir, /, BasePath) 421 -> true 422 ; file_directory_name(BasePath, BaseDir) 423 ). 424 425make_path(Dir, Local, Path) :- 426 atom_concat('../', L2, Local), 427 file_directory_name(Dir, Parent), 428 Parent \== Dir, 429 !, 430 make_path(Parent, L2, Path). 431make_path(/, Local, Path) :- 432 !, 433 atom_concat(/, Local, Path). 434make_path(Dir, Local, Path) :- 435 atomic_list_concat([Dir, /, Local], Path).
444absolute_url --> 445 lwalpha(_First), 446 schema_chars(Rest), 447 { Rest \== [] }, 448 ":", 449 !. 450 451 452 /******************************* 453 * SEQUENCES * 454 *******************************/ 455 456digits(L) --> 457 digits(L, []). 458 459digits([C|T0], T) --> 460 digit(C), 461 !, 462 digits(T0, T). 463digits(T, T) --> 464 []. 465 466 467digit(C, [C|T], T) :- code_type(C, digit). 468 469 /******************************* 470 * RFC-3986 * 471 *******************************/
475url([protocol(Schema)|Parts]) --> 476 schema(Schema), 477 ":", 478 !, 479 hier_part(Schema, Parts, P2), 480 query(P2, P3), 481 fragment(P3, []). 482url([protocol(http)|Parts]) --> % implicit HTTP 483 authority(Parts, [path(Path)]), 484 path_abempty(Path). 485 486relative_uri(Parts) --> 487 relative_part(Parts, P2), 488 query(P2, P3), 489 fragment(P3, []). 490 491relative_part(Parts, Tail) --> 492 "//", 493 !, 494 authority(Parts, [path(Path)|Tail]), 495 path_abempty(Path). 496relative_part([path(Path)|T], T) --> 497 ( path_absolute(Path) 498 ; path_noschema(Path) 499 ; path_empty(Path) 500 ), 501 !. 502 503http_location([path(Path)|P2]) --> 504 path_abempty(Path), 505 query(P2, P3), 506 fragment(P3, []).
Schema ::= ALPHA *(ALPHA|DIGIT|"+"|"-"|".")
517schema(Schema) --> 518 lwalpha(C0), 519 schema_chars(Codes), 520 { atom_codes(Schema, [C0|Codes]) }. 521 522schema_chars([H|T]) --> 523 schema_char(H), 524 !, 525 schema_chars(T). 526schema_chars([]) --> 527 []. 528 529schema_char(H) --> 530 [C], 531 { C < 128, 532 ( code_type(C, alpha) 533 -> code_type(H, to_lower(C)) 534 ; code_type(C, digit) 535 -> H = C 536 ; schema_extra(C) 537 -> H = C 538 ) 539 }. 540 541schema_extra(0'+). 542schema_extra(0'-). 543schema_extra(0'.). % 0'
548hier_part(file, [path(Path)|Tail], Tail) --> 549 !, 550 "//", 551 ( win_drive_path(Path) 552 ; path_absolute(Path) 553 ; path_rootless(Path) 554 ; path_empty(Path) 555 ), 556 !. 557hier_part(_, Parts, Tail) --> 558 "//", 559 !, 560 authority(Parts, [path(Path)|Tail]), 561 path_abempty(Path). 562hier_part(_, [path(Path)|T], T) --> 563 ( path_absolute(Path) 564 ; path_rootless(Path) 565 ; path_empty(Path) 566 ), 567 !. 568 Parts, Tail) (--> 570 user_info_chars(UserChars), 571 "@", 572 !, 573 { atom_codes(User, UserChars), 574 Parts = [user(User),host(Host)|T0] 575 }, 576 host(Host), 577 port(T0,Tail). 578authority([host(Host)|T0], Tail) --> 579 host(Host), 580 port(T0, Tail). 581 582user_info_chars([H|T]) --> 583 user_info_char(H), 584 !, 585 user_info_chars(T). 586user_info_chars([]) --> 587 []. 588 589user_info_char(_) --> "@", !, {fail}. 590user_info_char(C) --> pchar(C). 591 592%host(Host) --> ip_literal(Host), !. % TBD: IP6 addresses 593host(Host) --> ip4_address(Host), !. 594host(Host) --> reg_name(Host). 595 596ip4_address(Atom) --> 597 i256_chars(Chars, [0'.|T0]), 598 i256_chars(T0, [0'.|T1]), 599 i256_chars(T1, [0'.|T2]), 600 i256_chars(T2, []), 601 { atom_codes(Atom, Chars) }. 602 603i256_chars(Chars, T) --> 604 digits(Chars, T), 605 { \+ \+ ( T = [], 606 Chars \== [], 607 number_codes(I, Chars), 608 I < 256 609 ) 610 }. 611 612reg_name(Host) --> 613 reg_name_chars(Chars), 614 { atom_codes(Host, Chars) }. 615 616reg_name_chars([H|T]) --> 617 reg_name_char(H), 618 !, 619 reg_name_chars(T). 620reg_name_chars([]) --> 621 []. 622 623reg_name_char(C) --> 624 pchar(C), 625 { C \== 0':, 626 C \== 0'@ 627 }. 628 629port([port(Port)|T], T) --> 630 ":", 631 !, 632 digit(D0), 633 digits(Ds), 634 { number_codes(Port, [D0|Ds]) }. 635port(T, T) --> 636 []. 637 638path_abempty(Path) --> 639 segments_chars(Chars, []), 640 { Chars == [] 641 -> Path = '/' 642 ; atom_codes(Path, Chars) 643 }. 644 645 646win_drive_path(Path) --> 647 drive_letter(C0), 648 ":", 649 ( "/" 650 -> {Codes = [C0, 0':, 0'/|Chars]} 651 ; {Codes = [C0, 0':|Chars]} 652 ), 653 segment_nz_chars(Chars, T0), 654 segments_chars(T0, []), 655 { atom_codes(Path, Codes) }. 656 657 658path_absolute(Path) --> 659 "/", 660 segment_nz_chars(Chars, T0), 661 segments_chars(T0, []), 662 { atom_codes(Path, [0'/| Chars]) }. 663 664path_noschema(Path) --> 665 segment_nz_nc_chars(Chars, T0), 666 segments_chars(T0, []), 667 { atom_codes(Path, Chars) }. 668 669path_rootless(Path) --> 670 segment_nz_chars(Chars, T0), 671 segments_chars(T0, []), 672 { atom_codes(Path, Chars) }. 673 674path_empty('/') --> 675 "". 676 677segments_chars([0'/|Chars], T) --> % 0' 678 "/", 679 !, 680 segment_chars(Chars, T0), 681 segments_chars(T0, T). 682segments_chars(T, T) --> 683 []. 684 685segment_chars([H|T0], T) --> 686 pchar(H), 687 !, 688 segment_chars(T0, T). 689segment_chars(T, T) --> 690 []. 691 692segment_nz_chars([H|T0], T) --> 693 pchar(H), 694 segment_chars(T0, T). 695 696segment_nz_nc_chars([H|T0], T) --> 697 segment_nz_nc_char(H), 698 !, 699 segment_nz_nc_chars(T0, T). 700segment_nz_nc_chars(T, T) --> 701 []. 702 703segment_nz_nc_char(_) --> ":", !, {fail}. 704segment_nz_nc_char(C) --> pchar(C).
711query([search(Params)|T], T) --> 712 "?", 713 !, 714 search(Params). 715query(T,T) --> 716 []. 717 718search([Parameter|Parameters])--> 719 parameter(Parameter), 720 !, 721 ( search_sep 722 -> search(Parameters) 723 ; { Parameters = [] } 724 ). 725search([]) --> 726 []. 727 728parameter(Param)--> 729 !, 730 search_chars(NameS), 731 { atom_codes(Name, NameS) 732 }, 733 ( "=" 734 -> search_value_chars(ValueS), 735 { atom_codes(Value, ValueS), 736 Param = (Name = Value) 737 } 738 ; { Param = Name 739 } 740 ). 741 742search_chars([C|T]) --> 743 search_char(C), 744 !, 745 search_chars(T). 746search_chars([]) --> 747 []. 748 749search_char(_) --> search_sep, !, { fail }. 750search_char(_) --> "=", !, { fail }. 751search_char(C) --> fragment_char(C). 752 753search_value_chars([C|T]) --> 754 search_value_char(C), 755 !, 756 search_value_chars(T). 757search_value_chars([]) --> 758 []. 759 760search_value_char(_) --> search_sep, !, { fail }. 761search_value_char(C) --> fragment_char(C).
771search_sep --> "&", !. 772search_sep --> ";".
779fragment([fragment(Fragment)|T], T) --> 780 "#", 781 !, 782 fragment_chars(Codes), 783 { atom_codes(Fragment, Codes) }. 784fragment(T, T) --> 785 []. 786 787fragment_chars([H|T]) --> 788 fragment_char(H), 789 !, 790 fragment_chars(T). 791fragment_chars([]) --> 792 [].
799fragment_char(C) --> pchar(C), !. 800fragment_char(0'/) --> "/", !. 801fragment_char(0'?) --> "?", !. 802fragment_char(0'[) --> "[", !. % Not according RDF3986! 803fragment_char(0']) --> "]", !. 804 805 806 /******************************* 807 * CHARACTER CLASSES * 808 *******************************/
Performs UTF-8 decoding of percent encoded strings.
816pchar(0'\s) --> "+", !. 817pchar(C) --> 818 [C], 819 { unreserved(C) 820 ; sub_delim(C) 821 ; C == 0': 822 ; C == 0'@ 823 }, 824 !. 825pchar(C) --> 826 percent_coded(C).
832lwalpha(H) --> 833 [C], 834 { C < 128, 835 code_type(C, alpha), 836 code_type(H, to_lower(C)) 837 }. 838 839drive_letter(C) --> 840 [C], 841 { C < 128, 842 code_type(C, alpha) 843 }. 844 845 846 /******************************* 847 * RESERVED CHARACTERS * 848 *******************************/
854sub_delim(0'!). 855sub_delim(0'$). 856sub_delim(0'&). 857sub_delim(0''). 858sub_delim(0'(). 859sub_delim(0')). 860sub_delim(0'*). 861sub_delim(0'+). 862sub_delim(0',). 863sub_delim(0';). 864sub_delim(0'=).
872term_expansion(unreserved(map), Clauses) :- 873 findall(unreserved(C), unreserved_(C), Clauses). 874 875unreserved_(C) :- 876 between(1, 128, C), 877 code_type(C, alnum). 878unreserved_(0'-). 879unreserved_(0'.). 880unreserved_(0'_). 881unreserved_(0'~). % 0' 882 883unreserved(map). % Expanded 884 885 886 /******************************* 887 * FORMS * 888 *******************************/ 889 890/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 891Encoding/decoding of form-fields using the popular www-form-encoded 892encoding used with the HTTP GET. 893- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
alnum
(see code_type/2)), and one of "-._~" using percent
encoding. Newline is mapped to %OD%OA
. When decoding,
newlines appear as a single newline (10) character.
Note that a space is encoded as %20
instead of +
.
Decoding decodes both to a space.
909www_form_encode(Value, Encoded) :- 910 atomic(Value), 911 !, 912 atom_codes(Value, Codes), 913 phrase(www_encode(Codes, []), EncCodes), 914 atom_codes(Encoded, EncCodes). 915www_form_encode(Value, Encoded) :- 916 atom_codes(Encoded, EncCodes), 917 phrase(www_decode(Codes), EncCodes), 918 atom_codes(Value, Codes).
922www_encode([0'\r, 0'\n|T], Extra) --> 923 !, 924 "%0D%0A", 925 www_encode(T, Extra). 926www_encode([0'\n|T], Extra) --> 927 !, 928 "%0D%0A", 929 www_encode(T, Extra). 930www_encode([H|T], Extra) --> 931 percent_encode(H, Extra), 932 www_encode(T, Extra). 933www_encode([], _) --> 934 "". 935 936percent_encode(C, _Extra) --> 937 { unreserved(C) }, 938 !, 939 [C]. 940percent_encode(C, Extra) --> 941 { memberchk(C, Extra) }, 942 !, 943 [C]. 944%percent_encode(0' , _) --> !, "+". % Deprecated: use %20 945percent_encode(C, _) --> 946 { C =< 127 }, 947 !, 948 percent_byte(C). 949percent_encode(C, _) --> % Unicode characters 950 { current_prolog_flag(url_encoding, utf8), 951 !, 952 phrase(utf8_codes([C]), Bytes) 953 }, 954 percent_bytes(Bytes). 955percent_encode(C, _) --> 956 { C =< 255 }, 957 !, 958 percent_byte(C). 959percent_encode(_C, _) --> 960 { representation_error(url_character) 961 }. 962 963percent_bytes([]) --> 964 "". 965percent_bytes([H|T]) --> 966 percent_byte(H), 967 percent_bytes(T). 968 969percent_byte(C) --> 970 [0'%, D1, D2], 971 { nonvar(C) 972 -> Dv1 is (C>>4 /\ 0xf), 973 Dv2 is (C /\ 0xf), 974 code_type(D1, xdigit(Dv1)), 975 code_type(D2, xdigit(Dv2)) 976 ; code_type(D1, xdigit(Dv1)), 977 code_type(D2, xdigit(Dv2)), 978 C is ((Dv1)<<4) + Dv2 979 }. 980 981percent_coded(C) --> 982 percent_byte(C0), 983 !, 984 ( { C0 == 13 % %0D%0A --> \n 985 }, 986 "%0", 987 ( "A" ; "a" ) 988 -> { C = 10 989 } 990 ; { C0 >= 0xc0 }, % UTF-8 lead-in 991 utf8_cont(Cs), 992 { phrase(utf8_codes([C]), [C0|Cs]) } 993 -> [] 994 ; { C = C0 995 } 996 ).
1000www_decode([0' |T]) --> 1001 "+", 1002 !, 1003 www_decode(T). 1004www_decode([C|T]) --> 1005 percent_coded(C), 1006 !, 1007 www_decode(T). 1008www_decode([C|T]) --> 1009 [C], 1010 !, 1011 www_decode(T). 1012www_decode([]) --> 1013 []. 1014 1015utf8_cont([H|T]) --> 1016 percent_byte(H), 1017 { between(0x80, 0xbf, H) }, 1018 !, 1019 utf8_cont(T). 1020utf8_cont([]) --> 1021 [].
utf8
.
The only other defined value is iso_latin_1
.
1032:- create_prolog_flag(url_encoding, utf8, [type(atom)]). 1033 1034set_url_encoding(Old, New) :- 1035 current_prolog_flag(url_encoding, Old), 1036 ( Old == New 1037 -> true 1038 ; must_be(oneof([utf8, iso_latin_1]), New), 1039 set_prolog_flag(url_encoding, New) 1040 ). 1041 1042 1043 /******************************* 1044 * IRI PROCESSING * 1045 *******************************/
1054url_iri(Encoded, Decoded) :- 1055 nonvar(Encoded), 1056 !, 1057 ( sub_atom(Encoded, _, _, _, '%') 1058 -> atom_codes(Encoded, Codes), 1059 unescape_precent(Codes, UTF8), 1060 phrase(utf8_codes(Unicodes), UTF8), 1061 atom_codes(Decoded, Unicodes) 1062 ; Decoded = Encoded 1063 ). 1064url_iri(URL, IRI) :- 1065 atom_codes(IRI, IRICodes), 1066 atom_codes('/:?#&=', ExtraEscapes), 1067 phrase(www_encode(IRICodes, ExtraEscapes), UrlCodes), 1068 atom_codes(URL, UrlCodes). 1069 1070 1071unescape_precent([], []). 1072unescape_precent([0'%,C1,C2|T0], [H|T]) :- %' 1073 !, 1074 code_type(C1, xdigit(D1)), 1075 code_type(C2, xdigit(D2)), 1076 H is D1*16 + D2, 1077 unescape_precent(T0, T). 1078unescape_precent([H|T0], [H|T]) :- 1079 unescape_precent(T0, T). 1080 1081 1082 /******************************* 1083 * FORM DATA * 1084 *******************************/
application/x-www-form-urlencoded
as used in HTTP GET
requests.1093parse_url_search(Spec, Fields) :- 1094 atomic(Spec), 1095 !, 1096 atom_codes(Spec, Codes), 1097 phrase(search(Fields), Codes). 1098parse_url_search(Codes, Fields) :- 1099 is_list(Codes), 1100 !, 1101 phrase(search(Fields), Codes). 1102parse_url_search(Codes, Fields) :- 1103 must_be(list, Fields), 1104 phrase(csearch(Fields, []), Codes). 1105 1106 1107 /******************************* 1108 * FILE URLs * 1109 *******************************/
1119file_name_to_url(File, FileURL) :- 1120 nonvar(File), 1121 !, 1122 absolute_file_name(File, Path), 1123 atom_concat('file://', Path, FileURL), 1124 !. 1125file_name_to_url(File, FileURL) :- 1126 atom_concat('file://', File, FileURL), 1127 !
Analysing and constructing URL
This library deals with the analysis and construction of a URL, Universal Resource Locator. URL is the basis for communicating locations of resources (data) on the web. A URL consists of a protocol identifier (e.g. HTTP, FTP, and a protocol-specific syntax further defining the location. URLs are standardized in RFC-1738.
The implementation in this library covers only a small portion of the defined protocols. Though the initial implementation followed RFC-1738 strictly, the current is more relaxed to deal with frequent violations of the standard encountered in practical use.