34
35:- module(url,
36 [ parse_url/2, 37 parse_url/3, 38 39 is_absolute_url/1, 40 global_url/3, 41 http_location/2, 42 www_form_encode/2, 43 parse_url_search/2, 44
45 url_iri/2, 46
47 file_name_to_url/2, 48
49 set_url_encoding/2 50 ]). 51:- use_module(library(lists)). 52:- use_module(library(error)). 53:- use_module(library(utf8)). 54
73
74 77
83
84global_url(URL, BaseURL, Global) :-
85 ( is_absolute_url(URL),
86 \+ sub_atom(URL, _, _, _, '%') 87 -> Global = URL
88 ; sub_atom(URL, 0, _, _, '//')
89 -> parse_url(BaseURL, [], Attributes),
90 memberchk(protocol(Proto), Attributes),
91 atomic_list_concat([Proto, :, URL], Global)
92 ; sub_atom(URL, 0, _, _, #)
93 -> ( sub_atom(BaseURL, _, _, 0, #)
94 -> sub_atom(URL, 1, _, 0, NoHash),
95 atom_concat(BaseURL, NoHash, Global)
96 ; atom_concat(BaseURL, URL, Global)
97 )
98 ; parse_url(URL, BaseURL, Attributes)
99 -> phrase(curl(Attributes), Chars),
100 atom_codes(Global, Chars)
101 ; throw(error(syntax_error(illegal_url), URL))
102 ).
103
108
109is_absolute_url(URL) :-
110 sub_atom(URL, 0, _, _, 'http://'),
111 !.
112is_absolute_url(URL) :-
113 sub_atom(URL, 0, _, _, 'https://'),
114 !.
115is_absolute_url(URL) :-
116 sub_atom(URL, 0, _, _, 'ftp://'),
117 !.
118is_absolute_url(URL) :-
119 sub_atom(URL, 0, _, _, 'file://'),
120 !.
121is_absolute_url(URL) :-
122 atom_codes(URL, Codes),
123 phrase(absolute_url, Codes, _),
124 !.
125
126
127 130
143
144http_location(Parts, Location) :- 145 nonvar(Parts),
146 !,
147 phrase(curi(Parts), String),
148 !,
149 atom_codes(Location, String).
150http_location(Parts, Location) :- 151 atom(Location),
152 !,
153 atom_codes(Location, Codes),
154 phrase(http_location(Parts), Codes).
155http_location(Parts, Codes) :- 156 is_list(Codes),
157 phrase(http_location(Parts), Codes).
158
159
160curl(A) -->
161 { memberchk(protocol(Protocol), A)
162 },
163 !,
164 catomic(Protocol),
165 ":",
166 curl(Protocol, A).
167curl(A) -->
168 curl(http, A).
169
170curl(file, A) -->
171 !,
172 ( "//"
173 -> cpath(A)
174 ; cpath(A)
175 ).
176curl(_, A) -->
177 "//",
178 cuser(A),
179 chost(A),
180 cport(A),
181 cpath(A),
182 csearch(A),
183 cfragment(A).
184
185curi(A) -->
186 cpath(A),
187 csearch(A).
188
189cpath(A) -->
190 ( { memberchk(path(Path), A) }
191 -> { atom_codes(Path, Codes) },
192 www_encode(Codes, [0'/, 0'+, 0':, 0',])
193 ; ""
194 ).
195
196cuser(A) -->
197 ( { memberchk(user(User), A) }
198 -> { atom_codes(User, Codes) },
199 www_encode(Codes, [0':]),
200 "@"
201 ; ""
202 ).
203
204chost(A) -->
205 ( { memberchk(host(Host), A) }
206 -> { atom_codes(Host, Codes) },
207 www_encode(Codes, [])
208 ; ""
209 ).
210
211cport(A) -->
212 ( { memberchk(port(Port), A), Port \== 80 }
213 -> { number_codes(Port, Codes) },
214 ":",
215 www_encode(Codes, [])
216 ; ""
217 ).
218
219
220catomic(A, In, Out) :-
221 atom_codes(A, Codes),
222 append(Codes, Out, In).
223
225
226csearch(A)-->
227 ( { memberchk(search(Parameters), A) }
228 -> csearch(Parameters, [0'?])
229 ; []
230 ).
231
232csearch([], _) -->
233 [].
234csearch([Parameter|Parameters], Sep) -->
235 !,
236 codes(Sep),
237 cparam(Parameter),
238 csearch(Parameters, [0'&]).
239
240cparam(Name=Value) -->
241 !,
242 cname(Name),
243 "=",
244 cvalue(Value).
245cparam(NameValue) --> 246 { compound(NameValue),
247 !,
248 NameValue =.. [Name,Value]
249 },
250 cname(Name),
251 "=",
252 cvalue(Value).
253cparam(Name)-->
254 cname(Name).
255
256codes([]) --> [].
257codes([H|T]) --> [H], codes(T).
258
259cname(Atom) -->
260 { atom_codes(Atom, Codes) },
261 www_encode(Codes, []).
262
267
268cvalue(Value) -->
269 { atomic(Value),
270 !,
271 atom_codes(Value, Codes)
272 },
273 www_encode(Codes, []).
274cvalue(Codes) -->
275 { must_be(codes, Codes)
276 },
277 www_encode(Codes, []).
278
279
281
282cfragment(A) -->
283 { memberchk(fragment(Frag), A),
284 !,
285 atom_codes(Frag, Codes)
286 },
287 "#",
288 www_encode(Codes, []).
289cfragment(_) -->
290 "".
291
292
293 296
355
356parse_url(URL, Attributes) :-
357 nonvar(URL),
358 !,
359 atom_codes(URL, Codes),
360 phrase(url(Attributes), Codes).
361parse_url(URL, Attributes) :-
362 phrase(curl(Attributes), Codes),
363 !,
364 atom_codes(URL, Codes).
365
370
371parse_url(URL, BaseURL, Attributes) :-
372 nonvar(URL),
373 !,
374 atom_codes(URL, Codes),
375 ( phrase(absolute_url, Codes, _)
376 -> phrase(url(Attributes), Codes)
377 ; ( atomic(BaseURL)
378 -> parse_url(BaseURL, BaseA0)
379 ; BaseA0 = BaseURL
380 ),
381 select(path(BasePath), BaseA0, BaseA1),
382 delete(BaseA1, search(_), BaseA2),
383 delete(BaseA2, fragment(_), BaseA3),
384 phrase(relative_uri(URIA0), Codes),
385 select(path(LocalPath), URIA0, URIA1),
386 !,
387 globalise_path(LocalPath, BasePath, Path),
388 append(BaseA3, [path(Path)|URIA1], Attributes)
389 ).
390parse_url(URL, BaseURL, Attributes) :-
391 parse_url(BaseURL, BaseAttributes),
392 memberchk(path(BasePath), BaseAttributes),
393 ( memberchk(path(LocalPath), Attributes)
394 -> globalise_path(LocalPath, BasePath, Path)
395 ; Path = BasePath
396 ),
397 append([path(Path)|Attributes], BaseAttributes, GlobalAttributes),
398 phrase(curl(GlobalAttributes), Chars),
399 atom_codes(URL, Chars).
400
401
408
409globalise_path(LocalPath, _, LocalPath) :-
410 sub_atom(LocalPath, 0, _, _, /),
411 !.
412globalise_path(LocalPath, _, LocalPath) :-
413 is_absolute_file_name(LocalPath),
414 !.
415globalise_path(Local, Base, Path) :-
416 base_dir(Base, BaseDir),
417 make_path(BaseDir, Local, Path).
418
419base_dir(BasePath, BaseDir) :-
420 ( atom_concat(BaseDir, /, BasePath)
421 -> true
422 ; file_directory_name(BasePath, BaseDir)
423 ).
424
425make_path(Dir, Local, Path) :-
426 atom_concat('../', L2, Local),
427 file_directory_name(Dir, Parent),
428 Parent \== Dir,
429 !,
430 make_path(Parent, L2, Path).
431make_path(/, Local, Path) :-
432 !,
433 atom_concat(/, Local, Path).
434make_path(Dir, Local, Path) :-
435 atomic_list_concat([Dir, /, Local], Path).
436
437
443
444absolute_url -->
445 lwalpha(_First),
446 schema_chars(Rest),
447 { Rest \== [] },
448 ":",
449 !.
450
451
452 455
456digits(L) -->
457 digits(L, []).
458
459digits([C|T0], T) -->
460 digit(C),
461 !,
462 digits(T0, T).
463digits(T, T) -->
464 [].
465
466
467digit(C, [C|T], T) :- code_type(C, digit).
468
469 472
474
475url([protocol(Schema)|Parts]) -->
476 schema(Schema),
477 ":",
478 !,
479 hier_part(Schema, Parts, P2),
480 query(P2, P3),
481 fragment(P3, []).
482url([protocol(http)|Parts]) --> 483 authority(Parts, [path(Path)]),
484 path_abempty(Path).
485
486relative_uri(Parts) -->
487 relative_part(Parts, P2),
488 query(P2, P3),
489 fragment(P3, []).
490
491relative_part(Parts, Tail) -->
492 "//",
493 !,
494 authority(Parts, [path(Path)|Tail]),
495 path_abempty(Path).
496relative_part([path(Path)|T], T) -->
497 ( path_absolute(Path)
498 ; path_noschema(Path)
499 ; path_empty(Path)
500 ),
501 !.
502
503http_location([path(Path)|P2]) -->
504 path_abempty(Path),
505 query(P2, P3),
506 fragment(P3, []).
507
516
517schema(Schema) -->
518 lwalpha(C0),
519 schema_chars(Codes),
520 { atom_codes(Schema, [C0|Codes]) }.
521
522schema_chars([H|T]) -->
523 schema_char(H),
524 !,
525 schema_chars(T).
526schema_chars([]) -->
527 [].
528
529schema_char(H) -->
530 [C],
531 { C < 128,
532 ( code_type(C, alpha)
533 -> code_type(H, to_lower(C))
534 ; code_type(C, digit)
535 -> H = C
536 ; schema_extra(C)
537 -> H = C
538 )
539 }.
540
(0'+).
542schema_extra(0'-).
543schema_extra(0'.). 544
545
547
548hier_part(file, [path(Path)|Tail], Tail) -->
549 !,
550 "//",
551 ( win_drive_path(Path)
552 ; path_absolute(Path)
553 ; path_rootless(Path)
554 ; path_empty(Path)
555 ),
556 !.
557hier_part(_, Parts, Tail) -->
558 "//",
559 !,
560 authority(Parts, [path(Path)|Tail]),
561 path_abempty(Path).
562hier_part(_, [path(Path)|T], T) -->
563 ( path_absolute(Path)
564 ; path_rootless(Path)
565 ; path_empty(Path)
566 ),
567 !.
568
569authority(Parts, Tail) -->
570 user_info_chars(UserChars),
571 "@",
572 !,
573 { atom_codes(User, UserChars),
574 Parts = [user(User),host(Host)|T0]
575 },
576 host(Host),
577 port(T0,Tail).
578authority([host(Host)|T0], Tail) -->
579 host(Host),
580 port(T0, Tail).
581
582user_info_chars([H|T]) -->
583 user_info_char(H),
584 !,
585 user_info_chars(T).
586user_info_chars([]) -->
587 [].
588
589user_info_char(_) --> "@", !, {fail}.
590user_info_char(C) --> pchar(C).
591
593host(Host) --> ip4_address(Host), !.
594host(Host) --> reg_name(Host).
595
596ip4_address(Atom) -->
597 i256_chars(Chars, [0'.|T0]),
598 i256_chars(T0, [0'.|T1]),
599 i256_chars(T1, [0'.|T2]),
600 i256_chars(T2, []),
601 { atom_codes(Atom, Chars) }.
602
603i256_chars(Chars, T) -->
604 digits(Chars, T),
605 { \+ \+ ( T = [],
606 Chars \== [],
607 number_codes(I, Chars),
608 I < 256
609 )
610 }.
611
612reg_name(Host) -->
613 reg_name_chars(Chars),
614 { atom_codes(Host, Chars) }.
615
616reg_name_chars([H|T]) -->
617 reg_name_char(H),
618 !,
619 reg_name_chars(T).
620reg_name_chars([]) -->
621 [].
622
623reg_name_char(C) -->
624 pchar(C),
625 { C \== 0':,
626 C \== 0'@
627 }.
628
629port([port(Port)|T], T) -->
630 ":",
631 !,
632 digit(D0),
633 digits(Ds),
634 { number_codes(Port, [D0|Ds]) }.
635port(T, T) -->
636 [].
637
638path_abempty(Path) -->
639 segments_chars(Chars, []),
640 { Chars == []
641 -> Path = '/'
642 ; atom_codes(Path, Chars)
643 }.
644
645
646win_drive_path(Path) -->
647 drive_letter(C0),
648 ":",
649 ( "/"
650 -> {Codes = [C0, 0':, 0'/|Chars]}
651 ; {Codes = [C0, 0':|Chars]}
652 ),
653 segment_nz_chars(Chars, T0),
654 segments_chars(T0, []),
655 { atom_codes(Path, Codes) }.
656
657
658path_absolute(Path) -->
659 "/",
660 segment_nz_chars(Chars, T0),
661 segments_chars(T0, []),
662 { atom_codes(Path, [0'/| Chars]) }.
663
664path_noschema(Path) -->
665 segment_nz_nc_chars(Chars, T0),
666 segments_chars(T0, []),
667 { atom_codes(Path, Chars) }.
668
669path_rootless(Path) -->
670 segment_nz_chars(Chars, T0),
671 segments_chars(T0, []),
672 { atom_codes(Path, Chars) }.
673
674path_empty('/') -->
675 "".
676
677segments_chars([0'/|Chars], T) --> 678 "/",
679 !,
680 segment_chars(Chars, T0),
681 segments_chars(T0, T).
682segments_chars(T, T) -->
683 [].
684
685segment_chars([H|T0], T) -->
686 pchar(H),
687 !,
688 segment_chars(T0, T).
689segment_chars(T, T) -->
690 [].
691
692segment_nz_chars([H|T0], T) -->
693 pchar(H),
694 segment_chars(T0, T).
695
696segment_nz_nc_chars([H|T0], T) -->
697 segment_nz_nc_char(H),
698 !,
699 segment_nz_nc_chars(T0, T).
700segment_nz_nc_chars(T, T) -->
701 [].
702
703segment_nz_nc_char(_) --> ":", !, {fail}.
704segment_nz_nc_char(C) --> pchar(C).
705
706
710
711query([search(Params)|T], T) -->
712 "?",
713 !,
714 search(Params).
715query(T,T) -->
716 [].
717
718search([Parameter|Parameters])-->
719 parameter(Parameter),
720 !,
721 ( search_sep
722 -> search(Parameters)
723 ; { Parameters = [] }
724 ).
725search([]) -->
726 [].
727
728parameter(Param)-->
729 !,
730 search_chars(NameS),
731 { atom_codes(Name, NameS)
732 },
733 ( "="
734 -> search_value_chars(ValueS),
735 { atom_codes(Value, ValueS),
736 Param = (Name = Value)
737 }
738 ; { Param = Name
739 }
740 ).
741
742search_chars([C|T]) -->
743 search_char(C),
744 !,
745 search_chars(T).
746search_chars([]) -->
747 [].
748
749search_char(_) --> search_sep, !, { fail }.
750search_char(_) --> "=", !, { fail }.
751search_char(C) --> fragment_char(C).
752
753search_value_chars([C|T]) -->
754 search_value_char(C),
755 !,
756 search_value_chars(T).
757search_value_chars([]) -->
758 [].
759
760search_value_char(_) --> search_sep, !, { fail }.
761search_value_char(C) --> fragment_char(C).
762
770
771search_sep --> "&", !.
772search_sep --> ";".
773
774
778
779fragment([fragment(Fragment)|T], T) -->
780 "#",
781 !,
782 fragment_chars(Codes),
783 { atom_codes(Fragment, Codes) }.
784fragment(T, T) -->
785 [].
786
787fragment_chars([H|T]) -->
788 fragment_char(H),
789 !,
790 fragment_chars(T).
791fragment_chars([]) -->
792 [].
793
794
798
799fragment_char(C) --> pchar(C), !.
800fragment_char(0'/) --> "/", !.
801fragment_char(0'?) --> "?", !.
802fragment_char(0'[) --> "[", !. 803fragment_char(0']) --> "]", !.
804
805
806 809
815
816pchar(0'\s) --> "+", !.
817pchar(C) -->
818 [C],
819 { unreserved(C)
820 ; sub_delim(C)
821 ; C == 0':
822 ; C == 0'@
823 },
824 !.
825pchar(C) -->
826 percent_coded(C).
827
831
832lwalpha(H) -->
833 [C],
834 { C < 128,
835 code_type(C, alpha),
836 code_type(H, to_lower(C))
837 }.
838
839drive_letter(C) -->
840 [C],
841 { C < 128,
842 code_type(C, alpha)
843 }.
844
845
846 849
853
854sub_delim(0'!).
855sub_delim(0'$).
856sub_delim(0'&).
857sub_delim(0'').
858sub_delim(0'().
859sub_delim(0')).
860sub_delim(0'*).
861sub_delim(0'+).
862sub_delim(0',).
863sub_delim(0';).
864sub_delim(0'=).
865
866
871
872term_expansion(unreserved(map), Clauses) :-
873 findall(unreserved(C), unreserved_(C), Clauses).
874
875unreserved_(C) :-
876 between(1, 128, C),
877 code_type(C, alnum).
878unreserved_(0'-).
879unreserved_(0'.).
880unreserved_(0'_).
881unreserved_(0'~). 882
883unreserved(map). 884
885
886 889
894
908
909www_form_encode(Value, Encoded) :-
910 atomic(Value),
911 !,
912 atom_codes(Value, Codes),
913 phrase(www_encode(Codes, []), EncCodes),
914 atom_codes(Encoded, EncCodes).
915www_form_encode(Value, Encoded) :-
916 atom_codes(Encoded, EncCodes),
917 phrase(www_decode(Codes), EncCodes),
918 atom_codes(Value, Codes).
919
921
922www_encode([0'\r, 0'\n|T], Extra) -->
923 !,
924 "%0D%0A",
925 www_encode(T, Extra).
926www_encode([0'\n|T], Extra) -->
927 !,
928 "%0D%0A",
929 www_encode(T, Extra).
930www_encode([H|T], Extra) -->
931 percent_encode(H, Extra),
932 www_encode(T, Extra).
933www_encode([], _) -->
934 "".
935
936percent_encode(C, _Extra) -->
937 { unreserved(C) },
938 !,
939 [C].
940percent_encode(C, Extra) -->
941 { memberchk(C, Extra) },
942 !,
943 [C].
945percent_encode(C, _) -->
946 { C =< 127 },
947 !,
948 percent_byte(C).
949percent_encode(C, _) --> 950 { current_prolog_flag(url_encoding, utf8),
951 !,
952 phrase(utf8_codes([C]), Bytes)
953 },
954 percent_bytes(Bytes).
955percent_encode(C, _) -->
956 { C =< 255 },
957 !,
958 percent_byte(C).
959percent_encode(_C, _) -->
960 { representation_error(url_character)
961 }.
962
963percent_bytes([]) -->
964 "".
965percent_bytes([H|T]) -->
966 percent_byte(H),
967 percent_bytes(T).
968
969percent_byte(C) -->
970 [0'%, D1, D2],
971 { nonvar(C)
972 -> Dv1 is (C>>4 /\ 0xf),
973 Dv2 is (C /\ 0xf),
974 code_type(D1, xdigit(Dv1)),
975 code_type(D2, xdigit(Dv2))
976 ; code_type(D1, xdigit(Dv1)),
977 code_type(D2, xdigit(Dv2)),
978 C is ((Dv1)<<4) + Dv2
979 }.
980
981percent_coded(C) -->
982 percent_byte(C0),
983 !,
984 ( { C0 == 13 985 },
986 "%0",
987 ( "A" ; "a" )
988 -> { C = 10
989 }
990 ; { C0 >= 0xc0 }, 991 utf8_cont(Cs),
992 { phrase(utf8_codes([C]), [C0|Cs]) }
993 -> []
994 ; { C = C0
995 }
996 ).
997
999
1000www_decode([0' |T]) -->
1001 "+",
1002 !,
1003 www_decode(T).
1004www_decode([C|T]) -->
1005 percent_coded(C),
1006 !,
1007 www_decode(T).
1008www_decode([C|T]) -->
1009 [C],
1010 !,
1011 www_decode(T).
1012www_decode([]) -->
1013 [].
1014
1015utf8_cont([H|T]) -->
1016 percent_byte(H),
1017 { between(0x80, 0xbf, H) },
1018 !,
1019 utf8_cont(T).
1020utf8_cont([]) -->
1021 [].
1022
1023
1031
1032:- create_prolog_flag(url_encoding, utf8, [type(atom)]). 1033
1034set_url_encoding(Old, New) :-
1035 current_prolog_flag(url_encoding, Old),
1036 ( Old == New
1037 -> true
1038 ; must_be(oneof([utf8, iso_latin_1]), New),
1039 set_prolog_flag(url_encoding, New)
1040 ).
1041
1042
1043 1046
1053
1054url_iri(Encoded, Decoded) :-
1055 nonvar(Encoded),
1056 !,
1057 ( sub_atom(Encoded, _, _, _, '%')
1058 -> atom_codes(Encoded, Codes),
1059 unescape_precent(Codes, UTF8),
1060 phrase(utf8_codes(Unicodes), UTF8),
1061 atom_codes(Decoded, Unicodes)
1062 ; Decoded = Encoded
1063 ).
1064url_iri(URL, IRI) :-
1065 atom_codes(IRI, IRICodes),
1066 atom_codes('/:?#&=', ExtraEscapes),
1067 phrase(www_encode(IRICodes, ExtraEscapes), UrlCodes),
1068 atom_codes(URL, UrlCodes).
1069
1070
1071unescape_precent([], []).
1072unescape_precent([0'%,C1,C2|T0], [H|T]) :- 1073 !,
1074 code_type(C1, xdigit(D1)),
1075 code_type(C2, xdigit(D2)),
1076 H is D1*16 + D2,
1077 unescape_precent(T0, T).
1078unescape_precent([H|T0], [H|T]) :-
1079 unescape_precent(T0, T).
1080
1081
1082 1085
1092
1093parse_url_search(Spec, Fields) :-
1094 atomic(Spec),
1095 !,
1096 atom_codes(Spec, Codes),
1097 phrase(search(Fields), Codes).
1098parse_url_search(Codes, Fields) :-
1099 is_list(Codes),
1100 !,
1101 phrase(search(Fields), Codes).
1102parse_url_search(Codes, Fields) :-
1103 must_be(list, Fields),
1104 phrase(csearch(Fields, []), Codes).
1105
1106
1107 1110
1118
1119file_name_to_url(File, FileURL) :-
1120 nonvar(File),
1121 !,
1122 absolute_file_name(File, Path),
1123 atom_concat('file://', Path, FileURL),
1124 !.
1125file_name_to_url(File, FileURL) :-
1126 atom_concat('file://', File, FileURL),
1127 !