34
35:- module(http_sgml_plugin, []). 36:- use_module(http_client, []). 37:- use_module(library(sgml)). 38:- use_module(library(debug)). 39
40:- multifile
41 http_client:http_convert_data/4. 42
43:- multifile
44 markup_type/2.
59http_client:http_convert_data(In, Fields, Data, Options) :-
60 memberchk(content_type(Type), Fields),
61 debug(sgml_plugin, 'Content type: ~w', [Type]),
62 ( markup_type(Type, ParseOptions)
63 -> true
64 ; type_major_props(Type, Major, Props),
65 default_markup_type(Major, ParseOptions0),
66 type_props(Props, ParseOptions0, ParseOptions)
67 ),
68 merge_options([ max_errors(-1),
69 syntax_errors(quiet)
70 | ParseOptions
71 ], Options, Merged),
72 markup_options(Fields, Merged, MarkupOptions),
73 debug(sgml_plugin, 'Markup options: ~p', [MarkupOptions]),
74 load_structure(stream(In), Data, MarkupOptions).
75
76
77type_major_props(Type0, Type, Props) :-
78 sub_atom(Type0, B, _, A, ;),
79 !,
80 sub_atom(Type0, 0, B, _, Major),
81 sub_atom(Type0, _, A, 0, Props),
82 normalize_space(atom(Type), Major).
83type_major_props(Type, Type, '').
84
85type_props('', L, L).
86type_props(Props, L0, L) :-
87 sub_atom(Props, _, _, A, 'charset='),
88 sub_atom(Props, _, A, 0, CharSet0),
89 downcase_atom(CharSet0, CharSet),
90 known_charset(CharSet),
91 L = [encoding(CharSet)|L0].
92type_props(_, L, L).
93
94known_charset('iso-8859-1').
95known_charset('us-ascii').
96known_charset('utf-8').
105default_markup_type('text/xml',
106 [ dialect(xmlns)
107 ]).
108default_markup_type('text/html',
109 [ dtd(DTD),
110 dialect(sgml),
111 shorttag(false)
112 ]) :-
113 dtd(html, DTD).
114default_markup_type('text/x-sgml',
115 [ dialect(sgml)
116 ]).
117
118markup_options(Fields, Opt0, Options) :-
119 ( memberchk(content_length(Bytes), Fields)
120 -> Options = [content_length(Bytes)|Opt0]
121 ; Options = Opt0
122 ).
129merge_options([], Options, Options).
130merge_options([H|T], Options0, Options) :-
131 functor(H, Name, Arity),
132 functor(H0, Name, Arity),
133 memberchk(H0, Options0),
134 !,
135 merge_options(T, Options0, Options).
136merge_options([H|T], Options0, Options) :-
137 merge_options(T, [H|Options0], Options)
Parse of HTML and XML documents for the HTTP client libs
This module provides a plugin for the HTTP client to handle xml, html and sgml files using the SWI-Prolog sgml-parser from
library(sgml)
. Using this library avoids unnecessary copying of data as the sgml-parser reads directly from the stream that established the HTTP connection.This is a plugin for http_get/3 and http_post/4