doc_wiki.pl

View source with formatted comments or as raw
    1/*  Part of SWI-Prolog
    2
    3    Author:        Jan Wielemaker
    4    E-mail:        J.Wielemaker@vu.nl
    5    WWW:           http://www.swi-prolog.org
    6    Copyright (c)  2006-2017, University of Amsterdam
    7                              VU University Amsterdam
    8    All rights reserved.
    9
   10    Redistribution and use in source and binary forms, with or without
   11    modification, are permitted provided that the following conditions
   12    are met:
   13
   14    1. Redistributions of source code must retain the above copyright
   15       notice, this list of conditions and the following disclaimer.
   16
   17    2. Redistributions in binary form must reproduce the above copyright
   18       notice, this list of conditions and the following disclaimer in
   19       the documentation and/or other materials provided with the
   20       distribution.
   21
   22    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   23    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   24    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
   25    FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
   26    COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
   27    INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
   28    BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
   29    LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
   30    CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   31    LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
   32    ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
   33    POSSIBILITY OF SUCH DAMAGE.
   34*/
   35
   36:- module(pldoc_wiki,
   37          [ wiki_codes_to_dom/3,        % +Codes, +Args, -DOM
   38            wiki_lines_to_dom/3,        % +Lines, +Map, -DOM
   39            section_comment_header/3,   % +Lines, -Header, -RestLines
   40            summary_from_lines/2,       % +Lines, -Codes
   41            indented_lines/3,           % +Text, +PrefixChars, -Lines
   42            strip_leading_par/2,        % +DOM0, -DOM
   43            normalise_white_space/3,    % -Text, //
   44            autolink_extension/2,       % ?Extension, ?Type
   45            autolink_file/2             % +FileName, -Type
   46          ]).   47:- use_module(library(lists)).   48:- use_module(library(debug)).   49:- use_module(library(error)).   50:- use_module(library(memfile)).   51:- use_module(library(pairs)).   52:- use_module(library(option)).   53:- use_module(library(debug)).   54:- use_module(library(apply)).   55:- use_module(library(dcg/basics)).   56
   57
   58/** <module> PlDoc wiki parser
   59
   60This file defines the PlDoc wiki parser,  which parses both comments and
   61wiki text files. The original version of this SWI-Prolog wiki format was
   62largely modeled after Twiki (http://twiki.org/).  The current version is
   63extended to take many aspects from   markdown, in particular the doxygen
   64refinement thereof.
   65
   66@see http://www.stack.nl/~dimitri/doxygen/manual/markdown.html
   67*/
   68
   69:- multifile
   70    prolog:doc_wiki_face//2,        % -Out, +VarNames
   71    prolog:doc_url_expansion/3,     % +Alias(Rest), -HREF, -Label
   72    prolog:url_expansion_hook/3,    % +Term, -Ref, -Label
   73    prolog:doc_autolink_extension/2.% +Extension, -Type
   74
   75
   76                 /*******************************
   77                 *          WIKI PARSING        *
   78                 *******************************/
   79
   80%!  wiki_lines_to_dom(+Lines:lines, +Args:list(atom), -Term) is det
   81%
   82%   Translate a Wiki text into  an   HTML  term suitable for html//1
   83%   from the html_write library.
   84
   85wiki_lines_to_dom(Lines, Args, HTML) :-
   86    tokenize_lines(Lines, Tokens0),
   87    normalise_indentation(Tokens0, Tokens),
   88    wiki_structure(Tokens, -1, Pars),
   89    wiki_faces(Pars, Args, HTML).
   90
   91
   92%!  wiki_codes_to_dom(+String, +Args, -DOM) is det.
   93%
   94%   Translate a plain text into a DOM term.
   95%
   96%   @param String   Plain text.  Either a string or a list of codes.
   97
   98wiki_codes_to_dom(Codes, Args, DOM) :-
   99    indented_lines(Codes, [], Lines),
  100    wiki_lines_to_dom(Lines, Args, DOM).
  101
  102
  103%!  wiki_structure(+Lines:lines, +BaseIndent,
  104%!                 -Blocks:list(block)) is det
  105%
  106%   Get the structure in terms  of block-level elements: paragraphs,
  107%   lists and tables. This processing uses   a mixture of layout and
  108%   punctuation.
  109
  110wiki_structure([], _, []) :- !.
  111wiki_structure([_-[]|T], BI, Pars) :-          % empty lines
  112    !,
  113    wiki_structure(T, BI, Pars).
  114wiki_structure(Lines, _, [\tags(Tags)]) :-
  115    tags(Lines, Tags),
  116    !.
  117wiki_structure(Lines, BI, [P1|PL]) :-
  118    take_block(Lines, BI, P1, RestLines),
  119    wiki_structure(RestLines, BI, PL).
  120
  121%!  take_block(+Lines, +BaseIndent, ?Block, -RestLines) is semidet.
  122%
  123%   Take a block-structure from the input.  Defined block elements
  124%   are lists, table, hrule, section header and paragraph.
  125
  126take_block([_-[]|Lines], BaseIndent, Block, Rest) :-
  127    !,
  128    take_block(Lines, BaseIndent, Block, Rest).
  129take_block([N-_|_], BaseIndent, _, _) :-
  130    N < BaseIndent,
  131    !,
  132    fail.                           % less indented
  133take_block(Lines, BaseIndent, List, Rest) :-
  134    list_item(Lines, Type, Indent, LI, LIT, Rest0),
  135    !,
  136    Indent > BaseIndent,
  137    rest_list(Rest0, Type, Indent, LIT, [], Rest),
  138    List0 =.. [Type, LI],
  139    (   ul_to_dl(List0, List)
  140    ->  true
  141    ;   List0 = dl(Items)
  142    ->  List = dl(class=wiki, Items)
  143    ;   List = List0
  144    ).
  145take_block([N-['|'|RL1]|LT], _, Table, Rest) :-
  146    phrase(row(R0), RL1),
  147    rest_table(LT, N, RL, Rest),
  148    !,
  149    Table = table(class=wiki, [tr(R0)|RL]).
  150take_block([0-[-,-|More]|LT], _, Block, LT) :-  % seperation line
  151    maplist(=(-), More),
  152    !,
  153    Block = hr([]).
  154take_block([_-Line|LT], _, Block, LT) :-        % seperation line
  155    ruler(Line),
  156    !,
  157    Block = hr([]).
  158take_block([_-[@|_]], _, _, _) :-              % starts @tags section
  159    !,
  160    fail.
  161take_block(Lines, _BaseIndent, Section, RestLines) :-
  162    section_header(Lines, Section, RestLines),
  163    !.
  164take_block([_-Verb|Lines], _, Verb, Lines) :-
  165    verbatim_term(Verb),
  166    !.
  167take_block([I-L1|LT], BaseIndent, Elem, Rest) :-
  168    !,
  169    append(L1, PT, Par),
  170    rest_par(LT, PT, I, BaseIndent, MaxI, Rest),
  171    (   MaxI >= BaseIndent+16
  172    ->  Elem = center(Par)
  173    ;   MaxI >= BaseIndent+4
  174    ->  Elem = blockquote(Par)
  175    ;   Elem = p(Par)
  176    ).
  177take_block([Verb|Lines], _, Verb, Lines).
  178
  179%!  ruler(+Line) is semidet.
  180%
  181%   True if Line contains 3 ruler chars and otherwise spaces.
  182
  183ruler([C0|Line]) :-
  184    rule_char(C0),
  185    phrase(ruler(C0, 1), Line).
  186
  187ruler(C, N) --> [C], !, { N2 is N+1 }, ruler(C, N2).
  188ruler(C, N) --> [' '], !, ruler(C, N).
  189ruler(_, N) --> { N >= 3 }.
  190
  191rule_char('-').
  192rule_char('_').
  193rule_char('*').
  194
  195%!  list_item(+Lines, ?Type, ?Indent, -LI0, -LIT, -RestLines) is det.
  196%
  197%   Create a list-item. Naturally this should produce a single item,
  198%   but DL lists produce two items, so   we create the list of items
  199%   as a difference list.
  200%
  201%   @tbd    Pass base-indent
  202
  203list_item([Indent-Line|LT], Type, Indent, Items, ItemT, Rest) :-
  204    !,
  205    list_item_prefix(Type, Line, L1),
  206    (   Type == dl
  207    ->  split_dt(L1, DT0, DD1),
  208        append(DD1, LIT, DD),
  209        strip_ws_tokens(DT0, DT),
  210        Items = [dt(DT),dd(DD)|ItemT]
  211    ;   append(L1, LIT, LI0),
  212        Items = [li(LI0)|ItemT]
  213    ),
  214    rest_list_item(LT, Type, Indent, LIT, Rest).
  215
  216%!  rest_list_item(+Lines, +Type, +Indent, -RestItem, -RestLines) is det
  217%
  218%   Extract the remainder (after the first line) of a list item.
  219
  220rest_list_item(Lines, _Type, Indent, RestItem, RestLines) :-
  221    take_blocks_at_indent(Lines, Indent, Blocks, RestLines),
  222    (   Blocks = [p(Par)|MoreBlocks]
  223    ->  append(['\n'|Par], MoreBlocks, RestItem)
  224    ;   RestItem = Blocks
  225    ).
  226
  227%!  take_blocks_at_indent(+Lines, +Indent, -Pars, -RestLines) is det.
  228%
  229%   Process paragraphs and verbatim blocks (==..==) in bullet-lists.
  230
  231take_blocks_at_indent(Lines, _, [], Lines) :-
  232    skip_empty_lines(Lines, Lines1),
  233    section_header(Lines1, _, _),
  234    !.
  235take_blocks_at_indent(Lines, N, [Block|RestBlocks], RestLines) :-
  236    take_block(Lines, N, Block, Rest0),
  237    !,
  238    take_blocks_at_indent(Rest0, N, RestBlocks, RestLines).
  239take_blocks_at_indent(Lines, _, [], Lines).
  240
  241
  242%!  rest_list(+Lines, +Type, +Indent,
  243%!            -Items, -ItemTail, -RestLines) is det.
  244
  245rest_list(Lines, Type, N, Items, IT, Rest) :-
  246    skip_empty_lines(Lines, Lines1),
  247    list_item(Lines1, Type, N, Items, IT0, Rest0),
  248    !,
  249    rest_list(Rest0, Type, N, IT0, IT, Rest).
  250rest_list(Rest, _, _, IT, IT, Rest).
  251
  252%!  list_item_prefix(?Type, +Line, -Rest) is det.
  253
  254list_item_prefix(ul, [*, ' '|T], T) :- !.
  255list_item_prefix(ul, [-, ' '|T], T) :- !.
  256list_item_prefix(dl, [$, ' '|T], T) :-
  257    split_dt(T, _, _),
  258    !.
  259list_item_prefix(ol, [w(N), '.', ' '|T], T) :-
  260    atom_codes(N, [D]),
  261    between(0'0, 0'9, D).
  262
  263%!  split_dt(+LineAfterDollar, -DT, -Rest)
  264%
  265%   First see whether the entire line is the item. This allows
  266%   creating items holding : by using $ <tokens> :\n
  267
  268split_dt(In, DT, []) :-
  269    append(DT, [':'], In),
  270    !.
  271split_dt(In, DT, Rest) :-
  272    append(DT, [':'|Rest0], In),
  273    (   Rest0 == []
  274    ->  Rest = []
  275    ;   Rest0 = [' '|Rest]
  276    ),
  277    !.
  278
  279
  280%!  ul_to_dl(+UL, -DL) is semidet.
  281%
  282%   Translate an UL list into a DL list   if  all entries are of the
  283%   form "* <term> nl, <description>" and at least one <description>
  284%   is   non-empty,   or    all    items     are    of    the   form
  285%   [[PredicateIndicator]].
  286
  287ul_to_dl(ul(Items), Description) :-
  288    term_items(Items, DLItems, []),
  289    (   terms_to_predicate_includes(DLItems, Preds)
  290    ->  Description = dl(class(predicates), Preds)
  291    ;   member(dd(DD), DLItems), DD \== []
  292    ->  Description = dl(class(termlist), DLItems)
  293    ).
  294
  295term_items([], T, T).
  296term_items([LI|LIs], DLItems, Tail) :-
  297    term_item(LI, DLItems, Tail1),
  298    term_items(LIs, Tail1, Tail).
  299
  300%!  term_item(+LI, -DLItem, ?Tail) is semidet.
  301%
  302%   If LI is of the form <Term> followed  by a newline, return it as
  303%   dt-dd  tuple.  The  <dt>  item    contains  a  term
  304%
  305%       \term(Text, Term, Bindings).
  306
  307term_item(li(Tokens),
  308          [ dt(class=term, \term(Text, Term, Bindings)),
  309            dd(Descr)
  310          | Tail
  311          ], Tail) :-
  312    (   (   append(TermTokens, ['\n'|Descr], Tokens)
  313        ->  true
  314        ;   TermTokens = Tokens,
  315            Descr = []
  316        )
  317    ->  setup_call_cleanup(
  318            ( new_memory_file(MemFile),
  319              open_memory_file(MemFile, write, Out)
  320            ),
  321            ( forall(member(T, TermTokens),
  322                     write_token(Out, T)),
  323              write(Out, ' .\n')
  324            ),
  325            close(Out)),
  326        catch(setup_call_cleanup(
  327                  open_memory_file(MemFile, read, In,
  328                                   [ free_on_close(true)
  329                                   ]),
  330                  ( read_dt_term(In, Term, Bindings),
  331                    read_dt_term(In, end_of_file, []),
  332                    memory_file_to_atom(MemFile, Text)
  333                  ),
  334                  close(In)),
  335              _, fail)
  336    ).
  337
  338write_token(Out, w(X)) :-
  339    !,
  340    write(Out, X).
  341write_token(Out, X) :-
  342    write(Out, X).
  343
  344read_dt_term(In, Term, Bindings) :-
  345    read_term(In, Term,
  346              [ variable_names(Bindings),
  347                module(pldoc_modes)
  348              ]).
  349
  350terms_to_predicate_includes([], []).
  351terms_to_predicate_includes([dt(class=term, \term(_, [[PI]], [])), dd([])|T0],
  352                            [\include(PI, predicate, [])|T]) :-
  353    is_pi(PI),
  354    terms_to_predicate_includes(T0, T).
  355
  356is_pi(Name/Arity) :-
  357    atom(Name),
  358    integer(Arity),
  359    between(0, 20, Arity).
  360is_pi(Name//Arity) :-
  361    atom(Name),
  362    integer(Arity),
  363    between(0, 20, Arity).
  364
  365
  366%!  row(-Cells)// is det.
  367
  368row([C0|CL]) -->
  369    cell(C0),
  370    !,
  371    row(CL).
  372row([]) -->
  373    [].
  374
  375cell(td(C)) -->
  376    face_tokens(C0),
  377    ['|'],
  378    !,
  379    { strip_ws_tokens(C0, C)
  380    }.
  381
  382face_tokens([]) -->
  383    [].
  384face_tokens(Tokens) -->
  385    face_token(H),                          % Deal with embedded *|...|*, etc.
  386    token('|'),
  387    face_tokens(Face),
  388    token('|'),
  389    face_token(H),
  390    !,
  391    { append([[H,'|'], Face, ['|', H], Rest], Tokens) },
  392    face_tokens(Rest).
  393face_tokens([H|T]) -->
  394    token(H),
  395    face_tokens(T).
  396
  397face_token(=) --> [=].
  398face_token(*) --> [*].
  399face_token('_') --> ['_'].
  400
  401rest_table([N-Line|LT], N, RL, Rest) :-
  402    md_table_structure_line(Line),
  403    !,
  404    rest_table(LT, N, RL, Rest).
  405rest_table([N-['|'|RL1]|LT], N, [tr(R0)|RL], Rest) :-
  406    !,
  407    phrase(row(R0), RL1),
  408    rest_table(LT, N, RL, Rest).
  409rest_table(Rest, _, [], Rest).
  410
  411%!  md_table_structure_line(+Chars)
  412%
  413%   True if Chars represents Markdown  table structure. We currently
  414%   ignore the structure information.
  415
  416md_table_structure_line(Line) :-
  417    forall(member(Char, Line),
  418           md_table_structure_char(Char)).
  419
  420md_table_structure_char(' ').
  421md_table_structure_char('-').
  422md_table_structure_char('|').
  423md_table_structure_char(':').
  424
  425%!  rest_par(+Lines, -Par,
  426%!           +BaseIndent, +MaxI0, -MaxI, -RestLines) is det.
  427%
  428%   Take the rest of a paragraph. Paragraphs   are  ended by a blank
  429%   line or the start of a list-item.   The latter is a bit dubious.
  430%   Why not a general  block-level   object?  The current definition
  431%   allows for writing lists without a blank line between the items.
  432
  433rest_par([], [], BI, MaxI0, MaxI, []) :-
  434    !,
  435    MaxI is max(BI, MaxI0).
  436rest_par([_-[]|Rest], [], _, MaxI, MaxI, Rest) :- !.
  437rest_par(Lines, [], _, MaxI, MaxI, Lines) :-
  438    Lines = [_-Verb|_],
  439    verbatim_term(Verb),
  440    !.
  441rest_par([I-L|Rest], [], _, MaxI, MaxI, [I-L|Rest]) :-
  442    list_item_prefix(_, L, _),
  443    !.
  444rest_par([I-L1|LT], ['\n'|Par], BI, MaxI0, MaxI, Rest) :-
  445    append(L1, PT, Par),
  446    MaxI1 is max(I, MaxI0),
  447    rest_par(LT, PT, BI, MaxI1, MaxI, Rest).
  448
  449
  450%!  section_header(+Lines, -Section, -RestLines) is semidet.
  451%
  452%   Get a section line from the input.
  453
  454section_header([_-L1|LT], Section, LT) :-
  455    twiki_section_line(L1, Section),
  456    !.
  457section_header([0-L1|LT], Section, LT) :-
  458    md_section_line(L1, Section),
  459    !.
  460section_header([_-L1,0-L2|LT], Section, LT) :-
  461    md_section_line(L1, L2, Section),
  462    !.
  463
  464%!  twiki_section_line(+Tokens, -Section) is semidet.
  465%
  466%   Extract a section using the Twiki   conventions. The section may
  467%   be preceeded by [Word], in which case we generate an anchor name
  468%   Word for the section.
  469
  470twiki_section_line([-,-,-|Rest], Section) :-
  471    plusses(Rest, Section).
  472
  473plusses([+, ' '|Rest], h1(Attrs, Content)) :-
  474    hdr_attributes(Rest, Attrs, Content).
  475plusses([+, +, ' '|Rest], h2(Attrs, Content)) :-
  476    hdr_attributes(Rest, Attrs, Content).
  477plusses([+, +, +, ' '|Rest], h3(Attrs, Content)) :-
  478    hdr_attributes(Rest, Attrs, Content).
  479plusses([+, +, +, +, ' '|Rest], h4(Attrs, Content)) :-
  480    hdr_attributes(Rest, Attrs, Content).
  481
  482hdr_attributes(List, Attrs, Content) :-
  483    strip_leading_ws(List, List2),
  484    (   List2 = ['[',w(Name),']'|List3]
  485    ->  strip_ws_tokens(List3, Content),
  486        Attrs = [class(wiki), id(Name)]
  487    ;   Attrs = class(wiki),
  488        strip_ws_tokens(List, Content)
  489    ).
  490
  491%!  md_section_line(+Tokens, -Section) is semidet.
  492%
  493%   Handle markdown section lines staring with #
  494
  495md_section_line([#, ' '|Rest], h1(Attrs, Content)) :-
  496    md_section_attributes(Rest, Attrs, Content).
  497md_section_line([#, #, ' '|Rest], h2(Attrs, Content)) :-
  498    md_section_attributes(Rest, Attrs, Content).
  499md_section_line([#, #, #, ' '|Rest], h3(Attrs, Content)) :-
  500    md_section_attributes(Rest, Attrs, Content).
  501md_section_line([#, #, #, #, ' '|Rest], h4(Attrs, Content)) :-
  502    md_section_attributes(Rest, Attrs, Content).
  503
  504md_section_attributes(List, Attrs, Content) :-
  505    phrase((tokens(Content), [' '], section_label(Label)), List),
  506    !,
  507    Attrs = [class(wiki), id(Label)].
  508md_section_attributes(Content, Attrs, Content) :-
  509    Attrs = [class(wiki)].
  510
  511section_label(Label) -->
  512    [ '{', '#', w(Name) ],
  513    label_conts(Cont), ['}'],
  514    !,
  515    { atomic_list_concat([Name|Cont], Label) }.
  516
  517label_conts([H|T]) --> label_cont(H), !, label_conts(T).
  518label_conts([]) --> [].
  519
  520label_cont(-) --> [-].
  521label_cont(Name) --> [w(Name)].
  522
  523
  524md_section_line(Line1, Line2, Header) :-
  525    Line1 \== [],
  526    section_underline(Line2, Type),
  527    phrase(wiki_words(_), Line1),  % Should not have structure elements
  528    !,
  529    (   phrase(labeled_section_line(Title, Attrs), Line1)
  530    ->  true
  531    ;   Title = Line1,
  532        Attrs = []
  533    ),
  534    Header =.. [Type, [class(wiki)|Attrs], Title].
  535
  536section_underline([=,=,=|T], h1) :-
  537    maplist(=(=), T),
  538    !.
  539section_underline([-,-,-|T], h2) :-
  540    maplist(=(-), T),
  541    !.
  542
  543labeled_section_line(Title, Attrs) -->
  544    tokens(Title), [' '], section_label(Label),
  545    !,
  546    { Attrs = [id(Label)] }.
  547
  548
  549%!  strip_ws_tokens(+Tokens, -Stripped)
  550%
  551%   Strip leading and trailing whitespace from a token list.  Note
  552%   the the whitespace is already normalised.
  553
  554strip_ws_tokens([' '|T0], T) :-
  555    !,
  556    strip_ws_tokens(T0, T).
  557strip_ws_tokens(L0, L) :-
  558    append(L, [' '], L0),
  559    !.
  560strip_ws_tokens(L, L).
  561
  562
  563%!  strip_leading_ws(+Tokens, -Stripped) is det.
  564%
  565%   Strip leading whitespace from a token list.
  566
  567strip_leading_ws([' '|T], T) :- !.
  568strip_leading_ws(T, T).
  569
  570
  571                 /*******************************
  572                 *             TAGS             *
  573                 *******************************/
  574
  575%!  tags(+Lines:lines, -Tags) is semidet.
  576%
  577%   If the first line is a @tag, read the remainder of the lines to
  578%   a list of \tag(Name, Value) terms.
  579
  580tags(Lines, Tags) :-
  581    collect_tags(Lines, Tags0),
  582    keysort(Tags0, Tags1),
  583    pairs_values(Tags1, Tags2),
  584    combine_tags(Tags2, Tags).
  585
  586%!  collect_tags(+IndentedLines, -Tags) is semidet
  587%
  588%   Create a list Order-tag(Tag,Tokens) for   each @tag encountered.
  589%   Order is the desired position as defined by tag_order/2.
  590%
  591%   @tbd Tag content is  often  poorly   aligned.  We  now  find the
  592%   alignment of subsequent lines  and  assume   the  first  line is
  593%   alligned with the remaining lines.
  594
  595collect_tags([], []).
  596collect_tags([Indent-[@,String|L0]|Lines], [Order-tag(Tag,Value)|Tags]) :-
  597    tag_name(String, Tag, Order),
  598    !,
  599    strip_leading_ws(L0, L),
  600    rest_tag(Lines, Indent, VT, RestLines),
  601    normalise_indentation(VT, VT1),
  602    wiki_structure([0-L|VT1], -1, Value0),
  603    strip_leading_par(Value0, Value),
  604    collect_tags(RestLines, Tags).
  605
  606
  607%!  tag_name(+String, -Tag:atom, -Order:int) is semidet.
  608%
  609%   If String denotes a know tag-name,
  610
  611tag_name(w(Name), Tag, Order) :-
  612    (   renamed_tag(Name, Tag, Level),
  613        tag_order(Tag, Order)
  614    ->  print_message(Level, pldoc(deprecated_tag(Name, Tag)))
  615    ;   tag_order(Name, Order)
  616    ->  Tag = Name
  617    ;   print_message(warning, pldoc(unknown_tag(Name))),
  618        fail
  619    ).
  620
  621
  622rest_tag([], _, [], []) :- !.
  623rest_tag(Lines, Indent, [], Lines) :-
  624    Lines = [Indent-[@,Word|_]|_],
  625    tag_name(Word, _, _),
  626    !.
  627rest_tag([L|Lines0], Indent, [L|VT], Lines) :-
  628    rest_tag(Lines0, Indent, VT, Lines).
  629
  630
  631%!  renamed_tag(+DeprecatedTag:atom, -Tag:atom, -Warn) is semidet.
  632%
  633%   Declaration for deprecated tags.
  634
  635renamed_tag(exception, throws, warning).
  636renamed_tag(param,     arg,    silent).
  637
  638
  639%!  tag_order(+Tag:atom, -Order:int) is semidet.
  640%
  641%   Both declares the know tags and  their expected order. Currently
  642%   the tags are forced into  this   order  without  warning. Future
  643%   versions may issue a warning if the order is inconsistent.
  644
  645:- multifile
  646    pldoc:tag_order/2.  647
  648tag_order(Tag, Order) :-
  649    pldoc:tag_order(Tag, Order),
  650    !.
  651tag_order(arg,         100).
  652tag_order(error,       200).            % same as throw
  653tag_order(throws,      300).
  654tag_order(author,      400).
  655tag_order(version,     500).
  656tag_order(see,         600).
  657tag_order(deprecated,  700).
  658tag_order(compat,      800).            % PlDoc extension
  659tag_order(copyright,   900).
  660tag_order(license,    1000).
  661tag_order(bug,        1100).
  662tag_order(tbd,        1200).
  663
  664%!  combine_tags(+Tags:list(tag(Key, Value)), -Tags:list) is det.
  665%
  666%   Creates the final tag-list.  Tags is a list of
  667%
  668%           * \params(list(param(Name, Descr)))
  669%           * \tag(Name, list(Descr))
  670%
  671%   Descr is a list of tokens.
  672
  673combine_tags([], []).
  674combine_tags([tag(arg, V1)|T0], [\args([P1|PL])|Tags]) :-
  675    !,
  676    arg_tag(V1, P1),
  677    arg_tags(T0, PL, T1),
  678    combine_tags(T1, Tags).
  679combine_tags([tag(Tag,V0)|T0], [\tag(Tag, [V0|Vs])|T]) :-
  680    same_tag(Tag, T0, T1, Vs),
  681    combine_tags(T1, T).
  682
  683arg_tag([PT|Descr0], arg(PN, Descr)) :-
  684    word_of(PT, PN),
  685    strip_leading_ws(Descr0, Descr).
  686
  687word_of(w(W), W) :- !.                  % TBD: check non-word arg
  688word_of(W, W).
  689
  690arg_tags([tag(arg, V1)|T0], [P1|PL], T) :-
  691    !,
  692    arg_tag(V1, P1),
  693    arg_tags(T0, PL, T).
  694arg_tags(T, [], T).
  695
  696same_tag(Tag, [tag(Tag, V)|T0], T, [V|Vs]) :-
  697    !,
  698    same_tag(Tag, T0, T, Vs).
  699same_tag(_, L, L, []).
  700
  701
  702                 /*******************************
  703                 *             FACES            *
  704                 *******************************/
  705
  706%!  wiki_faces(+Structure, +ArgNames, -HTML) is det.
  707%
  708%   Given the wiki structure, analyse the content of the paragraphs,
  709%   list items and table cells and apply font faces and links.
  710
  711wiki_faces([dt(Class, \term(Text, Term, Bindings)), dd(Descr0)|T0],
  712           ArgNames,
  713           [dt(Class, \term(Text, Term, Bindings)), dd(Descr)|T]) :-
  714    !,
  715    varnames(Bindings, VarNames, ArgNames),
  716    wiki_faces(Descr0, VarNames, Descr),
  717    wiki_faces(T0, ArgNames, T).
  718wiki_faces(DOM0, ArgNames, DOM) :-
  719    structure_term(DOM0, Functor, Content0),
  720    !,
  721    wiki_faces_list(Content0, ArgNames, Content),
  722    structure_term(DOM, Functor, Content).
  723wiki_faces(Verb, _, Verb) :-
  724    verbatim_term(Verb),
  725    !.
  726wiki_faces(Content0, ArgNames, Content) :-
  727    assertion(is_list(Content0)),
  728    phrase(wiki_faces(Content, ArgNames), Content0),
  729    !.
  730
  731varnames([], List, List).
  732varnames([Name=_|T0], [Name|T], List) :-
  733    varnames(T0, T, List).
  734
  735wiki_faces_list([], _, []).
  736wiki_faces_list([H0|T0], Args, [H|T]) :-
  737    wiki_faces(H0, Args, H),
  738    wiki_faces_list(T0, Args, T).
  739
  740%!  structure_term(+Term, -Functor, -Content) is semidet.
  741%!  structure_term(-Term, +Functor, +Content) is det.
  742%
  743%   (Un)pack a term describing structure, so  we can process Content
  744%   and re-pack the structure.
  745
  746structure_term(\tags(Tags), tags, [Tags]) :- !.
  747structure_term(\args(Params), args, [Params]) :- !.
  748structure_term(arg(Name,Descr), arg(Name), [Descr]) :- !.
  749structure_term(\tag(Name,Value), tag(Name), [Value]) :- !.
  750structure_term(\include(What,Type,Opts), include(What,Type,Opts), []) :- !.
  751structure_term(dl(Att, Args), dl(Att), [Args]) :- !.
  752structure_term(dt(Att, Args), dt(Att), [Args]) :- !.
  753structure_term(table(Att, Args), table(Att), [Args]) :- !.
  754structure_term(h1(Att, Args), h1(Att), [Args]) :- !.
  755structure_term(h2(Att, Args), h2(Att), [Args]) :- !.
  756structure_term(h3(Att, Args), h3(Att), [Args]) :- !.
  757structure_term(h4(Att, Args), h4(Att), [Args]) :- !.
  758structure_term(hr(Att), hr(Att), []) :- !.
  759structure_term(p(Args), p, [Args]) :- !.
  760structure_term(Term, Functor, Args) :-
  761    structure_term_any(Term, Functor, Args).
  762
  763structure_term(Term) :-
  764    structure_term_any(Term, _Functor, _Args).
  765
  766structure_term_any(Term, Functor, Args) :-
  767    functor(Term, Functor, 1),
  768    structure_tag(Functor),
  769    !,
  770    Term =.. [Functor|Args].
  771
  772structure_tag(ul).
  773structure_tag(ol).
  774structure_tag(dl).
  775structure_tag(li).
  776structure_tag(dt).
  777structure_tag(dd).
  778structure_tag(table).
  779structure_tag(tr).
  780structure_tag(td).
  781structure_tag(blockquote).
  782structure_tag(center).
  783
  784
  785%!  verbatim_term(?Term) is det
  786%
  787%   True if Term must be passes verbatim.
  788
  789verbatim_term(pre(_,_)).
  790verbatim_term(\term(_,_,_)).
  791
  792%!  matches(:Goal, -Input, -Last)//
  793%
  794%   True when Goal runs successfully on the DCG input and Input
  795%   is the list of matched tokens.
  796
  797matches(Goal, Input, Last, List, Rest) :-
  798    call(Goal, List, Rest),
  799    input(List, Rest, Input, Last).
  800
  801input([H|T0], Rest, Input, Last) :-
  802    (   T0 == Rest
  803    ->  Input = [H],
  804        Last = H
  805    ;   Input = [H|T],
  806        input(T0, Rest, T, Last)
  807    ).
  808
  809
  810%!  wiki_faces(-WithFaces, +ArgNames)// is nondet.
  811%!  wiki_faces(-WithFaces, +ArgNames, +Options)// is nondet.
  812%
  813%   Apply font-changes and automatic  links   to  running  text. The
  814%   faces are applied after discovering   the structure (paragraphs,
  815%   lists, tables, keywords).
  816%
  817%   @arg Options is a dict, minimally containing `depth`
  818
  819wiki_faces(WithFaces, ArgNames, List, Rest) :-
  820    default_faces_options(Options),
  821    catch(wiki_faces(WithFaces, ArgNames, Options, List, Rest),
  822          pldoc(depth_limit),
  823          failed_faces(WithFaces, List, Rest)).
  824
  825default_faces_options(_{depth:5}).
  826
  827failed_faces(WithFaces) -->
  828    { debug(markdown(overflow), 'Depth limit exceeded', []) },
  829    wiki_words(WithFaces).
  830
  831wiki_faces([EmphTerm|T], ArgNames, Options) -->
  832    emphasis_seq(EmphTerm, ArgNames, Options),
  833    !,
  834    wiki_faces_int(T, ArgNames).
  835wiki_faces(Faces, ArgNames, Options) -->
  836    wiki_faces_int(Faces, ArgNames, Options).
  837
  838wiki_faces_int(WithFaces, ArgNames) -->
  839    { default_faces_options(Options)
  840    },
  841    wiki_faces_int(WithFaces, ArgNames, Options).
  842
  843wiki_faces_int([], _, _) -->
  844    [].
  845wiki_faces_int([H|T], ArgNames, Options) -->
  846    wiki_face(H, ArgNames, Options),
  847    !,
  848    wiki_faces(T, ArgNames, Options).
  849wiki_faces_int([Before,EmphTerm|T], ArgNames, Options) -->
  850    emphasis_before(Before),
  851    emphasis_seq(EmphTerm, ArgNames, Options),
  852    !,
  853    wiki_faces_int(T, ArgNames, Options).
  854wiki_faces_int([H|T], ArgNames, Options) -->
  855    wiki_face_simple(H, ArgNames, Options),
  856    !,
  857    wiki_faces_int(T, ArgNames, Options).
  858
  859next_level(Options0, Options) -->
  860    {   succ(NewDepth, Options0.depth)
  861    ->  Options = Options0.put(depth, NewDepth)
  862    ;   throw(pldoc(depth_limit))
  863    }.
  864
  865%!  prolog:doc_wiki_face(-Out, +VarNames)// is semidet.
  866%!  prolog:doc_wiki_face(-Out, +VarNames, +Options0)// is semidet.
  867%
  868%   Hook that can be  used  to   provide  additional  processing for
  869%   additional _inline_ wiki constructs.  The DCG list is a list of
  870%   tokens.  Defined tokens are:
  871%
  872%     - w(Atom)
  873%     Recognised word (alphanumerical)
  874%     - Atom
  875%     Single character atom representing punctuation marks or the
  876%     atom =|' '|= (space), representing white-space.
  877%
  878%   The  Out  variable  is  input  for    the  backends  defined  in
  879%   doc_latex.pl and doc_html.pl. Roughly, these   are terms similar
  880%   to what html//1 from library(http/html_write) accepts.
  881
  882wiki_face(Out, Args, _) -->
  883    prolog:doc_wiki_face(Out, Args),
  884    !.
  885wiki_face(var(Arg), ArgNames, _) -->
  886    [w(Arg)],
  887    { memberchk(Arg, ArgNames)
  888    },
  889    !.
  890wiki_face(b(Bold), ArgNames, Options) -->
  891    [*,'|'], string(Tokens), ['|',*],
  892    !,
  893    { phrase(wiki_faces(Bold, ArgNames, Options), Tokens) }.
  894wiki_face(i(Italic), ArgNames, Options) -->
  895    ['_','|'], string(Tokens), ['|','_'],
  896    !,
  897    { phrase(wiki_faces(Italic, ArgNames, Options), Tokens) }.
  898wiki_face(code(Code), _, _) -->
  899    [=], eq_code_words(Words), [=],
  900    !,
  901    { atomic_list_concat(Words, Code) }.
  902wiki_face(code(Code), _, _) -->
  903    [=,'|'], wiki_words(Code), ['|',=],
  904    !.
  905wiki_face(Code, _, _) -->
  906    ['`'], code_words(Words), ['`'],
  907    { atomic_list_concat(Words, Text),
  908      catch(atom_to_term(Text, Term, Vars), _, fail),
  909      !,
  910      code_face(Text, Term, Vars, Code)
  911    }.
  912wiki_face(Face, _, _) -->
  913    [ w(Name) ], arg_list(List),
  914    { atomic_list_concat([Name|List], Text),
  915      catch(atom_to_term(Text, Term, Vars), _, fail),
  916      term_face(Text, Term, Vars, Face)
  917    },
  918    !.
  919wiki_face(br([]), _, _) -->
  920    [<,w(br),>,'\n'], !.
  921wiki_face(br([]), _, _) -->
  922    [<,w(br),/,>,'\n'], !.
  923        % Below this, we only do links.
  924wiki_face(_, _, Options) -->
  925    { Options.get(link) == false,
  926      !,
  927      fail
  928    }.
  929wiki_face(\predref(Name/Arity), _, _) -->
  930    [ w(Name), '/' ], arity(Arity),
  931    { functor_name(Name)
  932    },
  933    !.
  934wiki_face(\predref(Module:(Name/Arity)), _, _) -->
  935    [ w(Module), ':', w(Name), '/' ], arity(Arity),
  936    { functor_name(Name)
  937    },
  938    !.
  939wiki_face(\predref(Name/Arity), _, _) -->
  940    prolog_symbol_char(S0),
  941    symbol_string(SRest), [ '/' ], arity(Arity),
  942    !,
  943    { atom_chars(Name, [S0|SRest])
  944    }.
  945wiki_face(\predref(Name//Arity), _, _) -->
  946    [ w(Name), '/', '/' ], arity(Arity),
  947    { functor_name(Name)
  948    },
  949    !.
  950wiki_face(\predref(Module:(Name//Arity)), _, _) -->
  951    [ w(Module), ':', w(Name), '/', '/' ], arity(Arity),
  952    { functor_name(Name)
  953    },
  954    !.
  955wiki_face(\include(Name, Type, Options), _, _) -->
  956    ['[','['], file_name(Base, Ext), [']',']'],
  957    { autolink_extension(Ext, Type),
  958      !,
  959      file_name_extension(Base, Ext, Name),
  960      resolve_file(Name, Options, [])
  961    },
  962    !.
  963wiki_face(\include(Name, Type, [caption(Caption)|Options]), _, _) -->
  964    (   ['!','['], tokens(100, Caption), [']','(']
  965    ->  file_name(Base, Ext), [')'],
  966        { autolink_extension(Ext, Type),
  967          !,
  968          file_name_extension(Base, Ext, Name),
  969          resolve_file(Name, Options, [])
  970        }
  971    ),
  972    !.
  973wiki_face(Link, ArgNames, Options) -->          % TWiki: [[Label][Link]]
  974    (   ['[','['], wiki_label(Label, ArgNames, Options), [']','[']
  975    ->  wiki_link(Link, [label(Label), relative(true), end(']')]),
  976        [']',']'], !
  977    ).
  978wiki_face(Link, ArgNames, Options) -->          % Markdown: [Label](Link)
  979    (   ['['], wiki_label(Label, ArgNames, Options), [']','(']
  980    ->  wiki_link(Link, [label(Label), relative(true), end(')')]),
  981        [')'], !
  982    ).
  983wiki_face(Link, _ArgNames, _) -->
  984    wiki_link(Link, []),
  985    !.
  986
  987wiki_label(Label, _ArgNames, _Options) -->
  988    image_label(Label).
  989wiki_label(Label, ArgNames, Options) -->
  990    next_level(Options, NOptions),
  991    limit(40, wiki_faces(Label, ArgNames, NOptions.put(link,false))).
  992
  993%!  wiki_face_simple(-Out, +ArgNames, +Options)
  994%
  995%   Skip simple (non-markup) wiki.
  996
  997wiki_face_simple(Word, _, _) -->
  998    [ w(Word) ],
  999    !.
 1000wiki_face_simple(SpaceOrPunct, _, _) -->
 1001    [ SpaceOrPunct ],
 1002    { atomic(SpaceOrPunct) },
 1003    !.
 1004wiki_face_simple(FT, ArgNames, _) -->
 1005    [Structure],
 1006    { wiki_faces(Structure, ArgNames, FT)
 1007    }.
 1008
 1009wiki_words([]) --> [].
 1010wiki_words([Word|T]) --> [w(Word)], !, wiki_words(T).
 1011wiki_words([Punct|T]) --> [Punct], {atomic(Punct)}, wiki_words(T).
 1012
 1013%!  code_words(-Words)//
 1014%
 1015%   True when Words is the  content   as  it  appears in =|`code`|=,
 1016%   where =|``|= is mapped to =|`|=.
 1017
 1018code_words([]) --> [].
 1019code_words([Word|T]) --> [w(Word)], code_words(T).
 1020code_words(CodeL) --> ['`','`'], {CodeL = ['`'|T]}, code_words(T).
 1021code_words([Punct|T]) --> [Punct], {atomic(Punct)}, code_words(T).
 1022
 1023%!  eq_code_words(-Words)//
 1024%
 1025%   Stuff that can be between single `=`.  This is limited to
 1026%
 1027%           - Start and end must be a word
 1028%           - In between may be the following punctuation chars:
 1029%             =|.-:/|=, notably dealing with file names and
 1030%             identifiers in various external languages.
 1031
 1032eq_code_words([Word]) -->
 1033    [ w(Word) ].
 1034eq_code_words([Word|T]) -->
 1035    [ w(Word) ], eq_code_internals(T, [End]), [w(End)].
 1036
 1037eq_code_internals(T, T) --> [].
 1038eq_code_internals([H|T], Tail) -->
 1039    eq_code_internal(H),
 1040    eq_code_internals(T, Tail).
 1041
 1042eq_code_internal(Word) -->
 1043    [w(Word)].
 1044eq_code_internal(Punct) -->
 1045    [Punct],
 1046    { eq_code_internal_punct(Punct) }.
 1047
 1048eq_code_internal_punct('.').
 1049eq_code_internal_punct('-').
 1050eq_code_internal_punct(':').
 1051eq_code_internal_punct('/').
 1052
 1053
 1054%!  code_face(+Text, +Term, +Vars, -Code) is det.
 1055%
 1056%   Deal with =|`... code ...`|=  sequences.   Text  is  the matched
 1057%   text, Term is the parsed Prolog term   and Code is the resulting
 1058%   intermediate code.
 1059
 1060code_face(Text, Var, _, Code) :-
 1061    var(Var),
 1062    !,
 1063    Code = var(Text).
 1064code_face(Text, _, _, code(Text)).
 1065
 1066
 1067%!  emphasis_seq(-Out, +ArgNames, +Options) is semidet.
 1068%
 1069%   Recognise emphasis sequences
 1070
 1071emphasis_seq(EmphTerm, ArgNames, Options) -->
 1072    emphasis_start(C),
 1073    next_level(Options, NOptions),
 1074    matches(limit(100, wiki_faces(Emph, ArgNames, NOptions)), Input, Last),
 1075    emphasis_end(C),
 1076    { emph_markdown(Last, Input),
 1077      emphasis_term(C, Emph, EmphTerm)
 1078    },
 1079    !.
 1080
 1081
 1082%!  emphasis_term(+Emphasis, +Tokens, -Term) is det.
 1083%!  emphasis_before(-Before)// is semidet.
 1084%!  emphasis_start(-Emphasis)// is semidet.
 1085%!  emphasis_end(+Emphasis)// is semidet.
 1086%
 1087%   Primitives for Doxygen emphasis handling.
 1088
 1089emphasis_term('_',   Term, i(Term)).
 1090emphasis_term('*',   Term, b(Term)).
 1091emphasis_term('__',  Term, strong(Term)).
 1092emphasis_term('**',  Term, strong(Term)).
 1093
 1094emph_markdown(_, [w(_)]) :- !.
 1095emph_markdown(Last, Tokens) :-
 1096    \+ emphasis_after_sep(Last),
 1097    catch(b_getval(pldoc_object, Obj), _, Obj = '??'),
 1098    debug(markdown(emphasis), '~q: additionally emphasis: ~p',
 1099          [Obj, Tokens]).
 1100
 1101emphasis_before(Before) -->
 1102    [Before],
 1103    { emphasis_start_sep(Before) }.
 1104
 1105emphasis_start_sep('\n').
 1106emphasis_start_sep(' ').
 1107emphasis_start_sep('<').
 1108emphasis_start_sep('{').
 1109emphasis_start_sep('(').
 1110emphasis_start_sep('[').
 1111emphasis_start_sep(',').
 1112emphasis_start_sep(':').
 1113emphasis_start_sep(';').
 1114
 1115emphasis_start(Which), [w(Word)] -->
 1116    emphasis(Which),
 1117    [w(Word)].
 1118
 1119emphasis(**)   --> [*, *].
 1120emphasis(*)    --> [*].
 1121emphasis('__') --> ['_', '_'].
 1122emphasis('_')  --> ['_'].
 1123
 1124emphasis_end(Which), [After] -->
 1125    emphasis(Which),
 1126    [ After ],
 1127    !,
 1128    { emphasis_close_sep(After) -> true }.
 1129emphasis_end(Which) -->
 1130    emphasis(Which).
 1131
 1132% these characters should not be before a closing * or _.
 1133
 1134emphasis_after_sep('\n').
 1135emphasis_after_sep(' ').
 1136emphasis_after_sep('(').
 1137emphasis_after_sep('[').
 1138emphasis_after_sep('<').
 1139emphasis_after_sep('=').
 1140emphasis_after_sep('+').
 1141emphasis_after_sep('\\').
 1142emphasis_after_sep('@').
 1143
 1144emphasis_close_sep('\n').                       % white
 1145emphasis_close_sep(' ').                        % white
 1146emphasis_close_sep(',').                        % sentence punctuation
 1147emphasis_close_sep('.').
 1148emphasis_close_sep('!').
 1149emphasis_close_sep('?').
 1150emphasis_close_sep(':').
 1151emphasis_close_sep(';').
 1152emphasis_close_sep(']').                        % [**label**](link)
 1153emphasis_close_sep(')').                        % ... _italic_)
 1154emphasis_close_sep('}').                        % ... _italic_}
 1155emphasis_close_sep(Token) :-
 1156    structure_term(Token).
 1157
 1158
 1159%!  arg_list(-Atoms) is nondet.
 1160%
 1161%   Atoms  is  a  token-list  for  a    Prolog   argument  list.  An
 1162%   argument-list is a sequence of tokens '(' ... ')'.
 1163%
 1164%   @bug    the current implementation does not deal correctly with
 1165%           brackets that are embedded in quoted strings.
 1166
 1167arg_list(['('|T]) -->
 1168    ['('], arg_list_close(T, 1).
 1169
 1170arg_list_close(Tokens, Depth) -->
 1171    [')'],
 1172    !,
 1173    (   { Depth == 1 }
 1174    ->  { Tokens = [')'] }
 1175    ;   { Depth > 1 }
 1176    ->  { Tokens = [')'|More],
 1177          NewDepth is Depth - 1
 1178        },
 1179        arg_list_close(More, NewDepth)
 1180    ).
 1181arg_list_close(['('|T], Depth) -->
 1182    ['('], { NewDepth is Depth+1 },
 1183    arg_list_close(T, NewDepth).
 1184arg_list_close([H|T], Depth) -->
 1185    [w(H)],
 1186    !,
 1187    arg_list_close(T, Depth).
 1188arg_list_close([H|T], Depth) -->
 1189    [H],
 1190    arg_list_close(T, Depth).
 1191
 1192
 1193%!  term_face(+Text, +Term, +Vars, -Face) is semidet.
 1194%
 1195%   Process embedded Prolog-terms. Currently   processes  Alias(Arg)
 1196%   terms that refer to files.  Future   versions  will also provide
 1197%   pretty-printing of Prolog terms.
 1198
 1199term_face(_Text, Term, _Vars, \file(Name, FileOptions)) :-
 1200    ground(Term),
 1201    compound(Term),
 1202    compound_name_arguments(Term, Alias, [_]),
 1203    user:file_search_path(Alias, _),
 1204    existing_file(Term, FileOptions, []),
 1205    !,
 1206    format(atom(Name), '~q', [Term]).
 1207term_face(Text, Term, Vars, Face) :-
 1208    code_face(Text, Term, Vars, Face).
 1209
 1210untag([], []).
 1211untag([w(W)|T0], [W|T]) :-
 1212    !,
 1213    untag(T0, T).
 1214untag([H|T0], [H|T]) :-
 1215    untag(T0, T).
 1216
 1217%!  image_label(-Label)//
 1218%
 1219%   Match File[;param=value[,param=value]*]
 1220
 1221image_label(\include(Name, image, Options)) -->
 1222    file_name(Base, Ext),
 1223    { autolink_extension(Ext, image),
 1224      file_name_extension(Base, Ext, Name),
 1225      resolve_file(Name, Options, RestOptions)
 1226    },
 1227    file_options(RestOptions).
 1228
 1229
 1230%!  file_options(-Options) is det.
 1231%
 1232%   Extracts additional processing options for  files. The format is
 1233%   ;name="value",name2=value2,... Spaces are not allowed.
 1234
 1235file_options(Options) -->
 1236    [;], nv_pairs(Options),
 1237    !.
 1238file_options([]) -->
 1239    [].
 1240
 1241nv_pairs([H|T]) -->
 1242    nv_pair(H),
 1243    (   [',']
 1244    ->  nv_pairs(T)
 1245    ;   {T=[]}
 1246    ).
 1247
 1248nv_pair(Option) -->
 1249    [ w(Name), =,'"'], tokens(Tokens), ['"'],
 1250    !,
 1251    { untag(Tokens, Atoms),
 1252      atomic_list_concat(Atoms, Value0),
 1253      (   atom_number(Value0, Value)
 1254      ->  true
 1255      ;   Value = Value0
 1256      ),
 1257      Option =.. [Name,Value]
 1258    }.
 1259
 1260
 1261%!  wiki_link(-Link, +Options)// is semidet.
 1262%
 1263%   True if we can find a link to a file or URL. Links are described
 1264%   as one of:
 1265%
 1266%       $ filename :
 1267%       A filename defined using autolink_file/2 or
 1268%       autolink_extension/2
 1269%       $ <url-protocol>://<rest-url> :
 1270%       A fully qualified URL
 1271%       $ '<' URL '>' :
 1272%       Be more relaxed on the URL specification.
 1273
 1274:- multifile
 1275    user:url_path/2. 1276
 1277wiki_link(\file(Name, FileOptions), Options) -->
 1278    file_name(Base, Ext),
 1279    { file_name_extension(Base, Ext, Name),
 1280      (   autolink_file(Name, _)
 1281      ;   autolink_extension(Ext, _)
 1282      ),
 1283      !,
 1284      resolve_file(Name, FileOptions, Options)
 1285    }.
 1286wiki_link(\file(Name, FileOptions), Options) -->
 1287    [w(Name)],
 1288    { autolink_file(Name, _),
 1289      !,
 1290      resolve_file(Name, FileOptions, Options)
 1291    },
 1292    !.
 1293wiki_link(a(href(Ref), Label), Options) -->
 1294    [ w(Prot),:,/,/], { url_protocol(Prot) },
 1295    { option(end(End), Options, space)
 1296    },
 1297    tokens_no_whitespace(Rest), peek_end_url(End),
 1298    !,
 1299    { atomic_list_concat([Prot, :,/,/ | Rest], Ref),
 1300      option(label(Label), Options, Ref)
 1301    }.
 1302wiki_link(a(href(Ref), Label), _Options) -->
 1303    [<, w(Alias), :],
 1304    tokens_no_whitespace(Rest), [>],
 1305    { Term = (Alias:Rest),
 1306      prolog:url_expansion_hook(Term, Ref, Label), !
 1307    }.
 1308wiki_link(a(href(Ref), Label), Options) -->
 1309    [<, w(Alias), :],
 1310    { user:url_path(Alias, _)
 1311    },
 1312    tokens_no_whitespace(Rest), [>],
 1313    { atomic_list_concat(Rest, Local),
 1314      (   Local == ''
 1315      ->  Term =.. [Alias,'.']
 1316      ;   Term =.. [Alias,Local]
 1317      ),
 1318      catch(expand_url_path(Term, Ref), _, fail),
 1319      option(label(Label), Options, Ref)
 1320    }.
 1321wiki_link(a(href(Ref), Label), Options) -->
 1322    [<],
 1323    (   { option(relative(true), Options),
 1324          Parts = Rest
 1325        }
 1326    ->  tokens_no_whitespace(Rest)
 1327    ;   { Parts = [Prot, : | Rest]
 1328        },
 1329        [w(Prot), :], tokens_no_whitespace(Rest)
 1330    ),
 1331    [>],
 1332    !,
 1333    { atomic_list_concat(Parts, Ref),
 1334      option(label(Label), Options, Ref)
 1335    }.
 1336
 1337%!  prolog:url_expansion_hook(+Term, -HREF, -Label) is semidet.
 1338%
 1339%   This hook is called after   recognising  =|<Alias:Rest>|=, where
 1340%   Term is of the form Alias(Rest). If   it  succeeds, it must bind
 1341%   HREF to an atom or string representing the link target and Label
 1342%   to an html//1 expression for the label.
 1343
 1344%!  file_name(-Name:atom, -Ext:atom)// is semidet.
 1345%
 1346%   Matches a filename.  A filename is defined as a sequence
 1347%   <segment>{/<segment}.<ext>.
 1348
 1349file_name(FileBase, Extension) -->
 1350    segment(S1),
 1351    segments(List),
 1352    ['.'], file_extension(Extension),
 1353    !,
 1354    { atomic_list_concat([S1|List], '/', FileBase) }.
 1355
 1356segment(..) -->
 1357    ['.','.'],
 1358    !.
 1359segment(Word) -->
 1360    [w(Word)].
 1361segment(Dir) -->
 1362    [w(Word),'.',w(d)],
 1363    { atom_concat(Word, '.d', Dir) }.
 1364
 1365segments([H|T]) -->
 1366    ['/'],
 1367    !,
 1368    segment(H),
 1369    segments(T).
 1370segments([]) -->
 1371    [].
 1372
 1373file_extension(Ext) -->
 1374    [w(Ext)],
 1375    { autolink_extension(Ext, _)
 1376    }.
 1377
 1378
 1379%!  resolve_file(+Name, -Options, ?RestOptions) is det.
 1380%
 1381%   Find the actual file based on the pldoc_file global variable. If
 1382%   present  and  the   file   is    resolvable,   add   an   option
 1383%   absolute_path(Path) that reflects the current   location  of the
 1384%   file.
 1385
 1386resolve_file(Name, Options, Rest) :-
 1387    existing_file(Name, Options, Rest),
 1388    !.
 1389resolve_file(_, Options, Options).
 1390
 1391
 1392existing_file(Name, Options, Rest) :-
 1393    catch(existing_file_p(Name, Options, Rest), _, fail).
 1394
 1395existing_file_p(Name, Options, Rest) :-
 1396    nb_current(pldoc_file, RelativeTo),
 1397    RelativeTo \== [],
 1398    (   compound(Name)
 1399    ->  Extra = [file_type(prolog)]
 1400    ;   Extra = []
 1401    ),
 1402    absolute_file_name(Name, Path,
 1403                       [ relative_to(RelativeTo),
 1404                         access(read),
 1405                         file_errors(fail)
 1406                       | Extra
 1407                       ]),
 1408    Options = [ absolute_path(Path) | Rest ].
 1409
 1410%!  arity(-Arity:int)// is semidet.
 1411%
 1412%   True if the next token can be  interpreted as an arity. That is,
 1413%   refers to a non-negative integers of at most 20. Although Prolog
 1414%   allows for higher arities, we assume 20   is  a fair maximum for
 1415%   user-created predicates that are documented.
 1416
 1417arity(Arity) -->
 1418    [ w(Word) ],
 1419    { catch(atom_number(Word, Arity), _, fail),
 1420      Arity >= 0, Arity < 20
 1421    }.
 1422
 1423%!  symbol_string(-String)// is nondet
 1424%
 1425%   Accept a sequence of Prolog symbol characters, starting with the
 1426%   shortest (empty) match.
 1427
 1428symbol_string([]) -->
 1429    [].
 1430symbol_string([H|T]) -->
 1431    [H],
 1432    { prolog_symbol_char(H) },
 1433    symbol_string(T).
 1434
 1435prolog_symbol_char(C) -->
 1436    [C],
 1437    { prolog_symbol_char(C) }.
 1438
 1439%!  prolog_symbol_char(?Char)
 1440%
 1441%   True if char is classified by Prolog as a symbol char.
 1442
 1443prolog_symbol_char(#).
 1444prolog_symbol_char($).
 1445prolog_symbol_char(&).
 1446prolog_symbol_char(*).
 1447prolog_symbol_char(+).
 1448prolog_symbol_char(-).
 1449prolog_symbol_char(.).
 1450prolog_symbol_char(/).
 1451prolog_symbol_char(:).
 1452prolog_symbol_char(<).
 1453prolog_symbol_char(=).
 1454prolog_symbol_char(>).
 1455prolog_symbol_char(?).
 1456prolog_symbol_char(@).
 1457prolog_symbol_char(\).
 1458prolog_symbol_char(^).
 1459prolog_symbol_char(~).
 1460
 1461
 1462functor_name(String) :-
 1463    sub_atom(String, 0, 1, _, Char),
 1464    char_type(Char, lower).
 1465
 1466url_protocol(http).
 1467url_protocol(https).
 1468url_protocol(ftp).
 1469url_protocol(mailto).
 1470
 1471peek_end_url(space) -->
 1472    peek(End),
 1473    { space_token(End) },
 1474    !.
 1475peek_end_url(space, [], []) :- !.
 1476peek_end_url(Token) -->
 1477    peek(Token),
 1478    !.
 1479
 1480space_token(' ') :- !.
 1481space_token('\r') :- !.
 1482space_token('\n') :- !.
 1483space_token(T) :-
 1484    \+ atom(T),                     % high level format like p(...)
 1485    \+ T = w(_).
 1486
 1487%!  autolink_extension(?Ext, ?Type) is nondet.
 1488%
 1489%   True if Ext is a filename extensions that create automatic links
 1490%   in the documentation.
 1491
 1492autolink_extension(Ext, Type) :-
 1493    prolog:doc_autolink_extension(Ext, Type),
 1494    !.
 1495autolink_extension(Ext, prolog) :-
 1496    user:prolog_file_type(Ext,prolog),
 1497    !.
 1498autolink_extension(txt, wiki).
 1499autolink_extension(md,  wiki).
 1500autolink_extension(gif, image).
 1501autolink_extension(png, image).
 1502autolink_extension(jpg, image).
 1503autolink_extension(jpeg, image).
 1504autolink_extension(svg, image).
 1505
 1506%!  autolink_file(?File, -Type) is nondet.
 1507%
 1508%   Files to which we automatically create links, regardless of the
 1509%   extension.
 1510
 1511autolink_file('README', wiki).
 1512autolink_file('TODO', wiki).
 1513autolink_file('ChangeLog', wiki).
 1514
 1515                 /*******************************
 1516                 *           SECTIONS           *
 1517                 *******************************/
 1518
 1519%!  section_comment_header(+Lines, -Header, -RestLines) is semidet.
 1520%
 1521%   Processes   /**   <section>   comments.   Header   is   a   term
 1522%   \section(Type, Title), where  Title  is   an  atom  holding  the
 1523%   section title and Type is an atom holding the text between <>.
 1524%
 1525%   @param Lines    List of Indent-Codes.
 1526%   @param Header   DOM term of the format \section(Type, Title),
 1527%                   where Type is an atom from <type> and Title is
 1528%                   a string holding the type.
 1529
 1530section_comment_header([_-Line|Lines], Header, Lines) :-
 1531    phrase(section_line(Header), Line).
 1532
 1533section_line(\section(Type, Title)) -->
 1534    ws, "<", word(Codes), ">", normalise_white_space(TitleCodes),
 1535    { atom_codes(Type, Codes),
 1536      atom_codes(Title, TitleCodes)
 1537    }.
 1538
 1539
 1540%!  normalise_white_space(-Text)// is det.
 1541%
 1542%   Text is input after deleting leading   and  trailing white space
 1543%   and mapping all internal white space to a single space.
 1544
 1545normalise_white_space(Text) -->
 1546    ws,
 1547    normalise_white_space2(Text).
 1548
 1549normalise_white_space2(Text) -->
 1550    non_ws(Text, Tail),
 1551    ws,
 1552    (   eos
 1553    ->  { Tail = [] }
 1554    ;   { Tail = [0'\s|T2] },
 1555        normalise_white_space2(T2)
 1556    ).
 1557
 1558
 1559                 /*******************************
 1560                 *           TOKENIZER          *
 1561                 *******************************/
 1562
 1563%!  tokenize_lines(+Lines:lines, -TokenLines) is det
 1564%
 1565%   Convert Indent-Codes into Indent-Tokens
 1566
 1567tokenize_lines(Lines, TokenLines) :-
 1568    tokenize_lines(Lines, -1, TokenLines).
 1569
 1570tokenize_lines([], _, []) :- !.
 1571tokenize_lines(Lines, Indent, [Pre|T]) :-
 1572    verbatim(Lines, Indent, Pre, RestLines),
 1573    !,
 1574    tokenize_lines(RestLines, Indent, T).
 1575tokenize_lines([I-H0|T0], Indent0, [I-H|T]) :-
 1576    phrase(line_tokens(H), H0),
 1577    (   H == []
 1578    ->  Indent = Indent0
 1579    ;   Indent = I
 1580    ),
 1581    tokenize_lines(T0, Indent, T).
 1582
 1583
 1584%!  line_tokens(-Tokens:list)// is det.
 1585%
 1586%   Create a list of tokens, where  is  token   is  either  a ' ' to
 1587%   denote spaces, a  term  w(Word)  denoting   a  word  or  an atom
 1588%   denoting a punctuation  character.   Underscores  (_)  appearing
 1589%   inside an alphanumerical string are considered part of the word.
 1590%   E.g., "hello_world_" tokenizes into [w(hello_world), '_'].
 1591
 1592line_tokens([H|T]) -->
 1593    line_token(H),
 1594    !,
 1595    line_tokens(T).
 1596line_tokens([]) -->
 1597    [].
 1598
 1599line_token(T) -->
 1600    [C],
 1601    (   { code_type(C, space) }
 1602    ->  ws,
 1603        { T = ' ' }
 1604    ;   { code_type(C, alnum) },
 1605        word(Rest),
 1606        { atom_codes(W, [C|Rest]),
 1607          T = w(W)
 1608        }
 1609    ;   { char_code(T, C) }
 1610    ).
 1611
 1612word([C0|T]) -->
 1613    [C0],  { code_type(C0, alnum) },
 1614    !,
 1615    word(T).
 1616word([0'_, C1|T]) -->
 1617    [0'_, C1],  { code_type(C1, alnum) },
 1618    !,
 1619    word(T).
 1620word([]) -->
 1621    [].
 1622
 1623alphas([C0|T]) -->
 1624    [C0],  { code_type(C0, alpha) },
 1625    !,
 1626    alphas(T).
 1627alphas([]) -->
 1628    [].
 1629
 1630%!  verbatim(+Lines, +EnvIndent, -Pre, -RestLines) is det.
 1631%
 1632%   Extract a verbatim environment.  The  returned   Pre  is  of the
 1633%   format pre(Attributes, String). The indentation   of the leading
 1634%   fence is substracted from the indentation of the verbatim lines.
 1635%   Two types of fences are supported:   the  traditional =|==|= and
 1636%   the Doxygen =|~~~|= (minimum  3   =|~|=  characters), optionally
 1637%   followed by =|{.ext}|= to indicate the language.
 1638%
 1639%   Verbatim environment is delimited as
 1640%
 1641%     ==
 1642%       ...,
 1643%       verbatim(Lines, Pre, Rest)
 1644%       ...,
 1645%     ==
 1646%
 1647%   In addition, a verbatim environment may  simply be indented. The
 1648%   restrictions are described in the documentation.
 1649
 1650verbatim(Lines, _,
 1651         Indent-pre([class(code), ext(Ext)],Pre),
 1652         RestLines) :-
 1653    skip_empty_lines(Lines, [Indent-FenceLine|CodeLines]),
 1654    verbatim_fence(FenceLine, Fence, Ext),
 1655    verbatim_body(CodeLines, Indent, [10|PreCodes], [],
 1656                  [Indent-Fence|RestLines]),
 1657    !,
 1658    atom_codes(Pre, PreCodes).
 1659verbatim([_-[],Indent-Line|Lines], EnvIndent,
 1660         Indent-pre(class(code),Pre),
 1661         RestLines) :-
 1662    EnvIndent >= 0,
 1663    Indent >= EnvIndent+4, Indent =< EnvIndent+8,
 1664    valid_verbatim_opening(Line),
 1665    indented_verbatim_body([Indent-Line|Lines], Indent,
 1666                           CodeLines, RestLines),
 1667    !,
 1668    lines_code_text(CodeLines, Indent, [10|PreCodes]),
 1669    atom_codes(Pre, PreCodes).
 1670
 1671verbatim_body(Lines, _, PreT, PreT, Lines).
 1672verbatim_body([I-L|Lines], Indent, [10|Pre], PreT, RestLines) :-
 1673    PreI is I - Indent,
 1674    phrase(pre_indent(PreI), Pre, PreT0),
 1675    verbatim_line(L, PreT0, PreT1),
 1676    verbatim_body(Lines, Indent, PreT1, PreT, RestLines).
 1677
 1678verbatim_fence(Line, Fence, '') :-
 1679    Line == [0'=,0'=],
 1680    !,
 1681    Fence = Line.
 1682verbatim_fence(Line, Fence, Ext) :-
 1683    tilde_fence(Line, Fence, 0, Ext).
 1684verbatim_fence(Line, Fence, Ext) :-
 1685    md_fence(Line, Fence, 0, Ext).
 1686
 1687tilde_fence([0'~|T0], [0'~|F0], C0, Ext) :-
 1688    !,
 1689    C1 is C0+1,
 1690    tilde_fence(T0, F0, C1, Ext).
 1691tilde_fence(List, [], C, Ext) :-
 1692    C >= 3,
 1693    (   List == []
 1694    ->  Ext = ''
 1695    ;   phrase(tilde_fence_ext(ExtCodes), List)
 1696    ->  atom_codes(Ext, ExtCodes)
 1697    ).
 1698
 1699%!  tilde_fence_ext(-Ext)// is semidet.
 1700%
 1701%   Detect ```{.prolog} (Doxygen) or ```{prolog} (GitHub)
 1702
 1703tilde_fence_ext(Ext) -->
 1704    "{.", !, alphas(Ext), "}".
 1705tilde_fence_ext(Ext) -->
 1706    "{", alphas(Ext), "}".
 1707
 1708md_fence([0'`|T0], [0'`|F0], C0, Ext) :-
 1709    !,
 1710    C1 is C0+1,
 1711    md_fence(T0, F0, C1, Ext).
 1712md_fence(List, [], C, Ext) :-
 1713    C >= 3,
 1714    (   List == []
 1715    ->  Ext = ''
 1716    ;   phrase(md_fence_ext(ExtCodes), List),
 1717        atom_codes(Ext, ExtCodes)
 1718    ).
 1719
 1720% Also support Doxygen's curly bracket notation.
 1721md_fence_ext(Ext) -->
 1722    tilde_fence_ext(Ext),
 1723    !.
 1724% In Markdown language names appear without brackets.
 1725md_fence_ext(Ext) -->
 1726    alphas(Ext).
 1727
 1728%!  indented_verbatim_body(+Lines, +Indent, -CodeLines, -RestLines)
 1729%
 1730%   Takes more verbatim lines. The input   ends  with the first line
 1731%   that is indented less than Indent. There cannot be more than one
 1732%   consequtive empty line in the verbatim body.
 1733
 1734indented_verbatim_body([I-L|T0], Indent, [I-L|T], RestLines) :-
 1735    L \== [], I >= Indent,
 1736    !,
 1737    indented_verbatim_body(T0, Indent, T, RestLines).
 1738indented_verbatim_body([I0-[],I-L|T0], Indent, [I0-[],I-L|T], RestLines) :-
 1739    I >= Indent,
 1740    valid_verbatim_opening(L),
 1741    indented_verbatim_body(T0, Indent, T, RestLines).
 1742indented_verbatim_body(Lines, _, [], Lines).
 1743
 1744%!  valid_verbatim_opening(+Line) is semidet.
 1745%
 1746%   Tests that line does not look like a list item or table.
 1747
 1748valid_verbatim_opening([0'||_]) :- !, fail.
 1749valid_verbatim_opening(Line) :-
 1750    Line \== [],
 1751    \+ ( phrase(line_tokens(Tokens), Line),
 1752         list_item_prefix(_Type, Tokens, _Rest)
 1753       ).
 1754
 1755%!  lines_code_text(+Lines, +Indent, -Codes) is det.
 1756%
 1757%   Extract the actual code content from a list of line structures.
 1758
 1759lines_code_text([], _, []).
 1760lines_code_text([_-[]|T0], Indent, [10|T]) :-
 1761    !,
 1762    lines_code_text(T0, Indent, T).
 1763lines_code_text([I-Line|T0], Indent, [10|T]) :-
 1764    PreI is I-Indent,
 1765    phrase(pre_indent(PreI), T, T1),
 1766    verbatim_line(Line, T1, T2),
 1767    lines_code_text(T0, Indent, T2).
 1768
 1769
 1770%!  pre_indent(+Indent)// is det.
 1771%
 1772%   Insert Indent leading spaces.  Note we cannot use tabs as these
 1773%   are not expanded by the HTML <pre> element.
 1774
 1775pre_indent(N) -->
 1776    { N > 0,
 1777      !,
 1778      N2 is N - 1
 1779    }, " ",
 1780    pre_indent(N2).
 1781pre_indent(_) -->
 1782    "".
 1783
 1784verbatim_line(Line, Pre, PreT) :-
 1785    append(Line, PreT, Pre).
 1786
 1787
 1788                 /*******************************
 1789                 *            SUMMARY           *
 1790                 *******************************/
 1791
 1792%!  summary_from_lines(+Lines:lines, -Summary:list(codes)) is det.
 1793%
 1794%   Produce a summary for Lines. Similar  to JavaDoc, the summary is
 1795%   defined as the first sentence of the documentation. In addition,
 1796%   a sentence is also ended by an  empty   line  or  the end of the
 1797%   comment.
 1798
 1799summary_from_lines(Lines, Sentence) :-
 1800    skip_empty_lines(Lines, Lines1),
 1801    summary2(Lines1, Sentence0),
 1802    end_sentence(Sentence0, Sentence).
 1803
 1804summary2(_, Sentence) :-
 1805    Sentence == [],
 1806    !.              % we finished our sentence
 1807summary2([], []) :- !.
 1808summary2([_-[]|_], []) :- !.            % empty line
 1809summary2([_-[0'@|_]|_], []) :- !.       % keyword line
 1810summary2([_-L0|Lines], Sentence) :-
 1811    phrase(sentence(Sentence, Tail), L0, _),
 1812    summary2(Lines, Tail).
 1813
 1814sentence([C,End], []) -->
 1815    [C,End],
 1816    { \+ code_type(C, period),
 1817      code_type(End, period)                % ., !, ?
 1818    },
 1819    space_or_eos,
 1820    !.
 1821sentence([0' |T0], T) -->
 1822    space,
 1823    !,
 1824    ws,
 1825    sentence(T0, T).
 1826sentence([H|T0], T) -->
 1827    [H],
 1828    sentence(T0, T).
 1829sentence([0' |T], T) -->                % '
 1830    eos.
 1831
 1832space_or_eos -->
 1833    [C],
 1834    !,
 1835    {code_type(C, space)}.
 1836space_or_eos -->
 1837    eos.
 1838
 1839%!  skip_empty_lines(+LinesIn, -LinesOut) is det.
 1840%
 1841%   Remove empty lines from the start of the input.  Note that
 1842%   this is used both to process character and token data.
 1843
 1844skip_empty_lines([], []).
 1845skip_empty_lines([_-[]|Lines0], Lines) :-
 1846    !,
 1847    skip_empty_lines(Lines0, Lines).
 1848skip_empty_lines(Lines, Lines).
 1849
 1850end_sentence([], []).
 1851end_sentence([0'\s], [0'.]) :- !.
 1852end_sentence([H|T0], [H|T]) :-
 1853    end_sentence(T0, T).
 1854
 1855
 1856                 /*******************************
 1857                 *        CREATE LINES          *
 1858                 *******************************/
 1859
 1860%!  indented_lines(+Text:list(codes), +Prefixes:list(codes),
 1861%!                 -Lines:list) is det.
 1862%
 1863%   Extract a list of lines  without   leading  blanks or characters
 1864%   from Prefix from Text. Each line   is a term Indent-Codes, where
 1865%   Indent specifies the line_position of the real text of the line.
 1866
 1867indented_lines(Comment, Prefixes, Lines) :-
 1868    must_be(codes, Comment),
 1869    phrase(split_lines(Prefixes, Lines), Comment),
 1870    !.
 1871
 1872split_lines(_, []) -->
 1873    end_of_comment.
 1874split_lines(Prefixes, [Indent-L1|Ls]) -->
 1875    take_prefix(Prefixes, 0, Indent0),
 1876    white_prefix(Indent0, Indent),
 1877    take_line(L1),
 1878    split_lines(Prefixes, Ls).
 1879
 1880
 1881%!  end_of_comment//
 1882%
 1883%   Succeeds if we hit the end of the comment.
 1884%
 1885%   @bug    %*/ will be seen as the end of the comment.
 1886
 1887end_of_comment -->
 1888    eos.
 1889end_of_comment -->
 1890    ws, stars, "*/".
 1891
 1892stars --> [].
 1893stars --> "*", !, stars.
 1894
 1895
 1896%!  take_prefix(+Prefixes:list(codes), +Indent0:int, -Indent:int)// is det.
 1897%
 1898%   Get the leading characters  from  the   input  and  compute  the
 1899%   line-position at the end of the leading characters.
 1900
 1901take_prefix(Prefixes, I0, I) -->
 1902    { member(Prefix, Prefixes),
 1903      string_codes(Prefix, PrefixCodes)
 1904    },
 1905    prefix(PrefixCodes),
 1906    !,
 1907    { string_update_linepos(PrefixCodes, I0, I) }.
 1908take_prefix(_, I, I) -->
 1909    [].
 1910
 1911prefix([]) --> [].
 1912prefix([H|T]) --> [H], prefix(T).
 1913
 1914white_prefix(I0, I) -->
 1915    [C],
 1916    {  code_type(C, white),
 1917       !,
 1918       update_linepos(C, I0, I1)
 1919    },
 1920    white_prefix(I1, I).
 1921white_prefix(I, I) -->
 1922    [].
 1923
 1924%!  string_update_linepos(+Codes, +Pos0, -Pos) is det.
 1925%
 1926%   Update line-position after adding Codes at Pos0.
 1927
 1928string_update_linepos([], I, I).
 1929string_update_linepos([H|T], I0, I) :-
 1930    update_linepos(H, I0, I1),
 1931    string_update_linepos(T, I1, I).
 1932
 1933%!  update_linepos(+Code, +Pos0, -Pos) is det.
 1934%
 1935%   Update line-position after adding Code.
 1936%
 1937%   @tbd    Currently assumes tab-width of 8.
 1938
 1939update_linepos(0'\t, I0, I) :-
 1940    !,
 1941    I is (I0\/7)+1.
 1942update_linepos(0'\b, I0, I) :-
 1943    !,
 1944    I is max(0, I0-1).
 1945update_linepos(0'\r, _, 0) :- !.
 1946update_linepos(0'\n, _, 0) :- !.
 1947update_linepos(_, I0, I) :-
 1948    I is I0 + 1.
 1949
 1950%!  take_line(-Line:codes)// is det.
 1951%
 1952%   Take  a  line  from  the  input.   Line  does  not  include  the
 1953%   terminating \r or \n character(s), nor trailing whitespace.
 1954
 1955take_line([]) -->
 1956    "\r\n",
 1957    !.                      % DOS file
 1958take_line([]) -->
 1959    "\n",
 1960    !.                        % Unix file
 1961take_line(Line) -->
 1962    [H], { code_type(H, white) },
 1963    !,
 1964    take_white(White, WT),
 1965    (   nl
 1966    ->  { Line = [] }
 1967    ;   { Line = [H|White] },
 1968        take_line(WT)
 1969    ).
 1970take_line([H|T]) -->
 1971    [H],
 1972    !,
 1973    take_line(T).
 1974take_line([]) -->                       % end of string
 1975    [].
 1976
 1977take_white([H|T0], T) -->
 1978    [H],  { code_type(H, white) },
 1979    !,
 1980    take_white(T0, T).
 1981take_white(T, T) -->
 1982    [].
 1983
 1984%!  normalise_indentation(+LinesIn, -LinesOut) is det.
 1985%
 1986%   Re-normalise the indentation, such that the  lef-most line is at
 1987%   zero.  Note that we skip empty lines in the computation.
 1988
 1989normalise_indentation(Lines0, Lines) :-
 1990    skip_empty_lines(Lines0, Lines1),
 1991    Lines1 = [I0-_|Lines2],
 1992    !,
 1993    smallest_indentation(Lines2, I0, Subtract),
 1994    (   Subtract == 0
 1995    ->  Lines = Lines0
 1996    ;   maplist(substract_indent(Subtract), Lines0, Lines)
 1997    ).
 1998normalise_indentation(Lines, Lines).
 1999
 2000smallest_indentation([], I, I).
 2001smallest_indentation([_-[]|T], I0, I) :-
 2002    !,
 2003    smallest_indentation(T, I0, I).
 2004smallest_indentation([X-_|T], I0, I) :-
 2005    I1 is min(I0, X),
 2006    smallest_indentation(T, I1, I).
 2007
 2008substract_indent(Subtract, I0-L, I-L) :-
 2009    I is max(0,I0-Subtract).
 2010
 2011
 2012                 /*******************************
 2013                 *             MISC             *
 2014                 *******************************/
 2015
 2016%!  strip_leading_par(+Dom0, -Dom) is det.
 2017%
 2018%   Remove the leading paragraph for  environments where a paragraph
 2019%   is not required.
 2020
 2021strip_leading_par([p(C)|T], L) :-
 2022    !,
 2023    append(C, T, L).
 2024strip_leading_par(L, L).
 2025
 2026
 2027                 /*******************************
 2028                 *           DCG BASICS         *
 2029                 *******************************/
 2030
 2031%!  ws// is det
 2032%
 2033%   Eagerly skip layout characters
 2034
 2035ws -->
 2036    [C], {code_type(C, space)},
 2037    !,
 2038    ws.
 2039ws -->
 2040    [].
 2041
 2042%       space// is det
 2043%
 2044%       True if then next code is layout.
 2045
 2046space -->
 2047    [C],
 2048    {code_type(C, space)}.
 2049
 2050%!  non_ws(-Text, ?Tail) is det.
 2051%
 2052%   True if the  difference  list  Text-Tail   is  the  sequence  of
 2053%   non-white-space characters.
 2054
 2055non_ws([H|T0], T) -->
 2056    [H],
 2057    { \+ code_type(H, space) },
 2058    !,
 2059    non_ws(T0, T).
 2060non_ws(T, T) -->
 2061    [].
 2062
 2063
 2064%!  nl//
 2065%
 2066%   Get end-of-line
 2067
 2068nl -->
 2069    "\r\n",
 2070    !.
 2071nl -->
 2072    "\n".
 2073
 2074%!  peek(H)//
 2075%
 2076%   True if next token is H without eating it.
 2077
 2078peek(H, L, L) :-
 2079    L = [H|_].
 2080
 2081%!  tokens(-Tokens:list)// is nondet.
 2082%!  tokens(+Max, -Tokens:list)// is nondet.
 2083%
 2084%   Defensively take tokens from the input.  Backtracking takes more
 2085%   tokens.  Do not include structure terms.
 2086
 2087tokens([]) --> [].
 2088tokens([H|T]) --> token(H), tokens(T).
 2089
 2090tokens(_, []) --> [].
 2091tokens(C, [H|T]) --> token(H), {succ(C1, C)}, tokens(C1, T).
 2092
 2093%!  tokens_no_whitespace(-Tokens:list(atom))// is nondet.
 2094%
 2095%   Defensively take tokens from the  input. Backtracking takes more
 2096%   tokens.  Tokens  cannot  include  whitespace.  Word  tokens  are
 2097%   returned as their represented words.
 2098
 2099tokens_no_whitespace([]) -->
 2100    [].
 2101tokens_no_whitespace([Word|T]) -->
 2102    [ w(Word) ],
 2103    !,
 2104    tokens_no_whitespace(T).
 2105tokens_no_whitespace([H|T]) -->
 2106    [H],
 2107    { \+ space_token(H) },
 2108    tokens_no_whitespace(T).
 2109
 2110token(Token) -->
 2111    [Token],
 2112    { token(Token) }.
 2113
 2114token(w(_)) :- !.
 2115token(Token) :- atom(Token).
 2116
 2117%!  limit(+Count, :Rule)//
 2118%
 2119%   As limit/2, but for grammar rules.
 2120
 2121:- meta_predicate limit(+,2,?,?). 2122
 2123limit(Count, Rule, Input, Rest) :-
 2124    Count > 0,
 2125    State = count(0),
 2126    call(Rule, Input, Rest),
 2127    arg(1, State, N0),
 2128    N is N0+1,
 2129    (   N =:= Count
 2130    ->  !
 2131    ;   nb_setarg(1, State, N)
 2132    ).
 2133
 2134
 2135                 /*******************************
 2136                 *           MESSAGES           *
 2137                 *******************************/
 2138
 2139:- multifile
 2140    prolog:message//1. 2141
 2142prolog:message(pldoc(deprecated_tag(Name, Tag))) -->
 2143    [ 'PlDoc: Deprecated tag @~w (use @~w)'-[Name, Tag]
 2144    ].
 2145prolog:message(pldoc(unknown_tag(Name))) -->
 2146    [ 'PlDoc: unknown tag @~w'-[Name]
 2147    ]