Subversion Repositories SE.SVN

Rev

Details | Last modification | View Log | RSS feed

Rev Author Line No. Line
12 7u83 1
#!/usr/bin/env escript
2
%% -*- mode: erlang -*-
3
-export([main/1]).
4
 
5
%% @doc Script used to generate mochiweb_charref.erl table.
6
 
7
%% @doc Escript entry point; the command-line arguments are ignored.
%% Starts inets, prepends "ebin" to the code path, downloads the W3C named
%% character reference page, parses it with mochiweb_html and prints the
%% extracted entities, sorted, as entity/1 clauses on standard output.
%% Crashes (badmatch) if inets cannot be started, the request fails, or the
%% server answers with anything other than status 200.
main(_) ->
    %% Assert startup so a failure surfaces here rather than as an httpc error.
    {ok, _} = application:ensure_all_started(inets),
    code:add_patha("ebin"),
    %% Require 200: an error page would otherwise parse to zero entities and
    %% silently produce a table containing only the catch-all clause.
    {ok, {{_, 200, _}, _, HTML}} =
        httpc:request("http://www.w3.org/TR/html5/named-character-references.html"),
    print(lists:sort(search(mochiweb_html:parse(HTML)))).
12
 
13
%% @doc Writes one "entity(...) -> ...;" clause per element of Entities to
%% standard output, in list order, followed by the final catch-all clause
%% that maps every other name to undefined. Returns ok.
print(Entities) ->
    lists:foreach(fun(Entity) ->
                          io:put_chars([clause(Entity), ";\n"])
                  end,
                  Entities),
    io:put_chars(["entity(_) -> undefined.\n"]),
    ok.
19
 
20
%% @doc Builds the iolist for a single entity/1 clause, without the trailing
%% separator. Name is the entity name and the second tuple element is a
%% non-empty list of hexadecimal code point digits (iodata). A single code
%% point is rendered as a bare 16#... integer; several code points are
%% rendered as a list of 16#... integers.
clause({Name, [Single]}) ->
    ["entity(\"", Name, "\") -> 16#", Single];
clause({Name, [Head | Tail]}) ->
    Others = [[", 16#", Hex] || Hex <- Tail],
    ["entity(\"", Name, "\") -> [16#", Head,
     Others,
     "]"].
26
 
27
 
28
%% @doc Collects every {Title, CodePoints} entry found in the parsed HTML
%% tree Root by running search/2 with an empty accumulator. The result is
%% not sorted.
search(Root) ->
    search(Root, []).
30
 
31
%% @doc Walks a parsed HTML node and prepends to Acc one {Title, CodePoints}
%% tuple for every table row whose first attribute is an id starting with
%% "entity-". Title is the text of the row's first cell with the trailing
%% ";" removed; CodePoints is the list of hex digit binaries following each
%% "U+" in the second cell. A matching row with an unexpected cell layout
%% crashes with badmatch. Text and comment nodes contribute nothing.
search({<<"tr">>, [{<<"id">>, <<"entity-", _/binary>>} | _], Children}, Acc) ->
    %% HTML5 charrefs can have more than one code point(!)
    [{<<"td">>, _, [{<<"code">>, _, [TitleSemi]}]},
     {<<"td">>, [], [RawCPs]} | _] = Children,
    %% Strip the trailing ";" from the entity name.
    L = byte_size(TitleSemi) - 1,
    <<Title:L/binary, $;>> = TitleSemi,
    {match, Matches} = re:run(RawCPs, "(?:\\s*U\\+)([a-fA-F0-9]+)",
                              [{capture, all, binary}, global]),
    %% Each match is [WholeMatch, HexDigits]; keep only the digits.
    [{Title, [CP || [_, CP] <- Matches]} | Acc];
search({Tag, Attrs, [H | T]}, Acc) ->
    %% Visit the first child, then continue with the remaining children.
    search({Tag, Attrs, T}, search(H, Acc));
search({_Tag, _Attrs, []}, Acc) ->
    Acc;
search({comment, _}, Acc) ->
    %% HTML comment nodes are 2-tuples; without this clause the walk would
    %% crash with function_clause on any page containing a comment.
    Acc;
search(<<_/binary>>, Acc) ->
    Acc.