Subversion Repositories planix.SVN

Rev

Blame | Last modification | View Log | RSS feed

#include <u.h>
#include <libc.h>
#include <bio.h>

/* Compile-time limits. */
enum{
        Nfont = 11,     /* size of font[]; valid font positions are 0..Nfont-1 */
        Wid = 20,       /* tmac.anhtml sets page width to 20" so we can recognize .nf text */
};

/*
 * Char is one cell of the buffered output: either a rune with
 * attribute bits or'ed in, a magic emission code, or (right after
 * an Estring cell) a char* stored as an integer.
 */
typedef uintptr Char;
typedef struct Troffchar Troffchar;
typedef struct Htmlchar Htmlchar;
typedef struct Font Font;
typedef struct HTMLfont HTMLfont;

/*
 * a Char is >= 32 bits. low 16 bits are the rune. higher are attributes.
 * must be able to hold a pointer.
 *
 * The constants below are bit positions, not masks: code tests and
 * sets them as (1<<Italic), (1<<Anchor), and so on.  They also index
 * the onattr[] and offattr[] markup tables.
 */
enum
{
        Italic  =       16,
        Bold,                   /* 17 */
        CW,                     /* 18 */
        Indent1,                /* 19 */
        Indent2,                /* 20 */
        Indent3,                /* 21 */
        Heading =       25,
        Anchor =        26,     /* must be last */
};

/*
 * Special Char values recognized by flush() instead of being
 * printed as runes.
 * NOTE(review): Epp has the same value as (1<<Italic) with a zero
 * rune, so an italic NUL would be mistaken for Epp.
 */
enum    /* magic emissions */
{
        Estring = 0,    /* the following Char is a char* to print verbatim */
        Epp = 1<<16,    /* paragraph gap: flush() prints a 5-pixel-high spacer table */
};

/* order in which setattr() opens newly requested attributes (outermost first) */
int attrorder[] = { Indent1, Indent2, Indent3, Heading, Anchor, Italic, Bold, CW };

/* stack of currently open attributes (bit positions), innermost last; see setattr() */
int nest[10];
int nnest;      /* number of entries in use in nest[] */

/* maps a troff special-character name to the text emitted for it (see troffchar()) */
struct Troffchar
{
        char *name;
        char *value;
};

/* maps a rune to the string emitted in its place (see emithtmlchar()) */
struct Htmlchar
{
        char *utf;      /* UTF-8 form of the character; decoded into value by main() */
        char *name;     /* string emitted instead of the rune */
        int value;      /* rune value; sort and search key */
};

#include "chars.h"

/* a font mounted at some position by mountfont() */
struct Font{
        char            *name;          /* heap copy of the troff font name */
        HTMLfont        *htmlfont;      /* matching entry in htmlfonts[] */
};

/* how a troff font is represented in the HTML output */
struct HTMLfont{
        char    *name;          /* troff font name */
        char    *htmlname;      /* HTML element name; not read by the code visible here */
        int     bit;            /* attribute bit position (Italic, CW, ...); 0 means none */
};

/*
 * R must be first; it's the default representation for fonts we don't recognize.
 * The table ends with an entry whose name is nil; htmlfont() stops there.
 */
HTMLfont htmlfonts[] =
{
        "R",            nil,    0,
        "LucidaSans",   nil,    0,
        "I",            "i",    Italic,
        "LucidaSansI",  "i",    Italic,
        "CW",           "tt",   CW,
        "LucidaCW",     "tt",   CW,
        nil,    nil,
};

/* opening tag of the borderless tables used for vertical spacing */
#define TABLE "<table border=0 cellpadding=0 cellspacing=0>"

/*
 * Markup that turns an attribute on, indexed by attribute bit position.
 * Slots 0-15 are the rune bits and have no markup.  A leading "<+" is
 * interpreted by iputs(): it prints the tag on its own line and
 * increases the output indentation.  The Anchor slot is overwritten by
 * setattr() with the href string of the anchor being opened.
 */
char*
onattr[8*sizeof(int)] =
{
        0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0,
        "<i>",                  /* italic */
        "<b>",                  /* bold */
        "<tt><font size=+1>",   /* cw */
        "<+table border=0 cellpadding=0 cellspacing=0><tr height=2><td><tr><td width=20><td>\n",                /* indent1 */
        "<+table border=0 cellpadding=0 cellspacing=0><tr height=2><td><tr><td width=20><td>\n",                /* indent2 */
        "<+table border=0 cellpadding=0 cellspacing=0><tr height=2><td><tr><td width=20><td>\n",                /* indent3 */
        0,
        0,
        0,
        "<p><font size=+1><b>", /* heading 25 */
        "<unused>",             /* anchor 26 */
};

/*
 * Markup that turns an attribute off, indexed by attribute bit position
 * (parallel to onattr[]).  A leading "<-" is interpreted by iputs(): it
 * decreases the output indentation and prints the tag on its own line.
 */
char*
offattr[8*sizeof(int)] =
{
        0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0,
        "</i>",                 /* italic */
        "</b>",                 /* bold */
        "</font></tt>",         /* cw */
        "<-/table>",            /* indent1 */
        "<-/table>",            /* indent2 */
        "<-/table>",            /* indent3 */
        0,
        0,
        0,
        "</b></font>",          /* heading 25 */
        "</a>",                 /* anchor 26 */
};

Font    *font[Nfont];   /* fonts by mount position; nil until mountfont() fills a slot */

Biobuf  bout;           /* buffered output on file descriptor 1 (set up in main) */
int     debug = 0;      /* incremented by each -d flag */

/* troff state */
int     page = 1;       /* set by the 'p' directive */
int     ft = 1;         /* current font position (see switchfont) */
int     vp = 0;         /* vertical position, from 'v' and 'V' */
int     hp = 0;         /* horizontal position, from 'h', 'H' and character motions */
int     ps = 1;         /* set by the 's' directive */
int     res = 720;      /* resolution that an "x r" command must announce */

int     didP = 0;       /* set when a paragraph break was just emitted; suppresses the <br> on 'n' */
int     atnewline = 1;  /* set by 'n', cleared by indent() at the next character */
int     prevlineH = 0;  /* hp for which the indent attributes were last computed */
Char    attr = 0;       /* or'ed into each Char */

Char    *chars;         /* buffered output cells, printed by flush() */
int     nchars;         /* cells in use */
int     nalloc;         /* cells allocated */
char**  anchors;        /* allocated in order */
int     nanchors;       /* count while reading; reset and reused as an index by flush()/setattr() */

char    *filename;      /* input name used in diagnostics */
int     cno;            /* input character counter used in diagnostics */
char    buf[8192];      /* shared result buffer of getline() and getstr() */
char    *title = "Plan 9 man page";     /* page title; replaced by -t */

/* forward declarations for functions used before their definitions */
void    process(Biobuf*, char*);
void    mountfont(int, char*);
void    switchfont(int);
void    header(char*);
void    flush(void);
void    trailer(void);

/*
 * Allocate n bytes with malloc.  Never returns nil: an allocation
 * failure ends the program through sysfatal.
 */
void*
emalloc(ulong n)
{
        void *mem;

        if((mem = malloc(n)) == nil)
                sysfatal("malloc failed: %r");
        return mem;
}

/*
 * Resize the allocation p to n bytes with realloc.  A nil result
 * from realloc ends the program through sysfatal.
 */
void*
erealloc(void *p, ulong n)
{
        void *grown;

        grown = realloc(p, n);
        if(grown != nil)
                return grown;
        sysfatal("realloc failed: %r");
        return nil;
}

/*
 * Return a heap copy of s made with strdup.  A failed copy ends
 * the program through sysfatal.
 */
char*
estrdup(char *s)
{
        char *copy;

        if((copy = strdup(s)) == nil)
                sysfatal("strdup failed: %r");
        return copy;
}

/* Print the command synopsis on file descriptor 2 and exit with status "usage". */
void
usage(void)
{
        static char synopsis[] = "usage: troff2html [-d] [-t title] [file ...]\n";

        fprint(2, "%s", synopsis);
        exits("usage");
}

/*
 * qsort comparison function for Htmlchar entries: orders them by
 * ascending value.  Returns the difference of the two values.
 */
int
hccmp(const void *va, const void *vb)
{
        int left, right;

        left = ((Htmlchar*)va)->value;
        right = ((Htmlchar*)vb)->value;
        return left - right;
}

void
main(int argc, char *argv[])
{
        int i;
        Biobuf in, *inp;
        Rune r;

        for(i=0; i<nelem(htmlchars); i++){
                chartorune(&r, htmlchars[i].utf);
                htmlchars[i].value = r;
        }
        qsort(htmlchars, nelem(htmlchars), sizeof(htmlchars[0]), hccmp);

        ARGBEGIN{
        case 't':
                title = ARGF();
                if(title == nil)
                        usage();
                break;
        case 'd':
                debug++;
                break;
        default:
                usage();
        }ARGEND

        Binit(&bout, 1, OWRITE);
        if(argc == 0){
                header(title);
                Binit(&in, 0, OREAD);
                process(&in, "<stdin>");
        }else{
                header(title);
                for(i=0; i<argc; i++){
                        inp = Bopen(argv[i], OREAD);
                        if(inp == nil)
                                sysfatal("can't open %s: %r", argv[i]);
                        process(inp, argv[i]);
                        Bterm(inp);
                }
        }
        flush();
        trailer();
        exits(nil);
}

/*
 * Append one cell to the chars[] output buffer, growing the buffer
 * by 10000 cells whenever it is full.  Growth goes through erealloc,
 * so running out of memory ends the program with a realloc
 * diagnostic.
 */
void
emitchar(Char c)
{
        if(nchars == nalloc){
                nalloc += 10000;
                chars = erealloc(chars, nalloc*sizeof(chars[0]));
        }
        chars[nchars++] = c;
}

/*
 * Buffer rune r tagged with the current attributes.
 *
 * A ')' also clears the Anchor attribute for what follows, closing
 * man page references early so that
 * .IR proof (1),
 * doesn't make the comma part of the link.
 */
void
emit(Rune r)
{
        Char cell;

        cell = r | attr;
        emitchar(cell);
        if(r != ')')
                return;
        attr &= ~(1<<Anchor);
}

void
emitstr(char *s)
{
        emitchar(Estring);
        emitchar((Char)s);
}

int indentlevel;        /* output nesting depth; iputrune() prints 4 spaces per level after a newline */
int linelen;            /* runes passed to iputrune() since the last newline it printed */

/*
 * Write rune r to b, keeping the HTML source readable: a space
 * arriving after more than 60 runes on the current line is replaced
 * by a newline, and every newline is followed by four spaces per
 * indentlevel.
 */
void
iputrune(Biobuf *b, Rune r)
{
        int depth, toolong;

        toolong = linelen > 60;
        linelen++;
        if(toolong && r == ' ')
                r = '\n';
        Bputrune(b, r);
        if(r != '\n')
                return;
        for(depth = indentlevel; depth > 0; depth--)
                Bprint(b, "    ");
        linelen = 0;
}

/*
 * Write string s to b.  Two prefixes are treated specially:
 *   "<+..." prints the tag (minus the '+') on a line of its own and
 *           indents what follows one level deeper;
 *   "<-..." returns to the previous indent level and prints the tag
 *           (minus the '-') on a line of its own.
 * Anything else is written unchanged.
 */
void
iputs(Biobuf *b, char *s)
{
        int opening, closing;

        opening = closing = 0;
        if(s[0] == '<'){
                opening = s[1] == '+';
                closing = s[1] == '-';
        }
        if(!opening && !closing){
                Bprint(b, "%s", s);
                return;
        }
        /* a closing tag goes on the outer level, so unindent first */
        if(closing)
                indentlevel--;
        iputrune(b, '\n');
        Bprint(b, "<%s", s+2);
        /* an opening tag indents only what comes after it */
        if(opening)
                indentlevel++;
        iputrune(b, '\n');
}

/*
 * Make the set of open HTML attributes equal to a (a mask of
 * 1<<attribute bits), writing the needed closing and opening markup
 * to bout.  nest[] records the currently open attributes in opening
 * order so that tags are closed in reverse order and stay properly
 * nested.
 *
 * NOTE(review): opening an Anchor consumes anchors[nanchors++] with no
 * bounds check; this relies on one anchor string having been recorded
 * for every anchor that is opened here.
 */
void
setattr(Char a)
{
        Char on, off;
        int i, j;

        on = a & ~attr;         /* attributes to open */
        off = attr & ~a;        /* attributes to close */

        /* walk up the nest stack until we reach something we need to turn off. */
        for(i=0; i<nnest; i++)
                if(off&(1<<nest[i]))
                        break;

        /* turn off everything above that */
        for(j=nnest-1; j>=i; j--)
                iputs(&bout, offattr[nest[j]]);

        /* turn on everything we just turned off but didn't want to */
        for(j=i; j<nnest; j++)
                if(a&(1<<nest[j]))
                        iputs(&bout, onattr[nest[j]]);
                else
                        nest[j] = 0;    /* 0 marks a removed entry; real entries are >= 16 */

        /* shift the zeros (turned off things) up */
        for(i=j=0; i<nnest; i++)
                if(nest[i] != 0)
                        nest[j++] = nest[i];
        nnest = j;

        /* now turn on the new attributes */
        for(i=0; i<nelem(attrorder); i++){
                j = attrorder[i];
                if(on&(1<<j)){
                        /* each anchor has its own opening tag: take the next recorded one */
                        if(j == Anchor)
                                onattr[j] = anchors[nanchors++];
                        iputs(&bout, onattr[j]);
                        if(nnest >= nelem(nest))
                                sysfatal("nesting too deep");
                        nest[nnest++] = j;
                }
        }
        attr = a;
}

/*
 * Print everything buffered in chars[] to bout.  Estring cells print
 * the string whose pointer is stored in the next cell, Epp cells
 * print a spacer table, and every other cell is a rune printed after
 * switching to its attribute bits with setattr().
 */
void
flush(void)
{
        int pos;
        Char cell, bits, rune;

        /* from here on nanchors indexes the next anchor string to use */
        nanchors = 0;
        pos = 0;
        while(pos < nchars){
                cell = chars[pos++];
                if(cell == Estring){
                        /* next word is string to print */
                        iputs(&bout, (char*)chars[pos++]);
                        continue;
                }
                if(cell == Epp){
                        iputrune(&bout, '\n');
                        iputs(&bout, TABLE "<tr height=5><td></table>");
                        iputrune(&bout, '\n');
                        continue;
                }
                rune = cell & 0xFFFF;
                bits = cell & ~0xFFFF;
                /*
                 * If an attribute is going to be turned off right after
                 * a space, turn it off before the space instead: keep
                 * only the bits the next printable cell also has.
                 */
                if(rune == ' ' && pos < nchars && (chars[pos]&0xFFFF) >= 32)
                        bits &= chars[pos];
                setattr(bits);
                iputrune(&bout, rune);
        }
}

/*
 * Write the HTML head section, with s as the page title, and the
 * opening body tag to bout.
 */
void
header(char *s)
{
        Bprint(&bout,
                "<head>\n"
                "<title>%s</title>\n"
                "<meta content=\"text/html; charset=utf-8\" http-equiv=Content-Type>\n"
                "</head>\n"
                "<body bgcolor=#ffffff>\n",
                s);
}

/*
 * Write the end of the HTML document to bout.  When compiled with
 * LUCENT defined, a spacer table and a copyright line carrying the
 * current year are written first.
 */
void
trailer(void)
{
#ifdef LUCENT
        Tm *now;

        now = localtime(time(nil));
        Bprint(&bout,
                TABLE "<tr height=20><td></table>\n"
                "<font size=-1><a href=\"http://www.lucent.com/copyright.html\">\n"
                "Copyright</A> &#169; %d Alcatel-Lucent.  All rights reserved.</font>\n",
                now->year+1900);
#endif
        Bprint(&bout, "</body></html>\n");
}

/*
 * Read the next rune from b and count it in cno.
 * Returns the rune, or whatever Bgetrune reports at end of input.
 */
int
getc(Biobuf *b)
{
        long r;

        r = Bgetrune(b);
        cno++;
        return r;
}

/* Push the last rune read back onto b and undo its count in cno. */
void
ungetc(Biobuf *b)
{
        Bungetrune(b);
        cno--;
}

/*
 * Read the rest of the current input line into the shared buffer buf
 * and return it as a NUL-terminated string without the newline.
 * Returns nil if end of input is reached before a newline.
 *
 * Bytes are read rather than runes, so UTF-8 text in the line is
 * preserved instead of each rune being truncated to one char.  A line
 * that does not fit in buf is truncated, but it is still consumed up
 * to its newline and the result is always terminated.
 */
char*
getline(Biobuf *b)
{
        int i, c;

        i = 0;
        for(;;){
                cno++;
                c = Bgetc(b);
                if(c == Beof)
                        return nil;
                if(c == '\n')
                        break;
                /* keep one byte free for the terminator */
                if(i < sizeof buf - 1)
                        buf[i++] = c;
        }
        buf[i] = '\0';
        return buf;
}

/*
 * Read a decimal integer from b: an optional leading '-' followed by
 * digits.  The first character that is not part of the number is
 * pushed back.  Returns 0 when no digits are present.
 *
 * The sign is accepted because callers of setnum() allow negative
 * values (for example relative motions).
 */
int
getnum(Biobuf *b)
{
        int n, c, neg;

        n = 0;
        neg = 0;
        c = getc(b);
        if(c == '-'){
                neg = 1;
                c = getc(b);
        }
        while('0' <= c && c <= '9'){
                n = n*10 + (c-'0');
                c = getc(b);
        }
        /* c ended the number; leave it for the caller */
        ungetc(b);
        return neg ? -n : n;
}

/*
 * Read a token from b into the shared buffer buf, stopping at (and
 * pushing back) the first space, tab or newline.  Returns buf as a
 * NUL-terminated string, or nil at end of input.
 *
 * A token too long for buf is cut off at sizeof buf - 1 bytes; the
 * result is always terminated and the remaining bytes stay in the
 * input.
 */
char*
getstr(Biobuf *b)
{
        int i, c;

        for(i=0; i<sizeof buf - 1; i++){
                /* must get bytes not runes */
                cno++;
                c = Bgetc(b);
                if(c == Beof)
                        return nil;
                if(c == '\n' || c==' ' || c=='\t'){
                        /* the byte came from Bgetc, so undo it with Bungetc */
                        Bungetc(b);
                        cno--;
                        break;
                }
                buf[i] = c;
        }
        buf[i] = '\0';
        return buf;
}

/*
 * Read a number from b and check that it lies in [min, max).
 * name labels the value in the debug trace (debug > 2) and in the
 * fatal error raised when the value is out of range.
 * Returns the number read.
 */
int
setnum(Biobuf *b, char *name, int min, int max)
{
        int val;

        val = getnum(b);
        if(debug > 2)
                fprint(2, "set %s = %d\n", name, val);
        if(val < min || val >= max)
                sysfatal("value of %s is %d; min %d max %d at %s:#%d", name, val, min, max, filename, cno);
        return val;
}

/*
 * Handle an 'x' (device control) command.  The rest of the input line
 * is read and split into fields; the first letter of the first field
 * selects the command:
 *   f  mount a font:  x f position name
 *   r  resolution, which must equal res
 *   T  typesetter name, which must be "utf"
 *   X  in-line request; only "X html ..." is acted on
 *   i, s, t  (init, stop, trailer) are accepted and ignored
 * Anything else is reported when debugging and otherwise ignored.
 *
 * tokenize() fills only the first nfld entries of fld[], so no field
 * is read before nfld has been checked.
 */
void
xcmd(Biobuf *b)
{
        char *p, *fld[16], href[1024];
        int i, nfld;

        p = getline(b);
        if(p == nil)
                sysfatal("xcmd error: %r");
        if(debug)
                fprint(2, "x command '%s'\n", p);
        nfld = tokenize(p, fld, nelem(fld));
        if(nfld == 0)
                return;
        switch(fld[0][0]){
        case 'f':
                /* mount font */
                if(nfld != 3)
                        break;
                i = atoi(fld[1]);
                if(i<0 || Nfont<=i)
                        sysfatal("font %d out of range at %s:#%d", i, filename, cno);
                mountfont(i, fld[2]);
                return;
        case 'i':
                /* init */
                return;
        case 'r':
                if(nfld<2)
                        sysfatal("typesetter has unexpected resolution %s", "<unspecified>");
                if(atoi(fld[1])!=res)
                        sysfatal("typesetter has unexpected resolution %s", fld[1]);
                return;
        case 's':
                /* stop */
                return;
        case 't':
                /* trailer */
                return;
        case 'T':
                if(nfld!=2 || strcmp(fld[1], "utf")!=0)
                        sysfatal("output for unknown typesetter type %s",
                                nfld>=2? fld[1] : "<unspecified>");
                return;
        case 'X':
                if(nfld<3 || strcmp(fld[1], "html")!=0)
                        break;
                /* is it a man reference of the form cp(1)? */
                /* X manref start/end cp (1) */
                if(nfld==6 && strcmp(fld[2], "manref")==0){
                        /* was the right macro; is it the right form? */
                        if(strlen(fld[5])>=3 &&
                           fld[5][0]=='(' && fld[5][2]==')' &&
                           '0'<=fld[5][1] && fld[5][1]<='9'){
                                if(strcmp(fld[3], "start") == 0){
                                        /* set anchor attribute and remember string */
                                        attr |= (1<<Anchor);
                                        snprint(href, sizeof href,
                                                "<a href=\"/magic/man2html/%c/%s\">",
                                                fld[5][1], fld[4]);
                                        nanchors++;
                                        anchors = erealloc(anchors, nanchors*sizeof(char*));
                                        anchors[nanchors-1] = estrdup(href);
                                }else if(strcmp(fld[3], "end") == 0)
                                        attr &= ~(1<<Anchor);
                        }
                }else if(strcmp(fld[2], "manPP") == 0){
                        didP = 1;
                        emitchar(Epp);
                }else if(nfld<4 || strcmp(fld[2], "manref")!=0){
                        /* nfld >= 3 here, so fld[2] is always valid */
                        if(strcmp(fld[2], "<P>")==0){   /* avoid triggering extra <br> */
                                didP = 1;
                                /* clear all font attributes before paragraph */
                                emitchar(' ' | (attr & ~(0xFFFF|((1<<Italic)|(1<<Bold)|(1<<CW)))));
                                emitstr("<P>");
                                /* next emitted char will turn font attributes back on */
                        }else if(strcmp(fld[2], "<H4>")==0)
                                attr |= (1<<Heading);
                        else if(strcmp(fld[2], "</H4>")==0)
                                attr &= ~(1<<Heading);
                        else if(debug)
                                fprint(2, "unknown in-line html %s... at %s:#%d\n",
                                        fld[2], filename, cno);
                }
                return;
        }
        if(debug)
                fprint(2, "unknown or badly formatted x command %s\n", fld[0]);
}

/*
 * Binary search for value c in tab[0..ntab-1], which must be sorted by
 * ascending value.  Returns the index of the matching entry, or -1 if
 * there is none.
 */
int
lookup(int c, Htmlchar tab[], int ntab)
{
        int lo, hi, probe, v;

        for(lo = 0, hi = ntab-1; lo <= hi; ){
                probe = (lo+hi)/2;
                v = tab[probe].value;
                if(v == c)
                        return probe;
                if(c < v)
                        hi = probe-1;
                else
                        lo = probe+1;
        }
        return -1;      /* no match */
}

/*
 * Buffer the character r for output.  If r has an entry in htmlchars
 * the entry's name string is emitted in its place; otherwise the rune
 * itself is emitted with the current attributes.
 */
void
emithtmlchar(int r)
{
        int i;

        i = lookup(r, htmlchars, nelem(htmlchars));
        if(i >= 0)
                emitstr(htmlchars[i].name);
        else
                emit(r);
}

char*
troffchar(char *s)
{
        int i;

        for(i=0; troffchars[i].name!=nil; i++)
                if(strcmp(s, troffchars[i].name) == 0)
                        return troffchars[i].value;
        return "??";
}

/*
 * Called before a character is emitted.  Clears didP, and for the
 * first character of a line whose horizontal position differs from
 * the one last used, recomputes the Indent1..Indent3 attributes from
 * that position.
 */
void
indent(void)
{
        int depth;

        didP = 0;
        if(!atnewline)
                return;
        atnewline = 0;
        if(hp == prevlineH)
                return;

        prevlineH = hp;
        /* these most peculiar numbers appear in the troff -man output */
        depth = ((prevlineH-1*res)+323)/324;
        attr &= ~((1<<Indent1)|(1<<Indent2)|(1<<Indent3));
        if(depth >= 1)
                attr |= (1<<Indent1);
        if(depth >= 2)
                attr |= (1<<Indent2);
        if(depth >= 3)
                attr |= (1<<Indent3);
}

/*
 * Read troff output from b and buffer the corresponding HTML.  name
 * identifies the input in diagnostics.  Each directive starts with a
 * single character:
 *   dd<c>  two-digit horizontal motion, then character c
 *   c<c>   character c
 *   C<n>   special character named n
 *   D      drawing command (ignored)
 *   f      font change          s  point size       p  page number
 *   h, H   relative / absolute horizontal position
 *   v, V   relative / absolute vertical position
 *   n      end of line          w  word space       x  device control
 * Processing stops at end of input or, after a diagnostic, at the
 * first unknown directive.  Input that ends in the middle of a
 * character directive is a fatal error.
 */
void
process(Biobuf *b, char *name)
{
        int c, r, v, i;
        char *p;

        cno = 0;
        prevlineH = res;
        filename = name;
        for(;;){
                c = getc(b);
                switch(c){
                case Beof:
                        /* go to ground state */
                        attr = 0;
                        emit('\n');
                        return;
                case '\n':
                        break;
                case '0': case '1': case '2': case '3': case '4':
                case '5': case '6': case '7': case '8': case '9':
                        v = c-'0';
                        c = getc(b);
                        if(c<'0' || '9'<c)
                                sysfatal("illegal character motion at %s:#%d", filename, cno);
                        v = v*10 + (c-'0');
                        hp += v;
                        /* fall through to character case */
                case 'c':
                        indent();
                        r = getc(b);
                        /* don't emit the end-of-file marker as if it were a character */
                        if(r == Beof)
                                sysfatal("unexpected end of file at %s:#%d", filename, cno);
                        emithtmlchar(r);
                        break;
                case 'D':
                        /* draw line; ignore */
                        do
                                c = getc(b);
                        while(c!='\n' && c!= Beof);
                        break;
                case 'f':
                        v = setnum(b, "font", 0, Nfont);
                        switchfont(v);
                        break;
                case 'h':
                        v = setnum(b, "hpos", -20000, 20000);
                        /* generate spaces if motion is large and within a line */
                        if(!atnewline && v>2*72)
                                for(i=0; i<v; i+=72)
                                        emitstr("&nbsp;");
                        hp += v;
                        break;
                case 'n':
                        setnum(b, "n1", -10000, 10000);
                        getc(b);        /* space separates */
                        setnum(b, "n2", -10000, 10000);
                        atnewline = 1;
                        if(!didP && hp < (Wid-1)*res)   /* if line is less than 19" long, probably need a line break */
                                emitstr("<br>");
                        emit('\n');
                        break;
                case 'p':
                        page = setnum(b, "page", -10000, 10000);
                        break;
                case 's':
                        ps = setnum(b, "ps", 1, 1000);
                        break;
                case 'v':
                        vp += setnum(b, "vpos", -10000, 10000);
                        /* BUG: ignore motion */
                        break;
                case 'x':
                        xcmd(b);
                        break;
                case 'w':
                        emit(' ');
                        break;
                case 'C':
                        indent();
                        p = getstr(b);
                        /* getstr returns nil when the input ends inside the name */
                        if(p == nil)
                                sysfatal("unexpected end of file at %s:#%d", filename, cno);
                        emitstr(troffchar(p));
                        break;
                case 'H':
                        hp = setnum(b, "hpos", 0, 20000);
                        break;
                case 'V':
                        vp = setnum(b, "vpos", 0, 10000);
                        break;
                default:
                        fprint(2, "dhtml: unknown directive %c(0x%.2ux) at %s:#%d\n", c, c, filename, cno);
                        return;
                }
        }
}

HTMLfont*
htmlfont(char *name)
{
        int i;

        for(i=0; htmlfonts[i].name!=nil; i++)
                if(strcmp(name, htmlfonts[i].name) == 0)
                        return &htmlfonts[i];
        return &htmlfonts[0];
}

void
mountfont(int pos, char *name)
{
        if(debug)
                fprint(2, "mount font %s on %d\n", name, pos);
        if(font[pos] != nil){
                free(font[pos]->name);
                free(font[pos]);
        }
        font[pos] = emalloc(sizeof(Font));
        font[pos]->name = estrdup(name);
        font[pos]->htmlfont = htmlfont(name);
}

void
switchfont(int pos)
{
        HTMLfont *hf;

        if(debug)
                fprint(2, "font change from %d (%s) to %d (%s)\n", ft, font[ft]->name, pos, font[pos]->name);
        if(pos == ft)
                return;
        hf = font[ft]->htmlfont;
        if(hf->bit != 0)
                attr &= ~(1<<hf->bit);
        ft = pos;
        hf = font[ft]->htmlfont;
        if(hf->bit != 0)
                attr |= (1<<hf->bit);
}