Subversion Repositories planix.SVN

Rev

Rev 2 | Details | Compare with Previous | Last modification | View Log | RSS feed

Rev Author Line No. Line
2 - 1
#include <u.h>
2
#include <libc.h>
3
#include <bio.h>
4
 
5
/* Global sizing constants. */
enum
{
	Nfont	= 11,	/* number of font mount positions; size of font[] */
	/* tmac.anhtml sets page width to 20" so we can recognize .nf text */
	Wid	= 20,
};
9
 
10
/* Char is the element type of the chars[] buffer: a rune plus attribute bits, or a pointer. */
typedef uintptr			Char;

/* shorthand names for the structures declared below */
typedef struct Troffchar	Troffchar;
typedef struct Htmlchar		Htmlchar;
typedef struct Font		Font;
typedef struct HTMLfont		HTMLfont;
15
 
16
/*
 * Layout of a Char: it is at least 32 bits wide and must be able
 * to hold a pointer.  The low 16 bits are the rune; the bits above
 * them are attributes.  The constants below are bit positions, so
 * attribute X is tested and set with the mask (1<<X).
 */
enum
{
	Italic	= 16,
	Bold	= 17,
	CW	= 18,
	Indent1	= 19,
	Indent2	= 20,
	Indent3	= 21,
	Heading	= 25,
	Anchor	= 26,	/* must be last */
};
31
 
32
/*
 * Magic emissions: Char values that flush() handles specially
 * instead of printing them as runes.
 */
enum
{
	Estring	= 0,		/* the following Char is a char* to print */
	Epp	= 1<<16,	/* emitted for manPP; flush() prints an empty table */
};
37
 
38
/* Order in which setattr() turns on newly requested attributes. */
int attrorder[] = {
	Indent1, Indent2, Indent3,
	Heading, Anchor,
	Italic, Bold, CW,
};

/* Attributes currently turned on, in the order setattr() opened them. */
int nest[10];
int nnest;	/* number of entries in use in nest[] */
42
 
43
/* One entry of the troffchars table searched by troffchar(). */
struct Troffchar
{
	char	*name;	/* name compared against the requested character */
	char	*value;	/* string returned when the name matches */
};
48
 
49
/* One entry of the htmlchars table searched by lookup(). */
struct Htmlchar
{
	char	*utf;	/* UTF text; main() decodes its first rune into value */
	char	*name;	/* string emitted in place of the rune by emithtmlchar() */
	int	value;	/* rune value; key for sorting and searching */
};
55
 
56
#include "chars.h"
57
 
58
struct Font{
59
	char		*name;
60
	HTMLfont	*htmlfont;
61
};
62
 
63
/* How one troff font is represented in the HTML output. */
struct HTMLfont
{
	char	*name;		/* troff font name matched by htmlfont() */
	char	*htmlname;	/* HTML element name such as "i" or "tt"; nil if none */
	int	bit;		/* attribute bit position set while the font is current; 0 if none */
};
68
 
69
/* R must be first; it's the default representation for fonts we don't recognize */
70
HTMLfont htmlfonts[] =
71
{
72
	"R",		nil,	0,
73
	"LucidaSans",	nil,	0,
74
	"I",		"i",	Italic,
75
	"LucidaSansI",	"i",	Italic,
76
	"CW",		"tt",	CW,
77
	"LucidaCW",	"tt",	CW,
78
	nil,	nil,
79
};
80
 
81
/* opening tag shared by the empty tables that flush() and trailer() print */
#define TABLE "<table border=0 cellpadding=0 cellspacing=0>"
82
 
83
/*
 * Text written when an attribute is turned on, indexed by attribute
 * bit position.  A string starting with "<+" is printed by iputs()
 * on a line of its own, without the '+', and deepens the indentation.
 */
char *onattr[8*sizeof(int)] =
{
	/* 0-15: rune bits, no attribute */
	0, 0, 0, 0,
	0, 0, 0, 0,
	0, 0, 0, 0,
	0, 0, 0, 0,
	/* 16: italic */
	"<i>",
	/* 17: bold */
	"<b>",
	/* 18: cw */
	"<tt><font size=+1>",
	/* 19: indent1 */
	"<+table border=0 cellpadding=0 cellspacing=0><tr height=2><td><tr><td width=20><td>\n",
	/* 20: indent2 */
	"<+table border=0 cellpadding=0 cellspacing=0><tr height=2><td><tr><td width=20><td>\n",
	/* 21: indent3 */
	"<+table border=0 cellpadding=0 cellspacing=0><tr height=2><td><tr><td width=20><td>\n",
	/* 22-24: unassigned */
	0, 0, 0,
	/* 25: heading */
	"<p><font size=+1><b>",
	/* 26: anchor; setattr() replaces this slot with the next saved anchor string */
	"<unused>",
};
100
 
101
/*
 * Text written when an attribute is turned off, indexed by attribute
 * bit position.  A string starting with "<-" is printed by iputs()
 * on a line of its own, without the '-', after reducing the indentation.
 */
char *offattr[8*sizeof(int)] =
{
	/* 0-15: rune bits, no attribute */
	0, 0, 0, 0,
	0, 0, 0, 0,
	0, 0, 0, 0,
	0, 0, 0, 0,
	/* 16: italic */
	"</i>",
	/* 17: bold */
	"</b>",
	/* 18: cw */
	"</font></tt>",
	/* 19: indent1 */
	"<-/table>",
	/* 20: indent2 */
	"<-/table>",
	/* 21: indent3 */
	"<-/table>",
	/* 22-24: unassigned */
	0, 0, 0,
	/* 25: heading */
	"</b></font>",
	/* 26: anchor */
	"</a>",
};
118
 
119
Font	*font[Nfont];
120
 
121
Biobuf	bout;
122
int	debug = 0;
123
 
124
/* troff state */
125
int	page = 1;
126
int	ft = 1;
127
int	vp = 0;
128
int	hp = 0;
129
int	ps = 1;
130
int	res = 720;
131
 
132
int	didP = 0;
133
int	atnewline = 1;
134
int	prevlineH = 0;
135
Char	attr = 0;	/* or'ed into each Char */
136
 
137
Char	*chars;
138
int	nchars;
139
int	nalloc;
140
char**	anchors;	/* allocated in order */
141
int	nanchors;
142
 
143
char	*filename;
144
int	cno;
145
char	buf[8192];
146
char	*title = "Plan 9 man page";
147
 
148
/* functions used before their definitions */
void	process(Biobuf *b, char *name);
void	mountfont(int pos, char *name);
void	switchfont(int pos);
void	header(char *s);
void	flush(void);
void	trailer(void);
154
 
155
void*
156
emalloc(ulong n)
157
{
158
	void *p;
159
 
160
	p = malloc(n);
161
	if(p == nil)
162
		sysfatal("malloc failed: %r");
163
	return p;
164
}
165
 
166
void*
167
erealloc(void *p, ulong n)
168
{
169
 
170
	p = realloc(p, n);
171
	if(p == nil)
172
		sysfatal("realloc failed: %r");
173
	return p;
174
}
175
 
176
char*
177
estrdup(char *s)
178
{
179
	char *t;
180
 
181
	t = strdup(s);
182
	if(t == nil)
183
		sysfatal("strdup failed: %r");
184
	return t;
185
}
186
 
187
/* Print the command synopsis on fd 2 and exit with status "usage". */
void
usage(void)
{
	fprint(2, "usage: troff2html [-d] [-t title] [file ...]\n");
	exits("usage");
}
193
 
194
int
195
hccmp(const void *va, const void *vb)
196
{
197
	Htmlchar *a, *b;
198
 
199
	a = (Htmlchar*)va;
200
	b = (Htmlchar*)vb;
201
	return a->value - b->value;
202
}
203
 
204
void
205
main(int argc, char *argv[])
206
{
207
	int i;
208
	Biobuf in, *inp;
209
	Rune r;
210
 
211
	for(i=0; i<nelem(htmlchars); i++){
212
		chartorune(&r, htmlchars[i].utf);
213
		htmlchars[i].value = r;
214
	}
215
	qsort(htmlchars, nelem(htmlchars), sizeof(htmlchars[0]), hccmp);
216
 
217
	ARGBEGIN{
218
	case 't':
219
		title = ARGF();
220
		if(title == nil)
221
			usage();
222
		break;
223
	case 'd':
224
		debug++;
225
		break;
226
	default:
227
		usage();
228
	}ARGEND
229
 
230
	Binit(&bout, 1, OWRITE);
231
	if(argc == 0){
232
		header(title);
233
		Binit(&in, 0, OREAD);
234
		process(&in, "<stdin>");
235
	}else{
236
		header(title);
237
		for(i=0; i<argc; i++){
238
			inp = Bopen(argv[i], OREAD);
239
			if(inp == nil)
240
				sysfatal("can't open %s: %r", argv[i]);
241
			process(inp, argv[i]);
242
			Bterm(inp);
243
		}
244
	}
245
	flush();
246
	trailer();
247
	exits(nil);
248
}
249
 
250
void
251
emitchar(Char c)
252
{
253
	if(nalloc == nchars){
254
		nalloc += 10000;
255
		chars = realloc(chars, nalloc*sizeof(chars[0]));
256
		if(chars == nil)
257
			sysfatal("malloc failed: %r");
258
	}
259
	chars[nchars++] = c;
260
}
261
 
262
void
263
emit(Rune r)
264
{
265
	emitchar(r | attr);
266
	/*
267
	 * Close man page references early, so that 
268
	 * .IR proof (1),
269
	 * doesn't make the comma part of the link.
270
	 */
271
	if(r == ')')
272
		attr &= ~(1<<Anchor);
273
}
274
 
275
void
276
emitstr(char *s)
277
{
278
	emitchar(Estring);
279
	emitchar((Char)s);
280
}
281
 
282
int indentlevel;
283
int linelen;
284
 
285
void
286
iputrune(Biobuf *b, Rune r)
287
{
288
	int i;
289
 
290
	if(linelen++ > 60 && r == ' ')
291
		r = '\n';
292
	Bputrune(b, r);
293
	if(r == '\n'){
294
		for(i=0; i<indentlevel; i++)
295
			Bprint(b, "    ");
296
		linelen = 0;
297
	}
298
}
299
 
300
void
301
iputs(Biobuf *b, char *s)
302
{
303
	if(s[0]=='<' && s[1]=='+'){
304
		iputrune(b, '\n');
305
		Bprint(b, "<%s", s+2);
306
		indentlevel++;
307
		iputrune(b, '\n');
308
	}else if(s[0]=='<' && s[1]=='-'){
309
		indentlevel--;
310
		iputrune(b, '\n');
311
		Bprint(b, "<%s", s+2);
312
		iputrune(b, '\n');
313
	}else
314
		Bprint(b, "%s", s);
315
}
316
 
317
void
318
setattr(Char a)
319
{
320
	Char on, off;
321
	int i, j;
322
 
323
	on = a & ~attr;
324
	off = attr & ~a;
325
 
326
	/* walk up the nest stack until we reach something we need to turn off. */
327
	for(i=0; i<nnest; i++)
328
		if(off&(1<<nest[i]))
329
			break;
330
 
331
	/* turn off everything above that */
332
	for(j=nnest-1; j>=i; j--)
333
		iputs(&bout, offattr[nest[j]]);
334
 
335
	/* turn on everything we just turned off but didn't want to */
336
	for(j=i; j<nnest; j++)
337
		if(a&(1<<nest[j]))
338
			iputs(&bout, onattr[nest[j]]);
339
		else
340
			nest[j] = 0;
341
 
342
	/* shift the zeros (turned off things) up */
343
	for(i=j=0; i<nnest; i++)
344
		if(nest[i] != 0)
345
			nest[j++] = nest[i];
346
	nnest = j;
347
 
348
	/* now turn on the new attributes */
349
	for(i=0; i<nelem(attrorder); i++){
350
		j = attrorder[i];
351
		if(on&(1<<j)){
352
			if(j == Anchor)
353
				onattr[j] = anchors[nanchors++];
354
			iputs(&bout, onattr[j]);
355
			if(nnest >= nelem(nest))
356
				sysfatal("nesting too deep");
357
			nest[nnest++] = j;
358
		}
359
	}
360
	attr = a;
361
}
362
 
363
void
364
flush(void)
365
{
366
	int i;
367
	Char c, a;
368
 
369
	nanchors = 0;
370
	for(i=0; i<nchars; i++){
371
		c = chars[i];
372
		if(c == Estring){
373
			/* next word is string to print */
374
			iputs(&bout, (char*)chars[++i]);
375
			continue;
376
		}
377
		if(c == Epp){
378
			iputrune(&bout, '\n');
379
			iputs(&bout, TABLE "<tr height=5><td></table>");
380
			iputrune(&bout, '\n');
381
			continue;
382
		}
383
		a = c & ~0xFFFF;
384
		c &= 0xFFFF;
385
		/*
386
		 * If we're going to something off after a space,
387
		 * let's just turn it off before.
388
		 */
389
		if(c == ' ' && i<nchars-1 && (chars[i+1]&0xFFFF) >= 32)
390
			a ^= a & ~chars[i+1];
391
		setattr(a);
392
		iputrune(&bout, c & 0xFFFF);
393
	}
394
}
395
 
396
void
397
header(char *s)
398
{
399
	Bprint(&bout, "<head>\n");
400
	Bprint(&bout, "<title>%s</title>\n", s);
401
	Bprint(&bout, "<meta content=\"text/html; charset=utf-8\" http-equiv=Content-Type>\n");
402
	Bprint(&bout, "</head>\n");
403
	Bprint(&bout, "<body bgcolor=#ffffff>\n");
404
}
405
 
406
void
407
trailer(void)
408
{
409
 
410
#ifdef LUCENT
411
	Tm *t;
412
	t = localtime(time(nil));
413
	Bprint(&bout, TABLE "<tr height=20><td></table>\n");
414
	Bprint(&bout, "<font size=-1><a href=\"http://www.lucent.com/copyright.html\">\n");
415
	Bprint(&bout, "Copyright</A> &#169; %d Alcatel-Lucent.  All rights reserved.</font>\n", t->year+1900);
416
#endif
417
	Bprint(&bout, "</body></html>\n");
418
}
419
 
420
int
421
getc(Biobuf *b)
422
{
423
	cno++;
424
	return Bgetrune(b);
425
}
426
 
427
void
428
ungetc(Biobuf *b)
429
{
430
	cno--;
431
	Bungetrune(b);
432
}
433
 
434
char*
435
getline(Biobuf *b)
436
{
437
	int i, c;
438
 
439
	for(i=0; i<sizeof buf; i++){
440
		c = getc(b);
441
		if(c == Beof)
442
			return nil;
443
		buf[i] = c;
444
		if(c == '\n'){
445
			buf[i] = '\0';
446
			break;
447
		}
448
	}
449
	return buf;
450
}
451
 
452
int
453
getnum(Biobuf *b)
454
{
455
	int i, c;
456
 
457
	i = 0;
458
	for(;;){
459
		c = getc(b);
460
		if(c<'0' || '9'<c){
461
			ungetc(b);
462
			break;
463
		}
464
		i = i*10 + (c-'0');
465
	}
466
	return i;
467
}
468
 
469
char*
470
getstr(Biobuf *b)
471
{
472
	int i, c;
473
 
474
	for(i=0; i<sizeof buf; i++){
475
		/* must get bytes not runes */
476
		cno++;
477
		c = Bgetc(b);
478
		if(c == Beof)
479
			return nil;
480
		buf[i] = c;
481
		if(c == '\n' || c==' ' || c=='\t'){
482
			ungetc(b);
483
			buf[i] = '\0';
484
			break;
485
		}
486
	}
487
	return buf;
488
}
489
 
490
int
491
setnum(Biobuf *b, char *name, int min, int max)
492
{
493
	int i;
494
 
495
	i = getnum(b);
496
	if(debug > 2)
497
		fprint(2, "set %s = %d\n", name, i);
498
	if(min<=i && i<max)
499
		return i;
500
	sysfatal("value of %s is %d; min %d max %d at %s:#%d", name, i, min, max, filename, cno);
501
	return i;
502
}
503
 
504
void
505
xcmd(Biobuf *b)
506
{
507
	char *p, *fld[16], buf[1024];
508
 
509
	int i, nfld;
510
 
511
	p = getline(b);
512
	if(p == nil)
513
		sysfatal("xcmd error: %r");
514
	if(debug)
515
		fprint(2, "x command '%s'\n", p);
516
	nfld = tokenize(p, fld, nelem(fld));
517
	if(nfld == 0)
518
		return;
519
	switch(fld[0][0]){
520
	case 'f':
521
		/* mount font */
522
		if(nfld != 3)
523
			break;
524
		i = atoi(fld[1]);
525
		if(i<0 || Nfont<=i)
526
			sysfatal("font %d out of range at %s:#%d", i, filename, cno);
527
		mountfont(i, fld[2]);
528
		return;
529
	case 'i':
530
		/* init */
531
		return;
532
	case 'r':
533
		if(nfld<2 || atoi(fld[1])!=res)
534
			sysfatal("typesetter has unexpected resolution %s", fld[1]? fld[1] : "<unspecified>");
535
		return;
536
	case 's':
537
		/* stop */
538
		return;
539
	case 't':
540
		/* trailer */
541
		return;
542
	case 'T':
543
		if(nfld!=2 || strcmp(fld[1], "utf")!=0)
544
			sysfatal("output for unknown typesetter type %s", fld[1]);
545
		return;
546
	case 'X':
547
		if(nfld<3 || strcmp(fld[1], "html")!=0)
548
			break;
549
		/* is it a man reference of the form cp(1)? */
550
		/* X manref start/end cp (1) */
551
		if(nfld==6 && strcmp(fld[2], "manref")==0){
552
			/* was the right macro; is it the right form? */
553
			if(strlen(fld[5])>=3 &&
554
			   fld[5][0]=='(' && fld[5][2]==')' &&
555
			   '0'<=fld[5][1] && fld[5][1]<='9'){
556
				if(strcmp(fld[3], "start") == 0){
557
					/* set anchor attribute and remember string */
558
					attr |= (1<<Anchor);
559
					snprint(buf, sizeof buf,
560
						"<a href=\"/magic/man2html/%c/%s\">",
561
						fld[5][1], fld[4]);
562
					nanchors++;
563
					anchors = erealloc(anchors, nanchors*sizeof(char*));
564
					anchors[nanchors-1] = estrdup(buf);
565
				}else if(strcmp(fld[3], "end") == 0)
566
					attr &= ~(1<<Anchor);
567
			}
568
		}else if(strcmp(fld[2], "manPP") == 0){
569
			didP = 1;
570
			emitchar(Epp);
571
		}else if(nfld<4 || strcmp(fld[2], "manref")!=0){
572
			if(nfld>2 && strcmp(fld[2], "<P>")==0){	/* avoid triggering extra <br> */
573
				didP = 1;
574
				/* clear all font attributes before paragraph */
575
				emitchar(' ' | (attr & ~(0xFFFF|((1<<Italic)|(1<<Bold)|(1<<CW)))));
576
				emitstr("<P>");
577
				/* next emittec char will turn font attributes back on */
578
			}else if(nfld>2 && strcmp(fld[2], "<H4>")==0)
579
				attr |= (1<<Heading);
580
			else if(nfld>2 && strcmp(fld[2], "</H4>")==0)
581
				attr &= ~(1<<Heading);
582
			else if(debug)
583
				fprint(2, "unknown in-line html %s... at %s:%#d\n",
584
					fld[2], filename, cno);
585
		}
586
		return;
587
	}
588
	if(debug)
589
		fprint(2, "unknown or badly formatted x command %s\n", fld[0]);
590
}
591
 
592
int
593
lookup(int c, Htmlchar tab[], int ntab)
594
{
595
	int low, high, mid;
596
 
597
	low = 0;
598
	high = ntab - 1;
599
	while(low <= high){
600
		mid = (low+high)/2;
601
		if(c < tab[mid].value)
602
			high = mid - 1;
603
		else if(c > tab[mid].value)
604
			low = mid + 1;
605
		else
606
			return mid;
607
	}
608
	return -1;	/* no match */
609
}
610
 
611
void
612
emithtmlchar(int r)
613
{
614
	static char buf[10];
615
	int i;
616
 
617
	i = lookup(r, htmlchars, nelem(htmlchars));
618
	if(i >= 0)
619
		emitstr(htmlchars[i].name);
620
	else
621
		emit(r);
622
}
623
 
624
char*
625
troffchar(char *s)
626
{
627
	int i;
628
 
629
	for(i=0; troffchars[i].name!=nil; i++)
630
		if(strcmp(s, troffchars[i].name) == 0)
631
			return troffchars[i].value;
632
	return "??";
633
}
634
 
635
void
636
indent(void)
637
{
638
	int nind;
639
 
640
	didP = 0;
641
	if(atnewline){
642
		if(hp != prevlineH){
643
			prevlineH = hp;
644
			/* these most peculiar numbers appear in the troff -man output */
645
			nind = ((prevlineH-1*res)+323)/324;
646
			attr &= ~((1<<Indent1)|(1<<Indent2)|(1<<Indent3));
647
			if(nind >= 1)
648
				attr |= (1<<Indent1);
649
			if(nind >= 2)
650
				attr |= (1<<Indent2);
651
			if(nind >= 3)
652
				attr |= (1<<Indent3);
653
		}
654
		atnewline = 0;
655
	}
656
}
657
 
658
void
659
process(Biobuf *b, char *name)
660
{
661
	int c, r, v, i;
662
	char *p;
663
 
664
	cno = 0;
665
	prevlineH = res;
666
	filename = name;
667
	for(;;){
668
		c = getc(b);
669
		switch(c){
670
		case Beof:
671
			/* go to ground state */
672
			attr = 0;
673
			emit('\n');
674
			return;
675
		case '\n':
676
			break;
677
		case '0': case '1': case '2': case '3': case '4':
678
		case '5': case '6': case '7': case '8': case '9':
679
			v = c-'0';
680
			c = getc(b);
681
			if(c<'0' || '9'<c)
682
				sysfatal("illegal character motion at %s:#%d", filename, cno);
683
			v = v*10 + (c-'0');
684
			hp += v;
685
			/* fall through to character case */
686
		case 'c':
687
			indent();
688
			r = getc(b);
689
			emithtmlchar(r);
690
			break;
691
		case 'D':
692
			/* draw line; ignore */
693
			do
694
				c = getc(b);
695
			while(c!='\n' && c!= Beof);
696
			break;
697
		case 'f':
698
			v = setnum(b, "font", 0, Nfont);
699
			switchfont(v);
700
			break;
701
		case 'h':
702
			v = setnum(b, "hpos", -20000, 20000);
703
			/* generate spaces if motion is large and within a line */
704
			if(!atnewline && v>2*72)
705
				for(i=0; i<v; i+=72)
706
					emitstr("&nbsp;");
707
			hp += v;
708
			break;
709
		case 'n':
710
			setnum(b, "n1", -10000, 10000);
711
			//Bprint(&bout, " N1=%d", v);
712
			getc(b);	/* space separates */
713
			setnum(b, "n2", -10000, 10000);
714
			atnewline = 1;
715
			if(!didP && hp < (Wid-1)*res)	/* if line is less than 19" long, probably need a line break */
716
				emitstr("<br>");
717
			emit('\n');
718
			break;
719
		case 'p':
720
			page = setnum(b, "ps", -10000, 10000);
721
			break;
722
		case 's':
723
			ps = setnum(b, "ps", 1, 1000);
724
			break;
725
		case 'v':
726
			vp += setnum(b, "vpos", -10000, 10000);
727
			/* BUG: ignore motion */
728
			break;
729
		case 'x':
730
			xcmd(b);
731
			break;
732
		case 'w':
733
			emit(' ');
734
			break;
735
		case 'C':
736
			indent();
737
			p = getstr(b);
738
			emitstr(troffchar(p));
739
			break;
740
		case 'H':
741
			hp = setnum(b, "hpos", 0, 20000);
742
			//Bprint(&bout, " H=%d ", hp);
743
			break;
744
		case 'V':
745
			vp = setnum(b, "vpos", 0, 10000);
746
			break;
747
		default:
748
			fprint(2, "dhtml: unknown directive %c(0x%.2ux) at %s:#%d\n", c, c, filename, cno);
749
			return;
750
		}
751
	}
752
}
753
 
754
HTMLfont*
755
htmlfont(char *name)
756
{
757
	int i;
758
 
759
	for(i=0; htmlfonts[i].name!=nil; i++)
760
		if(strcmp(name, htmlfonts[i].name) == 0)
761
			return &htmlfonts[i];
762
	return &htmlfonts[0];
763
}
764
 
765
void
766
mountfont(int pos, char *name)
767
{
768
	if(debug)
769
		fprint(2, "mount font %s on %d\n", name, pos);
770
	if(font[pos] != nil){
771
		free(font[pos]->name);
772
		free(font[pos]);
773
	}
774
	font[pos] = emalloc(sizeof(Font));
775
	font[pos]->name = estrdup(name);
776
	font[pos]->htmlfont = htmlfont(name);
777
}
778
 
779
void
780
switchfont(int pos)
781
{
782
	HTMLfont *hf;
783
 
784
	if(debug)
785
		fprint(2, "font change from %d (%s) to %d (%s)\n", ft, font[ft]->name, pos, font[pos]->name);
786
	if(pos == ft)
787
		return;
788
	hf = font[ft]->htmlfont;
789
	if(hf->bit != 0)
790
		attr &= ~(1<<hf->bit);
791
	ft = pos;
792
	hf = font[ft]->htmlfont;
793
	if(hf->bit != 0)
794
		attr |= (1<<hf->bit);
795
}