Warning: Attempt to read property "date" on null in /usr/local/www/websvn.planix.org/blame.php on line 247

Warning: Attempt to read property "msg" on null in /usr/local/www/websvn.planix.org/blame.php on line 247
WebSVN – planix.SVN – Blame – /os/branches/feature_fixcpp/sys/src/cmd/tcs/html.c – Rev 2

Subversion Repositories planix.SVN

Rev

Go to most recent revision | Details | Last modification | View Log | RSS feed

Rev Author Line No. Line
2 - 1
#include <u.h>
2
#include <libc.h>
3
#include <bio.h>
4
#include "hdr.h"
5
#include "conv.h"
6
 
7
/* Hchar pairs an HTML entity name with the rune it denotes. */
typedef struct Hchar Hchar;
8
struct Hchar
9
{
10
	char *s;	/* entity name, without the surrounding '&' and ';' */
11
	Rune r;	/* rune value the name stands for */
12
};
13
 
14
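/*
 * Original remark: "&lt;, &gt;, &quot;, &amp; intentionally omitted".
 * The byname table below does list lt, gt, quot and amp, so those
 * names are recognized on input.  html_out never emits them, because
 * it writes every rune below Runeself unchanged.
 */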
15
 
16
/*
 * Table of HTML character entity names and the runes they denote.
 *
 * Names beginning with _ are names we recognize
 * (without the underscore) but will not generate,
 * because they are nonstandard.  html_init advances the
 * name pointer in byname past the underscore and sets the
 * rune of the corresponding byrune entry to Runeerror,
 * which findbyrune refuses to look up.
 *
 * The initializer does not have to be kept in sorted order:
 * html_init sorts the table by name before it is searched.
 */
21
static Hchar byname[] =
22
{
23
	{"AElig", 198},
24
	{"Aacute", 193},
25
	{"Acirc", 194},
26
	{"Agrave", 192},
27
	{"Alpha", 913},
28
	{"Aring", 197},
29
	{"Atilde", 195},
30
	{"Auml", 196},
31
	{"Beta", 914},
32
	{"Ccedil", 199},
33
	{"Chi", 935},
34
	{"Dagger", 8225},
35
	{"Delta", 916},
36
	{"ETH", 208},
37
	{"Eacute", 201},
38
	{"Ecirc", 202},
39
	{"Egrave", 200},
40
	{"Epsilon", 917},
41
	{"Eta", 919},
42
	{"Euml", 203},
43
	{"Gamma", 915},
44
	{"Iacute", 205},
45
	{"Icirc", 206},
46
	{"Igrave", 204},
47
	{"Iota", 921},
48
	{"Iuml", 207},
49
	{"Kappa", 922},
50
	{"Lambda", 923},
51
	{"Mu", 924},
52
	{"Ntilde", 209},
53
	{"Nu", 925},
54
	{"OElig", 338},
55
	{"Oacute", 211},
56
	{"Ocirc", 212},
57
	{"Ograve", 210},
58
	{"Omega", 937},
59
	{"Omicron", 927},
60
	{"Oslash", 216},
61
	{"Otilde", 213},
62
	{"Ouml", 214},
63
	{"Phi", 934},
64
	{"Pi", 928},
65
	{"Prime", 8243},
66
	{"Psi", 936},
67
	{"Rho", 929},
68
	{"Scaron", 352},
69
	{"Sigma", 931},
70
	{"THORN", 222},
71
	{"Tau", 932},
72
	{"Theta", 920},
73
	{"Uacute", 218},
74
	{"Ucirc", 219},
75
	{"Ugrave", 217},
76
	{"Upsilon", 933},
77
	{"Uuml", 220},
78
	{"Xi", 926},
79
	{"Yacute", 221},
80
	{"Yuml", 376},
81
	{"Zeta", 918},
82
	{"aacute", 225},
83
	{"acirc", 226},
84
	{"acute", 180},
85
	{"aelig", 230},
86
	{"agrave", 224},
87
	{"alefsym", 8501},
88
	{"alpha", 945},
89
	{"amp", 38},
90
	{"and", 8743},
91
	{"ang", 8736},
92
	{"aring", 229},
93
	{"asymp", 8776},
94
	{"atilde", 227},
95
	{"auml", 228},
96
	{"bdquo", 8222},
97
	{"beta", 946},
98
	{"brvbar", 166},
99
	{"bull", 8226},
100
	{"cap", 8745},
101
	{"ccedil", 231},
102
	{"cdots", 8943},
103
	{"cedil", 184},
104
	{"cent", 162},
105
	{"chi", 967},
106
	{"circ", 710},
107
	{"clubs", 9827},
108
	{"cong", 8773},
109
	{"copy", 169},
110
	{"crarr", 8629},
111
	{"cup", 8746},
112
	{"curren", 164},
113
	{"dArr", 8659},
114
	{"dagger", 8224},
115
	{"darr", 8595},
116
	{"ddots", 8945},
117
	{"deg", 176},
118
	{"delta", 948},
119
	{"diams", 9830},
120
	{"divide", 247},
121
	{"eacute", 233},
122
	{"ecirc", 234},
123
	{"egrave", 232},
124
	{"_emdash", 8212},	/* non-standard but commonly used */
125
	{"empty", 8709},
126
	{"emsp", 8195},
127
	{"_endash", 8211},	/* non-standard but commonly used */
128
	{"ensp", 8194},
129
	{"epsilon", 949},
130
	{"equiv", 8801},
131
	{"eta", 951},
132
	{"eth", 240},
133
	{"euml", 235},
134
	{"euro", 8364},
135
	{"exist", 8707},
136
	{"fnof", 402},
137
	{"forall", 8704},
138
	{"frac12", 189},
139
	{"frac14", 188},
140
	{"frac34", 190},
141
	{"frasl", 8260},
142
	{"gamma", 947},
143
	{"ge", 8805},
144
	{"gt", 62},
145
	{"hArr", 8660},
146
	{"harr", 8596},
147
	{"hearts", 9829},
148
	{"hellip", 8230},
149
	{"iacute", 237},
150
	{"icirc", 238},
151
	{"iexcl", 161},
152
	{"igrave", 236},
153
	{"image", 8465},
154
	{"infin", 8734},
155
	{"int", 8747},
156
	{"iota", 953},
157
	{"iquest", 191},
158
	{"isin", 8712},
159
	{"iuml", 239},
160
	{"kappa", 954},
161
	{"lArr", 8656},
162
	{"lambda", 955},
163
	{"lang", 9001},
164
	{"laquo", 171},
165
	{"larr", 8592},
166
	{"lceil", 8968},
167
	{"_ldots", 8230},
168
	{"ldquo", 8220},
169
	{"le", 8804},
170
	{"lfloor", 8970},
171
	{"lowast", 8727},
172
	{"loz", 9674},
173
	{"lrm", 8206},
174
	{"lsaquo", 8249},
175
	{"lsquo", 8216},
176
	{"lt", 60},
177
	{"macr", 175},
178
	{"mdash", 8212},
179
	{"micro", 181},
180
	{"middot", 183},
181
	{"minus", 8722},
182
	{"mu", 956},
183
	{"nabla", 8711},
184
	{"nbsp", 160},
185
	{"ndash", 8211},
186
	{"ne", 8800},
187
	{"ni", 8715},
188
	{"not", 172},
189
	{"notin", 8713},
190
	{"nsub", 8836},
191
	{"ntilde", 241},
192
	{"nu", 957},
193
	{"oacute", 243},
194
	{"ocirc", 244},
195
	{"oelig", 339},
196
	{"ograve", 242},
197
	{"oline", 8254},
198
	{"omega", 969},
199
	{"omicron", 959},
200
	{"oplus", 8853},
201
	{"or", 8744},
202
	{"ordf", 170},
203
	{"ordm", 186},
204
	{"oslash", 248},
205
	{"otilde", 245},
206
	{"otimes", 8855},
207
	{"ouml", 246},
208
	{"para", 182},
209
	{"part", 8706},
210
	{"permil", 8240},
211
	{"perp", 8869},
212
	{"phi", 966},
213
	{"pi", 960},
214
	{"piv", 982},
215
	{"plusmn", 177},
216
	{"pound", 163},
217
	{"prime", 8242},
218
	{"prod", 8719},
219
	{"prop", 8733},
220
	{"psi", 968},
221
	{"quad", 8193},
222
	{"quot", 34},
223
	{"rArr", 8658},
224
	{"radic", 8730},
225
	{"rang", 9002},
226
	{"raquo", 187},
227
	{"rarr", 8594},
228
	{"rceil", 8969},
229
	{"rdquo", 8221},
230
	{"real", 8476},
231
	{"reg", 174},
232
	{"rfloor", 8971},
233
	{"rho", 961},
234
	{"rlm", 8207},
235
	{"rsaquo", 8250},
236
	{"rsquo", 8217},
237
	{"sbquo", 8218},
238
	{"scaron", 353},
239
	{"sdot", 8901},
240
	{"sect", 167},
241
	{"shy", 173},
242
	{"sigma", 963},
243
	{"sigmaf", 962},
244
	{"sim", 8764},
245
	{"_sp", 8194},
246
	{"spades", 9824},
247
	{"sub", 8834},
248
	{"sube", 8838},
249
	{"sum", 8721},
250
	{"sup", 8835},
251
	{"sup1", 185},
252
	{"sup2", 178},
253
	{"sup3", 179},
254
	{"supe", 8839},
255
	{"szlig", 223},
256
	{"tau", 964},
257
	{"there4", 8756},
258
	{"theta", 952},
259
	{"thetasym", 977},
260
	{"thinsp", 8201},
261
	{"thorn", 254},
262
	{"tilde", 732},
263
	{"times", 215},
264
	{"trade", 8482},
265
	{"uArr", 8657},
266
	{"uacute", 250},
267
	{"uarr", 8593},
268
	{"ucirc", 251},
269
	{"ugrave", 249},
270
	{"uml", 168},
271
	{"upsih", 978},
272
	{"upsilon", 965},
273
	{"uuml", 252},
274
	{"_varepsilon", 8712},	/* NOTE(review): same value as "isin" above; confirm this is intended */
275
	{"varphi", 981},
276
	{"_varpi", 982},
277
	{"varrho", 1009},
278
	{"vdots", 8942},
279
	{"_vsigma", 962},
280
	{"_vtheta", 977},
281
	{"weierp", 8472},
282
	{"xi", 958},
283
	{"yacute", 253},
284
	{"yen", 165},
285
	{"yuml", 255},
286
	{"zeta", 950},
287
	{"zwj", 8205},
288
	{"zwnj", 8204}
289
};
290
 
291
/*
 * Second copy of the entity table, with the same number of entries
 * as byname.  html_init fills it from byname and sorts it by rune
 * value; findbyrune searches it to map a rune back to a name.
 */
static Hchar byrune[nelem(byname)];
292
 
293
static int
294
hnamecmp(const void *va, const void *vb)
295
{
296
	Hchar *a, *b;
297
 
298
	a = (Hchar*)va;
299
	b = (Hchar*)vb;
300
	return strcmp(a->s, b->s);
301
}
302
 
303
static int
304
hrunecmp(const void *va, const void *vb)
305
{
306
	Hchar *a, *b;
307
 
308
	a = (Hchar*)va;
309
	b = (Hchar*)vb;
310
	return a->r - b->r;
311
}
312
 
313
static void
314
html_init(void)
315
{
316
	static int init;
317
	int i;
318
 
319
	if(init)
320
		return;
321
	init = 1;
322
	memmove(byrune, byname, sizeof byrune);
323
 
324
	/* Eliminate names we aren't allowed to generate. */
325
	for(i=0; i<nelem(byrune); i++){
326
		if(byrune[i].s[0] == '_'){
327
			byrune[i].r = Runeerror;
328
			byname[i].s++;
329
		}
330
	}
331
 
332
	qsort(byname, nelem(byname), sizeof byname[0], hnamecmp);
333
	qsort(byrune, nelem(byrune), sizeof byrune[0], hrunecmp);
334
}
335
 
336
static Rune
337
findbyname(char *s)
338
{
339
	Hchar *h;
340
	int n, m, x;
341
 
342
	h = byname;
343
	n = nelem(byname);
344
	while(n > 0){
345
		m = n/2;
346
		x = strcmp(h[m].s, s);
347
		if(x == 0)
348
			return h[m].r;
349
		if(x < 0){
350
			h += m+1;
351
			n -= m+1;
352
		}else
353
			n = m;
354
	}
355
	return Runeerror;
356
}
357
 
358
static char*
359
findbyrune(Rune r)
360
{
361
	Hchar *h;
362
	int n, m;
363
 
364
	if(r == Runeerror)
365
		return nil;
366
	h = byrune;
367
	n = nelem(byrune);
368
	while(n > 0){
369
		m = n/2;
370
		if(h[m].r == r)
371
			return h[m].s;
372
		if(h[m].r < r){
373
			h += m+1;
374
			n -= m+1;
375
		}else
376
			n = m;
377
	}
378
	return nil;
379
}
380
 
381
void
382
html_in(int fd, long *x, struct convert *out)
383
{
384
	char buf[100], *p;
385
	Biobuf b;
386
	Rune rbuf[N];
387
	Rune *r, *er;
388
	int c, i;
389
 
390
	USED(x);
391
 
392
	html_init();
393
	r = rbuf;
394
	er = rbuf+N;
395
	Binit(&b, fd, OREAD);
396
	while((c = Bgetrune(&b)) != Beof){
397
		if(r >= er){
398
			OUT(out, rbuf, r-rbuf);
399
			r = rbuf;
400
		}
401
		if(c == '&'){
402
			buf[0] = c;
403
			for(i=1; i<nelem(buf)-1;){
404
				c = Bgetc(&b);
405
				if(c == Beof)
406
					break;
407
				buf[i++] = c;
408
				if(strchr("; \t\r\n", c))
409
					break;
410
			}
411
			buf[i] = 0;
412
			if(buf[i-1] == ';'){
413
				buf[i-1] = 0;
414
				if((c = findbyname(buf+1)) != Runeerror){
415
					*r++ = c;
416
					continue;
417
				}
418
				buf[i-1] = ';';
419
				if(buf[1] == '#'){
420
					if(buf[2] == 'x')
421
						c = strtol(buf+3, &p, 16);
422
					else
423
						c = strtol(buf+2, &p, 10);
424
					if(*p != ';' || c >= NRUNE || c < 0)
425
						goto bad;
426
					*r++ = c;
427
					continue;
428
				}
429
			}
430
		bad:
431
			for(p=buf; p<buf+i; ){
432
				p += chartorune(r++, p);
433
				if(r >= er){
434
					OUT(out, rbuf, r-rbuf);
435
					r = rbuf;
436
				}
437
			}
438
			continue;
439
		}
440
		*r++ = c;
441
	}
442
	if(r > rbuf)
443
		OUT(out, rbuf, r-rbuf);
444
	OUT(out, rbuf, 0);
445
}
446
 
447
/*
448
 * use biobuf because can use more than UTFmax bytes per rune
449
 */
450
void
451
html_out(Rune *r, int n, long *x)
452
{
453
	char *s;
454
	Biobuf b;
455
	Rune *er;
456
 
457
	USED(x);
458
	html_init();
459
	Binit(&b, 1, OWRITE);
460
	er = r+n;
461
	for(; r<er; r++){
462
		if(*r < Runeself)
463
			Bputrune(&b, *r);
464
		else if((s = findbyrune(*r)) != nil)
465
			Bprint(&b, "&%s;", s);
466
		else
467
			Bprint(&b, "&#%d;", *r);
468
	}
469
	Bflush(&b);
470
}
471