Warning: Attempt to read property "date" on null in /usr/local/www/websvn.planix.org/blame.php on line 247

Warning: Attempt to read property "msg" on null in /usr/local/www/websvn.planix.org/blame.php on line 247
WebSVN – planix.SVN – Blame – /os/branches/feature_tlsv12/sys/src/cmd/htmlfmt/html.c – Rev 2

Subversion Repositories planix.SVN

Rev

Go to most recent revision | Details | Last modification | View Log | RSS feed

Rev Author Line No. Line
2 - 1
#include <u.h>
2
#include <libc.h>
3
#include <bio.h>
4
#include <draw.h>
5
#include <regexp.h>
6
#include <html.h>
7
#include <ctype.h>
8
#include "dat.h"
9
 
10
char urlexpr[] =
11
	"^(https?|ftp|file|gopher|mailto|news|nntp|telnet|wais|prospero)"
12
	"://([a-zA-Z0-9_@\\-]+([.:][a-zA-Z0-9_@\\-]+)*)";
13
Reprog	*urlprog;
14
 
15
int newitextitem;
16
int inword = 0;
17
int col = 0;
18
int wordi = 0;
19
 
20
char*
21
loadhtml(int fd)
22
{
23
	URLwin *u;
24
	Bytes *b;
25
	int n;
26
	char buf[4096];
27
 
28
	u = emalloc(sizeof(URLwin));
29
	u->infd = fd;
30
	u->outfd = 1;
31
	u->url = estrdup(url);
32
	u->type = TextHtml;
33
 
34
	b = emalloc(sizeof(Bytes));
35
	while((n = read(fd, buf, sizeof buf)) > 0)
36
		growbytes(b, buf, n);
37
	if(b->b == nil)
38
		return nil;	/* empty file */
39
	rendertext(u, b);
40
	freeurlwin(u);
41
	return nil;
42
}
43
 
44
char*
45
runetobyte(Rune *r, int n)
46
{
47
	char *s;
48
 
49
	if(n == 0)
50
		return emalloc(1);
51
	s = smprint("%.*S", n, r);
52
	if(s == nil)
53
		error("malloc failed");
54
	return s;
55
}
56
 
57
/*
 * Report whether c is a closing punctuation character, i.e. one that
 * emitword() attaches directly to the preceding text without inserting
 * a space.  Returns 1 if it is, 0 otherwise.
 */
int
closingpunct(char c)
{
	/*
	 * strchr also matches the terminating NUL of the set string,
	 * so a NUL byte has to be rejected explicitly.
	 */
	if(c == '\0')
		return 0;
	return strchr(".,:;'\")]}>!?", c) != 0;
}
62
 
63
void
64
emitword(Bytes *b, Rune *r, int nr)
65
{
66
	char *s;
67
	int space;
68
 
69
	if(nr == 0)
70
		return;
71
	s = smprint("%.*S", nr, r);
72
	space = b->n > 0 && !isspace(b->b[b->n-1]) && (!newitextitem || !closingpunct(*s));
73
	if(col > 0 && col+space+nr > width){
74
		growbytes(b, "\n", 1);
75
		space = 0;
76
		col = 0;
77
	}
78
	if(space && col > 0){
79
		growbytes(b, " ", 1);
80
		col++;
81
	}
82
	growbytes(b, s, strlen(s));
83
	col += nr;
84
	free(s);
85
	inword = 0;
86
	newitextitem = 0;
87
}
88
 
89
void
90
renderrunes(Bytes *b, Rune *r)
91
{
92
	int i, n;
93
 
94
	newitextitem = 1;
95
 
96
	n = runestrlen(r);
97
	for(i=0; i<n; i++){
98
		switch(r[i]){
99
		case '\n':
100
			if(inword)
101
				emitword(b, r+wordi, i-wordi);
102
			col = 0;
103
			if(b->n == 0)
104
				break;	/* don't start with blank lines */
105
			if(b->n<2 || b->b[b->n-1]!='\n' || b->b[b->n-2]!='\n')
106
				growbytes(b, "\n", 1);
107
			break;
108
		case ' ':
109
			if(inword)
110
				emitword(b, r+wordi, i-wordi);
111
			break;
112
		default:
113
			if(!inword)
114
				wordi = i;
115
			inword = 1;
116
			break;
117
		}
118
	}
119
	if(inword)
120
		emitword(b, r+wordi, i-wordi);
121
}
122
 
123
void
124
renderbytes(Bytes *b, char *fmt, ...)
125
{
126
	Rune *r;
127
	va_list arg;
128
 
129
	va_start(arg, fmt);
130
	r = runevsmprint(fmt, arg);
131
	va_end(arg);
132
	renderrunes(b, r);
133
	free(r);
134
}
135
 
136
char*
137
baseurl(char *url)
138
{
139
	char *base, *slash;
140
	Resub rs[10];
141
 
142
	if(url == nil)
143
		return nil;
144
	if(urlprog == nil){
145
		urlprog = regcomp(urlexpr);
146
		if(urlprog == nil)
147
			error("can't compile URL regexp");
148
	}
149
	memset(rs, 0, sizeof rs);
150
	if(regexec(urlprog, url, rs, nelem(rs)) == 0)
151
		return nil;
152
	base = estrdup(url);
153
	slash = strrchr(base, '/');
154
	if(slash!=nil && slash>=&base[rs[0].ep-rs[0].sp])
155
		*slash = '\0';
156
	else
157
		base[rs[0].ep-rs[0].sp] = '\0';
158
	return base;
159
}
160
 
161
char*
162
fullurl(URLwin *u, Rune *rhref)
163
{
164
	char *base, *href, *hrefbase;
165
	char *result;
166
 
167
	if(rhref == nil)
168
		return estrdup("NULL URL");
169
	href = runetobyte(rhref, runestrlen(rhref));
170
	hrefbase = baseurl(href);
171
	result = nil;
172
	if(hrefbase==nil && (base = baseurl(u->url))!=nil){
173
		result = estrdup(base);
174
		if(base[strlen(base)-1]!='/' && (href==nil || href[0]!='/'))
175
			result = eappend(result, "/", "");
176
		free(base);
177
	}
178
	if(href){
179
		if(result)
180
			result = eappend(result, "", href);
181
		else
182
			result = estrdup(href);
183
	}
184
	free(hrefbase);
185
	if(result == nil)
186
		return estrdup("***unknown***");
187
	return result;
188
}
189
 
190
void
191
render(URLwin *u, Bytes *t, Item *items, int curanchor)
192
{
193
	Item *il;
194
	Itext *it;
195
	Ifloat *ifl;
196
	Ispacer *is;
197
	Itable *ita;
198
	Iimage *im;
199
	Anchor *a;
200
	Table *tab;
201
	Tablecell *cell;
202
	char *href;
203
 
204
	inword = 0;
205
	col = 0;
206
	wordi = 0;
207
 
208
	for(il=items; il!=nil; il=il->next){
209
		if(il->state & IFbrk)
210
			renderbytes(t, "\n");
211
		if(il->state & IFbrksp)
212
			renderbytes(t, "\n");
213
 
214
		switch(il->tag){
215
		case Itexttag:
216
			it = (Itext*)il;
217
			if(it->state & IFwrap)
218
				renderrunes(t, it->s);
219
			else {
220
				newitextitem = 1;
221
				emitword(t, it->s, runestrlen(it->s));
222
			}
223
			break;
224
		case Iruletag:
225
			if(t->n>0 && t->b[t->n-1]!='\n')
226
				renderbytes(t, "\n");
227
			renderbytes(t, "=======\n");
228
			break;
229
		case Iimagetag:
230
			if(!aflag)
231
				break;
232
			im = (Iimage*)il;
233
			if(im->imsrc){
234
				href = fullurl(u, im->imsrc);
235
				renderbytes(t, "[image %s]", href);
236
				free(href);
237
			}
238
			break;
239
		case Iformfieldtag:
240
			if(aflag)
241
				renderbytes(t, "[formfield]");
242
			break;
243
		case Itabletag:
244
			ita = (Itable*)il;
245
			tab = ita->table;
246
			for(cell=tab->cells; cell!=nil; cell=cell->next){
247
				render(u, t, cell->content, curanchor);
248
			}
249
			if(t->n>0 && t->b[t->n-1]!='\n')
250
				renderbytes(t, "\n");
251
			break;
252
		case Ifloattag:
253
			ifl = (Ifloat*)il;
254
			render(u, t, ifl->item, curanchor);
255
			break;
256
		case Ispacertag:
257
			is = (Ispacer*)il;
258
			if(is->spkind != ISPnull)
259
				renderbytes(t, " ");
260
			break;
261
		default:
262
			error("unknown item tag %d\n", il->tag);
263
		}
264
		if(il->anchorid != 0 && il->anchorid!=curanchor){
265
			for(a=u->docinfo->anchors; a!=nil; a=a->next)
266
				if(aflag && a->index == il->anchorid){
267
					href = fullurl(u, a->href);
268
					renderbytes(t, "[%s]", href);
269
					free(href);
270
					break;
271
				}
272
			curanchor = il->anchorid;
273
		}
274
	}
275
	if(t->n>0 && t->b[t->n-1]!='\n')
276
		renderbytes(t, "\n");
277
}
278
 
279
void
280
rerender(URLwin *u)
281
{
282
	Bytes *t;
283
 
284
	t = emalloc(sizeof(Bytes));
285
 
286
	render(u, t, u->items, 0);
287
 
288
	if(t->n)
289
		write(u->outfd, (char*)t->b, t->n);
290
	free(t->b);
291
	free(t);
292
}
293
 
294
/*
295
 * Somewhat of a hack.  Not a full parse, just looks for strings in the beginning
296
 * of the document (cistrstr only looks at first somewhat bytes).
297
 */
298
int
299
charset(char *s)
300
{
301
	char *meta, *emeta, *charset;
302
 
303
	if(defcharset == 0)
304
		defcharset = ISO_8859_1;
305
	meta = cistrstr(s, "<meta");
306
	if(meta == nil)
307
		return defcharset;
308
	for(emeta=meta; *emeta!='>' && *emeta!='\0'; emeta++)
309
		;
310
	charset = cistrstr(s, "charset=");
311
	if(charset == nil)
312
		return defcharset;
313
	charset += 8;
314
	if(*charset == '"')
315
		charset++;
316
	if(cistrncmp(charset, "utf-8", 5) || cistrncmp(charset, "utf8", 4))
317
		return UTF_8;
318
	return defcharset;
319
}
320
 
321
void
322
rendertext(URLwin *u, Bytes *b)
323
{
324
	Rune *rurl;
325
 
326
	rurl = toStr((uchar*)u->url, strlen(u->url), ISO_8859_1);
327
	u->items = parsehtml(b->b, b->n, rurl, u->type, charset((char*)b->b), &u->docinfo);
328
//	free(rurl);
329
 
330
	rerender(u);
331
}
332
 
333
 
334
void
335
freeurlwin(URLwin *u)
336
{
337
	freeitems(u->items);
338
	u->items = nil;
339
	freedocinfo(u->docinfo);
340
	u->docinfo = nil;
341
	free(u);
342
}