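/*
 * prefetch hints for httpd: build an index of known urls and the
 * per-page hint statistics read from /sys/log/httpd/url and
 * /sys/log/httpd/pathstat, and emit "Fresh:" headers naming pages
 * a client is likely to fetch next.
 */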
#include <u.h>
#include <libc.h>
#include <bio.h>
#include "httpd.h"
#include "httpsrv.h"

enum{ URLmax = 65536, HINTmax = 20 };
#define RECIPLOG2 1.44269504089

char **urlname;				/* array of url strings    1,...,nurl */
static int nurl;
static uint urltab[URLmax];		/* hashstr(url)  1,...,nurl */
static int urlnext[URLmax];		/* index urltab of next url in chain */
static int urlhash[URLmax];		/* initially 0, meaning empty buckets */

typedef struct Hint {
	ushort url;
	uchar prob;
} Hint;
Hint *hints[URLmax];
uchar nhint[URLmax];

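/*
 * length of the file behind a Biobuf; rewinds the buffer to the
 * beginning as a side effect.
 */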
vlong
Bfilelen(void *vb)
{
	Biobuf *b;
	vlong n;

	b = vb;
	n = Bseek(b, 0L, 2);
	Bseek(b, 0L, 0);
	return n;
}

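/* multiplicative string hash (65599) over the bytes of key */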
static uint
hashstr(char* key)
{
	/* asu works better than pjw for urls */
	uchar *k = (unsigned char*)key;
	uint h = 0;
	while(*k!=0)
		h = 65599*h + *k++;
	return h;
}

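/*
 * find a url hash in the chained hash table: a positive return is
 * the index into urltab, a negative return is minus the bucket in
 * which a new entry should be inserted.
 */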
static int
urllookup(uint url)
{
	/* returns +index into urltab, else -hash */
	int j, hash;

	hash = 1 + url%(URLmax-1);
	j = urlhash[hash];
	for(;;){
		if(j==0)
			return -hash;
		if(url==urltab[j])
			return j;
		j = urlnext[j];
	}
}

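/* seconds since the file behind a Biobuf was last modified */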
int
Bage(Biobuf *b)
{
	Dir *dir;
	long mtime;

	dir = dirfstat(Bfildes(b));
	if(dir != nil)
		mtime = dir->mtime;
	else
		mtime = 0;
	free(dir);
	return time(nil) - mtime;
}

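/*
 * (re)build the url tables from /sys/log/httpd/url, whose lines have
 * the form "999 /url/path" and are numbered consecutively from 1.
 * the tables are rebuilt only when the file has changed in length
 * and has gone unmodified for at least five minutes.
 */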
void
urlinit(void)
{
	static Biobuf *b = nil;
	static vlong filelen = 0;
	vlong newlen;
	char *s, *arena;
	int i, j, n;
	uint url;
	char *file;

	if(filelen < 0)
		return;
	file = "/sys/log/httpd/url";
	if(b == nil){
		b = Bopen(file, OREAD); /* first time */
		if(b == nil){
			syslog(0, HTTPLOG, "no %s, abandon prefetch hints", file);
			filelen = -1;
			return;
		}
	}
	newlen = Bfilelen(b); /* side effect: rewinds b */
	if(newlen == filelen || Bage(b)<300)
		return;
	filelen = newlen;
	if(filelen < 0)
		return;
	if(nurl){ /* free existing tables */
		free(urlname[0]); /* arena */
		memset(urlhash,0,sizeof urlhash);
		memset(urlnext,0,sizeof urlnext);
		nurl = 0;
	}
	if(urlname==nil)
		urlname = (char**)ezalloc(URLmax*sizeof(*urlname));
	arena = (char*)ezalloc(filelen);  /* enough for all the strcpy below */
	i = 1;
	while((s=Brdline(b,'\n'))!=0){
		/* read lines of the form:  999 /url/path */
		n = Blinelen(b) - 1;
		if(n>2 && s[n]=='\n'){
			s[n] = '\0';
		}else{
			sysfatal("missing fields or newline in url-db");
		}
		j = strtoul(s,&s,10);
		while(*s==' ')
			s++;
		if(i++!=j)
			sysfatal("url-db synchronization error");
		url = hashstr(s);
		j = urllookup(url);
		if(j>=0)
			sysfatal("duplicate url");
		j = -j;
		nurl++;
		if(nurl>=URLmax){
			syslog(0, HTTPLOG, "urlinit overflow at %s",s);
			free(urlname[0]); /* arena */
			memset(urlhash,0,sizeof urlhash);
			memset(urlnext,0,sizeof urlnext);
			nurl = 0;
			return;
		}
		urltab[nurl] = url;
		urlnext[nurl] = urlhash[j];
		urlhash[j] = nurl;
		strcpy(arena,s);
		urlname[nurl] = arena;
		arena += strlen(s)+1;
	}
	syslog(0, HTTPLOG, "prefetch-hints url=%d (%.1fMB)", nurl, 1.e-6*(URLmax*sizeof(*urlname)+filelen));
	/* b is held open, because namespace will be chopped */
}

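/*
 * (re)load the hint statistics from /sys/log/httpd/pathstat, a
 * sequence of binary records:
 *	2 bytes		url index of the page (big-endian)
 *	1 byte		n, number of hints that follow
 *	3*n bytes	hints: url index (2 bytes, big-endian), probability (1 byte)
 * as in urlinit, the file is reread only when its length has changed
 * and it has gone unmodified for at least five minutes.
 */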
void
statsinit(void)
{
	static Biobuf *b = nil;
	static vlong filelen = 0;
	vlong newlen;
	int iq, n, i, nstats = 0;
	uchar *s, buf[3+HINTmax*3];  /* iq, n, (url,prob)... */
	Hint *arena, *h;
	char *file;
	static void *oldarena = nil;

	file = "/sys/log/httpd/pathstat";
	if(b == nil){
		if(filelen == -1)
			return; /* if failed first time */
		b = Bopen(file, OREAD); /* first time */
		if(b == nil){
			syslog(0, HTTPLOG, "no %s, abandon prefetch hints", file);
			filelen = -1;
			return;
		}
	}
	newlen = Bfilelen(b); /* side effect: rewinds b */
	if(newlen == filelen || Bage(b)<300)
		return;
	filelen = newlen;
	if(oldarena){
		free(oldarena);
		memset(nhint,0,sizeof nhint);
	}
	arena = (Hint*)ezalloc((filelen/3)*sizeof(Hint));
	oldarena = arena;
	for(;;){
		i = Bread(b,buf,3);
		if(i<3)
			break;
		nstats++;
		iq = buf[0];
		iq = (iq<<8) | buf[1];
		n = buf[2];
		h = arena;
		arena += n;
		hints[iq] = h;
		nhint[iq] = n;
		if(Bread(b,buf,3*n)!=3*n)
			sysfatal("stats read error");
		for(i=0; i<n; i++){
			s = &buf[3*i];
			h[i].url = (s[0]<<8) | s[1];
			h[i].prob = s[2];
		}
	}
	syslog(0, HTTPLOG, "prefetch-hints stats=%d (%.1fMB)", nstats, 1.e-6*((filelen/3)*sizeof(Hint)));
}

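/*
 * canonicalize url in place: collapse repeated '/', drop any
 * trailing '/', then apply the site-specific rewrites below.
 */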
void
urlcanon(char *url)
{
	/* all the changes here can be implemented by rewriting in-place */
	char *p, *q;

	/* remove extraneous '/' in the middle and at the end */
	p = url+1;  /* first char needs no change */
	q = p;
	while(q[0]){
		if(q[0]=='/' && q[-1]=='/'){
			q++;
			continue;
		}
		*p++ = *q++;
	}
	if(q[-1]=='/'){  /* trailing '/' */
		p[-1] = '\0';
	}else{
		p[0] = '\0';
	}

	/* specific to the cm.bell-labs.com web site */
	if(strncmp(url,"/cm/",4)==0){
		if(strchr("cims",url[4]) && strncmp(url+5,"s/who/",6)==0)
			/* strip off /cm/cs */
			memmove(url,url+6,strlen(url+6)+1);
		else if(strncmp(url+4,"ms/what/wavelet",15)==0)
			/* /cm/ms/what */
			memmove(url,url+11,strlen(url+11)+1);
	}
}

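/*
 * write prefetch hints for uri as "Fresh:" headers on hout.  each
 * hint meeting thresh gives probability, etag, log2 of file length
 * and the url (relative to uri's directory when possible); hints in
 * set havej, which the client already holds, are skipped.  a final
 * "Fresh: have/%d" names the hint set used for this page.
 */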
void
hintprint(HConnect *hc, Hio *hout, char *uri, int thresh, int havej)
{
	int i, j, pr, prefix, fd, siz, havei, newhint = 0, n;
	char *query, *sf, etag[32], *wurl;
	Dir *dir;
	Hint *h, *haveh;

	query = hstrdup(hc, uri);
	urlcanon(query);
	j = urllookup(hashstr(query));
	if(j < 0)
		return;
	query = strrchr(uri,'/');
	if(!query)
		return;  /* can't happen */
	prefix = query-uri+1;  /* = strlen(dirname)+1 */
	h = hints[j];
	for(i=0; i<nhint[j]; i++){
		if(havej > 0 && havej < URLmax){ /* exclude hints client has */
			haveh = hints[havej];
			for(havei=0; havei<nhint[havej]; havei++)
				if( haveh[havei].url == h[i].url)
					goto continuei;
		}
		sf = urlname[h[i].url];
		pr = h[i].prob;
		if(pr<thresh)
			break;
		n = strlen(webroot) + strlen(sf) + 1;
		wurl = halloc(hc, n);
		strcpy(wurl, webroot);
		strcat(wurl, sf);
		fd = open(wurl, OREAD);
		if(fd<0)
			continue;
		dir = dirfstat(fd);
		if(dir == nil){
			close(fd);
			continue;
		}
		close(fd);
		snprint(etag, sizeof(etag), "\"%lluxv%lux\"", dir->qid.path, dir->qid.vers);
		siz = (int)( log((double)dir->length) * RECIPLOG2 + 0.9999);
		free(dir);
		if(strncmp(uri,sf,prefix)==0 && strchr(sf+prefix,'/')==0 && sf[prefix]!=0)
			sf = sf+prefix;
		hprint(hout, "Fresh: %d,%s,%d,%s\r\n", pr, etag, siz, sf);
		newhint++;
continuei: ;
	}
	if(newhint)
		hprint(hout, "Fresh: have/%d\r\n", j);
}