Warning: Attempt to read property "date" on null in /usr/local/www/websvn.planix.org/blame.php on line 247

Warning: Attempt to read property "msg" on null in /usr/local/www/websvn.planix.org/blame.php on line 247
WebSVN – planix.SVN – Blame – /os/branches/feature_tlsv12/sys/src/cmd/upas/bayes/regen.c – Rev 2

Subversion Repositories planix.SVN

Rev

Go to most recent revision | Details | Last modification | View Log | RSS feed

Rev Author Line No. Line
2 - 1
#include <u.h>
2
#include <libc.h>
3
#include <bio.h>
4
#include <regexp.h>
5
#include "dfa.h"
6
 
7
/***
8
 * Regular expression for matching.
9
 */
10
 
11
/*
 * Patterns that buildre joins with '|' into one expression (re[2]).
 * Each entry is passed through strcpycase first, so lower-case
 * letters outside a character class end up matching either case.
 */
char *ignore[] =
{
	/*
	 * HTML tags other than A, IMG and FONT.  The tag must contain
	 * white space so that <email@address> is not caught.
	 */
	"<[ 	\n\r]*("
		"[^aif]|"
		"a[^> \t\r\n]|"
		"i[^mM \t\r\n]|"
		"im[^gG \t\r\n]|"
		"img[^> \t\r\n]|"
		"f[^oO \t\r\n]|"
		"fo[^Nn \t\r\n]|"
		"fon[^tT \t\r\n]|"
		"font[^> \r\t\n]"
	")[^>]*[ \t\n\r][^>]*>",
	"<[ 	\n\r]*("
		"i|im|f|fo|fon"
	")[ \t\r\n][^>]*>",

	/* HTML comments */
	"<!--([^\\-]|-[^\\-]|--[^>]|\n)*-->",

	/* assorted mail header and MIME strings, with continuation lines */
	"^message-id:.*\n([ 	].*\n)*",
	"^in-reply-to:.*\n([ 	].*\n)*",
	"^references:.*\n([ 	].*\n)*",
	"^date:.*\n([ 	].*\n)*",
	"^delivery-date:.*\n([ 	].*\n)*",
	"e?smtp id .*",
	"^	id.*",
	"boundary=.*",
	"name=\"",
	"filename=\"",
	"news:<[^>]+>",
	"^--[^ 	]*$",

	/* lines of base64-encoded data */
	"^[0-9a-zA-Z+\\-=/]+$",

	/* lines of uuencoded data */
	"^[!-Z]+$",

	/* catch-alls: any single character, and newline */
	".",
	"\n",
};
57
 
58
/*
 * Patterns that buildre joins with '|' into one expression (re[1]).
 * The single entry matches a run of letters, a few punctuation
 * characters and non-ASCII characters, or digit groups separated
 * by '.' or ','.
 */
char *keywords[] =
{
	"([a-zA-Z'`$!¡-￿]|[0-9]([.,][0-9])*)+",
};
62
 
63
/* set to 1 by the -d option in main; when nonzero, dregcomp prints each pattern before compiling it */
int debug;
64
 
65
Dreprog*
66
dregcomp(char *buf)
67
{
68
	Reprog *r;
69
	Dreprog *d;
70
 
71
	if(debug)
72
		print(">>> '%s'\n", buf);
73
 
74
	r = regcomp(buf);
75
	if(r == nil)
76
		sysfatal("regcomp");
77
	d = dregcvt(r);
78
	if(d == nil)
79
		sysfatal("dregcomp");
80
	free(r);
81
	return d;
82
}
83
 
84
/*
 * Copy the regular expression s to d, replacing every lower-case
 * ASCII letter that is outside a [...] character class with a
 * two-letter class matching either case ('a' becomes "[aA]").
 *
 * A backslash and the character following it are copied unchanged:
 * the escaped character is neither expanded nor counted as the start
 * or end of a character class.  (Earlier versions tracked escapes in
 * esc but never consulted it, so "\n" became "\[nN]" and "\[" was
 * taken for the start of a class.)
 *
 * The output is NOT NUL-terminated; the return value points just past
 * the last byte written so the caller can append more text or store
 * the terminator.  d must have room for up to four bytes per input
 * byte.
 */
char*
strcpycase(char *d, char *s)
{
	int cc, esc;

	cc = 0;		/* depth of unescaped '[' not yet closed by ']' */
	esc = 0;	/* nonzero when the previous character was an unescaped backslash */
	for(; *s; s++){
		if(esc){
			/* escaped character: copy literally, no class tracking, no expansion */
			*d++ = *s;
			esc = 0;
			continue;
		}
		if(*s == '\\'){
			*d++ = *s;
			esc = 1;
			continue;
		}
		if(*s == '[')
			cc++;
		else if(*s == ']')
			cc--;
		if(cc == 0 && 'a' <= *s && *s <= 'z'){
			*d++ = '[';
			*d++ = *s;
			*d++ = *s+'A'-'a';
			*d++ = ']';
		}else
			*d++ = *s;
	}
	return d;
}
111
 
112
/*
 * Report a regular expression error and terminate: msg is passed to
 * sysfatal with a "regerror: " prefix.
 * NOTE(review): presumably this is the error hook the regexp library
 * calls when regcomp fails -- confirm against regexp(2).
 */
void
regerror(char *msg)
{
	sysfatal("regerror: %s", msg);
}
117
 
118
void
119
buildre(Dreprog *re[3])
120
{
121
	int i;
122
	static char buf[16384], *s;
123
 
124
	re[0] = dregcomp("^From ");
125
 
126
	s = buf;
127
	for(i=0; i<nelem(keywords); i++){
128
		if(i != 0)
129
			*s++ = '|';
130
		s = strcpycase(s, keywords[i]);
131
	}
132
	*s = 0;
133
	re[1] = dregcomp(buf);
134
 
135
	s = buf;
136
	for(i=0; i<nelem(ignore); i++){
137
		if(i != 0)
138
			*s++ = '|';
139
		s = strcpycase(s, ignore[i]);
140
	}
141
	*s = 0;
142
	re[2] = dregcomp(buf);
143
}
144
 
145
/*
 * Print the command synopsis on file descriptor 2, then exit with
 * the status string "usage".
 */
void
usage(void)
{
	fprint(2, "usage: regen [-d]\n");
	exits("usage");
}
151
 
152
void
153
main(int argc, char **argv)
154
{
155
	Dreprog *re[3];
156
	Biobuf b;
157
 
158
	ARGBEGIN{
159
	default:
160
		usage();
161
	case 'd':
162
		debug = 1;
163
	}ARGEND
164
 
165
	if(argc != 0)
166
		usage();
167
 
168
	buildre(re);
169
	Binit(&b, 1, OWRITE);
170
	Bprintdfa(&b, re[0]);
171
	Bprintdfa(&b, re[1]);
172
	Bprintdfa(&b, re[2]);
173
	exits(0);
174
}
175
 
176