Subversion Repositories planix.SVN

Rev

Rev 2 | Details | Compare with Previous | Last modification | View Log | RSS feed

Rev Author Line No. Line
2 - 1
#include <u.h>
2
#include <libc.h>
3
#include <bio.h>
4
#include "dict.h"
5
 
6
Dict dicts[] = {
7
	{"oed",		"Oxford English Dictionary, 2nd Ed.",
8
	 "/lib/dict/oed2",	"/lib/dict/oed2index",
9
	 oednextoff,	oedprintentry,		oedprintkey},
10
	{"ahd",		"American Heritage Dictionary, 2nd College Ed.",
11
	 "/lib/ahd/DICT.DB",	"/lib/ahd/index",
12
	 ahdnextoff,	ahdprintentry,		ahdprintkey},
13
	{"pgw",		"Project Gutenberg Webster Dictionary",
14
	 "/lib/dict/pgw",	"/lib/dict/pgwindex",
15
	 pgwnextoff,	pgwprintentry,		pgwprintkey},
16
	{"thesaurus",	"Collins Thesaurus",
17
	 "/lib/dict/thesaurus",	"/lib/dict/thesindex",
18
	 thesnextoff,	thesprintentry,	thesprintkey},
19
	{"roget",		"Project Gutenberg Roget's Thesaurus",
20
	 "/lib/dict/roget", "/lib/dict/rogetindex",
21
	 rogetnextoff,	rogetprintentry,	rogetprintkey},
22
 
23
	{"ce",		"Gendai Chinese->English",
24
	 "/lib/dict/world/sansdata/sandic24.dat",
25
	 "/lib/dict/world/sansdata/ceindex",
26
	 worldnextoff,	worldprintentry,	worldprintkey},
27
	{"ceh",		"Gendai Chinese->English (Hanzi index)",
28
	 "/lib/dict/world/sansdata/sandic24.dat",
29
	 "/lib/dict/world/sansdata/cehindex",
30
	 worldnextoff,	worldprintentry,	worldprintkey},
31
	{"ec",		"Gendai English->Chinese",
32
	 "/lib/dict/world/sansdata/sandic24.dat",
33
	 "/lib/dict/world/sansdata/ecindex",
34
	 worldnextoff,	worldprintentry,	worldprintkey},
35
 
36
	{"dae",		"Gyldendal Danish->English",
37
	 "/lib/dict/world/gylddata/sandic30.dat",
38
	 "/lib/dict/world/gylddata/daeindex",
39
	 worldnextoff,	worldprintentry,	worldprintkey},
40
	{"eda",		"Gyldendal English->Danish",
41
	 "/lib/dict/world/gylddata/sandic29.dat",
42
	 "/lib/dict/world/gylddata/edaindex",
43
	 worldnextoff,	worldprintentry,	worldprintkey},
44
 
45
	{"due",		"Wolters-Noordhoff Dutch->English",
46
	 "/lib/dict/world/woltdata/sandic07.dat",
47
	 "/lib/dict/world/woltdata/deindex",
48
	 worldnextoff,	worldprintentry,	worldprintkey},
49
	{"edu",		"Wolters-Noordhoff English->Dutch",
50
	 "/lib/dict/world/woltdata/sandic06.dat",
51
	 "/lib/dict/world/woltdata/edindex",
52
	 worldnextoff,	worldprintentry,	worldprintkey},
53
 
54
	{"fie",		"WSOY Finnish->English",
55
	 "/lib/dict/world/werndata/sandic32.dat",
56
	 "/lib/dict/world/werndata/fieindex",
57
	 worldnextoff,	worldprintentry,	worldprintkey},
58
	{"efi",		"WSOY English->Finnish",
59
	 "/lib/dict/world/werndata/sandic31.dat",
60
	 "/lib/dict/world/werndata/efiindex",
61
	 worldnextoff,	worldprintentry,	worldprintkey},
62
 
63
	{"fe",		"Collins French->English",
64
	 "/lib/dict/fe",	"/lib/dict/feindex",
65
	 pcollnextoff,	pcollprintentry,	pcollprintkey},
66
	{"ef",		"Collins English->French",
67
	 "/lib/dict/ef",	"/lib/dict/efindex",
68
	 pcollnextoff,	pcollprintentry,	pcollprintkey},
69
 
70
	{"ge",		"Collins German->English",
71
	 "/lib/dict/ge",	"/lib/dict/geindex",
72
	 pcollgnextoff,	pcollgprintentry,	pcollgprintkey},
73
	{"eg",		"Collins English->German",
74
	 "/lib/dict/eg",	"/lib/dict/egindex",
75
	 pcollgnextoff,	pcollgprintentry,	pcollgprintkey},
76
 
77
	{"ie",		"Collins Italian->English",
78
	 "/lib/dict/ie",	"/lib/dict/ieindex",
79
	 pcollnextoff,	pcollprintentry,	pcollprintkey},
80
	{"ei",		"Collins English->Italian",
81
	 "/lib/dict/ei",	"/lib/dict/eiindex",
82
	 pcollnextoff,	pcollprintentry,	pcollprintkey},
83
 
84
	{"je",		"Sanshusha Japanese->English",
85
	 "/lib/dict/world/sansdata/sandic18.dat",
86
	 "/lib/dict/world/sansdata/jeindex",
87
	 worldnextoff,	worldprintentry,	worldprintkey},
88
	{"jek",		"Sanshusha Japanese->English (Kanji index)",
89
	 "/lib/dict/world/sansdata/sandic18.dat",
90
	 "/lib/dict/world/sansdata/jekindex",
91
	 worldnextoff,	worldprintentry,	worldprintkey},
92
	{"ej",		"Sanshusha English->Japanese",
93
	 "/lib/dict/world/sansdata/sandic18.dat",
94
	 "/lib/dict/world/sansdata/ejindex",
95
	 worldnextoff,	worldprintentry,	worldprintkey},
96
 
97
	{"tjeg",	"Sanshusha technical Japanese->English,German",
98
	 "/lib/dict/world/sansdata/sandic16.dat",
99
	 "/lib/dict/world/sansdata/tjegindex",
100
	 worldnextoff,	worldprintentry,	worldprintkey},
101
	{"tjegk",	"Sanshusha technical Japanese->English,German (Kanji index)",
102
	 "/lib/dict/world/sansdata/sandic16.dat",
103
	 "/lib/dict/world/sansdata/tjegkindex",
104
	 worldnextoff,	worldprintentry,	worldprintkey},
105
	{"tegj",	"Sanshusha technical English->German,Japanese",
106
	 "/lib/dict/world/sansdata/sandic16.dat",
107
	 "/lib/dict/world/sansdata/tegjindex",
108
	 worldnextoff,	worldprintentry,	worldprintkey},
109
	{"tgje",	"Sanshusha technical German->Japanese,English",
110
	 "/lib/dict/world/sansdata/sandic16.dat",
111
	 "/lib/dict/world/sansdata/tgjeindex",
112
	 worldnextoff,	worldprintentry,	worldprintkey},
113
 
114
	{"ne",		"Kunnskapforlaget Norwegian->English",
115
	 "/lib/dict/world/kunndata/sandic28.dat",
116
	 "/lib/dict/world/kunndata/neindex",
117
	 worldnextoff,	worldprintentry,	worldprintkey},
118
	{"en",		"Kunnskapforlaget English->Norwegian",
119
	 "/lib/dict/world/kunndata/sandic27.dat",
120
	 "/lib/dict/world/kunndata/enindex",
121
	 worldnextoff,	worldprintentry,	worldprintkey},
122
 
123
	{"re",		"Leon Ungier Russian->English",
124
	 "/lib/dict/re",	"/lib/dict/reindex",
125
	 simplenextoff,	simpleprintentry,	simpleprintkey},
126
	{"er",		"Leon Ungier English->Russian",
127
	 "/lib/dict/re",	"/lib/dict/erindex",
128
	 simplenextoff,	simpleprintentry,	simpleprintkey},
129
 
130
	{"se",		"Collins Spanish->English",
131
	 "/lib/dict/se",	"/lib/dict/seindex",
132
	 pcollnextoff,	pcollprintentry,	pcollprintkey},
133
	{"es",		"Collins English->Spanish",
134
	 "/lib/dict/es",	"/lib/dict/esindex",
135
	 pcollnextoff,	pcollprintentry,	pcollprintkey},
136
 
137
	{"swe",		"Esselte Studium Swedish->English",
138
	 "/lib/dict/world/essedata/sandic34.dat",
139
	 "/lib/dict/world/essedata/sweindex",
140
	 worldnextoff,	worldprintentry,	worldprintkey},
141
	{"esw",		"Esselte Studium English->Swedish",
142
	 "/lib/dict/world/essedata/sandic33.dat",
143
	 "/lib/dict/world/essedata/eswindex",
144
	 worldnextoff,	worldprintentry,	worldprintkey},
145
 
146
	{"movie",	"Movies -- by title",
147
	 "/lib/movie/data",	"/lib/dict/movtindex",
148
	 movienextoff,	movieprintentry,	movieprintkey},
149
	{"moviea",	"Movies -- by actor",
150
	 "/lib/movie/data",	"/lib/dict/movaindex",
151
	 movienextoff,	movieprintentry,	movieprintkey},
152
	{"movied",	"Movies -- by director",
153
	 "/lib/movie/data",	"/lib/dict/movdindex",
154
	 movienextoff,	movieprintentry,	movieprintkey},
155
 
156
	{"slang",	"English Slang",
157
	 "/lib/dict/slang",	"/lib/dict/slangindex",
158
	 slangnextoff,	slangprintentry,	slangprintkey},
159
 
160
	{"robert",	"Robert Électronique",
161
	 "/lib/dict/robert/_pointers",	"/lib/dict/robert/_index",
162
	 robertnextoff,	robertindexentry,	robertprintkey},
163
	{"robertv",	"Robert Électronique - formes des verbes",
164
	 "/lib/dict/robert/flex.rob",	"/lib/dict/robert/_flexindex",
165
	 robertnextflex,	robertflexentry,	robertprintkey},
166
 
167
	{0, 0, 0, 0, 0}
168
};
169
 
170
typedef struct Lig Lig;
171
struct Lig {
172
	Rune	start;		/* accent rune */
173
	Rune	*pairs;		/* <char,accented version> pairs */
174
};
175
 
176
static Lig ligtab[Nligs] = {
177
[LACU-LIGS]	{L'´',	L"AÁaáCĆcćEÉeégģIÍiíıíLĹlĺNŃnńOÓoóRŔrŕSŚsśUÚuúYÝyýZŹzź"},
178
[LGRV-LIGS]	{L'ˋ',	L"AÀaàEÈeèIÌiìıìOÒoòUÙuù"},
179
[LUML-LIGS]	{L'¨',	L"AÄaäEËeëIÏiïOÖoöUÜuüYŸyÿ"},
180
[LCED-LIGS]	{L'¸',	L"CÇcçGĢKĶkķLĻlļNŅnņRŖrŗSŞsşTŢtţ"},
181
[LTIL-LIGS]	{L'˜',	L"AÃaãIĨiĩıĩNÑnñOÕoõUŨuũ"},
182
[LBRV-LIGS]	{L'˘',	L"AĂaăEĔeĕGĞgğIĬiĭıĭOŎoŏUŬuŭ"},
183
[LRNG-LIGS]	{L'˚',	L"AÅaåUŮuů"},
184
[LDOT-LIGS]	{L'˙',	L"CĊcċEĖeėGĠgġIİLĿlŀZŻzż"},
185
[LDTB-LIGS]	{L'.',	L""},
186
[LFRN-LIGS]	{L'⌢',	L"AÂaâCĈcĉEÊeêGĜgĝHĤhĥIÎiîıîJĴjĵOÔoôSŜsŝUÛuûWŴwŵYŶyŷ"},
187
[LFRB-LIGS]	{L'̯',	L""},
188
[LOGO-LIGS]	{L'˛',	L"AĄaąEĘeęIĮiįıįUŲuų"},
189
[LMAC-LIGS]	{L'¯',	L"AĀaāEĒeēIĪiīıīOŌoōUŪuū"},
190
[LHCK-LIGS]	{L'ˇ',	L"CČcčDĎdďEĚeěLĽlľNŇnňRŘrřSŠsšTŤtťZŽzž"},
191
[LASP-LIGS]	{L'ʽ',	L""},
192
[LLEN-LIGS]	{L'ʼ',	L""},
193
[LBRB-LIGS]	{L'̮',	L""}
194
};
195
 
196
Rune *multitab[Nmulti] = {
197
[MAAS-MULTI]	L"ʽα",
198
[MALN-MULTI]	L"ʼα",
199
[MAND-MULTI]	L"and",
200
[MAOQ-MULTI]	L"a/q",
201
[MBRA-MULTI]	L"<|",
202
[MDD-MULTI]	L"..",
203
[MDDD-MULTI]	L"...",
204
[MEAS-MULTI]	L"ʽε",
205
[MELN-MULTI]	L"ʼε",
206
[MEMM-MULTI]	L"——",
207
[MHAS-MULTI]	L"ʽη",
208
[MHLN-MULTI]	L"ʼη",
209
[MIAS-MULTI]	L"ʽι",
210
[MILN-MULTI]	L"ʼι",
211
[MLCT-MULTI]	L"ct",
212
[MLFF-MULTI]	L"ff",
213
[MLFFI-MULTI]	L"ffi",
214
[MLFFL-MULTI]	L"ffl",
215
[MLFL-MULTI]	L"fl",
216
[MLFI-MULTI]	L"fi",
217
[MLLS-MULTI]	L"ɫɫ",
218
[MLST-MULTI]	L"st",
219
[MOAS-MULTI]	L"ʽο",
220
[MOLN-MULTI]	L"ʼο",
221
[MOR-MULTI]	L"or",
222
[MRAS-MULTI]	L"ʽρ",
223
[MRLN-MULTI]	L"ʼρ",
224
[MTT-MULTI]	L"~~",
225
[MUAS-MULTI]	L"ʽυ",
226
[MULN-MULTI]	L"ʼυ",
227
[MWAS-MULTI]	L"ʽω",
228
[MWLN-MULTI]	L"ʼω",
229
[MOE-MULTI]	L"oe",
230
[MES-MULTI]	L"  ",
231
};
232
 
233
/*
 * Translation table stack maintained by changett:
 * the saved tables and the number of entries in use.
 */
static Rune 	*ttabstack[20];
static int	ntt;
235
 
236
/*
237
 * tab is an array of n Assoc's, sorted by key.
238
 * Look for key in tab, and return corresponding val
239
 * or -1 if not there
240
 */
241
long
242
lookassoc(Assoc *tab, int n, char *key)
243
{
244
	Assoc *q;
245
	long i, low, high;
246
	int r;
247
 
248
	for(low = -1, high = n; high > low+1; ){
249
		i = (high+low)/2;
250
		q = &tab[i];
251
		if((r=strcmp(key, q->key))<0)
252
			high = i;
253
		else if(r == 0)
254
			return q->val;
255
		else
256
			low=i;
257
	}
258
	return -1;
259
}
260
 
261
long
262
looknassoc(Nassoc *tab, int n, long key)
263
{
264
	Nassoc *q;
265
	long i, low, high;
266
 
267
	for(low = -1, high = n; high > low+1; ){
268
		i = (high+low)/2;
269
		q = &tab[i];
270
		if(key < q->key)
271
			high = i;
272
		else if(key == q->key)
273
			return q->val;
274
		else
275
			low=i;
276
	}
277
	return -1;
278
}
279
 
280
void
281
err(char *fmt, ...)
282
{
283
	char buf[1000];
284
	va_list v;
285
 
286
	va_start(v, fmt);
287
	vsnprint(buf, sizeof(buf), fmt, v);
288
	va_end(v);
289
	fprint(2, "%s: %s\n", argv0, buf);
290
}
291
 
292
/*
293
 * Write the rune r to bout, keeping track of line length
294
 * and breaking the lines (at blanks) when they get too long
295
 */
296
void
297
outrune(long r)
298
{
299
	if(outinhibit)
300
		return;
301
	if(++linelen > breaklen && r == L' ') {
302
		Bputc(bout, '\n');
303
		linelen = 0;
304
	} else
305
		Bputrune(bout, r);
306
}
307
 
308
void
309
outrunes(Rune *rp)
310
{
311
	Rune r;
312
 
313
	while((r = *rp++) != 0)
314
		outrune(r);
315
}
316
 
317
/* like outrune, but when arg is know to be a char */
318
void
319
outchar(int c)
320
{
321
	if(outinhibit)
322
		return;
323
	if(++linelen > breaklen && c == ' ') {
324
		c ='\n';
325
		linelen = 0;
326
	}
327
	Bputc(bout, c);
328
}
329
 
330
/*
 * Pass each byte of the zero-terminated string s to outchar.
 */
void
outchars(char *s)
{
	for(; *s != 0; s++)
		outchar(*s);
}
338
 
339
/*
 * Format print-style into a 1000-byte buffer and send the
 * result through outchars, so the usual line breaking applies.
 */
void
outprint(char *fmt, ...)
{
	va_list ap;
	char text[1000];

	va_start(ap, fmt);
	vsnprint(text, sizeof text, fmt, ap);
	va_end(ap);
	outchars(text);
}
350
 
351
/*
 * Output the bytes in [b, e) with outchar, treating each
 * newline as a blank and squeezing every run of blanks down
 * to a single blank.
 */
void
outpiece(char *b, char *e)
{
	int ch, prev;

	prev = 0;
	for(; b < e; b++){
		ch = *b;
		if(ch == '\n')
			ch = ' ';
		/* drop a blank that directly follows another blank */
		if(ch != ' ' || prev != ' ')
			outchar(ch);
		prev = ch;
	}
}
366
 
367
/*
368
 * Go to new line if not already there; indent if ind != 0.
369
 * If ind > 1, leave a blank line too.
370
 * Slight hack: assume if current line is only one or two
371
 * characters long, then they were spaces.
372
 */
373
void
374
outnl(int ind)
375
{
376
	if(outinhibit)
377
		return;
378
	if(ind) {
379
		if(ind > 1) {
380
			if(linelen > 2)
381
				Bputc(bout, '\n');
382
			Bprint(bout, "\n  ");
383
		} else if(linelen == 0)
384
			Bprint(bout, "  ");
385
		else if(linelen == 1)
386
			Bputc(bout, ' ');
387
		else if(linelen != 2)
388
			Bprint(bout, "\n  ");
389
		linelen = 2;
390
	} else {
391
		if(linelen) {
392
			Bputc(bout, '\n');
393
			linelen = 0;
394
		}
395
	}
396
}
397
 
398
/*
399
 * Fold the runes in null-terminated rp.
400
 * Use the sort(1) definition of folding (uppercase to lowercase,
401
 * accented characters to corresponding unaccented chars)
402
 */
403
void
404
fold(Rune *rp)
405
{
406
	Rune r;
407
 
408
	while((r = *rp) != 0) {
409
		r = tobaserune(r);
410
		if(isupperrune(r))
411
			r = tolowerrune(r);
412
		*rp++ = r;
413
	}
414
}
415
 
416
/*
417
 * Like fold, but put folded result into new
418
 * (assumed to have enough space).
419
 * old is a regular expression, but we know that
420
 * metacharacters aren't affected
421
 */
422
void
423
foldre(char *new, char *old)
424
{
425
	Rune r;
426
 
427
	while(*old) {
428
		old += chartorune(&r, old);
429
		r = tobaserune(r);
430
		if(isupperrune(r))
431
			r = tolowerrune(r);
432
		new += runetochar(new, &r);
433
	}
434
	*new = 0;
435
}
436
 
437
/*
438
 *	acomp(s, t) returns:
439
 *		-2 if s strictly precedes t
440
 *		-1 if s is a prefix of t
441
 *		0 if s is the same as t
442
 *		1 if t is a prefix of s
443
 *		2 if t strictly precedes s
444
 */
445
 
446
int
447
acomp(Rune *s, Rune *t)
448
{
449
	int cs, ct;
450
 
451
	for(;;) {
452
		cs = *s;
453
		ct = *t;
454
		if(cs != ct)
455
			break;
456
		if(cs == 0)
457
			return 0;
458
		s++;
459
		t++;
460
	}
461
	if(cs == 0)
462
		return -1;
463
	if(ct == 0)
464
		return 1;
465
	if(cs < ct)
466
		return -2;
467
	return 2;
468
}
469
 
470
/*
471
 * Copy null terminated Runes from 'from' to 'to'.
472
 */
473
void
474
runescpy(Rune *to, Rune *from)
475
{
476
	while((*to++ = *from++) != 0)
477
		continue;
478
}
479
 
480
/*
481
 * Conversion of unsigned number to long, no overflow detection
482
 */
483
long
484
runetol(Rune *r)
485
{
486
	int c;
487
	long n;
488
 
489
	n = 0;
490
	for(;; r++){
491
		c = *r;
492
		if(L'0'<=c && c<=L'9')
493
			c -= '0';
494
		else
495
			break;
496
		n = n*10 + c;
497
	}
498
	return n;
499
}
500
 
501
/*
502
 * See if there is a rune corresponding to the accented
503
 * version of r with accent acc (acc in [LIGS..LIGE-1]),
504
 * and return it if so, else return NONE.
505
 */
506
Rune
507
liglookup(Rune acc, Rune r)
508
{
509
	Rune *p;
510
 
511
	if(acc < LIGS || acc >= LIGE)
512
		return NONE;
513
	for(p = ligtab[acc-LIGS].pairs; *p; p += 2)
514
		if(*p == r)
515
			return *(p+1);
516
	return NONE;
517
}
518
 
519
/*
520
 * Maintain a translation table stack (a translation table
521
 * is an array of Runes indexed by bytes or 7-bit bytes).
522
 * If starting is true, push the curtab onto the stack
523
 * and return newtab; else pop the top of the stack and
524
 * return it.
525
 * If curtab is 0, initialize the stack and return.
526
 */
527
Rune *
528
changett(Rune *curtab, Rune *newtab, int starting)
529
{
530
	if(curtab == 0) {
531
		ntt = 0;
532
		return 0;
533
	}
534
	if(starting) {
535
		if(ntt >= asize(ttabstack)) {
536
			if(debug)
537
				err("translation stack overflow");
538
			return curtab;
539
		}
540
		ttabstack[ntt++] = curtab;
541
		return newtab;
542
	} else {
543
		if(ntt == 0) {
544
			if(debug)
545
				err("translation stack underflow");
546
			return curtab;
547
		}
548
		return ttabstack[--ntt];
549
	}
550
}