Subversion Repositories planix.SVN

Rev

Details | Last modification | View Log | RSS feed

Rev Author Line No. Line
2 - 1
/*	join F1 F2 on stuff */
2
#include <u.h>
3
#include <libc.h>
4
#include <bio.h>
5
#include <ctype.h>
6
 
7
/*
 * Indices for the two inputs.  F1 and F2 select a row of f[], buf[]
 * and ppi[]; NIN is the number of such rows.  F0 is only ever stored
 * in olistf[] and marks an -o entry that asks for the join field.
 */
enum {
	F1 = 0,		/* first input file */
	F2 = 1,		/* second input file */
	NIN = 2,	/* number of input files */
	F0 = 3,		/* -o entry "0": the join field itself */
};
13
 
14
/* upper bound on the number of fields recorded for one input line */
#define	NFLD	100
/* order the current line of file 1 against that of file 2 by their join fields */
#define	comp()	(runestrcmp(ppi[F1][j1], ppi[F2][j2]))
16
 
17
Biobuf *f[NIN];
18
Rune buf[NIN][Bsize];	/* input lines */
19
Rune *ppi[NIN][NFLD+1];	/* pointers to fields in lines */
20
Rune	sep1	= ' ';	/* default field separator */
21
Rune	sep2	= '\t';
22
int	j1	= 1;	/* join of this field of file 1 */
23
int	j2	= 1;	/* join of this field of file 2 */
24
int	a1;
25
int 	a2;
26
 
27
int	olist[NIN*NFLD];  /* output these fields */
28
int	olistf[NIN*NFLD]; /* from these files */
29
int	no;		/* number of entries in olist */
30
char *sepstr	= " ";
31
int	discard;	/* count of truncated lines */
32
Rune	null[Bsize]	= L"";
33
Biobuf binbuf, boutbuf;
34
Biobuf *bin, *bout;
35
 
36
char	*getoptarg(int*, char***);
37
int	input(int);
38
void	join(int);
39
void	oparse(char*);
40
void	output(int, int);
41
Rune	*strtorune(Rune *, char *);
42
 
43
void
44
main(int argc, char **argv)
45
{
46
	int i;
47
	vlong off1, off2;
48
 
49
	bin = &binbuf;
50
	bout = &boutbuf;
51
	Binit(bin, 0, OREAD);
52
	Binit(bout, 1, OWRITE);
53
 
54
	argv0 = argv[0];
55
	while (argc > 1 && argv[1][0] == '-') {
56
		if (argv[1][1] == '\0')
57
			break;
58
		switch (argv[1][1]) {
59
		case '-':
60
			argc--;
61
			argv++;
62
			goto proceed;
63
		case 'a':
64
			switch(*getoptarg(&argc, &argv)) {
65
			case '1':
66
				a1++;
67
				break;
68
			case '2':
69
				a2++;
70
				break;
71
			default:
72
				sysfatal("incomplete option -a");
73
			}
74
			break;
75
		case 'e':
76
			strtorune(null, getoptarg(&argc, &argv));
77
			break;
78
		case 't':
79
			sepstr=getoptarg(&argc, &argv);
80
			chartorune(&sep1, sepstr);
81
			sep2 = sep1;
82
			break;
83
		case 'o':
84
			if(argv[1][2]!=0 ||
85
			   argc>2 && strchr(argv[2],',')!=0)
86
				oparse(getoptarg(&argc, &argv));
87
			else for (no = 0; no<2*NFLD && argc>2; no++){
88
				if (argv[2][0] == '1' && argv[2][1] == '.') {
89
					olistf[no] = F1;
90
					olist[no] = atoi(&argv[2][2]);
91
				} else if (argv[2][0] == '2' && argv[2][1] == '.') {
92
					olist[no] = atoi(&argv[2][2]);
93
					olistf[no] = F2;
94
				} else if (argv[2][0] == '0')
95
					olistf[no] = F0;
96
				else
97
					break;
98
				argc--;
99
				argv++;
100
			}
101
			break;
102
		case 'j':
103
			if(argc <= 2)
104
				break;
105
			if (argv[1][2] == '1')
106
				j1 = atoi(argv[2]);
107
			else if (argv[1][2] == '2')
108
				j2 = atoi(argv[2]);
109
			else
110
				j1 = j2 = atoi(argv[2]);
111
			argc--;
112
			argv++;
113
			break;
114
		case '1':
115
			j1 = atoi(getoptarg(&argc, &argv));
116
			break;
117
		case '2':
118
			j2 = atoi(getoptarg(&argc, &argv));
119
			break;
120
		}
121
		argc--;
122
		argv++;
123
	}
124
proceed:
125
	for (i = 0; i < no; i++)
126
		if (olist[i]-- > NFLD)	/* 0 origin */
127
			sysfatal("field number too big in -o");
128
	if (argc != 3) {
129
		fprint(2, "usage: join [-1 x -2 y] [-o list] file1 file2\n");
130
		exits("usage");
131
	}
132
	if (j1 < 1  || j2 < 1)
133
		sysfatal("invalid field indices");
134
	j1--;
135
	j2--;	/* everyone else believes in 0 origin */
136
 
137
	if (strcmp(argv[1], "-") == 0)
138
		f[F1] = bin;
139
	else if ((f[F1] = Bopen(argv[1], OREAD)) == 0)
140
		sysfatal("can't open %s: %r", argv[1]);
141
	if(strcmp(argv[2], "-") == 0)
142
		f[F2] = bin;
143
	else if ((f[F2] = Bopen(argv[2], OREAD)) == 0)
144
		sysfatal("can't open %s: %r", argv[2]);
145
 
146
	off1 = Boffset(f[F1]);
147
	off2 = Boffset(f[F2]);
148
	if(Bseek(f[F2], 0, 2) >= 0){
149
		Bseek(f[F2], off2, 0);
150
		join(F2);
151
	}else if(Bseek(f[F1], 0, 2) >= 0){
152
		Bseek(f[F1], off1, 0);
153
		Bseek(f[F2], off2, 0);
154
		join(F1);
155
	}else
156
		sysfatal("neither file is randomly accessible");
157
	if (discard)
158
		sysfatal("some input line was truncated");
159
	exits("");
160
}
161
 
162
char *
163
runetostr(char *buf, Rune *r)
164
{
165
	char *s;
166
 
167
	for(s = buf; *r; r++)
168
		s += runetochar(s, r);
169
	*s = '\0';
170
	return buf;
171
}
172
 
173
/*
 * Decode the NUL-terminated char string s into buf as a
 * zero-terminated rune string and return buf.  The caller must
 * supply a buffer large enough; no bound is checked here.
 */
Rune *
strtorune(Rune *buf, char *s)
{
	Rune *out;

	out = buf;
	while(*s != '\0'){
		s += chartorune(out, s);
		out++;
	}
	*out = 0;
	return buf;
}
183
 
184
void
185
readboth(int n[])
186
{
187
	n[F1] = input(F1);
188
	n[F2] = input(F2);
189
}
190
 
191
void
192
seekbotreadboth(int seekf, vlong bot, int n[])
193
{
194
	Bseek(f[seekf], bot, 0);
195
	readboth(n);
196
}
197
 
198
void
199
join(int seekf)
200
{
201
	int cmp, less;
202
	int n[NIN];
203
	vlong top, bot;
204
 
205
	less = seekf == F2;
206
	top = 0;
207
	bot = Boffset(f[seekf]);
208
	readboth(n);
209
	while(n[F1]>0 && n[F2]>0 || (a1||a2) && n[F1]+n[F2]>0) {
210
		cmp = comp();
211
		if(n[F1]>0 && n[F2]>0 && cmp>0 || n[F1]==0) {
212
			if(a2)
213
				output(0, n[F2]);
214
			if (seekf == F2)
215
				bot = Boffset(f[seekf]);
216
			n[F2] = input(F2);
217
		} else if(n[F1]>0 && n[F2]>0 && cmp<0 || n[F2]==0) {
218
			if(a1)
219
				output(n[F1], 0);
220
			if (seekf == F1)
221
				bot = Boffset(f[seekf]);
222
			n[F1] = input(F1);
223
		} else {
224
			/* n[F1]>0 && n[F2]>0 && cmp==0 */
225
			while(n[F2]>0 && cmp==0) {
226
				output(n[F1], n[F2]);
227
				top = Boffset(f[seekf]);
228
				n[seekf] = input(seekf);
229
				cmp = comp();
230
			}
231
			seekbotreadboth(seekf, bot, n);
232
			for(;;) {
233
				cmp = comp();
234
				if(n[F1]>0 && n[F2]>0 && cmp==0) {
235
					output(n[F1], n[F2]);
236
					n[seekf] = input(seekf);
237
				} else if(n[F1]>0 && n[F2]>0 &&
238
				    (less? cmp<0 :cmp>0) || n[seekf]==0)
239
					seekbotreadboth(seekf, bot, n);
240
				else {
241
					/*
242
					 * n[F1]>0 && n[F2]>0 &&
243
					 * (less? cmp>0 :cmp<0) ||
244
					 * n[seekf==F1? F2: F1]==0
245
					 */
246
					Bseek(f[seekf], top, 0);
247
					bot = top;
248
					n[seekf] = input(seekf);
249
					break;
250
				}
251
			}
252
		}
253
	}
254
}
255
 
256
int
257
input(int n)		/* get input line and split into fields */
258
{
259
	int c, i, len;
260
	char *line;
261
	Rune *bp;
262
	Rune **pp;
263
 
264
	bp = buf[n];
265
	pp = ppi[n];
266
	line = Brdline(f[n], '\n');
267
	if (line == nil)
268
		return(0);
269
	len = Blinelen(f[n]) - 1;
270
	c = line[len];
271
	line[len] = '\0';
272
	strtorune(bp, line);
273
	line[len] = c;			/* restore delimiter */
274
	if (c != '\n')
275
		discard++;
276
 
277
	i = 0;
278
	do {
279
		i++;
280
		if (sep1 == ' ')	/* strip multiples */
281
			while ((c = *bp) == sep1 || c == sep2)
282
				bp++;	/* skip blanks */
283
		*pp++ = bp;		/* record beginning */
284
		while ((c = *bp) != sep1 && c != sep2 && c != '\0')
285
			bp++;
286
		*bp++ = '\0';		/* mark end by overwriting blank */
287
	} while (c != '\0' && i < NFLD-1);
288
 
289
	*pp = 0;
290
	return(i);
291
}
292
 
293
void
294
prfields(int f, int on, int jn)
295
{
296
	int i;
297
	char buf[Bsize];
298
 
299
	for (i = 0; i < on; i++)
300
		if (i != jn)
301
			Bprint(bout, "%s%s", sepstr, runetostr(buf, ppi[f][i]));
302
}
303
 
304
void
305
output(int on1, int on2)	/* print items from olist */
306
{
307
	int i;
308
	Rune *temp;
309
	char buf[Bsize];
310
 
311
	if (no <= 0) {	/* default case */
312
		Bprint(bout, "%s", runetostr(buf, on1? ppi[F1][j1]: ppi[F2][j2]));
313
		prfields(F1, on1, j1);
314
		prfields(F2, on2, j2);
315
		Bputc(bout, '\n');
316
	} else {
317
		for (i = 0; i < no; i++) {
318
			if (olistf[i]==F0 && on1>j1)
319
				temp = ppi[F1][j1];
320
			else if (olistf[i]==F0 && on2>j2)
321
				temp = ppi[F2][j2];
322
			else {
323
				temp = ppi[olistf[i]][olist[i]];
324
				if(olistf[i]==F1 && on1<=olist[i] ||
325
				   olistf[i]==F2 && on2<=olist[i] ||
326
				   *temp==0)
327
					temp = null;
328
			}
329
			Bprint(bout, "%s", runetostr(buf, temp));
330
			if (i == no - 1)
331
				Bputc(bout, '\n');
332
			else
333
				Bprint(bout, "%s", sepstr);
334
		}
335
	}
336
}
337
 
338
/*
 * Return the argument of the option in (*argvp)[1].  If text follows
 * the option letter in the same word, that text is the argument and
 * nothing is consumed.  Otherwise the next word is the argument:
 * *argcp and *argvp are stepped past one word and that next word is
 * returned.  It is a fatal error if there is no next word or it
 * begins with '-'.
 */
char *
getoptarg(int *argcp, char ***argvp)
{
	char **av;
	char *opt;

	av = *argvp;
	opt = av[1];
	if(opt[2] != '\0')
		return opt + 2;
	if(*argcp <= 2 || av[2][0] == '-')
		sysfatal("incomplete option %s", opt);
	(*argcp)--;
	*argvp = av + 1;
	return av[2];
}
351
 
352
void
353
oparse(char *s)
354
{
355
	for (no = 0; no<2*NFLD && *s; no++, s++) {
356
		switch(*s) {
357
		case 0:
358
			return;
359
		case '0':
360
			olistf[no] = F0;
361
			break;
362
		case '1':
363
		case '2':
364
			if(s[1] == '.' && isdigit(s[2])) {
365
				olistf[no] = *s=='1'? F1: F2;
366
				olist[no] = atoi(s += 2);
367
				break;
368
			}
369
			/* fall thru */
370
		default:
371
			sysfatal("invalid -o list");
372
		}
373
		if(s[1] == ',')
374
			s++;
375
	}
376
}