Subversion Repositories planix.SVN

Rev

Rev 59 | Details | Compare with Previous | Last modification | View Log | RSS feed

Rev Author Line No. Line
49 7u83 1
/*
2
 * Copyright (c) 1989, 1993
3
 *	The Regents of the University of California.  All rights reserved.
59 7u83 4
 * Copyright (c) 2019
5
 * 	The PLANIX Project. All rights reserved
49 7u83 6
 *
7
 * This code is derived from software contributed to Berkeley by
8
 * Adam S. Moskowitz of Menlo Consulting and Marciano Pitargue.
9
 *
10
 * Redistribution and use in source and binary forms, with or without
11
 * modification, are permitted provided that the following conditions
12
 * are met:
13
 * 1. Redistributions of source code must retain the above copyright
14
 *    notice, this list of conditions and the following disclaimer.
15
 * 2. Redistributions in binary form must reproduce the above copyright
16
 *    notice, this list of conditions and the following disclaimer in the
17
 *    documentation and/or other materials provided with the distribution.
18
 * 3. Neither the name of the University nor the names of its contributors
19
 *    may be used to endorse or promote products derived from this software
20
 *    without specific prior written permission.
21
 *
22
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32
 * SUCH DAMAGE.
33
 */
34
 
35
#include <ctype.h>
36
#include <errno.h>
37
#include <limits.h>
38
#include <locale.h>
39
#include <stdio.h>
40
#include <stdlib.h>
41
#include <string.h>
42
#include <unistd.h>
43
#include <wchar.h>
44
 
59 7u83 45
#include <err.h>
49 7u83 46
 
59 7u83 47
 
49 7u83 48
/*
 * Command-line state shared by main() and the cutting functions.
 * Each *flag variable is set to 1 by main() when the matching option
 * letter is seen.
 */
static int	bflag;		/* -b given: cut by byte position */
49
static int	cflag;		/* -c given: cut by character position */
50
static wchar_t	dchar;		/* field delimiter as a wide character; main() defaults it to '\t' */
51
static char	dcharmb[MB_LEN_MAX + 1];	/* same delimiter as a multibyte string; f_cut() writes it between output fields */
52
static int	dflag;		/* -d given: explicit field delimiter */
53
static int	fflag;		/* -f given: cut by field */
54
static int	nflag;		/* -n given: with -b in a multibyte locale main() selects b_n_cut() */
55
static int	sflag;		/* -s given: f_cut() drops lines that contain no delimiter */
56
static int	wflag;		/* -w given: is_delim() treats space and tab as delimiters */
57
 
58
/*
 * Results of parsing the list argument in get_list():
 *   autostart - largest N seen in a "-N" item (select positions 1..N)
 *   autostop  - smallest N seen in an "N-" item (select N to end of line),
 *               0 when no such item was given
 *   maxval    - highest position the cutting loops examine individually
 */
static size_t	autostart, autostop, maxval;
59
/* Selection table: positions[i] is non-zero when 1-based position i is selected; grown by needpos(). */
static char *	positions;
60
 
61
/* Cutting functions; each takes the input stream and its name for diagnostics. */
static int	b_cut(FILE *, const char *);
62
static int	b_n_cut(FILE *, const char *);
63
static int	c_cut(FILE *, const char *);
64
static int	f_cut(FILE *, const char *);
65
static void	get_list(char *);
66
static int	is_delim(wchar_t);
67
static void	needpos(size_t);
68
static void	usage(void);
69
 
70
int
71
main(int argc, char *argv[])
72
{
73
	FILE *fp;
74
	int (*fcn)(FILE *, const char *);
75
	int ch, rval;
76
	size_t n;
77
 
78
	setlocale(LC_ALL, "");
79
 
80
	fcn = NULL;
81
	dchar = '\t';			/* default delimiter is \t */
82
	strcpy(dcharmb, "\t");
83
 
84
	while ((ch = getopt(argc, argv, "b:c:d:f:snw")) != -1)
85
		switch(ch) {
86
		case 'b':
87
			get_list(optarg);
88
			bflag = 1;
89
			break;
90
		case 'c':
91
			get_list(optarg);
92
			cflag = 1;
93
			break;
94
		case 'd':
59 7u83 95
//			n = mbrtowc(&dchar, optarg, MB_LEN_MAX, NULL);
96
			n = mbtowc(&dchar, optarg, MB_LEN_MAX);
49 7u83 97
			if (dchar == '\0' || n != strlen(optarg))
98
				errx(1, "bad delimiter");
99
			strcpy(dcharmb, optarg);
100
			dflag = 1;
101
			break;
102
		case 'f':
103
			get_list(optarg);
104
			fflag = 1;
105
			break;
106
		case 's':
107
			sflag = 1;
108
			break;
109
		case 'n':
110
			nflag = 1;
111
			break;
112
		case 'w':
113
			wflag = 1;
114
			break;
115
		case '?':
116
		default:
117
			usage();
118
		}
119
	argc -= optind;
120
	argv += optind;
121
 
122
	if (fflag) {
123
		if (bflag || cflag || nflag || (wflag && dflag))
124
			usage();
125
	} else if (!(bflag || cflag) || dflag || sflag || wflag)
126
		usage();
127
	else if (!bflag && nflag)
128
		usage();
129
 
130
	if (fflag)
131
		fcn = f_cut;
132
	else if (cflag)
133
		fcn = MB_CUR_MAX > 1 ? c_cut : b_cut;
134
	else if (bflag)
135
		fcn = nflag && MB_CUR_MAX > 1 ? b_n_cut : b_cut;
136
 
137
	rval = 0;
138
	if (*argv)
139
		for (; *argv; ++argv) {
140
			if (strcmp(*argv, "-") == 0)
141
				rval |= fcn(stdin, "stdin");
142
			else {
143
				if (!(fp = fopen(*argv, "r"))) {
144
					warn("%s", *argv);
145
					rval = 1;
146
					continue;
147
				}
148
				fcn(fp, *argv);
149
				(void)fclose(fp);
150
			}
151
		}
152
	else
153
		rval = fcn(stdin, "stdin");
154
	exit(rval);
59 7u83 155
	return rval;
49 7u83 156
}
157
 
158
static void
159
get_list(char *list)
160
{
161
	size_t setautostart, start, stop;
162
	char *pos;
163
	char *p;
164
 
165
	/*
166
	 * set a byte in the positions array to indicate if a field or
167
	 * column is to be selected; use +1, it's 1-based, not 0-based.
168
	 * Numbers and number ranges may be overlapping, repeated, and in
169
	 * any order. We handle "-3-5" although there's no real reason to.
170
	 */
171
	for (; (p = strsep(&list, ", \t")) != NULL;) {
172
		setautostart = start = stop = 0;
173
		if (*p == '-') {
174
			++p;
175
			setautostart = 1;
176
		}
177
		if (isdigit((unsigned char)*p)) {
178
			start = stop = strtol(p, &p, 10);
179
			if (setautostart && start > autostart)
180
				autostart = start;
181
		}
182
		if (*p == '-') {
183
			if (isdigit((unsigned char)p[1]))
184
				stop = strtol(p + 1, &p, 10);
185
			if (*p == '-') {
186
				++p;
187
				if (!autostop || autostop > stop)
188
					autostop = stop;
189
			}
190
		}
191
		if (*p)
192
			errx(1, "[-bcf] list: illegal list value");
193
		if (!stop || !start)
194
			errx(1, "[-bcf] list: values may not include zero");
195
		if (maxval < stop) {
196
			maxval = stop;
197
			needpos(maxval + 1);
198
		}
199
		for (pos = positions + start; start++ <= stop; *pos++ = 1);
200
	}
201
 
202
	/* overlapping ranges */
203
	if (autostop && maxval > autostop) {
204
		maxval = autostop;
205
		needpos(maxval + 1);
206
	}
207
 
208
	/* reversed range with autostart */
209
	if (maxval < autostart) {
210
		maxval = autostart;
211
		needpos(maxval + 1);
212
	}
213
 
214
	/* set autostart */
215
	if (autostart)
216
		memset(positions + 1, '1', autostart);
217
}
218
 
219
static void
220
needpos(size_t n)
221
{
222
	static size_t npos;
223
	size_t oldnpos;
224
 
225
	/* Grow the positions array to at least the specified size. */
226
	if (n > npos) {
227
		oldnpos = npos;
228
		if (npos == 0)
229
			npos = n;
230
		while (n > npos)
231
			npos *= 2;
232
		if ((positions = realloc(positions, npos)) == NULL)
233
			err(1, "realloc");
234
		memset((char *)positions + oldnpos, 0, npos - oldnpos);
235
	}
236
}
237
 
238
static int
59 7u83 239
b_cut(FILE *fp, const char *fname )
49 7u83 240
{
241
	int ch, col;
242
	char *pos;
243
 
244
	ch = 0;
245
	for (;;) {
246
		pos = positions + 1;
247
		for (col = maxval; col; --col) {
248
			if ((ch = getc(fp)) == EOF)
249
				return (0);
250
			if (ch == '\n')
251
				break;
252
			if (*pos++)
253
				(void)putchar(ch);
254
		}
255
		if (ch != '\n') {
256
			if (autostop)
257
				while ((ch = getc(fp)) != EOF && ch != '\n')
258
					(void)putchar(ch);
259
			else
260
				while ((ch = getc(fp)) != EOF && ch != '\n');
261
		}
262
		(void)putchar('\n');
263
	}
264
	return (0);
265
}
266
 
267
/*
268
 * Cut based on byte positions, taking care not to split multibyte characters.
269
 * Although this function also handles the case where -n is not specified,
270
 * b_cut() ought to be much faster.
271
 */
272
static int
273
b_n_cut(FILE *fp, const char *fname)
274
{
275
	size_t col, i, lbuflen;
276
	char *lbuf;
277
	int canwrite, clen, warned;
278
	mbstate_t mbs;
279
 
280
	memset(&mbs, 0, sizeof(mbs));
281
	warned = 0;
59 7u83 282
 
283
	lbuf = NULL;
284
 
285
//	while ((lbuf = fgetln(fp, &lbuflen)) != NULL) {
286
	while ((lbuflen = getline(&lbuf,0, fp)) != -1) {
49 7u83 287
		for (col = 0; lbuflen > 0; col += clen) {
288
			if ((clen = mbrlen(lbuf, lbuflen, &mbs)) < 0) {
289
				if (!warned) {
290
					warn("%s", fname);
291
					warned = 1;
292
				}
293
				memset(&mbs, 0, sizeof(mbs));
294
				clen = 1;
295
			}
296
			if (clen == 0 || *lbuf == '\n')
297
				break;
298
			if (col < maxval && !positions[1 + col]) {
299
				/*
300
				 * Print the character if (1) after an initial
301
				 * segment of un-selected bytes, the rest of
302
				 * it is selected, and (2) the last byte is
303
				 * selected.
304
				 */
305
				i = col;
306
				while (i < col + clen && i < maxval &&
307
				    !positions[1 + i])
308
					i++;
309
				canwrite = i < col + clen;
310
				for (; i < col + clen && i < maxval; i++)
311
					canwrite &= positions[1 + i];
312
				if (canwrite)
313
					fwrite(lbuf, 1, clen, stdout);
314
			} else {
315
				/*
316
				 * Print the character if all of it has
317
				 * been selected.
318
				 */
319
				canwrite = 1;
320
				for (i = col; i < col + clen; i++)
321
					if ((i >= maxval && !autostop) ||
322
					    (i < maxval && !positions[1 + i])) {
323
						canwrite = 0;
324
						break;
325
					}
326
				if (canwrite)
327
					fwrite(lbuf, 1, clen, stdout);
328
			}
329
			lbuf += clen;
330
			lbuflen -= clen;
331
		}
332
		if (lbuflen > 0)
333
			putchar('\n');
334
	}
59 7u83 335
	free(lbuf);
49 7u83 336
	return (warned);
337
}
338
 
339
static int
340
c_cut(FILE *fp, const char *fname)
341
{
342
	wint_t ch;
343
	int col;
344
	char *pos;
345
 
346
	ch = 0;
347
	for (;;) {
348
		pos = positions + 1;
349
		for (col = maxval; col; --col) {
350
			if ((ch = getwc(fp)) == WEOF)
351
				goto out;
352
			if (ch == '\n')
353
				break;
354
			if (*pos++)
355
				(void)putwchar(ch);
356
		}
357
		if (ch != '\n') {
358
			if (autostop)
359
				while ((ch = getwc(fp)) != WEOF && ch != '\n')
360
					(void)putwchar(ch);
361
			else
362
				while ((ch = getwc(fp)) != WEOF && ch != '\n');
363
		}
364
		(void)putwchar('\n');
365
	}
366
out:
59 7u83 367
 
49 7u83 368
	if (ferror(fp)) {
369
		warn("%s", fname);
370
		return (1);
371
	}
372
	return (0);
373
}
374
 
375
static int
376
is_delim(wchar_t ch)
377
{
378
	if (wflag) {
379
		if (ch == ' ' || ch == '\t')
380
			return 1;
381
	} else {
382
		if (ch == dchar)
383
			return 1;
384
	}
385
	return 0;
386
}
387
 
388
static int
389
f_cut(FILE *fp, const char *fname)
390
{
391
	wchar_t ch;
392
	int field, i, isdelim;
393
	char *pos, *p;
394
	int output;
395
	char *lbuf, *mlbuf;
396
	size_t clen, lbuflen, reallen;
397
 
398
	mlbuf = NULL;
59 7u83 399
 
400
	lbuf = NULL;
401
 
402
 
403
	//while ((lbuf = fgetln(fp, &lbuflen)) != NULL) 
404
	while ((lbuflen = getline(&lbuf,0, fp)) != -1) 
405
	{
49 7u83 406
		reallen = lbuflen;
407
		/* Assert EOL has a newline. */
408
		if (*(lbuf + lbuflen - 1) != '\n') {
409
			/* Can't have > 1 line with no trailing newline. */
410
			mlbuf = malloc(lbuflen + 1);
411
			if (mlbuf == NULL)
412
				err(1, "malloc");
413
			memcpy(mlbuf, lbuf, lbuflen);
414
			*(mlbuf + lbuflen) = '\n';
415
			lbuf = mlbuf;
416
			reallen++;
417
		}
418
		output = 0;
419
		for (isdelim = 0, p = lbuf;; p += clen) {
59 7u83 420
//			clen = mbrtowc(&ch, p, lbuf + reallen - p, NULL);
421
			clen = mbtowc(&ch, p, lbuf + reallen - p);
49 7u83 422
			if (clen == (size_t)-1 || clen == (size_t)-2) {
59 7u83 423
//				warnc(EILSEQ, "%s", fname);
424
				free(lbuf);
49 7u83 425
				free(mlbuf);
426
				return (1);
427
			}
428
			if (clen == 0)
429
				clen = 1;
430
			/* this should work if newline is delimiter */
431
			if (is_delim(ch))
432
				isdelim = 1;
433
			if (ch == '\n') {
434
				if (!isdelim && !sflag)
435
					(void)fwrite(lbuf, lbuflen, 1, stdout);
436
				break;
437
			}
438
		}
439
		if (!isdelim)
440
			continue;
441
 
442
		pos = positions + 1;
443
		for (field = maxval, p = lbuf; field; --field, ++pos) {
444
			if (*pos && output++)
445
				for (i = 0; dcharmb[i] != '\0'; i++)
446
					putchar(dcharmb[i]);
447
			for (;;) {
59 7u83 448
//				clen = mbrtowc(&ch, p, lbuf + reallen - p,
449
//				    NULL);
450
				clen = mbtowc(&ch, p, lbuf + reallen - p);
49 7u83 451
				if (clen == (size_t)-1 || clen == (size_t)-2) {
59 7u83 452
//					warnc(EILSEQ, "%s", fname);
453
					free(lbuf);
49 7u83 454
					free(mlbuf);
455
					return (1);
456
				}
457
				if (clen == 0)
458
					clen = 1;
459
				p += clen;
460
				if (ch == '\n' || is_delim(ch)) {
461
					/* compress whitespace */
462
					if (wflag && ch != '\n')
463
						while (is_delim(*p))
464
							p++;
465
					break;
466
				}
467
				if (*pos)
468
					for (i = 0; i < (int)clen; i++)
469
						putchar(p[i - clen]);
470
			}
471
			if (ch == '\n')
472
				break;
473
		}
474
		if (ch != '\n') {
475
			if (autostop) {
476
				if (output)
477
					for (i = 0; dcharmb[i] != '\0'; i++)
478
						putchar(dcharmb[i]);
479
				for (; (ch = *p) != '\n'; ++p)
480
					(void)putchar(ch);
481
			} else
482
				for (; (ch = *p) != '\n'; ++p);
483
		}
484
		(void)putchar('\n');
485
	}
486
	free(mlbuf);
59 7u83 487
	free(lbuf);
488
 
49 7u83 489
	return (0);
490
}
491
 
492
/*
 * Print the three accepted invocation forms on standard error and exit
 * with status 1.  Does not return.
 */
static void
usage(void)
{
	const char *const byte_form =
	    "usage: cut -b list [-n] [file ...]";
	const char *const char_form =
	    "       cut -c list [file ...]";
	const char *const field_form =
	    "       cut -f list [-s] [-w | -d delim] [file ...]";

	(void)fprintf(stderr, "%s\n%s\n%s\n",
		byte_form, char_form, field_form);
	exit(1);
}