WebSVN – planix.SVN – Blame – /os/branches/feature-vt/sys/src/ape/cmd/diff/analyze.c

Rev	Author	Line No.	Line
2	-	1	`/* Analyze file differences for GNU DIFF.`
		2	`Copyright (C) 1988, 1989, 1992, 1993 Free Software Foundation, Inc.`
		3
		4	`This file is part of GNU DIFF.`
		5
		6	`GNU DIFF is free software; you can redistribute it and/or modify`
		7	`it under the terms of the GNU General Public License as published by`
		8	`the Free Software Foundation; either version 2, or (at your option)`
		9	`any later version.`
		10
		11	`GNU DIFF is distributed in the hope that it will be useful,`
		12	`but WITHOUT ANY WARRANTY; without even the implied warranty of`
		13	`MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the`
		14	`GNU General Public License for more details.`
		15
		16	`You should have received a copy of the GNU General Public License`
		17	`along with GNU DIFF; see the file COPYING. If not, write to`
		18	`the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */`
		19
		20	`/* The basic algorithm is described in:`
		21	`"An O(ND) Difference Algorithm and its Variations", Eugene Myers,`
		22	`Algorithmica Vol. 1 No. 2, 1986, pp. 251-266;`
		23	`see especially section 4.2, which describes the variation used below.`
		24	`Unless the --minimal option is specified, this code uses the TOO_EXPENSIVE`
		25	`heuristic, by Paul Eggert, to limit the cost to O(N**1.5 log N)`
		26	`at the price of producing suboptimal output for large inputs with`
		27	`many differences.`
		28
		29	`The basic algorithm was independently discovered as described in:`
		30	`"Algorithms for Approximate String Matching", E. Ukkonen,`
		31	`Information and Control Vol. 64, 1985, pp. 100-118. */`
		32
		33	`#include "diff.h"`
		34	`#include "cmpbuf.h"`
		35
		36	`extern int no_discards;`
		37
		38	`static int xvec, yvec; /* Vectors being compared. */`
		39	`static int fdiag; / Vector, indexed by diagonal, containing`
		40	`1 + the X coordinate of the point furthest`
		41	`along the given diagonal in the forward`
		42	`search of the edit matrix. */`
		43	`static int bdiag; / Vector, indexed by diagonal, containing`
		44	`the X coordinate of the point furthest`
		45	`along the given diagonal in the backward`
		46	`search of the edit matrix. */`
		47	`static int too_expensive; /* Edit scripts longer than this are too`
		48	`expensive to compute. */`
		49
		50	#define SNAKE_LIMIT 20 /* Snakes bigger than this are considered `big'. */
		51
		52	`struct partition`
		53	`{`
		54	`int xmid, ymid; /* Midpoints of this partition. */`
		55	`int lo_minimal; /* Nonzero if low half will be analyzed minimally. */`
		56	`int hi_minimal; /* Likewise for high half. */`
		57	`};`
		58
		59	`static int diag PARAMS((int, int, int, int, int, struct partition *));`
		60	`static struct change add_change PARAMS((int, int, int, int, struct change ));`
		61	`static struct change *build_reverse_script PARAMS((struct file_data const[]));`
		62	`static struct change *build_script PARAMS((struct file_data const[]));`
		63	`static void briefly_report PARAMS((int, struct file_data const[]));`
		64	`static void compareseq PARAMS((int, int, int, int, int));`
		65	`static void discard_confusing_lines PARAMS((struct file_data[]));`
		66	`static void shift_boundaries PARAMS((struct file_data[]));`
		67
		68	`/* Find the midpoint of the shortest edit script for a specified`
		69	`portion of the two files.`
		70
		71	`Scan from the beginnings of the files, and simultaneously from the ends,`
		72	`doing a breadth-first search through the space of edit-sequence.`
		73	`When the two searches meet, we have found the midpoint of the shortest`
		74	`edit sequence.`
		75
		76	`If MINIMAL is nonzero, find the minimal edit script regardless`
		77	`of expense. Otherwise, if the search is too expensive, use`
		78	`heuristics to stop the search and report a suboptimal answer.`
		79
		80	`Set PART->(XMID,YMID) to the midpoint (XMID,YMID). The diagonal number`
		81	`XMID - YMID equals the number of inserted lines minus the number`
		82	`of deleted lines (counting only lines before the midpoint).`
		83	`Return the approximate edit cost; this is the total number of`
		84	`lines inserted or deleted (counting only lines before the midpoint),`
		85	`unless a heuristic is used to terminate the search prematurely.`
		86
		87	`Set PART->LEFT_MINIMAL to nonzero iff the minimal edit script for the`
		88	`left half of the partition is known; similarly for PART->RIGHT_MINIMAL.`
		89
		90	`This function assumes that the first lines of the specified portions`
		91	`of the two files do not match, and likewise that the last lines do not`
		92	`match. The caller must trim matching lines from the beginning and end`
		93	`of the portions it is going to specify.`
		94
		95	`If we return the "wrong" partitions,`
		96	`the worst this can do is cause suboptimal diff output.`
		97	`It cannot cause incorrect diff output. */`
		98
		99	`static int`
		100	`diag (xoff, xlim, yoff, ylim, minimal, part)`
		101	`int xoff, xlim, yoff, ylim, minimal;`
		102	`struct partition *part;`
		103	`{`
		104	`int const fd = fdiag; / Give the compiler a chance. */`
		105	`int const bd = bdiag; / Additional help for the compiler. */`
		106	`int const const xv = xvec; / Still more help for the compiler. */`
		107	`int const const yv = yvec; / And more and more . . . */`
		108	`int const dmin = xoff - ylim; /* Minimum valid diagonal. */`
		109	`int const dmax = xlim - yoff; /* Maximum valid diagonal. */`
		110	`int const fmid = xoff - yoff; /* Center diagonal of top-down search. */`
		111	`int const bmid = xlim - ylim; /* Center diagonal of bottom-up search. */`
		112	`int fmin = fmid, fmax = fmid; /* Limits of top-down search. */`
		113	`int bmin = bmid, bmax = bmid; /* Limits of bottom-up search. */`
		114	`int c; /* Cost. */`
		115	`int odd = (fmid - bmid) & 1; /* True if southeast corner is on an odd`
		116	`diagonal with respect to the northwest. */`
		117
		118	`fd[fmid] = xoff;`
		119	`bd[bmid] = xlim;`
		120
		121	`for (c = 1;; ++c)`
		122	`{`
		123	`int d; /* Active diagonal. */`
		124	`int big_snake = 0;`
		125
		126	`/* Extend the top-down search by an edit step in each diagonal. */`
		127	`fmin > dmin ? fd[--fmin - 1] = -1 : ++fmin;`
		128	`fmax < dmax ? fd[++fmax + 1] = -1 : --fmax;`
		129	`for (d = fmax; d >= fmin; d -= 2)`
		130	`{`
		131	`int x, y, oldx, tlo = fd[d - 1], thi = fd[d + 1];`
		132
		133	`if (tlo >= thi)`
		134	`x = tlo + 1;`
		135	`else`
		136	`x = thi;`
		137	`oldx = x;`
		138	`y = x - d;`
		139	`while (x < xlim && y < ylim && xv[x] == yv[y])`
		140	`++x, ++y;`
		141	`if (x - oldx > SNAKE_LIMIT)`
		142	`big_snake = 1;`
		143	`fd[d] = x;`
		144	`if (odd && bmin <= d && d <= bmax && bd[d] <= x)`
		145	`{`
		146	`part->xmid = x;`
		147	`part->ymid = y;`
		148	`part->lo_minimal = part->hi_minimal = 1;`
		149	`return 2 * c - 1;`
		150	`}`
		151	`}`
		152
		153	`/* Similarly extend the bottom-up search. */`
		154	`bmin > dmin ? bd[--bmin - 1] = INT_MAX : ++bmin;`
		155	`bmax < dmax ? bd[++bmax + 1] = INT_MAX : --bmax;`
		156	`for (d = bmax; d >= bmin; d -= 2)`
		157	`{`
		158	`int x, y, oldx, tlo = bd[d - 1], thi = bd[d + 1];`
		159
		160	`if (tlo < thi)`
		161	`x = tlo;`
		162	`else`
		163	`x = thi - 1;`
		164	`oldx = x;`
		165	`y = x - d;`
		166	`while (x > xoff && y > yoff && xv[x - 1] == yv[y - 1])`
		167	`--x, --y;`
		168	`if (oldx - x > SNAKE_LIMIT)`
		169	`big_snake = 1;`
		170	`bd[d] = x;`
		171	`if (!odd && fmin <= d && d <= fmax && x <= fd[d])`
		172	`{`
		173	`part->xmid = x;`
		174	`part->ymid = y;`
		175	`part->lo_minimal = part->hi_minimal = 1;`
		176	`return 2 * c;`
		177	`}`
		178	`}`
		179
		180	`if (minimal)`
		181	`continue;`
		182
		183	`/* Heuristic: check occasionally for a diagonal that has made`
		184	`lots of progress compared with the edit distance.`
		185	`If we have any such, find the one that has made the most`
		186	`progress and return it as if it had succeeded.`
		187
		188	`With this heuristic, for files with a constant small density`
		189	`of changes, the algorithm is linear in the file size. */`
		190
		191	`if (c > 200 && big_snake && heuristic)`
		192	`{`
		193	`int best;`
		194
		195	`best = 0;`
		196	`for (d = fmax; d >= fmin; d -= 2)`
		197	`{`
		198	`int dd = d - fmid;`
		199	`int x = fd[d];`
		200	`int y = x - d;`
		201	`int v = (x - xoff) * 2 - dd;`
		202	`if (v > 12 * (c + (dd < 0 ? -dd : dd)))`
		203	`{`
		204	`if (v > best`
		205	`&& xoff + SNAKE_LIMIT <= x && x < xlim`
		206	`&& yoff + SNAKE_LIMIT <= y && y < ylim)`
		207	`{`
		208	`/* We have a good enough best diagonal;`
		209	`now insist that it end with a significant snake. */`
		210	`int k;`
		211
		212	`for (k = 1; xv[x - k] == yv[y - k]; k++)`
		213	`if (k == SNAKE_LIMIT)`
		214	`{`
		215	`best = v;`
		216	`part->xmid = x;`
		217	`part->ymid = y;`
		218	`break;`
		219	`}`
		220	`}`
		221	`}`
		222	`}`
		223	`if (best > 0)`
		224	`{`
		225	`part->lo_minimal = 1;`
		226	`part->hi_minimal = 0;`
		227	`return 2 * c - 1;`
		228	`}`
		229
		230	`best = 0;`
		231	`for (d = bmax; d >= bmin; d -= 2)`
		232	`{`
		233	`int dd = d - bmid;`
		234	`int x = bd[d];`
		235	`int y = x - d;`
		236	`int v = (xlim - x) * 2 + dd;`
		237	`if (v > 12 * (c + (dd < 0 ? -dd : dd)))`
		238	`{`
		239	`if (v > best`
		240	`&& xoff < x && x <= xlim - SNAKE_LIMIT`
		241	`&& yoff < y && y <= ylim - SNAKE_LIMIT)`
		242	`{`
		243	`/* We have a good enough best diagonal;`
		244	`now insist that it end with a significant snake. */`
		245	`int k;`
		246
		247	`for (k = 0; xv[x + k] == yv[y + k]; k++)`
		248	`if (k == SNAKE_LIMIT - 1)`
		249	`{`
		250	`best = v;`
		251	`part->xmid = x;`
		252	`part->ymid = y;`
		253	`break;`
		254	`}`
		255	`}`
		256	`}`
		257	`}`
		258	`if (best > 0)`
		259	`{`
		260	`part->lo_minimal = 0;`
		261	`part->hi_minimal = 1;`
		262	`return 2 * c - 1;`
		263	`}`
		264	`}`
		265
		266	`/* Heuristic: if we've gone well beyond the call of duty,`
		267	`give up and report halfway between our best results so far. */`
		268	`if (c >= too_expensive)`
		269	`{`
		270	`int fxybest, fxbest;`
		271	`int bxybest, bxbest;`
		272
		273	fxbest = bxbest = 0; /* Pacify `gcc -Wall'. */
		274
		275	`/* Find forward diagonal that maximizes X + Y. */`
		276	`fxybest = -1;`
		277	`for (d = fmax; d >= fmin; d -= 2)`
		278	`{`
		279	`int x = min (fd[d], xlim);`
		280	`int y = x - d;`
		281	`if (ylim < y)`
		282	`x = ylim + d, y = ylim;`
		283	`if (fxybest < x + y)`
		284	`{`
		285	`fxybest = x + y;`
		286	`fxbest = x;`
		287	`}`
		288	`}`
		289
		290	`/* Find backward diagonal that minimizes X + Y. */`
		291	`bxybest = INT_MAX;`
		292	`for (d = bmax; d >= bmin; d -= 2)`
		293	`{`
		294	`int x = max (xoff, bd[d]);`
		295	`int y = x - d;`
		296	`if (y < yoff)`
		297	`x = yoff + d, y = yoff;`
		298	`if (x + y < bxybest)`
		299	`{`
		300	`bxybest = x + y;`
		301	`bxbest = x;`
		302	`}`
		303	`}`
		304
		305	`/* Use the better of the two diagonals. */`
		306	`if ((xlim + ylim) - bxybest < fxybest - (xoff + yoff))`
		307	`{`
		308	`part->xmid = fxbest;`
		309	`part->ymid = fxybest - fxbest;`
		310	`part->lo_minimal = 1;`
		311	`part->hi_minimal = 0;`
		312	`}`
		313	`else`
		314	`{`
		315	`part->xmid = bxbest;`
		316	`part->ymid = bxybest - bxbest;`
		317	`part->lo_minimal = 0;`
		318	`part->hi_minimal = 1;`
		319	`}`
		320	`return 2 * c - 1;`
		321	`}`
		322	`}`
		323	`}`
		324
		325	`/* Compare in detail contiguous subsequences of the two files`
		326	`which are known, as a whole, to match each other.`
		327
		328	`The results are recorded in the vectors files[N].changed_flag, by`
		329	`storing a 1 in the element for each line that is an insertion or deletion.`
		330
		331	`The subsequence of file 0 is [XOFF, XLIM) and likewise for file 1.`
		332
		333	`Note that XLIM, YLIM are exclusive bounds.`
		334	`All line numbers are origin-0 and discarded lines are not counted.`
		335
		336	`If MINIMAL is nonzero, find a minimal difference no matter how`
		337	`expensive it is. */`
		338
		339	`static void`
		340	`compareseq (xoff, xlim, yoff, ylim, minimal)`
		341	`int xoff, xlim, yoff, ylim, minimal;`
		342	`{`
		343	`int * const xv = xvec; /* Help the compiler. */`
		344	`int * const yv = yvec;`
		345
		346	`/* Slide down the bottom initial diagonal. */`
		347	`while (xoff < xlim && yoff < ylim && xv[xoff] == yv[yoff])`
		348	`++xoff, ++yoff;`
		349	`/* Slide up the top initial diagonal. */`
		350	`while (xlim > xoff && ylim > yoff && xv[xlim - 1] == yv[ylim - 1])`
		351	`--xlim, --ylim;`
		352
		353	`/* Handle simple cases. */`
		354	`if (xoff == xlim)`
		355	`while (yoff < ylim)`
		356	`files[1].changed_flag[files[1].realindexes[yoff++]] = 1;`
		357	`else if (yoff == ylim)`
		358	`while (xoff < xlim)`
		359	`files[0].changed_flag[files[0].realindexes[xoff++]] = 1;`
		360	`else`
		361	`{`
		362	`int c;`
		363	`struct partition part;`
		364
		365	`/* Find a point of correspondence in the middle of the files. */`
		366
		367	`c = diag (xoff, xlim, yoff, ylim, minimal, &part);`
		368
		369	`if (c == 1)`
		370	`{`
		371	`/* This should be impossible, because it implies that`
		372	`one of the two subsequences is empty,`
		373	and that case was handled above without calling `diag'.
		374	`Let's verify that this is true. */`
		375	`abort ();`
		376	`#if 0`
		377	`/* The two subsequences differ by a single insert or delete;`
		378	`record it and we are done. */`
		379	`if (part.xmid - part.ymid < xoff - yoff)`
		380	`files[1].changed_flag[files[1].realindexes[part.ymid - 1]] = 1;`
		381	`else`
		382	`files[0].changed_flag[files[0].realindexes[part.xmid]] = 1;`
		383	`#endif`
		384	`}`
		385	`else`
		386	`{`
		387	`/* Use the partitions to split this problem into subproblems. */`
		388	`compareseq (xoff, part.xmid, yoff, part.ymid, part.lo_minimal);`
		389	`compareseq (part.xmid, xlim, part.ymid, ylim, part.hi_minimal);`
		390	`}`
		391	`}`
		392	`}`
		393
		394	`/* Discard lines from one file that have no matches in the other file.`
		395
		396	`A line which is discarded will not be considered by the actual`
		397	`comparison algorithm; it will be as if that line were not in the file.`
		398	The file's `realindexes' table maps virtual line numbers
		399	`(which don't count the discarded lines) into real line numbers;`
		400	`this is how the actual comparison algorithm produces results`
		401	`that are comprehensible when the discarded lines are counted.`
		402
		403	`When we discard a line, we also mark it as a deletion or insertion`
		404	`so that it will be printed in the output. */`
		405
		406	`static void`
		407	`discard_confusing_lines (filevec)`
		408	`struct file_data filevec[];`
		409	`{`
		410	`unsigned int f, i;`
		411	`char *discarded[2];`
		412	`int *equiv_count[2];`
		413	`int *p;`
		414
		415	`/* Allocate our results. */`
		416	`p = (int *) xmalloc ((filevec[0].buffered_lines + filevec[1].buffered_lines)`
		417	`* (2 * sizeof (int)));`
		418	`for (f = 0; f < 2; f++)`
		419	`{`
		420	`filevec[f].undiscarded = p; p += filevec[f].buffered_lines;`
		421	`filevec[f].realindexes = p; p += filevec[f].buffered_lines;`
		422	`}`
		423
		424	`/* Set up equiv_count[F][I] as the number of lines in file F`
		425	`that fall in equivalence class I. */`
		426
		427	`p = (int ) xmalloc (filevec[0].equiv_max (2 * sizeof (int)));`
		428	`equiv_count[0] = p;`
		429	`equiv_count[1] = p + filevec[0].equiv_max;`
		430	`bzero (p, filevec[0].equiv_max * (2 * sizeof (int)));`
		431
		432	`for (i = 0; i < filevec[0].buffered_lines; ++i)`
		433	`++equiv_count[0][filevec[0].equivs[i]];`
		434	`for (i = 0; i < filevec[1].buffered_lines; ++i)`
		435	`++equiv_count[1][filevec[1].equivs[i]];`
		436
		437	`/* Set up tables of which lines are going to be discarded. */`
		438
		439	`discarded[0] = xmalloc (sizeof (char)`
		440	`* (filevec[0].buffered_lines`
		441	`+ filevec[1].buffered_lines));`
		442	`discarded[1] = discarded[0] + filevec[0].buffered_lines;`
		443	`bzero (discarded[0], sizeof (char) * (filevec[0].buffered_lines`
		444	`+ filevec[1].buffered_lines));`
		445
		446	`/* Mark to be discarded each line that matches no line of the other file.`
		447	`If a line matches many lines, mark it as provisionally discardable. */`
		448
		449	`for (f = 0; f < 2; f++)`
		450	`{`
		451	`unsigned int end = filevec[f].buffered_lines;`
		452	`char *discards = discarded[f];`
		453	`int *counts = equiv_count[1 - f];`
		454	`int *equivs = filevec[f].equivs;`
		455	`unsigned int many = 5;`
		456	`unsigned int tem = end / 64;`
		457
		458	`/* Multiply MANY by approximate square root of number of lines.`
		459	`That is the threshold for provisionally discardable lines. */`
		460	`while ((tem = tem >> 2) > 0)`
		461	`many *= 2;`
		462
		463	`for (i = 0; i < end; i++)`
		464	`{`
		465	`int nmatch;`
		466	`if (equivs[i] == 0)`
		467	`continue;`
		468	`nmatch = counts[equivs[i]];`
		469	`if (nmatch == 0)`
		470	`discards[i] = 1;`
		471	`else if (nmatch > many)`
		472	`discards[i] = 2;`
		473	`}`
		474	`}`
		475
		476	`/* Don't really discard the provisional lines except when they occur`
		477	`in a run of discardables, with nonprovisionals at the beginning`
		478	`and end. */`
		479
		480	`for (f = 0; f < 2; f++)`
		481	`{`
		482	`unsigned int end = filevec[f].buffered_lines;`
		483	`register char *discards = discarded[f];`
		484
		485	`for (i = 0; i < end; i++)`
		486	`{`
		487	`/* Cancel provisional discards not in middle of run of discards. */`
		488	`if (discards[i] == 2)`
		489	`discards[i] = 0;`
		490	`else if (discards[i] != 0)`
		491	`{`
		492	`/* We have found a nonprovisional discard. */`
		493	`register int j;`
		494	`unsigned int length;`
		495	`unsigned int provisional = 0;`
		496
		497	`/* Find end of this run of discardable lines.`
		498	`Count how many are provisionally discardable. */`
		499	`for (j = i; j < end; j++)`
		500	`{`
		501	`if (discards[j] == 0)`
		502	`break;`
		503	`if (discards[j] == 2)`
		504	`++provisional;`
		505	`}`
		506
		507	`/* Cancel provisional discards at end, and shrink the run. */`
		508	`while (j > i && discards[j - 1] == 2)`
		509	`discards[--j] = 0, --provisional;`
		510
		511	`/* Now we have the length of a run of discardable lines`
		512	`whose first and last are not provisional. */`
		513	`length = j - i;`
		514
		515	`/* If 1/4 of the lines in the run are provisional,`
		516	`cancel discarding of all provisional lines in the run. */`
		517	`if (provisional * 4 > length)`
		518	`{`
		519	`while (j > i)`
		520	`if (discards[--j] == 2)`
		521	`discards[j] = 0;`
		522	`}`
		523	`else`
		524	`{`
		525	`register unsigned int consec;`
		526	`unsigned int minimum = 1;`
		527	`unsigned int tem = length / 4;`
		528
		529	`/* MINIMUM is approximate square root of LENGTH/4.`
		530	`A subrun of two or more provisionals can stand`
		531	`when LENGTH is at least 16.`
		532	`A subrun of 4 or more can stand when LENGTH >= 64. */`
		533	`while ((tem = tem >> 2) > 0)`
		534	`minimum *= 2;`
		535	`minimum++;`
		536
		537	`/* Cancel any subrun of MINIMUM or more provisionals`
		538	`within the larger run. */`
		539	`for (j = 0, consec = 0; j < length; j++)`
		540	`if (discards[i + j] != 2)`
		541	`consec = 0;`
		542	`else if (minimum == ++consec)`
		543	`/* Back up to start of subrun, to cancel it all. */`
		544	`j -= consec;`
		545	`else if (minimum < consec)`
		546	`discards[i + j] = 0;`
		547
		548	`/* Scan from beginning of run`
		549	`until we find 3 or more nonprovisionals in a row`
		550	`or until the first nonprovisional at least 8 lines in.`
		551	`Until that point, cancel any provisionals. */`
		552	`for (j = 0, consec = 0; j < length; j++)`
		553	`{`
		554	`if (j >= 8 && discards[i + j] == 1)`
		555	`break;`
		556	`if (discards[i + j] == 2)`
		557	`consec = 0, discards[i + j] = 0;`
		558	`else if (discards[i + j] == 0)`
		559	`consec = 0;`
		560	`else`
		561	`consec++;`
		562	`if (consec == 3)`
		563	`break;`
		564	`}`
		565
		566	`/* I advances to the last line of the run. */`
		567	`i += length - 1;`
		568
		569	`/* Same thing, from end. */`
		570	`for (j = 0, consec = 0; j < length; j++)`
		571	`{`
		572	`if (j >= 8 && discards[i - j] == 1)`
		573	`break;`
		574	`if (discards[i - j] == 2)`
		575	`consec = 0, discards[i - j] = 0;`
		576	`else if (discards[i - j] == 0)`
		577	`consec = 0;`
		578	`else`
		579	`consec++;`
		580	`if (consec == 3)`
		581	`break;`
		582	`}`
		583	`}`
		584	`}`
		585	`}`
		586	`}`
		587
		588	`/* Actually discard the lines. */`
		589	`for (f = 0; f < 2; f++)`
		590	`{`
		591	`char *discards = discarded[f];`
		592	`unsigned int end = filevec[f].buffered_lines;`
		593	`unsigned int j = 0;`
		594	`for (i = 0; i < end; ++i)`
		595	`if (no_discards \|\| discards[i] == 0)`
		596	`{`
		597	`filevec[f].undiscarded[j] = filevec[f].equivs[i];`
		598	`filevec[f].realindexes[j++] = i;`
		599	`}`
		600	`else`
		601	`filevec[f].changed_flag[i] = 1;`
		602	`filevec[f].nondiscarded_lines = j;`
		603	`}`
		604
		605	`free (discarded[0]);`
		606	`free (equiv_count[0]);`
		607	`}`
		608
		609	`/* Adjust inserts/deletes of identical lines to join changes`
		610	`as much as possible.`
		611
		612	`We do something when a run of changed lines include a`
		613	`line at one end and have an excluded, identical line at the other.`
		614	`We are free to choose which identical line is included.`
		615	`compareseq' usually chooses the one at the beginning,
		616	`but usually it is cleaner to consider the following identical line`
		617	`to be the "change". */`
		618
		619	`int inhibit;`
		620
		621	`static void`
		622	`shift_boundaries (filevec)`
		623	`struct file_data filevec[];`
		624	`{`
		625	`int f;`
		626
		627	`if (inhibit)`
		628	`return;`
		629
		630	`for (f = 0; f < 2; f++)`
		631	`{`
		632	`char *changed = filevec[f].changed_flag;`
		633	`char const *other_changed = filevec[1-f].changed_flag;`
		634	`int const *equivs = filevec[f].equivs;`
		635	`int i = 0;`
		636	`int j = 0;`
		637	`int i_end = filevec[f].buffered_lines;`
		638
		639	`while (1)`
		640	`{`
		641	`int runlength, start, corresponding;`
		642
		643	`/* Scan forwards to find beginning of another run of changes.`
		644	`Also keep track of the corresponding point in the other file. */`
		645
		646	`while (i < i_end && changed[i] == 0)`
		647	`{`
		648	`while (other_changed[j++])`
		649	`continue;`
		650	`i++;`
		651	`}`
		652
		653	`if (i == i_end)`
		654	`break;`
		655
		656	`start = i;`
		657
		658	`/* Find the end of this run of changes. */`
		659
		660	`while (changed[++i])`
		661	`continue;`
		662	`while (other_changed[j])`
		663	`j++;`
		664
		665	`do`
		666	`{`
		667	`/* Record the length of this run of changes, so that`
		668	`we can later determine whether the run has grown. */`
		669	`runlength = i - start;`
		670
		671	`/* Move the changed region back, so long as the`
		672	`previous unchanged line matches the last changed one.`
		673	`This merges with previous changed regions. */`
		674
		675	`while (start && equivs[start - 1] == equivs[i - 1])`
		676	`{`
		677	`changed[--start] = 1;`
		678	`changed[--i] = 0;`
		679	`while (changed[start - 1])`
		680	`start--;`
		681	`while (other_changed[--j])`
		682	`continue;`
		683	`}`
		684
		685	`/* Set CORRESPONDING to the end of the changed run, at the last`
		686	`point where it corresponds to a changed run in the other file.`
		687	`CORRESPONDING == I_END means no such point has been found. */`
		688	`corresponding = other_changed[j - 1] ? i : i_end;`
		689
		690	`/* Move the changed region forward, so long as the`
		691	`first changed line matches the following unchanged one.`
		692	`This merges with following changed regions.`
		693	`Do this second, so that if there are no merges,`
		694	`the changed region is moved forward as far as possible. */`
		695
		696	`while (i != i_end && equivs[start] == equivs[i])`
		697	`{`
		698	`changed[start++] = 0;`
		699	`changed[i++] = 1;`
		700	`while (changed[i])`
		701	`i++;`
		702	`while (other_changed[++j])`
		703	`corresponding = i;`
		704	`}`
		705	`}`
		706	`while (runlength != i - start);`
		707
		708	`/* If possible, move the fully-merged run of changes`
		709	`back to a corresponding run in the other file. */`
		710
		711	`while (corresponding < i)`
		712	`{`
		713	`changed[--start] = 1;`
		714	`changed[--i] = 0;`
		715	`while (other_changed[--j])`
		716	`continue;`
		717	`}`
		718	`}`
		719	`}`
		720	`}`
		721
		722	`/* Cons an additional entry onto the front of an edit script OLD.`
		723	`LINE0 and LINE1 are the first affected lines in the two files (origin 0).`
		724	`DELETED is the number of lines deleted here from file 0.`
		725	`INSERTED is the number of lines inserted here in file 1.`
		726
		727	`If DELETED is 0 then LINE0 is the number of the line before`
		728	`which the insertion was done; vice versa for INSERTED and LINE1. */`
		729
		730	`static struct change *`
		731	`add_change (line0, line1, deleted, inserted, old)`
		732	`int line0, line1, deleted, inserted;`
		733	`struct change *old;`
		734	`{`
		735	`struct change new = (struct change ) xmalloc (sizeof (struct change));`
		736
		737	`new->line0 = line0;`
		738	`new->line1 = line1;`
		739	`new->inserted = inserted;`
		740	`new->deleted = deleted;`
		741	`new->link = old;`
		742	`return new;`
		743	`}`
		744
		745	`/* Scan the tables of which lines are inserted and deleted,`
		746	`producing an edit script in reverse order. */`
		747
		748	`static struct change *`
		749	`build_reverse_script (filevec)`
		750	`struct file_data const filevec[];`
		751	`{`
		752	`struct change *script = 0;`
		753	`char *changed0 = filevec[0].changed_flag;`
		754	`char *changed1 = filevec[1].changed_flag;`
		755	`int len0 = filevec[0].buffered_lines;`
		756	`int len1 = filevec[1].buffered_lines;`
		757
		758	`/* Note that changedN[len0] does exist, and contains 0. */`
		759
		760	`int i0 = 0, i1 = 0;`
		761
		762	`while (i0 < len0 \|\| i1 < len1)`
		763	`{`
		764	`if (changed0[i0] \|\| changed1[i1])`
		765	`{`
		766	`int line0 = i0, line1 = i1;`
		767
		768	`/* Find # lines changed here in each file. */`
		769	`while (changed0[i0]) ++i0;`
		770	`while (changed1[i1]) ++i1;`
		771
		772	`/* Record this change. */`
		773	`script = add_change (line0, line1, i0 - line0, i1 - line1, script);`
		774	`}`
		775
		776	`/* We have reached lines in the two files that match each other. */`
		777	`i0++, i1++;`
		778	`}`
		779
		780	`return script;`
		781	`}`
		782
		783	`/* Scan the tables of which lines are inserted and deleted,`
		784	`producing an edit script in forward order. */`
		785
		786	`static struct change *`
		787	`build_script (filevec)`
		788	`struct file_data const filevec[];`
		789	`{`
		790	`struct change *script = 0;`
		791	`char *changed0 = filevec[0].changed_flag;`
		792	`char *changed1 = filevec[1].changed_flag;`
		793	`int i0 = filevec[0].buffered_lines, i1 = filevec[1].buffered_lines;`
		794
		795	`/* Note that changedN[-1] does exist, and contains 0. */`
		796
		797	`while (i0 >= 0 \|\| i1 >= 0)`
		798	`{`
		799	`if (changed0[i0 - 1] \|\| changed1[i1 - 1])`
		800	`{`
		801	`int line0 = i0, line1 = i1;`
		802
		803	`/* Find # lines changed here in each file. */`
		804	`while (changed0[i0 - 1]) --i0;`
		805	`while (changed1[i1 - 1]) --i1;`
		806
		807	`/* Record this change. */`
		808	`script = add_change (i0, i1, line0 - i0, line1 - i1, script);`
		809	`}`
		810
		811	`/* We have reached lines in the two files that match each other. */`
		812	`i0--, i1--;`
		813	`}`
		814
		815	`return script;`
		816	`}`
		817
		818	`/* If CHANGES, briefly report that two files differed. */`
		819	`static void`
		820	`briefly_report (changes, filevec)`
		821	`int changes;`
		822	`struct file_data const filevec[];`
		823	`{`
		824	`if (changes)`
		825	`message (no_details_flag ? "Files %s and %s differ\n"`
		826	`: "Binary files %s and %s differ\n",`
		827	`filevec[0].name, filevec[1].name);`
		828	`}`
		829
		830	`/* Report the differences of two files. DEPTH is the current directory`
		831	`depth. */`
		832	`int`
		833	`diff_2_files (filevec, depth)`
		834	`struct file_data filevec[];`
		835	`int depth;`
		836	`{`
		837	`int diags;`
		838	`int i;`
		839	`struct change e, p;`
		840	`struct change *script;`
		841	`int changes;`
		842
		843
		844	`/* If we have detected that either file is binary,`
		845	`compare the two files as binary. This can happen`
		846	`only when the first chunk is read.`
		847	`Also, --brief without any --ignore-* options means`
		848	`we can speed things up by treating the files as binary. */`
		849
		850	`if (read_files (filevec, no_details_flag & ~ignore_some_changes))`
		851	`{`
		852	`/* Files with different lengths must be different. */`
		853	`if (filevec[0].stat.st_size != filevec[1].stat.st_size`
		854	`&& (filevec[0].desc < 0 \|\| S_ISREG (filevec[0].stat.st_mode))`
		855	`&& (filevec[1].desc < 0 \|\| S_ISREG (filevec[1].stat.st_mode)))`
		856	`changes = 1;`
		857
		858	`/* Standard input equals itself. */`
		859	`else if (filevec[0].desc == filevec[1].desc)`
		860	`changes = 0;`
		861
		862	`else`
		863	`/* Scan both files, a buffer at a time, looking for a difference. */`
		864	`{`
		865	`/* Allocate same-sized buffers for both files. */`
		866	`size_t buffer_size = buffer_lcm (STAT_BLOCKSIZE (filevec[0].stat),`
		867	`STAT_BLOCKSIZE (filevec[1].stat));`
		868	`for (i = 0; i < 2; i++)`
		869	`filevec[i].buffer = xrealloc (filevec[i].buffer, buffer_size);`
		870
		871	`for (;; filevec[0].buffered_chars = filevec[1].buffered_chars = 0)`
		872	`{`
		873	`/* Read a buffer's worth from both files. */`
		874	`for (i = 0; i < 2; i++)`
		875	`if (0 <= filevec[i].desc)`
		876	`while (filevec[i].buffered_chars != buffer_size)`
		877	`{`
		878	`int r = read (filevec[i].desc,`
		879	`filevec[i].buffer`
		880	`+ filevec[i].buffered_chars,`
		881	`buffer_size - filevec[i].buffered_chars);`
		882	`if (r == 0)`
		883	`break;`
		884	`if (r < 0)`
		885	`pfatal_with_name (filevec[i].name);`
		886	`filevec[i].buffered_chars += r;`
		887	`}`
		888
		889	`/* If the buffers differ, the files differ. */`
		890	`if (filevec[0].buffered_chars != filevec[1].buffered_chars`
		891	`\|\| (filevec[0].buffered_chars != 0`
		892	`&& memcmp (filevec[0].buffer,`
		893	`filevec[1].buffer,`
		894	`filevec[0].buffered_chars) != 0))`
		895	`{`
		896	`changes = 1;`
		897	`break;`
		898	`}`
		899
		900	`/* If we reach end of file, the files are the same. */`
		901	`if (filevec[0].buffered_chars != buffer_size)`
		902	`{`
		903	`changes = 0;`
		904	`break;`
		905	`}`
		906	`}`
		907	`}`
		908
		909	`briefly_report (changes, filevec);`
		910	`}`
		911	`else`
		912	`{`
		913	`/* Allocate vectors for the results of comparison:`
		914	`a flag for each line of each file, saying whether that line`
		915	`is an insertion or deletion.`
		916	`Allocate an extra element, always zero, at each end of each vector. */`
		917
		918	`size_t s = filevec[0].buffered_lines + filevec[1].buffered_lines + 4;`
		919	`filevec[0].changed_flag = xmalloc (s);`
		920	`bzero (filevec[0].changed_flag, s);`
		921	`filevec[0].changed_flag++;`
		922	`filevec[1].changed_flag = filevec[0].changed_flag`
		923	`+ filevec[0].buffered_lines + 2;`
		924
		925	`/* Some lines are obviously insertions or deletions`
		926	`because they don't match anything. Detect them now, and`
		927	`avoid even thinking about them in the main comparison algorithm. */`
		928
		929	`discard_confusing_lines (filevec);`
		930
		931	`/* Now do the main comparison algorithm, considering just the`
		932	`undiscarded lines. */`
		933
		934	`xvec = filevec[0].undiscarded;`
		935	`yvec = filevec[1].undiscarded;`
		936	`diags = filevec[0].nondiscarded_lines + filevec[1].nondiscarded_lines + 3;`
		937	`fdiag = (int ) xmalloc (diags (2 * sizeof (int)));`
		938	`bdiag = fdiag + diags;`
		939	`fdiag += filevec[1].nondiscarded_lines + 1;`
		940	`bdiag += filevec[1].nondiscarded_lines + 1;`
		941
		942	`/* Set TOO_EXPENSIVE to be approximate square root of input size,`
		943	`bounded below by 256. */`
		944	`too_expensive = 1;`
		945	`for (i = filevec[0].nondiscarded_lines + filevec[1].nondiscarded_lines;`
		946	`i != 0; i >>= 2)`
		947	`too_expensive <<= 1;`
		948	`too_expensive = max (256, too_expensive);`
		949
		950	`files[0] = filevec[0];`
		951	`files[1] = filevec[1];`
		952
		953	`compareseq (0, filevec[0].nondiscarded_lines,`
		954	`0, filevec[1].nondiscarded_lines, no_discards);`
		955
		956	`free (fdiag - (filevec[1].nondiscarded_lines + 1));`
		957
		958	`/* Modify the results slightly to make them prettier`
		959	`in cases where that can validly be done. */`
		960
		961	`shift_boundaries (filevec);`
		962
		963	`/* Get the results of comparison in the form of a chain`
		964	of `struct change's -- an edit script. */
		965
		966	`if (output_style == OUTPUT_ED)`
		967	`script = build_reverse_script (filevec);`
		968	`else`
		969	`script = build_script (filevec);`
		970
		971	`/* Set CHANGES if we had any diffs.`
		972	`If some changes are ignored, we must scan the script to decide. */`
		973	`if (ignore_blank_lines_flag \|\| ignore_regexp_list)`
		974	`{`
		975	`struct change *next = script;`
		976	`changes = 0;`
		977
		978	`while (next && changes == 0)`
		979	`{`
		980	`struct change this, end;`
		981	`int first0, last0, first1, last1, deletes, inserts;`
		982
		983	`/* Find a set of changes that belong together. */`
		984	`this = next;`
		985	`end = find_change (next);`
		986
		987	`/* Disconnect them from the rest of the changes, making them`
		988	`a hunk, and remember the rest for next iteration. */`
		989	`next = end->link;`
		990	`end->link = 0;`
		991
		992	`/* Determine whether this hunk is really a difference. */`
		993	`analyze_hunk (this, &first0, &last0, &first1, &last1,`
		994	`&deletes, &inserts);`
		995
		996	`/* Reconnect the script so it will all be freed properly. */`
		997	`end->link = next;`
		998
		999	`if (deletes \|\| inserts)`
		1000	`changes = 1;`
		1001	`}`
		1002	`}`
		1003	`else`
		1004	`changes = (script != 0);`
		1005
		1006	`if (no_details_flag)`
		1007	`briefly_report (changes, filevec);`
		1008	`else`
		1009	`{`
		1010	`if (changes \|\| ! no_diff_means_no_output)`
		1011	`{`
		1012	`/* Record info for starting up output,`
		1013	`to be used if and when we have some output to print. */`
		1014	`setup_output (files[0].name, files[1].name, depth);`
		1015
		1016	`switch (output_style)`
		1017	`{`
		1018	`case OUTPUT_CONTEXT:`
		1019	`print_context_script (script, 0);`
		1020	`break;`
		1021
		1022	`case OUTPUT_UNIFIED:`
		1023	`print_context_script (script, 1);`
		1024	`break;`
		1025
		1026	`case OUTPUT_ED:`
		1027	`print_ed_script (script);`
		1028	`break;`
		1029
		1030	`case OUTPUT_FORWARD_ED:`
		1031	`pr_forward_ed_script (script);`
		1032	`break;`
		1033
		1034	`case OUTPUT_RCS:`
		1035	`print_rcs_script (script);`
		1036	`break;`
		1037
		1038	`case OUTPUT_NORMAL:`
		1039	`print_normal_script (script);`
		1040	`break;`
		1041
		1042	`case OUTPUT_IFDEF:`
		1043	`print_ifdef_script (script);`
		1044	`break;`
		1045
		1046	`case OUTPUT_SDIFF:`
		1047	`print_sdiff_script (script);`
		1048	`}`
		1049
		1050	`finish_output ();`
		1051	`}`
		1052	`}`
		1053
		1054	`free (filevec[0].undiscarded);`
		1055
		1056	`free (filevec[0].changed_flag - 1);`
		1057
		1058	`for (i = 1; i >= 0; --i)`
		1059	`free (filevec[i].equivs);`
		1060
		1061	`for (i = 0; i < 2; ++i)`
		1062	`free (filevec[i].linbuf + filevec[i].linbuf_base);`
		1063
		1064	`for (e = script; e; e = p)`
		1065	`{`
		1066	`p = e->link;`
		1067	`free (e);`
		1068	`}`
		1069
		1070	`if (! ROBUST_OUTPUT_STYLE (output_style))`
		1071	`for (i = 0; i < 2; ++i)`
		1072	`if (filevec[i].missing_newline)`
		1073	`{`
		1074	`error ("No newline at end of file %s", filevec[i].name, "");`
		1075	`changes = 2;`
		1076	`}`
		1077	`}`
		1078
		1079	`if (filevec[0].buffer != filevec[1].buffer)`
		1080	`free (filevec[0].buffer);`
		1081	`free (filevec[1].buffer);`
		1082
		1083	`return changes;`
		1084	`}`

Subversion Repositories planix.SVN

(root)/os/branches/feature-vt/sys/src/ape/cmd/diff/analyze.c – Rev 98