Subversion Repositories planix.SVN

Rev

Rev 2 | Details | Compare with Previous | Last modification | View Log | RSS feed

Rev Author Line No. Line
2 - 1
/* Analyze file differences for GNU DIFF.
2
   Copyright (C) 1988, 1989, 1992, 1993 Free Software Foundation, Inc.
3
 
4
This file is part of GNU DIFF.
5
 
6
GNU DIFF is free software; you can redistribute it and/or modify
7
it under the terms of the GNU General Public License as published by
8
the Free Software Foundation; either version 2, or (at your option)
9
any later version.
10
 
11
GNU DIFF is distributed in the hope that it will be useful,
12
but WITHOUT ANY WARRANTY; without even the implied warranty of
13
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14
GNU General Public License for more details.
15
 
16
You should have received a copy of the GNU General Public License
17
along with GNU DIFF; see the file COPYING.  If not, write to
18
the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.  */
19
 
20
/* The basic algorithm is described in:
21
   "An O(ND) Difference Algorithm and its Variations", Eugene Myers,
22
   Algorithmica Vol. 1 No. 2, 1986, pp. 251-266;
23
   see especially section 4.2, which describes the variation used below.
24
   Unless the --minimal option is specified, this code uses the TOO_EXPENSIVE
25
   heuristic, by Paul Eggert, to limit the cost to O(N**1.5 log N)
26
   at the price of producing suboptimal output for large inputs with
27
   many differences.
28
 
29
   The basic algorithm was independently discovered as described in:
30
   "Algorithms for Approximate String Matching", E. Ukkonen,
31
   Information and Control Vol. 64, 1985, pp. 100-118.  */
32
 
33
#include "diff.h"
34
#include "cmpbuf.h"
35
 
36
extern int no_discards;
37
 
38
static int *xvec, *yvec;	/* Vectors being compared. */
39
static int *fdiag;		/* Vector, indexed by diagonal, containing
40
				   1 + the X coordinate of the point furthest
41
				   along the given diagonal in the forward
42
				   search of the edit matrix. */
43
static int *bdiag;		/* Vector, indexed by diagonal, containing
44
				   the X coordinate of the point furthest
45
				   along the given diagonal in the backward
46
				   search of the edit matrix. */
47
static int too_expensive;	/* Edit scripts longer than this are too
48
				   expensive to compute.  */
49
 
50
#define SNAKE_LIMIT 20	/* Snakes bigger than this are considered `big'.  */
51
 
52
struct partition
53
{
54
  int xmid, ymid;	/* Midpoints of this partition.  */
55
  int lo_minimal;	/* Nonzero if low half will be analyzed minimally.  */
56
  int hi_minimal;	/* Likewise for high half.  */
57
};
58
 
59
static int diag PARAMS((int, int, int, int, int, struct partition *));
60
static struct change *add_change PARAMS((int, int, int, int, struct change *));
61
static struct change *build_reverse_script PARAMS((struct file_data const[]));
62
static struct change *build_script PARAMS((struct file_data const[]));
63
static void briefly_report PARAMS((int, struct file_data const[]));
64
static void compareseq PARAMS((int, int, int, int, int));
65
static void discard_confusing_lines PARAMS((struct file_data[]));
66
static void shift_boundaries PARAMS((struct file_data[]));
67
 
68
/* Find the midpoint of the shortest edit script for a specified
69
   portion of the two files.
70
 
71
   Scan from the beginnings of the files, and simultaneously from the ends,
72
   doing a breadth-first search through the space of edit-sequence.
73
   When the two searches meet, we have found the midpoint of the shortest
74
   edit sequence.
75
 
76
   If MINIMAL is nonzero, find the minimal edit script regardless
77
   of expense.  Otherwise, if the search is too expensive, use
78
   heuristics to stop the search and report a suboptimal answer.
79
 
80
   Set PART->(XMID,YMID) to the midpoint (XMID,YMID).  The diagonal number
81
   XMID - YMID equals the number of inserted lines minus the number
82
   of deleted lines (counting only lines before the midpoint).
83
   Return the approximate edit cost; this is the total number of
84
   lines inserted or deleted (counting only lines before the midpoint),
85
   unless a heuristic is used to terminate the search prematurely.
86
 
87
   Set PART->LEFT_MINIMAL to nonzero iff the minimal edit script for the
88
   left half of the partition is known; similarly for PART->RIGHT_MINIMAL.
89
 
90
   This function assumes that the first lines of the specified portions
91
   of the two files do not match, and likewise that the last lines do not
92
   match.  The caller must trim matching lines from the beginning and end
93
   of the portions it is going to specify.
94
 
95
   If we return the "wrong" partitions,
96
   the worst this can do is cause suboptimal diff output.
97
   It cannot cause incorrect diff output.  */
98
 
99
static int
100
diag (xoff, xlim, yoff, ylim, minimal, part)
101
     int xoff, xlim, yoff, ylim, minimal;
102
     struct partition *part;
103
{
104
  int *const fd = fdiag;	/* Give the compiler a chance. */
105
  int *const bd = bdiag;	/* Additional help for the compiler. */
106
  int const *const xv = xvec;	/* Still more help for the compiler. */
107
  int const *const yv = yvec;	/* And more and more . . . */
108
  int const dmin = xoff - ylim;	/* Minimum valid diagonal. */
109
  int const dmax = xlim - yoff;	/* Maximum valid diagonal. */
110
  int const fmid = xoff - yoff;	/* Center diagonal of top-down search. */
111
  int const bmid = xlim - ylim;	/* Center diagonal of bottom-up search. */
112
  int fmin = fmid, fmax = fmid;	/* Limits of top-down search. */
113
  int bmin = bmid, bmax = bmid;	/* Limits of bottom-up search. */
114
  int c;			/* Cost. */
115
  int odd = (fmid - bmid) & 1;	/* True if southeast corner is on an odd
116
				   diagonal with respect to the northwest. */
117
 
118
  fd[fmid] = xoff;
119
  bd[bmid] = xlim;
120
 
121
  for (c = 1;; ++c)
122
    {
123
      int d;			/* Active diagonal. */
124
      int big_snake = 0;
125
 
126
      /* Extend the top-down search by an edit step in each diagonal. */
127
      fmin > dmin ? fd[--fmin - 1] = -1 : ++fmin;
128
      fmax < dmax ? fd[++fmax + 1] = -1 : --fmax;
129
      for (d = fmax; d >= fmin; d -= 2)
130
	{
131
	  int x, y, oldx, tlo = fd[d - 1], thi = fd[d + 1];
132
 
133
	  if (tlo >= thi)
134
	    x = tlo + 1;
135
	  else
136
	    x = thi;
137
	  oldx = x;
138
	  y = x - d;
139
	  while (x < xlim && y < ylim && xv[x] == yv[y])
140
	    ++x, ++y;
141
	  if (x - oldx > SNAKE_LIMIT)
142
	    big_snake = 1;
143
	  fd[d] = x;
144
	  if (odd && bmin <= d && d <= bmax && bd[d] <= x)
145
	    {
146
	      part->xmid = x;
147
	      part->ymid = y;
148
	      part->lo_minimal = part->hi_minimal = 1;
149
	      return 2 * c - 1;
150
	    }
151
	}
152
 
153
      /* Similarly extend the bottom-up search.  */
154
      bmin > dmin ? bd[--bmin - 1] = INT_MAX : ++bmin;
155
      bmax < dmax ? bd[++bmax + 1] = INT_MAX : --bmax;
156
      for (d = bmax; d >= bmin; d -= 2)
157
	{
158
	  int x, y, oldx, tlo = bd[d - 1], thi = bd[d + 1];
159
 
160
	  if (tlo < thi)
161
	    x = tlo;
162
	  else
163
	    x = thi - 1;
164
	  oldx = x;
165
	  y = x - d;
166
	  while (x > xoff && y > yoff && xv[x - 1] == yv[y - 1])
167
	    --x, --y;
168
	  if (oldx - x > SNAKE_LIMIT)
169
	    big_snake = 1;
170
	  bd[d] = x;
171
	  if (!odd && fmin <= d && d <= fmax && x <= fd[d])
172
	    {
173
	      part->xmid = x;
174
	      part->ymid = y;
175
	      part->lo_minimal = part->hi_minimal = 1;
176
	      return 2 * c;
177
	    }
178
	}
179
 
180
      if (minimal)
181
	continue;
182
 
183
      /* Heuristic: check occasionally for a diagonal that has made
184
	 lots of progress compared with the edit distance.
185
	 If we have any such, find the one that has made the most
186
	 progress and return it as if it had succeeded.
187
 
188
	 With this heuristic, for files with a constant small density
189
	 of changes, the algorithm is linear in the file size.  */
190
 
191
      if (c > 200 && big_snake && heuristic)
192
	{
193
	  int best;
194
 
195
	  best = 0;
196
	  for (d = fmax; d >= fmin; d -= 2)
197
	    {
198
	      int dd = d - fmid;
199
	      int x = fd[d];
200
	      int y = x - d;
201
	      int v = (x - xoff) * 2 - dd;
202
	      if (v > 12 * (c + (dd < 0 ? -dd : dd)))
203
		{
204
		  if (v > best
205
		      && xoff + SNAKE_LIMIT <= x && x < xlim
206
		      && yoff + SNAKE_LIMIT <= y && y < ylim)
207
		    {
208
		      /* We have a good enough best diagonal;
209
			 now insist that it end with a significant snake.  */
210
		      int k;
211
 
212
		      for (k = 1; xv[x - k] == yv[y - k]; k++)
213
			if (k == SNAKE_LIMIT)
214
			  {
215
			    best = v;
216
			    part->xmid = x;
217
			    part->ymid = y;
218
			    break;
219
			  }
220
		    }
221
		}
222
	    }
223
	  if (best > 0)
224
	    {
225
	      part->lo_minimal = 1;
226
	      part->hi_minimal = 0;
227
	      return 2 * c - 1;
228
	    }
229
 
230
	  best = 0;
231
	  for (d = bmax; d >= bmin; d -= 2)
232
	    {
233
	      int dd = d - bmid;
234
	      int x = bd[d];
235
	      int y = x - d;
236
	      int v = (xlim - x) * 2 + dd;
237
	      if (v > 12 * (c + (dd < 0 ? -dd : dd)))
238
		{
239
		  if (v > best
240
		      && xoff < x && x <= xlim - SNAKE_LIMIT
241
		      && yoff < y && y <= ylim - SNAKE_LIMIT)
242
		    {
243
		      /* We have a good enough best diagonal;
244
			 now insist that it end with a significant snake.  */
245
		      int k;
246
 
247
		      for (k = 0; xv[x + k] == yv[y + k]; k++)
248
			if (k == SNAKE_LIMIT - 1)
249
			  {
250
			    best = v;
251
			    part->xmid = x;
252
			    part->ymid = y;
253
			    break;
254
			  }
255
		    }
256
		}
257
	    }
258
	  if (best > 0)
259
	    {
260
	      part->lo_minimal = 0;
261
	      part->hi_minimal = 1;
262
	      return 2 * c - 1;
263
	    }
264
	}
265
 
266
      /* Heuristic: if we've gone well beyond the call of duty,
267
	 give up and report halfway between our best results so far.  */
268
      if (c >= too_expensive)
269
	{
270
	  int fxybest, fxbest;
271
	  int bxybest, bxbest;
272
 
273
	  fxbest = bxbest = 0;  /* Pacify `gcc -Wall'.  */
274
 
275
	  /* Find forward diagonal that maximizes X + Y.  */
276
	  fxybest = -1;
277
	  for (d = fmax; d >= fmin; d -= 2)
278
	    {
279
	      int x = min (fd[d], xlim);
280
	      int y = x - d;
281
	      if (ylim < y)
282
		x = ylim + d, y = ylim;
283
	      if (fxybest < x + y)
284
		{
285
		  fxybest = x + y;
286
		  fxbest = x;
287
		}
288
	    }
289
 
290
	  /* Find backward diagonal that minimizes X + Y.  */
291
	  bxybest = INT_MAX;
292
	  for (d = bmax; d >= bmin; d -= 2)
293
	    {
294
	      int x = max (xoff, bd[d]);
295
	      int y = x - d;
296
	      if (y < yoff)
297
		x = yoff + d, y = yoff;
298
	      if (x + y < bxybest)
299
		{
300
		  bxybest = x + y;
301
		  bxbest = x;
302
		}
303
	    }
304
 
305
	  /* Use the better of the two diagonals.  */
306
	  if ((xlim + ylim) - bxybest < fxybest - (xoff + yoff))
307
	    {
308
	      part->xmid = fxbest;
309
	      part->ymid = fxybest - fxbest;
310
	      part->lo_minimal = 1;
311
	      part->hi_minimal = 0;
312
	    }
313
	  else
314
	    {
315
	      part->xmid = bxbest;
316
	      part->ymid = bxybest - bxbest;
317
	      part->lo_minimal = 0;
318
	      part->hi_minimal = 1;
319
	    }
320
	  return 2 * c - 1;
321
	}
322
    }
323
}
324
 
325
/* Compare in detail contiguous subsequences of the two files
326
   which are known, as a whole, to match each other.
327
 
328
   The results are recorded in the vectors files[N].changed_flag, by
329
   storing a 1 in the element for each line that is an insertion or deletion.
330
 
331
   The subsequence of file 0 is [XOFF, XLIM) and likewise for file 1.
332
 
333
   Note that XLIM, YLIM are exclusive bounds.
334
   All line numbers are origin-0 and discarded lines are not counted.
335
 
336
   If MINIMAL is nonzero, find a minimal difference no matter how
337
   expensive it is.  */
338
 
339
static void
340
compareseq (xoff, xlim, yoff, ylim, minimal)
341
     int xoff, xlim, yoff, ylim, minimal;
342
{
343
  int * const xv = xvec; /* Help the compiler.  */
344
  int * const yv = yvec;
345
 
346
  /* Slide down the bottom initial diagonal. */
347
  while (xoff < xlim && yoff < ylim && xv[xoff] == yv[yoff])
348
    ++xoff, ++yoff;
349
  /* Slide up the top initial diagonal. */
350
  while (xlim > xoff && ylim > yoff && xv[xlim - 1] == yv[ylim - 1])
351
    --xlim, --ylim;
352
 
353
  /* Handle simple cases. */
354
  if (xoff == xlim)
355
    while (yoff < ylim)
356
      files[1].changed_flag[files[1].realindexes[yoff++]] = 1;
357
  else if (yoff == ylim)
358
    while (xoff < xlim)
359
      files[0].changed_flag[files[0].realindexes[xoff++]] = 1;
360
  else
361
    {
362
      int c;
363
      struct partition part;
364
 
365
      /* Find a point of correspondence in the middle of the files.  */
366
 
367
      c = diag (xoff, xlim, yoff, ylim, minimal, &part);
368
 
369
      if (c == 1)
370
	{
371
	  /* This should be impossible, because it implies that
372
	     one of the two subsequences is empty,
373
	     and that case was handled above without calling `diag'.
374
	     Let's verify that this is true.  */
375
	  abort ();
376
#if 0
377
	  /* The two subsequences differ by a single insert or delete;
378
	     record it and we are done.  */
379
	  if (part.xmid - part.ymid < xoff - yoff)
380
	    files[1].changed_flag[files[1].realindexes[part.ymid - 1]] = 1;
381
	  else
382
	    files[0].changed_flag[files[0].realindexes[part.xmid]] = 1;
383
#endif
384
	}
385
      else
386
	{
387
	  /* Use the partitions to split this problem into subproblems.  */
388
	  compareseq (xoff, part.xmid, yoff, part.ymid, part.lo_minimal);
389
	  compareseq (part.xmid, xlim, part.ymid, ylim, part.hi_minimal);
390
	}
391
    }
392
}
393
 
394
/* Discard lines from one file that have no matches in the other file.
395
 
396
   A line which is discarded will not be considered by the actual
397
   comparison algorithm; it will be as if that line were not in the file.
398
   The file's `realindexes' table maps virtual line numbers
399
   (which don't count the discarded lines) into real line numbers;
400
   this is how the actual comparison algorithm produces results
401
   that are comprehensible when the discarded lines are counted.
402
 
403
   When we discard a line, we also mark it as a deletion or insertion
404
   so that it will be printed in the output.  */
405
 
406
static void
407
discard_confusing_lines (filevec)
408
     struct file_data filevec[];
409
{
410
  unsigned int f, i;
411
  char *discarded[2];
412
  int *equiv_count[2];
413
  int *p;
414
 
415
  /* Allocate our results.  */
416
  p = (int *) xmalloc ((filevec[0].buffered_lines + filevec[1].buffered_lines)
417
		       * (2 * sizeof (int)));
418
  for (f = 0; f < 2; f++)
419
    {
420
      filevec[f].undiscarded = p;  p += filevec[f].buffered_lines;
421
      filevec[f].realindexes = p;  p += filevec[f].buffered_lines;
422
    }
423
 
424
  /* Set up equiv_count[F][I] as the number of lines in file F
425
     that fall in equivalence class I.  */
426
 
427
  p = (int *) xmalloc (filevec[0].equiv_max * (2 * sizeof (int)));
428
  equiv_count[0] = p;
429
  equiv_count[1] = p + filevec[0].equiv_max;
430
  bzero (p, filevec[0].equiv_max * (2 * sizeof (int)));
431
 
432
  for (i = 0; i < filevec[0].buffered_lines; ++i)
433
    ++equiv_count[0][filevec[0].equivs[i]];
434
  for (i = 0; i < filevec[1].buffered_lines; ++i)
435
    ++equiv_count[1][filevec[1].equivs[i]];
436
 
437
  /* Set up tables of which lines are going to be discarded.  */
438
 
439
  discarded[0] = xmalloc (sizeof (char)
440
			  * (filevec[0].buffered_lines
441
			     + filevec[1].buffered_lines));
442
  discarded[1] = discarded[0] + filevec[0].buffered_lines;
443
  bzero (discarded[0], sizeof (char) * (filevec[0].buffered_lines
444
					+ filevec[1].buffered_lines));
445
 
446
  /* Mark to be discarded each line that matches no line of the other file.
447
     If a line matches many lines, mark it as provisionally discardable.  */
448
 
449
  for (f = 0; f < 2; f++)
450
    {
451
      unsigned int end = filevec[f].buffered_lines;
452
      char *discards = discarded[f];
453
      int *counts = equiv_count[1 - f];
454
      int *equivs = filevec[f].equivs;
455
      unsigned int many = 5;
456
      unsigned int tem = end / 64;
457
 
458
      /* Multiply MANY by approximate square root of number of lines.
459
	 That is the threshold for provisionally discardable lines.  */
460
      while ((tem = tem >> 2) > 0)
461
	many *= 2;
462
 
463
      for (i = 0; i < end; i++)
464
	{
465
	  int nmatch;
466
	  if (equivs[i] == 0)
467
	    continue;
468
	  nmatch = counts[equivs[i]];
469
	  if (nmatch == 0)
470
	    discards[i] = 1;
471
	  else if (nmatch > many)
472
	    discards[i] = 2;
473
	}
474
    }
475
 
476
  /* Don't really discard the provisional lines except when they occur
477
     in a run of discardables, with nonprovisionals at the beginning
478
     and end.  */
479
 
480
  for (f = 0; f < 2; f++)
481
    {
482
      unsigned int end = filevec[f].buffered_lines;
483
      register char *discards = discarded[f];
484
 
485
      for (i = 0; i < end; i++)
486
	{
487
	  /* Cancel provisional discards not in middle of run of discards.  */
488
	  if (discards[i] == 2)
489
	    discards[i] = 0;
490
	  else if (discards[i] != 0)
491
	    {
492
	      /* We have found a nonprovisional discard.  */
493
	      register int j;
494
	      unsigned int length;
495
	      unsigned int provisional = 0;
496
 
497
	      /* Find end of this run of discardable lines.
498
		 Count how many are provisionally discardable.  */
499
	      for (j = i; j < end; j++)
500
		{
501
		  if (discards[j] == 0)
502
		    break;
503
		  if (discards[j] == 2)
504
		    ++provisional;
505
		}
506
 
507
	      /* Cancel provisional discards at end, and shrink the run.  */
508
	      while (j > i && discards[j - 1] == 2)
509
		discards[--j] = 0, --provisional;
510
 
511
	      /* Now we have the length of a run of discardable lines
512
		 whose first and last are not provisional.  */
513
	      length = j - i;
514
 
515
	      /* If 1/4 of the lines in the run are provisional,
516
		 cancel discarding of all provisional lines in the run.  */
517
	      if (provisional * 4 > length)
518
		{
519
		  while (j > i)
520
		    if (discards[--j] == 2)
521
		      discards[j] = 0;
522
		}
523
	      else
524
		{
525
		  register unsigned int consec;
526
		  unsigned int minimum = 1;
527
		  unsigned int tem = length / 4;
528
 
529
		  /* MINIMUM is approximate square root of LENGTH/4.
530
		     A subrun of two or more provisionals can stand
531
		     when LENGTH is at least 16.
532
		     A subrun of 4 or more can stand when LENGTH >= 64.  */
533
		  while ((tem = tem >> 2) > 0)
534
		    minimum *= 2;
535
		  minimum++;
536
 
537
		  /* Cancel any subrun of MINIMUM or more provisionals
538
		     within the larger run.  */
539
		  for (j = 0, consec = 0; j < length; j++)
540
		    if (discards[i + j] != 2)
541
		      consec = 0;
542
		    else if (minimum == ++consec)
543
		      /* Back up to start of subrun, to cancel it all.  */
544
		      j -= consec;
545
		    else if (minimum < consec)
546
		      discards[i + j] = 0;
547
 
548
		  /* Scan from beginning of run
549
		     until we find 3 or more nonprovisionals in a row
550
		     or until the first nonprovisional at least 8 lines in.
551
		     Until that point, cancel any provisionals.  */
552
		  for (j = 0, consec = 0; j < length; j++)
553
		    {
554
		      if (j >= 8 && discards[i + j] == 1)
555
			break;
556
		      if (discards[i + j] == 2)
557
			consec = 0, discards[i + j] = 0;
558
		      else if (discards[i + j] == 0)
559
			consec = 0;
560
		      else
561
			consec++;
562
		      if (consec == 3)
563
			break;
564
		    }
565
 
566
		  /* I advances to the last line of the run.  */
567
		  i += length - 1;
568
 
569
		  /* Same thing, from end.  */
570
		  for (j = 0, consec = 0; j < length; j++)
571
		    {
572
		      if (j >= 8 && discards[i - j] == 1)
573
			break;
574
		      if (discards[i - j] == 2)
575
			consec = 0, discards[i - j] = 0;
576
		      else if (discards[i - j] == 0)
577
			consec = 0;
578
		      else
579
			consec++;
580
		      if (consec == 3)
581
			break;
582
		    }
583
		}
584
	    }
585
	}
586
    }
587
 
588
  /* Actually discard the lines. */
589
  for (f = 0; f < 2; f++)
590
    {
591
      char *discards = discarded[f];
592
      unsigned int end = filevec[f].buffered_lines;
593
      unsigned int j = 0;
594
      for (i = 0; i < end; ++i)
595
	if (no_discards || discards[i] == 0)
596
	  {
597
	    filevec[f].undiscarded[j] = filevec[f].equivs[i];
598
	    filevec[f].realindexes[j++] = i;
599
	  }
600
	else
601
	  filevec[f].changed_flag[i] = 1;
602
      filevec[f].nondiscarded_lines = j;
603
    }
604
 
605
  free (discarded[0]);
606
  free (equiv_count[0]);
607
}
608
 
609
/* Adjust inserts/deletes of identical lines to join changes
610
   as much as possible.
611
 
612
   We do something when a run of changed lines include a
613
   line at one end and have an excluded, identical line at the other.
614
   We are free to choose which identical line is included.
615
   `compareseq' usually chooses the one at the beginning,
616
   but usually it is cleaner to consider the following identical line
617
   to be the "change".  */
618
 
619
int inhibit;
620
 
621
static void
622
shift_boundaries (filevec)
623
     struct file_data filevec[];
624
{
625
  int f;
626
 
627
  if (inhibit)
628
    return;
629
 
630
  for (f = 0; f < 2; f++)
631
    {
632
      char *changed = filevec[f].changed_flag;
633
      char const *other_changed = filevec[1-f].changed_flag;
634
      int const *equivs = filevec[f].equivs;
635
      int i = 0;
636
      int j = 0;
637
      int i_end = filevec[f].buffered_lines;
638
 
639
      while (1)
640
	{
641
	  int runlength, start, corresponding;
642
 
643
	  /* Scan forwards to find beginning of another run of changes.
644
	     Also keep track of the corresponding point in the other file.  */
645
 
646
	  while (i < i_end && changed[i] == 0)
647
	    {
648
	      while (other_changed[j++])
649
		continue;
650
	      i++;
651
	    }
652
 
653
	  if (i == i_end)
654
	    break;
655
 
656
	  start = i;
657
 
658
	  /* Find the end of this run of changes.  */
659
 
660
	  while (changed[++i])
661
	    continue;
662
	  while (other_changed[j])
663
	    j++;
664
 
665
	  do
666
	    {
667
	      /* Record the length of this run of changes, so that
668
		 we can later determine whether the run has grown.  */
669
	      runlength = i - start;
670
 
671
	      /* Move the changed region back, so long as the
672
		 previous unchanged line matches the last changed one.
673
		 This merges with previous changed regions.  */
674
 
675
	      while (start && equivs[start - 1] == equivs[i - 1])
676
		{
677
		  changed[--start] = 1;
678
		  changed[--i] = 0;
679
		  while (changed[start - 1])
680
		    start--;
681
		  while (other_changed[--j])
682
		    continue;
683
		}
684
 
685
	      /* Set CORRESPONDING to the end of the changed run, at the last
686
		 point where it corresponds to a changed run in the other file.
687
		 CORRESPONDING == I_END means no such point has been found.  */
688
	      corresponding = other_changed[j - 1] ? i : i_end;
689
 
690
	      /* Move the changed region forward, so long as the
691
		 first changed line matches the following unchanged one.
692
		 This merges with following changed regions.
693
		 Do this second, so that if there are no merges,
694
		 the changed region is moved forward as far as possible.  */
695
 
696
	      while (i != i_end && equivs[start] == equivs[i])
697
		{
698
		  changed[start++] = 0;
699
		  changed[i++] = 1;
700
		  while (changed[i])
701
		    i++;
702
		  while (other_changed[++j])
703
		    corresponding = i;
704
		}
705
	    }
706
	  while (runlength != i - start);
707
 
708
	  /* If possible, move the fully-merged run of changes
709
	     back to a corresponding run in the other file.  */
710
 
711
	  while (corresponding < i)
712
	    {
713
	      changed[--start] = 1;
714
	      changed[--i] = 0;
715
	      while (other_changed[--j])
716
		continue;
717
	    }
718
	}
719
    }
720
}
721
 
722
/* Cons an additional entry onto the front of an edit script OLD.
723
   LINE0 and LINE1 are the first affected lines in the two files (origin 0).
724
   DELETED is the number of lines deleted here from file 0.
725
   INSERTED is the number of lines inserted here in file 1.
726
 
727
   If DELETED is 0 then LINE0 is the number of the line before
728
   which the insertion was done; vice versa for INSERTED and LINE1.  */
729
 
730
static struct change *
731
add_change (line0, line1, deleted, inserted, old)
732
     int line0, line1, deleted, inserted;
733
     struct change *old;
734
{
735
  struct change *new = (struct change *) xmalloc (sizeof (struct change));
736
 
737
  new->line0 = line0;
738
  new->line1 = line1;
739
  new->inserted = inserted;
740
  new->deleted = deleted;
741
  new->link = old;
742
  return new;
743
}
744
 
745
/* Scan the tables of which lines are inserted and deleted,
746
   producing an edit script in reverse order.  */
747
 
748
static struct change *
749
build_reverse_script (filevec)
750
     struct file_data const filevec[];
751
{
752
  struct change *script = 0;
753
  char *changed0 = filevec[0].changed_flag;
754
  char *changed1 = filevec[1].changed_flag;
755
  int len0 = filevec[0].buffered_lines;
756
  int len1 = filevec[1].buffered_lines;
757
 
758
  /* Note that changedN[len0] does exist, and contains 0.  */
759
 
760
  int i0 = 0, i1 = 0;
761
 
762
  while (i0 < len0 || i1 < len1)
763
    {
764
      if (changed0[i0] || changed1[i1])
765
	{
766
	  int line0 = i0, line1 = i1;
767
 
768
	  /* Find # lines changed here in each file.  */
769
	  while (changed0[i0]) ++i0;
770
	  while (changed1[i1]) ++i1;
771
 
772
	  /* Record this change.  */
773
	  script = add_change (line0, line1, i0 - line0, i1 - line1, script);
774
	}
775
 
776
      /* We have reached lines in the two files that match each other.  */
777
      i0++, i1++;
778
    }
779
 
780
  return script;
781
}
782
 
783
/* Scan the tables of which lines are inserted and deleted,
784
   producing an edit script in forward order.  */
785
 
786
static struct change *
787
build_script (filevec)
788
     struct file_data const filevec[];
789
{
790
  struct change *script = 0;
791
  char *changed0 = filevec[0].changed_flag;
792
  char *changed1 = filevec[1].changed_flag;
793
  int i0 = filevec[0].buffered_lines, i1 = filevec[1].buffered_lines;
794
 
795
  /* Note that changedN[-1] does exist, and contains 0.  */
796
 
797
  while (i0 >= 0 || i1 >= 0)
798
    {
799
      if (changed0[i0 - 1] || changed1[i1 - 1])
800
	{
801
	  int line0 = i0, line1 = i1;
802
 
803
	  /* Find # lines changed here in each file.  */
804
	  while (changed0[i0 - 1]) --i0;
805
	  while (changed1[i1 - 1]) --i1;
806
 
807
	  /* Record this change.  */
808
	  script = add_change (i0, i1, line0 - i0, line1 - i1, script);
809
	}
810
 
811
      /* We have reached lines in the two files that match each other.  */
812
      i0--, i1--;
813
    }
814
 
815
  return script;
816
}
817
 
818
/* If CHANGES, briefly report that two files differed.  */
819
static void
820
briefly_report (changes, filevec)
821
     int changes;
822
     struct file_data const filevec[];
823
{
824
  if (changes)
825
    message (no_details_flag ? "Files %s and %s differ\n"
826
	     : "Binary files %s and %s differ\n",
827
	     filevec[0].name, filevec[1].name);
828
}
829
 
830
/* Report the differences of two files.  DEPTH is the current directory
831
   depth. */
832
int
833
diff_2_files (filevec, depth)
834
     struct file_data filevec[];
835
     int depth;
836
{
837
  int diags;
838
  int i;
839
  struct change *e, *p;
840
  struct change *script;
841
  int changes;
842
 
843
 
844
  /* If we have detected that either file is binary,
845
     compare the two files as binary.  This can happen
846
     only when the first chunk is read.
847
     Also, --brief without any --ignore-* options means
848
     we can speed things up by treating the files as binary.  */
849
 
850
  if (read_files (filevec, no_details_flag & ~ignore_some_changes))
851
    {
852
      /* Files with different lengths must be different.  */
853
      if (filevec[0].stat.st_size != filevec[1].stat.st_size
854
	  && (filevec[0].desc < 0 || S_ISREG (filevec[0].stat.st_mode))
855
	  && (filevec[1].desc < 0 || S_ISREG (filevec[1].stat.st_mode)))
856
	changes = 1;
857
 
858
      /* Standard input equals itself.  */
859
      else if (filevec[0].desc == filevec[1].desc)
860
	changes = 0;
861
 
862
      else
863
	/* Scan both files, a buffer at a time, looking for a difference.  */
864
	{
865
	  /* Allocate same-sized buffers for both files.  */
866
	  size_t buffer_size = buffer_lcm (STAT_BLOCKSIZE (filevec[0].stat),
867
					   STAT_BLOCKSIZE (filevec[1].stat));
868
	  for (i = 0; i < 2; i++)
869
	    filevec[i].buffer = xrealloc (filevec[i].buffer, buffer_size);
870
 
871
	  for (;;  filevec[0].buffered_chars = filevec[1].buffered_chars = 0)
872
	    {
873
	      /* Read a buffer's worth from both files.  */
874
	      for (i = 0; i < 2; i++)
875
		if (0 <= filevec[i].desc)
876
		  while (filevec[i].buffered_chars != buffer_size)
877
		    {
878
		      int r = read (filevec[i].desc,
879
				    filevec[i].buffer
880
				    + filevec[i].buffered_chars,
881
				    buffer_size - filevec[i].buffered_chars);
882
		      if (r == 0)
883
			break;
884
		      if (r < 0)
885
			pfatal_with_name (filevec[i].name);
886
		      filevec[i].buffered_chars += r;
887
		    }
888
 
889
	      /* If the buffers differ, the files differ.  */
890
	      if (filevec[0].buffered_chars != filevec[1].buffered_chars
891
		  || (filevec[0].buffered_chars != 0
892
		      && memcmp (filevec[0].buffer,
893
				 filevec[1].buffer,
894
				 filevec[0].buffered_chars) != 0))
895
		{
896
		  changes = 1;
897
		  break;
898
		}
899
 
900
	      /* If we reach end of file, the files are the same.  */
901
	      if (filevec[0].buffered_chars != buffer_size)
902
		{
903
		  changes = 0;
904
		  break;
905
		}
906
	    }
907
	}
908
 
909
      briefly_report (changes, filevec);
910
    }
911
  else
912
    {
913
      /* Allocate vectors for the results of comparison:
914
	 a flag for each line of each file, saying whether that line
915
	 is an insertion or deletion.
916
	 Allocate an extra element, always zero, at each end of each vector.  */
917
 
918
      size_t s = filevec[0].buffered_lines + filevec[1].buffered_lines + 4;
919
      filevec[0].changed_flag = xmalloc (s);
920
      bzero (filevec[0].changed_flag, s);
921
      filevec[0].changed_flag++;
922
      filevec[1].changed_flag = filevec[0].changed_flag
923
				+ filevec[0].buffered_lines + 2;
924
 
925
      /* Some lines are obviously insertions or deletions
926
	 because they don't match anything.  Detect them now, and
927
	 avoid even thinking about them in the main comparison algorithm.  */
928
 
929
      discard_confusing_lines (filevec);
930
 
931
      /* Now do the main comparison algorithm, considering just the
932
	 undiscarded lines.  */
933
 
934
      xvec = filevec[0].undiscarded;
935
      yvec = filevec[1].undiscarded;
936
      diags = filevec[0].nondiscarded_lines + filevec[1].nondiscarded_lines + 3;
937
      fdiag = (int *) xmalloc (diags * (2 * sizeof (int)));
938
      bdiag = fdiag + diags;
939
      fdiag += filevec[1].nondiscarded_lines + 1;
940
      bdiag += filevec[1].nondiscarded_lines + 1;
941
 
942
      /* Set TOO_EXPENSIVE to be approximate square root of input size,
943
	 bounded below by 256.  */
944
      too_expensive = 1;
945
      for (i = filevec[0].nondiscarded_lines + filevec[1].nondiscarded_lines;
946
	   i != 0; i >>= 2)
947
	too_expensive <<= 1;
948
      too_expensive = max (256, too_expensive);
949
 
950
      files[0] = filevec[0];
951
      files[1] = filevec[1];
952
 
953
      compareseq (0, filevec[0].nondiscarded_lines,
954
		  0, filevec[1].nondiscarded_lines, no_discards);
955
 
956
      free (fdiag - (filevec[1].nondiscarded_lines + 1));
957
 
958
      /* Modify the results slightly to make them prettier
959
	 in cases where that can validly be done.  */
960
 
961
      shift_boundaries (filevec);
962
 
963
      /* Get the results of comparison in the form of a chain
964
	 of `struct change's -- an edit script.  */
965
 
966
      if (output_style == OUTPUT_ED)
967
	script = build_reverse_script (filevec);
968
      else
969
	script = build_script (filevec);
970
 
971
      /* Set CHANGES if we had any diffs.
972
	 If some changes are ignored, we must scan the script to decide.  */
973
      if (ignore_blank_lines_flag || ignore_regexp_list)
974
	{
975
	  struct change *next = script;
976
	  changes = 0;
977
 
978
	  while (next && changes == 0)
979
	    {
980
	      struct change *this, *end;
981
	      int first0, last0, first1, last1, deletes, inserts;
982
 
983
	      /* Find a set of changes that belong together.  */
984
	      this = next;
985
	      end = find_change (next);
986
 
987
	      /* Disconnect them from the rest of the changes, making them
988
		 a hunk, and remember the rest for next iteration.  */
989
	      next = end->link;
990
	      end->link = 0;
991
 
992
	      /* Determine whether this hunk is really a difference.  */
993
	      analyze_hunk (this, &first0, &last0, &first1, &last1,
994
			    &deletes, &inserts);
995
 
996
	      /* Reconnect the script so it will all be freed properly.  */
997
	      end->link = next;
998
 
999
	      if (deletes || inserts)
1000
		changes = 1;
1001
	    }
1002
	}
1003
      else
1004
	changes = (script != 0);
1005
 
1006
      if (no_details_flag)
1007
	briefly_report (changes, filevec);
1008
      else
1009
	{
1010
	  if (changes || ! no_diff_means_no_output)
1011
	    {
1012
	      /* Record info for starting up output,
1013
		 to be used if and when we have some output to print.  */
1014
	      setup_output (files[0].name, files[1].name, depth);
1015
 
1016
	      switch (output_style)
1017
		{
1018
		case OUTPUT_CONTEXT:
1019
		  print_context_script (script, 0);
1020
		  break;
1021
 
1022
		case OUTPUT_UNIFIED:
1023
		  print_context_script (script, 1);
1024
		  break;
1025
 
1026
		case OUTPUT_ED:
1027
		  print_ed_script (script);
1028
		  break;
1029
 
1030
		case OUTPUT_FORWARD_ED:
1031
		  pr_forward_ed_script (script);
1032
		  break;
1033
 
1034
		case OUTPUT_RCS:
1035
		  print_rcs_script (script);
1036
		  break;
1037
 
1038
		case OUTPUT_NORMAL:
1039
		  print_normal_script (script);
1040
		  break;
1041
 
1042
		case OUTPUT_IFDEF:
1043
		  print_ifdef_script (script);
1044
		  break;
1045
 
1046
		case OUTPUT_SDIFF:
1047
		  print_sdiff_script (script);
1048
		}
1049
 
1050
	      finish_output ();
1051
	    }
1052
	}
1053
 
1054
      free (filevec[0].undiscarded);
1055
 
1056
      free (filevec[0].changed_flag - 1);
1057
 
1058
      for (i = 1; i >= 0; --i)
1059
	free (filevec[i].equivs);
1060
 
1061
      for (i = 0; i < 2; ++i)
1062
	free (filevec[i].linbuf + filevec[i].linbuf_base);
1063
 
1064
      for (e = script; e; e = p)
1065
	{
1066
	  p = e->link;
1067
	  free (e);
1068
	}
1069
 
1070
      if (! ROBUST_OUTPUT_STYLE (output_style))
1071
	for (i = 0; i < 2; ++i)
1072
	  if (filevec[i].missing_newline)
1073
	    {
1074
	      error ("No newline at end of file %s", filevec[i].name, "");
1075
	      changes = 2;
1076
	    }
1077
    }
1078
 
1079
  if (filevec[0].buffer != filevec[1].buffer)
1080
    free (filevec[0].buffer);
1081
  free (filevec[1].buffer);
1082
 
1083
  return changes;
1084
}