Warning: Attempt to read property "date" on null in /usr/local/www/websvn.planix.org/blame.php on line 247

Warning: Attempt to read property "msg" on null in /usr/local/www/websvn.planix.org/blame.php on line 247
WebSVN – planix.SVN – Blame – /os/branches/feature_tlsv12/sys/src/cmd/bzip2/lib/blocksort.c – Rev 2

Subversion Repositories planix.SVN

Rev

Go to most recent revision | Details | Last modification | View Log | RSS feed

Rev Author Line No. Line
2 - 1
 
2
/*-------------------------------------------------------------*/
3
/*--- Block sorting machinery                               ---*/
4
/*---                                           blocksort.c ---*/
5
/*-------------------------------------------------------------*/
6
 
7
/*--
8
  This file is a part of bzip2 and/or libbzip2, a program and
9
  library for lossless, block-sorting data compression.
10
 
11
  Copyright (C) 1996-2000 Julian R Seward.  All rights reserved.
12
 
13
  Redistribution and use in source and binary forms, with or without
14
  modification, are permitted provided that the following conditions
15
  are met:
16
 
17
  1. Redistributions of source code must retain the above copyright
18
     notice, this list of conditions and the following disclaimer.
19
 
20
  2. The origin of this software must not be misrepresented; you must 
21
     not claim that you wrote the original software.  If you use this 
22
     software in a product, an acknowledgment in the product 
23
     documentation would be appreciated but is not required.
24
 
25
  3. Altered source versions must be plainly marked as such, and must
26
     not be misrepresented as being the original software.
27
 
28
  4. The name of the author may not be used to endorse or promote 
29
     products derived from this software without specific prior written 
30
     permission.
31
 
32
  THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
33
  OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
34
  WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
35
  ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
36
  DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
37
  DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
38
  GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
39
  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
40
  WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
41
  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
42
  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
43
 
44
  Julian Seward, Cambridge, UK.
45
  jseward@acm.org
46
  bzip2/libbzip2 version 1.0 of 21 March 2000
47
 
48
  This program is based on (at least) the work of:
49
     Mike Burrows
50
     David Wheeler
51
     Peter Fenwick
52
     Alistair Moffat
53
     Radford Neal
54
     Ian H. Witten
55
     Robert Sedgewick
56
     Jon L. Bentley
57
 
58
  For more information on these sources, see the manual.
59
 
60
  To get some idea how the block sorting algorithms in this file 
61
  work, read my paper 
62
     On the Performance of BWT Sorting Algorithms
63
  in Proceedings of the IEEE Data Compression Conference 2000,
64
  Snowbird, Utah, USA, 27-30 March 2000.  The main sort in this
65
  file implements the algorithm called  cache  in the paper.
66
--*/
67
 
68
#include "os.h"
69
#include "bzlib.h"
70
#include "bzlib_private.h"
71
 
72
/*---------------------------------------------*/
73
/*--- Fallback O(N log(N)^2) sorting        ---*/
74
/*--- algorithm, for repetitive blocks      ---*/
75
/*---------------------------------------------*/
76
 
77
/*---------------------------------------------*/
78
static 
79
__inline__
80
void fallbackSimpleSort ( UInt32* fmap, 
81
                          UInt32* eclass, 
82
                          Int32   lo, 
83
                          Int32   hi )
84
{
85
   Int32 i, j, tmp;
86
   UInt32 ec_tmp;
87
 
88
   if (lo == hi) return;
89
 
90
   if (hi - lo > 3) {
91
      for ( i = hi-4; i >= lo; i-- ) {
92
         tmp = fmap[i];
93
         ec_tmp = eclass[tmp];
94
         for ( j = i+4; j <= hi && ec_tmp > eclass[fmap[j]]; j += 4 )
95
            fmap[j-4] = fmap[j];
96
         fmap[j-4] = tmp;
97
      }
98
   }
99
 
100
   for ( i = hi-1; i >= lo; i-- ) {
101
      tmp = fmap[i];
102
      ec_tmp = eclass[tmp];
103
      for ( j = i+1; j <= hi && ec_tmp > eclass[fmap[j]]; j++ )
104
         fmap[j-1] = fmap[j];
105
      fmap[j-1] = tmp;
106
   }
107
}
108
 
109
 
110
/*---------------------------------------------*/
111
#define fswap(zz1, zz2) \
112
   { Int32 zztmp = zz1; zz1 = zz2; zz2 = zztmp; }
113
 
114
#define fvswap(zzp1, zzp2, zzn)       \
115
{                                     \
116
   Int32 yyp1 = (zzp1);               \
117
   Int32 yyp2 = (zzp2);               \
118
   Int32 yyn  = (zzn);                \
119
   while (yyn > 0) {                  \
120
      fswap(fmap[yyp1], fmap[yyp2]);  \
121
      yyp1++; yyp2++; yyn--;          \
122
   }                                  \
123
}
124
 
125
 
126
#define fmin(a,b) ((a) < (b)) ? (a) : (b)
127
 
128
#define fpush(lz,hz) { stackLo[sp] = lz; \
129
                       stackHi[sp] = hz; \
130
                       sp++; }
131
 
132
#define fpop(lz,hz) { sp--;              \
133
                      lz = stackLo[sp];  \
134
                      hz = stackHi[sp]; }
135
 
136
#define FALLBACK_QSORT_SMALL_THRESH 10
137
#define FALLBACK_QSORT_STACK_SIZE   100
138
 
139
 
140
static
141
void fallbackQSort3 ( UInt32* fmap, 
142
                      UInt32* eclass,
143
                      Int32   loSt, 
144
                      Int32   hiSt )
145
{
146
   Int32 unLo, unHi, ltLo, gtHi, n, m;
147
   Int32 sp, lo, hi;
148
   UInt32 med, r, r3;
149
   Int32 stackLo[FALLBACK_QSORT_STACK_SIZE];
150
   Int32 stackHi[FALLBACK_QSORT_STACK_SIZE];
151
 
152
   r = 0;
153
 
154
   sp = 0;
155
   fpush ( loSt, hiSt );
156
 
157
   while (sp > 0) {
158
 
159
      AssertH ( sp < FALLBACK_QSORT_STACK_SIZE, 1004 );
160
 
161
      fpop ( lo, hi );
162
      if (hi - lo < FALLBACK_QSORT_SMALL_THRESH) {
163
         fallbackSimpleSort ( fmap, eclass, lo, hi );
164
         continue;
165
      }
166
 
167
      /* Random partitioning.  Median of 3 sometimes fails to
168
         avoid bad cases.  Median of 9 seems to help but 
169
         looks rather expensive.  This too seems to work but
170
         is cheaper.  Guidance for the magic constants 
171
         7621 and 32768 is taken from Sedgewick's algorithms
172
         book, chapter 35.
173
      */
174
      r = ((r * 7621) + 1) % 32768;
175
      r3 = r % 3;
176
      if (r3 == 0) med = eclass[fmap[lo]]; else
177
      if (r3 == 1) med = eclass[fmap[(lo+hi)>>1]]; else
178
                   med = eclass[fmap[hi]];
179
 
180
      unLo = ltLo = lo;
181
      unHi = gtHi = hi;
182
 
183
      while (1) {
184
         while (1) {
185
            if (unLo > unHi) break;
186
            n = (Int32)eclass[fmap[unLo]] - (Int32)med;
187
            if (n == 0) { 
188
               fswap(fmap[unLo], fmap[ltLo]); 
189
               ltLo++; unLo++; 
190
               continue; 
191
            };
192
            if (n > 0) break;
193
            unLo++;
194
         }
195
         while (1) {
196
            if (unLo > unHi) break;
197
            n = (Int32)eclass[fmap[unHi]] - (Int32)med;
198
            if (n == 0) { 
199
               fswap(fmap[unHi], fmap[gtHi]); 
200
               gtHi--; unHi--; 
201
               continue; 
202
            };
203
            if (n < 0) break;
204
            unHi--;
205
         }
206
         if (unLo > unHi) break;
207
         fswap(fmap[unLo], fmap[unHi]); unLo++; unHi--;
208
      }
209
 
210
      AssertD ( unHi == unLo-1, "fallbackQSort3(2)" );
211
 
212
      if (gtHi < ltLo) continue;
213
 
214
      n = fmin(ltLo-lo, unLo-ltLo); fvswap(lo, unLo-n, n);
215
      m = fmin(hi-gtHi, gtHi-unHi); fvswap(unLo, hi-m+1, m);
216
 
217
      n = lo + unLo - ltLo - 1;
218
      m = hi - (gtHi - unHi) + 1;
219
 
220
      if (n - lo > hi - m) {
221
         fpush ( lo, n );
222
         fpush ( m, hi );
223
      } else {
224
         fpush ( m, hi );
225
         fpush ( lo, n );
226
      }
227
   }
228
}
229
 
230
#undef fmin
231
#undef fpush
232
#undef fpop
233
#undef fswap
234
#undef fvswap
235
#undef FALLBACK_QSORT_SMALL_THRESH
236
#undef FALLBACK_QSORT_STACK_SIZE
237
 
238
 
239
/*---------------------------------------------*/
240
/* Pre:
241
      nblock > 0
242
      eclass exists for [0 .. nblock-1]
243
      ((UChar*)eclass) [0 .. nblock-1] holds block
244
      ptr exists for [0 .. nblock-1]
245
 
246
   Post:
247
      ((UChar*)eclass) [0 .. nblock-1] holds block
248
      All other areas of eclass destroyed
249
      fmap [0 .. nblock-1] holds sorted order
250
      bhtab [ 0 .. 2+(nblock/32) ] destroyed
251
*/
252
 
253
#define       SET_BH(zz)  bhtab[(zz) >> 5] |= (1 << ((zz) & 31))
254
#define     CLEAR_BH(zz)  bhtab[(zz) >> 5] &= ~(1 << ((zz) & 31))
255
#define     ISSET_BH(zz)  (bhtab[(zz) >> 5] & (1 << ((zz) & 31)))
256
#define      WORD_BH(zz)  bhtab[(zz) >> 5]
257
#define UNALIGNED_BH(zz)  ((zz) & 0x01f)
258
 
259
static
260
void fallbackSort ( UInt32* fmap, 
261
                    UInt32* eclass, 
262
                    UInt32* bhtab,
263
                    Int32   nblock,
264
                    Int32   verb )
265
{
266
   Int32 ftab[257];
267
   Int32 ftabCopy[256];
268
   Int32 H, i, j, k, l, r, cc, cc1;
269
   Int32 nNotDone;
270
   Int32 nBhtab;
271
   UChar* eclass8 = (UChar*)eclass;
272
 
273
   /*--
274
      Initial 1-char radix sort to generate
275
      initial fmap and initial BH bits.
276
   --*/
277
   if (verb >= 4)
278
      VPrintf0 ( "        bucket sorting ...\n" );
279
   for (i = 0; i < 257;    i++) ftab[i] = 0;
280
   for (i = 0; i < nblock; i++) ftab[eclass8[i]]++;
281
   for (i = 0; i < 256;    i++) ftabCopy[i] = ftab[i];
282
   for (i = 1; i < 257;    i++) ftab[i] += ftab[i-1];
283
 
284
   for (i = 0; i < nblock; i++) {
285
      j = eclass8[i];
286
      k = ftab[j] - 1;
287
      ftab[j] = k;
288
      fmap[k] = i;
289
   }
290
 
291
   nBhtab = 2 + (nblock / 32);
292
   for (i = 0; i < nBhtab; i++) bhtab[i] = 0;
293
   for (i = 0; i < 256; i++) SET_BH(ftab[i]);
294
 
295
   /*--
296
      Inductively refine the buckets.  Kind-of an
297
      "exponential radix sort" (!), inspired by the
298
      Manber-Myers suffix array construction algorithm.
299
   --*/
300
 
301
   /*-- set sentinel bits for block-end detection --*/
302
   for (i = 0; i < 32; i++) { 
303
      SET_BH(nblock + 2*i);
304
      CLEAR_BH(nblock + 2*i + 1);
305
   }
306
 
307
   /*-- the log(N) loop --*/
308
   H = 1;
309
   while (1) {
310
 
311
      if (verb >= 4) 
312
         VPrintf1 ( "        depth %6d has ", H );
313
 
314
      j = 0;
315
      for (i = 0; i < nblock; i++) {
316
         if (ISSET_BH(i)) j = i;
317
         k = fmap[i] - H; if (k < 0) k += nblock;
318
         eclass[k] = j;
319
      }
320
 
321
      nNotDone = 0;
322
      r = -1;
323
      while (1) {
324
 
325
	 /*-- find the next non-singleton bucket --*/
326
         k = r + 1;
327
         while (ISSET_BH(k) && UNALIGNED_BH(k)) k++;
328
         if (ISSET_BH(k)) {
329
            while (WORD_BH(k) == 0xffffffff) k += 32;
330
            while (ISSET_BH(k)) k++;
331
         }
332
         l = k - 1;
333
         if (l >= nblock) break;
334
         while (!ISSET_BH(k) && UNALIGNED_BH(k)) k++;
335
         if (!ISSET_BH(k)) {
336
            while (WORD_BH(k) == 0x00000000) k += 32;
337
            while (!ISSET_BH(k)) k++;
338
         }
339
         r = k - 1;
340
         if (r >= nblock) break;
341
 
342
         /*-- now [l, r] bracket current bucket --*/
343
         if (r > l) {
344
            nNotDone += (r - l + 1);
345
            fallbackQSort3 ( fmap, eclass, l, r );
346
 
347
            /*-- scan bucket and generate header bits-- */
348
            cc = -1;
349
            for (i = l; i <= r; i++) {
350
               cc1 = eclass[fmap[i]];
351
               if (cc != cc1) { SET_BH(i); cc = cc1; };
352
            }
353
         }
354
      }
355
 
356
      if (verb >= 4) 
357
         VPrintf1 ( "%6d unresolved strings\n", nNotDone );
358
 
359
      H *= 2;
360
      if (H > nblock || nNotDone == 0) break;
361
   }
362
 
363
   /*-- 
364
      Reconstruct the original block in
365
      eclass8 [0 .. nblock-1], since the
366
      previous phase destroyed it.
367
   --*/
368
   if (verb >= 4)
369
      VPrintf0 ( "        reconstructing block ...\n" );
370
   j = 0;
371
   for (i = 0; i < nblock; i++) {
372
      while (ftabCopy[j] == 0) j++;
373
      ftabCopy[j]--;
374
      eclass8[fmap[i]] = (UChar)j;
375
   }
376
   AssertH ( j < 256, 1005 );
377
}
378
 
379
#undef       SET_BH
380
#undef     CLEAR_BH
381
#undef     ISSET_BH
382
#undef      WORD_BH
383
#undef UNALIGNED_BH
384
 
385
 
386
/*---------------------------------------------*/
387
/*--- The main, O(N^2 log(N)) sorting       ---*/
388
/*--- algorithm.  Faster for "normal"       ---*/
389
/*--- non-repetitive blocks.                ---*/
390
/*---------------------------------------------*/
391
 
392
/*---------------------------------------------*/
393
static
394
__inline__
395
Bool mainGtU ( UInt32  i1, 
396
               UInt32  i2,
397
               UChar*  block, 
398
               UInt16* quadrant,
399
               UInt32  nblock,
400
               Int32*  budget )
401
{
402
   Int32  k;
403
   UChar  c1, c2;
404
   UInt16 s1, s2;
405
 
406
   AssertD ( i1 != i2, "mainGtU" );
407
   /* 1 */
408
   c1 = block[i1]; c2 = block[i2];
409
   if (c1 != c2) return (c1 > c2);
410
   i1++; i2++;
411
   /* 2 */
412
   c1 = block[i1]; c2 = block[i2];
413
   if (c1 != c2) return (c1 > c2);
414
   i1++; i2++;
415
   /* 3 */
416
   c1 = block[i1]; c2 = block[i2];
417
   if (c1 != c2) return (c1 > c2);
418
   i1++; i2++;
419
   /* 4 */
420
   c1 = block[i1]; c2 = block[i2];
421
   if (c1 != c2) return (c1 > c2);
422
   i1++; i2++;
423
   /* 5 */
424
   c1 = block[i1]; c2 = block[i2];
425
   if (c1 != c2) return (c1 > c2);
426
   i1++; i2++;
427
   /* 6 */
428
   c1 = block[i1]; c2 = block[i2];
429
   if (c1 != c2) return (c1 > c2);
430
   i1++; i2++;
431
   /* 7 */
432
   c1 = block[i1]; c2 = block[i2];
433
   if (c1 != c2) return (c1 > c2);
434
   i1++; i2++;
435
   /* 8 */
436
   c1 = block[i1]; c2 = block[i2];
437
   if (c1 != c2) return (c1 > c2);
438
   i1++; i2++;
439
   /* 9 */
440
   c1 = block[i1]; c2 = block[i2];
441
   if (c1 != c2) return (c1 > c2);
442
   i1++; i2++;
443
   /* 10 */
444
   c1 = block[i1]; c2 = block[i2];
445
   if (c1 != c2) return (c1 > c2);
446
   i1++; i2++;
447
   /* 11 */
448
   c1 = block[i1]; c2 = block[i2];
449
   if (c1 != c2) return (c1 > c2);
450
   i1++; i2++;
451
   /* 12 */
452
   c1 = block[i1]; c2 = block[i2];
453
   if (c1 != c2) return (c1 > c2);
454
   i1++; i2++;
455
 
456
   k = nblock + 8;
457
 
458
   do {
459
      /* 1 */
460
      c1 = block[i1]; c2 = block[i2];
461
      if (c1 != c2) return (c1 > c2);
462
      s1 = quadrant[i1]; s2 = quadrant[i2];
463
      if (s1 != s2) return (s1 > s2);
464
      i1++; i2++;
465
      /* 2 */
466
      c1 = block[i1]; c2 = block[i2];
467
      if (c1 != c2) return (c1 > c2);
468
      s1 = quadrant[i1]; s2 = quadrant[i2];
469
      if (s1 != s2) return (s1 > s2);
470
      i1++; i2++;
471
      /* 3 */
472
      c1 = block[i1]; c2 = block[i2];
473
      if (c1 != c2) return (c1 > c2);
474
      s1 = quadrant[i1]; s2 = quadrant[i2];
475
      if (s1 != s2) return (s1 > s2);
476
      i1++; i2++;
477
      /* 4 */
478
      c1 = block[i1]; c2 = block[i2];
479
      if (c1 != c2) return (c1 > c2);
480
      s1 = quadrant[i1]; s2 = quadrant[i2];
481
      if (s1 != s2) return (s1 > s2);
482
      i1++; i2++;
483
      /* 5 */
484
      c1 = block[i1]; c2 = block[i2];
485
      if (c1 != c2) return (c1 > c2);
486
      s1 = quadrant[i1]; s2 = quadrant[i2];
487
      if (s1 != s2) return (s1 > s2);
488
      i1++; i2++;
489
      /* 6 */
490
      c1 = block[i1]; c2 = block[i2];
491
      if (c1 != c2) return (c1 > c2);
492
      s1 = quadrant[i1]; s2 = quadrant[i2];
493
      if (s1 != s2) return (s1 > s2);
494
      i1++; i2++;
495
      /* 7 */
496
      c1 = block[i1]; c2 = block[i2];
497
      if (c1 != c2) return (c1 > c2);
498
      s1 = quadrant[i1]; s2 = quadrant[i2];
499
      if (s1 != s2) return (s1 > s2);
500
      i1++; i2++;
501
      /* 8 */
502
      c1 = block[i1]; c2 = block[i2];
503
      if (c1 != c2) return (c1 > c2);
504
      s1 = quadrant[i1]; s2 = quadrant[i2];
505
      if (s1 != s2) return (s1 > s2);
506
      i1++; i2++;
507
 
508
      if (i1 >= nblock) i1 -= nblock;
509
      if (i2 >= nblock) i2 -= nblock;
510
 
511
      k -= 8;
512
      (*budget)--;
513
   }
514
      while (k >= 0);
515
 
516
   return False;
517
}
518
 
519
 
520
/*---------------------------------------------*/
521
/*--
522
   Knuth's increments seem to work better
523
   than Incerpi-Sedgewick here.  Possibly
524
   because the number of elems to sort is
525
   usually small, typically <= 20.
526
--*/
527
static
528
Int32 incs[14] = { 1, 4, 13, 40, 121, 364, 1093, 3280,
529
                   9841, 29524, 88573, 265720,
530
                   797161, 2391484 };
531
 
532
static
533
void mainSimpleSort ( UInt32* ptr,
534
                      UChar*  block,
535
                      UInt16* quadrant,
536
                      Int32   nblock,
537
                      Int32   lo, 
538
                      Int32   hi, 
539
                      Int32   d,
540
                      Int32*  budget )
541
{
542
   Int32 i, j, h, bigN, hp;
543
   UInt32 v;
544
 
545
   bigN = hi - lo + 1;
546
   if (bigN < 2) return;
547
 
548
   hp = 0;
549
   while (incs[hp] < bigN) hp++;
550
   hp--;
551
 
552
   for (; hp >= 0; hp--) {
553
      h = incs[hp];
554
 
555
      i = lo + h;
556
      while (True) {
557
 
558
         /*-- copy 1 --*/
559
         if (i > hi) break;
560
         v = ptr[i];
561
         j = i;
562
         while ( mainGtU ( 
563
                    ptr[j-h]+d, v+d, block, quadrant, nblock, budget 
564
                 ) ) {
565
            ptr[j] = ptr[j-h];
566
            j = j - h;
567
            if (j <= (lo + h - 1)) break;
568
         }
569
         ptr[j] = v;
570
         i++;
571
 
572
         /*-- copy 2 --*/
573
         if (i > hi) break;
574
         v = ptr[i];
575
         j = i;
576
         while ( mainGtU ( 
577
                    ptr[j-h]+d, v+d, block, quadrant, nblock, budget 
578
                 ) ) {
579
            ptr[j] = ptr[j-h];
580
            j = j - h;
581
            if (j <= (lo + h - 1)) break;
582
         }
583
         ptr[j] = v;
584
         i++;
585
 
586
         /*-- copy 3 --*/
587
         if (i > hi) break;
588
         v = ptr[i];
589
         j = i;
590
         while ( mainGtU ( 
591
                    ptr[j-h]+d, v+d, block, quadrant, nblock, budget 
592
                 ) ) {
593
            ptr[j] = ptr[j-h];
594
            j = j - h;
595
            if (j <= (lo + h - 1)) break;
596
         }
597
         ptr[j] = v;
598
         i++;
599
 
600
         if (*budget < 0) return;
601
      }
602
   }
603
}
604
 
605
 
606
/*---------------------------------------------*/
607
/*--
608
   The following is an implementation of
609
   an elegant 3-way quicksort for strings,
610
   described in a paper "Fast Algorithms for
611
   Sorting and Searching Strings", by Robert
612
   Sedgewick and Jon L. Bentley.
613
--*/
614
 
615
#define mswap(zz1, zz2) \
616
   { Int32 zztmp = zz1; zz1 = zz2; zz2 = zztmp; }
617
 
618
#define mvswap(zzp1, zzp2, zzn)       \
619
{                                     \
620
   Int32 yyp1 = (zzp1);               \
621
   Int32 yyp2 = (zzp2);               \
622
   Int32 yyn  = (zzn);                \
623
   while (yyn > 0) {                  \
624
      mswap(ptr[yyp1], ptr[yyp2]);    \
625
      yyp1++; yyp2++; yyn--;          \
626
   }                                  \
627
}
628
 
629
static 
630
__inline__
631
UChar mmed3 ( UChar a, UChar b, UChar c )
632
{
633
   UChar t;
634
   if (a > b) { t = a; a = b; b = t; };
635
   if (b > c) { 
636
      b = c;
637
      if (a > b) b = a;
638
   }
639
   return b;
640
}
641
 
642
#define mmin(a,b) ((a) < (b)) ? (a) : (b)
643
 
644
#define mpush(lz,hz,dz) { stackLo[sp] = lz; \
645
                          stackHi[sp] = hz; \
646
                          stackD [sp] = dz; \
647
                          sp++; }
648
 
649
#define mpop(lz,hz,dz) { sp--;             \
650
                         lz = stackLo[sp]; \
651
                         hz = stackHi[sp]; \
652
                         dz = stackD [sp]; }
653
 
654
 
655
#define mnextsize(az) (nextHi[az]-nextLo[az])
656
 
657
#define mnextswap(az,bz)                                        \
658
   { Int32 tz;                                                  \
659
     tz = nextLo[az]; nextLo[az] = nextLo[bz]; nextLo[bz] = tz; \
660
     tz = nextHi[az]; nextHi[az] = nextHi[bz]; nextHi[bz] = tz; \
661
     tz = nextD [az]; nextD [az] = nextD [bz]; nextD [bz] = tz; }
662
 
663
 
664
#define MAIN_QSORT_SMALL_THRESH 20
665
#define MAIN_QSORT_DEPTH_THRESH (BZ_N_RADIX + BZ_N_QSORT)
666
#define MAIN_QSORT_STACK_SIZE 100
667
 
668
static
669
void mainQSort3 ( UInt32* ptr,
670
                  UChar*  block,
671
                  UInt16* quadrant,
672
                  Int32   nblock,
673
                  Int32   loSt, 
674
                  Int32   hiSt, 
675
                  Int32   dSt,
676
                  Int32*  budget )
677
{
678
   Int32 unLo, unHi, ltLo, gtHi, n, m, med;
679
   Int32 sp, lo, hi, d;
680
 
681
   Int32 stackLo[MAIN_QSORT_STACK_SIZE];
682
   Int32 stackHi[MAIN_QSORT_STACK_SIZE];
683
   Int32 stackD [MAIN_QSORT_STACK_SIZE];
684
 
685
   Int32 nextLo[3];
686
   Int32 nextHi[3];
687
   Int32 nextD [3];
688
 
689
   sp = 0;
690
   mpush ( loSt, hiSt, dSt );
691
 
692
   while (sp > 0) {
693
 
694
      AssertH ( sp < MAIN_QSORT_STACK_SIZE, 1001 );
695
 
696
      mpop ( lo, hi, d );
697
      if (hi - lo < MAIN_QSORT_SMALL_THRESH || 
698
          d > MAIN_QSORT_DEPTH_THRESH) {
699
         mainSimpleSort ( ptr, block, quadrant, nblock, lo, hi, d, budget );
700
         if (*budget < 0) return;
701
         continue;
702
      }
703
 
704
      med = (Int32) 
705
            mmed3 ( block[ptr[ lo         ]+d],
706
                    block[ptr[ hi         ]+d],
707
                    block[ptr[ (lo+hi)>>1 ]+d] );
708
 
709
      unLo = ltLo = lo;
710
      unHi = gtHi = hi;
711
 
712
      while (True) {
713
         while (True) {
714
            if (unLo > unHi) break;
715
            n = ((Int32)block[ptr[unLo]+d]) - med;
716
            if (n == 0) { 
717
               mswap(ptr[unLo], ptr[ltLo]); 
718
               ltLo++; unLo++; continue; 
719
            };
720
            if (n >  0) break;
721
            unLo++;
722
         }
723
         while (True) {
724
            if (unLo > unHi) break;
725
            n = ((Int32)block[ptr[unHi]+d]) - med;
726
            if (n == 0) { 
727
               mswap(ptr[unHi], ptr[gtHi]); 
728
               gtHi--; unHi--; continue; 
729
            };
730
            if (n <  0) break;
731
            unHi--;
732
         }
733
         if (unLo > unHi) break;
734
         mswap(ptr[unLo], ptr[unHi]); unLo++; unHi--;
735
      }
736
 
737
      AssertD ( unHi == unLo-1, "mainQSort3(2)" );
738
 
739
      if (gtHi < ltLo) {
740
         mpush(lo, hi, d+1 );
741
         continue;
742
      }
743
 
744
      n = mmin(ltLo-lo, unLo-ltLo); mvswap(lo, unLo-n, n);
745
      m = mmin(hi-gtHi, gtHi-unHi); mvswap(unLo, hi-m+1, m);
746
 
747
      n = lo + unLo - ltLo - 1;
748
      m = hi - (gtHi - unHi) + 1;
749
 
750
      nextLo[0] = lo;  nextHi[0] = n;   nextD[0] = d;
751
      nextLo[1] = m;   nextHi[1] = hi;  nextD[1] = d;
752
      nextLo[2] = n+1; nextHi[2] = m-1; nextD[2] = d+1;
753
 
754
      if (mnextsize(0) < mnextsize(1)) mnextswap(0,1);
755
      if (mnextsize(1) < mnextsize(2)) mnextswap(1,2);
756
      if (mnextsize(0) < mnextsize(1)) mnextswap(0,1);
757
 
758
      AssertD (mnextsize(0) >= mnextsize(1), "mainQSort3(8)" );
759
      AssertD (mnextsize(1) >= mnextsize(2), "mainQSort3(9)" );
760
 
761
      mpush (nextLo[0], nextHi[0], nextD[0]);
762
      mpush (nextLo[1], nextHi[1], nextD[1]);
763
      mpush (nextLo[2], nextHi[2], nextD[2]);
764
   }
765
}
766
 
767
#undef mswap
768
#undef mvswap
769
#undef mpush
770
#undef mpop
771
#undef mmin
772
#undef mnextsize
773
#undef mnextswap
774
#undef MAIN_QSORT_SMALL_THRESH
775
#undef MAIN_QSORT_DEPTH_THRESH
776
#undef MAIN_QSORT_STACK_SIZE
777
 
778
 
779
/*---------------------------------------------*/
780
/* Pre:
781
      nblock > N_OVERSHOOT
782
      block32 exists for [0 .. nblock-1 +N_OVERSHOOT]
783
      ((UChar*)block32) [0 .. nblock-1] holds block
784
      ptr exists for [0 .. nblock-1]
785
 
786
   Post:
787
      ((UChar*)block32) [0 .. nblock-1] holds block
788
      All other areas of block32 destroyed
789
      ftab [0 .. 65536 ] destroyed
790
      ptr [0 .. nblock-1] holds sorted order
791
      if (*budget < 0), sorting was abandoned
792
*/
793
 
794
#define BIGFREQ(b) (ftab[((b)+1) << 8] - ftab[(b) << 8])
795
#define SETMASK (1 << 21)
796
#define CLEARMASK (~(SETMASK))
797
 
798
static
799
void mainSort ( UInt32* ptr, 
800
                UChar*  block,
801
                UInt16* quadrant, 
802
                UInt32* ftab,
803
                Int32   nblock,
804
                Int32   verb,
805
                Int32*  budget )
806
{
807
   Int32  i, j, k, ss, sb;
808
   Int32  runningOrder[256];
809
   Bool   bigDone[256];
810
   Int32  copyStart[256];
811
   Int32  copyEnd  [256];
812
   UChar  c1;
813
   Int32  numQSorted;
814
   UInt16 s;
815
   if (verb >= 4) VPrintf0 ( "        main sort initialise ...\n" );
816
 
817
   /*-- set up the 2-byte frequency table --*/
818
   for (i = 65536; i >= 0; i--) ftab[i] = 0;
819
 
820
   j = block[0] << 8;
821
   i = nblock-1;
822
   for (; i >= 3; i -= 4) {
823
      quadrant[i] = 0;
824
      j = (j >> 8) | ( ((UInt16)block[i]) << 8);
825
      ftab[j]++;
826
      quadrant[i-1] = 0;
827
      j = (j >> 8) | ( ((UInt16)block[i-1]) << 8);
828
      ftab[j]++;
829
      quadrant[i-2] = 0;
830
      j = (j >> 8) | ( ((UInt16)block[i-2]) << 8);
831
      ftab[j]++;
832
      quadrant[i-3] = 0;
833
      j = (j >> 8) | ( ((UInt16)block[i-3]) << 8);
834
      ftab[j]++;
835
   }
836
   for (; i >= 0; i--) {
837
      quadrant[i] = 0;
838
      j = (j >> 8) | ( ((UInt16)block[i]) << 8);
839
      ftab[j]++;
840
   }
841
 
842
   /*-- (emphasises close relationship of block & quadrant) --*/
843
   for (i = 0; i < BZ_N_OVERSHOOT; i++) {
844
      block   [nblock+i] = block[i];
845
      quadrant[nblock+i] = 0;
846
   }
847
 
848
   if (verb >= 4) VPrintf0 ( "        bucket sorting ...\n" );
849
 
850
   /*-- Complete the initial radix sort --*/
851
   for (i = 1; i <= 65536; i++) ftab[i] += ftab[i-1];
852
 
853
   s = block[0] << 8;
854
   i = nblock-1;
855
   for (; i >= 3; i -= 4) {
856
      s = (s >> 8) | (block[i] << 8);
857
      j = ftab[s] -1;
858
      ftab[s] = j;
859
      ptr[j] = i;
860
      s = (s >> 8) | (block[i-1] << 8);
861
      j = ftab[s] -1;
862
      ftab[s] = j;
863
      ptr[j] = i-1;
864
      s = (s >> 8) | (block[i-2] << 8);
865
      j = ftab[s] -1;
866
      ftab[s] = j;
867
      ptr[j] = i-2;
868
      s = (s >> 8) | (block[i-3] << 8);
869
      j = ftab[s] -1;
870
      ftab[s] = j;
871
      ptr[j] = i-3;
872
   }
873
   for (; i >= 0; i--) {
874
      s = (s >> 8) | (block[i] << 8);
875
      j = ftab[s] -1;
876
      ftab[s] = j;
877
      ptr[j] = i;
878
   }
879
 
880
   /*--
881
      Now ftab contains the first loc of every small bucket.
882
      Calculate the running order, from smallest to largest
883
      big bucket.
884
   --*/
885
   for (i = 0; i <= 255; i++) {
886
      bigDone     [i] = False;
887
      runningOrder[i] = i;
888
   }
889
 
890
   {
891
      Int32 vv;
892
      Int32 h = 1;
893
      do h = 3 * h + 1; while (h <= 256);
894
      do {
895
         h = h / 3;
896
         for (i = h; i <= 255; i++) {
897
            vv = runningOrder[i];
898
            j = i;
899
            while ( BIGFREQ(runningOrder[j-h]) > BIGFREQ(vv) ) {
900
               runningOrder[j] = runningOrder[j-h];
901
               j = j - h;
902
               if (j <= (h - 1)) goto zero;
903
            }
904
            zero:
905
            runningOrder[j] = vv;
906
         }
907
      } while (h != 1);
908
   }
909
 
910
   /*--
911
      The main sorting loop.
912
   --*/
913
 
914
   numQSorted = 0;
915
 
916
   for (i = 0; i <= 255; i++) {
917
 
918
      /*--
919
         Process big buckets, starting with the least full.
920
         Basically this is a 3-step process in which we call
921
         mainQSort3 to sort the small buckets [ss, j], but
922
         also make a big effort to avoid the calls if we can.
923
      --*/
924
      ss = runningOrder[i];
925
 
926
      /*--
927
         Step 1:
928
         Complete the big bucket [ss] by quicksorting
929
         any unsorted small buckets [ss, j], for j != ss.  
930
         Hopefully previous pointer-scanning phases have already
931
         completed many of the small buckets [ss, j], so
932
         we don't have to sort them at all.
933
      --*/
934
      for (j = 0; j <= 255; j++) {
935
         if (j != ss) {
936
            sb = (ss << 8) + j;
937
            if ( ! (ftab[sb] & SETMASK) ) {
938
               Int32 lo = ftab[sb]   & CLEARMASK;
939
               Int32 hi = (ftab[sb+1] & CLEARMASK) - 1;
940
               if (hi > lo) {
941
                  if (verb >= 4)
942
                     VPrintf4 ( "        qsort [0x%x, 0x%x]   "
943
                                "done %d   this %d\n",
944
                                ss, j, numQSorted, hi - lo + 1 );
945
                  mainQSort3 ( 
946
                     ptr, block, quadrant, nblock, 
947
                     lo, hi, BZ_N_RADIX, budget 
948
                  );   
949
                  numQSorted += (hi - lo + 1);
950
                  if (*budget < 0) return;
951
               }
952
            }
953
            ftab[sb] |= SETMASK;
954
         }
955
      }
956
 
957
      AssertH ( !bigDone[ss], 1006 );
958
 
959
      /*--
960
         Step 2:
961
         Now scan this big bucket [ss] so as to synthesise the
962
         sorted order for small buckets [t, ss] for all t,
963
         including, magically, the bucket [ss,ss] too.
964
         This will avoid doing Real Work in subsequent Step 1's.
965
      --*/
966
      {
967
         for (j = 0; j <= 255; j++) {
968
            copyStart[j] =  ftab[(j << 8) + ss]     & CLEARMASK;
969
            copyEnd  [j] = (ftab[(j << 8) + ss + 1] & CLEARMASK) - 1;
970
         }
971
         for (j = ftab[ss << 8] & CLEARMASK; j < copyStart[ss]; j++) {
972
            k = ptr[j]-1; if (k < 0) k += nblock;
973
            c1 = block[k];
974
            if (!bigDone[c1])
975
               ptr[ copyStart[c1]++ ] = k;
976
         }
977
         for (j = (ftab[(ss+1) << 8] & CLEARMASK) - 1; j > copyEnd[ss]; j--) {
978
            k = ptr[j]-1; if (k < 0) k += nblock;
979
            c1 = block[k];
980
            if (!bigDone[c1]) 
981
               ptr[ copyEnd[c1]-- ] = k;
982
         }
983
      }
984
 
985
      AssertH ( copyStart[ss]-1 == copyEnd[ss], 1007 );
986
 
987
      for (j = 0; j <= 255; j++) ftab[(j << 8) + ss] |= SETMASK;
988
 
989
      /*--
990
         Step 3:
991
         The [ss] big bucket is now done.  Record this fact,
992
         and update the quadrant descriptors.  Remember to
993
         update quadrants in the overshoot area too, if
994
         necessary.  The "if (i < 255)" test merely skips
995
         this updating for the last bucket processed, since
996
         updating for the last bucket is pointless.
997
 
998
         The quadrant array provides a way to incrementally
999
         cache sort orderings, as they appear, so as to 
1000
         make subsequent comparisons in fullGtU() complete
1001
         faster.  For repetitive blocks this makes a big
1002
         difference (but not big enough to be able to avoid
1003
         the fallback sorting mechanism, exponential radix sort).
1004
 
1005
         The precise meaning is: at all times:
1006
 
1007
            for 0 <= i < nblock and 0 <= j <= nblock
1008
 
1009
            if block[i] != block[j], 
1010
 
1011
               then the relative values of quadrant[i] and 
1012
                    quadrant[j] are meaningless.
1013
 
1014
               else {
1015
                  if quadrant[i] < quadrant[j]
1016
                     then the string starting at i lexicographically
1017
                     precedes the string starting at j
1018
 
1019
                  else if quadrant[i] > quadrant[j]
1020
                     then the string starting at j lexicographically
1021
                     precedes the string starting at i
1022
 
1023
                  else
1024
                     the relative ordering of the strings starting
1025
                     at i and j has not yet been determined.
1026
               }
1027
      --*/
1028
      bigDone[ss] = True;
1029
 
1030
      if (i < 255) {
1031
         Int32 bbStart  = ftab[ss << 8] & CLEARMASK;
1032
         Int32 bbSize   = (ftab[(ss+1) << 8] & CLEARMASK) - bbStart;
1033
         Int32 shifts   = 0;
1034
 
1035
         while ((bbSize >> shifts) > 65534) shifts++;
1036
 
1037
         for (j = bbSize-1; j >= 0; j--) {
1038
            Int32 a2update     = ptr[bbStart + j];
1039
            UInt16 qVal        = (UInt16)(j >> shifts);
1040
            quadrant[a2update] = qVal;
1041
            if (a2update < BZ_N_OVERSHOOT)
1042
               quadrant[a2update + nblock] = qVal;
1043
         }
1044
         AssertH ( ((bbSize-1) >> shifts) <= 65535, 1002 );
1045
      }
1046
 
1047
   }
1048
 
1049
   if (verb >= 4)
1050
      VPrintf3 ( "        %d pointers, %d sorted, %d scanned\n",
1051
                 nblock, numQSorted, nblock - numQSorted );
1052
}
1053
 
1054
#undef BIGFREQ
1055
#undef SETMASK
1056
#undef CLEARMASK
1057
 
1058
 
1059
/*---------------------------------------------*/
1060
/* Pre:
1061
      nblock > 0
1062
      arr2 exists for [0 .. nblock-1 +N_OVERSHOOT]
1063
      ((UChar*)arr2)  [0 .. nblock-1] holds block
1064
      arr1 exists for [0 .. nblock-1]
1065
 
1066
   Post:
1067
      ((UChar*)arr2) [0 .. nblock-1] holds block
1068
      All other areas of block destroyed
1069
      ftab [ 0 .. 65536 ] destroyed
1070
      arr1 [0 .. nblock-1] holds sorted order
1071
*/
1072
void BZ2_blockSort ( EState* s )
1073
{
1074
   UInt32* ptr    = s->ptr; 
1075
   UChar*  block  = s->block;
1076
   UInt32* ftab   = s->ftab;
1077
   Int32   nblock = s->nblock;
1078
   Int32   verb   = s->verbosity;
1079
   Int32   wfact  = s->workFactor;
1080
   UInt16* quadrant;
1081
   Int32   budget;
1082
   Int32   budgetInit;
1083
   Int32   i;
1084
 
1085
   if (nblock < 10000) {
1086
      fallbackSort ( s->arr1, s->arr2, ftab, nblock, verb );
1087
   } else {
1088
      /* Calculate the location for quadrant, remembering to get
1089
         the alignment right.  Assumes that &(block[0]) is at least
1090
         2-byte aligned -- this should be ok since block is really
1091
         the first section of arr2.
1092
      */
1093
      i = nblock+BZ_N_OVERSHOOT;
1094
      if (i & 1) i++;
1095
      quadrant = (UInt16*)(&(block[i]));
1096
 
1097
      /* (wfact-1) / 3 puts the default-factor-30
1098
         transition point at very roughly the same place as 
1099
         with v0.1 and v0.9.0.  
1100
         Not that it particularly matters any more, since the
1101
         resulting compressed stream is now the same regardless
1102
         of whether or not we use the main sort or fallback sort.
1103
      */
1104
      if (wfact < 1  ) wfact = 1;
1105
      if (wfact > 100) wfact = 100;
1106
      budgetInit = nblock * ((wfact-1) / 3);
1107
      budget = budgetInit;
1108
 
1109
      mainSort ( ptr, block, quadrant, ftab, nblock, verb, &budget );
1110
      if (verb >= 3) 
1111
         VPrintf3 ( "      %d work, %d block, ratio %5.2f\n",
1112
                    budgetInit - budget,
1113
                    nblock, 
1114
                    (float)(budgetInit - budget) /
1115
                    (float)(nblock==0 ? 1 : nblock) ); 
1116
      if (budget < 0) {
1117
         if (verb >= 2) 
1118
            VPrintf0 ( "    too repetitive; using fallback"
1119
                       " sorting algorithm\n" );
1120
         fallbackSort ( s->arr1, s->arr2, ftab, nblock, verb );
1121
      }
1122
   }
1123
 
1124
   s->origPtr = -1;
1125
   for (i = 0; i < s->nblock; i++)
1126
      if (ptr[i] == 0)
1127
         { s->origPtr = i; break; };
1128
 
1129
   AssertH( s->origPtr != -1, 1003 );
1130
}
1131
 
1132
 
1133
/*-------------------------------------------------------------*/
1134
/*--- end                                       blocksort.c ---*/
1135
/*-------------------------------------------------------------*/