/* Copyright (C) 1997, 2000 Aladdin Enterprises.  All rights reserved.

  This software is provided AS-IS with no warranty, either express or
  implied.

  This software is distributed under license and may not be copied,
  modified or distributed except as expressly authorized under the terms
  of the license contained in the file LICENSE in this distribution.

  For more information about licensing, please refer to
  http://www.ghostscript.com/licensing/. For information on
  commercial licensing, go to http://www.artifex.com/licensing/ or
  contact Artifex Software, Inc., 101 Lucas Valley Road #110,
  San Rafael, CA  94903, U.S.A., +1(415)492-9861.
*/

/* $Id: gsfunc0.c,v 1.27 2005/07/15 03:36:03 dan Exp $ */
/* Implementation of FunctionType 0 (Sampled) Functions */
#include "math_.h"
#include "gx.h"
#include "gserrors.h"
#include "gsfunc0.h"
#include "gsparam.h"
#include "gxfarith.h"
#include "gxfunc.h"
#include "stream.h"

#define POLE_CACHE_DEBUG 0      /* A temporary development aid.
				   Remove after the beta testing. */
#define POLE_CACHE_GENERIC_1D 1 /* A temporary development aid.
				   Not decided yet - see fn_Sd_evaluate_cubic_cached_1d. */
#define POLE_CACHE_IGNORE 0     /* A temporary development aid.
				   Remove after the beta testing. */

#if POLE_CACHE_DEBUG
#   include <assert.h>
#endif

#define MAX_FAST_COMPS 8

typedef struct gs_function_Sd_s {
    gs_function_head_t head;
    gs_function_Sd_params_t params;
} gs_function_Sd_t;

/* GC descriptor */
private_st_function_Sd();
private
ENUM_PTRS_WITH(function_Sd_enum_ptrs, gs_function_Sd_t *pfn)
{
    index -= 6;
    if (index < st_data_source_max_ptrs)
	return ENUM_USING(st_data_source, &pfn->params.DataSource,
			  sizeof(pfn->params.DataSource), index);
    return ENUM_USING_PREFIX(st_function, st_data_source_max_ptrs);
}
ENUM_PTR3(0, gs_function_Sd_t, params.Encode, params.Decode, params.Size);
ENUM_PTR3(3, gs_function_Sd_t, params.pole, params.array_step, params.stream_step);
ENUM_PTRS_END
private
RELOC_PTRS_WITH(function_Sd_reloc_ptrs, gs_function_Sd_t *pfn)
{
    RELOC_PREFIX(st_function);
    RELOC_USING(st_data_source, &pfn->params.DataSource,
		sizeof(pfn->params.DataSource));
    RELOC_PTR3(gs_function_Sd_t, params.Encode, params.Decode, params.Size);
    RELOC_PTR3(gs_function_Sd_t, params.pole, params.array_step, params.stream_step);
}
RELOC_PTRS_END

/* Define the maximum plausible number of inputs and outputs */
/* for a Sampled function. */
#define max_Sd_m 16
#define max_Sd_n 16

/* Get one set of sample values. */
#define SETUP_SAMPLES(bps, nbytes)\
	int n = pfn->params.n;\
	byte buf[max_Sd_n * ((bps + 7) >> 3)];\
	const byte *p;\
	int i;\
\
	data_source_access(&pfn->params.DataSource, offset >> 3,\
			   nbytes, buf, &p)
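/*
 * SETUP_SAMPLES declares the locals shared by the fn_gets_* readers below
 * (n, buf, p, i) and reads 'nbytes' bytes from the DataSource, starting at
 * the byte that contains bit 'offset'; p is left pointing at the first of
 * those bytes.
 */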

private int
fn_gets_1(const gs_function_Sd_t * pfn, ulong offset, uint * samples)
{
    SETUP_SAMPLES(1, ((offset & 7) + n + 7) >> 3);
    for (i = 0; i < n; ++i) {
	samples[i] = (*p >> (~offset & 7)) & 1;
	if (!(++offset & 7))
	    p++;
    }
    return 0;
}
private int
fn_gets_2(const gs_function_Sd_t * pfn, ulong offset, uint * samples)
{
    SETUP_SAMPLES(2, (((offset & 7) >> 1) + n + 3) >> 2);
    for (i = 0; i < n; ++i) {
	samples[i] = (*p >> (6 - (offset & 7))) & 3;
	if (!((offset += 2) & 7))
	    p++;
    }
    return 0;
}
private int
fn_gets_4(const gs_function_Sd_t * pfn, ulong offset, uint * samples)
{
    SETUP_SAMPLES(4, (((offset & 7) >> 2) + n + 1) >> 1);
    for (i = 0; i < n; ++i) {
	samples[i] = ((offset ^= 4) & 4 ? *p >> 4 : *p++ & 0xf);
    }
    return 0;
}
private int
fn_gets_8(const gs_function_Sd_t * pfn, ulong offset, uint * samples)
{
    SETUP_SAMPLES(8, n);
    for (i = 0; i < n; ++i) {
	samples[i] = *p++;
    }
    return 0;
}
private int
fn_gets_12(const gs_function_Sd_t * pfn, ulong offset, uint * samples)
{
    SETUP_SAMPLES(12, (((offset & 7) >> 2) + 3 * n + 1) >> 1);
    for (i = 0; i < n; ++i) {
	if (offset & 4)
	    samples[i] = ((*p & 0xf) << 8) + p[1], p += 2;
	else
	    samples[i] = (*p << 4) + (p[1] >> 4), p++;
	offset ^= 4;
    }
    return 0;
}
private int
fn_gets_16(const gs_function_Sd_t * pfn, ulong offset, uint * samples)
{
    SETUP_SAMPLES(16, n * 2);
    for (i = 0; i < n; ++i) {
	samples[i] = (*p << 8) + p[1];
	p += 2;
    }
    return 0;
}
private int
fn_gets_24(const gs_function_Sd_t * pfn, ulong offset, uint * samples)
{
    SETUP_SAMPLES(24, n * 3);
    for (i = 0; i < n; ++i) {
	samples[i] = (*p << 16) + (p[1] << 8) + p[2];
	p += 3;
    }
    return 0;
}
private int
fn_gets_32(const gs_function_Sd_t * pfn, ulong offset, uint * samples)
{
    SETUP_SAMPLES(32, n * 4);
    for (i = 0; i < n; ++i) {
	samples[i] = (*p << 24) + (p[1] << 16) + (p[2] << 8) + p[3];
	p += 4;
    }
    return 0;
}

private int (*const fn_get_samples[]) (const gs_function_Sd_t * pfn,
				       ulong offset, uint * samples) =
{
    0, fn_gets_1, fn_gets_2, 0, fn_gets_4, 0, 0, 0,
	fn_gets_8, 0, 0, 0, fn_gets_12, 0, 0, 0,
	fn_gets_16, 0, 0, 0, 0, 0, 0, 0,
	fn_gets_24, 0, 0, 0, 0, 0, 0, 0,
	fn_gets_32
};
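/*
 * The table above is indexed directly by BitsPerSample, so only the entries
 * for 1, 2, 4, 8, 12, 16, 24 and 32 bits are non-zero; these are exactly the
 * sample depths accepted by gs_function_Sd_init below.
 */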

/*
 * Compute a value by cubic interpolation.
 * f[] = f(0), f(1), f(2), f(3); 1 < x < 2.
 * The formula is derived from those presented in
 * http://www.cs.uwa.edu.au/undergraduate/units/233.413/Handouts/Lecture04.html
 * (thanks to Raph Levien for the reference).
 */
private double
interpolate_cubic(floatp x, floatp f0, floatp f1, floatp f2, floatp f3)
{
    /*
     * The parameter 'a' affects the contribution of the high-frequency
     * components.  The abovementioned source suggests a = -0.5.
     */
#define a (-0.5)
#define SQR(v) ((v) * (v))
#define CUBE(v) ((v) * (v) * (v))
    const double xm1 = x - 1, m2x = 2 - x, m3x = 3 - x;
    const double c =
	(a * CUBE(x) - 5 * a * SQR(x) + 8 * a * x - 4 * a) * f0 +
	((a+2) * CUBE(xm1) - (a+3) * SQR(xm1) + 1) * f1 +
	((a+2) * CUBE(m2x) - (a+3) * SQR(m2x) + 1) * f2 +
	(a * CUBE(m3x) - 5 * a * SQR(m3x) + 8 * a * m3x - 4 * a) * f3;

    if_debug6('~', "[~](%g, %g, %g, %g)order3(%g) => %g\n",
	      f0, f1, f2, f3, x, c);
    return c;
#undef a
#undef SQR
#undef CUBE
}
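/*
 * With a = -0.5 the weights above form the well-known Catmull-Rom (cubic
 * convolution) kernel: substituting x = 1 or x = 2 makes every term but one
 * vanish, so the curve passes exactly through f1 and f2 and only the slope
 * between them is influenced by the outer samples f0 and f3.
 */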

/*
 * Compute a value by quadratic interpolation.
 * f[] = f(0), f(1), f(2); 0 < x < 1.
 *
 * We used to use a quadratic formula for this, derived from
 * f(0) = f0, f(1) = f1, f'(1) = (f2 - f0) / 2, but now we
 * match what we believe is Acrobat Reader's behavior.
 */
inline private double
interpolate_quadratic(floatp x, floatp f0, floatp f1, floatp f2)
{
    return interpolate_cubic(x + 1, f0, f0, f1, f2);
}
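/*
 * The first sample is passed twice, which appears intended to let the cubic
 * kernel be reused next to an array boundary where no further neighbour
 * exists; the result still passes through f0 at x = 0 and f1 at x = 1.
 */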

/* Calculate a result by multicubic interpolation. */
private void
fn_interpolate_cubic(const gs_function_Sd_t *pfn, const float *fparts,
		     const int *iparts, const ulong *factors,
		     float *samples, ulong offset, int m)
{
    int j;

top:
    if (m == 0) {
	uint sdata[max_Sd_n];

	(*fn_get_samples[pfn->params.BitsPerSample])(pfn, offset, sdata);
	for (j = pfn->params.n - 1; j >= 0; --j)
	    samples[j] = (float)sdata[j];
    } else {
	float fpart = *fparts++;
	int ipart = *iparts++;
	ulong delta = *factors++;
	int size = pfn->params.Size[pfn->params.m - m];
	float samples1[max_Sd_n], samplesm1[max_Sd_n], samples2[max_Sd_n];

	--m;
	if (is_fzero(fpart))
	    goto top;
	fn_interpolate_cubic(pfn, fparts, iparts, factors, samples,
			     offset, m);
	fn_interpolate_cubic(pfn, fparts, iparts, factors, samples1,
			     offset + delta, m);
	/* Ensure we don't try to access out of bounds. */
	/*
	 * If size == 1, the only possible value for ipart and fpart is
	 * 0, so we've already handled this case.
	 */
	if (size == 2) {	/* ipart = 0 */
	    /* Use linear interpolation. */
	    for (j = pfn->params.n - 1; j >= 0; --j)
		samples[j] += (samples1[j] - samples[j]) * fpart;
	    return;
	}
	if (ipart == 0) {
	    /* Use quadratic interpolation. */
	    fn_interpolate_cubic(pfn, fparts, iparts, factors,
				 samples2, offset + delta * 2, m);
	    for (j = pfn->params.n - 1; j >= 0; --j)
		samples[j] =
		    interpolate_quadratic(fpart, samples[j],
					  samples1[j], samples2[j]);
	    return;
	}
	/* At this point we know ipart > 0, size >= 3. */
	fn_interpolate_cubic(pfn, fparts, iparts, factors, samplesm1,
			     offset - delta, m);
	if (ipart == size - 2) {
	    /* Use quadratic interpolation. */
	    for (j = pfn->params.n - 1; j >= 0; --j)
		samples[j] =
		    interpolate_quadratic(1 - fpart, samples1[j],
					  samples[j], samplesm1[j]);
	    return;
	}
	/* Now we know 0 < ipart < size - 2, size > 3. */
	fn_interpolate_cubic(pfn, fparts, iparts, factors,
			     samples2, offset + delta * 2, m);
	for (j = pfn->params.n - 1; j >= 0; --j)
	    samples[j] =
		interpolate_cubic(fpart + 1, samplesm1[j], samples[j],
				  samples1[j], samples2[j]);
    }
}

/* Calculate a result by multilinear interpolation. */
private void
fn_interpolate_linear(const gs_function_Sd_t *pfn, const float *fparts,
		 const ulong *factors, float *samples, ulong offset, int m)
{
    int j;

top:
    if (m == 0) {
	uint sdata[max_Sd_n];

	(*fn_get_samples[pfn->params.BitsPerSample])(pfn, offset, sdata);
	for (j = pfn->params.n - 1; j >= 0; --j)
	    samples[j] = (float)sdata[j];
    } else {
	float fpart = *fparts++;
	float samples1[max_Sd_n];

	if (is_fzero(fpart)) {
	    ++factors;
	    --m;
	    goto top;
	}
	fn_interpolate_linear(pfn, fparts, factors + 1, samples,
			      offset, m - 1);
	fn_interpolate_linear(pfn, fparts, factors + 1, samples1,
			      offset + *factors, m - 1);
	for (j = pfn->params.n - 1; j >= 0; --j)
	    samples[j] += (samples1[j] - samples[j]) * fpart;
    }
}
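/*
 * Each recursion level above blends two (m-1)-dimensional evaluations at
 * 'offset' and 'offset + *factors', so in the worst case (no fractional part
 * is zero) a full multilinear lookup touches the 2^m corner samples of the
 * surrounding cell.
 */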

private inline double
fn_Sd_encode(const gs_function_Sd_t *pfn, int i, double sample)
{
    float d0, d1, r0, r1;
    double value;
    int bps = pfn->params.BitsPerSample;
    /* x86 machines have problems with shifts if bps >= 32 */
    uint max_samp = (bps < (sizeof(uint) * 8)) ? ((1 << bps) - 1) : max_uint;

    if (pfn->params.Range)
	r0 = pfn->params.Range[2 * i], r1 = pfn->params.Range[2 * i + 1];
    else
	r0 = 0, r1 = (float)((1 << bps) - 1);
    if (pfn->params.Decode)
	d0 = pfn->params.Decode[2 * i], d1 = pfn->params.Decode[2 * i + 1];
    else
	d0 = r0, d1 = r1;

    value = sample * (d1 - d0) / max_samp + d0;
    if (value < r0)
	value = r0;
    else if (value > r1)
	value = r1;
    return value;
}
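/*
 * For example (assuming 8-bit samples and Decode = [0 1]): max_samp is 255,
 * so a raw sample of 255 maps to 255 * (1 - 0) / 255 + 0 = 1.0 and 128 maps
 * to 128/255, before being clamped into Range.
 */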

/* Evaluate a Sampled function. */
/* A generic algorithm with recursion by dimensions. */
private int
fn_Sd_evaluate_general(const gs_function_t * pfn_common, const float *in, float *out)
{
    const gs_function_Sd_t *pfn = (const gs_function_Sd_t *)pfn_common;
    int bps = pfn->params.BitsPerSample;
    ulong offset = 0;
    int i;
    float encoded[max_Sd_m];
    int iparts[max_Sd_m];	/* only needed for cubic interpolation */
    ulong factors[max_Sd_m];
    float samples[max_Sd_n];

    /* Encode the input values. */

    for (i = 0; i < pfn->params.m; ++i) {
	float d0 = pfn->params.Domain[2 * i],
	    d1 = pfn->params.Domain[2 * i + 1];
	float arg = in[i], enc;

	if (arg < d0)
	    arg = d0;
	else if (arg > d1)
	    arg = d1;
	if (pfn->params.Encode) {
	    float e0 = pfn->params.Encode[2 * i];
	    float e1 = pfn->params.Encode[2 * i + 1];

	    enc = (arg - d0) * (e1 - e0) / (d1 - d0) + e0;
	    if (enc < 0)
		encoded[i] = 0;
	    else if (enc >= pfn->params.Size[i] - 1)
		encoded[i] = (float)pfn->params.Size[i] - 1;
	    else
		encoded[i] = enc;
	} else {
	    /* arg is guaranteed to be in bounds, ergo so is enc */
	    encoded[i] = (arg - d0) * (pfn->params.Size[i] - 1) / (d1 - d0);
	}
    }

    /* Look up and interpolate the output values. */

    {
	ulong factor = bps * pfn->params.n;

	for (i = 0; i < pfn->params.m; factor *= pfn->params.Size[i++]) {
	    int ipart = (int)encoded[i];

	    offset += (factors[i] = factor) * ipart;
	    iparts[i] = ipart;	/* only needed for cubic interpolation */
	    encoded[i] -= ipart;
	}
    }
    if (pfn->params.Order == 3)
	fn_interpolate_cubic(pfn, encoded, iparts, factors, samples,
			     offset, pfn->params.m);
    else
	fn_interpolate_linear(pfn, encoded, factors, samples, offset,
			      pfn->params.m);

    /* Encode the output values. */

    for (i = 0; i < pfn->params.n; ++i)
	out[i] = (float)fn_Sd_encode(pfn, i, samples[i]);

    return 0;
}

static const double double_stub = 1e90;

private inline void
fn_make_cubic_poles(double *p, double f0, double f1, double f2, double f3,
	    const int pole_step_minor)
{   /* The following are the poles of the polynomial
       which represents interpolate_cubic on [1,2]. */
    const double a = -0.5;

    p[pole_step_minor * 1] = (a*f0 + 3*f1 - a*f2)/3.0;
    p[pole_step_minor * 2] = (-a*f1 + 3*f2 + a*f3)/3.0;
}
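/*
 * p[0] and p[3 * pole_step_minor] already hold the node values f1 and f2
 * (see fn_make_poles); the two inner Bezier poles computed above give end
 * tangents of (f2 - f0) / 2 and (f3 - f1) / 2, i.e. central differences,
 * which matches the a = -0.5 cubic used by interpolate_cubic.
 */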

private void
fn_make_poles(double *p, const int pole_step, int power, int bias)
{
    const int pole_step_minor = pole_step / 3;
    switch(power) {
	case 1: /* A linear curve represented as a cubic (3rd power) curve. */
	    /* bias must be 0. */
	    p[pole_step_minor * 1] = (2 * p[pole_step * 0] + 1 * p[pole_step * 1]) / 3;
	    p[pole_step_minor * 2] = (1 * p[pole_step * 0] + 2 * p[pole_step * 1]) / 3;
	    break;
	case 2:
	    /* bias may be 0 or 1. */
	    /* Duplicate the beginning or the ending pole (compatible with the old code). */
	    fn_make_cubic_poles(p + pole_step * bias,
		    p[pole_step * 0], p[pole_step * bias],
		    p[pole_step * (1 + bias)], p[pole_step * 2],
		    pole_step_minor);
	    break;
	case 3:
	    /* bias must be 1. */
	    fn_make_cubic_poles(p + pole_step * bias,
		    p[pole_step * 0], p[pole_step * 1], p[pole_step * 2], p[pole_step * 3],
		    pole_step_minor);
	    break;
	default: /* Must not happen. */
	   DO_NOTHING;
    }
}

/* Evaluate a Sampled function.
   A cubic interpolation with a pole cache.
   Allows a fast check for suspected extremes. */
/* This implementation is the particular case of 1 dimension.
   Maybe we'll use it as an optimisation of the generic case,
   so keep it for a while. */
private int
fn_Sd_evaluate_cubic_cached_1d(const gs_function_Sd_t *pfn, const float *in, float *out)
{
    float d0 = pfn->params.Domain[2 * 0];
    float d1 = pfn->params.Domain[2 * 0 + 1];
    float x0 = in[0];
    const int pole_step_minor = pfn->params.n;
    const int pole_step = 3 * pole_step_minor;
    int i0; /* A cell index. */
    int ib, ie, i, k;
    double *p, t0, t1, tt;

    if (x0 < d0)
	x0 = d0;
    if (x0 > d1)
	x0 = d1;
    tt = (in[0] - d0) * (pfn->params.Size[0] - 1) / (d1 - d0);
    i0 = (int)floor(tt);
    ib = max(i0 - 1, 0);
    ie = min(pfn->params.Size[0], i0 + 3);
    for (i = ib; i < ie; i++) {
	if (pfn->params.pole[i * pole_step] == double_stub) {
	    uint sdata[max_Sd_n];
	    int bps = pfn->params.BitsPerSample;

	    p = &pfn->params.pole[i * pole_step];
	    fn_get_samples[pfn->params.BitsPerSample](pfn, i * bps * pfn->params.n, sdata);
	    for (k = 0; k < pfn->params.n; k++, p++)
		*p = fn_Sd_encode(pfn, k, (double)sdata[k]);
	}
    }
    p = &pfn->params.pole[i0 * pole_step];
    t0 = tt - i0;
    if (t0 == 0) {
	for (k = 0; k < pfn->params.n; k++, p++)
	    out[k] = *p;
    } else {
	if (p[1 * pole_step_minor] == double_stub) {
	    for (k = 0; k < pfn->params.n; k++)
		fn_make_poles(&pfn->params.pole[ib * pole_step + k], pole_step,
			ie - ib - 1, i0 - ib);
	}
	t1 = 1 - t0;
	for (k = 0; k < pfn->params.n; k++, p++) {
	    double y = p[0 * pole_step_minor] * t1 * t1 * t1 +
		       p[1 * pole_step_minor] * t1 * t1 * t0 * 3 +
		       p[2 * pole_step_minor] * t1 * t0 * t0 * 3 +
		       p[3 * pole_step_minor] * t0 * t0 * t0;
	    if (y < pfn->params.Range[0])
		y = pfn->params.Range[0];
	    if (y > pfn->params.Range[1])
		y = pfn->params.Range[1];
	    out[k] = y;
	}
    }
    return 0;
}

private inline void
decode_argument(const gs_function_Sd_t *pfn, const float *in, double T[max_Sd_m], int I[max_Sd_m])
{
    int i;

    for (i = 0; i < pfn->params.m; i++) {
	float xi = in[i];
	float d0 = pfn->params.Domain[2 * i + 0];
	float d1 = pfn->params.Domain[2 * i + 1];
	double t;

	if (xi < d0)
	    xi = d0;
	if (xi > d1)
	    xi = d1;
	t = (xi - d0) * (pfn->params.Size[i] - 1) / (d1 - d0);
	I[i] = (int)floor(t);
	T[i] = t - I[i];
    }
}

private inline void
index_span(const gs_function_Sd_t *pfn, int *I, double *T, int ii, int *Ii, int *ib, int *ie)
{
    *Ii = I[ii];
    if (T[ii] != 0) {
	*ib = max(*Ii - 1, 0);
	*ie = min(pfn->params.Size[ii], *Ii + 3);
    } else {
	*ib = *Ii;
	*ie = *Ii + 1;
    }
}

private inline int
load_vector_to(const gs_function_Sd_t *pfn, int s_offset, double *V)
{
    uint sdata[max_Sd_n];
    int k, code;

    code = fn_get_samples[pfn->params.BitsPerSample](pfn, s_offset, sdata);
    if (code < 0)
	return code;
    for (k = 0; k < pfn->params.n; k++)
	V[k] = fn_Sd_encode(pfn, k, (double)sdata[k]);
    return 0;
}

private inline int
load_vector(const gs_function_Sd_t *pfn, int a_offset, int s_offset)
{
    if (*(pfn->params.pole + a_offset) == double_stub) {
	uint sdata[max_Sd_n];
	int k, code;

	code = fn_get_samples[pfn->params.BitsPerSample](pfn, s_offset, sdata);
	if (code < 0)
	    return code;
	for (k = 0; k < pfn->params.n; k++)
	    *(pfn->params.pole + a_offset + k) = fn_Sd_encode(pfn, k, (double)sdata[k]);
    }
    return 0;
}

private inline void
interpolate_vector(const gs_function_Sd_t *pfn, int offset, int pole_step, int power, int bias)
{
    int k;

    for (k = 0; k < pfn->params.n; k++)
	fn_make_poles(pfn->params.pole + offset + k, pole_step, power, bias);
}

private inline void
interpolate_tensors(const gs_function_Sd_t *pfn, int *I, double *T,
	int offset, int pole_step, int power, int bias, int ii)
{
    if (ii < 0)
	interpolate_vector(pfn, offset, pole_step, power, bias);
    else {
	int s = pfn->params.array_step[ii];
	int Ii = I[ii];

	if (T[ii] == 0) {
	    interpolate_tensors(pfn, I, T, offset + Ii * s, pole_step, power, bias, ii - 1);
	} else {
	    int l;

	    for (l = 0; l < 4; l++)
		interpolate_tensors(pfn, I, T, offset + Ii * s + l * s / 3, pole_step, power, bias, ii - 1);
	}
    }
}

private inline bool
is_tensor_done(const gs_function_Sd_t *pfn, int *I, double *T, int a_offset, int ii)
{
    /* Check an inner pole of the cell. */
    int i, o = 0;

    for (i = ii; i >= 0; i--) {
	o += I[i] * pfn->params.array_step[i];
	if (T[i] != 0)
	    o += pfn->params.array_step[i] / 3;
    }
    if (*(pfn->params.pole + a_offset + o) != double_stub)
	return true;
    return false;
}

/* Creates a tensor of Bezier coefficients by node interpolation. */
private inline int
make_interpolation_tensor(const gs_function_Sd_t *pfn, int *I, double *T,
			    int a_offset, int s_offset, int ii)
{
    /* Well, this function isn't obvious. Trying to explain what it does.

       Suppose we have a 4x4x4...x4 hypercube of nodes, and we want to build
       a multicubic interpolation function for the inner 2x2x2...x2 hypercube.
       We represent the multicubic function with a tensor of Bezier poles,
       and the size of the tensor is 4x4x....x4. Note that the outer corners
       of the tensor are equal to the corners of the 2x2x...x2 hypercube.

       We organized the 'pole' array so that a tensor of a cell
       occupies the cell, and tensors for neighbour cells have a common hyperplane.

       For the 1-dimensional case, let the nodes be p0, p1, p2, p3,
       and let the tensor coefficients be q0, q1, q2, q3.
       We choose a cubic approximation, in which tangents at nodes p1, p2
       are parallel to (p2 - p0) and (p3 - p1) respectively.
       (Well, this doesn't give the minimal curvature, but it is likely
       what Adobe implementations do, see bug 687352,
       and we agree that it's reasonable.)

       For the 2-dimensional case we apply the 1-dimensional case along
       the first dimension, and then construct a surface by varying the
       second coordinate as a parameter. It gives a bicubic surface,
       and the result doesn't depend on the order of coordinates
       (I proved the latter with Mathematica 3.0).
       Then we know that an interpolation by one coordinate and
       a differentiation by another coordinate are interchangeable operators.
       Due to that, the poles of the interpolated function are the same as
       the interpolated poles of the function (well, we didn't spend time
       on a rigorous proof, but this fact was confirmed by testing the
       implementation with POLE_CACHE_DEBUG).

       Then we apply the 2-dimensional considerations recursively
       to all dimensions. This is exactly what this function does.

       When the source node array has an insufficient number of nodes
       along a dimension, we duplicate the ending nodes. This solution was
       chosen for compatibility with the old code, but definitely
       a better one exists.
     */
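    /*
     * Concretely, for Order 3, one input and n outputs: array_step[0] is
     * 3 * n, so the tensor of cell i occupies pole[3 * n * i] through
     * pole[3 * n * i + 4 * n - 1], four groups of n values whose last group
     * (at 3 * n * (i + 1)) is shared with cell i + 1 (see the step
     * computation in gs_function_Sd_init).
     */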
    int code;

    if (ii < 0) {
	if (POLE_CACHE_IGNORE || *(pfn->params.pole + a_offset) == double_stub) {
	    code = load_vector(pfn, a_offset, s_offset);
	    if (code < 0)
		return code;
	}
    } else {
	int Ii, ib, ie, i;
	int sa = pfn->params.array_step[ii];
	int ss = pfn->params.stream_step[ii];

	index_span(pfn, I, T, ii, &Ii, &ib, &ie);
	if (POLE_CACHE_IGNORE || !is_tensor_done(pfn, I, T, a_offset, ii)) {
	    for (i = ib; i < ie; i++) {
		code = make_interpolation_tensor(pfn, I, T,
				a_offset + i * sa, s_offset + i * ss, ii - 1);
		if (code < 0)
		    return code;
	    }
	    if (T[ii] != 0)
		interpolate_tensors(pfn, I, T, a_offset + ib * sa, sa, ie - ib - 1,
				Ii - ib, ii - 1);
	}
    }
    return 0;
}

/* Creates a subarray of samples. */
private inline int
make_interpolation_nodes(const gs_function_Sd_t *pfn, double *T0, double *T1,
			    int *I, double *T,
			    int a_offset, int s_offset, int ii)
{
    int code;

    if (ii < 0) {
	if (POLE_CACHE_IGNORE || *(pfn->params.pole + a_offset) == double_stub) {
	    code = load_vector(pfn, a_offset, s_offset);
	    if (code < 0)
		return code;
	}
	if (pfn->params.Order == 3) {
	    code = make_interpolation_tensor(pfn, I, T, 0, 0, pfn->params.m - 1);
	}
    } else {
	int i;
	int i0 = (int)floor(T0[ii]);
	int i1 = (int)ceil(T1[ii]);
	int sa = pfn->params.array_step[ii];
	int ss = pfn->params.stream_step[ii];

	if (i0 < 0 || i0 >= pfn->params.Size[ii])
	    return_error(gs_error_unregistered); /* Must not happen. */
	if (i1 < 0 || i1 >= pfn->params.Size[ii])
	    return_error(gs_error_unregistered); /* Must not happen. */
	I[ii] = i0;
	T[ii] = (i1 > i0 ? 1 : 0);
	for (i = i0; i <= i1; i++) {
	    code = make_interpolation_nodes(pfn, T0, T1, I, T,
			    a_offset + i * sa, s_offset + i * ss, ii - 1);
	    if (code < 0)
		return code;
	}
    }
    return 0;
}

private inline int
evaluate_from_tenzor(const gs_function_Sd_t *pfn, int *I, double *T, int offset, int ii, double *y)
{
    int s = pfn->params.array_step[ii], k, l, code;

    if (ii < 0) {
	for (k = 0; k < pfn->params.n; k++)
	    y[k] = *(pfn->params.pole + offset + k);
    } else if (T[ii] == 0) {
	return evaluate_from_tenzor(pfn, I, T, offset + s * I[ii], ii - 1, y);
    } else {
	double t0 = T[ii], t1 = 1 - t0;
	double p[4][max_Sd_n];

	for (l = 0; l < 4; l++) {
	    code = evaluate_from_tenzor(pfn, I, T, offset + s * I[ii] + l * (s / 3), ii - 1, p[l]);
	    if (code < 0)
		return code;
	}
	for (k = 0; k < pfn->params.n; k++)
	    y[k] = p[0][k] * t1 * t1 * t1 +
		   p[1][k] * t1 * t1 * t0 * 3 +
		   p[2][k] * t1 * t0 * t0 * 3 +
		   p[3][k] * t0 * t0 * t0;
    }
    return 0;
}


/* Evaluate a Sampled function. */
/* A cubic interpolation with pole cache. */
/* Allows a fast check for suspected extremes with is_tensor_monotonic_by_dimension. */
private int
fn_Sd_evaluate_multicubic_cached(const gs_function_Sd_t *pfn, const float *in, float *out)
{
    double T[max_Sd_m], y[max_Sd_n];
    int I[max_Sd_m], k, code;

    decode_argument(pfn, in, T, I);
    code = make_interpolation_tensor(pfn, I, T, 0, 0, pfn->params.m - 1);
    if (code < 0)
	return code;
    evaluate_from_tenzor(pfn, I, T, 0, pfn->params.m - 1, y);
    for (k = 0; k < pfn->params.n; k++) {
	double yk = y[k];

	if (yk < pfn->params.Range[k * 2 + 0])
	    yk = pfn->params.Range[k * 2 + 0];
	if (yk > pfn->params.Range[k * 2 + 1])
	    yk = pfn->params.Range[k * 2 + 1];
	out[k] = yk;
    }
    return 0;
}

/* Evaluate a Sampled function. */
private int
fn_Sd_evaluate(const gs_function_t * pfn_common, const float *in, float *out)
{
    const gs_function_Sd_t *pfn = (const gs_function_Sd_t *)pfn_common;
    int code;

    if (pfn->params.Order == 3) {
	if (POLE_CACHE_GENERIC_1D || pfn->params.m > 1)
	    code = fn_Sd_evaluate_multicubic_cached(pfn, in, out);
	else
	    code = fn_Sd_evaluate_cubic_cached_1d(pfn, in, out);
#	if POLE_CACHE_DEBUG
	{   float y[max_Sd_n];
	    int k, code1;

	    code1 = fn_Sd_evaluate_general(pfn_common, in, y);
	    assert(code == code1);
	    for (k = 0; k < pfn->params.n; k++)
		assert(any_abs(y[k] - out[k]) <= 1e-6 * (pfn->params.Range[k * 2 + 1] - pfn->params.Range[k * 2 + 0]));
	}
#	endif
    } else
	code = fn_Sd_evaluate_general(pfn_common, in, out);
    return code;
}

/* Map a function subdomain to the sample index subdomain. */
private inline int
get_scaled_range(const gs_function_Sd_t *const pfn,
		   const float *lower, const float *upper,
		   int i, float *pw0, float *pw1)
{
    float d0 = pfn->params.Domain[i * 2 + 0], d1 = pfn->params.Domain[i * 2 + 1];
    float v0 = lower[i], v1 = upper[i];
    float e0, e1, w0, w1, w;
    const float small_noize = (float)1e-6;

    if (v0 < d0 || v0 > d1)
	return gs_error_rangecheck;
    if (pfn->params.Encode)
	e0 = pfn->params.Encode[i * 2 + 0], e1 = pfn->params.Encode[i * 2 + 1];
    else
	e0 = 0, e1 = (float)pfn->params.Size[i] - 1;
    w0 = (v0 - d0) * (e1 - e0) / (d1 - d0) + e0;
    if (w0 < 0)
	w0 = 0;
    else if (w0 >= pfn->params.Size[i] - 1)
	w0 = (float)pfn->params.Size[i] - 1;
    w1 = (v1 - d0) * (e1 - e0) / (d1 - d0) + e0;
    if (w1 < 0)
	w1 = 0;
    else if (w1 >= pfn->params.Size[i] - 1)
	w1 = (float)pfn->params.Size[i] - 1;
    if (w0 > w1) {
	w = w0; w0 = w1; w1 = w;
    }
    if (floor(w0 + 1) - w0 < small_noize * any_abs(e1 - e0))
	w0 = (floor(w0) + 1);
    if (w1 - floor(w1) < small_noize * any_abs(e1 - e0))
	w1 = floor(w1);
    if (w0 > w1)
	w0 = w1;
    *pw0 = w0;
    *pw1 = w1;
    return 0;
}

/* Copy a tensor to a differently indexed pole array. */
private int
copy_poles(const gs_function_Sd_t *pfn, int *I, double *T0, double *T1, int a_offset,
		int ii, double *pole, int p_offset, int pole_step)
{
    int i, ei, sa, code;
    int order = pfn->params.Order;

    if (pole_step <= 0)
	return_error(gs_error_limitcheck); /* Too small buffer. */
    ei = (T0[ii] == T1[ii] ? 1 : order + 1);
    sa = pfn->params.array_step[ii];
    if (ii == 0) {
	for (i = 0; i < ei; i++)
	    *(pole + p_offset + i * pole_step) =
		    *(pfn->params.pole + a_offset + I[ii] * sa + i * (sa / order));
    } else {
	for (i = 0; i < ei; i++) {
	    code = copy_poles(pfn, I, T0, T1, a_offset + I[ii] * sa + i * (sa / order), ii - 1,
			    pole, p_offset + i * pole_step, pole_step / 4);
	    if (code < 0)
		return code;
	}
    }
    return 0;
}

private inline void
subcurve(double *pole, int pole_step, double t0, double t1)
{
    /* Generated with subcurve.nb using Mathematica 3.0. */
    double q0 = pole[pole_step * 0];
    double q1 = pole[pole_step * 1];
    double q2 = pole[pole_step * 2];
    double q3 = pole[pole_step * 3];
    double t01 = t0 - 1, t11 = t1 - 1;
    double small = 1e-13;

#define Power2(a) (a) * (a)
#define Power3(a) (a) * (a) * (a)
    pole[pole_step * 0] = t0*(t0*(q3*t0 - 3*q2*t01) + 3*q1*Power2(t01)) - q0*Power3(t01);
    pole[pole_step * 1] = q1*t01*(-2*t0 - t1 + 3*t0*t1) + t0*(q2*t0 + 2*q2*t1 -
			    3*q2*t0*t1 + q3*t0*t1) - q0*t11*Power2(t01);
    pole[pole_step * 2] = t1*(2*q2*t0 + q2*t1 - 3*q2*t0*t1 + q3*t0*t1) +
			    q1*(-t0 - 2*t1 + 3*t0*t1)*t11 - q0*t01*Power2(t11);
    pole[pole_step * 3] = t1*(t1*(3*q2 - 3*q2*t1 + q3*t1) +
			    3*q1*Power2(t11)) - q0*Power3(t11);
#undef Power2
#undef Power3
    if (any_abs(pole[pole_step * 1] - pole[pole_step * 0]) < small)
	pole[pole_step * 1] = pole[pole_step * 0];
    if (any_abs(pole[pole_step * 2] - pole[pole_step * 3]) < small)
	pole[pole_step * 2] = pole[pole_step * 3];
}
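/*
 * In other words, subcurve() replaces q0..q3 with the Bezier control points
 * of the original cubic restricted to [t0, t1]; in particular the new q0 and
 * q3 are the values of the original curve at t0 and t1. The final two tests
 * merely snap nearly-equal end poles together to avoid tiny numeric noise.
 */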

private inline void
subline(double *pole, int pole_step, double t0, double t1)
{
    double q0 = pole[pole_step * 0];
    double q1 = pole[pole_step * 1];

    pole[pole_step * 0] = (1 - t0) * q0 + t0 * q1;
    pole[pole_step * 1] = (1 - t1) * q0 + t1 * q1;
}

private void
clamp_poles(double *T0, double *T1, int ii, int i, double * pole,
		int p_offset, int pole_step, int pole_step_i, int order)
{
    if (ii < 0) {
	if (order == 3)
	    subcurve(pole + p_offset, pole_step_i, T0[i], T1[i]);
	else
	    subline(pole + p_offset, pole_step_i, T0[i], T1[i]);
    } else if (i == ii) {
	clamp_poles(T0, T1, ii - 1, i, pole, p_offset, pole_step / 4, pole_step, order);
    } else {
	int j, ei = (T0[ii] == T1[ii] ? 1 : order + 1);

	for (j = 0; j < ei; j++)
	    clamp_poles(T0, T1, ii - 1, i, pole, p_offset + j * pole_step,
			    pole_step / 4, pole_step_i, order);
    }
}

private inline int /* 3 - don't know, 2 - decreasing, 0 - constant, 1 - increasing. */
curve_monotonity(double *pole, int pole_step)
{
    double p0 = pole[pole_step * 0];
    double p1 = pole[pole_step * 1];
    double p2 = pole[pole_step * 2];
    double p3 = pole[pole_step * 3];

    if (p0 == p1 && any_abs(p1 - p2) < 1e-13 && p2 == p3)
	return 0;
    if (p0 <= p1 && p1 <= p2 && p2 <= p3)
	return 1;
    if (p0 >= p1 && p1 >= p2 && p2 >= p3)
	return 2;
    /* Maybe not monotonic.
       Don't want to solve quadratic equations, so return "don't know".
       This case should be rare.
     */
    return 3;
}
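/*
 * These tests rely on the convex-hull property of the Bezier derivative:
 * if the control points are monotone, so is the curve. The converse does
 * not hold, which is why a mixed ordering returns 3 ("don't know") rather
 * than attempting to locate the extrema exactly.
 */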

private inline int /* 2 - decreasing, 0 - constant, 1 - increasing. */
line_monotonity(double *pole, int pole_step)
{
    double p0 = pole[pole_step * 0];
    double p1 = pole[pole_step * 1];

    if (p1 - p0 > 1e-13)
	return 1;
    if (p0 - p1 > 1e-13)
	return 2;
    return 0;
}

private int /* 3 bits per guide : 3 - non-monotonic or don't know,
		    2 - decreasing, 0 - constant, 1 - increasing.
		    The number of guides is order+1. */
tensor_dimension_monotonity(const double *T0, const double *T1, int ii, int i0, double *pole,
		int p_offset, int pole_step, int pole_step_i, int order)
{
    if (ii < 0) {
	if (order == 3)
	    return curve_monotonity(pole + p_offset, pole_step_i);
	else
	    return line_monotonity(pole + p_offset, pole_step_i);
    } else if (i0 == ii) {
	/* Delay the dimension till the end, and adjust pole_step. */
	return tensor_dimension_monotonity(T0, T1, ii - 1, i0, pole, p_offset,
			    pole_step / 4, pole_step, order);
    } else {
	int j, ei = (T0[ii] == T1[ii] ? 1 : order + 1), m = 0, mm;

	for (j = 0; j < ei; j++) {
	    mm = tensor_dimension_monotonity(T0, T1, ii - 1, i0, pole, p_offset + j * pole_step,
			    pole_step / 4, pole_step_i, order);
	    m |= mm << (j * 3);
	    if (mm == 3) {
		/* If one guide is not monotonic, the dimension is not monotonic.
		   Can return early. */
		break;
	    }
	}
	return m;
    }
}

private inline int
is_tensor_monotonic_by_dimension(const gs_function_Sd_t *pfn, int *I, double *T0, double *T1, int i0, int k,
		    uint *mask /* 3 bits per guide : 3 - non-monotonic or don't know,
		    2 - decreasing, 0 - constant, 1 - increasing.
		    The number of guides is order+1. */)
{
    double pole[4*4*4]; /* For now, restrict to cubic functions with at most 3 inputs.
                 More arguments need a bigger buffer, but the rest of the code is the same. */
    int i, code, ii = pfn->params.m - 1;
    double TT0[3], TT1[3];

    *mask = 0;
    if (ii >= 3) {
	 /* Unimplemented. We don't know practical cases,
	    because currently it is only called while decomposing a shading.  */
	return_error(gs_error_limitcheck);
    }
    code = copy_poles(pfn, I, T0, T1, k, ii, pole, 0, count_of(pole) / 4);
    if (code < 0)
	return code;
    for (i = ii; i >= 0; i--) {
	TT0[i] = 0;
	if (T0[i] != T1[i]) {
	    if (T0[i] != 0 || T1[i] != 1)
		clamp_poles(T0, T1, ii, i, pole, 0, count_of(pole) / 4, -1, pfn->params.Order);
	    TT1[i] = 1;
	} else
	    TT1[i] = 0;
    }
    *mask = tensor_dimension_monotonity(TT0, TT1, ii, i0, pole, 0,
			count_of(pole) / 4, -1, pfn->params.Order);
    return 0;
}

private int /* error code */
is_lattice_monotonic_by_dimension(const gs_function_Sd_t *pfn, const double *T0, const double *T1,
	int *I, double *S0, double *S1, int ii, int i0, int k,
	uint *mask /* 3 bits per guide : 1 - non-monotonic or don't know, 0 - monotonic;
		      The number of guides is order+1. */)
{
    if (ii == -1) {
	/* fixme : could cache the cell monotonicity to avoid redundant evaluation. */
	return is_tensor_monotonic_by_dimension(pfn, I, S0, S1, i0, k, mask);
    } else {
	int i1 = (ii > i0 ? ii : ii == 0 ? i0 : ii - 1); /* Delay the dimension i0 till the end of recursion. */
	int j, code;
	int bi = (int)floor(T0[i1]);
	int ei = (int)floor(T1[i1]);
	uint m, mm, m1 = 0x49249249 & ((1 << ((pfn->params.Order + 1) * 3)) - 1);

	if (floor(T1[i1]) == T1[i1])
	    ei--;
	m = 0;
	for (j = bi; j <= ei; j++) {
	    /* fixme : Better performance may be obtained by comparing central nodes with side ones. */
	    I[i1] = j;
	    S0[i1] = max(T0[i1] - j, 0);
	    S1[i1] = min(T1[i1] - j, 1);
	    code = is_lattice_monotonic_by_dimension(pfn, T0, T1, I, S0, S1, ii - 1, i0, k, &mm);
	    if (code < 0)
		return code;
	    m |= mm;
	    if (m == m1) /* Don't return early - shadings need to know about all dimensions. */
		break;
	}
	if (ii == 0) {
	    /* Detect non-monotonic guides. */
	    m = m & (m >> 1);
	}
	*mask = m;
	return 0;
    }
}

private inline int /* error code */
is_lattice_monotonic(const gs_function_Sd_t *pfn, const double *T0, const double *T1,
	 int *I, double *S0, double *S1,
	 int k, uint *mask /* 1 bit per dimension : 1 - non-monotonic or don't know,
			      0 - monotonic. */)
{
    uint m, mm = 0;
    int i, code;

    for (i = 0; i < pfn->params.m; i++) {
	if (T0[i] != T1[i]) {
	    code = is_lattice_monotonic_by_dimension(pfn, T0, T1, I, S0, S1, pfn->params.m - 1, i, k, &m);
	    if (code < 0)
		return code;
	    if (m)
		mm |= 1 << i;
	}
    }
    *mask = mm;
    return 0;
}

private int /* 3 bits per result : 3 - non-monotonic or don't know,
	       2 - decreasing, 0 - constant, 1 - increasing,
	       <0 - error. */
fn_Sd_1arg_linear_monotonic_rec(const gs_function_Sd_t *const pfn, int i0, int i1,
				const double *V0, const double *V1)
{
    if (i1 - i0 <= 1) {
	int code = 0, i;

	for (i = 0; i < pfn->params.n; i++) {
	    if (V0[i] < V1[i])
		code |= 1 << (i * 3);
	    else if (V0[i] > V1[i])
		code |= 2 << (i * 3);
	}
	return code;
    } else {
	double VV[MAX_FAST_COMPS];
	int ii = (i0 + i1) / 2, code, cod1;

	code = load_vector_to(pfn, ii * pfn->params.n * pfn->params.BitsPerSample, VV);
	if (code < 0)
	    return code;
	if (code & (code >> 1))
	    return code; /* Not monotonic by some component of the result. */
	code = fn_Sd_1arg_linear_monotonic_rec(pfn, i0, ii, V0, VV);
	if (code < 0)
	    return code;
	cod1 = fn_Sd_1arg_linear_monotonic_rec(pfn, ii, i1, VV, V1);
	if (cod1 < 0)
	    return cod1;
	return code | cod1;
    }
}

private int
fn_Sd_1arg_linear_monotonic(const gs_function_Sd_t *const pfn, double T0, double T1,
			    uint *mask /* 1 - non-monotonic or don't know, 0 - monotonic. */)
{
    int i0 = (int)floor(T0);
    int i1 = (int)ceil(T1), code;
    double V0[MAX_FAST_COMPS], V1[MAX_FAST_COMPS];

    if (i1 - i0 > 1) {
	code = load_vector_to(pfn, i0 * pfn->params.n * pfn->params.BitsPerSample, V0);
	if (code < 0)
	    return code;
	code = load_vector_to(pfn, i1 * pfn->params.n * pfn->params.BitsPerSample, V1);
	if (code < 0)
	    return code;
	code = fn_Sd_1arg_linear_monotonic_rec(pfn, i0, i1, V0, V1);
	if (code < 0)
	    return code;
	if (code & (code >> 1)) {
	    *mask = 1;
	    return 0;
	}
    }
    *mask = 0;
    return 1;
}

#define DEBUG_Sd_1arg 0

/* Test whether a Sampled function is monotonic. */
private int /* 1 = monotonic, 0 = not or don't know, <0 = error. */
fn_Sd_is_monotonic_aux(const gs_function_Sd_t *const pfn,
		   const float *lower, const float *upper,
		   uint *mask /* 1 bit per dimension : 1 - non-monotonic or don't know,
			         0 - monotonic. */)
{
    int i, code, ii = pfn->params.m - 1;
    int I[4];
    double T0[count_of(I)], T1[count_of(I)];
    double S0[count_of(I)], S1[count_of(I)];
    uint m, mm, m1;
#   if DEBUG_Sd_1arg
    int code1, mask1;
#   endif

    if (ii >= count_of(T0)) {
	 /* Unimplemented. We don't know practical cases,
	    because currently it is only called while decomposing a shading.  */
	return_error(gs_error_limitcheck);
    }
    for (i = 0; i <= ii; i++) {
	float w0, w1;

	code = get_scaled_range(pfn, lower, upper, i, &w0, &w1);
	if (code < 0)
	    return code;
	T0[i] = w0;
	T1[i] = w1;
    }
    if (pfn->params.m == 1 && pfn->params.Order == 1 && pfn->params.n <= MAX_FAST_COMPS) {
	code = fn_Sd_1arg_linear_monotonic(pfn, T0[0], T1[0], mask);
#	if !DEBUG_Sd_1arg
	    return code;
#	else
	    mask1 = *mask;
	    code1 = code;
#	endif
    }
    m1 = (1 << pfn->params.m) - 1;
    code = make_interpolation_nodes(pfn, T0, T1, I, S0, 0, 0, ii);
    if (code < 0)
	return code;
    mm = 0;
    for (i = 0; i < pfn->params.n; i++) {
	code = is_lattice_monotonic(pfn, T0, T1, I, S0, S1, i, &m);
	if (code < 0)
	    return code;
	mm |= m;
	if (mm == m1) /* Don't return early - shadings need to know about all dimensions. */
	    break;
    }
#   if DEBUG_Sd_1arg
	if (mask1 != mm)
	    return_error(gs_error_unregistered);
	if (code1 != !mm)
	    return_error(gs_error_unregistered);
#   endif
    *mask = mm;
    return !mm;
}

/* Test whether a Sampled function is monotonic. */
/* 1 = monotonic, 0 = don't know, <0 = error. */
private int
fn_Sd_is_monotonic(const gs_function_t * pfn_common,
		   const float *lower, const float *upper, uint *mask)
{
    const gs_function_Sd_t *const pfn =
	(const gs_function_Sd_t *)pfn_common;

    return fn_Sd_is_monotonic_aux(pfn, lower, upper, mask);
}

/* Return Sampled function information. */
private void
fn_Sd_get_info(const gs_function_t *pfn_common, gs_function_info_t *pfi)
{
    const gs_function_Sd_t *const pfn =
	(const gs_function_Sd_t *)pfn_common;
    long size;
    int i;

    gs_function_get_info_default(pfn_common, pfi);
    pfi->DataSource = &pfn->params.DataSource;
    for (i = 0, size = 1; i < pfn->params.m; ++i)
	size *= pfn->params.Size[i];
    pfi->data_size =
	(size * pfn->params.n * pfn->params.BitsPerSample + 7) >> 3;
}

/* Write Sampled function parameters on a parameter list. */
private int
fn_Sd_get_params(const gs_function_t *pfn_common, gs_param_list *plist)
{
    const gs_function_Sd_t *const pfn =
	(const gs_function_Sd_t *)pfn_common;
    int ecode = fn_common_get_params(pfn_common, plist);
    int code;

    if (pfn->params.Order != 1) {
	if ((code = param_write_int(plist, "Order", &pfn->params.Order)) < 0)
	    ecode = code;
    }
    if ((code = param_write_int(plist, "BitsPerSample",
				&pfn->params.BitsPerSample)) < 0)
	ecode = code;
    if (pfn->params.Encode) {
	if ((code = param_write_float_values(plist, "Encode",
					     pfn->params.Encode,
					     2 * pfn->params.m, false)) < 0)
	    ecode = code;
    }
    if (pfn->params.Decode) {
	if ((code = param_write_float_values(plist, "Decode",
					     pfn->params.Decode,
					     2 * pfn->params.n, false)) < 0)
	    ecode = code;
    }
    if (pfn->params.Size) {
	if ((code = param_write_int_values(plist, "Size", pfn->params.Size,
					   pfn->params.m, false)) < 0)
	    ecode = code;
    }
    return ecode;
}

/* Make a scaled copy of a Sampled function. */
private int
fn_Sd_make_scaled(const gs_function_Sd_t *pfn, gs_function_Sd_t **ppsfn,
		  const gs_range_t *pranges, gs_memory_t *mem)
{
    gs_function_Sd_t *psfn =
	gs_alloc_struct(mem, gs_function_Sd_t, &st_function_Sd,
			"fn_Sd_make_scaled");
    int code;

    if (psfn == 0)
	return_error(gs_error_VMerror);
    psfn->params = pfn->params;
    psfn->params.Encode = 0;		/* in case of failure */
    psfn->params.Decode = 0;
    psfn->params.Size =
	fn_copy_values(pfn->params.Size, pfn->params.m, sizeof(int), mem);
    if ((code = (psfn->params.Size == 0 ?
		 gs_note_error(gs_error_VMerror) : 0)) < 0 ||
	(code = fn_common_scale((gs_function_t *)psfn,
				(const gs_function_t *)pfn,
				pranges, mem)) < 0 ||
	(code = fn_scale_pairs(&psfn->params.Encode, pfn->params.Encode,
			       pfn->params.m, NULL, mem)) < 0 ||
	(code = fn_scale_pairs(&psfn->params.Decode, pfn->params.Decode,
			       pfn->params.n, pranges, mem)) < 0) {
	gs_function_free((gs_function_t *)psfn, true, mem);
    } else
	*ppsfn = psfn;
    return code;
}

/* Free the parameters of a Sampled function. */
void
gs_function_Sd_free_params(gs_function_Sd_params_t * params, gs_memory_t * mem)
{
    gs_free_const_object(mem, params->Size, "Size");
    gs_free_const_object(mem, params->Decode, "Decode");
    gs_free_const_object(mem, params->Encode, "Encode");
    fn_common_free_params((gs_function_params_t *) params, mem);
    gs_free_object(mem, params->pole, "gs_function_Sd_free_params");
    gs_free_object(mem, params->array_step, "gs_function_Sd_free_params");
    gs_free_object(mem, params->stream_step, "gs_function_Sd_free_params");
}

/* A helper for gs_function_Sd_serialize. */
private int serialize_array(const float *a, int half_size, stream *s)
{
    uint n;
    const float dummy[2] = {0, 0};
    int i, code;

    if (a != NULL)
	return sputs(s, (const byte *)a, sizeof(a[0]) * half_size * 2, &n);
    for (i = 0; i < half_size; i++) {
	code = sputs(s, (const byte *)dummy, sizeof(dummy), &n);
	if (code < 0)
	    return code;
    }
    return 0;
}

/* Serialize. */
private int
gs_function_Sd_serialize(const gs_function_t * pfn, stream *s)
{
    uint n;
    const gs_function_Sd_params_t * p = (const gs_function_Sd_params_t *)&pfn->params;
    gs_function_info_t info;
    int code = fn_common_serialize(pfn, s);
    ulong pos;
    uint count;
    byte buf[100];
    const byte *ptr;

    if (code < 0)
	return code;
    code = sputs(s, (const byte *)&p->Order, sizeof(p->Order), &n);
    if (code < 0)
	return code;
    code = sputs(s, (const byte *)&p->BitsPerSample, sizeof(p->BitsPerSample), &n);
    if (code < 0)
	return code;
    code = serialize_array(p->Encode, p->m, s);
    if (code < 0)
	return code;
    code = serialize_array(p->Decode, p->n, s);
    if (code < 0)
	return code;
    gs_function_get_info(pfn, &info);
    code = sputs(s, (const byte *)&info.data_size, sizeof(info.data_size), &n);
    if (code < 0)
	return code;
    for (pos = 0; pos < info.data_size; pos += count) {
	count = min(sizeof(buf), info.data_size - pos);
	data_source_access_only(info.DataSource, pos, count, buf, &ptr);
	code = sputs(s, ptr, count, &n);
	if (code < 0)
	    return code;
    }
    return 0;
}

/* Allocate and initialize a Sampled function. */
int
gs_function_Sd_init(gs_function_t ** ppfn,
		  const gs_function_Sd_params_t * params, gs_memory_t * mem)
{
    static const gs_function_head_t function_Sd_head = {
	function_type_Sampled,
	{
	    (fn_evaluate_proc_t) fn_Sd_evaluate,
	    (fn_is_monotonic_proc_t) fn_Sd_is_monotonic,
	    (fn_get_info_proc_t) fn_Sd_get_info,
	    (fn_get_params_proc_t) fn_Sd_get_params,
	    (fn_make_scaled_proc_t) fn_Sd_make_scaled,
	    (fn_free_params_proc_t) gs_function_Sd_free_params,
	    fn_common_free,
	    (fn_serialize_proc_t) gs_function_Sd_serialize,
	}
    };
    int code;
    int i;

    *ppfn = 0;			/* in case of error */
    code = fn_check_mnDR((const gs_function_params_t *)params,
			 params->m, params->n);
    if (code < 0)
	return code;
    if (params->m > max_Sd_m)
	return_error(gs_error_limitcheck);
    switch (params->Order) {
	case 0:		/* use default */
	case 1:
	case 3:
	    break;
	default:
	    return_error(gs_error_rangecheck);
    }
    switch (params->BitsPerSample) {
	case 1:
	case 2:
	case 4:
	case 8:
	case 12:
	case 16:
	case 24:
	case 32:
	    break;
	default:
	    return_error(gs_error_rangecheck);
    }
    for (i = 0; i < params->m; ++i)
	if (params->Size[i] <= 0)
	    return_error(gs_error_rangecheck);
    {
	gs_function_Sd_t *pfn =
	    gs_alloc_struct(mem, gs_function_Sd_t, &st_function_Sd,
			    "gs_function_Sd_init");
	int bps, sa, ss, i, order;

	if (pfn == 0)
	    return_error(gs_error_VMerror);
	pfn->params = *params;
	if (params->Order == 0)
	    pfn->params.Order = 1;	/* default */
	pfn->params.pole = NULL;
	pfn->params.array_step = NULL;
	pfn->params.stream_step = NULL;
	pfn->head = function_Sd_head;
	pfn->params.array_size = 0;
	if (pfn->params.m == 1 && pfn->params.Order == 1 && pfn->params.n <= MAX_FAST_COMPS && !DEBUG_Sd_1arg) {
	    /* Won't use pole cache. Call fn_Sd_1arg_linear_monotonic instead. */
	} else {
	    pfn->params.array_step = (int *)gs_alloc_byte_array(mem,
				    max_Sd_m, sizeof(int), "gs_function_Sd_init");
	    pfn->params.stream_step = (int *)gs_alloc_byte_array(mem,
				    max_Sd_m, sizeof(int), "gs_function_Sd_init");
	    if (pfn->params.array_step == NULL || pfn->params.stream_step == NULL)
		return_error(gs_error_VMerror);
	    bps = pfn->params.BitsPerSample;
	    sa = pfn->params.n;
	    ss = pfn->params.n * bps;
	    order = pfn->params.Order;
	    for (i = 0; i < pfn->params.m; i++) {
		pfn->params.array_step[i] = sa * order;
		sa = (pfn->params.Size[i] * order - (order - 1)) * sa;
		pfn->params.stream_step[i] = ss;
		ss = pfn->params.Size[i] * ss;
	    }
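	    /*
	     * For example (assuming m = 1, n = 3, Order = 3, Size = {17} and
	     * BitsPerSample = 8): array_step[0] = 9, stream_step[0] = 24 bits,
	     * and the pole cache allocated below holds (17 * 3 - 2) * 3 = 147
	     * doubles, all initialized to double_stub.
	     */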
1505
	    pfn->params.pole = (double *)gs_alloc_byte_array(mem, 
1506
				    sa, sizeof(double), "gs_function_Sd_init");
1507
	    if (pfn->params.pole == NULL)
1508
		return_error(gs_error_VMerror);
1509
	    for (i = 0; i < sa; i++)
1510
		pfn->params.pole[i] = double_stub;
1511
	    pfn->params.array_size = sa;
1512
	}
1513
	*ppfn = (gs_function_t *) pfn;
1514
    }
1515
    return 0;
1516
}