Subversion Repositories planix.SVN

Rev

Rev 2 | Details | Compare with Previous | Last modification | View Log | RSS feed

Rev Author Line No. Line
2 - 1
/* Copyright (C) 2002 Aladdin Enterprises.  All rights reserved.
2
 
3
  This software is provided AS-IS with no warranty, either express or
4
  implied.
5
 
6
  This software is distributed under license and may not be copied,
7
  modified or distributed except as expressly authorized under the terms
8
  of the license contained in the file LICENSE in this distribution.
9
 
10
  For more information about licensing, please refer to
11
  http://www.ghostscript.com/licensing/. For information on
12
  commercial licensing, go to http://www.artifex.com/licensing/ or
13
  contact Artifex Software, Inc., 101 Lucas Valley Road #110,
14
  San Rafael, CA  94903, U.S.A., +1(415)492-9861.
15
*/
16
 
17
/* $Id: gsfcmap1.c,v 1.7 2004/08/04 19:36:12 stefan Exp $ */
18
/* Adobe-based CMap character decoding */
19
#include "memory_.h"
20
#include "string_.h"
21
#include "gx.h"
22
#include "gserrors.h"
23
#include "gsstruct.h"
24
#include "gsutil.h"		/* for gs_next_ids */
25
#include "gxfcmap1.h"
26
 
27
/* Get a big-endian integer. */
28
inline private ulong
29
bytes2int(const byte *p, int n)
30
{
31
    ulong v = 0;
32
    int i;
33
 
34
    for (i = 0; i < n; ++i)
35
        v = (v << 8) + p[i];
36
    return v;
37
}
38
 
39
/* ---------------- GC descriptors ---------------- */
40
 
41
/*
 * Instantiate the GC structure descriptor for Adobe1 CMaps.  The macro is
 * not defined in this file (presumably in gxfcmap1.h -- confirm); the
 * resulting st_cmap_adobe1 descriptor is what gs_cmap_adobe1_alloc passes
 * to gs_cmap_alloc.
 */
public_st_cmap_adobe1();
42
/* Because lookup ranges can be elements of arrays, */
43
/* their enum_ptrs procedure must never return 0 prematurely. */
44
private 
45
ENUM_PTRS_WITH(cmap_lookup_range_enum_ptrs,
46
               gx_cmap_lookup_range_t *pclr) return 0;
47
case 0:
48
    if (pclr->value_type == CODE_VALUE_GLYPH) {
49
        const byte *pv = pclr->values.data;
50
	int size = pclr->value_size;
51
        int k;
52
 
53
        for (k = 0; k < pclr->num_entries; ++k, pv += size) {
54
            gs_glyph glyph = bytes2int(pv, size);
55
 
56
            pclr->cmap->mark_glyph(mem, glyph, pclr->cmap->mark_glyph_data);
57
        }
58
    }
59
    return ENUM_OBJ(pclr->cmap);
60
case 1: return ENUM_STRING(&pclr->keys);
61
case 2: return ENUM_STRING(&pclr->values);
62
ENUM_PTRS_END
63
private
64
RELOC_PTRS_WITH(cmap_lookup_range_reloc_ptrs, gx_cmap_lookup_range_t *pclr)
65
    RELOC_VAR(pclr->cmap);
66
    RELOC_STRING_VAR(pclr->keys);
67
    RELOC_STRING_VAR(pclr->values);
68
RELOC_PTRS_END
69
/*
 * Instantiate the GC structure descriptor for a lookup range.  The macro is
 * defined outside this file (presumably in gxfcmap1.h and presumably bound
 * to the enum_ptrs/reloc_ptrs procedures above -- confirm); the resulting
 * st_cmap_lookup_range is used by gs_cmap_adobe1_alloc to allocate the
 * lookup array.
 */
public_st_cmap_lookup_range();
70
/*
 * Instantiate the GC structure descriptor used when lookup ranges are
 * stored as array elements (macro defined outside this file, presumably in
 * gxfcmap1.h -- confirm).
 */
public_st_cmap_lookup_range_element();
71
 
72
/* ---------------- Procedures ---------------- */
73
 
74
    /* ------ Decoding ------ */
75
 
76
/*
77
 * multi-dimensional range comparator
78
 */
79
 
80
private void
81
print_msg_str_in_range(const byte *str,
82
                       const byte *key_lo, const byte *key_hi,
83
                       int key_size)
84
{
85
    debug_print_string_hex(str, key_size);
86
    dlprintf(" in ");
87
    debug_print_string_hex(key_lo, key_size);
88
    dlprintf(" - ");
89
    debug_print_string_hex(key_hi, key_size);
90
    dlprintf("\n");
91
}
92
 
93
private int
94
gs_cmap_get_shortest_chr(const gx_code_map_t * pcmap, uint *pfidx)
95
{
96
    int i;
97
    int len_shortest = MAX_CMAP_CODE_SIZE;
98
    uint fidx_shortest = 0; /* font index for this fallback */
99
 
100
    for (i = pcmap->num_lookup - 1; i >= 0; --i) {
101
        const gx_cmap_lookup_range_t *pclr = &pcmap->lookup[i];
102
        if ((pclr->key_prefix_size + pclr->key_size) <= len_shortest) {
103
           len_shortest = (pclr->key_prefix_size + pclr->key_size);
104
           fidx_shortest = pclr->font_index;
105
        }
106
    }
107
 
108
    *pfidx = fidx_shortest;
109
    return len_shortest;
110
}
111
 
112
/*
113
 * multi-dimensional relative position calculator
114
 *
115
 * Returns offset of the given CID, considering CID range
116
 * as array of CIDs (the last index changes fastest).
117
 */
118
private int
119
gs_multidim_CID_offset(const byte *key_str,
120
                        const byte *key_lo, const byte *key_hi,
121
			int key_size)
122
{
123
 
124
    int i;	/* index for current dimension */
125
    int CID_offset = 0;
126
 
127
    if (gs_debug_c('J')) {
128
        dlprintf("[J]gmCo()         calc CID_offset for 0x");
129
        print_msg_str_in_range(key_str, key_lo, key_hi, key_size);
130
    }
131
 
132
    for (i = 0; i < key_size; i++)
133
        CID_offset = CID_offset * (key_hi[i] - key_lo[i] + 1) +
134
            key_str[i] - key_lo[i];
135
 
136
    if_debug1('J', "[J]gmCo()         CID_offset = %d\n", CID_offset);
137
    return CID_offset;
138
}
139
 
140
/*
141
 * Decode a character from a string using a code map, updating the index.
142
 * Return 0 for a CID or name, N > 0 for a character code where N is the
143
 * number of bytes in the code, or an error.  Store the decoded bytes in
144
 * *pchr.  For undefined characters, set *pglyph = gs_no_glyph and return 0.
145
 */
146
private int
147
code_map_decode_next_multidim_regime(const gx_code_map_t * pcmap,
148
                     const gs_const_string * pstr,
149
                     uint * pindex, uint * pfidx,
150
                     gs_char * pchr, gs_glyph * pglyph)
151
{
152
    const byte *str = pstr->data + *pindex;
153
    uint ssize = pstr->size - *pindex;
154
    /*
155
     * The keys are not sorted due to 'usecmap'.  Possible optimization :
156
     * merge and sort keys in 'zbuildcmap', then use binary search here.
157
     * This would be valuable for UniJIS-UTF8-H, which contains about 7000
158
     * keys.
159
     */
160
    int i;
161
 
162
    /*
163
     * In the fallback of CMap decoding procedure, there is "partial matching".
164
     * For detail, refer PostScript Ref. Manual v3 at the end of Fonts chapter.
165
     */
166
 
167
    /* "pm" stands for partial match (not pointer), temporal use. */
168
    int pm_maxlen = 0;		/* partial match: max length */
169
    int pm_index = *pindex;	/* partial match: ptr index (in str) */
170
    uint pm_fidx = *pfidx;	/* partial match: ptr font index */
171
    gs_char pm_chr = *pchr;	/* partial match: ptr character */
172
 
173
    *pchr = '\0';
174
 
175
    if (gs_debug_c('J')) {
176
        dlprintf("[J]CMDNmr() is called: str=(");
177
        debug_print_string_hex(str, ssize);
178
        dlprintf3(") @ 0x%lx ssize=%d, %d ranges to check\n",
179
                       str, ssize, pcmap->num_lookup);
180
    }
181
 
182
    for (i = pcmap->num_lookup - 1; i >= 0; --i) {
183
	/* main loop - scan the map passed via pcmap */
184
	/* reverse scan order due to 'usecmap' */
185
 
186
        const gx_cmap_lookup_range_t *pclr = &pcmap->lookup[i];
187
        int pre_size = pclr->key_prefix_size, key_size = pclr->key_size,
188
            chr_size = pre_size + key_size;
189
 
190
        int j = 0;
191
	/* length of the given byte stream is shorter than
192
         * chr-length of current range, no need for further check,
193
         * skip to the next range.
194
         */
195
        if (ssize < chr_size)
196
            continue;
197
 
198
        if (0 < pre_size) {
199
            const byte * prefix = pclr->key_prefix;
200
            /* check partial match in prefix */
201
            for (j = 0; j < pre_size; j++)
202
               if (prefix[j] != str[j])
203
                   break;
204
 
205
            if (0 == j)			/* no match, skip to next i */
206
                continue;
207
            else if (j < pre_size) {	/* not exact, partial match */
208
                if (gs_debug_c('J')) {
209
                    dlprintf("[J]CMDNmr() partial match with prefix:");
210
                    print_msg_str_in_range(str, prefix,
211
                                                prefix, pre_size);
212
                }
213
 
214
                if (pm_maxlen < j) {
215
                    pm_maxlen = chr_size;
216
                    pm_chr = bytes2int(str, chr_size);
217
                    pm_index = (*pindex) + chr_size;
218
                    pm_fidx = pclr->font_index;
219
                }
220
                continue ; /* no need to check key, skip to next i */
221
            }
222
 
223
            if (gs_debug_c('J')) {
224
                dlprintf("[J]CMDNmr()   full match with prefix:");
225
                print_msg_str_in_range(str, prefix, prefix, pre_size);
226
            }
227
 
228
        } /* if (0 < pre_size) */
229
 
230
        /* full match in prefix. check key */
231
        {
232
            const byte *key = pclr->keys.data;
233
            int step = key_size;
234
            int k, l;
235
            const byte *pvalue = NULL;
236
 
237
	    /* when range is "range", 2 keys for lo-end and hi-end
238
             * are stacked. So twice the step. current "key" points
239
             * lo-end of current range, and the pointer for hi-end
240
             * is calculated by (key + step - key_size).
241
             */
242
 
243
            if (pclr->key_is_range)
244
		step <<=1; 	/* step = step * 2; */
245
 
246
            for (k = 0; k < pclr->num_entries; ++k, key += step) {
247
 
248
                if_debug0('j', "[j]CMDNmr()     check key:");
249
                if (gs_debug_c('j'))
250
                    print_msg_str_in_range(str + pre_size,
251
                        key, key + step - key_size, key_size) ;
252
 
253
                for (l = 0; l < key_size; l++) {
254
                    byte c = str[l + pre_size];
255
                    if (c < key[l] || c > key[step - key_size + l])
256
                        break;
257
                }
258
 
259
		if (pm_maxlen < pre_size + l) {
260
                    pm_maxlen = chr_size;
261
                    pm_chr = bytes2int(str, chr_size);
262
                    pm_index = (*pindex) + chr_size;
263
                    pm_fidx = pclr->font_index;
264
                }
265
                if (l == key_size)
266
                        break;
267
	    }
268
 
269
            /* all keys are tried, but found no match. */
270
            /* go to next prefix. */
271
            if (k == pclr->num_entries)
272
                continue;
273
 
274
            /* We have a match.  Return the result. */
275
            *pchr = bytes2int(str, chr_size);
276
            *pindex += chr_size;
277
            *pfidx = pclr->font_index;
278
            pvalue = pclr->values.data + k * pclr->value_size;
279
 
280
            if (gs_debug_c('J')) {
281
                dlprintf("[J]CMDNmr()     full matched pvalue=(");
282
                debug_print_string_hex(pvalue, pclr->value_size);
283
                dlprintf(")\n");
284
            }
285
 
286
            switch (pclr->value_type) {
287
            case CODE_VALUE_CID:
288
                *pglyph = gs_min_cid_glyph +
289
                    bytes2int(pvalue, pclr->value_size) +
290
                    gs_multidim_CID_offset(str + pre_size,
291
                        key, key + step - key_size, key_size);
292
                return 0;
293
            case CODE_VALUE_NOTDEF:
294
                *pglyph = gs_min_cid_glyph +
295
                    bytes2int(pvalue, pclr->value_size);
296
                return 0;
297
            case CODE_VALUE_GLYPH:
298
                *pglyph = bytes2int(pvalue, pclr->value_size);
299
                return 0;
300
            case CODE_VALUE_CHARS:
301
                *pglyph =
302
                    bytes2int(pvalue, pclr->value_size) +
303
                    bytes2int(str + pre_size, key_size) -
304
                    bytes2int(key, key_size);
305
                return pclr->value_size;
306
            default:            /* shouldn't happen */
307
                return_error(gs_error_rangecheck);
308
            }
309
        }
310
    }
311
    /* No mapping. */
312
    *pchr = pm_chr;
313
    *pindex = pm_index;
314
    *pfidx = pm_fidx;
315
    *pglyph = gs_no_glyph;
316
    if (gs_debug_c('J')) {
317
        dlprintf("[J]CMDNmr()     no full match, use partial match for (");
318
        debug_print_string_hex(str, pm_maxlen);
319
        dlprintf(")\n");
320
    }
321
    return 0;
322
}
323
 
324
/*
325
 * Decode a character from a string using a CMap.
326
 * Return like code_map_decode_next.
327
 * At present, the range specification by (begin|end)codespacerange
328
 * is not used in this function. Therefore, this function accepts
329
 * some invalid CMap which def & undef maps exceed the codespacerange.
330
 * It should be checked in this function, or some procedure in gs_cmap.ps.
331
 */
332
private int
333
gs_cmap_adobe1_decode_next(const gs_cmap_t * pcmap_in,
334
			   const gs_const_string * pstr,
335
			   uint * pindex, uint * pfidx,
336
			   gs_char * pchr, gs_glyph * pglyph)
337
{
338
    const gs_cmap_adobe1_t *pcmap = (const gs_cmap_adobe1_t *)pcmap_in;
339
    uint save_index = *pindex;
340
    int code;
341
 
342
    uint pm_index;
343
    uint pm_fidx;
344
    gs_char pm_chr;
345
 
346
    /* For first, check defined map */
347
    if_debug0('J', "[J]GCDN() check def CMap\n");
348
    code =
349
        code_map_decode_next_multidim_regime(&pcmap->def, pstr, pindex, pfidx, pchr, pglyph);
350
 
351
    /* This is defined character */
352
    if (code != 0 || *pglyph != gs_no_glyph)
353
        return code;
354
 
355
    /* In here, this is NOT defined character */
356
    /* save partially matched results */
357
    pm_index = *pindex;
358
    pm_fidx = *pfidx;
359
    pm_chr = *pchr;
360
 
361
    /* check notdef map. */
362
    if_debug0('J', "[J]GCDN() check notdef CMap\n");
363
    *pindex = save_index;
364
    code =
365
	code_map_decode_next_multidim_regime(&pcmap->notdef, pstr, pindex, pfidx, pchr, pglyph);
366
 
367
    /* This is defined "notdef" character. */
368
    if (code != 0 || *pglyph != gs_no_glyph)
369
        return code;
370
 
371
    /*
372
     * This is undefined in def & undef maps,
373
     * use partially matched result with default notdef (CID = 0).
374
     */ 
375
    if (save_index < pm_index) {
376
 
377
	/* there was some partially matched */
378
 
379
        *pglyph = gs_min_cid_glyph;	/* CID = 0 */
380
        *pindex = pm_index;
381
        *pfidx = pm_fidx;
382
        *pchr = '\0';
383
         return 0; /* should return some error for partial matched .notdef? */
384
    }
385
    else {
386
	/* no match */
387
 
388
	/* Even partial match is failed.
389
         * Getting the shortest length from defined characters,
390
         * and take the leading bytes (with same length of the shortest
391
         * defined chr) as an unidentified character: CID = 0.
392
	 * Also this procedure is specified in PS Ref. Manual v3,
393
         * at the end of Fonts chapter. 
394
         */
395
 
396
	const byte *str = pstr->data + save_index;
397
	uint ssize = pstr->size - save_index;
398
	int chr_size_shortest = 
399
		gs_cmap_get_shortest_chr(&pcmap->def, pfidx);
400
 
401
	if (chr_size_shortest <= ssize) {
402
            *pglyph = gs_min_cid_glyph;	/* CID = 0, this is CMap fallback */
403
            *pindex = save_index + chr_size_shortest;
404
	    *pchr = '\0';
405
            if (gs_debug_c('J')) {
406
                dlprintf1("[J]GCDN() no partial match, skip %d byte (",
407
                                               chr_size_shortest);
408
                debug_print_string_hex(str, chr_size_shortest);
409
                dlprintf(")\n");
410
            }
411
            return 0; /* should return some error for fallback .notdef? */
412
	}
413
	else {
414
            /* Undecodable string is shorter than the shortest character,
415
             * there's no way except to return error.
416
             */
417
            if (gs_debug_c('J')) {
418
                dlprintf2("[J]GCDN() left data in buffer (%d) is shorter than shortest defined character (%d)\n",
419
                  ssize, chr_size_shortest);
420
            }
421
            *pglyph = gs_no_glyph;
422
            return_error(gs_error_rangecheck);
423
	}
424
    }
425
}
426
 
427
    /* ------ Initialization/creation ------ */
428
 
429
/*
430
 * Allocate and initialize an Adobe1 CMap.  The caller must still fill in
431
 * the code space ranges, lookup tables, keys, and values.
432
 */
433
 
434
private int
435
adobe1_next_range(gs_cmap_ranges_enum_t *penum)
436
{
437
    const gs_cmap_adobe1_t *const pcmap =
438
	(const gs_cmap_adobe1_t *)penum->cmap;
439
 
440
    if (penum->index >= pcmap->code_space.num_ranges)
441
	return 1;
442
    penum->range = pcmap->code_space.ranges[penum->index++];
443
    return 0;
444
}
445
private const gs_cmap_ranges_enum_procs_t adobe1_range_procs = {
446
    adobe1_next_range
447
};
448
private void
449
gs_cmap_adobe1_enum_ranges(const gs_cmap_t *pcmap, gs_cmap_ranges_enum_t *pre)
450
{
451
    gs_cmap_ranges_enum_setup(pre, pcmap, &adobe1_range_procs);
452
}
453
private int
454
adobe1_next_lookup(gs_cmap_lookups_enum_t *penum, const gx_code_map_t *pcm)
455
{
456
    const gx_cmap_lookup_range_t *lookup = &pcm->lookup[penum->index[0]];
457
 
458
    if (penum->index[0] >= pcm->num_lookup)
459
	return 1;
460
    penum->entry.key_size = lookup->key_prefix_size + lookup->key_size;
461
    penum->entry.key_is_range = lookup->key_is_range;
462
    penum->entry.value_type = lookup->value_type;
463
    penum->entry.value.size = lookup->value_size;
464
    penum->entry.font_index = lookup->font_index;
465
    penum->index[0]++;
466
    penum->index[1] = 0;
467
    return 0;
468
}
469
private int
470
adobe1_next_lookup_def(gs_cmap_lookups_enum_t *penum)
471
{
472
    return adobe1_next_lookup(penum,
473
			&((const gs_cmap_adobe1_t *)penum->cmap)->def);
474
}
475
private int
476
adobe1_next_lookup_notdef(gs_cmap_lookups_enum_t *penum)
477
{
478
    return adobe1_next_lookup(penum,
479
			&((const gs_cmap_adobe1_t *)penum->cmap)->notdef);
480
}
481
private int
482
adobe1_next_entry(gs_cmap_lookups_enum_t *penum, const gx_code_map_t *pcm)
483
{
484
    const gx_cmap_lookup_range_t *lookup = &pcm->lookup[penum->index[0] - 1];
485
    int psize = lookup->key_prefix_size;
486
    int ksize = lookup->key_size;
487
    const byte *key =
488
	lookup->keys.data + penum->index[1] * ksize *
489
	(lookup->key_is_range ? 2 : 1);
490
    int i;
491
 
492
    if (penum->index[1] >= lookup->num_entries)
493
	return 1;
494
    if (psize + ksize > MAX_CMAP_CODE_SIZE)
495
	return_error(gs_error_rangecheck);
496
    for (i = 0; i < 2; ++i, key += ksize) {
497
	memcpy(penum->entry.key[i], lookup->key_prefix, psize);
498
	memcpy(penum->entry.key[i] + psize, key, ksize);
499
    }
500
    penum->entry.value.data =
501
	lookup->values.data + penum->index[1] * lookup->value_size;
502
    penum->entry.value.size = lookup->value_size;
503
    penum->index[1]++;
504
    return 0;
505
}
506
private int
507
adobe1_next_entry_def(gs_cmap_lookups_enum_t *penum)
508
{
509
    return adobe1_next_entry(penum,
510
			&((const gs_cmap_adobe1_t *)penum->cmap)->def);
511
}
512
private int
513
adobe1_next_entry_notdef(gs_cmap_lookups_enum_t *penum)
514
{
515
    return adobe1_next_entry(penum,
516
			&((const gs_cmap_adobe1_t *)penum->cmap)->notdef);
517
}
518
private const gs_cmap_lookups_enum_procs_t adobe1_lookup_def_procs = {
519
    adobe1_next_lookup_def, adobe1_next_entry_def
520
};
521
private const gs_cmap_lookups_enum_procs_t adobe1_lookup_notdef_procs = {
522
    adobe1_next_lookup_notdef, adobe1_next_entry_notdef
523
};
524
private void
525
gs_cmap_adobe1_enum_lookups(const gs_cmap_t *pcmap, int which,
526
			    gs_cmap_lookups_enum_t *pre)
527
{
528
    gs_cmap_lookups_enum_setup(pre, pcmap,
529
			       (which ? &adobe1_lookup_notdef_procs :
530
				&adobe1_lookup_def_procs));
531
}
532
 
533
private const gs_cmap_procs_t cmap_adobe1_procs = {
534
    gs_cmap_adobe1_decode_next,
535
    gs_cmap_adobe1_enum_ranges,
536
    gs_cmap_adobe1_enum_lookups,
537
    gs_cmap_compute_identity
538
};
539
 
540
int
541
gs_cmap_adobe1_alloc(gs_cmap_adobe1_t **ppcmap, int wmode,
542
		     const byte *map_name, uint name_size,
543
		     uint num_fonts, uint num_ranges, uint num_lookups,
544
		     uint keys_size, uint values_size,
545
		     const gs_cid_system_info_t *pcidsi_in, gs_memory_t *mem)
546
{
547
    gs_cmap_t *pcmap;
548
    gs_cmap_adobe1_t *pcmap1;
549
    gx_code_space_range_t *ranges = (gx_code_space_range_t *)
550
	gs_alloc_byte_array(mem, num_ranges, sizeof(gx_code_space_range_t),
551
			    "gs_cmap_alloc(code space ranges)");
552
    gx_cmap_lookup_range_t *lookups =
553
	(num_lookups == 0 ? NULL :
554
	 gs_alloc_struct_array(mem, num_lookups, gx_cmap_lookup_range_t,
555
			       &st_cmap_lookup_range,
556
			       "gs_cmap_alloc(lookup ranges)"));
557
    byte *keys =
558
	(keys_size == 0 ? NULL :
559
	 gs_alloc_string(mem, keys_size, "gs_cmap_alloc(keys)"));
560
    byte *values =
561
	(values_size == 0 ? NULL :
562
	 gs_alloc_string(mem, values_size, "gs_cmap_alloc(values)"));
563
    int code =
564
	gs_cmap_alloc(&pcmap, &st_cmap_adobe1, wmode, map_name, name_size,
565
		      pcidsi_in, num_fonts, &cmap_adobe1_procs, mem);
566
    uint i;
567
 
568
    if (code < 0 || ranges == 0 || (num_lookups != 0 && lookups == 0) ||
569
	(keys_size != 0 && keys == 0) || (values_size != 0 && values == 0)) {
570
	gs_free_string(mem, values, values_size, "gs_cmap_alloc(values)");
571
	gs_free_string(mem, keys, keys_size, "gs_cmap_alloc(keys)");
572
	gs_free_object(mem, lookups, "gs_cmap_alloc(lookup ranges)");
573
	gs_free_object(mem, ranges, "gs_cmap_alloc(code space ranges)");
574
	return_error(gs_error_VMerror);
575
    }
576
    *ppcmap = pcmap1 = (gs_cmap_adobe1_t *)pcmap;
577
    pcmap1->code_space.ranges = ranges;
578
    pcmap1->code_space.num_ranges = num_ranges;
579
    if (num_lookups > 0) {
580
	for (i = 0; i < num_lookups; ++i) {
581
	    memset(&lookups[i], 0, sizeof(*lookups));
582
	    lookups[i].cmap = pcmap1;
583
	}
584
	lookups[0].keys.data = keys;
585
	lookups[0].keys.size = keys_size;
586
	lookups[0].values.data = values;
587
	lookups[0].values.size = values_size;
588
    }
589
    pcmap1->def.lookup = lookups;
590
    pcmap1->def.num_lookup = num_lookups;
591
    pcmap1->notdef.lookup = 0;
592
    pcmap1->notdef.num_lookup = 0;
593
    /* no mark_glyph, mark_glyph_data, glyph_name, glyph_name_data */
594
    return 0;
595
}