2 |
- |
1 |
/* Copyright (C) 1997, 2000 Aladdin Enterprises. All rights reserved.
|
|
|
2 |
|
|
|
3 |
This software is provided AS-IS with no warranty, either express or
|
|
|
4 |
implied.
|
|
|
5 |
|
|
|
6 |
This software is distributed under license and may not be copied,
|
|
|
7 |
modified or distributed except as expressly authorized under the terms
|
|
|
8 |
of the license contained in the file LICENSE in this distribution.
|
|
|
9 |
|
|
|
10 |
For more information about licensing, please refer to
|
|
|
11 |
http://www.ghostscript.com/licensing/. For information on
|
|
|
12 |
commercial licensing, go to http://www.artifex.com/licensing/ or
|
|
|
13 |
contact Artifex Software, Inc., 101 Lucas Valley Road #110,
|
|
|
14 |
San Rafael, CA 94903, U.S.A., +1(415)492-9861.
|
|
|
15 |
*/
|
|
|
16 |
|
|
|
17 |
/* $Id: gxfcmap.h,v 1.16 2004/08/04 19:36:12 stefan Exp $ */
|
|
|
18 |
/* Internal CMap structure definitions */
|
|
|
19 |
|
|
|
20 |
/* This file should be called gxcmap.h, except that name is already used. */
|
|
|
21 |
|
|
|
22 |
#ifndef gxfcmap_INCLUDED
|
|
|
23 |
# define gxfcmap_INCLUDED
|
|
|
24 |
|
|
|
25 |
#include "gsfcmap.h"
|
|
|
26 |
#include "gsuid.h"
|
|
|
27 |
#include "gxcid.h"
|
|
|
28 |
|
|
|
29 |
/*
|
|
|
30 |
* CMaps are the structures that map (possibly variable-length) characters
|
|
|
31 |
* appearing in a text string to glyph numbers in some font-specific space.
|
|
|
32 |
* The structure defined here generally follows Adobe's specifications, but
|
|
|
33 |
* the actual implementation of the code space and the lookup tables is
|
|
|
34 |
* virtual, so that the same interface can be used for direct access to the
|
|
|
35 |
* corresponding "cmap" structure in TrueType fonts, rather than having to
|
|
|
36 |
* convert that structure to the Adobe-based one.
|
|
|
37 |
*/
|
|
|
38 |
|
|
|
39 |
/*
|
|
|
40 |
* A CMap conceptually consists of three parts:
|
|
|
41 |
*
|
|
|
42 |
* - The code space, used for parsing the input string into (possibly
|
|
|
43 |
* variable-length) characters.
|
|
|
44 |
*
|
|
|
45 |
* - A 'def' map, which maps defined parsed characters to values.
|
|
|
46 |
*
|
|
|
47 |
* - A 'notdef' map, which maps parsed but undefined characters to
|
|
|
48 |
* values.
|
|
|
49 |
*
|
|
|
50 |
* The value of a character may be a string, a name, or a CID. For more
|
|
|
51 |
* information, see the Adobe documentation.
|
|
|
52 |
*/
|
|
|
53 |
|
|
|
54 |
/* ---------------- Code space ranges ---------------- */
|
|
|
55 |
|
|
|
56 |
/*
|
|
|
57 |
* A code space is a non-empty, lexicographically sorted sequence of
|
|
|
58 |
* code space ranges. Ranges must not overlap. In each range,
|
|
|
59 |
* first[i] <= last[i] for 0 <= i < size.
|
|
|
60 |
*/
|
|
|
61 |
#define MAX_CMAP_CODE_SIZE 4
|
|
|
62 |
typedef struct gx_code_space_range_s {
|
|
|
63 |
byte first[MAX_CMAP_CODE_SIZE];
|
|
|
64 |
byte last[MAX_CMAP_CODE_SIZE];
|
|
|
65 |
int size; /* 1 .. MAX_CMAP_CODE_SIZE */
|
|
|
66 |
} gx_code_space_range_t;
|
|
|
67 |
|
|
|
68 |
/* ---------------- Lookup tables ---------------- */
|
|
|
69 |
|
|
|
70 |
/*
|
|
|
71 |
* A lookup table is a non-empty sequence of lookup ranges. Each range has
|
|
|
72 |
* an associated sorted lookup table, indexed by the num_key_bytes low-order
|
|
|
73 |
* code bytes. If key_is_range is true, each key is a range (2 x key_size
|
|
|
74 |
* bytes); if false, each key is a single code (key_size bytes).
|
|
|
75 |
*
|
|
|
76 |
* The only difference between CODE_VALUE_CID and CODE_VALUE_NOTDEF is
|
|
|
77 |
* that after looking up a CID in a table, for CODE_VALUE_CID the result
|
|
|
78 |
* is incremented by the difference between the input code and the key
|
|
|
79 |
* (i.e., a single CODE_VALUE_CID entry actually represents a range of
|
|
|
80 |
* CIDs), whereas for CODE_VALUE_NOTDEF, the result is not incremented.
|
|
|
81 |
* The defined-character map for a CMap uses the former behavior; the
|
|
|
82 |
* notdef map uses the latter.
|
|
|
83 |
*
|
|
|
84 |
* CODE_VALUE_GLYPH and CODE_VALUE_CHARS are reserved for
|
|
|
85 |
* rearranged font CMaps, which are not implemented yet.
|
|
|
86 |
*/
|
|
|
87 |
/*
 * Kind of value carried by a lookup entry.  The enumerators keep their
 * implicit values (0 .. 3); CODE_VALUE_MAX aliases the last one.
 */
typedef enum {
    CODE_VALUE_CID,             /* CIDs */
    CODE_VALUE_GLYPH,           /* glyphs */
    CODE_VALUE_CHARS,           /* character(s) */
    CODE_VALUE_NOTDEF           /* CID - for notdef(char|range) dst */
#define CODE_VALUE_MAX CODE_VALUE_NOTDEF
} gx_cmap_code_value_type_t;
|
|
|
94 |
typedef struct gx_cmap_lookup_entry_s {
|
|
|
95 |
/* Key */
|
|
|
96 |
byte key[2][MAX_CMAP_CODE_SIZE]; /* [key_is_range + 1][key_size] */
|
|
|
97 |
int key_size; /* 0 .. MAX_CMAP_CODE_SIZE */
|
|
|
98 |
bool key_is_range;
|
|
|
99 |
/* Value */
|
|
|
100 |
gx_cmap_code_value_type_t value_type;
|
|
|
101 |
gs_const_string value;
|
|
|
102 |
int font_index; /* for rearranged fonts */
|
|
|
103 |
} gx_cmap_lookup_entry_t;
|
|
|
104 |
|
|
|
105 |
/* ---------------- CMaps proper ---------------- */
|
|
|
106 |
|
|
|
107 |
/*
|
|
|
108 |
* Define the elements common to all CMaps. Currently we include all
|
|
|
109 |
* elements from the Adobe specification except for the actual code space
|
|
|
110 |
* ranges and lookup tables.
|
|
|
111 |
*
|
|
|
112 |
* CMapType and id are common to all CMapTypes. We really only support the
|
|
|
113 |
* single Adobe standard CMap format. Note that the only documented values
|
|
|
114 |
* of CMapType in the PLRM are 0 and 1, which are equivalent; however, in
|
|
|
115 |
* the second PDF Reference, the CMapType for the example ToUnicode CMap is
|
|
|
116 |
* 2.
|
|
|
117 |
*
|
|
|
118 |
* glyph_name and glyph_name_data are only used if the CMap has lookup
|
|
|
119 |
* entries of type CODE_VALUE_GLYPH. We deliberately chose to make
|
|
|
120 |
* glyph_name a function pointer rather than including it in the procs
|
|
|
121 |
* virtual functions. The rationale is that the virtual functions are
|
|
|
122 |
* dependent on the representation of the CMap, so they should be set by the
|
|
|
123 |
* code that must work with this structure. However, glyph_name is not
|
|
|
124 |
* dependent on the representation of the CMap: it does not need to know
|
|
|
125 |
* anything about how the CMap is stored. Rather, it is meant to be used by
|
|
|
126 |
* the client who constructs the CMap, who decides how stored
|
|
|
127 |
* CODE_VALUE_GLYPH values correspond to printable glyph names. The same
|
|
|
128 |
* glyph_name procedure can, in principle, be used with multiple different
|
|
|
129 |
* subclasses of gs_cmap_t.
|
|
|
130 |
*/
|
|
|
131 |
/*
 * Forward declaration of the CMap type.  The gs_cmap_DEFINED guard skips
 * the typedef if it has already been made (presumably by another header
 * such as gsfcmap.h -- confirm).
 */
#ifndef gs_cmap_DEFINED
#  define gs_cmap_DEFINED
typedef struct gs_cmap_s gs_cmap_t;
#endif
|
|
|
135 |
|
|
|
136 |
/*
 * Member list shared by gs_cmap_t and its subclasses.  Expand it at the
 * very start of the struct body (CMapType must be first) and follow it
 * with a semicolon, since the last member is left unterminated.
 */
#define GS_CMAP_COMMON\
    int CMapType;               /* must be first */\
    gs_id id;                   /* internal ID (no relation to UID) */\
        /* End of entries common to all CMapTypes */\
    gs_const_string CMapName;\
    gs_cid_system_info_t *CIDSystemInfo; /* [num_fonts] */\
    int num_fonts;\
    float CMapVersion;\
    gs_uid uid;                 /* XUID or nothing */\
    long UIDOffset;\
    int WMode;\
    bool from_Unicode;          /* if true, characters are Unicode */\
    bool ToUnicode;             /* if true, it is a ToUnicode CMap */\
    gs_glyph_name_proc_t glyph_name;  /* glyph name procedure for printing */\
    void *glyph_name_data;      /* closure data */\
    const gs_cmap_procs_t *procs
|
|
|
152 |
|
|
|
153 |
/*
 * Structure descriptor for gs_cmap_t.  public_st_cmap() is to be expanded
 * once, in gsfcmap.c; it lists the members handed to the GC_*_ELT macros:
 * the CMapName string plus the CIDSystemInfo, uid.xvalues and
 * glyph_name_data pointers.
 */
extern_st(st_cmap);
#define public_st_cmap()        /* in gsfcmap.c */\
  BASIC_PTRS(cmap_ptrs) {\
    GC_CONST_STRING_ELT(gs_cmap_t, CMapName),\
    GC_OBJ_ELT3(gs_cmap_t, CIDSystemInfo, uid.xvalues, glyph_name_data)\
  };\
  gs_public_st_basic(st_cmap, gs_cmap_t, "gs_cmap_t", cmap_ptrs, cmap_data)
|
|
|
160 |
|
|
|
161 |
/*
 * Forward declarations of the two enumerator types, needed by the
 * procedure table before the structures themselves are defined.
 */
typedef struct gs_cmap_ranges_enum_s gs_cmap_ranges_enum_t;
typedef struct gs_cmap_lookups_enum_s gs_cmap_lookups_enum_t;
|
|
|
163 |
|
|
|
164 |
typedef struct gs_cmap_procs_s {
|
|
|
165 |
|
|
|
166 |
/*
|
|
|
167 |
* Decode and map a character from a string using a CMap.
|
|
|
168 |
* See gsfcmap.h for details.
|
|
|
169 |
*/
|
|
|
170 |
|
|
|
171 |
int (*decode_next)(const gs_cmap_t *pcmap, const gs_const_string *str,
|
|
|
172 |
uint *pindex, uint *pfidx,
|
|
|
173 |
gs_char *pchr, gs_glyph *pglyph);
|
|
|
174 |
|
|
|
175 |
/*
|
|
|
176 |
* Initialize an enumeration of code space ranges. See below.
|
|
|
177 |
*/
|
|
|
178 |
|
|
|
179 |
void (*enum_ranges)(const gs_cmap_t *pcmap,
|
|
|
180 |
gs_cmap_ranges_enum_t *penum);
|
|
|
181 |
|
|
|
182 |
/*
|
|
|
183 |
* Initialize an enumeration of lookups. See below.
|
|
|
184 |
*/
|
|
|
185 |
|
|
|
186 |
void (*enum_lookups)(const gs_cmap_t *pcmap, int which,
|
|
|
187 |
gs_cmap_lookups_enum_t *penum);
|
|
|
188 |
|
|
|
189 |
/*
|
|
|
190 |
* Check if the cmap is identity.
|
|
|
191 |
*/
|
|
|
192 |
|
|
|
193 |
bool (*is_identity)(const gs_cmap_t *pcmap, int font_index_only);
|
|
|
194 |
|
|
|
195 |
} gs_cmap_procs_t;
|
|
|
196 |
|
|
|
197 |
struct gs_cmap_s {
|
|
|
198 |
GS_CMAP_COMMON;
|
|
|
199 |
};
|
|
|
200 |
|
|
|
201 |
/* ---------------- Enumerators ---------------- */
|
|
|
202 |
|
|
|
203 |
/*
|
|
|
204 |
* Define enumeration structures for code space ranges and lookup tables.
|
|
|
205 |
* Since all current and currently envisioned implementations are very
|
|
|
206 |
* simple, we don't bother to make this fully general, with subclasses
|
|
|
207 |
* or a "finish" procedure.
|
|
|
208 |
*/
|
|
|
209 |
typedef struct gs_cmap_ranges_enum_procs_s {
|
|
|
210 |
int (*next_range)(gs_cmap_ranges_enum_t *penum);
|
|
|
211 |
} gs_cmap_ranges_enum_procs_t;
|
|
|
212 |
struct gs_cmap_ranges_enum_s {
|
|
|
213 |
/*
|
|
|
214 |
* Return the next code space range here.
|
|
|
215 |
*/
|
|
|
216 |
gx_code_space_range_t range;
|
|
|
217 |
/*
|
|
|
218 |
* The rest of the information is private to the implementation.
|
|
|
219 |
*/
|
|
|
220 |
const gs_cmap_t *cmap;
|
|
|
221 |
const gs_cmap_ranges_enum_procs_t *procs;
|
|
|
222 |
uint index;
|
|
|
223 |
};
|
|
|
224 |
|
|
|
225 |
typedef struct gs_cmap_lookups_enum_procs_s {
|
|
|
226 |
int (*next_lookup)(gs_cmap_lookups_enum_t *penum);
|
|
|
227 |
int (*next_entry)(gs_cmap_lookups_enum_t *penum);
|
|
|
228 |
} gs_cmap_lookups_enum_procs_t;
|
|
|
229 |
struct gs_cmap_lookups_enum_s {
|
|
|
230 |
/*
|
|
|
231 |
* Return the next lookup and entry here.
|
|
|
232 |
*/
|
|
|
233 |
gx_cmap_lookup_entry_t entry;
|
|
|
234 |
/*
|
|
|
235 |
* The rest of the information is private to the implementation.
|
|
|
236 |
*/
|
|
|
237 |
const gs_cmap_t *cmap;
|
|
|
238 |
const gs_cmap_lookups_enum_procs_t *procs;
|
|
|
239 |
uint index[2];
|
|
|
240 |
byte temp_value[max(sizeof(gs_glyph), sizeof(gs_char))];
|
|
|
241 |
};
|
|
|
242 |
/*
|
|
|
243 |
* Define a vacuous next_lookup procedure, useful for the notdef lookups
|
|
|
244 |
* for CMaps that don't have any.
|
|
|
245 |
*/
|
|
|
246 |
/* Declaration only; the table itself is defined in a .c file (presumably gsfcmap.c -- confirm). */
extern const gs_cmap_lookups_enum_procs_t gs_cmap_no_lookups_procs;
|
|
|
247 |
|
|
|
248 |
/* ---------------- Client procedures ---------------- */
|
|
|
249 |
|
|
|
250 |
/*
|
|
|
251 |
* Initialize the enumeration of the code space ranges, and enumerate
|
|
|
252 |
* the next range. gs_cmap_enum_next_range returns 0 if OK, 1 if finished, <0 if error.
|
|
|
253 |
* The intended usage is:
|
|
|
254 |
*
|
|
|
255 |
* for (gs_cmap_ranges_enum_init(pcmap, &renum);
|
|
|
256 |
* (code = gs_cmap_enum_next_range(&renum)) == 0; ) {
|
|
|
257 |
* ...
|
|
|
258 |
* }
|
|
|
259 |
* if (code < 0) <<error>>
|
|
|
260 |
*/
|
|
|
261 |
void gs_cmap_ranges_enum_init(const gs_cmap_t *pcmap,
|
|
|
262 |
gs_cmap_ranges_enum_t *penum);
|
|
|
263 |
int gs_cmap_enum_next_range(gs_cmap_ranges_enum_t *penum);
|
|
|
264 |
|
|
|
265 |
/*
|
|
|
266 |
* Initialize the enumeration of the lookups, and enumerate
|
|
|
267 |
* the next lookup or entry. which = 0 for defined characters,
|
|
|
268 |
* which = 1 for notdef. next_xxx returns 0 if OK, 1 if finished,
|
|
|
269 |
* <0 if error. The intended usage is:
|
|
|
270 |
*
|
|
|
271 |
* for (gs_cmap_lookups_enum_init(pcmap, which, &lenum);
|
|
|
272 |
* (code = gs_cmap_enum_next_lookup(&lenum)) == 0; ) {
|
|
|
273 |
* while ((code = gs_cmap_enum_next_entry(&lenum)) == 0) {
|
|
|
274 |
* ...
|
|
|
275 |
* }
|
|
|
276 |
* if (code < 0) <<error>>
|
|
|
277 |
* }
|
|
|
278 |
* if (code < 0) <<error>>
|
|
|
279 |
*
|
|
|
280 |
* Note that next_lookup sets (at least) penum->entry.
|
|
|
281 |
* key_size, key_is_range, value_type, font_index
|
|
|
282 |
* whereas next_entry sets penum->entry.
|
|
|
283 |
* key[0][*], key[1][*], value
|
|
|
284 |
* Clients must not modify any members of the enumerator.
|
|
|
285 |
* The bytes of the value string may be allocated locally (in the enumerator
|
|
|
286 |
* itself) and not survive from one call to the next.
|
|
|
287 |
*/
|
|
|
288 |
void gs_cmap_lookups_enum_init(const gs_cmap_t *pcmap, int which,
|
|
|
289 |
gs_cmap_lookups_enum_t *penum);
|
|
|
290 |
int gs_cmap_enum_next_lookup(gs_cmap_lookups_enum_t *penum);
|
|
|
291 |
int gs_cmap_enum_next_entry(gs_cmap_lookups_enum_t *penum);
|
|
|
292 |
|
|
|
293 |
/* ---------------- Implementation procedures ---------------- */
|
|
|
294 |
|
|
|
295 |
/*
|
|
|
296 |
* Initialize a just-allocated CMap, to ensure that all pointers are clean
|
|
|
297 |
* for the GC. Note that this only initializes the common part.
|
|
|
298 |
*/
|
|
|
299 |
/* NOTE(review): num_fonts presumably sets pcmap->num_fonts, the length of CIDSystemInfo[] -- confirm in gsfcmap.c. */
void gs_cmap_init(const gs_memory_t *mem, gs_cmap_t *pcmap, int num_fonts);
|
|
|
300 |
|
|
|
301 |
/*
|
|
|
302 |
* Allocate and initialize (the common part of) a CMap.
|
|
|
303 |
*/
|
|
|
304 |
int gs_cmap_alloc(gs_cmap_t **ppcmap, const gs_memory_struct_type_t *pstype,
|
|
|
305 |
int wmode, const byte *map_name, uint name_size,
|
|
|
306 |
const gs_cid_system_info_t *pcidsi, int num_fonts,
|
|
|
307 |
const gs_cmap_procs_t *procs, gs_memory_t *mem);
|
|
|
308 |
|
|
|
309 |
/*
|
|
|
310 |
* Initialize an enumerator with convenient defaults (index = 0).
|
|
|
311 |
*/
|
|
|
312 |
void gs_cmap_ranges_enum_setup(gs_cmap_ranges_enum_t *penum,
|
|
|
313 |
const gs_cmap_t *pcmap,
|
|
|
314 |
const gs_cmap_ranges_enum_procs_t *procs);
|
|
|
315 |
void gs_cmap_lookups_enum_setup(gs_cmap_lookups_enum_t *penum,
|
|
|
316 |
const gs_cmap_t *pcmap,
|
|
|
317 |
const gs_cmap_lookups_enum_procs_t *procs);
|
|
|
318 |
|
|
|
319 |
/*
|
|
|
320 |
* Check for identity CMap. Uses a fast check for special cases.
|
|
|
321 |
*/
|
|
|
322 |
/* NOTE(review): the meaning of font_index_only is not described in this header -- see the implementation. */
bool gs_cmap_is_identity(const gs_cmap_t *pcmap, int font_index_only);
|
|
|
323 |
|
|
|
324 |
/*
|
|
|
325 |
* For a random CMap, compute whether it is identity.
|
|
|
326 |
* It is not applicable to gs_cmap_ToUnicode_t due to
|
|
|
327 |
* different sizes of domain keys and range values.
|
|
|
328 |
*/
|
|
|
329 |
/* Same signature as the is_identity member of gs_cmap_procs_t, so presumably usable as that procedure -- confirm. */
bool gs_cmap_compute_identity(const gs_cmap_t *pcmap, int font_index_only);
|
|
|
330 |
|
|
|
331 |
#endif /* gxfcmap_INCLUDED */
|