2 |
7u83 |
1 |
/*
|
|
|
2 |
Crown Copyright (c) 1997
|
|
|
3 |
|
|
|
4 |
This TenDRA(r) Computer Program is subject to Copyright
|
|
|
5 |
owned by the United Kingdom Secretary of State for Defence
|
|
|
6 |
acting through the Defence Evaluation and Research Agency
|
|
|
7 |
(DERA). It is made available to Recipients with a
|
|
|
8 |
royalty-free licence for its use, reproduction, transfer
|
|
|
9 |
to other parties and amendment for any purpose not excluding
|
|
|
10 |
product development provided that any such use et cetera
|
|
|
11 |
shall be deemed to be acceptance of the following conditions:-
|
|
|
12 |
|
|
|
13 |
(1) Its Recipients shall ensure that this Notice is
|
|
|
14 |
reproduced upon any copies or amended versions of it;
|
|
|
15 |
|
|
|
16 |
(2) Any amended version of it shall be clearly marked to
|
|
|
17 |
show both the nature of and the organisation responsible
|
|
|
18 |
for the relevant amendment or amendments;
|
|
|
19 |
|
|
|
20 |
(3) Its onward transfer from a recipient to another
|
|
|
21 |
party shall be deemed to be that party's acceptance of
|
|
|
22 |
these conditions;
|
|
|
23 |
|
|
|
24 |
(4) DERA gives no warranty or assurance as to its
|
|
|
25 |
quality or suitability for any purpose and DERA accepts
|
|
|
26 |
no liability whatsoever in relation to any use to which
|
|
|
27 |
it may be put.
|
|
|
28 |
*/
|
|
|
29 |
|
|
|
30 |
|
|
|
31 |
#include "config.h"
|
|
|
32 |
#include <limits.h>
|
|
|
33 |
#include "c_types.h"
|
|
|
34 |
#include "char.h"
|
|
|
35 |
#include "literal.h"
|
|
|
36 |
|
|
|
37 |
|
|
|
38 |
/*
|
|
|
39 |
ASCII FLAG
|
|
|
40 |
|
|
|
41 |
This flag is true if the native codeset is ASCII. It is initially -1
|
|
|
42 |
indicating that the conversion tables have not been initialised.
|
|
|
43 |
*/
|
|
|
44 |
|
|
|
45 |
int is_ascii = -1 ;    /* -1 until map_ascii builds the tables and stores 0 or 1 */
|
|
|
46 |
|
|
|
47 |
|
|
|
48 |
/*
|
|
|
49 |
CONVERSION TABLES
|
|
|
50 |
|
|
|
51 |
These tables give the conversions of ASCII to and from the native
|
|
|
52 |
codeset. The ASCII to native table is deduced for the portable
|
|
|
53 |
codeset from the list of characters in char.h which is given in
|
|
|
54 |
ASCII sequence.
|
|
|
55 |
*/
|
|
|
56 |
|
|
|
57 |
|
|
|
58 |
/* Native to ASCII look-up; has no initialiser and is filled in by map_ascii */
static character to_ascii_tab [ NO_CHAR + 1 ] ;
|
|
|
59 |
|
|
|
60 |
/*
    NONE marks a table slot with no value of its own.  It is deliberately
    left defined after this point because map_ascii tests against it.
    CHAR_DATA is only needed while char.h is expanded into the
    initialiser: each entry contributes its fourth field.
*/
#define NONE                        char_illegal
#define CHAR_DATA( A, B, C, D )     ( D ),

/* ASCII to native look-up, one slot per CHAR_DATA entry plus a dummy */
static character from_ascii_tab [ NO_CHAR + 1 ] = {
#include "char.h"
    NONE    /* dummy last element */
} ;

#undef CHAR_DATA
|
|
|
67 |
|
|
|
68 |
|
|
|
69 |
/*
|
|
|
70 |
CONVERT A CHARACTER TO ASCII
|
|
|
71 |
|
|
|
72 |
This routine converts the character c from the native codeset to
|
|
|
73 |
ASCII.
|
|
|
74 |
*/
|
|
|
75 |
|
|
|
76 |
unsigned long to_ascii
|
|
|
77 |
PROTO_N ( ( c, ch ) )
|
|
|
78 |
PROTO_T ( unsigned long c X int *ch )
|
|
|
79 |
{
|
|
|
80 |
if ( c < NO_CHAR ) {
|
|
|
81 |
c = ( unsigned long ) to_ascii_tab [c] ;
|
|
|
82 |
*ch = CHAR_ASCII ;
|
|
|
83 |
}
|
|
|
84 |
return ( c ) ;
|
|
|
85 |
}
|
|
|
86 |
|
|
|
87 |
|
|
|
88 |
/*
|
|
|
89 |
CONVERT A CHARACTER TO NATIVE CODESET
|
|
|
90 |
|
|
|
91 |
This routine converts the character c to the native codeset from
|
|
|
92 |
ASCII.
|
|
|
93 |
*/
|
|
|
94 |
|
|
|
95 |
unsigned long from_ascii
|
|
|
96 |
PROTO_N ( ( c, ch ) )
|
|
|
97 |
PROTO_T ( unsigned long c X int *ch )
|
|
|
98 |
{
|
|
|
99 |
if ( c < NO_CHAR ) {
|
|
|
100 |
c = ( unsigned long ) from_ascii_tab [c] ;
|
|
|
101 |
*ch = CHAR_SIMPLE ;
|
|
|
102 |
}
|
|
|
103 |
return ( c ) ;
|
|
|
104 |
}
|
|
|
105 |
|
|
|
106 |
|
|
|
107 |
/*
|
|
|
108 |
TRANSFORM A TABLE FROM ASCII TO NATIVE CODESET
|
|
|
109 |
|
|
|
110 |
This routine maps the look-up table p from ASCII to native codeset.
|
|
|
111 |
*/
|
|
|
112 |
|
|
|
113 |
void map_ascii
|
|
|
114 |
PROTO_N ( ( p ) )
|
|
|
115 |
PROTO_T ( unsigned char *p )
|
|
|
116 |
{
|
|
|
117 |
unsigned c ;
|
|
|
118 |
int asc = is_ascii ;
|
|
|
119 |
if ( asc == -1 ) {
|
|
|
120 |
/* Set up conversion tables */
|
|
|
121 |
asc = 1 ;
|
|
|
122 |
for ( c = 0 ; c < NO_CHAR ; c++ ) {
|
|
|
123 |
to_ascii_tab [c] = ( character ) c ;
|
|
|
124 |
}
|
|
|
125 |
for ( c = 0 ; c < NO_CHAR ; c++ ) {
|
|
|
126 |
unsigned a = ( unsigned ) from_ascii_tab [c] ;
|
|
|
127 |
if ( a == NONE ) a = c ;
|
|
|
128 |
if ( a != c ) asc = 0 ;
|
|
|
129 |
from_ascii_tab [c] = ( character ) a ;
|
|
|
130 |
to_ascii_tab [a] = ( character ) c ;
|
|
|
131 |
}
|
|
|
132 |
is_ascii = asc ;
|
|
|
133 |
}
|
|
|
134 |
|
|
|
135 |
if ( asc == 0 ) {
|
|
|
136 |
/* Map table */
|
|
|
137 |
unsigned char b = p [ NONE ] ;
|
|
|
138 |
unsigned char copy [ NO_CHAR ] ;
|
|
|
139 |
for ( c = 0 ; c < NO_CHAR ; c++ ) {
|
|
|
140 |
copy [c] = p [c] ;
|
|
|
141 |
p [c] = b ;
|
|
|
142 |
}
|
|
|
143 |
SET ( copy ) ;
|
|
|
144 |
for ( c = 0 ; c < NO_CHAR ; c++ ) {
|
|
|
145 |
character a = from_ascii_tab [c] ;
|
|
|
146 |
p [a] = copy [c] ;
|
|
|
147 |
}
|
|
|
148 |
}
|
|
|
149 |
return ;
|
|
|
150 |
}
|
|
|
151 |
|
|
|
152 |
|
|
|
153 |
/*
    TABLE OF ALPHABETIC UNICODE CHARACTERS

    Each entry is an inclusive range lo..hi of unicode characters which
    may be used in an identifier name.  The entries are in ascending
    order and do not overlap, which is what the binary chop in
    unicode_alpha relies on.  The data is derived from Annex E of the
    standard (which derives from ISO/IEC PDTR 10176) with two misprints
    corrected: 0e0d should be 0e8d, and 5e76 should be fe76.  The upper
    and lower case English alphabets are included even though they are
    not valid universal character names.
*/

static struct {
    unsigned long lo ;
    unsigned long hi ;
} alpha_range [] = {
    /* Latin */
    { 0x0041, 0x005a }, { 0x0061, 0x007a }, { 0x00c0, 0x00d6 },
    { 0x00d8, 0x00f6 }, { 0x00f8, 0x01f5 }, { 0x01fa, 0x0217 },
    { 0x0250, 0x02a8 },

    /* Greek */
    { 0x0384, 0x0384 }, { 0x0388, 0x038a }, { 0x038c, 0x038c },
    { 0x038e, 0x03a1 }, { 0x03a3, 0x03ce }, { 0x03d0, 0x03d6 },
    { 0x03da, 0x03da }, { 0x03dc, 0x03dc }, { 0x03de, 0x03de },
    { 0x03e0, 0x03e0 }, { 0x03e2, 0x03f3 },

    /* Cyrillic */
    { 0x0401, 0x040d }, { 0x040f, 0x044f }, { 0x0451, 0x045c },
    { 0x045e, 0x0481 }, { 0x0490, 0x04c4 }, { 0x04c7, 0x04c8 },
    { 0x04cb, 0x04cc }, { 0x04d0, 0x04eb }, { 0x04ee, 0x04f5 },
    { 0x04f8, 0x04f9 },

    /* Armenian */
    { 0x0531, 0x0556 }, { 0x0561, 0x0587 },

    /* Hebrew */
    { 0x05d0, 0x05ea }, { 0x05f0, 0x05f4 },

    /* Arabic */
    { 0x0621, 0x063a }, { 0x0640, 0x0652 }, { 0x0670, 0x06b7 },
    { 0x06ba, 0x06be }, { 0x06c0, 0x06ce }, { 0x06e5, 0x06e7 },

    /* Devanagari */
    { 0x0905, 0x0939 }, { 0x0958, 0x0962 },

    /* Bengali */
    { 0x0985, 0x098c }, { 0x098f, 0x0990 }, { 0x0993, 0x09a8 },
    { 0x09aa, 0x09b0 }, { 0x09b2, 0x09b2 }, { 0x09b6, 0x09b9 },
    { 0x09dc, 0x09dd }, { 0x09df, 0x09e1 }, { 0x09f0, 0x09f1 },

    /* Gurmukhi */
    { 0x0a05, 0x0a0a }, { 0x0a0f, 0x0a10 }, { 0x0a13, 0x0a28 },
    { 0x0a2a, 0x0a30 }, { 0x0a32, 0x0a33 }, { 0x0a35, 0x0a36 },
    { 0x0a38, 0x0a39 }, { 0x0a59, 0x0a5c }, { 0x0a5e, 0x0a5e },

    /* Gujarati */
    { 0x0a85, 0x0a8b }, { 0x0a8d, 0x0a8d }, { 0x0a8f, 0x0a91 },
    { 0x0a93, 0x0aa8 }, { 0x0aaa, 0x0ab0 }, { 0x0ab2, 0x0ab3 },
    { 0x0ab5, 0x0ab9 }, { 0x0ae0, 0x0ae0 },

    /* Oriya */
    { 0x0b05, 0x0b0c }, { 0x0b0f, 0x0b10 }, { 0x0b13, 0x0b28 },
    { 0x0b2a, 0x0b30 }, { 0x0b32, 0x0b33 }, { 0x0b36, 0x0b39 },
    { 0x0b5c, 0x0b5d }, { 0x0b5f, 0x0b61 },

    /* Tamil */
    { 0x0b85, 0x0b8a }, { 0x0b8e, 0x0b90 }, { 0x0b92, 0x0b95 },
    { 0x0b99, 0x0b9a }, { 0x0b9c, 0x0b9c }, { 0x0b9e, 0x0b9f },
    { 0x0ba3, 0x0ba4 }, { 0x0ba8, 0x0baa }, { 0x0bae, 0x0bb5 },
    { 0x0bb7, 0x0bb9 },

    /* Telugu */
    { 0x0c05, 0x0c0c }, { 0x0c0e, 0x0c10 }, { 0x0c12, 0x0c28 },
    { 0x0c2a, 0x0c33 }, { 0x0c35, 0x0c39 }, { 0x0c60, 0x0c61 },

    /* Kannada */
    { 0x0c85, 0x0c8c }, { 0x0c8e, 0x0c90 }, { 0x0c92, 0x0ca8 },
    { 0x0caa, 0x0cb3 }, { 0x0cb5, 0x0cb9 }, { 0x0ce0, 0x0ce1 },

    /* Malayalam */
    { 0x0d05, 0x0d0c }, { 0x0d0e, 0x0d10 }, { 0x0d12, 0x0d28 },
    { 0x0d2a, 0x0d39 }, { 0x0d60, 0x0d61 },

    /* Thai */
    { 0x0e01, 0x0e30 }, { 0x0e32, 0x0e33 }, { 0x0e40, 0x0e46 },
    { 0x0e4f, 0x0e5b },

    /* Lao */
    { 0x0e81, 0x0e82 }, { 0x0e84, 0x0e84 }, { 0x0e87, 0x0e87 },
    { 0x0e88, 0x0e88 }, { 0x0e8a, 0x0e8a }, { 0x0e8d, 0x0e8d },
    { 0x0e94, 0x0e97 }, { 0x0e99, 0x0e9f }, { 0x0ea1, 0x0ea3 },
    { 0x0ea5, 0x0ea5 }, { 0x0ea7, 0x0ea7 }, { 0x0eaa, 0x0eaa },
    { 0x0eab, 0x0eab }, { 0x0ead, 0x0eb0 }, { 0x0eb2, 0x0eb2 },
    { 0x0eb3, 0x0eb3 }, { 0x0ebd, 0x0ebd }, { 0x0ec0, 0x0ec4 },
    { 0x0ec6, 0x0ec6 },

    /* Georgian */
    { 0x10a0, 0x10c5 }, { 0x10d0, 0x10f6 },

    /* Hangul */
    { 0x1100, 0x1159 }, { 0x1161, 0x11a2 }, { 0x11a8, 0x11f9 },

    /* Latin (continued) */
    { 0x1e00, 0x1e9a }, { 0x1ea0, 0x1ef9 },

    /* Greek (continued) */
    { 0x1f00, 0x1f15 }, { 0x1f18, 0x1f1d }, { 0x1f20, 0x1f45 },
    { 0x1f48, 0x1f4d }, { 0x1f50, 0x1f57 }, { 0x1f59, 0x1f59 },
    { 0x1f5b, 0x1f5b }, { 0x1f5d, 0x1f5d }, { 0x1f5f, 0x1f7d },
    { 0x1f80, 0x1fb4 }, { 0x1fb6, 0x1fbc }, { 0x1fc2, 0x1fc4 },
    { 0x1fc6, 0x1fcc }, { 0x1fd0, 0x1fd3 }, { 0x1fd6, 0x1fdb },
    { 0x1fe0, 0x1fec }, { 0x1ff2, 0x1ff4 }, { 0x1ff6, 0x1ffc },

    /* Hiragana */
    { 0x3041, 0x3094 }, { 0x309b, 0x309e },

    /* Katakana */
    { 0x30a1, 0x30fe },

    /* Bopomofo */
    { 0x3105, 0x312c },

    /*
        CJK Unified Ideographs and the remaining ranges up to 0xffdc.
        The two branches hold the same values; the first adds a UL
        suffix to every constant when FS_NUMBER_SUFFIX is set
        (presumably because these values do not fit a 16-bit int).
    */
#if FS_NUMBER_SUFFIX
    { 0x4e00UL, 0x9fa5UL }, { 0xf900UL, 0xfa2dUL }, { 0xfb1fUL, 0xfb36UL },
    { 0xfb38UL, 0xfb3cUL }, { 0xfb3eUL, 0xfb3eUL }, { 0xfb40UL, 0xfb41UL },
    { 0xfb42UL, 0xfb44UL }, { 0xfb46UL, 0xfbb1UL }, { 0xfbd3UL, 0xfd3fUL },
    { 0xfd50UL, 0xfd8fUL }, { 0xfd92UL, 0xfdc7UL }, { 0xfdf0UL, 0xfdfbUL },
    { 0xfe70UL, 0xfe72UL }, { 0xfe74UL, 0xfe74UL }, { 0xfe76UL, 0xfefcUL },
    { 0xff21UL, 0xff3aUL }, { 0xff41UL, 0xff5aUL }, { 0xff66UL, 0xffbeUL },
    { 0xffc2UL, 0xffc7UL }, { 0xffcaUL, 0xffcfUL }, { 0xffd2UL, 0xffd7UL },
    { 0xffdaUL, 0xffdcUL }
#else
    { 0x4e00, 0x9fa5 }, { 0xf900, 0xfa2d }, { 0xfb1f, 0xfb36 },
    { 0xfb38, 0xfb3c }, { 0xfb3e, 0xfb3e }, { 0xfb40, 0xfb41 },
    { 0xfb42, 0xfb44 }, { 0xfb46, 0xfbb1 }, { 0xfbd3, 0xfd3f },
    { 0xfd50, 0xfd8f }, { 0xfd92, 0xfdc7 }, { 0xfdf0, 0xfdfb },
    { 0xfe70, 0xfe72 }, { 0xfe74, 0xfe74 }, { 0xfe76, 0xfefc },
    { 0xff21, 0xff3a }, { 0xff41, 0xff5a }, { 0xff66, 0xffbe },
    { 0xffc2, 0xffc7 }, { 0xffca, 0xffcf }, { 0xffd2, 0xffd7 },
    { 0xffda, 0xffdc }
#endif
} ;
|
|
|
297 |
|
|
|
298 |
|
|
|
299 |
/*
|
|
|
300 |
DOES A UNICODE CHARACTER REPRESENT AN ALPHABETIC VALUE?
|
|
|
301 |
|
|
|
302 |
This routine checks whether the unicode character c represents an
|
|
|
303 |
alphabetic value suitable for use in an identifier name. It
|
|
|
304 |
operates by performing a binary chop on the table above.
|
|
|
305 |
*/
|
|
|
306 |
|
|
|
307 |
int unicode_alpha
|
|
|
308 |
PROTO_N ( ( c ) )
|
|
|
309 |
PROTO_T ( unsigned long c )
|
|
|
310 |
{
|
|
|
311 |
int i = 0 ;
|
|
|
312 |
int j = array_size ( alpha_range ) - 1 ;
|
|
|
313 |
do {
|
|
|
314 |
int k = ( i + j ) / 2 ;
|
|
|
315 |
if ( c < alpha_range [k].lo ) {
|
|
|
316 |
/* Lower half */
|
|
|
317 |
j = k - 1 ;
|
|
|
318 |
} else if ( c > alpha_range [k].hi ) {
|
|
|
319 |
/* Upper half */
|
|
|
320 |
i = k + 1 ;
|
|
|
321 |
} else {
|
|
|
322 |
/* Match found */
|
|
|
323 |
return ( 1 ) ;
|
|
|
324 |
}
|
|
|
325 |
} while ( i <= j ) ;
|
|
|
326 |
return ( 0 ) ;
|
|
|
327 |
}
|