Subversion Repositories tendra.SVN

Rev

Go to most recent revision | Details | Last modification | View Log | RSS feed

Rev Author Line No. Line
2 7u83 1
/*
2
    		 Crown Copyright (c) 1997
3
 
4
    This TenDRA(r) Computer Program is subject to Copyright
5
    owned by the United Kingdom Secretary of State for Defence
6
    acting through the Defence Evaluation and Research Agency
7
    (DERA).  It is made available to Recipients with a
8
    royalty-free licence for its use, reproduction, transfer
9
    to other parties and amendment for any purpose not excluding
10
    product development provided that any such use et cetera
11
    shall be deemed to be acceptance of the following conditions:-
12
 
13
        (1) Its Recipients shall ensure that this Notice is
14
        reproduced upon any copies or amended versions of it;
15
 
16
        (2) Any amended version of it shall be clearly marked to
17
        show both the nature of and the organisation responsible
18
        for the relevant amendment or amendments;
19
 
20
        (3) Its onward transfer from a recipient to another
21
        party shall be deemed to be that party's acceptance of
22
        these conditions;
23
 
24
        (4) DERA gives no warranty or assurance as to its
25
        quality or suitability for any purpose and DERA accepts
26
        no liability whatsoever in relation to any use to which
27
        it may be put.
28
*/
29
 
30
 
31
#include "config.h"
32
#include <limits.h>
33
#include "c_types.h"
34
#include "char.h"
35
#include "literal.h"
36
 
37
 
38
/*
    ASCII FLAG

    This flag is true if the native codeset is ASCII.  It is initially -1,
    indicating that the conversion tables have not been initialised.
*/
44
 
45
int is_ascii = -1 ;	/* -1 until map_ascii builds the tables, then 1 or 0 */
46
 
47
 
48
/*
49
    CONVERSION TABLES
50
 
51
    These tables give the conversions of ASCII to and from the native
52
    codeset.  The ASCII to native table is deduced for the portable
53
    codeset from the list of characters in char.h which is given in
54
    ASCII sequence.
55
*/
56
 
57
 
58
static character to_ascii_tab [ NO_CHAR + 1 ] ;
59
 
60
static character from_ascii_tab [ NO_CHAR + 1 ] = {
61
#define NONE			char_illegal
62
#define CHAR_DATA( A, B, C, D )	( D ),
63
#include "char.h"
64
#undef CHAR_DATA
65
    NONE			/* dummy last element */
66
} ;
67
 
68
 
69
/*
70
    CONVERT A CHARACTER TO ASCII
71
 
72
    This routine converts the character c from the native codeset to
73
    ASCII.
74
*/
75
 
76
unsigned long to_ascii
77
    PROTO_N ( ( c, ch ) )
78
    PROTO_T ( unsigned long c X int *ch )
79
{
80
    if ( c < NO_CHAR ) {
81
	c = ( unsigned long ) to_ascii_tab [c] ;
82
	*ch = CHAR_ASCII ;
83
    }
84
    return ( c ) ;
85
}
86
 
87
 
88
/*
89
    CONVERT A CHARACTER TO NATIVE CODESET
90
 
91
    This routine converts the character c to the native codeset from
92
    ASCII.
93
*/
94
 
95
unsigned long from_ascii
96
    PROTO_N ( ( c, ch ) )
97
    PROTO_T ( unsigned long c X int *ch )
98
{
99
    if ( c < NO_CHAR ) {
100
	c = ( unsigned long ) from_ascii_tab [c] ;
101
	*ch = CHAR_SIMPLE ;
102
    }
103
    return ( c ) ;
104
}
105
 
106
 
107
/*
108
    TRANSFORM A TABLE FROM ASCII TO NATIVE CODESET
109
 
110
    This routine maps the look-up table p from ASCII to native codeset.
111
*/
112
 
113
void map_ascii
114
    PROTO_N ( ( p ) )
115
    PROTO_T ( unsigned char *p )
116
{
117
    unsigned c ;
118
    int asc = is_ascii ;
119
    if ( asc == -1 ) {
120
	/* Set up conversion tables */
121
	asc = 1 ;
122
	for ( c = 0 ; c < NO_CHAR ; c++ ) {
123
	    to_ascii_tab [c] = ( character ) c ;
124
	}
125
	for ( c = 0 ; c < NO_CHAR ; c++ ) {
126
	    unsigned a = ( unsigned ) from_ascii_tab [c] ;
127
	    if ( a == NONE ) a = c ;
128
	    if ( a != c ) asc = 0 ;
129
	    from_ascii_tab [c] = ( character ) a ;
130
	    to_ascii_tab [a] = ( character ) c ;
131
	}
132
	is_ascii = asc ;
133
    }
134
 
135
    if ( asc == 0 ) {
136
	/* Map table */
137
	unsigned char b = p [ NONE ] ;
138
	unsigned char copy [ NO_CHAR ] ;
139
	for ( c = 0 ; c < NO_CHAR ; c++ ) {
140
	    copy [c] = p [c] ;
141
	    p [c] = b ;
142
	}
143
	SET ( copy ) ;
144
	for ( c = 0 ; c < NO_CHAR ; c++ ) {
145
	    character a = from_ascii_tab [c] ;
146
	    p [a] = copy [c] ;
147
	}
148
    }
149
    return ;
150
}
151
 
152
 
153
/*
    TABLE OF ALPHABETIC UNICODE CHARACTERS

    Each entry is an inclusive range of unicode characters which are
    suitable for use in an identifier name.  The entries are in ascending
    order, which unicode_alpha relies on for its binary chop.  The table
    is derived from Annex E of the standard (which derives from ISO/IEC
    PDTR 10176) with two misprints corrected:  0e0d should be 0e8d and
    5e76 should be fe76.  Note that the upper and lower case English
    alphabets have been included even though they are not valid universal
    character names.

    The final group is given twice, with and without UL suffixes,
    selected by FS_NUMBER_SUFFIX.
*/

static struct {
    unsigned long lo ;		/* first character in range */
    unsigned long hi ;		/* last character in range */
} alpha_range [] = {
    /* Latin */
    { 0x0041, 0x005a }, { 0x0061, 0x007a }, { 0x00c0, 0x00d6 },
    { 0x00d8, 0x00f6 }, { 0x00f8, 0x01f5 }, { 0x01fa, 0x0217 },
    { 0x0250, 0x02a8 },

    /* Greek */
    { 0x0384, 0x0384 }, { 0x0388, 0x038a }, { 0x038c, 0x038c },
    { 0x038e, 0x03a1 }, { 0x03a3, 0x03ce }, { 0x03d0, 0x03d6 },
    { 0x03da, 0x03da }, { 0x03dc, 0x03dc }, { 0x03de, 0x03de },
    { 0x03e0, 0x03e0 }, { 0x03e2, 0x03f3 },

    /* Cyrillic */
    { 0x0401, 0x040d }, { 0x040f, 0x044f }, { 0x0451, 0x045c },
    { 0x045e, 0x0481 }, { 0x0490, 0x04c4 }, { 0x04c7, 0x04c8 },
    { 0x04cb, 0x04cc }, { 0x04d0, 0x04eb }, { 0x04ee, 0x04f5 },
    { 0x04f8, 0x04f9 },

    /* Armenian */
    { 0x0531, 0x0556 }, { 0x0561, 0x0587 },

    /* Hebrew */
    { 0x05d0, 0x05ea }, { 0x05f0, 0x05f4 },

    /* Arabic */
    { 0x0621, 0x063a }, { 0x0640, 0x0652 }, { 0x0670, 0x06b7 },
    { 0x06ba, 0x06be }, { 0x06c0, 0x06ce }, { 0x06e5, 0x06e7 },

    /* Devanagari */
    { 0x0905, 0x0939 }, { 0x0958, 0x0962 },

    /* Bengali */
    { 0x0985, 0x098c }, { 0x098f, 0x0990 }, { 0x0993, 0x09a8 },
    { 0x09aa, 0x09b0 }, { 0x09b2, 0x09b2 }, { 0x09b6, 0x09b9 },
    { 0x09dc, 0x09dd }, { 0x09df, 0x09e1 }, { 0x09f0, 0x09f1 },

    /* Gurmukhi */
    { 0x0a05, 0x0a0a }, { 0x0a0f, 0x0a10 }, { 0x0a13, 0x0a28 },
    { 0x0a2a, 0x0a30 }, { 0x0a32, 0x0a33 }, { 0x0a35, 0x0a36 },
    { 0x0a38, 0x0a39 }, { 0x0a59, 0x0a5c }, { 0x0a5e, 0x0a5e },

    /* Gujarati */
    { 0x0a85, 0x0a8b }, { 0x0a8d, 0x0a8d }, { 0x0a8f, 0x0a91 },
    { 0x0a93, 0x0aa8 }, { 0x0aaa, 0x0ab0 }, { 0x0ab2, 0x0ab3 },
    { 0x0ab5, 0x0ab9 }, { 0x0ae0, 0x0ae0 },

    /* Oriya */
    { 0x0b05, 0x0b0c }, { 0x0b0f, 0x0b10 }, { 0x0b13, 0x0b28 },
    { 0x0b2a, 0x0b30 }, { 0x0b32, 0x0b33 }, { 0x0b36, 0x0b39 },
    { 0x0b5c, 0x0b5d }, { 0x0b5f, 0x0b61 },

    /* Tamil */
    { 0x0b85, 0x0b8a }, { 0x0b8e, 0x0b90 }, { 0x0b92, 0x0b95 },
    { 0x0b99, 0x0b9a }, { 0x0b9c, 0x0b9c }, { 0x0b9e, 0x0b9f },
    { 0x0ba3, 0x0ba4 }, { 0x0ba8, 0x0baa }, { 0x0bae, 0x0bb5 },
    { 0x0bb7, 0x0bb9 },

    /* Telugu */
    { 0x0c05, 0x0c0c }, { 0x0c0e, 0x0c10 }, { 0x0c12, 0x0c28 },
    { 0x0c2a, 0x0c33 }, { 0x0c35, 0x0c39 }, { 0x0c60, 0x0c61 },

    /* Kannada */
    { 0x0c85, 0x0c8c }, { 0x0c8e, 0x0c90 }, { 0x0c92, 0x0ca8 },
    { 0x0caa, 0x0cb3 }, { 0x0cb5, 0x0cb9 }, { 0x0ce0, 0x0ce1 },

    /* Malayalam */
    { 0x0d05, 0x0d0c }, { 0x0d0e, 0x0d10 }, { 0x0d12, 0x0d28 },
    { 0x0d2a, 0x0d39 }, { 0x0d60, 0x0d61 },

    /* Thai */
    { 0x0e01, 0x0e30 }, { 0x0e32, 0x0e33 }, { 0x0e40, 0x0e46 },
    { 0x0e4f, 0x0e5b },

    /* Lao */
    { 0x0e81, 0x0e82 }, { 0x0e84, 0x0e84 }, { 0x0e87, 0x0e87 },
    { 0x0e88, 0x0e88 }, { 0x0e8a, 0x0e8a }, { 0x0e8d, 0x0e8d },
    { 0x0e94, 0x0e97 }, { 0x0e99, 0x0e9f }, { 0x0ea1, 0x0ea3 },
    { 0x0ea5, 0x0ea5 }, { 0x0ea7, 0x0ea7 }, { 0x0eaa, 0x0eaa },
    { 0x0eab, 0x0eab }, { 0x0ead, 0x0eb0 }, { 0x0eb2, 0x0eb2 },
    { 0x0eb3, 0x0eb3 }, { 0x0ebd, 0x0ebd }, { 0x0ec0, 0x0ec4 },
    { 0x0ec6, 0x0ec6 },

    /* Georgian */
    { 0x10a0, 0x10c5 }, { 0x10d0, 0x10f6 },

    /* Hangul */
    { 0x1100, 0x1159 }, { 0x1161, 0x11a2 }, { 0x11a8, 0x11f9 },

    /* Latin (continued) */
    { 0x1e00, 0x1e9a }, { 0x1ea0, 0x1ef9 },

    /* Greek (continued) */
    { 0x1f00, 0x1f15 }, { 0x1f18, 0x1f1d }, { 0x1f20, 0x1f45 },
    { 0x1f48, 0x1f4d }, { 0x1f50, 0x1f57 }, { 0x1f59, 0x1f59 },
    { 0x1f5b, 0x1f5b }, { 0x1f5d, 0x1f5d }, { 0x1f5f, 0x1f7d },
    { 0x1f80, 0x1fb4 }, { 0x1fb6, 0x1fbc }, { 0x1fc2, 0x1fc4 },
    { 0x1fc6, 0x1fcc }, { 0x1fd0, 0x1fd3 }, { 0x1fd6, 0x1fdb },
    { 0x1fe0, 0x1fec }, { 0x1ff2, 0x1ff4 }, { 0x1ff6, 0x1ffc },

    /* Hiragana */
    { 0x3041, 0x3094 }, { 0x309b, 0x309e },

    /* Katakana */
    { 0x30a1, 0x30fe },

    /* Bopomofo */
    { 0x3105, 0x312c },

    /* CJK Unified Ideographs, then the compatibility and presentation
       form ranges from 0xf900 upwards */
#if FS_NUMBER_SUFFIX
    { 0x4e00UL, 0x9fa5UL }, { 0xf900UL, 0xfa2dUL }, { 0xfb1fUL, 0xfb36UL },
    { 0xfb38UL, 0xfb3cUL }, { 0xfb3eUL, 0xfb3eUL }, { 0xfb40UL, 0xfb41UL },
    { 0xfb42UL, 0xfb44UL }, { 0xfb46UL, 0xfbb1UL }, { 0xfbd3UL, 0xfd3fUL },
    { 0xfd50UL, 0xfd8fUL }, { 0xfd92UL, 0xfdc7UL }, { 0xfdf0UL, 0xfdfbUL },
    { 0xfe70UL, 0xfe72UL }, { 0xfe74UL, 0xfe74UL }, { 0xfe76UL, 0xfefcUL },
    { 0xff21UL, 0xff3aUL }, { 0xff41UL, 0xff5aUL }, { 0xff66UL, 0xffbeUL },
    { 0xffc2UL, 0xffc7UL }, { 0xffcaUL, 0xffcfUL }, { 0xffd2UL, 0xffd7UL },
    { 0xffdaUL, 0xffdcUL }
#else
    { 0x4e00, 0x9fa5 }, { 0xf900, 0xfa2d }, { 0xfb1f, 0xfb36 },
    { 0xfb38, 0xfb3c }, { 0xfb3e, 0xfb3e }, { 0xfb40, 0xfb41 },
    { 0xfb42, 0xfb44 }, { 0xfb46, 0xfbb1 }, { 0xfbd3, 0xfd3f },
    { 0xfd50, 0xfd8f }, { 0xfd92, 0xfdc7 }, { 0xfdf0, 0xfdfb },
    { 0xfe70, 0xfe72 }, { 0xfe74, 0xfe74 }, { 0xfe76, 0xfefc },
    { 0xff21, 0xff3a }, { 0xff41, 0xff5a }, { 0xff66, 0xffbe },
    { 0xffc2, 0xffc7 }, { 0xffca, 0xffcf }, { 0xffd2, 0xffd7 },
    { 0xffda, 0xffdc }
#endif
} ;
297
 
298
 
299
/*
300
    DOES A UNICODE CHARACTER REPRESENT AN ALPHABETIC VALUE?
301
 
302
    This routine checks whether the unicode character c represents an
303
    alphabetic value suitable for use in an identifier name.  It
304
    operates by performing a binary chop on the table above.
305
*/
306
 
307
int unicode_alpha
308
    PROTO_N ( ( c ) )
309
    PROTO_T ( unsigned long c )
310
{
311
    int i = 0 ;
312
    int j = array_size ( alpha_range ) - 1 ;
313
    do {
314
	int k = ( i + j ) / 2 ;
315
	if ( c < alpha_range [k].lo ) {
316
	    /* Lower half */
317
	    j = k - 1 ;
318
	} else if ( c > alpha_range [k].hi ) {
319
	    /* Upper half */
320
	    i = k + 1 ;
321
	} else {
322
	    /* Match found */
323
	    return ( 1 ) ;
324
	}
325
    } while ( i <= j ) ;
326
    return ( 0 ) ;
327
}