2 |
- |
1 |
#ifndef PLAN9
|
|
|
2 |
#include <sys/types.h>
|
|
|
3 |
#include <stdio.h>
|
|
|
4 |
#include <unistd.h>
|
|
|
5 |
#include <stdlib.h>
|
|
|
6 |
#include <fcntl.h>
|
|
|
7 |
#include <string.h>
|
|
|
8 |
#include <errno.h>
|
|
|
9 |
#include "plan9.h"
|
|
|
10 |
#else /* PLAN9 */
|
|
|
11 |
#include <u.h>
|
|
|
12 |
#include <libc.h>
|
|
|
13 |
#include <bio.h>
|
|
|
14 |
#endif /* PLAN9 */
|
|
|
15 |
#include "cyrillic.h"
|
|
|
16 |
#include "misc.h"
|
|
|
17 |
#include "ms.h"
|
|
|
18 |
#include "8859.h"
|
|
|
19 |
#include "big5.h"
|
|
|
20 |
#include "gb.h"
|
|
|
21 |
#include "hdr.h"
|
|
|
22 |
#include "conv.h"
|
|
|
23 |
|
|
|
24 |
void usage(void);
|
|
|
25 |
void list(void);
|
|
|
26 |
int squawk = 1;
|
|
|
27 |
int clean = 0;
|
|
|
28 |
int verbose = 0;
|
|
|
29 |
long ninput, noutput, nrunes, nerrors;
|
|
|
30 |
char *file = "stdin";
|
|
|
31 |
char *argv0;
|
|
|
32 |
Rune runes[N];
|
|
|
33 |
char obuf[UTFmax*N]; /* maximum bloat from N runes */
|
|
|
34 |
long tab[NRUNE];
|
|
|
35 |
#ifndef PLAN9
|
|
|
36 |
extern char version[];
|
|
|
37 |
#endif
|
|
|
38 |
|
|
|
39 |
void intable(int, long *, struct convert *);
|
|
|
40 |
void unicode_in(int, long *, struct convert *);
|
|
|
41 |
void unicode_out(Rune *, int, long *);
|
|
|
42 |
|
|
|
43 |
int
|
|
|
44 |
main(int argc, char **argv)
|
|
|
45 |
{
|
|
|
46 |
char *from = "utf";
|
|
|
47 |
char *to = "utf";
|
|
|
48 |
int fd;
|
|
|
49 |
int listem = 0;
|
|
|
50 |
struct convert *t, *f;
|
|
|
51 |
|
|
|
52 |
ARGBEGIN {
|
|
|
53 |
case 'c':
|
|
|
54 |
clean = 1;
|
|
|
55 |
break;
|
|
|
56 |
case 'f':
|
|
|
57 |
from = EARGF(usage());
|
|
|
58 |
break;
|
|
|
59 |
case 'l':
|
|
|
60 |
listem = 1;
|
|
|
61 |
break;
|
|
|
62 |
case 's':
|
|
|
63 |
squawk = 0;
|
|
|
64 |
break;
|
|
|
65 |
case 't':
|
|
|
66 |
to = EARGF(usage());
|
|
|
67 |
break;
|
|
|
68 |
case 'v':
|
|
|
69 |
verbose = 1;
|
|
|
70 |
break;
|
|
|
71 |
default:
|
|
|
72 |
usage();
|
|
|
73 |
break;
|
|
|
74 |
} ARGEND
|
|
|
75 |
|
|
|
76 |
USED(argc);
|
|
|
77 |
if(verbose)
|
|
|
78 |
squawk = 1;
|
|
|
79 |
if(listem){
|
|
|
80 |
list();
|
|
|
81 |
EXIT(0, 0);
|
|
|
82 |
}
|
|
|
83 |
if(!from || !to)
|
|
|
84 |
usage();
|
|
|
85 |
f = conv(from, 1);
|
|
|
86 |
t = conv(to, 0);
|
|
|
87 |
#define PROC {if(f->flags&Table)\
|
|
|
88 |
intable(fd, (long *)f->data, t);\
|
|
|
89 |
else\
|
|
|
90 |
((Infn)(f->fn))(fd, (long *)0, t);}
|
|
|
91 |
if(*argv){
|
|
|
92 |
while(*argv){
|
|
|
93 |
file = *argv;
|
|
|
94 |
#ifndef PLAN9
|
|
|
95 |
if((fd = open(*argv, 0)) < 0){
|
|
|
96 |
EPR "%s: %s: %s\n", argv0, *argv, strerror(errno));
|
|
|
97 |
#else /* PLAN9 */
|
|
|
98 |
if((fd = open(*argv, OREAD)) < 0){
|
|
|
99 |
EPR "%s: %s: %r\n", argv0, *argv);
|
|
|
100 |
#endif /* PLAN9 */
|
|
|
101 |
EXIT(1, "open failure");
|
|
|
102 |
}
|
|
|
103 |
PROC
|
|
|
104 |
close(fd);
|
|
|
105 |
argv++;
|
|
|
106 |
}
|
|
|
107 |
} else {
|
|
|
108 |
fd = 0;
|
|
|
109 |
PROC
|
|
|
110 |
}
|
|
|
111 |
if(verbose)
|
|
|
112 |
EPR "%s: %ld input bytes, %ld runes, %ld output bytes (%ld errors)\n", argv0,
|
|
|
113 |
ninput, nrunes, noutput, nerrors);
|
|
|
114 |
EXIT(((nerrors && squawk)? 1:0), ((nerrors && squawk)? "conversion error":0));
|
|
|
115 |
return(0); /* shut up compiler */
|
|
|
116 |
}
|
|
|
117 |
|
|
|
118 |
void
|
|
|
119 |
usage(void)
|
|
|
120 |
{
|
|
|
121 |
EPR "Usage: %s [-slv] [-f cs] [-t cs] [file ...]\n", argv0);
|
|
|
122 |
verbose = 1;
|
|
|
123 |
list();
|
|
|
124 |
EXIT(1, "usage");
|
|
|
125 |
}
|
|
|
126 |
|
|
|
127 |
void
|
|
|
128 |
list(void)
|
|
|
129 |
{
|
|
|
130 |
struct convert *c;
|
|
|
131 |
char ch = verbose?'\t':' ';
|
|
|
132 |
|
|
|
133 |
#ifndef PLAN9
|
|
|
134 |
EPR "%s version = '%s'\n", argv0, version);
|
|
|
135 |
#endif
|
|
|
136 |
if(verbose)
|
|
|
137 |
EPR "character sets:\n");
|
|
|
138 |
else
|
|
|
139 |
EPR "cs:");
|
|
|
140 |
for(c = convert; c->name; c++){
|
|
|
141 |
if((c->flags&From) && c[1].name && (strcmp(c[1].name, c->name) == 0)){
|
|
|
142 |
EPR "%c%s", ch, c->name);
|
|
|
143 |
c++;
|
|
|
144 |
} else if(c->flags&Table)
|
|
|
145 |
EPR "%c%s", ch, c->name);
|
|
|
146 |
else if(c->flags&From)
|
|
|
147 |
EPR "%c%s(from)", ch, c->name);
|
|
|
148 |
else
|
|
|
149 |
EPR "%c%s(to)", ch, c->name);
|
|
|
150 |
if(verbose)
|
|
|
151 |
EPR "\t%s\n", c->chatter);
|
|
|
152 |
}
|
|
|
153 |
if(!verbose)
|
|
|
154 |
EPR "\n");
|
|
|
155 |
}
|
|
|
156 |
|
|
|
157 |
|
|
|
158 |
struct convert *
|
|
|
159 |
conv(char *name, int from)
|
|
|
160 |
{
|
|
|
161 |
struct convert *c;
|
|
|
162 |
|
|
|
163 |
for(c = convert; c->name; c++){
|
|
|
164 |
if(cistrcmp(c->name, name) != 0)
|
|
|
165 |
continue;
|
|
|
166 |
if(c->flags&Table)
|
|
|
167 |
return(c);
|
|
|
168 |
if(((c->flags&From) == 0) == (from == 0))
|
|
|
169 |
return(c);
|
|
|
170 |
}
|
|
|
171 |
EPR "%s: charset `%s' unknown\n", argv0, name);
|
|
|
172 |
EXIT(1, "unknown character set");
|
|
|
173 |
return(0); /* just shut the compiler up */
|
|
|
174 |
}
|
|
|
175 |
|
|
|
176 |
/*
 * Swap the two bytes of every complete byte pair in b[0..n-1], in place.
 *
 * b: buffer to modify.
 * n: number of valid bytes in b.
 *
 * Only whole pairs are swapped.  When n is odd the trailing byte is
 * left untouched, so nothing at or beyond b[n] is read or written.
 * A non-positive n does nothing.
 */
void
swab2(char *b, int n)
{
	char *e, p;

	if(n < 2)
		return;
	/* n & ~1 rounds down to an even count, excluding a dangling odd byte */
	for(e = b + (n & ~1); b < e; b += 2){
		p = b[0];
		b[0] = b[1];
		b[1] = p;
	}
}
|
|
|
187 |
|
|
|
188 |
void
|
|
|
189 |
unicode_in(int fd, long *notused, struct convert *out)
|
|
|
190 |
{
|
|
|
191 |
Rune buf[N];
|
|
|
192 |
int n;
|
|
|
193 |
int swabme;
|
|
|
194 |
|
|
|
195 |
USED(notused);
|
|
|
196 |
if(read(fd, (char *)buf, 2) != 2)
|
|
|
197 |
return;
|
|
|
198 |
ninput += 2;
|
|
|
199 |
switch(buf[0])
|
|
|
200 |
{
|
|
|
201 |
default:
|
|
|
202 |
OUT(out, buf, 1);
|
|
|
203 |
case 0xFEFF:
|
|
|
204 |
swabme = 0;
|
|
|
205 |
break;
|
|
|
206 |
case 0xFFFE:
|
|
|
207 |
swabme = 1;
|
|
|
208 |
break;
|
|
|
209 |
}
|
|
|
210 |
while((n = read(fd, (char *)buf, 2*N)) > 0){
|
|
|
211 |
ninput += n;
|
|
|
212 |
if(swabme)
|
|
|
213 |
swab2((char *)buf, n);
|
|
|
214 |
if(n&1){
|
|
|
215 |
if(squawk)
|
|
|
216 |
EPR "%s: odd byte count in %s\n", argv0, file);
|
|
|
217 |
nerrors++;
|
|
|
218 |
if(clean)
|
|
|
219 |
n--;
|
|
|
220 |
else
|
|
|
221 |
buf[n++/2] = Runeerror;
|
|
|
222 |
}
|
|
|
223 |
OUT(out, buf, n/2);
|
|
|
224 |
}
|
|
|
225 |
OUT(out, buf, 0);
|
|
|
226 |
}
|
|
|
227 |
|
|
|
228 |
void
|
|
|
229 |
unicode_in_be(int fd, long *notused, struct convert *out)
|
|
|
230 |
{
|
|
|
231 |
int i, n;
|
|
|
232 |
Rune buf[N], r;
|
|
|
233 |
uchar *p;
|
|
|
234 |
|
|
|
235 |
USED(notused);
|
|
|
236 |
while((n = read(fd, (char *)buf, 2*N)) > 0){
|
|
|
237 |
ninput += n;
|
|
|
238 |
p = (uchar*)buf;
|
|
|
239 |
for(i=0; i<n/2; i++){
|
|
|
240 |
r = *p++<<8;
|
|
|
241 |
r |= *p++;
|
|
|
242 |
buf[i] = r;
|
|
|
243 |
}
|
|
|
244 |
if(n&1){
|
|
|
245 |
if(squawk)
|
|
|
246 |
EPR "%s: odd byte count in %s\n", argv0, file);
|
|
|
247 |
nerrors++;
|
|
|
248 |
if(clean)
|
|
|
249 |
n--;
|
|
|
250 |
else
|
|
|
251 |
buf[n++/2] = Runeerror;
|
|
|
252 |
}
|
|
|
253 |
OUT(out, buf, n/2);
|
|
|
254 |
}
|
|
|
255 |
OUT(out, buf, 0);
|
|
|
256 |
}
|
|
|
257 |
|
|
|
258 |
void
|
|
|
259 |
unicode_in_le(int fd, long *notused, struct convert *out)
|
|
|
260 |
{
|
|
|
261 |
int i, n;
|
|
|
262 |
Rune buf[N], r;
|
|
|
263 |
uchar *p;
|
|
|
264 |
|
|
|
265 |
USED(notused);
|
|
|
266 |
while((n = read(fd, (char *)buf, 2*N)) > 0){
|
|
|
267 |
ninput += n;
|
|
|
268 |
p = (uchar*)buf;
|
|
|
269 |
for(i=0; i<n/2; i++){
|
|
|
270 |
r = *p++;
|
|
|
271 |
r |= *p++<<8;
|
|
|
272 |
buf[i] = r;
|
|
|
273 |
}
|
|
|
274 |
if(n&1){
|
|
|
275 |
if(squawk)
|
|
|
276 |
EPR "%s: odd byte count in %s\n", argv0, file);
|
|
|
277 |
nerrors++;
|
|
|
278 |
if(clean)
|
|
|
279 |
n--;
|
|
|
280 |
else
|
|
|
281 |
buf[n++/2] = Runeerror;
|
|
|
282 |
}
|
|
|
283 |
OUT(out, buf, n/2);
|
|
|
284 |
}
|
|
|
285 |
OUT(out, buf, 0);
|
|
|
286 |
}
|
|
|
287 |
|
|
|
288 |
void
|
|
|
289 |
unicode_out(Rune *base, int n, long *notused)
|
|
|
290 |
{
|
|
|
291 |
static int first = 1;
|
|
|
292 |
|
|
|
293 |
USED(notused);
|
|
|
294 |
nrunes += n;
|
|
|
295 |
if(first){
|
|
|
296 |
unsigned short x = 0xFEFF;
|
|
|
297 |
noutput += 2;
|
|
|
298 |
write(1, (char *)&x, 2);
|
|
|
299 |
first = 0;
|
|
|
300 |
}
|
|
|
301 |
noutput += 2*n;
|
|
|
302 |
write(1, (char *)base, 2*n);
|
|
|
303 |
}
|
|
|
304 |
|
|
|
305 |
void
|
|
|
306 |
unicode_out_be(Rune *base, int n, long *notused)
|
|
|
307 |
{
|
|
|
308 |
int i;
|
|
|
309 |
uchar *p;
|
|
|
310 |
Rune r;
|
|
|
311 |
|
|
|
312 |
USED(notused);
|
|
|
313 |
p = (uchar*)base;
|
|
|
314 |
for(i=0; i<n; i++){
|
|
|
315 |
r = base[i];
|
|
|
316 |
*p++ = r>>8;
|
|
|
317 |
*p++ = r;
|
|
|
318 |
}
|
|
|
319 |
nrunes += n;
|
|
|
320 |
noutput += 2*n;
|
|
|
321 |
write(1, (char *)base, 2*n);
|
|
|
322 |
}
|
|
|
323 |
|
|
|
324 |
void
|
|
|
325 |
unicode_out_le(Rune *base, int n, long *notused)
|
|
|
326 |
{
|
|
|
327 |
int i;
|
|
|
328 |
uchar *p;
|
|
|
329 |
Rune r;
|
|
|
330 |
|
|
|
331 |
USED(notused);
|
|
|
332 |
p = (uchar*)base;
|
|
|
333 |
for(i=0; i<n; i++){
|
|
|
334 |
r = base[i];
|
|
|
335 |
*p++ = r;
|
|
|
336 |
*p++ = r>>8;
|
|
|
337 |
}
|
|
|
338 |
nrunes += n;
|
|
|
339 |
noutput += 2*n;
|
|
|
340 |
write(1, (char *)base, 2*n);
|
|
|
341 |
}
|
|
|
342 |
|
|
|
343 |
void
|
|
|
344 |
intable(int fd, long *table, struct convert *out)
|
|
|
345 |
{
|
|
|
346 |
uchar buf[N];
|
|
|
347 |
uchar *p, *e;
|
|
|
348 |
Rune *r;
|
|
|
349 |
int n;
|
|
|
350 |
long c;
|
|
|
351 |
|
|
|
352 |
while((n = read(fd, (char *)buf, N)) > 0){
|
|
|
353 |
ninput += n;
|
|
|
354 |
r = runes;
|
|
|
355 |
for(p = buf, e = buf+n; p < e; p++){
|
|
|
356 |
c = table[*p];
|
|
|
357 |
if(c < 0){
|
|
|
358 |
if(squawk)
|
|
|
359 |
EPR "%s: bad char 0x%x near byte %ld in %s\n", argv0, *p, ninput+(p-e), file);
|
|
|
360 |
nerrors++;
|
|
|
361 |
if(clean)
|
|
|
362 |
continue;
|
|
|
363 |
c = BADMAP;
|
|
|
364 |
}
|
|
|
365 |
*r++ = c;
|
|
|
366 |
}
|
|
|
367 |
OUT(out, runes, r-runes);
|
|
|
368 |
}
|
|
|
369 |
OUT(out, runes, 0);
|
|
|
370 |
if(n < 0){
|
|
|
371 |
#ifdef PLAN9
|
|
|
372 |
EPR "%s: input read: %r\n", argv0);
|
|
|
373 |
#else
|
|
|
374 |
EPR "%s: input read: %s\n", argv0, strerror(errno));
|
|
|
375 |
#endif
|
|
|
376 |
EXIT(1, "input read error");
|
|
|
377 |
}
|
|
|
378 |
}
|
|
|
379 |
|
|
|
380 |
void
|
|
|
381 |
outtable(Rune *base, int n, long *map)
|
|
|
382 |
{
|
|
|
383 |
long c;
|
|
|
384 |
char *p;
|
|
|
385 |
int i;
|
|
|
386 |
|
|
|
387 |
nrunes += n;
|
|
|
388 |
for(i = 0; i < NRUNE; i++)
|
|
|
389 |
tab[i] = -1;
|
|
|
390 |
for(i = 0; i < 256; i++)
|
|
|
391 |
if(map[i] >= 0)
|
|
|
392 |
tab[map[i]] = i;
|
|
|
393 |
for(i = 0, p = obuf; i < n; i++){
|
|
|
394 |
c = tab[base[i]];
|
|
|
395 |
if(c < 0){
|
|
|
396 |
if(squawk)
|
|
|
397 |
EPR "%s: rune 0x%x not in output cs\n", argv0, base[i]);
|
|
|
398 |
nerrors++;
|
|
|
399 |
if(clean)
|
|
|
400 |
continue;
|
|
|
401 |
c = BADMAP;
|
|
|
402 |
}
|
|
|
403 |
*p++ = c;
|
|
|
404 |
}
|
|
|
405 |
noutput += p-obuf;
|
|
|
406 |
write(1, obuf, p-obuf);
|
|
|
407 |
}
|
|
|
408 |
|
|
|
409 |
/*
 * 7-bit ASCII: bytes 0x00-0x7f map to themselves, bytes 0x80-0xff
 * have no mapping (-1).
 */
long tabascii[256] =
{
	/* 0x00 - 0x7f: identity */
	0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
	0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
	0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
	0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
	0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27,
	0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f,
	0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
	0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f,
	0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
	0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f,
	0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
	0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f,
	0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
	0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f,
	0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
	0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f,
	/* 0x80 - 0xff: unmapped */
	-1, -1, -1, -1, -1, -1, -1, -1,
	-1, -1, -1, -1, -1, -1, -1, -1,
	-1, -1, -1, -1, -1, -1, -1, -1,
	-1, -1, -1, -1, -1, -1, -1, -1,
	-1, -1, -1, -1, -1, -1, -1, -1,
	-1, -1, -1, -1, -1, -1, -1, -1,
	-1, -1, -1, -1, -1, -1, -1, -1,
	-1, -1, -1, -1, -1, -1, -1, -1,
	-1, -1, -1, -1, -1, -1, -1, -1,
	-1, -1, -1, -1, -1, -1, -1, -1,
	-1, -1, -1, -1, -1, -1, -1, -1,
	-1, -1, -1, -1, -1, -1, -1, -1,
	-1, -1, -1, -1, -1, -1, -1, -1,
	-1, -1, -1, -1, -1, -1, -1, -1,
	-1, -1, -1, -1, -1, -1, -1, -1,
	-1, -1, -1, -1, -1, -1, -1, -1,
};
|
|
|
428 |
|
|
|
429 |
/*
 * MS-DOS character set: ASCII in the lower half, accented letters,
 * box-drawing forms, Greek and math symbols in the upper half.
 * From jhelling@cs.ruu.nl (Jeroen Hellingman).
 */
long tabmsdos[256] =
{
	/* 0x00 - 0x7f: identity */
	0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
	0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
	0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
	0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
	0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27,
	0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f,
	0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
	0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f,
	0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
	0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f,
	0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
	0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f,
	0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
	0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f,
	0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
	0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f,
	/* 0x80: latin */
	0x00c7, 0x00fc, 0x00e9, 0x00e2, 0x00e4, 0x00e0, 0x00e5, 0x00e7,
	0x00ea, 0x00eb, 0x00e8, 0x00ef, 0x00ee, 0x00ec, 0x00c4, 0x00c5,
	0x00c9, 0x00e6, 0x00c6, 0x00f4, 0x00f6, 0x00f2, 0x00fb, 0x00f9,
	0x00ff, 0x00d6, 0x00dc, 0x00a2, 0x00a3, 0x00a5, 0x20a7, 0x0192,
	0x00e1, 0x00ed, 0x00f3, 0x00fa, 0x00f1, 0x00d1, 0x00aa, 0x00ba,
	0x00bf, 0x2310, 0x00ac, 0x00bd, 0x00bc, 0x00a1, 0x00ab, 0x00bb,
	/* 0xb0: forms */
	0x2591, 0x2592, 0x2593, 0x2502, 0x2524, 0x2561, 0x2562, 0x2556,
	0x2555, 0x2563, 0x2551, 0x2557, 0x255d, 0x255c, 0x255b, 0x2510,
	0x2514, 0x2534, 0x252c, 0x251c, 0x2500, 0x253c, 0x255e, 0x255f,
	0x255a, 0x2554, 0x2569, 0x2566, 0x2560, 0x2550, 0x256c, 0x2567,
	0x2568, 0x2564, 0x2565, 0x2559, 0x2558, 0x2552, 0x2553, 0x256b,
	0x256a, 0x2518, 0x250c, 0x2588, 0x2584, 0x258c, 0x2590, 0x2580,
	/* 0xe0: greek */
	0x03b1, 0x00df, 0x0393, 0x03c0, 0x03a3, 0x03c3, 0x00b5, 0x03c4,
	0x03a6, 0x0398, 0x2126, 0x03b4, 0x221e, 0x2205, 0x2208, 0x2229,
	/* 0xf0: math */
	0x2261, 0x00b1, 0x2265, 0x2264, 0x2320, 0x2321, 0x00f7, 0x2248,
	0x00b0, 0x2022, 0x00b7, 0x221a, 0x207f, 0x00b2, 0x220e, 0x00a0,
};
|
|
|
456 |
/*
 * MS-DOS character set variant in which bytes 0x01-0x1f map to
 * graphic symbols instead of control characters; bytes 0x20 and up
 * carry the same values as tabmsdos.
 * From jhelling@cs.ruu.nl (Jeroen Hellingman).
 */
long tabmsdos2[256] =
{
	/* 0x00 - 0x1f: graphics in the control range */
	0x0000, 0x263a, 0x263b, 0x2665, 0x2666, 0x2663, 0x2660, 0x2022,
	0x25d8, 0x25cb, 0x25d9, 0x2642, 0x2640, 0x266a, 0x266b, 0x263c,
	0x25b6, 0x25c0, 0x2195, 0x203c, 0x00b6, 0x00a7, 0x2043, 0x21a8,
	0x2191, 0x2193, 0x2192, 0x2190, 0x2319, 0x2194, 0x25b2, 0x25bc,
	/* 0x20 - 0x7f: identity */
	0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27,
	0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f,
	0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
	0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f,
	0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
	0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f,
	0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
	0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f,
	0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
	0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f,
	0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
	0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f,
	/* 0x80: latin */
	0x00c7, 0x00fc, 0x00e9, 0x00e2, 0x00e4, 0x00e0, 0x00e5, 0x00e7,
	0x00ea, 0x00eb, 0x00e8, 0x00ef, 0x00ee, 0x00ec, 0x00c4, 0x00c5,
	0x00c9, 0x00e6, 0x00c6, 0x00f4, 0x00f6, 0x00f2, 0x00fb, 0x00f9,
	0x00ff, 0x00d6, 0x00dc, 0x00a2, 0x00a3, 0x00a5, 0x20a7, 0x0192,
	0x00e1, 0x00ed, 0x00f3, 0x00fa, 0x00f1, 0x00d1, 0x00aa, 0x00ba,
	0x00bf, 0x2310, 0x00ac, 0x00bd, 0x00bc, 0x00a1, 0x00ab, 0x00bb,
	/* 0xb0: forms */
	0x2591, 0x2592, 0x2593, 0x2502, 0x2524, 0x2561, 0x2562, 0x2556,
	0x2555, 0x2563, 0x2551, 0x2557, 0x255d, 0x255c, 0x255b, 0x2510,
	0x2514, 0x2534, 0x252c, 0x251c, 0x2500, 0x253c, 0x255e, 0x255f,
	0x255a, 0x2554, 0x2569, 0x2566, 0x2560, 0x2550, 0x256c, 0x2567,
	0x2568, 0x2564, 0x2565, 0x2559, 0x2558, 0x2552, 0x2553, 0x256b,
	0x256a, 0x2518, 0x250c, 0x2588, 0x2584, 0x258c, 0x2590, 0x2580,
	/* 0xe0: greek */
	0x03b1, 0x00df, 0x0393, 0x03c0, 0x03a3, 0x03c3, 0x00b5, 0x03c4,
	0x03a6, 0x0398, 0x2126, 0x03b4, 0x221e, 0x2205, 0x2208, 0x2229,
	/* 0xf0: math */
	0x2261, 0x00b1, 0x2265, 0x2264, 0x2320, 0x2321, 0x00f7, 0x2248,
	0x00b0, 0x2022, 0x00b7, 0x221a, 0x207f, 0x00b2, 0x220e, 0x00a0,
};
|
|
|
485 |
struct convert convert[] =
|
|
|
486 |
{ /* if two entries have the same name, put the from one first */
|
|
|
487 |
{ "8859-1", "Latin-1 (Western and Northern Europe including Italian)", Table, (void *)tab8859_1 },
|
|
|
488 |
{ "8859-2", "Latin-2 (Eastern Europe except Turkey and the Baltic countries)", Table, (void *)tab8859_2 },
|
|
|
489 |
{ "8859-3", "Latin-3 (Mediterranean, South Africa, Esperanto)", Table, (void *)tab8859_3 },
|
|
|
490 |
{ "8859-4", "Latin-4 (Scandinavia and the Baltic countries; obsolete)", Table, (void *)tab8859_4 },
|
|
|
491 |
{ "8859-5", "Part 5 (Cyrillic)", Table, (void *)tab8859_5 },
|
|
|
492 |
{ "8859-6", "Part 6 (Arabic)", Table, (void *)tab8859_6 },
|
|
|
493 |
{ "8859-7", "Part 7 (Greek)", Table, (void *)tab8859_7 },
|
|
|
494 |
{ "8859-8", "Part 8 (Hebrew)", Table, (void *)tab8859_8 },
|
|
|
495 |
{ "8859-9", "Latin-5 (Turkey, Western Europe except Icelandic and Faroese)", Table, (void *)tab8859_9 },
|
|
|
496 |
{ "8859-10", "Latin-6 (Northern Europe)", Table, (void *)tab8859_10 },
|
|
|
497 |
{ "8859-15", "Latin-9 (Western Europe)", Table, (void *)tab8859_15 },
|
|
|
498 |
{ "ascii", "7-bit ASCII", Table, (void *)tabascii },
|
|
|
499 |
{ "atari", "ATARI-ST character set", Table, (void *)tabatari },
|
|
|
500 |
{ "av", "Alternativnyj Variant", Table, (void *)tabav },
|
|
|
501 |
{ "big5", "Big 5 (HKU)", From|Func, 0, (Fnptr)big5_in },
|
|
|
502 |
{ "big5", "Big 5 (HKU)", Func, 0, (Fnptr)big5_out },
|
|
|
503 |
{ "ebcdic", "EBCDIC", Table, (void *)tabebcdic }, /* 6f is recommended bad map */
|
|
|
504 |
{ "euc-k", "Korean EUC: ASCII+KS C 5601 1987", From|Func, 0, (Fnptr)uksc_in },
|
|
|
505 |
{ "euc-k", "Korean EUC: ASCII+KS C 5601 1987", Func, 0, (Fnptr)uksc_out },
|
|
|
506 |
{ "euc-kr", "Korean EUC: ASCII+KS C 5601 1987", From|Func, 0, (Fnptr)uksc_in },
|
|
|
507 |
{ "euc-kr", "Korean EUC: ASCII+KS C 5601 1987", Func, 0, (Fnptr)uksc_out },
|
|
|
508 |
{ "ks_c_5601-1987", "Korean EUC: ASCII+KS C 5601 1987", From|Func, 0, (Fnptr)uksc_in },
|
|
|
509 |
{ "ks_c_5601-1987", "Korean EUC: ASCII+KS C 5601 1987", Func, 0, (Fnptr)uksc_out },
|
|
|
510 |
{ "gb2312", "GB2312-80 (Chinese)", From|Func, 0, (Fnptr)gb_in },
|
|
|
511 |
{ "gb2312", "GB2312-80 (Chinese)", Func, 0, (Fnptr)gb_out },
|
|
|
512 |
{ "gbk", "GBK (Chinese)", From|Func, 0, (Fnptr)gbk_in },
|
|
|
513 |
{ "gbk", "GBK (Chinese)", Func, 0, (Fnptr)gbk_out },
|
|
|
514 |
{ "html", "HTML", From|Func, 0, (Fnptr)html_in },
|
|
|
515 |
{ "html", "HTML", Func, 0, (Fnptr)html_out },
|
|
|
516 |
{ "ibm437", "IBM Code Page 437 (US)", Table, (void*)tabcp437 },
|
|
|
517 |
{ "ibm720", "IBM Code Page 720 (Arabic)", Table, (void*)tabcp720 },
|
|
|
518 |
{ "ibm737", "IBM Code Page 737 (Greek)", Table, (void*)tabcp737 },
|
|
|
519 |
{ "ibm775", "IBM Code Page 775 (Baltic)", Table, (void*)tabcp775 },
|
|
|
520 |
{ "ibm850", "IBM Code Page 850 (Multilingual Latin I)", Table, (void*)tabcp850 },
|
|
|
521 |
{ "ibm852", "IBM Code Page 852 (Latin II)", Table, (void*)tabcp852 },
|
|
|
522 |
{ "ibm855", "IBM Code Page 855 (Cyrillic)", Table, (void*)tabcp855 },
|
|
|
523 |
{ "ibm857", "IBM Code Page 857 (Turkish)", Table, (void*)tabcp857 },
|
|
|
524 |
{ "ibm858", "IBM Code Page 858 (Multilingual Latin I+Euro)", Table, (void*)tabcp858 },
|
|
|
525 |
{ "ibm862", "IBM Code Page 862 (Hebrew)", Table, (void*)tabcp862 },
|
|
|
526 |
{ "ibm866", "IBM Code Page 866 (Russian)", Table, (void*)tabcp866 },
|
|
|
527 |
{ "ibm874", "IBM Code Page 874 (Thai)", Table, (void*)tabcp874 },
|
|
|
528 |
{ "iso-2022-jp", "alias for jis-kanji (MIME)", From|Func, 0, (Fnptr)jisjis_in },
|
|
|
529 |
{ "iso-2022-jp", "alias for jis-kanji (MIME)", Func, 0, (Fnptr)jisjis_out },
|
|
|
530 |
{ "iso-8859-1", "alias for 8859-1 (MIME)", Table, (void *)tab8859_1 },
|
|
|
531 |
{ "iso-8859-2", "alias for 8859-2 (MIME)", Table, (void *)tab8859_2 },
|
|
|
532 |
{ "iso-8859-3", "alias for 8859-3 (MIME)", Table, (void *)tab8859_3 },
|
|
|
533 |
{ "iso-8859-4", "alias for 8859-4 (MIME)", Table, (void *)tab8859_4 },
|
|
|
534 |
{ "iso-8859-5", "alias for 8859-5 (MIME)", Table, (void *)tab8859_5 },
|
|
|
535 |
{ "iso-8859-6", "alias for 8859-6 (MIME)", Table, (void *)tab8859_6 },
|
|
|
536 |
{ "iso-8859-7", "alias for 8859-7 (MIME)", Table, (void *)tab8859_7 },
|
|
|
537 |
{ "iso-8859-8", "alias for 8859-8 (MIME)", Table, (void *)tab8859_8 },
|
|
|
538 |
{ "iso-8859-9", "alias for 8859-9 (MIME)", Table, (void *)tab8859_9 },
|
|
|
539 |
{ "iso-8859-10", "alias for 8859-10 (MIME)", Table, (void *)tab8859_10 },
|
|
|
540 |
{ "iso-8859-15", "alias for 8859-15 (MIME)", Table, (void *)tab8859_15 },
|
|
|
541 |
{ "jis", "guesses at the JIS encoding", From|Func, 0, (Fnptr)jis_in },
|
|
|
542 |
{ "jis-kanji", "ISO 2022-JP (Japanese)", From|Func, 0, (Fnptr)jisjis_in },
|
|
|
543 |
{ "jis-kanji", "ISO 2022-JP (Japanese)", Func, 0, (Fnptr)jisjis_out },
|
|
|
544 |
{ "koi8", "KOI-8 (GOST 19769-74)", Table, (void *)tabkoi8 },
|
|
|
545 |
{ "koi8-r", "alias for koi8 (MIME)", Table, (void *)tabkoi8 },
|
|
|
546 |
{ "latin1", "alias for 8859-1", Table, (void *)tab8859_1 },
|
|
|
547 |
{ "macrom", "Macintosh Standard Roman character set", Table, (void *)tabmacroman },
|
|
|
548 |
{ "microsoft", "alias for windows1252", Table, (void *)tabcp1252 },
|
|
|
549 |
{ "ms-kanji", "Microsoft, or Shift-JIS", From|Func, 0, (Fnptr)msjis_in },
|
|
|
550 |
{ "ms-kanji", "Microsoft, or Shift-JIS", Func, 0, (Fnptr)msjis_out },
|
|
|
551 |
{ "msdos", "IBM PC (alias for ibm437)", Table, (void *)tabcp437 },
|
|
|
552 |
{ "msdos2", "IBM PC (ibm437 with graphics in C0)", Table, (void *)tabmsdos2 },
|
|
|
553 |
{ "next", "NEXTSTEP character set", Table, (void *)tabnextstep },
|
|
|
554 |
{ "ov", "Osnovnoj Variant", Table, (void *)tabov },
|
|
|
555 |
{ "ps2", "IBM PS/2: (alias for ibm850)", Table, (void *)tabcp850 },
|
|
|
556 |
{ "sf1", "ISO-646: Finnish/Swedish SF-1 variant", Table, (void *)tabsf1 },
|
|
|
557 |
{ "sf2", "ISO-646: Finnish/Swedish SF-2 variant (recommended)", Table, (void *)tabsf2 },
|
|
|
558 |
{ "tis-620", "Thai+ASCII (TIS 620-1986)", Table, (void *)tabtis620 },
|
|
|
559 |
{ "tune", "TUNE (Tamil)", From|Func, 0, (Fnptr)tune_in },
|
|
|
560 |
{ "tune", "TUNE (Tamil)", Func, 0, (Fnptr)tune_out },
|
|
|
561 |
{ "ucode", "Russian U-code", Table, (void *)tabucode },
|
|
|
562 |
{ "ujis", "EUC-JX: JIS 0208", From|Func, 0, (Fnptr)ujis_in },
|
|
|
563 |
{ "ujis", "EUC-JX: JIS 0208", Func, 0, (Fnptr)ujis_out },
|
|
|
564 |
{ "unicode", "Unicode 1.1", From|Func, 0, (Fnptr)unicode_in },
|
|
|
565 |
{ "unicode", "Unicode 1.1", Func, 0, (Fnptr)unicode_out },
|
|
|
566 |
{ "unicode-be", "Unicode 1.1 big-endian", From|Func, 0, (Fnptr)unicode_in_be },
|
|
|
567 |
{ "unicode-be", "Unicode 1.1 big-endian", Func, 0, (Fnptr)unicode_out_be },
|
|
|
568 |
{ "unicode-le", "Unicode 1.1 little-endian", From|Func, 0, (Fnptr)unicode_in_le },
|
|
|
569 |
{ "unicode-le", "Unicode 1.1 little-endian", Func, 0, (Fnptr)unicode_out_le },
|
|
|
570 |
{ "us-ascii", "alias for ascii (MIME)", Table, (void *)tabascii },
|
|
|
571 |
{ "utf", "FSS-UTF a.k.a. UTF-8", From|Func, 0, (Fnptr)utf_in },
|
|
|
572 |
{ "utf", "FSS-UTF a.k.a. UTF-8", Func, 0, (Fnptr)utf_out },
|
|
|
573 |
{ "utf1", "UTF-1 (ISO 10646 Annex A)", From|Func, 0, (Fnptr)isoutf_in },
|
|
|
574 |
{ "utf1", "UTF-1 (ISO 10646 Annex A)", Func, 0, (Fnptr)isoutf_out },
|
|
|
575 |
{ "utf-8", "alias for utf (MIME)", From|Func, 0, (Fnptr)utf_in },
|
|
|
576 |
{ "utf-8", "alias for utf (MIME)", Func, 0, (Fnptr)utf_out },
|
|
|
577 |
{ "utf-16", "alias for unicode (MIME)", From|Func, 0, (Fnptr)unicode_in },
|
|
|
578 |
{ "utf-16", "alias for unicode (MIME)", Func, 0, (Fnptr)unicode_out },
|
|
|
579 |
{ "utf-16be", "alias for unicode-be (MIME)", From|Func, 0, (Fnptr)unicode_in_be },
|
|
|
580 |
{ "utf-16be", "alias for unicode-be (MIME)", Func, 0, (Fnptr)unicode_out_be },
|
|
|
581 |
{ "utf-16le", "alias for unicode-le (MIME)", From|Func, 0, (Fnptr)unicode_in_le },
|
|
|
582 |
{ "utf-16le", "alias for unicode-le (MIME)", Func, 0, (Fnptr)unicode_out_le },
|
|
|
583 |
{ "viet1", "Vietnamese VSCII-1 (1993)", Table, (void *)tabviet1 },
|
|
|
584 |
{ "viet2", "Vietnamese VSCII-2 (1993)", Table, (void *)tabviet2 },
|
|
|
585 |
{ "vscii", "Vietnamese VISCII 1.1 (1992)", Table, (void *)tabviscii },
|
|
|
586 |
{ "windows-1250", "Windows Code Page 1250 (Central Europe)", Table, (void *)tabcp1250 },
|
|
|
587 |
{ "windows-1251", "Windows Code Page 1251 (Cyrillic)", Table, (void *)tabcp1251 },
|
|
|
588 |
{ "windows-1252", "Windows Code Page 1252 (Latin I)", Table, (void *)tabcp1252 },
|
|
|
589 |
{ "windows-1253", "Windows Code Page 1253 (Greek)", Table, (void *)tabcp1253 },
|
|
|
590 |
{ "windows-1254", "Windows Code Page 1254 (Turkish)", Table, (void *)tabcp1254 },
|
|
|
591 |
{ "windows-1255", "Windows Code Page 1255 (Hebrew)", Table, (void *)tabcp1255 },
|
|
|
592 |
{ "windows-1256", "Windows Code Page 1256 (Arabic)", Table, (void *)tabcp1256 },
|
|
|
593 |
{ "windows-1257", "Windows Code Page 1257 (Baltic)", Table, (void *)tabcp1257 },
|
|
|
594 |
{ "windows-1258", "Windows Code Page 1258 (Vietnam)", Table, (void *)tabcp1258 },
|
|
|
595 |
{ 0 },
|
|
|
596 |
};
|