Subversion Repositories planix.SVN

Rev

Blame | Last modification | View Log | RSS feed

#include <stdlib.h>
#include <limits.h>

/*
 * Use the FSS-UTF transformation proposed by posix.
 *      We define 7 byte types:
 *      T0      0xxxxxxx        7 free bits
 *      Tx      10xxxxxx        6 free bits
 *      T1      110xxxxx        5 free bits
 *      T2      1110xxxx        4 free bits
 *      T3      11110xxx        3 free bits
 *      T4      111110xx        2 free bits
 *      T5      1111110x        1 free bit
 *
 *      Encoding is as follows.
 *      From hex        Thru hex        Sequence                Bits
 *      00000000        0000007F        T0                              7
 *      00000080        000007FF        T1 Tx                   11
 *      00000800        0000FFFF        T2 Tx Tx                        16
 *      00010000        001FFFFF        T3 Tx Tx Tx             21
 *      00200000        03FFFFFF        T4 Tx Tx Tx Tx          26
 *      04000000        7FFFFFFF        T5 Tx Tx  Tx Tx Tx      31
 */
/*
 * Forward declaration: mblen is defined before mbtowc in this file.
 */
int
mbtowc(wchar_t *pwc, const char *s, size_t n);

/*
 * mblen: report the length in bytes of the multibyte character at s,
 * looking at no more than n bytes.
 *
 * This is mbtowc with the decoded value thrown away, so the result is
 * whatever mbtowc returns for the same s and n.
 */
int
mblen(const char *s, size_t n)
{
        int len;

        len = mbtowc(NULL, s, n);       /* NULL: do not store the wide character */
        return len;
}

/*
 * Constants shared by the decoder (mbtowc) and the encoder (wctomb).
 * Names follow the byte types in the table at the top of the file.
 */
enum {
        C0MSK = 0x7F,           /* payload of a T0 byte: 7 bits */
        C1MSK = 0x7FF,          /* payload of a T1 Tx sequence: 11 bits */
        T1 = 0xC0,              /* 110xxxxx */
        T2 = 0xE0,              /* 1110xxxx */
        NT1BITS = 11,           /* bits carried by T1 Tx */
        NSHFT = 5,              /* payload bits gained by each extra byte */
        NCSHFT = NSHFT + 1,     /* payload bits in one Tx byte */
        NSEQMAX = 6,            /* longest sequence in the table: T5 + 5 Tx */
        /*
         * Largest encodable value (31 bits).  Written out as a literal:
         * deriving it from 1 << (8*MB_LEN_MAX - 1) overflows int for any
         * MB_LEN_MAX of 4 or more.
         */
        WCHARMSK = 0x7FFFFFFF,
};

/*
 * mbtowc: decode the multibyte character at s, looking at no more
 * than n bytes, and store it through pwc when pwc is not null.
 *
 * Returns
 *       0      s is null (the encoding has no shift state), or the
 *              character decoded is the NUL character;
 *      -1      the bytes are not a valid, complete sequence: n is too
 *              small, the lead byte is a stray Tx or is 0xFE/0xFF, a
 *              following byte is not a Tx, or the value is encoded in
 *              more bytes than it needs (overlong form);
 *      >0      the number of bytes consumed, never more than
 *              MB_LEN_MAX and never more than NSEQMAX.
 */
int
mbtowc(wchar_t *pwc, const char *s, size_t n)
{
        unsigned long long lead, cont, val, tag, want, mask, min;
        int i, maxlen;

        if(!s)
                return 0;

        if(n < 1)
                goto bad;

        lead = s[0] & 0xff;             /* first one is special */
        if((lead & 0x80) == 0x00) {     /* T0: single byte */
                if(pwc)
                        *pwc = lead;
                if(lead == 0)
                        return 0;
                return 1;
        }

        /* a Tx byte (10xxxxxx) can never start a sequence */
        if(lead < T1)
                goto bad;

        maxlen = NSEQMAX < MB_LEN_MAX ? NSEQMAX : MB_LEN_MAX;

        /*
         * On entry to iteration i the variables describe a sequence of
         * i+1 bytes: (lead & tag) == want identifies its lead byte,
         * mask selects its payload bits, and min is the smallest value
         * that really needs that many bytes.
         */
        tag = T2;
        want = T1;
        mask = C1MSK;
        min = C0MSK + 1;
        val = lead;
        for(i = 1; i < maxlen; i++){
                if(n < (size_t)i + 1)
                        goto bad;
                cont = (s[i] ^ 0x80) & 0xff;    /* a Tx byte becomes 00xxxxxx */
                if((cont & 0xC0) != 0x00)
                        goto bad;
                val = (val << NCSHFT) | cont;
                if((lead & tag) == want) {
                        val &= mask;            /* strip the lead byte's tag bits */
                        if(val < min)
                                goto bad;       /* overlong form */
                        if(pwc)
                                *pwc = val;
                        return i + 1;
                }
                min = mask + 1;
                want = tag;
                tag = (tag >> 1) | 0x80;
                mask = (mask << NSHFT) | mask;
        }

        /*
         * bad decoding
         */
bad:
        return -1;

}

/*
 * wctomb: encode wchar into the buffer at s, using the shortest
 * sequence that can hold it.
 *
 * Only the low 31 bits of wchar (WCHARMSK) are encoded.  The caller
 * must provide room for MB_LEN_MAX bytes.
 *
 * Returns 0 when s is null (the encoding has no shift state),
 * otherwise the number of bytes stored, which is at most the smaller
 * of NSEQMAX and MB_LEN_MAX.  If MB_LEN_MAX is smaller than the value
 * needs, the sequence is cut to MB_LEN_MAX bytes and the high bits
 * that do not fit are dropped.
 */
int
wctomb(char *s, wchar_t wchar)
{
        unsigned long long c, limit, tag;
        int len, maxlen, shift, k;

        if(!s)
                return 0;

        c = wchar & WCHARMSK;
        if(c < C0MSK + 1) {
                s[0] = c;
                return 1;
        }

        maxlen = NSEQMAX < MB_LEN_MAX ? NSEQMAX : MB_LEN_MAX;

        /*
         * limit is the first value too big for len bytes.  Two bytes
         * hold 11 bits; every further byte adds NSHFT (5) more, giving
         * 0x800, 0x10000, 0x200000, 0x4000000, 0x80000000.
         */
        tag = T1;
        limit = C1MSK + 1;
        for(len = 2; len < maxlen && c >= limit; len++){
                limit <<= NSHFT;
                tag = (tag >> 1) | 0x80;
        }

        /* lead byte: tag bits, then the top payload bits that fit below them */
        shift = (len - 1) * NCSHFT;
        s[0] = (char)(tag | ((c >> shift) & ((~tag & 0xff) >> 1)));

        /* continuation bytes, most significant first */
        for(k = 1; k < len; k++){
                shift -= NCSHFT;
                s[k] = (char)(0x80 | ((c >> shift) & 0x3f));
        }
        return len;
}

/*
 * mbstowcs: convert the multibyte string s to wide characters.
 *
 * At most n wide characters are stored in pwcs.  Conversion stops
 * after a terminating NUL has been stored or after n characters,
 * whichever comes first; in the second case pwcs is not terminated.
 *
 * If pwcs is null nothing is stored and n is ignored: the whole
 * string is scanned and its length in wide characters is returned.
 *
 * Returns the number of wide characters converted, not counting the
 * terminating NUL, or (size_t)-1 if mbtowc rejects a sequence.
 */
size_t
mbstowcs(wchar_t *pwcs, const char *s, size_t n)
{
        size_t count;   /* size_t, so that it cannot overflow before reaching n */
        int len, c;

        for(count = 0; pwcs == NULL || count < n; count++) {
                c = *s & 0xff;
                if(c < 0x80) {
                        /* single byte: no need to call mbtowc */
                        if(pwcs)
                                *pwcs = c;
                        if(c == 0)
                                break;
                        s++;
                } else {
                        len = mbtowc(pwcs, s, MB_LEN_MAX);
                        if(len <= 0)
                                return (len < 0) ? (size_t)-1 : count;
                        s += len;
                }
                if(pwcs)
                        pwcs++;
        }
        return count;
}

/*
 * wcstombs: convert the wide string pwcs to a multibyte string.
 *
 * At most n bytes are stored in s.  Each wide character is encoded
 * with wctomb into a scratch buffer and copied out only if all of its
 * bytes fit; conversion stops at the first character that does not
 * fit, so s never ends in a partial sequence.  A terminating NUL is
 * stored if there is room for it.
 *
 * If s is null nothing is stored and n is ignored: the whole string
 * is scanned and the number of bytes it needs is returned.
 *
 * Returns the number of bytes produced, not counting the terminating
 * NUL, or (size_t)-1 if wctomb reports an error.
 */
size_t
wcstombs(char *s, const wchar_t *pwcs, size_t n)
{
        char buf[MB_LEN_MAX];
        size_t used;
        int len, k;
        wchar_t wc;

        used = 0;
        while(s == NULL || used < n) {
                wc = *pwcs++;
                if(wc == 0) {
                        if(s)
                                s[used] = '\0';
                        break;
                }
                len = wctomb(buf, wc);
                if(len < 0)
                        return (size_t)-1;
                if(s) {
                        if((size_t)len > n - used)
                                break;          /* would overrun: stop here */
                        for(k = 0; k < len; k++)
                                s[used + k] = buf[k];
                }
                used += len;
        }
        return used;
}