Subversion Repositories planix.SVN

Rev

Blame | Last modification | View Log | RSS feed

#include <u.h>
#include <libc.h>
#include <draw.h>
#include <html.h>
#include "impl.h"

Rune* whitespace = L" \t\n\r";
Rune* notwhitespace = L"^ \t\n\r";

// All lists start out like List structure.
// List itself can be used as list of int.
int
_listlen(List* l)
{
        int n = 0;

        while(l != nil) {
                l = l->next;
                n++;
        }
        return n;
}

// Cons
List*
_newlist(int val, List* rest)
{
        List* ans;

        ans = (List*)emalloc(sizeof(List));
        ans->val = val;
        ans->next = rest;
        return ans;
}

// Reverse a list in place
List*
_revlist(List* l)
{
        List* newl;
        List* nextl;

        newl = nil;
        while(l != nil) {
                nextl = l->next;
                l->next = newl;
                newl = l;
                l = nextl;
        }
        return newl;
}

// The next few routines take a "character class" as argument.
//    e.g., "a-zA-Z", or "^ \t\n"
// (ranges indicated by - except in first position;
//  ^ is first position means "not in" the following class)

// Splitl splits s[0:n] just before first character of class cl.
// Answers go in (p1, n1) and (p2, n2).
// If no split, the whole thing goes in the first component.
// Note: answers contain pointers into original string.
void
_splitl(Rune* s, int n, Rune* cl, Rune** p1, int* n1, Rune** p2, int* n2)
{
        Rune* p;

        p = _Strnclass(s, cl, n);
        *p1 = s;
        if(p == nil) {
                *n1 = n;
                *p2 = nil;
                *n2 = 0;
        }
        else {
                *p2 = p;
                *n1 = p-s;
                *n2 = n-*n1;
        }
}

// Splitr splits s[0:n] just after last character of class cl.
// Answers go in (p1, n1) and (p2, n2).
// If no split, the whole thing goes in the last component.
// Note: answers contain pointers into original string.
void
_splitr(Rune* s, int n, Rune* cl, Rune** p1, int* n1, Rune** p2, int* n2)
{
        Rune* p;

        p = _Strnrclass(s, cl, n);
        if(p == nil) {
                *p1 = nil;
                *n1 = 0;
                *p2 = s;
                *n2 = n;
        }
        else {
                *p1 = s;
                *p2 = p+1;
                *n1 = *p2-s;
                *n2 = n-*n1;
        }
}

// Splitall splits s[0:n] into parts that are separated by characters from class cl.
// Each part will have nonzero length.
// At most alen parts are found, and pointers to their starts go into
// the strarr array, while their lengths go into the lenarr array.
// The return value is the number of parts found.
int
_splitall(Rune* s, int n, Rune* cl, Rune** strarr, int* lenarr, int alen)
{
        int i;
        Rune* p;
        Rune* q;
        Rune* slast;

        if(s == nil || n == 0)
                return 0;
        i = 0;
        p = s;
        slast = s+n;
        while(p < slast && i < alen) {
                while(p < slast && _inclass(*p, cl))
                        p++;
                if(p == slast)
                        break;
                q = _Strnclass(p, cl, slast-p);
                if(q == nil)
                        q = slast;
                assert(q > p && q <= slast);
                strarr[i] = p;
                lenarr[i] = q-p;
                i++;
                p = q;
        }
        return i;
}

// Find part of s that excludes leading and trailing whitespace,
// and return that part in *pans (and its length in *panslen).
void
_trimwhite(Rune* s, int n, Rune** pans, int* panslen)
{
        Rune* p;
        Rune* q;

        p = nil;
        if(n > 0) {
                p = _Strnclass(s, notwhitespace, n);
                if(p != nil) {
                        q = _Strnrclass(s, notwhitespace, n);
                        assert(q != nil);
                        n = q+1-p;
                }
        }
        *pans = p;
        *panslen = n;
}

// _Strclass returns a pointer to the first element of s that is
// a member of class cl, nil if none.
Rune*
_Strclass(Rune* s, Rune* cl)
{
        Rune* p;

        for(p = s; *p != 0; p++)
                if(_inclass(*p, cl))
                        return p;
        return nil;
}

// _Strnclass returns a pointer to the first element of s[0:n] that is
// a member of class cl, nil if none.
Rune*
_Strnclass(Rune* s, Rune* cl, int n)
{
        Rune* p;

        for(p = s; n-- && *p != 0; p++)
                if(_inclass(*p, cl))
                        return p;
        return nil;
}

// _Strrclass returns a pointer to the last element of s that is
// a member of class cl, nil if none
Rune*
_Strrclass(Rune* s, Rune* cl)
{
        Rune* p;

        if(s == nil || *s == 0)
                return nil;
        p = s + runestrlen(s) - 1;
        while(p >= s) {
                if(_inclass(*p, cl))
                        return p;
                p--;
        };
        return nil;
}

// _Strnrclass returns a pointer to the last element of s[0:n] that is
// a member of class cl, nil if none
Rune*
_Strnrclass(Rune* s, Rune* cl, int n)
{
        Rune* p;

        if(s == nil || *s == 0 || n == 0)
                return nil;
        p = s + n - 1;
        while(p >= s) {
                if(_inclass(*p, cl))
                        return p;
                p--;
        };
        return nil;
}

// Is c in the class cl?
int
_inclass(Rune c, Rune* cl)
{
        int     n;
        int     ans;
        int     negate;
        int     i;

        n = _Strlen(cl);
        if(n == 0)
                return 0;
        ans = 0;
        negate = 0;
        if(cl[0] == '^') {
                negate = 1;
                cl++;
                n--;
        }
        for(i = 0; i < n; i++) {
                if(cl[i] == '-' && i > 0 && i < n - 1) {
                        if(c >= cl[i - 1] && c <= cl[i + 1]) {
                                ans = 1;
                                break;
                        }
                        i++;
                }
                else if(c == cl[i]) {
                        ans = 1;
                        break;
                }
        }
        if(negate)
                ans = !ans;
        return ans;
}

// Is pre a prefix of s?
int
_prefix(Rune* pre, Rune* s)
{
        int     ns;
        int     n;
        int     k;

        ns = _Strlen(s);
        n = _Strlen(pre);
        if(ns < n)
                return 0;
        for(k = 0; k < n; k++) {
                if(pre[k] != s[k])
                        return 0;
        }
        return 1;
}

// Number of runes in (null-terminated) s
int
_Strlen(Rune* s)
{
        if(s == nil)
                return 0;
        return runestrlen(s);
}

// -1, 0, 1 as s1 is lexicographically less, equal greater than s2
int
_Strcmp(Rune *s1, Rune *s2)
{
        if(s1 == nil)
                return (s2 == nil || *s2 == 0) ? 0 : -1;
        if(s2 == nil)
                return (*s1 == 0) ? 0 : 1;
        return runestrcmp(s1, s2);
}

// Like Strcmp, but use exactly n chars of s1 (assume s1 has at least n chars).
// Also, do a case-insensitive match, assuming s2
// has no chars in [A-Z], only their lowercase versions.
// (This routine is used for in-place keyword lookup, where s2 is in a keyword
// list and s1 is some substring, possibly mixed-case, in a buffer.)
int
_Strncmpci(Rune *s1, int n1, Rune *s2)
{
        Rune c1, c2;

        for(;;) {
                if(n1-- == 0) {
                        if(*s2 == 0)
                                return 0;
                        return -1;
                }
                c1 = *s1++;
                c2 = *s2++;
                if(c1 >= 'A' && c1 <= 'Z')
                        c1 = c1 - 'A' + 'a';
                if(c1 != c2) {
                        if(c1 > c2)
                                return 1;
                        return -1;
                }
        }
}

// emalloc and copy
Rune*
_Strdup(Rune* s)
{
        if(s == nil)
                return nil;
        return _Strndup(s, runestrlen(s));
}

// emalloc and copy n chars of s (assume s is at least that long),
// and add 0 terminator.
// Return nil if n==0.
Rune*
_Strndup(Rune* s, int n)
{
        Rune* ans;

        if(n <= 0)
                return nil;
        ans = _newstr(n);
        memmove(ans, s, n*sizeof(Rune));
        ans[n] = 0;
        return ans;
}
// emalloc enough room for n Runes, plus 1 null terminator.
// (Not initialized to anything.)
Rune*
_newstr(int n)
{
        return (Rune*)emalloc((n+1)*sizeof(Rune));
}

// emalloc and copy s+t
Rune*
_Strdup2(Rune* s, Rune* t)
{
        int ns, nt;
        Rune* ans;
        Rune* p;

        ns = _Strlen(s);
        nt = _Strlen(t);
        if(ns+nt == 0)
                return nil;
        ans = _newstr(ns+nt);
        p = _Stradd(ans, s, ns);
        p = _Stradd(p, t, nt);
        *p = 0;
        return ans;
}

// Return emalloc'd substring s[start:stop],
Rune*
_Strsubstr(Rune* s, int start, int stop)
{
        Rune* t;

        if(start == stop)
                return nil;
        t = _Strndup(s+start, stop-start);
        return t;
}

// Copy n chars to s1 from s2, and return s1+n
Rune*
_Stradd(Rune* s1, Rune* s2, int n)
{
        if(n == 0)
                return s1;
        memmove(s1, s2, n*sizeof(Rune));
        return s1+n;
}

// Like strtol, but converting from Rune* string

#define LONG_MAX        2147483647L
#define LONG_MIN        -2147483648L

long
_Strtol(Rune* nptr, Rune** endptr, int base)
{
        Rune* p;
        long n, nn;
        int c, ovfl, v, neg, ndig;

        p = nptr;
        neg = 0;
        n = 0;
        ndig = 0;
        ovfl = 0;

        /*
         * White space
         */
        for(;;p++){
                switch(*p){
                case ' ':
                case '\t':
                case '\n':
                case '\f':
                case '\r':
                case '\v':
                        continue;
                }
                break;
        }

        /*
         * Sign
         */
        if(*p=='-' || *p=='+')
                if(*p++ == '-')
                        neg = 1;

        /*
         * Base
         */
        if(base==0){
                if(*p != '0')
                        base = 10;
                else{
                        base = 8;
                        if(p[1]=='x' || p[1]=='X'){
                                p += 2;
                                base = 16;
                        }
                }
        }else if(base==16 && *p=='0'){
                if(p[1]=='x' || p[1]=='X')
                        p += 2;
        }else if(base<0 || 36<base)
                goto Return;

        /*
         * Non-empty sequence of digits
         */
        for(;; p++,ndig++){
                c = *p;
                v = base;
                if('0'<=c && c<='9')
                        v = c - '0';
                else if('a'<=c && c<='z')
                        v = c - 'a' + 10;
                else if('A'<=c && c<='Z')
                        v = c - 'A' + 10;
                if(v >= base)
                        break;
                nn = n*base + v;
                if(nn < n)
                        ovfl = 1;
                n = nn;
        }

    Return:
        if(ndig == 0)
                p = nptr;
        if(endptr)
                *endptr = p;
        if(ovfl){
                if(neg)
                        return LONG_MIN;
                return LONG_MAX;
        }
        if(neg)
                return -n;
        return n;
}

// Convert buf[0:n], bytes whose character set is chset,
// into a emalloc'd null-terminated Unicode string.
Rune*
toStr(uchar* buf, int n, int chset)
{
        int i;
        int m;
        Rune ch;
        Rune* ans;

        switch(chset) {
        case US_Ascii:
        case ISO_8859_1:
                ans = (Rune*)emalloc((n+1)*sizeof(Rune));
                for(i = 0; i < n; i++)
                        ans[i] = buf[i];
                ans[n] = 0;
                break;

        case UTF_8:
                m = 0;
                for(i = 0; i < n; ) {
                        i += chartorune(&ch, (char*)(buf+i));
                        m++;
                }
                ans = (Rune*)emalloc((m+1)*sizeof(Rune));
                m = 0;
                for(i = 0; i < n; ) {
                        i += chartorune(&ch, (char*)(buf+i));
                        ans[m++] = ch;
                }
                ans[m] = 0;
                break;

        default:
                ans = nil;
                assert(0);
        }
        return ans;
}

// Convert buf[0:n], Unicode characters,
// into an emalloc'd null-terminated string in character set chset.
// Use 0x80 for unconvertable characters.
uchar*
fromStr(Rune* buf, int n, int chset)
{
        uchar* ans;
        int i, lim, m;
        Rune ch;
        uchar* p;
        uchar s[UTFmax];

        ans = nil;
        switch(chset) {
        case US_Ascii:
        case ISO_8859_1:
                ans = (uchar*)emalloc(n+1);
                lim = (chset==US_Ascii)? 127 : 255;
                for(i = 0; i < n; i++) {
                        ch = buf[i];
                        if(ch > lim)
                                ch = 0x80;
                        ans[i] = ch;
                }
                ans[n] = 0;
                break;

        case UTF_8:
                m = 0;
                for(i = 0; i < n; i++) {
                        m += runetochar((char*)s, &buf[i]);
                }
                ans = (uchar*)emalloc(m+1);
                p = ans;
                for(i = 0; i < n; i++)
                        p += runetochar((char*)p, &buf[i]);
                *p = 0;
                break;

        default:
                assert(0);
        }
        return ans;

}