Subversion Repositories planix.SVN

Rev

Rev 2 | Blame | Compare with Previous | Last modification | View Log | RSS feed

#include <u.h>
#include <libc.h>
#include <bio.h>
#include "dict.h"

/*
 * Table of the dictionaries this program knows about.
 * Each entry lists, in order: a short name, a descriptive title,
 * the path of the data file, the path of an index file, and three
 * handler functions (next-offset, print-entry, print-key).
 * NOTE(review): these field meanings are inferred from the values
 * below; the Dict type itself is declared in dict.h -- confirm there.
 * Several entries share one data file and differ only in the index
 * file (e.g. "ce", "ceh" and "ec").
 */
Dict dicts[] = {
        {"oed",         "Oxford English Dictionary, 2nd Ed.",
         "/lib/dict/oed2",      "/lib/dict/oed2index",
         oednextoff,    oedprintentry,          oedprintkey},
        {"ahd",         "American Heritage Dictionary, 2nd College Ed.",
         "/lib/ahd/DICT.DB",    "/lib/ahd/index",
         ahdnextoff,    ahdprintentry,          ahdprintkey},
        {"pgw",         "Project Gutenberg Webster Dictionary",
         "/lib/dict/pgw",       "/lib/dict/pgwindex",
         pgwnextoff,    pgwprintentry,          pgwprintkey},
        {"thesaurus",   "Collins Thesaurus",
         "/lib/dict/thesaurus", "/lib/dict/thesindex",
         thesnextoff,   thesprintentry, thesprintkey},
        {"roget",               "Project Gutenberg Roget's Thesaurus",
         "/lib/dict/roget", "/lib/dict/rogetindex",
         rogetnextoff,  rogetprintentry,        rogetprintkey},

        {"ce",          "Gendai Chinese->English",
         "/lib/dict/world/sansdata/sandic24.dat",
         "/lib/dict/world/sansdata/ceindex",
         worldnextoff,  worldprintentry,        worldprintkey},
        {"ceh",         "Gendai Chinese->English (Hanzi index)",
         "/lib/dict/world/sansdata/sandic24.dat",
         "/lib/dict/world/sansdata/cehindex",
         worldnextoff,  worldprintentry,        worldprintkey},
        {"ec",          "Gendai English->Chinese",
         "/lib/dict/world/sansdata/sandic24.dat",
         "/lib/dict/world/sansdata/ecindex",
         worldnextoff,  worldprintentry,        worldprintkey},

        {"dae",         "Gyldendal Danish->English",
         "/lib/dict/world/gylddata/sandic30.dat",
         "/lib/dict/world/gylddata/daeindex",
         worldnextoff,  worldprintentry,        worldprintkey},
        {"eda",         "Gyldendal English->Danish",
         "/lib/dict/world/gylddata/sandic29.dat",
         "/lib/dict/world/gylddata/edaindex",
         worldnextoff,  worldprintentry,        worldprintkey},

        {"due",         "Wolters-Noordhoff Dutch->English",
         "/lib/dict/world/woltdata/sandic07.dat",
         "/lib/dict/world/woltdata/deindex",
         worldnextoff,  worldprintentry,        worldprintkey},
        {"edu",         "Wolters-Noordhoff English->Dutch",
         "/lib/dict/world/woltdata/sandic06.dat",
         "/lib/dict/world/woltdata/edindex",
         worldnextoff,  worldprintentry,        worldprintkey},

        {"fie",         "WSOY Finnish->English",
         "/lib/dict/world/werndata/sandic32.dat",
         "/lib/dict/world/werndata/fieindex",
         worldnextoff,  worldprintentry,        worldprintkey},
        {"efi",         "WSOY English->Finnish",
         "/lib/dict/world/werndata/sandic31.dat",
         "/lib/dict/world/werndata/efiindex",
         worldnextoff,  worldprintentry,        worldprintkey},

        {"fe",          "Collins French->English",
         "/lib/dict/fe",        "/lib/dict/feindex",
         pcollnextoff,  pcollprintentry,        pcollprintkey},
        {"ef",          "Collins English->French",
         "/lib/dict/ef",        "/lib/dict/efindex",
         pcollnextoff,  pcollprintentry,        pcollprintkey},

        {"ge",          "Collins German->English",
         "/lib/dict/ge",        "/lib/dict/geindex",
         pcollgnextoff, pcollgprintentry,       pcollgprintkey},
        {"eg",          "Collins English->German",
         "/lib/dict/eg",        "/lib/dict/egindex",
         pcollgnextoff, pcollgprintentry,       pcollgprintkey},

        {"ie",          "Collins Italian->English",
         "/lib/dict/ie",        "/lib/dict/ieindex",
         pcollnextoff,  pcollprintentry,        pcollprintkey},
        {"ei",          "Collins English->Italian",
         "/lib/dict/ei",        "/lib/dict/eiindex",
         pcollnextoff,  pcollprintentry,        pcollprintkey},

        {"je",          "Sanshusha Japanese->English",
         "/lib/dict/world/sansdata/sandic18.dat",
         "/lib/dict/world/sansdata/jeindex",
         worldnextoff,  worldprintentry,        worldprintkey},
        {"jek",         "Sanshusha Japanese->English (Kanji index)",
         "/lib/dict/world/sansdata/sandic18.dat",
         "/lib/dict/world/sansdata/jekindex",
         worldnextoff,  worldprintentry,        worldprintkey},
        {"ej",          "Sanshusha English->Japanese",
         "/lib/dict/world/sansdata/sandic18.dat",
         "/lib/dict/world/sansdata/ejindex",
         worldnextoff,  worldprintentry,        worldprintkey},

        {"tjeg",        "Sanshusha technical Japanese->English,German",
         "/lib/dict/world/sansdata/sandic16.dat",
         "/lib/dict/world/sansdata/tjegindex",
         worldnextoff,  worldprintentry,        worldprintkey},
        {"tjegk",       "Sanshusha technical Japanese->English,German (Kanji index)",
         "/lib/dict/world/sansdata/sandic16.dat",
         "/lib/dict/world/sansdata/tjegkindex",
         worldnextoff,  worldprintentry,        worldprintkey},
        {"tegj",        "Sanshusha technical English->German,Japanese",
         "/lib/dict/world/sansdata/sandic16.dat",
         "/lib/dict/world/sansdata/tegjindex",
         worldnextoff,  worldprintentry,        worldprintkey},
        {"tgje",        "Sanshusha technical German->Japanese,English",
         "/lib/dict/world/sansdata/sandic16.dat",
         "/lib/dict/world/sansdata/tgjeindex",
         worldnextoff,  worldprintentry,        worldprintkey},

        {"ne",          "Kunnskapforlaget Norwegian->English",
         "/lib/dict/world/kunndata/sandic28.dat",
         "/lib/dict/world/kunndata/neindex",
         worldnextoff,  worldprintentry,        worldprintkey},
        {"en",          "Kunnskapforlaget English->Norwegian",
         "/lib/dict/world/kunndata/sandic27.dat",
         "/lib/dict/world/kunndata/enindex",
         worldnextoff,  worldprintentry,        worldprintkey},

        {"re",          "Leon Ungier Russian->English",
         "/lib/dict/re",        "/lib/dict/reindex",
         simplenextoff, simpleprintentry,       simpleprintkey},
        {"er",          "Leon Ungier English->Russian",
         "/lib/dict/re",        "/lib/dict/erindex",
         simplenextoff, simpleprintentry,       simpleprintkey},

        {"se",          "Collins Spanish->English",
         "/lib/dict/se",        "/lib/dict/seindex",
         pcollnextoff,  pcollprintentry,        pcollprintkey},
        {"es",          "Collins English->Spanish",
         "/lib/dict/es",        "/lib/dict/esindex",
         pcollnextoff,  pcollprintentry,        pcollprintkey},

        {"swe",         "Esselte Studium Swedish->English",
         "/lib/dict/world/essedata/sandic34.dat",
         "/lib/dict/world/essedata/sweindex",
         worldnextoff,  worldprintentry,        worldprintkey},
        {"esw",         "Esselte Studium English->Swedish",
         "/lib/dict/world/essedata/sandic33.dat",
         "/lib/dict/world/essedata/eswindex",
         worldnextoff,  worldprintentry,        worldprintkey},

        {"movie",       "Movies -- by title",
         "/lib/movie/data",     "/lib/dict/movtindex",
         movienextoff,  movieprintentry,        movieprintkey},
        {"moviea",      "Movies -- by actor",
         "/lib/movie/data",     "/lib/dict/movaindex",
         movienextoff,  movieprintentry,        movieprintkey},
        {"movied",      "Movies -- by director",
         "/lib/movie/data",     "/lib/dict/movdindex",
         movienextoff,  movieprintentry,        movieprintkey},

        {"slang",       "English Slang",
         "/lib/dict/slang",     "/lib/dict/slangindex",
         slangnextoff,  slangprintentry,        slangprintkey},

        {"robert",      "Robert Électronique",
         "/lib/dict/robert/_pointers",  "/lib/dict/robert/_index",
         robertnextoff, robertindexentry,       robertprintkey},
        {"robertv",     "Robert Électronique - formes des verbes",
         "/lib/dict/robert/flex.rob",   "/lib/dict/robert/_flexindex",
         robertnextflex,        robertflexentry,        robertprintkey},

        /* all-zero sentinel entry; presumably marks the end for code that walks the table */
        {0, 0, 0, 0, 0}
};

/*
 * Describes one accent: the rune for the accent itself plus a
 * zero-terminated list of runes taken two at a time, each plain
 * character followed by its accented form.
 */
typedef struct Lig {
        Rune    start;          /* the accent rune */
        Rune    *pairs;         /* plain char, accented char, plain char, ... */
} Lig;

/*
 * Accent table, indexed by accent code minus LIGS (liglookup
 * indexes it that way).  Each entry holds the accent rune and the
 * <char,accented char> pairs available for that accent; an empty
 * pairs string means no precomposed forms are listed.
 */
static Lig ligtab[Nligs] = {
[LACU-LIGS]     {L'´', L"AÁaáCĆcćEÉeégģIÍiíıíLĹlĺNŃnńOÓoóRŔrŕSŚsśUÚuúYÝyýZŹzź"},
[LGRV-LIGS]     {L'ˋ', L"AÀaàEÈeèIÌiìıìOÒoòUÙuù"},
[LUML-LIGS]     {L'¨', L"AÄaäEËeëIÏiïOÖoöUÜuüYŸyÿ"},
[LCED-LIGS]     {L'¸',       L"CÇcçGĢKĶkķLĻlļNŅnņRŖrŗSŞsşTŢtţ"},
[LTIL-LIGS]     {L'˜',       L"AÃaãIĨiĩıĩNÑnñOÕoõUŨuũ"},
[LBRV-LIGS]     {L'˘',       L"AĂaăEĔeĕGĞgğIĬiĭıĭOŎoŏUŬuŭ"},
[LRNG-LIGS]     {L'˚',       L"AÅaåUŮuů"},
[LDOT-LIGS]     {L'˙',      L"CĊcċEĖeėGĠgġIİLĿlŀZŻzż"},
[LDTB-LIGS]     {L'.',  L""},
[LFRN-LIGS]     {L'⌢',     L"AÂaâCĈcĉEÊeêGĜgĝHĤhĥIÎiîıîJĴjĵOÔoôSŜsŝUÛuûWŴwŵYŶyŷ"},
[LFRB-LIGS]     {L'̯',       L""},
[LOGO-LIGS]     {L'˛',      L"AĄaąEĘeęIĮiįıįUŲuų"},
[LMAC-LIGS]     {L'¯',       L"AĀaāEĒeēIĪiīıīOŌoōUŪuū"},
[LHCK-LIGS]     {L'ˇ', L"CČcčDĎdďEĚeěLĽlľNŇnňRŘrřSŠsšTŤtťZŽzž"},
[LASP-LIGS]     {L'ʽ',       L""},
[LLEN-LIGS]     {L'ʼ',       L""},
[LBRB-LIGS]     {L'̮',       L""}    /* U+032E combining breve below: must be exactly one rune */
};

/*
 * Rune strings for codes that stand for more than one rune,
 * indexed by code minus MULTI.
 * NOTE(review): the users of this table are outside this section;
 * confirm how the codes are produced before changing any entry.
 */
Rune *multitab[Nmulti] = {
[MAAS-MULTI]    L"ʽα",
[MALN-MULTI]    L"ʼα",
[MAND-MULTI]    L"and",
[MAOQ-MULTI]    L"a/q",
[MBRA-MULTI]    L"<|",
[MDD-MULTI]     L"..",
[MDDD-MULTI]    L"...",
[MEAS-MULTI]    L"ʽε",
[MELN-MULTI]    L"ʼε",
[MEMM-MULTI]    L"——",
[MHAS-MULTI]    L"ʽη",
[MHLN-MULTI]    L"ʼη",
[MIAS-MULTI]    L"ʽι",
[MILN-MULTI]    L"ʼι",
[MLCT-MULTI]    L"ct",
[MLFF-MULTI]    L"ff",
[MLFFI-MULTI]   L"ffi",
[MLFFL-MULTI]   L"ffl",
[MLFL-MULTI]    L"fl",
[MLFI-MULTI]    L"fi",
[MLLS-MULTI]    L"ɫɫ",          /* two U+026B (l with middle tilde) */
[MLST-MULTI]    L"st",
[MOAS-MULTI]    L"ʽο",
[MOLN-MULTI]    L"ʼο",
[MOR-MULTI]     L"or",
[MRAS-MULTI]    L"ʽρ",
[MRLN-MULTI]    L"ʼρ",
[MTT-MULTI]     L"~~",
[MUAS-MULTI]    L"ʽυ",
[MULN-MULTI]    L"ʼυ",
[MWAS-MULTI]    L"ʽω",
[MWLN-MULTI]    L"ʼω",
[MOE-MULTI]     L"oe",
[MES-MULTI]     L"  ",
};

/* saved translation tables, pushed and popped by changett */
static Rune     *ttabstack[20];
/* number of entries of ttabstack currently in use */
static int      ntt;

/*
 * Binary search of tab, an array of n Assocs that must be
 * sorted by key (strcmp order).  Returns the val stored with
 * key, or -1 when key does not occur in tab.
 */
long
lookassoc(Assoc *tab, int n, char *key)
{
        long lo, hi, mid;
        int cmp;

        /* any match lies strictly between lo and hi */
        lo = -1;
        hi = n;
        while(hi > lo+1){
                mid = (hi+lo)/2;
                cmp = strcmp(key, tab[mid].key);
                if(cmp == 0)
                        return tab[mid].val;
                if(cmp < 0)
                        hi = mid;
                else
                        lo = mid;
        }
        return -1;
}

/*
 * Numeric-key counterpart of lookassoc: binary search of tab,
 * an array of n Nassocs in ascending key order.  Returns the
 * val stored with key, or -1 when key does not occur in tab.
 */
long
looknassoc(Nassoc *tab, int n, long key)
{
        long lo, hi, mid;

        /* any match lies strictly between lo and hi */
        lo = -1;
        hi = n;
        while(hi > lo+1){
                mid = (hi+lo)/2;
                if(key == tab[mid].key)
                        return tab[mid].val;
                if(key < tab[mid].key)
                        hi = mid;
                else
                        lo = mid;
        }
        return -1;
}

/*
 * Format a print-style message and write it to file descriptor 2,
 * prefixed by argv0 and followed by a newline.  The formatted
 * text is limited to the size of the local buffer.
 */
void
err(char *fmt, ...)
{
        va_list ap;
        char msg[1000];

        va_start(ap, fmt);
        vsnprint(msg, sizeof msg, fmt, ap);
        va_end(ap);
        fprint(2, "%s: %s\n", argv0, msg);
}

/*
 * Emit rune r on bout, counting the characters on the current
 * line.  Once the count has passed breaklen, the next blank is
 * replaced by a newline and the count starts over.
 * Nothing is written while outinhibit is set.
 */
void
outrune(long r)
{
        if(outinhibit)
                return;
        linelen++;
        if(linelen <= breaklen || r != L' ') {
                Bputrune(bout, r);
                return;
        }
        /* line is long enough and we are at a blank: break here */
        Bputc(bout, '\n');
        linelen = 0;
}

/*
 * Send each rune of the zero-terminated string rp through outrune.
 */
void
outrunes(Rune *rp)
{
        for(; *rp != 0; rp++)
                outrune(*rp);
}

/*
 * Same as outrune, for use when the argument is known to be
 * a char: it is written with Bputc.
 */
void
outchar(int c)
{
        if(outinhibit)
                return;
        linelen++;
        if(c == ' ' && linelen > breaklen) {
                /* past the break length at a blank: start a new line */
                linelen = 0;
                Bputc(bout, '\n');
                return;
        }
        Bputc(bout, c);
}

/*
 * Send each char of the NUL-terminated string s through outchar.
 */
void
outchars(char *s)
{
        for(; *s != 0; s++)
                outchar(*s);
}

/*
 * Format a print-style message into a local buffer and pass
 * the result to outchars.  The formatted text is limited to
 * the size of that buffer.
 */
void
outprint(char *fmt, ...)
{
        va_list ap;
        char text[1000];

        va_start(ap, fmt);
        vsnprint(text, sizeof text, fmt, ap);
        va_end(ap);
        outchars(text);
}

/*
 * Output the chars in [b, e) with outchar, turning each newline
 * into a blank and dropping a blank that directly follows
 * another blank (after that conversion).
 */
void
outpiece(char *b, char *e)
{
        int ch, prev;

        for(prev = 0; b < e; b++) {
                ch = *b;
                if(ch == '\n')
                        ch = ' ';
                if(ch != ' ' || prev != ' ')
                        outchar(ch);
                prev = ch;
        }
}

/*
 * Move to the start of a fresh line unless already there.
 * ind == 0: just end the current line if it has anything on it.
 * ind != 0: also leave the line indented by two blanks;
 * ind > 1: additionally leave a blank line first.
 * Small hack: a current line of only one or two characters is
 * assumed to consist of blanks, so it is padded out to the
 * indent instead of being ended.
 * Nothing is written while outinhibit is set.
 */
void
outnl(int ind)
{
        if(outinhibit)
                return;
        if(ind == 0) {
                if(linelen != 0) {
                        Bputc(bout, '\n');
                        linelen = 0;
                }
                return;
        }
        if(ind > 1) {
                if(linelen > 2)
                        Bputc(bout, '\n');
                Bprint(bout, "\n  ");
        } else {
                switch(linelen) {
                case 0:
                        Bprint(bout, "  ");
                        break;
                case 1:
                        Bputc(bout, ' ');
                        break;
                case 2:
                        /* already sitting at the indent */
                        break;
                default:
                        Bprint(bout, "\n  ");
                        break;
                }
        }
        linelen = 2;
}

/*
 * Fold the zero-terminated rune string rp in place, following
 * the sort(1) notion of folding: each rune is reduced to its
 * base rune (accent removed) and uppercase becomes lowercase.
 */
void
fold(Rune *rp)
{
        Rune base;

        for(; *rp != 0; rp++) {
                base = tobaserune(*rp);
                *rp = isupperrune(base) ? tolowerrune(base) : base;
        }
}

/*
 * Folding as in fold, but reading the UTF string old and writing
 * the folded UTF string to new, which the caller must have made
 * large enough.  old is a regular expression; its metacharacters
 * are known not to be changed by folding.
 */
void
foldre(char *new, char *old)
{
        Rune r;

        while(*old != 0) {
                old += chartorune(&r, old);
                r = tobaserune(r);
                r = isupperrune(r) ? tolowerrune(r) : r;
                new += runetochar(new, &r);
        }
        *new = 0;
}

/*
 * Compare the zero-terminated rune strings s and t.
 * Result:
 *      -2      s sorts strictly before t
 *      -1      s is a proper prefix of t
 *       0      s and t are equal
 *       1      t is a proper prefix of s
 *       2      t sorts strictly before s
 */
int
acomp(Rune *s, Rune *t)
{
        int cs, ct;

        /* skip the common prefix */
        for(; *s == *t; s++, t++)
                if(*s == 0)
                        return 0;
        cs = *s;
        ct = *t;
        if(cs == 0)
                return -1;
        if(ct == 0)
                return 1;
        return cs < ct ? -2 : 2;
}

/*
 * Copy the zero-terminated rune string 'from' into 'to',
 * terminator included.
 */
void
runescpy(Rune *to, Rune *from)
{
        Rune r;

        do {
                r = *from++;
                *to++ = r;
        } while(r != 0);
}

/*
 * Convert the leading run of decimal digits in r to a long.
 * Conversion stops at the first rune that is not '0'..'9';
 * the result is 0 if there are no digits.  Overflow is not detected.
 */
long
runetol(Rune *r)
{
        int c;
        long n;

        n = 0;
        c = *r;
        while(c >= L'0' && c <= L'9') {
                n = n*10 + (c - '0');
                c = *++r;
        }
        return n;
}

/*
 * Look up the accented form of r under accent acc.
 * acc must lie in [LIGS..LIGE-1]; anything else yields NONE.
 * Returns the accented rune listed for r in that accent's
 * pairs, or NONE if r is not listed.
 */
Rune
liglookup(Rune acc, Rune r)
{
        Rune *pair;

        if(acc >= LIGS && acc < LIGE) {
                /* pairs holds <plain, accented> runes, two per step */
                pair = ligtab[acc-LIGS].pairs;
                while(*pair != 0) {
                        if(pair[0] == r)
                                return pair[1];
                        pair += 2;
                }
        }
        return NONE;
}

/*
 * Translation table stack (a translation table is an array of
 * Runes indexed by bytes or 7-bit bytes).
 *      curtab == 0:    reset the stack and return 0.
 *      starting != 0:  push curtab and return newtab.
 *      starting == 0:  pop the most recently pushed table and return it.
 * If the stack is full on a push, or empty on a pop, curtab is
 * returned unchanged; the problem is reported only when debug is set.
 */
Rune *
changett(Rune *curtab, Rune *newtab, int starting)
{
        if(curtab == 0) {
                ntt = 0;
                return 0;
        }
        if(!starting) {
                if(ntt != 0)
                        return ttabstack[--ntt];
                if(debug)
                        err("translation stack underflow");
                return curtab;
        }
        if(ntt < asize(ttabstack)) {
                ttabstack[ntt++] = curtab;
                return newtab;
        }
        if(debug)
                err("translation stack overflow");
        return curtab;
}