/*
 * Subversion Repositories planix.SVN
 *
 * Rev
 *
 * Blame | Last modification | View Log | RSS feed
 */

#include <u.h>
#include <libc.h>
#include <bio.h>

/*
 * Fibhdr is the unpacked form of the first bcFibhdr bytes of the file,
 * as filled in by readFibhdr.  Members are grouped below by the byte
 * offset they are decoded from.  Members that share one source byte are
 * each stored in a uchar of their own.
 *
 * The original declaration was marked as automatically generated.
 */
typedef struct Fibhdr Fibhdr;
struct Fibhdr {
        /* 16-bit values at offsets 0x00, 0x02, 0x04, 0x06 */
        ushort wIdent, nFib, nProduct, lid;

        /* 16-bit value at offset 0x08 */
        short pnNext;

        /* unpacked from the byte at offset 0x0A */
        uchar fDot, fGlsy, fComplex, fHasPic, cQuickSaves;

        /* unpacked from the byte at offset 0x0B */
        uchar fEncrypted, fWhichTblStm, fReadOnlyRecommended, fWriteReservation;
        uchar fExtChar, fLoadOverride, fFarEast, fCrypto;

        /* 16-bit value at offset 0x0C */
        ushort nFibBack;

        /* 32-bit value at offset 0x0E */
        ulong lKey;

        /* the whole byte at offset 0x12 */
        uchar envr;

        /* unpacked from the byte at offset 0x13 */
        uchar fMac, fEmptySpecial, fLoadOverridePage, fFutureSavedUndo, fWord97Saved;

        /* 16-bit values at offsets 0x14, 0x16 */
        ushort chs, chsTables;

        /* 32-bit values at offsets 0x18, 0x1C; main copies the bytes in [fcMin, fcMac) */
        long fcMin, fcMac;

        /* 16-bit value at offset 0x20 */
        ushort csw;
};

/* bcFibhdr: minimum number of input bytes readFibhdr insists on. */
enum { bcFibhdr = 0x22 };

/*
 * readFibhdr decodes a Fibhdr from the raw bytes v[0..nv) into *s.
 *
 *      s       destination structure; every member is overwritten
 *      v       raw header bytes
 *      nv      number of bytes available at v
 *
 * Multi-byte members are assembled low byte first.  Members that
 * share a source byte are masked out and shifted down so that
 * single-bit members hold 0 or 1.  Calls sysfatal if nv is smaller
 * than bcFibhdr.
 *
 * This routine was originally machine-generated, and the generator
 * wrote the decimal mask values 16, 32, 64, 128 and 240 behind a 0x
 * prefix.  As a result cQuickSaves only ever saw bit 6 and fCrypto
 * was always 0.  The masks below are the hand-corrected hexadecimal
 * values (0x10, 0x20, 0x40, 0x80, 0xF0).
 */
void
readFibhdr(Fibhdr *s, uchar *v, int nv)
{
        if(nv < bcFibhdr)
                sysfatal("not enough data for Fibhdr");

        s->wIdent = v[0x0] | (v[0x0+1] << 8);
        s->nFib = v[0x2] | (v[0x2+1] << 8);
        s->nProduct = v[0x4] | (v[0x4+1] << 8);
        s->lid = v[0x6] | (v[0x6+1] << 8);
        s->pnNext = v[0x8] | (v[0x8+1] << 8);

        /* byte 0xA: bits 0-3 are flags, bits 4-7 are a 4-bit count */
        s->fDot = (v[0xA] & 0x01) >> 0;
        s->fGlsy = (v[0xA] & 0x02) >> 1;
        s->fComplex = (v[0xA] & 0x04) >> 2;
        s->fHasPic = (v[0xA] & 0x08) >> 3;
        s->cQuickSaves = (v[0xA] & 0xF0) >> 4;

        /* byte 0xB: eight one-bit flags */
        s->fEncrypted = (v[0xB] & 0x01) >> 0;
        s->fWhichTblStm = (v[0xB] & 0x02) >> 1;
        s->fReadOnlyRecommended = (v[0xB] & 0x04) >> 2;
        s->fWriteReservation = (v[0xB] & 0x08) >> 3;
        s->fExtChar = (v[0xB] & 0x10) >> 4;
        s->fLoadOverride = (v[0xB] & 0x20) >> 5;
        s->fFarEast = (v[0xB] & 0x40) >> 6;
        s->fCrypto = (v[0xB] & 0x80) >> 7;

        s->nFibBack = v[0xC] | (v[0xC+1] << 8);

        /*
         * v[] promotes to int, so cast the top byte before shifting it
         * by 24 to keep it out of the sign bit of an unsigned result.
         */
        s->lKey = v[0xE] | (v[0xE+1] << 8) | (v[0xE+2] << 16) | ((ulong)v[0xE+3] << 24);

        s->envr = v[0x12];

        /* byte 0x13: bits 0-4 are flags; the remaining bits are not decoded */
        s->fMac = (v[0x13] & 0x01) >> 0;
        s->fEmptySpecial = (v[0x13] & 0x02) >> 1;
        s->fLoadOverridePage = (v[0x13] & 0x04) >> 2;
        s->fFutureSavedUndo = (v[0x13] & 0x08) >> 3;
        s->fWord97Saved = (v[0x13] & 0x10) >> 4;

        s->chs = v[0x14] | (v[0x14+1] << 8);
        s->chsTables = v[0x16] | (v[0x16+1] << 8);
        s->fcMin = v[0x18] | (v[0x18+1] << 8)| (v[0x18+2] << 16) | (v[0x18+3] << 24);
        s->fcMac = v[0x1C] | (v[0x1C+1] << 8)| (v[0x1C+2] << 16) | (v[0x1C+3] << 24);
        s->csw = v[0x20] | (v[0x20+1] << 8);
}

/*
 * usage writes the command synopsis to file descriptor 2 and
 * terminates the program with exit status "usage".
 */
void
usage(void)
{
        char *synopsis;

        synopsis = "usage: wordtext /mnt/doc/WordDocument\n";
        fprint(2, "%s", synopsis);
        exits("usage");
}

/*
 * main copies the text of a Word document to file descriptor 1.
 *
 * Exactly one argument is accepted: the file to read.  Any option
 * flag, or any other argument count, prints the usage message.
 * The first 512 bytes of the file are decoded with readFibhdr, then
 * the bytes in [fcMin, fcMac) are copied out, with the control
 * characters listed in the switch below replaced by newlines, tabs
 * or <placeholder> strings.  A final newline is always appended.
 *
 * Exit status: "word" if the file cannot be opened, "read" if the
 * header cannot be read in full, "seek" if the text cannot be
 * reached, "write" if the output cannot be flushed, and an empty
 * status otherwise.
 */
void
main(int argc, char **argv)
{
        Biobuf *b;
        Biobuf bout;
        uchar buf[512];
        Fibhdr f;
        int i, c, n;

        ARGBEGIN{
        default:
                usage();
        }ARGEND

        if(argc != 1)
                usage();

        Binit(&bout, 1, OWRITE);
        b = Bopen(argv[0], OREAD);
        if(b == nil) {
                fprint(2, "couldn't open file: %r\n");
                exits("word");
        }

        /*
         * Test with != rather than <.  Where sizeof yields an unsigned
         * type, a negative return from Bread would be converted to a
         * huge unsigned value and pass a less-than test.
         */
        n = Bread(b, buf, sizeof buf);
        if(n != sizeof buf) {
                fprint(2, "short read: %r\n");
                exits("read");
        }

        readFibhdr(&f, buf, sizeof buf);
        // printFibhdr(&f);

        /* fcMin comes from the file; do not copy from the wrong place if it is bogus */
        if(Bseek(b, f.fcMin, 0) < 0) {
                fprint(2, "couldn't seek to text: %r\n");
                exits("seek");
        }

        /* a negative length (fcMac < fcMin) simply copies nothing */
        n = f.fcMac - f.fcMin;
        for(i=0; i<n; i++) {
                c = Bgetc(b);
                if(c < 0)
                        break;

                switch(c) {
                case '\\':      Bprint(&bout, "\\");            break;  /* field escape */
                case 7:         Bprint(&bout, "\n");            break;  /* cell, row mark */
                case 9:         Bprint(&bout, "\t");            break;  /* tab */
                case 11:        Bprint(&bout, "\n");            break;  /* hard line break */
                case 12:        Bprint(&bout, "\n\n\n\n");      break;  /* page break */
                case 13:        Bprint(&bout, "\n\n");          break;  /* paragraph end */
                case 14:                                        break;  /* column break */
                case 19:        Bprint(&bout, "<");             break;  /* field begin */
                case 20:        Bprint(&bout, ":");             break;  /* field sep */
                case 21:        Bprint(&bout, ">");             break;  /* field end */
                case 30:        Bprint(&bout, "-");             break;  /* non-breaking hyphen */
                case 31:                                        break;  /* non-required hyphen */
                case 160:       Bprint(&bout, " ");             break;  /* non-breaking space */
                /* 45 (breaking hyphen) needs no case: the default prints it as "-" */

                /*
                 * The remaining codes are only supposed to get used
                 * when special is set, but we never see these ascii
                 * values otherwise anyway.
                 *
                 * Empirically, some documents have sections of text where
                 * every character is followed by a zero byte.  Some have sections
                 * of text where there are no zero bytes.  Still others have both
                 * types and alternate between them.  Until we parse which
                 * characters are ``special'', page numbers lose out
                 * (0 would otherwise print "<pageno>").
                 */
                case 0:                                         break;
                case 1:         Bprint(&bout, "<picture>");     break;
                case 2:         Bprint(&bout, "<footnote>");    break;
                case 3:         Bprint(&bout, "<footnote sep>");        break;
                case 4:         Bprint(&bout, "<footnote cont>");       break;
                case 5:         Bprint(&bout, "<animation>");   break;
                case 6:         Bprint(&bout, "<lineno>");      break;
                case 8:         Bprint(&bout, "<drawn object>");        break;
                case 10:        Bprint(&bout, "<abbrev date>"); break;
                case 15:        Bprint(&bout, "<Thursday>");    break;
                case 16:        Bprint(&bout, "<day of month>");        break;

                case 22:        Bprint(&bout, "<hour>");        break;
                case 23:        Bprint(&bout, "<hour hh>");     break;
                case 24:        Bprint(&bout, "<minute>");      break;
                case 25:        Bprint(&bout, "<minute mm>");   break;
                case 26:        Bprint(&bout, "<seconds>");     break;
                case 27:        Bprint(&bout, "<AM/PM>");       break;
                case 28:        Bprint(&bout, "<hh:mm:ss>");    break;
                case 29:        Bprint(&bout, "<date>");        break;

                /*
                 * Special meanings deliberately not handled, because the
                 * code is already claimed above or is printable ascii:
                 *       7 <hand picture>       11 <hh:mm:ss>
                 *      12 <section no>         14 <Thu>
                 *      30 <mm/dd/yy>           33 <mm>
                 *      34 <yyyy>               35 <yy>
                 *      36 <Feb>                37 <February>
                 *      38 <hh:mm>              39 <long date>
                 *      41 (nothing)
                 */

                default:
                        Bputc(&bout, c);
                        break;
                }
        }
        Bprint(&bout, "\n");

        Bterm(b);
        if(Bflush(&bout) < 0) {
                fprint(2, "write error: %r\n");
                exits("write");
        }

        /* exit explicitly so success is reported with an empty status */
        exits(nil);
}