Subversion Repositories planix.SVN

Rev

Rev 2 | Blame | Compare with Previous | Last modification | View Log | RSS feed

/*
 * wordwin.c
 * Copyright (C) 2002-2005 A.J. van Os; Released under GPL
 *
 * Description:
 * Deal with the WIN internals of a MS Word file
 */

#include "antiword.h"


/*
 * bGetDocumentText - make a list of the text blocks of a Word document
 *
 * pFile        the Word file; handed on to vSplitBlockList
 * aucHeader    the header block of that file
 *
 * Fast saved and encrypted documents (bits 2 and 8 of the status word at
 * header offset 0x0a) are rejected with an error message. For all other
 * documents a single text block is added that starts at the offset found
 * at header offset 0x18 and whose length is the sum of the text, footnote,
 * header/footer, macro and annotation lengths. vSplitBlockList is then
 * given those five separate lengths.
 *
 * Return TRUE when successful, otherwise FALSE
 */
static BOOL
bGetDocumentText(FILE *pFile, const UCHAR *aucHeader)
{
        text_block_type tTextBlock;
        ULONG   ulBeginOfText;
        ULONG   ulTextLen, ulFootnoteLen;
        ULONG   ulHdrFtrLen, ulMacroLen, ulAnnotationLen;
        UINT    uiQuickSaves;
        USHORT  usDocStatus;
        BOOL    bTemplate, bFastSaved, bEncrypted, bSuccess;

        fail(pFile == NULL);
        fail(aucHeader == NULL);

        DBG_MSG("bGetDocumentText");

        /* Get the status flags from the header */
        usDocStatus = usGetWord(0x0a, aucHeader);
        DBG_HEX(usDocStatus);
        /* bTemplate and uiQuickSaves are only used for debug output */
        bTemplate = (usDocStatus & BIT(0)) != 0;
        DBG_MSG_C(bTemplate, "This document is a Template");
        bFastSaved = (usDocStatus & BIT(2)) != 0;
        uiQuickSaves = (UINT)(usDocStatus & 0x00f0) >> 4;
        DBG_MSG_C(bFastSaved, "This document is Fast Saved");
        DBG_DEC_C(bFastSaved, uiQuickSaves);
        if (bFastSaved) {
                werr(0, "Word2: fast saved documents are not supported yet");
                return FALSE;
        }
        bEncrypted = (usDocStatus & BIT(8)) != 0;
        if (bEncrypted) {
                werr(0, "Encrypted documents are not supported");
                return FALSE;
        }

        /* Get length information */
        ulBeginOfText = ulGetLong(0x18, aucHeader);
        DBG_HEX(ulBeginOfText);
        ulTextLen = ulGetLong(0x34, aucHeader);
        ulFootnoteLen = ulGetLong(0x38, aucHeader);
        ulHdrFtrLen = ulGetLong(0x3c, aucHeader);
        ulMacroLen = ulGetLong(0x40, aucHeader);
        ulAnnotationLen = ulGetLong(0x44, aucHeader);
        DBG_DEC(ulTextLen);
        DBG_DEC(ulFootnoteLen);
        DBG_DEC(ulHdrFtrLen);
        DBG_DEC(ulMacroLen);
        DBG_DEC(ulAnnotationLen);

        /*
         * Fast saved documents were rejected above, so no second test is
         * needed here: all the text is described by one single block
         */
        tTextBlock.ulFileOffset = ulBeginOfText;
        tTextBlock.ulCharPos = ulBeginOfText;
        tTextBlock.ulLength = ulTextLen +
                        ulFootnoteLen +
                        ulHdrFtrLen + ulMacroLen + ulAnnotationLen;
        tTextBlock.bUsesUnicode = FALSE;
        tTextBlock.usPropMod = IGNORE_PROPMOD;
        bSuccess = bAdd2TextBlockList(&tTextBlock);
        /* Show the rejected block in the debug output */
        DBG_HEX_C(!bSuccess, tTextBlock.ulFileOffset);
        DBG_HEX_C(!bSuccess, tTextBlock.ulCharPos);
        DBG_DEC_C(!bSuccess, tTextBlock.ulLength);
        DBG_DEC_C(!bSuccess, tTextBlock.bUsesUnicode);
        DBG_DEC_C(!bSuccess, tTextBlock.usPropMod);

        if (!bSuccess) {
                /* Don't leave a half-built list behind */
                vDestroyTextBlockList();
                werr(0, "I can't find the text of this document");
                return FALSE;
        }

        /* The three zeros and FALSE are fixed arguments for this format */
        vSplitBlockList(pFile,
                        ulTextLen,
                        ulFootnoteLen,
                        ulHdrFtrLen,
                        ulMacroLen,
                        ulAnnotationLen,
                        0,
                        0,
                        0,
                        FALSE);
        return bSuccess;
} /* end of bGetDocumentText */

/*
 * vGetDocumentData - make a list of the data blocks of a Word document
 *
 * pFile        not used by this function
 * aucHeader    the header block of the Word file
 *
 * When images are present and wanted, the area between the offsets found
 * at header offsets 0x1c and 0xa0 is added as one data block. In every
 * other case the data block list is destroyed.
 */
static void
vGetDocumentData(FILE *pFile, const UCHAR *aucHeader)
{
        data_block_type tDataBlock;
        options_type    tOptions;
        ULONG   ulEndOfText, ulBeginCharInfo;
        USHORT  usDocStatus;
        BOOL    bImagesWanted, bSuccess;

        /* Get the options */
        vGetOptions(&tOptions);

        /* Get the status flags from the header */
        usDocStatus = usGetWord(0x0a, aucHeader);
        DBG_HEX(usDocStatus);

        /*
         * Data blocks are only needed when the document has images (bit 3),
         * the output is not one of the text-only kinds and images have not
         * been switched off
         */
        bImagesWanted = (usDocStatus & BIT(3)) != 0 &&
                tOptions.eConversionType != conversion_text &&
                tOptions.eConversionType != conversion_fmt_text &&
                tOptions.eConversionType != conversion_xml &&
                tOptions.eImageLevel != level_no_images;
        if (!bImagesWanted) {
                vDestroyDataBlockList();
                return;
        }

        /* A fast saved document (bit 2) never yields a data block here */
        bSuccess = FALSE;
        if ((usDocStatus & BIT(2)) == 0) {
                /* This datablock is too big, but it contains all images */
                ulEndOfText = ulGetLong(0x1c, aucHeader);
                DBG_HEX(ulEndOfText);
                ulBeginCharInfo = ulGetLong(0xa0, aucHeader);
                DBG_HEX(ulBeginCharInfo);
                if (ulBeginCharInfo > ulEndOfText) {
                        tDataBlock.ulFileOffset = ulEndOfText;
                        tDataBlock.ulDataPos = ulEndOfText;
                        tDataBlock.ulLength = ulBeginCharInfo - ulEndOfText;
                        bSuccess = bAdd2DataBlockList(&tDataBlock);
                        DBG_HEX_C(!bSuccess, tDataBlock.ulFileOffset);
                        DBG_HEX_C(!bSuccess, tDataBlock.ulDataPos);
                        DBG_DEC_C(!bSuccess, tDataBlock.ulLength);
                } else {
                        /* An empty area is fine, a negative one is not */
                        bSuccess = ulBeginCharInfo == ulEndOfText;
                }
        }

        if (bSuccess) {
                return;
        }
        vDestroyDataBlockList();
        werr(0, "I can't find the data of this document");
} /* end of vGetDocumentData */

/*
 * iInitDocumentWIN - initialize a WIN document
 *
 * pFile        the Word file
 * lFilesize    the size of that file in bytes
 *
 * Reads the header block from the start of the file, determines the Word
 * version and, for version 1 or 2, collects the text blocks, the data
 * blocks, the property information, the default tab width and the notes
 * information.
 *
 * Returns the version of Word that made the document or -1
 */
int
iInitDocumentWIN(FILE *pFile, long lFilesize)
{
        int     iWordVersion;
        BOOL    bSuccess;
        USHORT  usIdent;
        /* The header block; its size is used below via sizeof */
        UCHAR   aucHeader[384];

        fail(pFile == NULL);

        /* A file that is smaller than the header block can't be valid */
        if (lFilesize < (long)sizeof(aucHeader)) {
                return -1;
        }

        /* Read the headerblock */
        if (!bReadBytes(aucHeader, sizeof(aucHeader), 0x00, pFile)) {
                return -1;
        }
        /* Get the "magic number" from the header */
        usIdent = usGetWord(0x00, aucHeader);
        DBG_HEX(usIdent);
        /*
         * NOTE(review): the magic number is only checked with fail();
         * presumably the caller has verified it already - confirm
         */
        fail(usIdent != 0xa59b &&       /* WinWord 1.x */
                usIdent != 0xa5db);     /* WinWord 2.0 */
        iWordVersion = iGetVersionNumber(aucHeader);
        if (iWordVersion != 1 && iWordVersion != 2) {
                werr(0, "This file is not from 'Win Word 1 or 2'.");
                return -1;
        }
        bSuccess = bGetDocumentText(pFile, aucHeader);
        if (!bSuccess) {
                /* No text found, so there is nothing else to collect */
                return -1;
        }
        vGetDocumentData(pFile, aucHeader);
        vGetPropertyInfo(pFile, NULL,
                        NULL, 0, NULL, 0,
                        aucHeader, iWordVersion);
        vSetDefaultTabWidth(pFile, NULL,
                        NULL, 0, NULL, 0,
                        aucHeader, iWordVersion);
        vGetNotesInfo(pFile, NULL,
                        NULL, 0, NULL, 0,
                        aucHeader, iWordVersion);
        return iWordVersion;
} /* end of iInitDocumentWIN */