Subversion Repositories planix.SVN

Rev

Blame | Last modification | View Log | RSS feed

/*
 * stylelist.c
 * Copyright (C) 1998-2005 A.J. van Os; Released under GNU GPL
 *
 * Description:
 * Build, read and destroy a list of Word style information
 */

#include <stdlib.h>
#include <stddef.h>
#include <ctype.h>
#include "antiword.h"


/*
 * Private structure to hide the way the information
 * is stored from the rest of the program
 *
 * One member of the singly linked Style Information List. Callers outside
 * this file only ever see a pointer to the embedded tInfo member.
 */
typedef struct style_mem_tag {
        /* Copy of the style block handed to vAdd2StyleInfoList */
        style_block_type        tInfo;
        /* Value of ulGetSeqNumber() for tInfo.ulFileOffset */
        ULONG                   ulSequenceNumber;
        /* Next member of the list, NULL for the last member */
        struct style_mem_tag    *pNext;
} style_mem_type;

/*
 * Variables needed to write the Style Information List:
 * pAnchor is the first member of the list (NULL when the list is empty),
 * pStyleLast is the member that was appended most recently
 */
static style_mem_type   *pAnchor = NULL;
static style_mem_type   *pStyleLast = NULL;
/* The type of conversion; fetched lazily by vConvertListCharacter */
static conversion_type  eConversionType = conversion_unknown;
/* The character set encoding; fetched lazily by vConvertListCharacter */
static encoding_type    eEncoding = encoding_neutral;
/*
 * Values for efficiency reasons:
 * pMidPtr advances one member for every second member appended, so it
 * stays near the middle of the list; usGetIstd uses it as a shortcut
 * bMoveMidPtr is the toggle that says whether the next append moves pMidPtr
 * bInSequence stays TRUE while every appended member has a sequence number
 * greater than its predecessor
 */
static const style_mem_type     *pMidPtr = NULL;
static BOOL             bMoveMidPtr = FALSE;
static BOOL             bInSequence = TRUE;


/*
 * vDestroyStyleInfoList - release the complete Style Information List
 *
 * Frees every member of the list and puts all bookkeeping variables
 * of the list back to their initial values.
 */
void
vDestroyStyleInfoList(void)
{
        style_mem_type  *pMember, *pFollowing;

        DBG_MSG("vDestroyStyleInfoList");

        /* Walk the chain; remember the successor before freeing a member */
        for (pMember = pAnchor; pMember != NULL; pMember = pFollowing) {
                pFollowing = pMember->pNext;
                pMember = xfree(pMember);
        }

        /* The list is empty again, so reset the head, tail and helpers */
        pAnchor = NULL;
        pStyleLast = NULL;
        pMidPtr = NULL;
        bMoveMidPtr = FALSE;
        bInSequence = TRUE;
} /* end of vDestroyStyleInfoList */

/*
 * usMapListChar2Unicode - map a list character code to a Unicode character
 *
 * Known codes are translated by the table below. Any other character in
 * the range 0xe000..0xf8ff and any other non-printable character below 0x80
 * is replaced by a default: LEFTWARDS ARROW for LIST_SPECIAL and
 * LIST_SPECIAL2, BULLET otherwise. All remaining characters are returned
 * unchanged.
 *
 * Returns the (possibly replaced) character
 */
static USHORT
usMapListChar2Unicode(UCHAR ucNFC, USHORT usListChar)
{
        switch (usListChar) {
        case 0x0000: case 0x00b7: case 0x00fe: case 0xf021: case 0xf043:
        case 0xf06c: case 0xf093: case 0xf0b7:
                return 0x2022;  /* BULLET */
        case 0x0096: case 0xf02d:
                return 0x2013;  /* EN DASH */
        case 0x00a8:
                return 0x2666;  /* BLACK DIAMOND SUIT */
        case 0x00de:
                return 0x21d2;  /* RIGHTWARDS DOUBLE ARROW */
        case 0x00e0: case 0xf074:
                return 0x25ca;  /* LOZENGE */
        case 0x00e1:
                return 0x2329;  /* LEFT ANGLE BRACKET */
        case 0xf020:
                return 0x0020;  /* SPACE */
        case 0xf041:
                return 0x270c;  /* VICTORY HAND */
        case 0xf066:
                return 0x03d5;  /* GREEK PHI SYMBOL */
        case 0xf06e:
                return 0x25a0;  /* BLACK SQUARE */
        case 0xf06f: case 0xf070: case 0xf0a8:
                return 0x25a1;  /* WHITE SQUARE */
        case 0xf071:
                return 0x2751;  /* LOWER RIGHT SHADOWED WHITE SQUARE */
        case 0xf075: case 0xf077:
                return 0x25c6;  /* BLACK DIAMOND */
        case 0xf076:
                return 0x2756;  /* BLACK DIAMOND MINUS WHITE X */
        case 0xf0a7:
                return 0x25aa;  /* BLACK SMALL SQUARE */
        case 0xf0d8:
                return 0x27a2;  /* RIGHTWARDS ARROWHEAD */
        case 0xf0e5:
                return 0x2199;  /* SOUTH WEST ARROW */
        case 0xf0f0:
                return 0x21e8;  /* RIGHTWARDS WHITE ARROW */
        case 0xf0fc:
                return 0x2713;  /* CHECK MARK */
        default:
                break;
        }

        if ((usListChar >= 0xe000 && usListChar < 0xf900) ||
            (usListChar < 0x80 && !isprint((int)usListChar))) {
                /*
                 * All remaining private area characters and all
                 * remaining non-printable ASCII characters to their
                 * default bullet character
                 */
                DBG_HEX(usListChar);
                DBG_FIXME();
                if (ucNFC == LIST_SPECIAL || ucNFC == LIST_SPECIAL2) {
                        return 0x2190;  /* LEFTWARDS ARROW */
                }
                return 0x2022;  /* BULLET */
        }
        return usListChar;
} /* end of usMapListChar2Unicode */

/*
 * vListChar2SingleByte - store a one character stand-in for a Unicode char
 *
 * Used when the output encoding is not UTF-8. Writes the stand-in to
 * szListChar[0]; a BULLET and every character without a stand-in of its
 * own get the value supplied by vGetBulletValue for the current conversion
 * type and encoding. The caller terminates the string.
 */
static void
vListChar2SingleByte(USHORT usListChar, char *szListChar)
{
        switch (usListChar) {
        case 0x03d5: case 0x25a1: case 0x25c6: case 0x25ca:
        case 0x2751:
                szListChar[0] = 'o';
                break;
        case 0x2013: case 0x2500:
                szListChar[0] = '-';
                break;
        case 0x2190: case 0x2199: case 0x2329:
                szListChar[0] = '<';
                break;
        case 0x21d2:
                szListChar[0] = '=';
                break;
        case 0x21e8: case 0x27a2:
                szListChar[0] = '>';
                break;
        case 0x25a0: case 0x25aa:
                szListChar[0] = '.';
                break;
        case 0x2666:
                szListChar[0] = OUR_DIAMOND;
                break;
        case 0x270c:
                szListChar[0] = 'x';
                break;
        case 0x2713:
                szListChar[0] = 'V';
                break;
        case 0x2756:
                szListChar[0] = '*';
                break;
        case 0x2022:
        default:
                vGetBulletValue(eConversionType, eEncoding,
                                szListChar, 2);
                break;
        }
} /* end of vListChar2SingleByte */

/*
 * vConvertListCharacter - convert the list character
 *
 * ucNFC        the number format code of the list
 * usListChar   the list character as found in the style information
 * szListChar   receives the converted character as a NUL-terminated
 *              string; must be an empty string on entry. In UTF-8 mode
 *              tUcs2Utf8 is called with a maximum length of 4.
 *              NOTE(review): presumably the buffer is 4 bytes - confirm
 *              against the declaration of szListChar.
 *
 * Printable ASCII is copied as is. For every number format other than
 * LIST_SPECIAL, LIST_SPECIAL2 and LIST_BULLETS the result is ".". In all
 * other cases the character is mapped to Unicode and then written either
 * as UTF-8 or as a single character stand-in, depending on the encoding.
 */
static void
vConvertListCharacter(UCHAR ucNFC, USHORT usListChar, char *szListChar)
{
        options_type    tOptions;
        size_t  tLen;

        fail(szListChar == NULL);
        fail(szListChar[0] != '\0');

        if (usListChar < 0x80 && isprint((int)usListChar)) {
                DBG_CHR_C(isalnum((int)usListChar), usListChar);
                szListChar[0] = (char)usListChar;
                szListChar[1] = '\0';
                return;
        }

        if (ucNFC != LIST_SPECIAL &&
            ucNFC != LIST_SPECIAL2 &&
            ucNFC != LIST_BULLETS) {
                szListChar[0] = '.';
                szListChar[1] = '\0';
                return;
        }

        /* Fetch the options only once; they are cached in file statics */
        if (eConversionType == conversion_unknown ||
            eEncoding == encoding_neutral) {
                vGetOptions(&tOptions);
                eConversionType = tOptions.eConversionType;
                eEncoding = tOptions.eEncoding;
        }

        usListChar = usMapListChar2Unicode(ucNFC, usListChar);

        if (eEncoding == encoding_utf_8) {
                tLen = tUcs2Utf8(usListChar, szListChar, 4);
        } else {
                vListChar2SingleByte(usListChar, szListChar);
                tLen = 1;
        }
        /* Single point of termination for both encodings */
        szListChar[tLen] = '\0';
} /* end of vConvertListCharacter */

/*
 * eGetNumType - translate a level number into a level type
 *
 * Levels 1 through 9 are outline levels, 10 is numbering, 11 is sequence
 * and 12 is pause. Every other value has no level type.
 *
 * Returns the level type
 */
level_type_enum
eGetNumType(UCHAR ucNumLevel)
{
        if (ucNumLevel >= 1 && ucNumLevel <= 9) {
                return level_type_outline;
        }
        if (ucNumLevel == 10) {
                return level_type_numbering;
        }
        if (ucNumLevel == 11) {
                return level_type_sequence;
        }
        if (ucNumLevel == 12) {
                return level_type_pause;
        }
        return level_type_none;
} /* end of eGetNumType */

/*
 * vCorrectStyleValues - adjust the style values Antiword cannot use
 *
 * The given style block is changed in place:
 * - the before and after indents are reset to zero when above 0x7fff
 *   (NOTE(review): presumably these are negative values - confirm),
 *   limited to 2160 twips and raised to HEADING_GAP for istd 1 to 9
 * - a negative left indent and a positive right indent are set to zero
 * - the list character is converted to its output string
 */
void
vCorrectStyleValues(style_block_type *pStyleBlock)
{
        BOOL    bHeadingStyle;

        /* The istd is not changed here, so test it only once */
        bHeadingStyle = pStyleBlock->usIstd >= 1 &&
                        pStyleBlock->usIstd <= 9;

        /* The space before the paragraph */
        if (pStyleBlock->usBeforeIndent > 0x7fff) {
                pStyleBlock->usBeforeIndent = 0;
        } else if (pStyleBlock->usBeforeIndent > 2160) {
                /* 2160 twips = 1.5 inches or 38.1 mm */
                DBG_DEC(pStyleBlock->usBeforeIndent);
                pStyleBlock->usBeforeIndent = 2160;
        }
        if (bHeadingStyle && pStyleBlock->usBeforeIndent < HEADING_GAP) {
                NO_DBG_DEC(pStyleBlock->usBeforeIndent);
                pStyleBlock->usBeforeIndent = HEADING_GAP;
        }

        /* The space after the paragraph, same rules */
        if (pStyleBlock->usAfterIndent > 0x7fff) {
                pStyleBlock->usAfterIndent = 0;
        } else if (pStyleBlock->usAfterIndent > 2160) {
                /* 2160 twips = 1.5 inches or 38.1 mm */
                DBG_DEC(pStyleBlock->usAfterIndent);
                pStyleBlock->usAfterIndent = 2160;
        }
        if (bHeadingStyle && pStyleBlock->usAfterIndent < HEADING_GAP) {
                NO_DBG_DEC(pStyleBlock->usAfterIndent);
                pStyleBlock->usAfterIndent = HEADING_GAP;
        }

        /* Indents that point the wrong way are dropped */
        if (pStyleBlock->sLeftIndent < 0) {
                pStyleBlock->sLeftIndent = 0;
        }
        if (pStyleBlock->sRightIndent > 0) {
                pStyleBlock->sRightIndent = 0;
        }

        vConvertListCharacter(pStyleBlock->ucNFC,
                        pStyleBlock->usListChar,
                        pStyleBlock->szListChar);
} /* end of vCorrectStyleValues */

/*
 * vAdd2StyleInfoList - append a style block to the Style Information List
 *
 * A block with file offset FC_INVALID is ignored. A block with the same
 * file offset as the member appended last replaces the information of that
 * member. Any other block is copied into a new member at the end of the
 * list. The stored copy is always passed through vCorrectStyleValues.
 */
void
vAdd2StyleInfoList(const style_block_type *pStyleBlock)
{
        style_mem_type  *pNewMember;

        fail(pStyleBlock == NULL);

        NO_DBG_MSG("bAdd2StyleInfoList");

        if (pStyleBlock->ulFileOffset == FC_INVALID) {
                NO_DBG_DEC(pStyleBlock->usIstd);
                return;
        }

        NO_DBG_HEX(pStyleBlock->ulFileOffset);
        NO_DBG_DEC_C(pStyleBlock->sLeftIndent != 0,
                                        pStyleBlock->sLeftIndent);
        NO_DBG_DEC_C(pStyleBlock->sRightIndent != 0,
                                        pStyleBlock->sRightIndent);
        NO_DBG_DEC_C(pStyleBlock->bNumPause, pStyleBlock->bNumPause);
        NO_DBG_DEC_C(pStyleBlock->usIstd != 0, pStyleBlock->usIstd);
        NO_DBG_DEC_C(pStyleBlock->usStartAt != 1, pStyleBlock->usStartAt);
        NO_DBG_DEC_C(pStyleBlock->usAfterIndent != 0,
                                        pStyleBlock->usAfterIndent);
        NO_DBG_DEC_C(pStyleBlock->ucAlignment != 0, pStyleBlock->ucAlignment);
        NO_DBG_DEC(pStyleBlock->ucNFC);
        NO_DBG_HEX(pStyleBlock->usListChar);

        if (pStyleLast != NULL &&
            pStyleBlock->ulFileOffset == pStyleLast->tInfo.ulFileOffset) {
                /*
                 * Same offset as the previous style: the newer style wins,
                 * so overwrite the stored information and correct it
                 */
                fail(pStyleLast->pNext != NULL);
                pStyleLast->tInfo = *pStyleBlock;
                vCorrectStyleValues(&pStyleLast->tInfo);
                return;
        }

        /* Build the new member: copy, sequence number, corrections */
        pNewMember = xmalloc(sizeof(style_mem_type));
        pNewMember->tInfo = *pStyleBlock;
        pNewMember->pNext = NULL;
        pNewMember->ulSequenceNumber =
                        ulGetSeqNumber(pNewMember->tInfo.ulFileOffset);
        vCorrectStyleValues(&pNewMember->tInfo);

        if (pAnchor == NULL) {
                /* First member: it is head, tail and middle at once */
                pAnchor = pNewMember;
                pMidPtr = pAnchor;
                bMoveMidPtr = FALSE;
                bInSequence = TRUE;
                pStyleLast = pNewMember;
                return;
        }

        /* Hook the new member behind the current tail */
        fail(pStyleLast == NULL);
        pStyleLast->pNext = pNewMember;

        /* The middle pointer moves on every second append */
        if (bMoveMidPtr) {
                pMidPtr = pMidPtr->pNext;
        }
        bMoveMidPtr = bMoveMidPtr ? FALSE : TRUE;

        /* Once out of sequence, the list stays out of sequence */
        if (bInSequence &&
            pNewMember->ulSequenceNumber <= pStyleLast->ulSequenceNumber) {
                bInSequence = FALSE;
        }
        pStyleLast = pNewMember;
} /* end of vAdd2StyleInfoList */

/*
 * pGetNextStyleInfoListItem - get the successor of a style record
 *
 * Get the record that follows the given record in the Style Information
 * List. pCurr must be NULL or a pointer returned earlier by this function,
 * because the list member is recovered from the address of its embedded
 * style block.
 *
 * Returns the first record when pCurr is NULL, otherwise the record after
 * pCurr; NULL when there is no such record
 */
const style_block_type *
pGetNextStyleInfoListItem(const style_block_type *pCurr)
{
        const style_mem_type    *pRecord;

        if (pCurr == NULL) {
                if (pAnchor == NULL) {
                        /* There are no records */
                        return NULL;
                }
                /* The first record is the only one without a predecessor */
                return &pAnchor->tInfo;
        }
        /*
         * Step back from the embedded block to its enclosing list member.
         * The casts keep the const qualifier all the way; the detour via
         * void pointer prevents alignment warnings.
         */
        pRecord = (const style_mem_type *)(const void *)
                ((const char *)pCurr - offsetof(style_mem_type, tInfo));
        fail(pCurr != &pRecord->tInfo);
        if (pRecord->pNext == NULL) {
                /* The last record has no successor */
                return NULL;
        }
        return &pRecord->pNext->tInfo;
} /* end of pGetNextStyleInfoListItem */

/*
 * pGetNextTextStyle - get the next style that belongs to the text
 *
 * Works like pGetNextStyleInfoListItem, but skips every record that is
 * part of the header/footer list, the macro list or the annotation list.
 *
 * Returns the next text style record, or NULL when there is none
 */
const style_block_type *
pGetNextTextStyle(const style_block_type *pCurr)
{
        const style_block_type  *pItem;

        for (pItem = pGetNextStyleInfoListItem(pCurr);
             pItem != NULL;
             pItem = pGetNextStyleInfoListItem(pItem)) {
                if (pItem->eListID != hdrftr_list &&
                    pItem->eListID != macro_list &&
                    pItem->eListID != annotation_list) {
                        /* Not one of the skipped lists */
                        return pItem;
                }
        }
        return NULL;
} /* end of pGetNextTextStyle */

/*
 * usGetIstd - get the istd that belongs to the given file offset
 *
 * The file offset is converted to a sequence number. The wanted style is
 * the list member with the highest valid sequence number that does not
 * exceed that number.
 *
 * Returns the istd of that member, or ISTD_NORMAL when the offset has no
 * sequence number or no member qualifies
 */
USHORT
usGetIstd(ULONG ulFileOffset)
{
        const style_mem_type    *pScan, *pMatch;
        ULONG   ulSeq, ulBest, ulCandidate;

        ulSeq = ulGetSeqNumber(ulFileOffset);
        if (ulSeq == FC_INVALID) {
                return ISTD_NORMAL;
        }
        NO_DBG_HEX(ulFileOffset);
        NO_DBG_DEC(ulSeq);

        /*
         * In an ordered list the first half can be skipped when
         * the wanted number lies beyond the middle member
         */
        pScan = pAnchor;
        if (bInSequence &&
            pMidPtr != NULL &&
            pMidPtr->ulSequenceNumber < ulSeq) {
                pScan = pMidPtr;
        }

        pMatch = NULL;
        ulBest = 0;
        while (pScan != NULL) {
                ulCandidate = pScan->ulSequenceNumber;
                if (ulCandidate > ulSeq) {
                        /* Too far; in an ordered list nothing else fits */
                        if (bInSequence) {
                                break;
                        }
                } else if (ulCandidate != FC_INVALID &&
                           (pMatch == NULL || ulCandidate > ulBest)) {
                        pMatch = pScan;
                        ulBest = ulCandidate;
                }
                pScan = pScan->pNext;
        }
        NO_DBG_DEC(ulBest);

        if (pMatch != NULL) {
                NO_DBG_DEC(pMatch->tInfo.usIstd);
                return pMatch->tInfo.usIstd;
        }
        return ISTD_NORMAL;
} /* end of usGetIstd */

/*
 * bStyleImpliesList - does the style information imply a list paragraph
 *
 * Heading styles (istd 1 to 9) never imply a list. For Word versions
 * below 8 a non-zero number level marks a list paragraph, for later
 * versions a non-zero list index does.
 *
 * Returns TRUE when the paragraph is part of a list, otherwise FALSE
 */
BOOL
bStyleImpliesList(const style_block_type *pStyle, int iWordVersion)
{
        fail(pStyle == NULL);
        fail(iWordVersion < 0);

        if (pStyle->usIstd >= 1 && pStyle->usIstd <= 9) {
                /* These are heading levels */
                return FALSE;
        }
        /* Old style lists use the number level, new style the list index */
        return iWordVersion < 8 ?
                pStyle->ucNumLevel != 0 :
                pStyle->usListIndex != 0;
} /* end of bStyleImpliesList */