/*
 * wordwin.c
 * Copyright (C) 2002-2005 A.J. van Os; Released under GPL
 *
 * Description:
 * Deal with the WIN internals of a MS Word file
 */
|
|
|
8 |
|
|
|
9 |
#include "antiword.h"
|
|
|
10 |
|
|
|
11 |
|
|
|
12 |
/*
|
|
|
13 |
* bGetDocumentText - make a list of the text blocks of a Word document
|
|
|
14 |
*
|
|
|
15 |
* Return TRUE when succesful, otherwise FALSE
|
|
|
16 |
*/
|
|
|
17 |
static BOOL
|
|
|
18 |
bGetDocumentText(FILE *pFile, const UCHAR *aucHeader)
|
|
|
19 |
{
|
|
|
20 |
text_block_type tTextBlock;
|
|
|
21 |
ULONG ulBeginOfText;
|
|
|
22 |
ULONG ulTextLen, ulFootnoteLen;
|
|
|
23 |
ULONG ulHdrFtrLen, ulMacroLen, ulAnnotationLen;
|
|
|
24 |
UINT uiQuickSaves;
|
|
|
25 |
USHORT usDocStatus;
|
|
|
26 |
BOOL bTemplate, bFastSaved, bEncrypted, bSuccess;
|
|
|
27 |
|
|
|
28 |
fail(pFile == NULL);
|
|
|
29 |
fail(aucHeader == NULL);
|
|
|
30 |
|
|
|
31 |
DBG_MSG("bGetDocumentText");
|
|
|
32 |
|
|
|
33 |
/* Get the status flags from the header */
|
|
|
34 |
usDocStatus = usGetWord(0x0a, aucHeader);
|
|
|
35 |
DBG_HEX(usDocStatus);
|
|
|
36 |
bTemplate = (usDocStatus & BIT(0)) != 0;
|
|
|
37 |
DBG_MSG_C(bTemplate, "This document is a Template");
|
|
|
38 |
bFastSaved = (usDocStatus & BIT(2)) != 0;
|
|
|
39 |
uiQuickSaves = (UINT)(usDocStatus & 0x00f0) >> 4;
|
|
|
40 |
DBG_MSG_C(bFastSaved, "This document is Fast Saved");
|
|
|
41 |
DBG_DEC_C(bFastSaved, uiQuickSaves);
|
|
|
42 |
if (bFastSaved) {
|
|
|
43 |
werr(0, "Word2: fast saved documents are not supported yet");
|
|
|
44 |
return FALSE;
|
|
|
45 |
}
|
|
|
46 |
bEncrypted = (usDocStatus & BIT(8)) != 0;
|
|
|
47 |
if (bEncrypted) {
|
|
|
48 |
werr(0, "Encrypted documents are not supported");
|
|
|
49 |
return FALSE;
|
|
|
50 |
}
|
|
|
51 |
|
|
|
52 |
/* Get length information */
|
|
|
53 |
ulBeginOfText = ulGetLong(0x18, aucHeader);
|
|
|
54 |
DBG_HEX(ulBeginOfText);
|
|
|
55 |
ulTextLen = ulGetLong(0x34, aucHeader);
|
|
|
56 |
ulFootnoteLen = ulGetLong(0x38, aucHeader);
|
|
|
57 |
ulHdrFtrLen = ulGetLong(0x3c, aucHeader);
|
|
|
58 |
ulMacroLen = ulGetLong(0x40, aucHeader);
|
|
|
59 |
ulAnnotationLen = ulGetLong(0x44, aucHeader);
|
|
|
60 |
DBG_DEC(ulTextLen);
|
|
|
61 |
DBG_DEC(ulFootnoteLen);
|
|
|
62 |
DBG_DEC(ulHdrFtrLen);
|
|
|
63 |
DBG_DEC(ulMacroLen);
|
|
|
64 |
DBG_DEC(ulAnnotationLen);
|
|
|
65 |
if (bFastSaved) {
|
|
|
66 |
bSuccess = FALSE;
|
|
|
67 |
} else {
|
|
|
68 |
tTextBlock.ulFileOffset = ulBeginOfText;
|
|
|
69 |
tTextBlock.ulCharPos = ulBeginOfText;
|
|
|
70 |
tTextBlock.ulLength = ulTextLen +
|
|
|
71 |
ulFootnoteLen +
|
|
|
72 |
ulHdrFtrLen + ulMacroLen + ulAnnotationLen;
|
|
|
73 |
tTextBlock.bUsesUnicode = FALSE;
|
|
|
74 |
tTextBlock.usPropMod = IGNORE_PROPMOD;
|
|
|
75 |
bSuccess = bAdd2TextBlockList(&tTextBlock);
|
|
|
76 |
DBG_HEX_C(!bSuccess, tTextBlock.ulFileOffset);
|
|
|
77 |
DBG_HEX_C(!bSuccess, tTextBlock.ulCharPos);
|
|
|
78 |
DBG_DEC_C(!bSuccess, tTextBlock.ulLength);
|
|
|
79 |
DBG_DEC_C(!bSuccess, tTextBlock.bUsesUnicode);
|
|
|
80 |
DBG_DEC_C(!bSuccess, tTextBlock.usPropMod);
|
|
|
81 |
}
|
|
|
82 |
|
|
|
83 |
if (bSuccess) {
|
|
|
84 |
vSplitBlockList(pFile,
|
|
|
85 |
ulTextLen,
|
|
|
86 |
ulFootnoteLen,
|
|
|
87 |
ulHdrFtrLen,
|
|
|
88 |
ulMacroLen,
|
|
|
89 |
ulAnnotationLen,
|
|
|
90 |
0,
|
|
|
91 |
0,
|
|
|
92 |
0,
|
|
|
93 |
FALSE);
|
|
|
94 |
} else {
|
|
|
95 |
vDestroyTextBlockList();
|
|
|
96 |
werr(0, "I can't find the text of this document");
|
|
|
97 |
}
|
|
|
98 |
return bSuccess;
|
|
|
99 |
} /* end of bGetDocumentText */
|
|
|
100 |
|
|
|
101 |
/*
|
|
|
102 |
* vGetDocumentData - make a list of the data blocks of a Word document
|
|
|
103 |
*/
|
|
|
104 |
static void
|
|
|
105 |
vGetDocumentData(FILE *pFile, const UCHAR *aucHeader)
|
|
|
106 |
{
|
|
|
107 |
data_block_type tDataBlock;
|
|
|
108 |
options_type tOptions;
|
|
|
109 |
ULONG ulEndOfText, ulBeginCharInfo;
|
|
|
110 |
BOOL bFastSaved, bHasImages, bSuccess;
|
|
|
111 |
USHORT usDocStatus;
|
|
|
112 |
|
|
|
113 |
/* Get the options */
|
|
|
114 |
vGetOptions(&tOptions);
|
|
|
115 |
|
|
|
116 |
/* Get the status flags from the header */
|
|
|
117 |
usDocStatus = usGetWord(0x0a, aucHeader);
|
|
|
118 |
DBG_HEX(usDocStatus);
|
|
|
119 |
bFastSaved = (usDocStatus & BIT(2)) != 0;
|
|
|
120 |
bHasImages = (usDocStatus & BIT(3)) != 0;
|
|
|
121 |
|
|
|
122 |
if (!bHasImages ||
|
|
|
123 |
tOptions.eConversionType == conversion_text ||
|
|
|
124 |
tOptions.eConversionType == conversion_fmt_text ||
|
|
|
125 |
tOptions.eConversionType == conversion_xml ||
|
|
|
126 |
tOptions.eImageLevel == level_no_images) {
|
|
|
127 |
/*
|
|
|
128 |
* No images in the document or text-only output or
|
|
|
129 |
* no images wanted, so no data blocks will be needed
|
|
|
130 |
*/
|
|
|
131 |
vDestroyDataBlockList();
|
|
|
132 |
return;
|
|
|
133 |
}
|
|
|
134 |
|
|
|
135 |
if (bFastSaved) {
|
|
|
136 |
bSuccess = FALSE;
|
|
|
137 |
} else {
|
|
|
138 |
/* This datablock is too big, but it contains all images */
|
|
|
139 |
ulEndOfText = ulGetLong(0x1c, aucHeader);
|
|
|
140 |
DBG_HEX(ulEndOfText);
|
|
|
141 |
ulBeginCharInfo = ulGetLong(0xa0, aucHeader);
|
|
|
142 |
DBG_HEX(ulBeginCharInfo);
|
|
|
143 |
if (ulBeginCharInfo > ulEndOfText) {
|
|
|
144 |
tDataBlock.ulFileOffset = ulEndOfText;
|
|
|
145 |
tDataBlock.ulDataPos = ulEndOfText;
|
|
|
146 |
tDataBlock.ulLength = ulBeginCharInfo - ulEndOfText;
|
|
|
147 |
bSuccess = bAdd2DataBlockList(&tDataBlock);
|
|
|
148 |
DBG_HEX_C(!bSuccess, tDataBlock.ulFileOffset);
|
|
|
149 |
DBG_HEX_C(!bSuccess, tDataBlock.ulDataPos);
|
|
|
150 |
DBG_DEC_C(!bSuccess, tDataBlock.ulLength);
|
|
|
151 |
} else {
|
|
|
152 |
bSuccess = ulBeginCharInfo == ulEndOfText;
|
|
|
153 |
}
|
|
|
154 |
}
|
|
|
155 |
|
|
|
156 |
if (!bSuccess) {
|
|
|
157 |
vDestroyDataBlockList();
|
|
|
158 |
werr(0, "I can't find the data of this document");
|
|
|
159 |
}
|
|
|
160 |
} /* end of vGetDocumentData */
|
|
|
161 |
|
|
|
162 |
/*
|
|
|
163 |
* iInitDocumentWIN - initialize an WIN document
|
|
|
164 |
*
|
|
|
165 |
* Returns the version of Word that made the document or -1
|
|
|
166 |
*/
|
|
|
167 |
int
|
|
|
168 |
iInitDocumentWIN(FILE *pFile, long lFilesize)
|
|
|
169 |
{
|
|
|
170 |
int iWordVersion;
|
|
|
171 |
BOOL bSuccess;
|
|
|
172 |
USHORT usIdent;
|
|
|
173 |
UCHAR aucHeader[384];
|
|
|
174 |
|
|
|
175 |
fail(pFile == NULL);
|
|
|
176 |
|
|
|
177 |
if (lFilesize < 384) {
|
|
|
178 |
return -1;
|
|
|
179 |
}
|
|
|
180 |
|
|
|
181 |
/* Read the headerblock */
|
|
|
182 |
if (!bReadBytes(aucHeader, 384, 0x00, pFile)) {
|
|
|
183 |
return -1;
|
|
|
184 |
}
|
|
|
185 |
/* Get the "magic number" from the header */
|
|
|
186 |
usIdent = usGetWord(0x00, aucHeader);
|
|
|
187 |
DBG_HEX(usIdent);
|
|
|
188 |
fail(usIdent != 0xa59b && /* WinWord 1.x */
|
|
|
189 |
usIdent != 0xa5db); /* WinWord 2.0 */
|
|
|
190 |
iWordVersion = iGetVersionNumber(aucHeader);
|
|
|
191 |
if (iWordVersion != 1 && iWordVersion != 2) {
|
|
|
192 |
werr(0, "This file is not from ''Win Word 1 or 2'.");
|
|
|
193 |
return -1;
|
|
|
194 |
}
|
|
|
195 |
bSuccess = bGetDocumentText(pFile, aucHeader);
|
|
|
196 |
if (bSuccess) {
|
|
|
197 |
vGetDocumentData(pFile, aucHeader);
|
|
|
198 |
vGetPropertyInfo(pFile, NULL,
|
|
|
199 |
NULL, 0, NULL, 0,
|
|
|
200 |
aucHeader, iWordVersion);
|
|
|
201 |
vSetDefaultTabWidth(pFile, NULL,
|
|
|
202 |
NULL, 0, NULL, 0,
|
|
|
203 |
aucHeader, iWordVersion);
|
|
|
204 |
vGetNotesInfo(pFile, NULL,
|
|
|
205 |
NULL, 0, NULL, 0,
|
|
|
206 |
aucHeader, iWordVersion);
|
|
|
207 |
}
|
|
|
208 |
return bSuccess ? iWordVersion : -1;
|
|
|
209 |
} /* end of iInitDocumentWIN */
|