Subversion Repositories planix.SVN

Rev

Rev 2 | Details | Compare with Previous | Last modification | View Log | RSS feed

Rev Author Line No. Line
2 - 1
/*
2
 * wordwin.c
3
 * Copyright (C) 2002-2005 A.J. van Os; Released under GPL
4
 *
5
 * Description:
6
 * Deal with the WIN internals of a MS Word file
7
 */
8
 
9
#include "antiword.h"
10
 
11
 
12
/*
13
 * bGetDocumentText - make a list of the text blocks of a Word document
14
 *
15
 * Return TRUE when succesful, otherwise FALSE
16
 */
17
static BOOL
18
bGetDocumentText(FILE *pFile, const UCHAR *aucHeader)
19
{
20
	text_block_type	tTextBlock;
21
	ULONG	ulBeginOfText;
22
	ULONG	ulTextLen, ulFootnoteLen;
23
	ULONG	ulHdrFtrLen, ulMacroLen, ulAnnotationLen;
24
	UINT	uiQuickSaves;
25
	USHORT	usDocStatus;
26
	BOOL	bTemplate, bFastSaved, bEncrypted, bSuccess;
27
 
28
	fail(pFile == NULL);
29
	fail(aucHeader == NULL);
30
 
31
	DBG_MSG("bGetDocumentText");
32
 
33
	/* Get the status flags from the header */
34
	usDocStatus = usGetWord(0x0a, aucHeader);
35
	DBG_HEX(usDocStatus);
36
	bTemplate = (usDocStatus & BIT(0)) != 0;
37
	DBG_MSG_C(bTemplate, "This document is a Template");
38
	bFastSaved = (usDocStatus & BIT(2)) != 0;
39
	uiQuickSaves = (UINT)(usDocStatus & 0x00f0) >> 4;
40
	DBG_MSG_C(bFastSaved, "This document is Fast Saved");
41
	DBG_DEC_C(bFastSaved, uiQuickSaves);
42
	if (bFastSaved) {
43
		werr(0, "Word2: fast saved documents are not supported yet");
44
		return FALSE;
45
	}
46
	bEncrypted = (usDocStatus & BIT(8)) != 0;
47
	if (bEncrypted) {
48
		werr(0, "Encrypted documents are not supported");
49
		return FALSE;
50
	}
51
 
52
	/* Get length information */
53
	ulBeginOfText = ulGetLong(0x18, aucHeader);
54
	DBG_HEX(ulBeginOfText);
55
	ulTextLen = ulGetLong(0x34, aucHeader);
56
	ulFootnoteLen = ulGetLong(0x38, aucHeader);
57
	ulHdrFtrLen = ulGetLong(0x3c, aucHeader);
58
	ulMacroLen = ulGetLong(0x40, aucHeader);
59
	ulAnnotationLen = ulGetLong(0x44, aucHeader);
60
	DBG_DEC(ulTextLen);
61
	DBG_DEC(ulFootnoteLen);
62
	DBG_DEC(ulHdrFtrLen);
63
	DBG_DEC(ulMacroLen);
64
	DBG_DEC(ulAnnotationLen);
65
	if (bFastSaved) {
66
		bSuccess = FALSE;
67
	} else {
68
		tTextBlock.ulFileOffset = ulBeginOfText;
69
		tTextBlock.ulCharPos = ulBeginOfText;
70
		tTextBlock.ulLength = ulTextLen +
71
				ulFootnoteLen +
72
				ulHdrFtrLen + ulMacroLen + ulAnnotationLen;
73
		tTextBlock.bUsesUnicode = FALSE;
74
		tTextBlock.usPropMod = IGNORE_PROPMOD;
75
		bSuccess = bAdd2TextBlockList(&tTextBlock);
76
		DBG_HEX_C(!bSuccess, tTextBlock.ulFileOffset);
77
		DBG_HEX_C(!bSuccess, tTextBlock.ulCharPos);
78
		DBG_DEC_C(!bSuccess, tTextBlock.ulLength);
79
		DBG_DEC_C(!bSuccess, tTextBlock.bUsesUnicode);
80
		DBG_DEC_C(!bSuccess, tTextBlock.usPropMod);
81
	}
82
 
83
	if (bSuccess) {
84
		vSplitBlockList(pFile,
85
				ulTextLen,
86
				ulFootnoteLen,
87
				ulHdrFtrLen,
88
				ulMacroLen,
89
				ulAnnotationLen,
90
				0,
91
				0,
92
				0,
93
				FALSE);
94
	} else {
95
		vDestroyTextBlockList();
96
		werr(0, "I can't find the text of this document");
97
	}
98
	return bSuccess;
99
} /* end of bGetDocumentText */
100
 
101
/*
102
 * vGetDocumentData - make a list of the data blocks of a Word document
103
 */
104
static void
105
vGetDocumentData(FILE *pFile, const UCHAR *aucHeader)
106
{
107
	data_block_type	tDataBlock;
108
	options_type	tOptions;
109
	ULONG	ulEndOfText, ulBeginCharInfo;
110
	BOOL	bFastSaved, bHasImages, bSuccess;
111
	USHORT	usDocStatus;
112
 
113
	/* Get the options */
114
	vGetOptions(&tOptions);
115
 
116
	/* Get the status flags from the header */
117
	usDocStatus = usGetWord(0x0a, aucHeader);
118
	DBG_HEX(usDocStatus);
119
	bFastSaved = (usDocStatus & BIT(2)) != 0;
120
	bHasImages = (usDocStatus & BIT(3)) != 0;
121
 
122
	if (!bHasImages ||
123
	    tOptions.eConversionType == conversion_text ||
124
	    tOptions.eConversionType == conversion_fmt_text ||
125
	    tOptions.eConversionType == conversion_xml ||
126
	    tOptions.eImageLevel == level_no_images) {
127
		/*
128
		 * No images in the document or text-only output or
129
		 * no images wanted, so no data blocks will be needed
130
		 */
131
		vDestroyDataBlockList();
132
		return;
133
	}
134
 
135
	if (bFastSaved) {
136
		bSuccess = FALSE;
137
	} else {
138
		/* This datablock is too big, but it contains all images */
139
		ulEndOfText = ulGetLong(0x1c, aucHeader);
140
		DBG_HEX(ulEndOfText);
141
		ulBeginCharInfo = ulGetLong(0xa0, aucHeader);
142
		DBG_HEX(ulBeginCharInfo);
143
		if (ulBeginCharInfo > ulEndOfText) {
144
			tDataBlock.ulFileOffset = ulEndOfText;
145
			tDataBlock.ulDataPos = ulEndOfText;
146
			tDataBlock.ulLength = ulBeginCharInfo - ulEndOfText;
147
			bSuccess = bAdd2DataBlockList(&tDataBlock);
148
			DBG_HEX_C(!bSuccess, tDataBlock.ulFileOffset);
149
			DBG_HEX_C(!bSuccess, tDataBlock.ulDataPos);
150
			DBG_DEC_C(!bSuccess, tDataBlock.ulLength);
151
		} else {
152
			bSuccess = ulBeginCharInfo == ulEndOfText;
153
		}
154
	}
155
 
156
	if (!bSuccess) {
157
		vDestroyDataBlockList();
158
		werr(0, "I can't find the data of this document");
159
	}
160
} /* end of vGetDocumentData */
161
 
162
/*
163
 * iInitDocumentWIN - initialize an WIN document
164
 *
165
 * Returns the version of Word that made the document or -1
166
 */
167
int
168
iInitDocumentWIN(FILE *pFile, long lFilesize)
169
{
170
	int	iWordVersion;
171
	BOOL	bSuccess;
172
	USHORT	usIdent;
173
	UCHAR	aucHeader[384];
174
 
175
	fail(pFile == NULL);
176
 
177
	if (lFilesize < 384) {
178
		return -1;
179
	}
180
 
181
	/* Read the headerblock */
182
	if (!bReadBytes(aucHeader, 384, 0x00, pFile)) {
183
		return -1;
184
	}
185
	/* Get the "magic number" from the header */
186
	usIdent = usGetWord(0x00, aucHeader);
187
	DBG_HEX(usIdent);
188
	fail(usIdent != 0xa59b &&	/* WinWord 1.x */
189
		usIdent != 0xa5db);	/* WinWord 2.0 */
190
	iWordVersion = iGetVersionNumber(aucHeader);
191
	if (iWordVersion != 1 && iWordVersion != 2) {
192
		werr(0, "This file is not from ''Win Word 1 or 2'.");
193
		return -1;
194
	}
195
	bSuccess = bGetDocumentText(pFile, aucHeader);
196
	if (bSuccess) {
197
		vGetDocumentData(pFile, aucHeader);
198
		vGetPropertyInfo(pFile, NULL,
199
				NULL, 0, NULL, 0,
200
				aucHeader, iWordVersion);
201
		vSetDefaultTabWidth(pFile, NULL,
202
				NULL, 0, NULL, 0,
203
				aucHeader, iWordVersion);
204
		vGetNotesInfo(pFile, NULL,
205
				NULL, 0, NULL, 0,
206
				aucHeader, iWordVersion);
207
	}
208
	return bSuccess ? iWordVersion : -1;
209
} /* end of iInitDocumentWIN */