Subversion Repositories planix.SVN

Rev

Rev 2 | Details | Compare with Previous | Last modification | View Log | RSS feed

Rev Author Line No. Line
2 - 1
/*
 * wordole.c
 * Copyright (C) 1998-2004 A.J. van Os; Released under GPL
 *
 * Description:
 * Deal with the OLE internals of an MS Word file
 */
8
 
9
#include <string.h>
10
#include "antiword.h"
11
 
12
/* Private type for Property Set Storage entries */
13
typedef struct pps_entry_tag {
14
	ULONG	ulNext;
15
	ULONG	ulPrevious;
16
	ULONG	ulDir;
17
	ULONG	ulSB;
18
	ULONG	ulSize;
19
	int	iLevel;
20
	char	szName[32];
21
	UCHAR	ucType;
22
} pps_entry_type;
23
 
24
/* Marks a PPS number or index that must not be followed or used */
#define PPS_NUMBER_INVALID	0xffffffffUL
26
 
27
 
28
/*
 * Release the small block list and all five block arrays in one go.
 * This is a macro so that every exit path runs the very same statements;
 * it refers directly to the local variables of the function that uses it
 * and stores the result of xfree back into each pointer.
 */
#define FREE_ALL()					\
	do {						\
		vDestroySmallBlockList();		\
		aulRootList = xfree(aulRootList);	\
		aulSbdList = xfree(aulSbdList);		\
		aulBbdList = xfree(aulBbdList);		\
		aulSBD = xfree(aulSBD);			\
		aulBBD = xfree(aulBBD);			\
	} while(0)
38
 
39
 
40
/*
41
 * ulReadLong - read four bytes from the given file and offset
42
 */
43
static ULONG
44
ulReadLong(FILE *pFile, ULONG ulOffset)
45
{
46
	UCHAR	aucBytes[4];
47
 
48
	fail(pFile == NULL);
49
 
50
	if (!bReadBytes(aucBytes, 4, ulOffset, pFile)) {
51
		werr(1, "Read long 0x%lx not possible", ulOffset);
52
	}
53
	return ulGetLong(0, aucBytes);
54
} /* end of ulReadLong */
55
 
56
/*
57
 * vName2String - turn the name into a proper string.
58
 */
59
static void
60
vName2String(char *szName, const UCHAR *aucBytes, size_t tNameSize)
61
{
62
	char	*pcChar;
63
	size_t	tIndex;
64
 
65
	fail(aucBytes == NULL || szName == NULL);
66
 
67
	if (tNameSize < 2) {
68
		szName[0] = '\0';
69
		return;
70
	}
71
	for (tIndex = 0, pcChar = szName;
72
	     tIndex < 2 * tNameSize;
73
	     tIndex += 2, pcChar++) {
74
		*pcChar = (char)aucBytes[tIndex];
75
	}
76
	szName[tNameSize - 1] = '\0';
77
} /* end of vName2String */
78
 
79
/*
80
 * tReadBlockIndices - read the Big/Small Block Depot indices
81
 *
82
 * Returns the number of indices read
83
 */
84
static size_t
85
tReadBlockIndices(FILE *pFile, ULONG *aulBlockDepot,
86
	size_t tMaxRec, ULONG ulOffset)
87
{
88
	size_t	tDone;
89
	int	iIndex;
90
	UCHAR	aucBytes[BIG_BLOCK_SIZE];
91
 
92
	fail(pFile == NULL || aulBlockDepot == NULL);
93
	fail(tMaxRec == 0);
94
 
95
	/* Read a big block with BBD or SBD indices */
96
	if (!bReadBytes(aucBytes, BIG_BLOCK_SIZE, ulOffset, pFile)) {
97
		werr(0, "Reading big block from 0x%lx is not possible",
98
			ulOffset);
99
		return 0;
100
	}
101
	/* Split the big block into indices, an index is four bytes */
102
	tDone = min(tMaxRec, (size_t)BIG_BLOCK_SIZE / 4);
103
	for (iIndex = 0; iIndex < (int)tDone; iIndex++) {
104
		aulBlockDepot[iIndex] = ulGetLong(4 * iIndex, aucBytes);
105
		NO_DBG_DEC(aulBlockDepot[iIndex]);
106
	}
107
	return tDone;
108
} /* end of tReadBlockIndices */
109
 
110
/*
111
 * bGetBBD - get the Big Block Depot indices from the index-blocks
112
 */
113
static BOOL
114
bGetBBD(FILE *pFile, const ULONG *aulDepot, size_t tDepotLen,
115
	ULONG *aulBBD, size_t tBBDLen)
116
{
117
	ULONG	ulBegin;
118
	size_t	tToGo, tDone;
119
	int	iIndex;
120
 
121
	fail(pFile == NULL || aulDepot == NULL || aulBBD == NULL);
122
 
123
	DBG_MSG("bGetBBD");
124
 
125
	tToGo = tBBDLen;
126
	for (iIndex = 0; iIndex < (int)tDepotLen && tToGo != 0; iIndex++) {
127
		ulBegin = (aulDepot[iIndex] + 1) * BIG_BLOCK_SIZE;
128
		NO_DBG_HEX(ulBegin);
129
		tDone = tReadBlockIndices(pFile, aulBBD, tToGo, ulBegin);
130
		fail(tDone > tToGo);
131
		if (tDone == 0) {
132
			return FALSE;
133
		}
134
		aulBBD += tDone;
135
		tToGo -= tDone;
136
	}
137
	return tToGo == 0;
138
} /* end of bGetBBD */
139
 
140
/*
141
 * bGetSBD - get the Small Block Depot indices from the index-blocks
142
 */
143
static BOOL
144
bGetSBD(FILE *pFile, const ULONG *aulDepot, size_t tDepotLen,
145
	ULONG *aulSBD, size_t tSBDLen)
146
{
147
	ULONG	ulBegin;
148
	size_t	tToGo, tDone;
149
	int	iIndex;
150
 
151
	fail(pFile == NULL || aulDepot == NULL || aulSBD == NULL);
152
 
153
	DBG_MSG("bGetSBD");
154
 
155
	tToGo = tSBDLen;
156
	for (iIndex = 0; iIndex < (int)tDepotLen && tToGo != 0; iIndex++) {
157
		fail(aulDepot[iIndex] >= ULONG_MAX / BIG_BLOCK_SIZE);
158
		ulBegin = (aulDepot[iIndex] + 1) * BIG_BLOCK_SIZE;
159
		NO_DBG_HEX(ulBegin);
160
		tDone = tReadBlockIndices(pFile, aulSBD, tToGo, ulBegin);
161
		fail(tDone > tToGo);
162
		if (tDone == 0) {
163
			return FALSE;
164
		}
165
		aulSBD += tDone;
166
		tToGo -= tDone;
167
	}
168
	return tToGo == 0;
169
} /* end of bGetSBD */
170
 
171
/*
172
 * vComputePPSlevels - compute the levels of the Property Set Storage entries
173
 */
174
static void
175
vComputePPSlevels(pps_entry_type *atPPSlist, pps_entry_type *pNode,
176
			int iLevel, int iRecursionLevel)
177
{
178
	fail(atPPSlist == NULL || pNode == NULL);
179
	fail(iLevel < 0 || iRecursionLevel < 0);
180
 
181
	if (iRecursionLevel > 25) {
182
		/* This removes the possibility of an infinite recursion */
183
		DBG_DEC(iRecursionLevel);
184
		return;
185
	}
186
	if (pNode->iLevel <= iLevel) {
187
		/* Avoid entering a loop */
188
		DBG_DEC(iLevel);
189
		DBG_DEC(pNode->iLevel);
190
		return;
191
	}
192
 
193
	pNode->iLevel = iLevel;
194
 
195
	if (pNode->ulDir != PPS_NUMBER_INVALID) {
196
		vComputePPSlevels(atPPSlist,
197
				&atPPSlist[pNode->ulDir],
198
				iLevel + 1,
199
				iRecursionLevel + 1);
200
	}
201
	if (pNode->ulNext != PPS_NUMBER_INVALID) {
202
		vComputePPSlevels(atPPSlist,
203
				&atPPSlist[pNode->ulNext],
204
				iLevel,
205
				iRecursionLevel + 1);
206
	}
207
	if (pNode->ulPrevious != PPS_NUMBER_INVALID) {
208
		vComputePPSlevels(atPPSlist,
209
				&atPPSlist[pNode->ulPrevious],
210
				iLevel,
211
				iRecursionLevel + 1);
212
	}
213
} /* end of vComputePPSlevels */
214
 
215
/*
216
 * bGetPPS - search the Property Set Storage for three sets
217
 *
218
 * Return TRUE if the WordDocument PPS is found
219
 */
220
static BOOL
221
bGetPPS(FILE *pFile,
222
	const ULONG *aulRootList, size_t tRootListLen, pps_info_type *pPPS)
223
{
224
	pps_entry_type	*atPPSlist;
225
	ULONG	ulBegin, ulOffset, ulTmp;
226
	size_t	tNbrOfPPS, tNameSize;
227
	int	iIndex, iStartBlock, iRootIndex;
228
	BOOL	bWord, bExcel;
229
	UCHAR	aucBytes[PROPERTY_SET_STORAGE_SIZE];
230
 
231
	fail(pFile == NULL || aulRootList == NULL || pPPS == NULL);
232
 
233
	DBG_MSG("bGetPPS");
234
 
235
	NO_DBG_DEC(tRootListLen);
236
 
237
	bWord = FALSE;
238
	bExcel = FALSE;
239
	(void)memset(pPPS, 0, sizeof(*pPPS));
240
 
241
	/* Read and store all the Property Set Storage entries */
242
 
243
	tNbrOfPPS = tRootListLen * BIG_BLOCK_SIZE / PROPERTY_SET_STORAGE_SIZE;
244
	atPPSlist = xcalloc(tNbrOfPPS, sizeof(pps_entry_type));
245
	iRootIndex = 0;
246
 
247
	for (iIndex = 0; iIndex < (int)tNbrOfPPS; iIndex++) {
248
		ulTmp = (ULONG)iIndex * PROPERTY_SET_STORAGE_SIZE;
249
		iStartBlock = (int)(ulTmp / BIG_BLOCK_SIZE);
250
		ulOffset = ulTmp % BIG_BLOCK_SIZE;
251
		ulBegin = (aulRootList[iStartBlock] + 1) * BIG_BLOCK_SIZE +
252
				ulOffset;
253
		NO_DBG_HEX(ulBegin);
254
		if (!bReadBytes(aucBytes, PROPERTY_SET_STORAGE_SIZE,
255
							ulBegin, pFile)) {
256
			werr(0, "Reading PPS %d is not possible", iIndex);
257
			atPPSlist = xfree(atPPSlist);
258
			return FALSE;
259
		}
260
		tNameSize = (size_t)usGetWord(0x40, aucBytes);
261
		tNameSize = (tNameSize + 1) / 2;
262
		vName2String(atPPSlist[iIndex].szName, aucBytes, tNameSize);
263
		atPPSlist[iIndex].ucType = ucGetByte(0x42, aucBytes);
264
		if (atPPSlist[iIndex].ucType == 5) {
265
			iRootIndex = iIndex;
266
		}
267
		atPPSlist[iIndex].ulPrevious = ulGetLong(0x44, aucBytes);
268
		atPPSlist[iIndex].ulNext = ulGetLong(0x48, aucBytes);
269
		atPPSlist[iIndex].ulDir = ulGetLong(0x4c, aucBytes);
270
		atPPSlist[iIndex].ulSB = ulGetLong(0x74, aucBytes);
271
		atPPSlist[iIndex].ulSize = ulGetLong(0x78, aucBytes);
272
		atPPSlist[iIndex].iLevel = INT_MAX;
273
		if ((atPPSlist[iIndex].ulPrevious >= (ULONG)tNbrOfPPS &&
274
		     atPPSlist[iIndex].ulPrevious != PPS_NUMBER_INVALID) ||
275
		    (atPPSlist[iIndex].ulNext >= (ULONG)tNbrOfPPS &&
276
		     atPPSlist[iIndex].ulNext != PPS_NUMBER_INVALID) ||
277
		    (atPPSlist[iIndex].ulDir >= (ULONG)tNbrOfPPS &&
278
		     atPPSlist[iIndex].ulDir != PPS_NUMBER_INVALID)) {
279
			DBG_DEC(iIndex);
280
			DBG_DEC(atPPSlist[iIndex].ulPrevious);
281
			DBG_DEC(atPPSlist[iIndex].ulNext);
282
			DBG_DEC(atPPSlist[iIndex].ulDir);
283
			DBG_DEC(tNbrOfPPS);
284
			werr(0, "The Property Set Storage is damaged");
285
			atPPSlist = xfree(atPPSlist);
286
			return FALSE;
287
		}
288
	}
289
 
290
#if 0 /* defined(DEBUG) */
291
	DBG_MSG("Before");
292
	for (iIndex = 0; iIndex < (int)tNbrOfPPS; iIndex++) {
293
		DBG_MSG(atPPSlist[iIndex].szName);
294
		DBG_HEX(atPPSlist[iIndex].ulDir);
295
		DBG_HEX(atPPSlist[iIndex].ulPrevious);
296
		DBG_HEX(atPPSlist[iIndex].ulNext);
297
		DBG_DEC(atPPSlist[iIndex].ulSB);
298
		DBG_HEX(atPPSlist[iIndex].ulSize);
299
		DBG_DEC(atPPSlist[iIndex].iLevel);
300
	}
301
#endif /* DEBUG */
302
 
303
	/* Add level information to each entry */
304
	vComputePPSlevels(atPPSlist, &atPPSlist[iRootIndex], 0, 0);
305
 
306
	/* Check the entries on level 1 for the required information */
307
	NO_DBG_MSG("After");
308
	for (iIndex = 0; iIndex < (int)tNbrOfPPS; iIndex++) {
309
#if 0 /* defined(DEBUG) */
310
		DBG_MSG(atPPSlist[iIndex].szName);
311
		DBG_HEX(atPPSlist[iIndex].ulDir);
312
		DBG_HEX(atPPSlist[iIndex].ulPrevious);
313
		DBG_HEX(atPPSlist[iIndex].ulNext);
314
		DBG_DEC(atPPSlist[iIndex].ulSB);
315
		DBG_HEX(atPPSlist[iIndex].ulSize);
316
		DBG_DEC(atPPSlist[iIndex].iLevel);
317
#endif /* DEBUG */
318
		if (atPPSlist[iIndex].iLevel != 1 ||
319
		    atPPSlist[iIndex].ucType != 2 ||
320
		    atPPSlist[iIndex].szName[0] == '\0' ||
321
		    atPPSlist[iIndex].ulSize == 0) {
322
			/* This entry can be ignored */
323
			continue;
324
		}
325
		if (pPPS->tWordDocument.ulSize == 0 &&
326
		    STREQ(atPPSlist[iIndex].szName, "WordDocument")) {
327
			pPPS->tWordDocument.ulSB = atPPSlist[iIndex].ulSB;
328
			pPPS->tWordDocument.ulSize = atPPSlist[iIndex].ulSize;
329
			bWord = TRUE;
330
		} else if (pPPS->tData.ulSize == 0 &&
331
			   STREQ(atPPSlist[iIndex].szName, "Data")) {
332
			pPPS->tData.ulSB = atPPSlist[iIndex].ulSB;
333
			pPPS->tData.ulSize = atPPSlist[iIndex].ulSize;
334
		} else if (pPPS->t0Table.ulSize == 0 &&
335
			   STREQ(atPPSlist[iIndex].szName, "0Table")) {
336
			pPPS->t0Table.ulSB = atPPSlist[iIndex].ulSB;
337
			pPPS->t0Table.ulSize = atPPSlist[iIndex].ulSize;
338
		} else if (pPPS->t1Table.ulSize == 0 &&
339
			   STREQ(atPPSlist[iIndex].szName, "1Table")) {
340
			pPPS->t1Table.ulSB = atPPSlist[iIndex].ulSB;
341
			pPPS->t1Table.ulSize = atPPSlist[iIndex].ulSize;
342
		} else if (pPPS->tSummaryInfo.ulSize == 0 &&
343
			   STREQ(atPPSlist[iIndex].szName,
344
						"\005SummaryInformation")) {
345
			pPPS->tSummaryInfo.ulSB = atPPSlist[iIndex].ulSB;
346
			pPPS->tSummaryInfo.ulSize = atPPSlist[iIndex].ulSize;
347
		} else if (pPPS->tDocSummaryInfo.ulSize == 0 &&
348
			   STREQ(atPPSlist[iIndex].szName,
349
					"\005DocumentSummaryInformation")) {
350
			pPPS->tDocSummaryInfo.ulSB = atPPSlist[iIndex].ulSB;
351
			pPPS->tDocSummaryInfo.ulSize = atPPSlist[iIndex].ulSize;
352
		} else if (STREQ(atPPSlist[iIndex].szName, "Book") ||
353
			   STREQ(atPPSlist[iIndex].szName, "Workbook")) {
354
			bExcel = TRUE;
355
		}
356
	}
357
 
358
	/* Free the space for the Property Set Storage entries */
359
	atPPSlist = xfree(atPPSlist);
360
 
361
	/* Draw your conclusions */
362
	if (bWord) {
363
		return TRUE;
364
	}
365
 
366
	if (bExcel) {
367
		werr(0, "Sorry, but this is an Excel spreadsheet");
368
	} else {
369
		werr(0, "This OLE file does not contain a Word document");
370
	}
371
	return FALSE;
372
} /* end of bGetPPS */
373
 
374
/*
375
 * vGetBbdList - make a list of the places to find big blocks
376
 */
377
static void
378
vGetBbdList(FILE *pFile, int iNbr, ULONG *aulBbdList, ULONG ulOffset)
379
{
380
	int	iIndex;
381
 
382
	fail(pFile == NULL);
383
	fail(iNbr > 127);
384
	fail(aulBbdList == NULL);
385
 
386
	NO_DBG_DEC(iNbr);
387
	for (iIndex = 0; iIndex < iNbr; iIndex++) {
388
                aulBbdList[iIndex] =
389
                        ulReadLong(pFile, ulOffset + 4 * (ULONG)iIndex);
390
		NO_DBG_DEC(iIndex);
391
                NO_DBG_HEX(aulBbdList[iIndex]);
392
        }
393
} /* end of vGetBbdList */
394
 
395
/*
396
 * bGetDocumentText - make a list of the text blocks of a Word document
397
 *
398
 * Return TRUE when succesful, otherwise FALSE
399
 */
400
static BOOL
401
bGetDocumentText(FILE *pFile, const pps_info_type *pPPS,
402
	const ULONG *aulBBD, size_t tBBDLen,
403
	const ULONG *aulSBD, size_t tSBDLen,
404
	const UCHAR *aucHeader, int iWordVersion)
405
{
406
	ULONG	ulBeginOfText;
407
	ULONG	ulTextLen, ulFootnoteLen, ulEndnoteLen;
408
	ULONG	ulHdrFtrLen, ulMacroLen, ulAnnotationLen;
409
	ULONG	ulTextBoxLen, ulHdrTextBoxLen;
410
	UINT	uiQuickSaves;
411
	BOOL	bFarEastWord, bTemplate, bFastSaved, bEncrypted, bSuccess;
412
	USHORT	usIdent, usDocStatus;
413
 
414
	fail(pFile == NULL || pPPS == NULL);
415
	fail(aulBBD == NULL);
416
	fail(aulSBD == NULL);
417
 
418
	DBG_MSG("bGetDocumentText");
419
 
420
	/* Get the "magic number" from the header */
421
	usIdent = usGetWord(0x00, aucHeader);
422
	DBG_HEX(usIdent);
423
	bFarEastWord = usIdent == 0x8098 || usIdent == 0x8099 ||
424
			usIdent == 0xa697 || usIdent == 0xa699;
425
	/* Get the status flags from the header */
426
	usDocStatus = usGetWord(0x0a, aucHeader);
427
	DBG_HEX(usDocStatus);
428
	bTemplate = (usDocStatus & BIT(0)) != 0;
429
	DBG_MSG_C(bTemplate, "This document is a Template");
430
	bFastSaved = (usDocStatus & BIT(2)) != 0;
431
	uiQuickSaves = (UINT)(usDocStatus & 0x00f0) >> 4;
432
	DBG_MSG_C(bFastSaved, "This document is Fast Saved");
433
	DBG_DEC_C(bFastSaved, uiQuickSaves);
434
	bEncrypted = (usDocStatus & BIT(8)) != 0;
435
	if (bEncrypted) {
436
		werr(0, "Encrypted documents are not supported");
437
		return FALSE;
438
	}
439
 
440
	/* Get length information */
441
	ulBeginOfText = ulGetLong(0x18, aucHeader);
442
	DBG_HEX(ulBeginOfText);
443
	switch (iWordVersion) {
444
	case 6:
445
	case 7:
446
		ulTextLen = ulGetLong(0x34, aucHeader);
447
		ulFootnoteLen = ulGetLong(0x38, aucHeader);
448
		ulHdrFtrLen = ulGetLong(0x3c, aucHeader);
449
		ulMacroLen = ulGetLong(0x40, aucHeader);
450
		ulAnnotationLen = ulGetLong(0x44, aucHeader);
451
		ulEndnoteLen = ulGetLong(0x48, aucHeader);
452
		ulTextBoxLen = ulGetLong(0x4c, aucHeader);
453
		ulHdrTextBoxLen = ulGetLong(0x50, aucHeader);
454
		break;
455
	case 8:
456
		ulTextLen = ulGetLong(0x4c, aucHeader);
457
		ulFootnoteLen = ulGetLong(0x50, aucHeader);
458
		ulHdrFtrLen = ulGetLong(0x54, aucHeader);
459
		ulMacroLen = ulGetLong(0x58, aucHeader);
460
		ulAnnotationLen = ulGetLong(0x5c, aucHeader);
461
		ulEndnoteLen = ulGetLong(0x60, aucHeader);
462
		ulTextBoxLen = ulGetLong(0x64, aucHeader);
463
		ulHdrTextBoxLen = ulGetLong(0x68, aucHeader);
464
		break;
465
	default:
466
		werr(0, "This version of Word is not supported");
467
		return FALSE;
468
	}
469
	DBG_DEC(ulTextLen);
470
	DBG_DEC(ulFootnoteLen);
471
	DBG_DEC(ulHdrFtrLen);
472
	DBG_DEC(ulMacroLen);
473
	DBG_DEC(ulAnnotationLen);
474
	DBG_DEC(ulEndnoteLen);
475
	DBG_DEC(ulTextBoxLen);
476
	DBG_DEC(ulHdrTextBoxLen);
477
 
478
	/* Make a list of the text blocks */
479
	switch (iWordVersion) {
480
	case 6:
481
	case 7:
482
		if (bFastSaved) {
483
			bSuccess = bGet6DocumentText(pFile,
484
					bFarEastWord,
485
					pPPS->tWordDocument.ulSB,
486
					aulBBD, tBBDLen,
487
					aucHeader);
488
		} else {
489
		  	bSuccess = bAddTextBlocks(ulBeginOfText,
490
				ulTextLen +
491
				ulFootnoteLen +
492
				ulHdrFtrLen +
493
				ulMacroLen + ulAnnotationLen +
494
				ulEndnoteLen +
495
				ulTextBoxLen + ulHdrTextBoxLen,
496
				bFarEastWord,
497
				IGNORE_PROPMOD,
498
				pPPS->tWordDocument.ulSB,
499
				aulBBD, tBBDLen);
500
		}
501
		break;
502
	case 8:
503
		bSuccess = bGet8DocumentText(pFile,
504
				pPPS,
505
				aulBBD, tBBDLen, aulSBD, tSBDLen,
506
				aucHeader);
507
		break;
508
	default:
509
		werr(0, "This version of Word is not supported");
510
		bSuccess = FALSE;
511
		break;
512
	}
513
 
514
	if (bSuccess) {
515
		vSplitBlockList(pFile,
516
				ulTextLen,
517
				ulFootnoteLen,
518
				ulHdrFtrLen,
519
				ulMacroLen,
520
				ulAnnotationLen,
521
				ulEndnoteLen,
522
				ulTextBoxLen,
523
				ulHdrTextBoxLen,
524
				!bFastSaved && iWordVersion == 8);
525
	} else {
526
		vDestroyTextBlockList();
527
		werr(0, "I can't find the text of this document");
528
	}
529
	return bSuccess;
530
} /* end of bGetDocumentText */
531
 
532
/*
533
 * vGetDocumentData - make a list of the data blocks of a Word document
534
 */
535
static void
536
vGetDocumentData(FILE *pFile, const pps_info_type *pPPS,
537
	const ULONG *aulBBD, size_t tBBDLen,
538
	const UCHAR *aucHeader, int iWordVersion)
539
{
540
	options_type	tOptions;
541
	ULONG	ulBeginOfText;
542
	BOOL	bFastSaved, bHasImages, bSuccess;
543
	USHORT	usDocStatus;
544
 
545
	fail(pFile == NULL);
546
	fail(pPPS == NULL);
547
	fail(aulBBD == NULL);
548
 
549
	/* Get the options */
550
	vGetOptions(&tOptions);
551
 
552
	/* Get the status flags from the header */
553
	usDocStatus = usGetWord(0x0a, aucHeader);
554
	DBG_HEX(usDocStatus);
555
	bFastSaved = (usDocStatus & BIT(2)) != 0;
556
	bHasImages = (usDocStatus & BIT(3)) != 0;
557
 
558
	if (!bHasImages ||
559
	    tOptions.eConversionType == conversion_text ||
560
	    tOptions.eConversionType == conversion_fmt_text ||
561
	    tOptions.eConversionType == conversion_xml ||
562
	    tOptions.eImageLevel == level_no_images) {
563
		/*
564
		 * No images in the document or text-only output or
565
		 * no images wanted, so no data blocks will be needed
566
		 */
567
		vDestroyDataBlockList();
568
		return;
569
	}
570
 
571
	/* Get length information */
572
	ulBeginOfText = ulGetLong(0x18, aucHeader);
573
	DBG_HEX(ulBeginOfText);
574
 
575
	/* Make a list of the data blocks */
576
	switch (iWordVersion) {
577
	case 6:
578
	case 7:
579
		/*
580
		 * The data blocks are in the text stream. The text stream
581
		 * is in "fast saved" format or "normal saved" format
582
		 */
583
		if (bFastSaved) {
584
			bSuccess = bGet6DocumentData(pFile,
585
					pPPS->tWordDocument.ulSB,
586
					aulBBD, tBBDLen,
587
					aucHeader);
588
		} else {
589
		  	bSuccess = bAddDataBlocks(ulBeginOfText,
590
					(ULONG)LONG_MAX,
591
					pPPS->tWordDocument.ulSB,
592
					aulBBD, tBBDLen);
593
		}
594
		break;
595
	case 8:
596
		/*
597
		 * The data blocks are in the data stream. The data stream
598
		 * is always in "normal saved" format
599
		 */
600
		bSuccess = bAddDataBlocks(0, (ULONG)LONG_MAX,
601
				pPPS->tData.ulSB, aulBBD, tBBDLen);
602
		break;
603
	default:
604
		werr(0, "This version of Word is not supported");
605
		bSuccess = FALSE;
606
		break;
607
	}
608
 
609
	if (!bSuccess) {
610
		vDestroyDataBlockList();
611
		werr(0, "I can't find the data of this document");
612
	}
613
} /* end of vGetDocumentData */
614
 
615
/*
616
 * iInitDocumentOLE - initialize an OLE document
617
 *
618
 * Returns the version of Word that made the document or -1
619
 */
620
int
621
iInitDocumentOLE(FILE *pFile, long lFilesize)
622
{
623
	pps_info_type	PPS_info;
624
	ULONG	*aulBBD, *aulSBD;
625
	ULONG	*aulRootList, *aulBbdList, *aulSbdList;
626
	ULONG	ulBdbListStart, ulAdditionalBBDlist;
627
	ULONG	ulRootStartblock, ulSbdStartblock, ulSBLstartblock;
628
	ULONG	ulStart, ulTmp;
629
	long	lMaxBlock;
630
	size_t	tBBDLen, tSBDLen, tNumBbdBlocks, tRootListLen;
631
	int	iWordVersion, iIndex, iToGo;
632
	BOOL	bSuccess;
633
	USHORT	usIdent, usDocStatus;
634
	UCHAR	aucHeader[HEADER_SIZE];
635
 
636
	fail(pFile == NULL);
637
 
638
	lMaxBlock = lFilesize / BIG_BLOCK_SIZE - 2;
639
	DBG_DEC(lMaxBlock);
640
	if (lMaxBlock < 1) {
641
		return -1;
642
	}
643
	tBBDLen = (size_t)(lMaxBlock + 1);
644
	tNumBbdBlocks = (size_t)ulReadLong(pFile, 0x2c);
645
	DBG_DEC(tNumBbdBlocks);
646
	ulRootStartblock = ulReadLong(pFile, 0x30);
647
	DBG_DEC(ulRootStartblock);
648
	ulSbdStartblock = ulReadLong(pFile, 0x3c);
649
	DBG_DEC(ulSbdStartblock);
650
	ulAdditionalBBDlist = ulReadLong(pFile, 0x44);
651
	DBG_HEX(ulAdditionalBBDlist);
652
	ulSBLstartblock = ulReadLong(pFile,
653
			(ulRootStartblock + 1) * BIG_BLOCK_SIZE + 0x74);
654
	DBG_DEC(ulSBLstartblock);
655
	tSBDLen = (size_t)(ulReadLong(pFile,
656
			(ulRootStartblock + 1) * BIG_BLOCK_SIZE + 0x78) /
657
			SMALL_BLOCK_SIZE);
658
	/* All to be xcalloc-ed pointers to NULL */
659
	aulRootList = NULL;
660
	aulSbdList = NULL;
661
	aulBbdList = NULL;
662
	aulSBD = NULL;
663
	aulBBD = NULL;
664
/* Big Block Depot */
665
	aulBbdList = xcalloc(tNumBbdBlocks, sizeof(ULONG));
666
	aulBBD = xcalloc(tBBDLen, sizeof(ULONG));
667
	iToGo = (int)tNumBbdBlocks;
668
	vGetBbdList(pFile, min(iToGo, 109),  aulBbdList, 0x4c);
669
	ulStart = 109;
670
	iToGo -= 109;
671
	while (ulAdditionalBBDlist != END_OF_CHAIN && iToGo > 0) {
672
		ulBdbListStart = (ulAdditionalBBDlist + 1) * BIG_BLOCK_SIZE;
673
		vGetBbdList(pFile, min(iToGo, 127),
674
					aulBbdList + ulStart, ulBdbListStart);
675
		ulAdditionalBBDlist = ulReadLong(pFile,
676
					ulBdbListStart + 4 * 127);
677
		DBG_DEC(ulAdditionalBBDlist);
678
		DBG_HEX(ulAdditionalBBDlist);
679
		ulStart += 127;
680
		iToGo -= 127;
681
	}
682
	if (!bGetBBD(pFile, aulBbdList, tNumBbdBlocks, aulBBD, tBBDLen)) {
683
		FREE_ALL();
684
		return -1;
685
	}
686
	aulBbdList = xfree(aulBbdList);
687
/* Small Block Depot */
688
	aulSbdList = xcalloc(tBBDLen, sizeof(ULONG));
689
	aulSBD = xcalloc(tSBDLen, sizeof(ULONG));
690
	for (iIndex = 0, ulTmp = ulSbdStartblock;
691
	     iIndex < (int)tBBDLen && ulTmp != END_OF_CHAIN;
692
	     iIndex++, ulTmp = aulBBD[ulTmp]) {
693
		if (ulTmp >= (ULONG)tBBDLen) {
694
			DBG_DEC(ulTmp);
695
			DBG_DEC(tBBDLen);
696
			werr(1, "The Big Block Depot is damaged");
697
		}
698
		aulSbdList[iIndex] = ulTmp;
699
		NO_DBG_HEX(aulSbdList[iIndex]);
700
	}
701
	if (!bGetSBD(pFile, aulSbdList, tBBDLen, aulSBD, tSBDLen)) {
702
		FREE_ALL();
703
		return -1;
704
	}
705
	aulSbdList = xfree(aulSbdList);
706
/* Root list */
707
	for (tRootListLen = 0, ulTmp = ulRootStartblock;
708
	     tRootListLen < tBBDLen && ulTmp != END_OF_CHAIN;
709
	     tRootListLen++, ulTmp = aulBBD[ulTmp]) {
710
		if (ulTmp >= (ULONG)tBBDLen) {
711
			DBG_DEC(ulTmp);
712
			DBG_DEC(tBBDLen);
713
			werr(1, "The Big Block Depot is damaged");
714
		}
715
	}
716
	if (tRootListLen == 0) {
717
		werr(0, "No Rootlist found");
718
		FREE_ALL();
719
		return -1;
720
	}
721
	aulRootList = xcalloc(tRootListLen, sizeof(ULONG));
722
	for (iIndex = 0, ulTmp = ulRootStartblock;
723
	     iIndex < (int)tBBDLen && ulTmp != END_OF_CHAIN;
724
	     iIndex++, ulTmp = aulBBD[ulTmp]) {
725
		if (ulTmp >= (ULONG)tBBDLen) {
726
			DBG_DEC(ulTmp);
727
			DBG_DEC(tBBDLen);
728
			werr(1, "The Big Block Depot is damaged");
729
		}
730
		aulRootList[iIndex] = ulTmp;
731
		NO_DBG_DEC(aulRootList[iIndex]);
732
	}
733
	fail(tRootListLen != (size_t)iIndex);
734
	bSuccess = bGetPPS(pFile, aulRootList, tRootListLen, &PPS_info);
735
	aulRootList = xfree(aulRootList);
736
	if (!bSuccess) {
737
		FREE_ALL();
738
		return -1;
739
	}
740
/* Small block list */
741
	if (!bCreateSmallBlockList(ulSBLstartblock, aulBBD, tBBDLen)) {
742
		FREE_ALL();
743
		return -1;
744
	}
745
 
746
	if (PPS_info.tWordDocument.ulSize < MIN_SIZE_FOR_BBD_USE) {
747
		DBG_DEC(PPS_info.tWordDocument.ulSize);
748
		FREE_ALL();
749
		werr(0, "I'm afraid the text stream of this file "
750
			"is too small to handle.");
751
		return -1;
752
	}
753
	/* Read the headerblock */
754
	if (!bReadBuffer(pFile, PPS_info.tWordDocument.ulSB,
755
			aulBBD, tBBDLen, BIG_BLOCK_SIZE,
756
			aucHeader, 0, HEADER_SIZE)) {
757
		FREE_ALL();
758
		return -1;
759
	}
760
	usIdent = usGetWord(0x00, aucHeader);
761
	DBG_HEX(usIdent);
762
	fail(usIdent != 0x8098 &&	/* Word 7 for oriental languages */
763
	     usIdent != 0x8099 &&	/* Word 7 for oriental languages */
764
	     usIdent != 0xa5dc &&	/* Word 6 & 7 */
765
	     usIdent != 0xa5ec &&	/* Word 7 & 97 & 98 */
766
	     usIdent != 0xa697 &&	/* Word 7 for oriental languages */
767
	     usIdent != 0xa699);	/* Word 7 for oriental languages */
768
	iWordVersion = iGetVersionNumber(aucHeader);
769
	if (iWordVersion < 6) {
770
		FREE_ALL();
771
		werr(0, "This file is from a version of Word before Word 6.");
772
		return -1;
773
	}
774
 
775
	/* Get the status flags from the header */
776
	usDocStatus = usGetWord(0x0a, aucHeader);
777
        if (usDocStatus & BIT(9)) {
778
		PPS_info.tTable = PPS_info.t1Table;
779
	} else {
780
		PPS_info.tTable = PPS_info.t0Table;
781
	}
782
	/* Clean the entries that should not be used */
783
	memset(&PPS_info.t0Table, 0, sizeof(PPS_info.t0Table));
784
	memset(&PPS_info.t1Table, 0, sizeof(PPS_info.t1Table));
785
 
786
	bSuccess = bGetDocumentText(pFile, &PPS_info,
787
			aulBBD, tBBDLen, aulSBD, tSBDLen,
788
			aucHeader, iWordVersion);
789
	if (bSuccess) {
790
		vGetDocumentData(pFile, &PPS_info,
791
			aulBBD, tBBDLen, aucHeader, iWordVersion);
792
		vGetPropertyInfo(pFile, &PPS_info,
793
			aulBBD, tBBDLen, aulSBD, tSBDLen,
794
			aucHeader, iWordVersion);
795
		vSetDefaultTabWidth(pFile, &PPS_info,
796
			aulBBD, tBBDLen, aulSBD, tSBDLen,
797
			aucHeader, iWordVersion);
798
		vGetNotesInfo(pFile, &PPS_info,
799
			aulBBD, tBBDLen, aulSBD, tSBDLen,
800
			aucHeader, iWordVersion);
801
	}
802
	FREE_ALL();
803
	return bSuccess ? iWordVersion : -1;
804
} /* end of iInitDocumentOLE */