Subversion Repositories planix.SVN

Rev

Rev 2 | Details | Compare with Previous | Last modification | View Log | RSS feed

Rev Author Line No. Line
2 - 1
/*
2
 * findtext.c
3
 * Copyright (C) 1998-2004 A.J. van Os; Released under GNU GPL
4
 *
5
 * Description:
6
 * Find the blocks that contain the text of MS Word files
7
 */
8
 
9
#include <stdio.h>
10
#include <stdlib.h>
11
#include "antiword.h"
12
 
13
 
14
/*
15
 * bAddTextBlocks - Add the blocks to the text block list
16
 *
17
 * Returns TRUE when successful, FALSE if not
18
 */
19
BOOL
20
bAddTextBlocks(ULONG ulCharPosFirst, ULONG ulTotalLength,
21
	BOOL bUsesUnicode, USHORT usPropMod,
22
	ULONG ulStartBlock, const ULONG *aulBBD, size_t tBBDLen)
23
{
24
	text_block_type	tTextBlock;
25
	ULONG	ulCharPos, ulOffset, ulIndex;
26
	long	lToGo;
27
 
28
	fail(ulTotalLength > (ULONG)LONG_MAX / 2);
29
	fail(ulStartBlock > MAX_BLOCKNUMBER && ulStartBlock != END_OF_CHAIN);
30
	fail(aulBBD == NULL);
31
 
32
	NO_DBG_HEX(ulCharPosFirst);
33
	NO_DBG_DEC(ulTotalLength);
34
 
35
	if (bUsesUnicode) {
36
		/* One character equals two bytes */
37
		NO_DBG_MSG("Uses Unicode");
38
		lToGo = (long)ulTotalLength * 2;
39
	} else {
40
		/* One character equals one byte */
41
		NO_DBG_MSG("Uses ASCII");
42
		lToGo = (long)ulTotalLength;
43
	}
44
 
45
	ulCharPos = ulCharPosFirst;
46
	ulOffset = ulCharPosFirst;
47
	for (ulIndex = ulStartBlock;
48
	     ulIndex != END_OF_CHAIN && lToGo > 0;
49
	     ulIndex = aulBBD[ulIndex]) {
50
		if (ulIndex >= (ULONG)tBBDLen) {
51
			DBG_DEC(ulIndex);
52
			DBG_DEC(tBBDLen);
53
			werr(1, "The Big Block Depot is damaged");
54
		}
55
		if (ulOffset >= BIG_BLOCK_SIZE) {
56
			ulOffset -= BIG_BLOCK_SIZE;
57
			continue;
58
		}
59
		tTextBlock.ulFileOffset =
60
			(ulIndex + 1) * BIG_BLOCK_SIZE + ulOffset;
61
		tTextBlock.ulCharPos = ulCharPos;
62
		tTextBlock.ulLength = min(BIG_BLOCK_SIZE - ulOffset,
63
						(ULONG)lToGo);
64
		tTextBlock.bUsesUnicode = bUsesUnicode;
65
		tTextBlock.usPropMod = usPropMod;
66
		ulOffset = 0;
67
		if (!bAdd2TextBlockList(&tTextBlock)) {
68
			DBG_HEX(tTextBlock.ulFileOffset);
69
			DBG_HEX(tTextBlock.ulCharPos);
70
			DBG_DEC(tTextBlock.ulLength);
71
			DBG_DEC(tTextBlock.bUsesUnicode);
72
			DBG_DEC(tTextBlock.usPropMod);
73
			return FALSE;
74
		}
75
		ulCharPos += tTextBlock.ulLength;
76
		lToGo -= (long)tTextBlock.ulLength;
77
	}
78
	DBG_DEC_C(lToGo != 0, lToGo);
79
	return lToGo == 0;
80
} /* end of bAddTextBlocks */
81
 
82
/*
83
 * bGet6DocumentText - make a list of the text blocks of Word 6/7 files
84
 *
85
 * Code for "fast saved" files.
86
 *
87
 * Returns TRUE when successful, FALSE if not
88
 */
89
BOOL
90
bGet6DocumentText(FILE *pFile, BOOL bUsesUnicode, ULONG ulStartBlock,
91
	const ULONG *aulBBD, size_t tBBDLen, const UCHAR *aucHeader)
92
{
93
	UCHAR	*aucBuffer;
94
	ULONG	ulBeginTextInfo, ulTextOffset, ulTotLength;
95
	size_t	tTextInfoLen;
96
	int	iIndex, iType, iOff, iLen, iPieces;
97
	USHORT	usPropMod;
98
 
99
	DBG_MSG("bGet6DocumentText");
100
 
101
	fail(pFile == NULL);
102
	fail(aulBBD == NULL);
103
	fail(aucHeader == NULL);
104
 
105
	ulBeginTextInfo = ulGetLong(0x160, aucHeader);	/* fcClx */
106
	DBG_HEX(ulBeginTextInfo);
107
	tTextInfoLen = (size_t)ulGetLong(0x164, aucHeader);	/* lcbClx */
108
	DBG_DEC(tTextInfoLen);
109
 
110
	aucBuffer = xmalloc(tTextInfoLen);
111
	if (!bReadBuffer(pFile, ulStartBlock,
112
			aulBBD, tBBDLen, BIG_BLOCK_SIZE,
113
			aucBuffer, ulBeginTextInfo, tTextInfoLen)) {
114
		aucBuffer = xfree(aucBuffer);
115
		return FALSE;
116
	}
117
	NO_DBG_PRINT_BLOCK(aucBuffer, tTextInfoLen);
118
 
119
	iOff = 0;
120
	while ((size_t)iOff < tTextInfoLen) {
121
		iType = (int)ucGetByte(iOff, aucBuffer);
122
		iOff++;
123
		if (iType == 0) {
124
			DBG_FIXME();
125
			iOff++;
126
			continue;
127
		}
128
		if (iType == 1) {
129
			iLen = (int)usGetWord(iOff, aucBuffer);
130
			vAdd2PropModList(aucBuffer + iOff);
131
			iOff += iLen + 2;
132
			continue;
133
		}
134
		if (iType != 2) {
135
			werr(0, "Unknown type of 'fastsaved' format");
136
			aucBuffer = xfree(aucBuffer);
137
			return FALSE;
138
		}
139
		/* Type 2 */
140
		iLen = (int)usGetWord(iOff, aucBuffer);
141
		NO_DBG_DEC(iLen);
142
		iOff += 4;
143
		iPieces = (iLen - 4) / 12;
144
		DBG_DEC(iPieces);
145
		for (iIndex = 0; iIndex < iPieces; iIndex++) {
146
			ulTextOffset = ulGetLong(
147
				iOff + (iPieces + 1) * 4 + iIndex * 8 + 2,
148
				aucBuffer);
149
			usPropMod = usGetWord(
150
				iOff + (iPieces + 1) * 4 + iIndex * 8 + 6,
151
				aucBuffer);
152
			ulTotLength = ulGetLong(iOff + (iIndex + 1) * 4,
153
						aucBuffer) -
154
					ulGetLong(iOff + iIndex * 4,
155
						aucBuffer);
156
			NO_DBG_HEX_C(usPropMod != 0, usPropMod);
157
			if (!bAddTextBlocks(ulTextOffset, ulTotLength,
158
					bUsesUnicode, usPropMod,
159
					ulStartBlock,
160
					aulBBD, tBBDLen)) {
161
				aucBuffer = xfree(aucBuffer);
162
				return FALSE;
163
			}
164
		}
165
		break;
166
	}
167
	aucBuffer = xfree(aucBuffer);
168
	return TRUE;
169
} /* end of bGet6DocumentText */
170
 
171
/*
172
 * bGet8DocumentText - make a list of the text blocks of Word 8/97 files
173
 *
174
 * Returns TRUE when successful, FALSE if not
175
 */
176
BOOL
177
bGet8DocumentText(FILE *pFile, const pps_info_type *pPPS,
178
	const ULONG *aulBBD, size_t tBBDLen,
179
	const ULONG *aulSBD, size_t tSBDLen,
180
	const UCHAR *aucHeader)
181
{
182
	const ULONG	*aulBlockDepot;
183
	UCHAR	*aucBuffer;
184
	ULONG	ulTextOffset, ulBeginTextInfo;
185
	ULONG	ulTotLength, ulLen;
186
	long	lIndex, lPieces, lOff;
187
	size_t	tTextInfoLen, tBlockDepotLen, tBlockSize;
188
	int	iType, iLen;
189
	BOOL	bUsesUnicode;
190
	USHORT	usPropMod;
191
 
192
	DBG_MSG("bGet8DocumentText");
193
 
194
	fail(pFile == NULL || pPPS == NULL);
195
	fail(aulBBD == NULL || aulSBD == NULL);
196
	fail(aucHeader == NULL);
197
 
198
  	ulBeginTextInfo = ulGetLong(0x1a2, aucHeader);	/* fcClx */
199
	DBG_HEX(ulBeginTextInfo);
200
	tTextInfoLen = (size_t)ulGetLong(0x1a6, aucHeader);	/* lcbClx */
201
	DBG_DEC(tTextInfoLen);
202
 
203
	DBG_DEC(pPPS->tTable.ulSB);
204
	DBG_HEX(pPPS->tTable.ulSize);
205
	if (pPPS->tTable.ulSize == 0) {
206
		return FALSE;
207
	}
208
 
209
	if (pPPS->tTable.ulSize < MIN_SIZE_FOR_BBD_USE) {
210
	  	/* Use the Small Block Depot */
211
		aulBlockDepot = aulSBD;
212
		tBlockDepotLen = tSBDLen;
213
		tBlockSize = SMALL_BLOCK_SIZE;
214
	} else {
215
	  	/* Use the Big Block Depot */
216
		aulBlockDepot = aulBBD;
217
		tBlockDepotLen = tBBDLen;
218
		tBlockSize = BIG_BLOCK_SIZE;
219
	}
220
	aucBuffer = xmalloc(tTextInfoLen);
221
	if (!bReadBuffer(pFile, pPPS->tTable.ulSB,
222
			aulBlockDepot, tBlockDepotLen, tBlockSize,
223
			aucBuffer, ulBeginTextInfo, tTextInfoLen)) {
224
		aucBuffer = xfree(aucBuffer);
225
		return FALSE;
226
	}
227
	NO_DBG_PRINT_BLOCK(aucBuffer, tTextInfoLen);
228
 
229
	lOff = 0;
230
	while (lOff < (long)tTextInfoLen) {
231
		iType = (int)ucGetByte(lOff, aucBuffer);
232
		lOff++;
233
		if (iType == 0) {
234
			DBG_FIXME();
235
			lOff++;
236
			continue;
237
		}
238
		if (iType == 1) {
239
			iLen = (int)usGetWord(lOff, aucBuffer);
240
			vAdd2PropModList(aucBuffer + lOff);
241
			lOff += (long)iLen + 2;
242
			continue;
243
		}
244
		if (iType != 2) {
245
			werr(0, "Unknown type of 'fastsaved' format");
246
			aucBuffer = xfree(aucBuffer);
247
			return FALSE;
248
		}
249
		/* Type 2 */
250
		ulLen = ulGetLong(lOff, aucBuffer);
251
		if (ulLen < 4) {
252
			DBG_DEC(ulLen);
253
			return FALSE;
254
		}
255
		lOff += 4;
256
		lPieces = (long)((ulLen - 4) / 12);
257
		DBG_DEC(lPieces);
258
		for (lIndex = 0; lIndex < lPieces; lIndex++) {
259
			ulTextOffset = ulGetLong(
260
				lOff + (lPieces + 1) * 4 + lIndex * 8 + 2,
261
				aucBuffer);
262
			usPropMod = usGetWord(
263
				lOff + (lPieces + 1) * 4 + lIndex * 8 + 6,
264
				aucBuffer);
265
			ulTotLength = ulGetLong(lOff + (lIndex + 1) * 4,
266
						aucBuffer) -
267
					ulGetLong(lOff + lIndex * 4,
268
						aucBuffer);
269
			if ((ulTextOffset & BIT(30)) == 0) {
270
				bUsesUnicode = TRUE;
271
			} else {
272
				bUsesUnicode = FALSE;
273
				ulTextOffset &= ~BIT(30);
274
				ulTextOffset /= 2;
275
			}
276
			NO_DBG_HEX_C(usPropMod != 0, usPropMod);
277
			if (!bAddTextBlocks(ulTextOffset, ulTotLength,
278
					bUsesUnicode, usPropMod,
279
					pPPS->tWordDocument.ulSB,
280
					aulBBD, tBBDLen)) {
281
				aucBuffer = xfree(aucBuffer);
282
				return FALSE;
283
			}
284
		}
285
		break;
286
	}
287
	aucBuffer = xfree(aucBuffer);
288
	return TRUE;
289
} /* end of bGet8DocumentText */