Warning: Attempt to read property "date" on null in /usr/local/www/websvn.planix.org/blame.php on line 247

Warning: Attempt to read property "msg" on null in /usr/local/www/websvn.planix.org/blame.php on line 247
WebSVN – planix.SVN – Blame – /os/branches/feature_tlsv12/sys/src/cmd/aux/antiword/wordlib.c – Rev 2

Subversion Repositories planix.SVN

Rev

Go to most recent revision | Details | Last modification | View Log | RSS feed

Rev Author Line No. Line
2 - 1
/*
2
 * wordlib.c
3
 * Copyright (C) 1998-2004 A.J. van Os; Released under GNU GPL
4
 *
5
 * Description:
6
 * Deal with the internals of a MS Word file
7
 */
8
 
9
#include "antiword.h"
10
 
11
/*
 * Flag behind bIsOldMacFile(): iGetVersionNumber() resets it to FALSE and
 * sets it to TRUE when it recognises Word 4, 5 or 6 for Macintosh
 */
static BOOL	bOldMacFile = FALSE;
12
 
13
 
14
/*
15
 * Common part of the file checking functions
16
 */
17
static BOOL
18
bCheckBytes(FILE *pFile, const UCHAR *aucBytes, size_t tBytes)
19
{
20
	int	iIndex, iChar;
21
 
22
	fail(pFile == NULL || aucBytes == NULL || tBytes == 0);
23
 
24
	rewind(pFile);
25
 
26
	for (iIndex = 0; iIndex < (int)tBytes; iIndex++) {
27
		iChar = getc(pFile);
28
		if (iChar == EOF || iChar != (int)aucBytes[iIndex]) {
29
			NO_DBG_HEX(iChar);
30
			NO_DBG_HEX(aucBytes[iIndex]);
31
			return FALSE;
32
		}
33
	}
34
	return TRUE;
35
} /* end of bCheckBytes */
36
 
37
/*
38
 * This function checks whether the given file is or is not a "Word for DOS"
39
 * document
40
 */
41
BOOL
42
bIsWordForDosFile(FILE *pFile, long lFilesize)
43
{
44
	static UCHAR	aucBytes[] =
45
		{ 0x31, 0xbe, 0x00, 0x00, 0x00, 0xab };	/* Word for DOS */
46
 
47
	DBG_MSG("bIsWordForDosFile");
48
 
49
	if (pFile == NULL || lFilesize < 0) {
50
		DBG_MSG("No proper file given");
51
		return FALSE;
52
	}
53
	if (lFilesize < 128) {
54
		DBG_MSG("File too small to be a Word document");
55
		return FALSE;
56
	}
57
	return bCheckBytes(pFile, aucBytes, elementsof(aucBytes));
58
} /* end of bIsWordForDosFile */
59
 
60
/*
61
 * This function checks whether the given file is or is not a file with an
62
 * OLE envelope (That is a document made by Word 6 or later)
63
 */
64
static BOOL
65
bIsWordFileWithOLE(FILE *pFile, long lFilesize)
66
{
67
	static UCHAR	aucBytes[] =
68
		{ 0xd0, 0xcf, 0x11, 0xe0, 0xa1, 0xb1, 0x1a, 0xe1 };
69
	int	iTailLen;
70
 
71
	if (pFile == NULL || lFilesize < 0) {
72
		DBG_MSG("No proper file given");
73
		return FALSE;
74
	}
75
	if (lFilesize < (long)BIG_BLOCK_SIZE * 3) {
76
		DBG_MSG("This file is too small to be a Word document");
77
		return FALSE;
78
	}
79
 
80
	iTailLen = (int)(lFilesize % BIG_BLOCK_SIZE);
81
	switch (iTailLen) {
82
	case 0:		/* No tail, as it should be */
83
		break;
84
	case 1:
85
	case 2:		/* Filesize mismatch or a buggy email program */
86
		if ((int)(lFilesize % 3) == iTailLen) {
87
			DBG_DEC(lFilesize);
88
			return FALSE;
89
		}
90
		/*
91
		 * Ignore extra bytes caused by buggy email programs.
92
		 * They have bugs in their base64 encoding or decoding.
93
		 * 3 bytes -> 4 ascii chars -> 3 bytes
94
		 */
95
		DBG_MSG("Document with extra bytes");
96
		break;
97
	default:	/* Wrong filesize for a Word document */
98
		DBG_DEC(lFilesize);
99
		DBG_DEC(iTailLen);
100
		return FALSE;
101
	}
102
	return bCheckBytes(pFile, aucBytes, elementsof(aucBytes));
103
} /* end of bIsWordFileWithOLE */
104
 
105
/*
106
 * This function checks whether the given file is or is not a RTF document
107
 */
108
BOOL
109
bIsRtfFile(FILE *pFile)
110
{
111
	static UCHAR	aucBytes[] =
112
		{ '{', '\\', 'r', 't', 'f', '1' };
113
 
114
	DBG_MSG("bIsRtfFile");
115
 
116
	return bCheckBytes(pFile, aucBytes, elementsof(aucBytes));
117
} /* end of bIsRtfFile */
118
 
119
/*
120
 * This function checks whether the given file is or is not a WP document
121
 */
122
BOOL
123
bIsWordPerfectFile(FILE *pFile)
124
{
125
	static UCHAR	aucBytes[] =
126
		{ 0xff, 'W', 'P', 'C' };
127
 
128
	DBG_MSG("bIsWordPerfectFile");
129
 
130
	return bCheckBytes(pFile, aucBytes, elementsof(aucBytes));
131
} /* end of bIsWordPerfectFile */
132
 
133
/*
134
 * This function checks whether the given file is or is not a "Win Word 1 or 2"
135
 * document
136
 */
137
BOOL
138
bIsWinWord12File(FILE *pFile, long lFilesize)
139
{
140
	static UCHAR	aucBytes[2][4] = {
141
		{ 0x9b, 0xa5, 0x21, 0x00 },	/* Win Word 1.x */
142
		{ 0xdb, 0xa5, 0x2d, 0x00 },	/* Win Word 2.0 */
143
	};
144
	int	iIndex;
145
 
146
	DBG_MSG("bIsWinWord12File");
147
 
148
	if (pFile == NULL || lFilesize < 0) {
149
		DBG_MSG("No proper file given");
150
		return FALSE;
151
	}
152
	if (lFilesize < 384) {
153
		DBG_MSG("This file is too small to be a Word document");
154
		return FALSE;
155
	}
156
 
157
	for (iIndex = 0; iIndex < (int)elementsof(aucBytes); iIndex++) {
158
		if (bCheckBytes(pFile,
159
				aucBytes[iIndex],
160
				elementsof(aucBytes[iIndex]))) {
161
			return TRUE;
162
		}
163
	}
164
	return FALSE;
165
} /* end of bIsWinWord12File */
166
 
167
/*
168
 * This function checks whether the given file is or is not a "Mac Word 4 or 5"
169
 * document
170
 */
171
BOOL
172
bIsMacWord45File(FILE *pFile)
173
{
174
	static UCHAR	aucBytes[2][6] = {
175
		{ 0xfe, 0x37, 0x00, 0x1c, 0x00, 0x00 },	/* Mac Word 4 */
176
		{ 0xfe, 0x37, 0x00, 0x23, 0x00, 0x00 },	/* Mac Word 5 */
177
	};
178
	int	iIndex;
179
 
180
	DBG_MSG("bIsMacWord45File");
181
 
182
	for (iIndex = 0; iIndex < (int)elementsof(aucBytes); iIndex++) {
183
		if (bCheckBytes(pFile,
184
				aucBytes[iIndex],
185
				elementsof(aucBytes[iIndex]))) {
186
			return TRUE;
187
		}
188
	}
189
	return FALSE;
190
} /* end of bIsMacWord45File */
191
 
192
/*
 * iGuessVersionNumber - guess the Word version number from first few bytes
 *
 * The checks are tried in a fixed order: Word for DOS, Win Word 1 or 2,
 * Mac Word 4 or 5 and finally a file with an OLE envelope.
 *
 * Returns the guessed version number (0, 2, 5 or 6) or -1 when no guess
 * is possible
 */
int
iGuessVersionNumber(FILE *pFile, long lFilesize)
{
	int	iVersion;

	if (bIsWordForDosFile(pFile, lFilesize)) {
		iVersion = 0;
	} else if (bIsWinWord12File(pFile, lFilesize)) {
		iVersion = 2;
	} else if (bIsMacWord45File(pFile)) {
		iVersion = 5;
	} else if (bIsWordFileWithOLE(pFile, lFilesize)) {
		iVersion = 6;
	} else {
		/* None of the checks recognised the file */
		iVersion = -1;
	}
	return iVersion;
} /* end of iGuessVersionNumber */
214
 
215
/*
216
 * iGetVersionNumber - get the Word version number from the header
217
 *
218
 * Returns the version number or -1 when unknown
219
 */
220
int
221
iGetVersionNumber(const UCHAR *aucHeader)
222
{
223
	USHORT	usFib, usChse;
224
 
225
	usFib = usGetWord(0x02, aucHeader);
226
	if (usFib >= 0x1000) {
227
		/* To big: must be MacWord using Big Endian */
228
		DBG_HEX(usFib);
229
		usFib = usGetWordBE(0x02, aucHeader);
230
	}
231
	DBG_DEC(usFib);
232
	bOldMacFile = FALSE;
233
	switch (usFib) {
234
	case   0:
235
		DBG_MSG("Word for DOS");
236
		return 0;
237
	case  28:
238
		DBG_MSG("Word 4 for Macintosh");
239
		bOldMacFile = TRUE;
240
		return 4;
241
	case  33:
242
		DBG_MSG("Word 1.x for Windows");
243
		return 1;
244
	case  35:
245
		DBG_MSG("Word 5 for Macintosh");
246
		bOldMacFile = TRUE;
247
		return 5;
248
	case  45:
249
		DBG_MSG("Word 2 for Windows");
250
		return 2;
251
	case 101:
252
	case 102:
253
		DBG_MSG("Word 6 for Windows");
254
		return 6;
255
	case 103:
256
	case 104:
257
		usChse = usGetWord(0x14, aucHeader);
258
		DBG_DEC(usChse);
259
		switch (usChse) {
260
		case 0:
261
			DBG_MSG("Word 7 for Win95");
262
			return 7;
263
		case 256:
264
			DBG_MSG("Word 6 for Macintosh");
265
			bOldMacFile = TRUE;
266
			return 6;
267
		default:
268
			DBG_FIXME();
269
			if ((int)ucGetByte(0x05, aucHeader) == 0xe0) {
270
				DBG_MSG("Word 7 for Win95");
271
				return 7;
272
			}
273
			DBG_MSG("Word 6 for Macintosh");
274
			bOldMacFile = TRUE;
275
			return 6;
276
		}
277
	default:
278
		usChse = usGetWord(0x14, aucHeader);
279
		DBG_DEC(usChse);
280
		if (usFib < 192) {
281
			/* Unknown or unsupported version of Word */
282
			DBG_DEC(usFib);
283
			return -1;
284
		}
285
		DBG_MSG_C(usChse != 256, "Word97 for Win95/98/NT");
286
		DBG_MSG_C(usChse == 256, "Word98 for Macintosh");
287
		return 8;
288
	}
289
} /* end of iGetVersionNumber */
290
 
291
/*
292
 * TRUE if the current file was made by Word version 6 or older on an
293
 * Apple Macintosh, otherwise FALSE.
294
 * This function hides the methode of how to find out from the rest of the
295
 * program.
296
 */
297
BOOL
298
bIsOldMacFile(void)
299
{
300
	return bOldMacFile;
301
} /* end of bIsOldMacFile */
302
 
303
/*
 * iInitDocument - initialize a document
 *
 * The guess made by iGuessVersionNumber selects the initialization
 * function that matches the kind of file (DOS, WIN, MAC or OLE).
 *
 * Returns the version of Word that made the document or -1
 */
int
iInitDocument(FILE *pFile, long lFilesize)
{
	int	iGuess;

	iGuess = iGuessVersionNumber(pFile, lFilesize);
	if (iGuess == 0) {
		return iInitDocumentDOS(pFile, lFilesize);
	}
	if (iGuess == 2) {
		return iInitDocumentWIN(pFile, lFilesize);
	}
	if (iGuess == 5) {
		return iInitDocumentMAC(pFile, lFilesize);
	}
	if (iGuess == 6) {
		return iInitDocumentOLE(pFile, lFilesize);
	}
	/* No usable guess, so there is nothing to initialize */
	DBG_DEC(iGuess);
	return -1;
} /* end of iInitDocument */
334
 
335
/*
 * vFreeDocument - free a document by free-ing its parts
 *
 * Every part has a destroy function of its own; they are called one after
 * the other. The order of the calls is kept exactly as it always was.
 */
void
vFreeDocument(void)
{
	DBG_MSG("vFreeDocument");

	/* The block lists */
	vDestroyTextBlockList();
	vDestroyDataBlockList();

	/* The information lists */
	vDestroyListInfoList();
	vDestroyRowInfoList();
	vDestroyStyleInfoList();
	vDestroyFontInfoList();
	vDestroyStylesheetList();
	vDestroyPictInfoList();
	vDestroyDocumentInfoList();
	vDestroySectionInfoList();
	vDestroyHdrFtrInfoList();
	vDestroyPropModList();
	vDestroyNotesInfoLists();

	/* The font table and the summary information */
	vDestroyFontTable();
	vDestroySummaryInfo();
} /* end of vFreeDocument */