Subversion Repositories planix.SVN

Rev

Rev 2 | Details | Compare with Previous | Last modification | View Log | RSS feed

Rev Author Line No. Line
2 - 1
/*
2
 * prop0.c
3
 * Copyright (C) 2002-2004 A.J. van Os; Released under GNU GPL
4
 *
5
 * Description:
6
 * Read the property information from a Word for DOS file
7
 */
8
 
9
#include <string.h>
10
#include <time.h>
11
#include "antiword.h"
12
 
13
 
14
/*
 * iGetDosDateField - read a decimal number of one or two digits
 *
 * On success the position is moved past the digits that were read.
 * On failure the position is left untouched.
 *
 * returns the number (0-99) or -1 if there is no digit at the position
 */
static int
iGetDosDateField(const unsigned char **ppucPos)
{
	const unsigned char	*pucTmp;
	int	iValue;

	pucTmp = *ppucPos;
	/*
	 * The characters are read as unsigned char, because passing a
	 * negative value (other than EOF) to isdigit is undefined
	 */
	if (!isdigit(*pucTmp)) {
		return -1;
	}
	iValue = (int)(*pucTmp - '0');
	pucTmp++;
	if (isdigit(*pucTmp)) {
		iValue = iValue * 10 + (int)(*pucTmp - '0');
		pucTmp++;
	}
	*ppucPos = pucTmp;
	return iValue;
} /* end of iGetDosDateField */

/*
 * tConvertDosDate - convert DOS date format
 *
 * The expected format is M[M]<sep>D[D]<sep>Y[Y], where <sep> is one
 * character that is neither alphanumeric nor the end of the string.
 * A year below 80 is taken to be in the 21st century (00 means 2000).
 * The time of day is left at zero and mktime decides on daylight saving.
 *
 * returns Unix time_t or -1
 */
static time_t
tConvertDosDate(const char *szDosDate)
{
	struct tm	tTime;
	const unsigned char	*pucTmp;
	time_t		tResult;
	int		iMonth, iDay, iYear;

	if (szDosDate == NULL) {
		return (time_t)-1;
	}

	pucTmp = (const unsigned char *)szDosDate;
	/* Get the month */
	iMonth = iGetDosDateField(&pucTmp);
	if (iMonth < 0) {
		return (time_t)-1;
	}
	/*
	 * Get the first separator
	 * The end of the string is not a separator; stepping over it
	 * would mean reading beyond the end of the string
	 */
	if (*pucTmp == '\0' || isalnum(*pucTmp)) {
		return (time_t)-1;
	}
	pucTmp++;
	/* Get the day */
	iDay = iGetDosDateField(&pucTmp);
	if (iDay < 0) {
		return (time_t)-1;
	}
	/* Get the second separator */
	if (*pucTmp == '\0' || isalnum(*pucTmp)) {
		return (time_t)-1;
	}
	pucTmp++;
	/* Get the year */
	iYear = iGetDosDateField(&pucTmp);
	if (iYear < 0) {
		return (time_t)-1;
	}
	/*
	 * Check the values
	 * Without the upper limit on the month, mktime would silently
	 * turn month 13 into January of the next year
	 */
	if (iMonth < 1 || iMonth > 12 || iDay < 1 || iDay > 31) {
		return (time_t)-1;
	}
	/* Correct the values */
	memset(&tTime, 0, sizeof(tTime));
	tTime.tm_mon = iMonth - 1;	/* From 01-12 to 00-11 */
	tTime.tm_mday = iDay;
	tTime.tm_year = iYear;
	if (tTime.tm_year < 80) {
		tTime.tm_year += 100;	/* 00 means 2000 is 100 */
	}
	tTime.tm_isdst = -1;
	tResult = mktime(&tTime);
	NO_DBG_MSG(ctime(&tResult));
	return tResult;
} /* end of tConvertDosDate */
85
 
86
/*
87
 * Build the lists with Document Property Information for Word for DOS files
88
 */
89
void
90
vGet0DopInfo(FILE *pFile, const UCHAR *aucHeader)
91
{
92
	document_block_type	tDocument;
93
	UCHAR	*aucBuffer;
94
	ULONG	ulBeginSumdInfo, ulBeginNextBlock;
95
	size_t	tLen;
96
	USHORT	usOffset;
97
 
98
        tDocument.ucHdrFtrSpecification = 0;
99
        tDocument.usDefaultTabWidth = usGetWord(0x70, aucHeader); /* dxaTab */
100
        tDocument.tCreateDate = (time_t)-1;
101
        tDocument.tRevisedDate = (time_t)-1;
102
 
103
	ulBeginSumdInfo = 128 * (ULONG)usGetWord(0x1c, aucHeader);
104
	DBG_HEX(ulBeginSumdInfo);
105
	ulBeginNextBlock = 128 * (ULONG)usGetWord(0x6a, aucHeader);
106
	DBG_HEX(ulBeginNextBlock);
107
 
108
	if (ulBeginSumdInfo < ulBeginNextBlock && ulBeginNextBlock != 0) {
109
		/* There is a summary information block */
110
		tLen = (size_t)(ulBeginNextBlock - ulBeginSumdInfo);
111
		aucBuffer = xmalloc(tLen);
112
		/* Read the summary information block */
113
		if (bReadBytes(aucBuffer, tLen, ulBeginSumdInfo, pFile)) {
114
       			usOffset = usGetWord(12, aucBuffer);
115
			if (aucBuffer[usOffset] != 0) {
116
				NO_DBG_STRN(aucBuffer + usOffset, 8);
117
				tDocument.tRevisedDate =
118
				tConvertDosDate((char *)aucBuffer + usOffset);
119
			}
120
			usOffset = usGetWord(14, aucBuffer);
121
			if (aucBuffer[usOffset] != 0) {
122
				NO_DBG_STRN(aucBuffer + usOffset, 8);
123
				tDocument.tCreateDate =
124
				tConvertDosDate((char *)aucBuffer + usOffset);
125
			}
126
		}
127
		aucBuffer = xfree(aucBuffer);
128
	}
129
        vCreateDocumentInfoList(&tDocument);
130
} /* end of vGet0DopInfo */
131
 
132
/*
133
 * Fill the section information block with information
134
 * from a Word for DOS file.
135
 */
136
static void
137
vGet0SectionInfo(const UCHAR *aucGrpprl, size_t tBytes,
138
		section_block_type *pSection)
139
{
140
	USHORT	usCcol;
141
	UCHAR	ucTmp;
142
 
143
	fail(aucGrpprl == NULL || pSection == NULL);
144
 
145
	if (tBytes < 2) {
146
		return;
147
	}
148
	/* bkc */
149
	ucTmp = ucGetByte(1, aucGrpprl);
150
	DBG_HEX(ucTmp);
151
	ucTmp &= 0x07;
152
	DBG_HEX(ucTmp);
153
	pSection->bNewPage = ucTmp != 0 && ucTmp != 1;
154
	if (tBytes < 18) {
155
		return;
156
	}
157
	/* ccolM1 */
158
	usCcol = (USHORT)ucGetByte(17, aucGrpprl);
159
	DBG_DEC(usCcol);
160
} /* end of vGet0SectionInfo */
161
 
162
/*
163
 * Build the lists with Section Property Information for Word for DOS files
164
 */
165
void
166
vGet0SepInfo(FILE *pFile, const UCHAR *aucHeader)
167
{
168
	section_block_type	tSection;
169
	UCHAR	*aucBuffer;
170
	ULONG	ulBeginOfText, ulTextOffset, ulBeginSectInfo;
171
	ULONG	ulCharPos, ulSectPage, ulBeginNextBlock;
172
	size_t	tSectInfoLen, tIndex, tSections, tBytes;
173
	UCHAR	aucTmp[2], aucFpage[35];
174
 
175
	fail(pFile == NULL || aucHeader == NULL);
176
 
177
	ulBeginOfText = 128;
178
	NO_DBG_HEX(ulBeginOfText);
179
	ulBeginSectInfo = 128 * (ULONG)usGetWord(0x18, aucHeader);
180
	DBG_HEX(ulBeginSectInfo);
181
	ulBeginNextBlock = 128 * (ULONG)usGetWord(0x1a, aucHeader);
182
	DBG_HEX(ulBeginNextBlock);
183
	if (ulBeginSectInfo == ulBeginNextBlock) {
184
		/* There is no section information block */
185
		return;
186
	}
187
 
188
	/* Get the the number of sections */
189
	if (!bReadBytes(aucTmp, 2, ulBeginSectInfo, pFile)) {
190
		return;
191
	}
192
	tSections = (size_t)usGetWord(0, aucTmp);
193
	NO_DBG_DEC(tSections);
194
 
195
	/* Read the Section Descriptors */
196
	tSectInfoLen = 10 * tSections;
197
	NO_DBG_DEC(tSectInfoLen);
198
	aucBuffer = xmalloc(tSectInfoLen);
199
	if (!bReadBytes(aucBuffer, tSectInfoLen, ulBeginSectInfo + 4, pFile)) {
200
		aucBuffer = xfree(aucBuffer);
201
		return;
202
	}
203
	NO_DBG_PRINT_BLOCK(aucBuffer, tSectInfoLen);
204
 
205
	/* Read the Section Properties */
206
	for (tIndex = 0; tIndex < tSections; tIndex++) {
207
		ulTextOffset = ulGetLong(10 * tIndex, aucBuffer);
208
		NO_DBG_HEX(ulTextOffset);
209
		ulCharPos = ulBeginOfText + ulTextOffset;
210
		NO_DBG_HEX(ulTextOffset);
211
		ulSectPage = ulGetLong(10 * tIndex + 6, aucBuffer);
212
		NO_DBG_HEX(ulSectPage);
213
		if (ulSectPage == FC_INVALID ||		/* Must use defaults */
214
		    ulSectPage < 128 ||			/* Should not happen */
215
		    ulSectPage >= ulBeginSectInfo) {	/* Should not happen */
216
			DBG_HEX_C(ulSectPage != FC_INVALID, ulSectPage);
217
			vDefault2SectionInfoList(ulCharPos);
218
			continue;
219
		}
220
		/* Get the number of bytes to read */
221
		if (!bReadBytes(aucTmp, 1, ulSectPage, pFile)) {
222
			continue;
223
		}
224
		tBytes = 1 + (size_t)ucGetByte(0, aucTmp);
225
		NO_DBG_DEC(tBytes);
226
		if (tBytes > sizeof(aucFpage)) {
227
			DBG_DEC(tBytes);
228
			tBytes = sizeof(aucFpage);
229
		}
230
		/* Read the bytes */
231
		if (!bReadBytes(aucFpage, tBytes, ulSectPage, pFile)) {
232
			continue;
233
		}
234
		NO_DBG_PRINT_BLOCK(aucFpage, tBytes);
235
		/* Process the bytes */
236
		vGetDefaultSection(&tSection);
237
		vGet0SectionInfo(aucFpage + 1, tBytes - 1, &tSection);
238
		vAdd2SectionInfoList(&tSection, ulCharPos);
239
	}
240
	/* Clean up before you leave */
241
	aucBuffer = xfree(aucBuffer);
242
} /* end of vGet0SepInfo */
243
 
244
/*
245
 * Fill the style information block with information
246
 * from a Word for DOS file.
247
 */
248
static void
249
vGet0StyleInfo(int iFodo, const UCHAR *aucGrpprl, style_block_type *pStyle)
250
{
251
	int	iBytes;
252
	UCHAR	ucTmp;
253
 
254
	fail(iFodo <= 0 || aucGrpprl == NULL || pStyle == NULL);
255
 
256
	pStyle->usIstdNext = ISTD_NORMAL;
257
 
258
	iBytes = (int)ucGetByte(iFodo, aucGrpprl);
259
	if (iBytes < 1) {
260
		return;
261
	}
262
	/* stc if styled */
263
	ucTmp = ucGetByte(iFodo + 1, aucGrpprl);
264
	if ((ucTmp & BIT(0)) != 0) {
265
		ucTmp >>= 1;
266
		if (ucTmp >= 88 && ucTmp <= 94) {
267
			/* Header levels 1 through 7 */
268
			pStyle->usIstd = ucTmp - 87;
269
			pStyle->ucNumLevel = 1;
270
		}
271
	}
272
	if (iBytes < 2) {
273
		return;
274
	}
275
	/* jc */
276
	ucTmp = ucGetByte(iFodo + 2, aucGrpprl);
277
	pStyle->ucAlignment = ucTmp & 0x02;
278
	if (iBytes < 3) {
279
		return;
280
	}
281
	/* stc */
282
	ucTmp = ucGetByte(iFodo + 3, aucGrpprl);
283
	ucTmp &= 0x7f;
284
	if (ucTmp >= 88 && ucTmp <= 94) {
285
		/* Header levels 1 through 7 */
286
		pStyle->usIstd = ucTmp - 87;
287
		pStyle->ucNumLevel = 1;
288
	}
289
	if (iBytes < 6) {
290
		return;
291
	}
292
	/* dxaRight */
293
	pStyle->sRightIndent = (short)usGetWord(iFodo + 5, aucGrpprl);
294
	NO_DBG_DEC(pStyle->sRightIndent);
295
	if (iBytes < 8) {
296
		return;
297
	}
298
	/* dxaLeft */
299
	pStyle->sLeftIndent = (short)usGetWord(iFodo + 7, aucGrpprl);
300
	NO_DBG_DEC(pStyle->sLeftIndent);
301
	if (iBytes < 10) {
302
		return;
303
	}
304
	/* dxaLeft1 */
305
	pStyle->sLeftIndent1 = (short)usGetWord(iFodo + 9, aucGrpprl);
306
	NO_DBG_DEC(pStyle->sLeftIndent1);
307
	if (iBytes < 14) {
308
		return;
309
	}
310
	/* dyaBefore */
311
	pStyle->usBeforeIndent = usGetWord(iFodo + 13, aucGrpprl);
312
	NO_DBG_DEC(pStyle->usBeforeIndent);
313
	if (iBytes < 16) {
314
		return;
315
	}
316
	/* dyaAfter */
317
	pStyle->usAfterIndent = usGetWord(iFodo + 15, aucGrpprl);
318
	NO_DBG_DEC(pStyle->usAfterIndent);
319
} /* end of vGet0StyleInfo */
320
 
321
/*
322
 * Build the lists with Paragraph Information for Word for DOS files
323
 */
324
void
325
vGet0PapInfo(FILE *pFile, const UCHAR *aucHeader)
326
{
327
	style_block_type	tStyle;
328
	ULONG	ulBeginParfInfo, ulCharPos, ulCharPosNext;
329
	int	iIndex, iRun, iFodo;
330
	UCHAR	aucFpage[128];
331
 
332
	fail(pFile == NULL || aucHeader == NULL);
333
 
334
	ulBeginParfInfo = 128 * (ULONG)usGetWord(0x12, aucHeader);
335
	NO_DBG_HEX(ulBeginParfInfo);
336
 
337
	do {
338
		if (!bReadBytes(aucFpage, 128, ulBeginParfInfo, pFile)) {
339
			return;
340
		}
341
		NO_DBG_PRINT_BLOCK(aucFpage, 128);
342
		ulCharPosNext = ulGetLong(0, aucFpage);
343
		iRun = (int)ucGetByte(0x7f, aucFpage);
344
		NO_DBG_DEC(iRun);
345
		for (iIndex = 0; iIndex < iRun; iIndex++) {
346
			iFodo = (int)usGetWord(6 * iIndex + 8, aucFpage);
347
			if (iFodo <= 0 || iFodo > 0x79) {
348
				DBG_DEC_C(iFodo != (int)0xffff, iFodo);
349
				continue;
350
			}
351
			vFillStyleFromStylesheet(0, &tStyle);
352
			vGet0StyleInfo(iFodo, aucFpage + 4, &tStyle);
353
			ulCharPos = ulCharPosNext;
354
			ulCharPosNext = ulGetLong(6 * iIndex + 4, aucFpage);
355
			tStyle.ulFileOffset = ulCharPos;
356
			vAdd2StyleInfoList(&tStyle);
357
		}
358
		ulBeginParfInfo += 128;
359
	} while (ulCharPosNext == ulBeginParfInfo);
360
} /* end of vGet0PapInfo */
361
 
362
/*
363
 * Fill the font information block with information
364
 * from a Word for DOS file.
365
 */
366
static void
367
vGet0FontInfo(int iFodo, const UCHAR *aucGrpprl, font_block_type *pFont)
368
{
369
	int	iBytes;
370
	UCHAR	ucTmp;
371
 
372
	fail(iFodo <= 0 || aucGrpprl == NULL || pFont == NULL);
373
 
374
	iBytes = (int)ucGetByte(iFodo, aucGrpprl);
375
	if (iBytes < 2) {
376
		return;
377
	}
378
	/* fBold, fItalic, cFtc */
379
	ucTmp = ucGetByte(iFodo + 2, aucGrpprl);
380
	if ((ucTmp & BIT(0)) != 0) {
381
		pFont->usFontStyle |= FONT_BOLD;
382
	}
383
	if ((ucTmp & BIT(1)) != 0) {
384
		pFont->usFontStyle |= FONT_ITALIC;
385
	}
386
	pFont->ucFontNumber = ucTmp >> 2;
387
	NO_DBG_DEC(pFont->ucFontNumber);
388
	if (iBytes < 3) {
389
		return;
390
	}
391
	/* cHps */
392
	pFont->usFontSize = (USHORT)ucGetByte(iFodo + 3, aucGrpprl);
393
	NO_DBG_DEC(pFont->usFontSize);
394
	if (iBytes < 4) {
395
		return;
396
	}
397
	/* cKul, fStrike, fCaps, fSmallCaps, fVanish */
398
	ucTmp = ucGetByte(iFodo + 4, aucGrpprl);
399
	if ((ucTmp & BIT(0)) != 0 || (ucTmp & BIT(2)) != 0) {
400
		pFont->usFontStyle |= FONT_UNDERLINE;
401
	}
402
	if ((ucTmp & BIT(1)) != 0) {
403
		pFont->usFontStyle |= FONT_STRIKE;
404
	}
405
	if ((ucTmp & BIT(4)) != 0) {
406
		pFont->usFontStyle |= FONT_CAPITALS;
407
	}
408
	if ((ucTmp & BIT(5)) != 0) {
409
		pFont->usFontStyle |= FONT_SMALL_CAPITALS;
410
	}
411
	if ((ucTmp & BIT(7)) != 0) {
412
		pFont->usFontStyle |= FONT_HIDDEN;
413
	}
414
	DBG_HEX(pFont->usFontStyle);
415
	if (iBytes < 6) {
416
		return;
417
	}
418
	/* cIss */
419
	ucTmp = ucGetByte(iFodo + 6, aucGrpprl);
420
	if (ucTmp != 0) {
421
		if (ucTmp < 128) {
422
			pFont->usFontStyle |= FONT_SUPERSCRIPT;
423
			DBG_MSG("Superscript");
424
		} else {
425
			pFont->usFontStyle |= FONT_SUBSCRIPT;
426
			DBG_MSG("Subscript");
427
		}
428
	}
429
	if (iBytes < 7) {
430
		return;
431
	}
432
	/* cIco */
433
	ucTmp = ucGetByte(iFodo + 7, aucGrpprl);
434
	switch (ucTmp & 0x07) {
435
	case 0: pFont->ucFontColor = FONT_COLOR_BLACK; break;
436
	case 1: pFont->ucFontColor = FONT_COLOR_RED; break;
437
	case 2: pFont->ucFontColor = FONT_COLOR_GREEN; break;
438
	case 3: pFont->ucFontColor = FONT_COLOR_BLUE; break;
439
	case 4: pFont->ucFontColor = FONT_COLOR_CYAN; break;
440
	case 5: pFont->ucFontColor = FONT_COLOR_MAGENTA; break;
441
	case 6: pFont->ucFontColor = FONT_COLOR_YELLOW; break;
442
	case 7: pFont->ucFontColor = FONT_COLOR_WHITE; break;
443
	default:pFont->ucFontColor = FONT_COLOR_BLACK; break;
444
	}
445
	NO_DBG_DEC(pFont->ucFontColor);
446
} /* end of vGet0FontInfo */
447
 
448
/*
449
 * Build the lists with Character Information for Word for DOS files
450
 */
451
void
452
vGet0ChrInfo(FILE *pFile, const UCHAR *aucHeader)
453
{
454
	font_block_type		tFont;
455
	ULONG	ulBeginCharInfo, ulCharPos, ulCharPosNext;
456
	int	iIndex, iRun, iFodo;
457
	UCHAR	aucFpage[128];
458
 
459
	fail(pFile == NULL || aucHeader == NULL);
460
 
461
	ulBeginCharInfo = ulGetLong(0x0e, aucHeader);
462
	NO_DBG_HEX(ulBeginCharInfo);
463
	ulBeginCharInfo = ROUND128(ulBeginCharInfo);
464
	NO_DBG_HEX(ulBeginCharInfo);
465
 
466
	do {
467
		if (!bReadBytes(aucFpage, 128, ulBeginCharInfo, pFile)) {
468
			return;
469
		}
470
		NO_DBG_PRINT_BLOCK(aucFpage, 128);
471
		ulCharPosNext = ulGetLong(0, aucFpage);
472
		iRun = (int)ucGetByte(0x7f, aucFpage);
473
		NO_DBG_DEC(iRun);
474
		for (iIndex = 0; iIndex < iRun; iIndex++) {
475
			iFodo = (int)usGetWord(6 * iIndex + 8, aucFpage);
476
			if (iFodo <= 0 || iFodo > 0x79) {
477
				DBG_DEC_C(iFodo != (int)0xffff, iFodo);
478
				continue;
479
			}
480
			vFillFontFromStylesheet(0, &tFont);
481
			vGet0FontInfo(iFodo, aucFpage + 4, &tFont);
482
			ulCharPos = ulCharPosNext;
483
			ulCharPosNext = ulGetLong(6 * iIndex + 4, aucFpage);
484
			tFont.ulFileOffset = ulCharPos;
485
			vAdd2FontInfoList(&tFont);
486
		}
487
		ulBeginCharInfo += 128;
488
	} while (ulCharPosNext == ulBeginCharInfo);
489
} /* end of vGet0ChrInfo */