examples/PIPS/antiword/src/prop0.c

00001 /*
00002  * prop0.c
00003  * Copyright (C) 2002-2004 A.J. van Os; Released under GNU GPL
00004  *
00005  * Description:
00006  * Read the property information from a Word for DOS file
00007  */
00008 
#include <ctype.h>
#include <string.h>
#include <time.h>
#include "antiword.h"
00012 
00013 
/*
 * iGetDateField - read a date field of one or two decimal digits
 *
 * On success *ppcPos is moved past the digits that were read.
 *
 * returns the value of the field (0-99) or -1 if *ppcPos is not a digit
 */
static int
iGetDateField(const char **ppcPos)
{
        const char      *pcTmp;
        int             iValue;

        pcTmp = *ppcPos;
        /*
         * The <ctype.h> functions are only defined for EOF and for values
         * that fit in an unsigned char, so never pass them a plain char.
         */
        if (!isdigit((unsigned char)*pcTmp)) {
                return -1;
        }
        iValue = (int)(*pcTmp - '0');
        pcTmp++;
        if (isdigit((unsigned char)*pcTmp)) {
                iValue = 10 * iValue + (int)(*pcTmp - '0');
                pcTmp++;
        }
        *ppcPos = pcTmp;
        return iValue;
} /* end of iGetDateField */

/*
 * tConvertDosDate - convert DOS date format
 *
 * The expected format is month, day and year, each of one or two digits,
 * with a single non-alphanumeric separator character between the fields.
 * Anything that follows the year is ignored. Years below 80 are taken to
 * be in the 21st century (00 is 2000), the other years in the 20th.
 *
 * szDosDate    the NUL-terminated date string
 *
 * returns Unix time_t or -1
 */
static time_t
tConvertDosDate(const char *szDosDate)
{
        struct tm       tTime;
        const char      *pcTmp;
        int             aiField[3];     /* month, day, year */
        int             iIndex;

        if (szDosDate == NULL) {
                return (time_t)-1;
        }

        pcTmp = szDosDate;
        for (iIndex = 0; iIndex < 3; iIndex++) {
                if (iIndex > 0) {
                        /*
                         * Get the separator. The end of the string is not
                         * a separator; stepping over it would mean reading
                         * beyond the end of the string.
                         */
                        if (*pcTmp == '\0' ||
                            isalnum((unsigned char)*pcTmp)) {
                                return (time_t)-1;
                        }
                        pcTmp++;
                }
                aiField[iIndex] = iGetDateField(&pcTmp);
                if (aiField[iIndex] < 0) {
                        return (time_t)-1;
                }
        }

        /* Check the values */
        if (aiField[0] < 1 || aiField[0] > 12 ||
            aiField[1] < 1 || aiField[1] > 31) {
                return (time_t)-1;
        }

        memset(&tTime, 0, sizeof(tTime));
        tTime.tm_mon = aiField[0] - 1;  /* From 01-12 to 00-11 */
        tTime.tm_mday = aiField[1];
        tTime.tm_year = aiField[2];
        if (tTime.tm_year < 80) {
                tTime.tm_year += 100;   /* 00 means 2000 is 100 */
        }
        /* Let mktime find out whether daylight saving time is in effect */
        tTime.tm_isdst = -1;
        return mktime(&tTime);
} /* end of tConvertDosDate */
00085 
00086 /*
00087  * Build the lists with Document Property Information for Word for DOS files
00088  */
00089 void
00090 vGet0DopInfo(FILE *pFile, const UCHAR *aucHeader)
00091 {
00092         document_block_type     tDocument;
00093         UCHAR   *aucBuffer;
00094         ULONG   ulBeginSumdInfo, ulBeginNextBlock;
00095         size_t  tLen;
00096         USHORT  usOffset;
00097 
00098         tDocument.ucHdrFtrSpecification = 0;
00099         tDocument.usDefaultTabWidth = usGetWord(0x70, aucHeader); /* dxaTab */
00100         tDocument.tCreateDate = (time_t)-1;
00101         tDocument.tRevisedDate = (time_t)-1;
00102 
00103         ulBeginSumdInfo = 128 * (ULONG)usGetWord(0x1c, aucHeader);
00104         DBG_HEX(ulBeginSumdInfo);
00105         ulBeginNextBlock = 128 * (ULONG)usGetWord(0x6a, aucHeader);
00106         DBG_HEX(ulBeginNextBlock);
00107 
00108         if (ulBeginSumdInfo < ulBeginNextBlock && ulBeginNextBlock != 0) {
00109                 /* There is a summary information block */
00110                 tLen = (size_t)(ulBeginNextBlock - ulBeginSumdInfo);
00111                 aucBuffer = xmalloc(tLen);
00112                 /* Read the summary information block */
00113                 if (bReadBytes(aucBuffer, tLen, ulBeginSumdInfo, pFile)) {
00114                         usOffset = usGetWord(12, aucBuffer);
00115                         if (aucBuffer[usOffset] != 0) {
00116                                 NO_DBG_STRN(aucBuffer + usOffset, 8);
00117                                 tDocument.tRevisedDate =
00118                                 tConvertDosDate((char *)aucBuffer + usOffset);
00119                         }
00120                         usOffset = usGetWord(14, aucBuffer);
00121                         if (aucBuffer[usOffset] != 0) {
00122                                 NO_DBG_STRN(aucBuffer + usOffset, 8);
00123                                 tDocument.tCreateDate =
00124                                 tConvertDosDate((char *)aucBuffer + usOffset);
00125                         }
00126                 }
00127                 aucBuffer = xfree(aucBuffer);
00128         }
00129         vCreateDocumentInfoList(&tDocument);
00130 } /* end of vGet0DopInfo */
00131 
00132 /*
00133  * Fill the section information block with information
00134  * from a Word for DOS file.
00135  */
00136 static void
00137 vGet0SectionInfo(const UCHAR *aucGrpprl, size_t tBytes,
00138                 section_block_type *pSection)
00139 {
00140         USHORT  usCcol;
00141         UCHAR   ucTmp;
00142 
00143         fail(aucGrpprl == NULL || pSection == NULL);
00144 
00145         if (tBytes < 2) {
00146                 return;
00147         }
00148         /* bkc */
00149         ucTmp = ucGetByte(1, aucGrpprl);
00150         DBG_HEX(ucTmp);
00151         ucTmp &= 0x07;
00152         DBG_HEX(ucTmp);
00153         pSection->bNewPage = ucTmp != 0 && ucTmp != 1;
00154         if (tBytes < 18) {
00155                 return;
00156         }
00157         /* ccolM1 */
00158         usCcol = (USHORT)ucGetByte(17, aucGrpprl);
00159         DBG_DEC(usCcol);
00160 } /* end of vGet0SectionInfo */
00161 
00162 /*
00163  * Build the lists with Section Property Information for Word for DOS files
00164  */
00165 void
00166 vGet0SepInfo(FILE *pFile, const UCHAR *aucHeader)
00167 {
00168         section_block_type      tSection;
00169         UCHAR   *aucBuffer;
00170         ULONG   ulBeginOfText, ulTextOffset, ulBeginSectInfo;
00171         ULONG   ulCharPos, ulSectPage, ulBeginNextBlock;
00172         size_t  tSectInfoLen, tIndex, tSections, tBytes;
00173         UCHAR   aucTmp[2], aucFpage[35];
00174 
00175         fail(pFile == NULL || aucHeader == NULL);
00176 
00177         ulBeginOfText = 128;
00178         NO_DBG_HEX(ulBeginOfText);
00179         ulBeginSectInfo = 128 * (ULONG)usGetWord(0x18, aucHeader);
00180         DBG_HEX(ulBeginSectInfo);
00181         ulBeginNextBlock = 128 * (ULONG)usGetWord(0x1a, aucHeader);
00182         DBG_HEX(ulBeginNextBlock);
00183         if (ulBeginSectInfo == ulBeginNextBlock) {
00184                 /* There is no section information block */
00185                 return;
00186         }
00187 
00188         /* Get the the number of sections */
00189         if (!bReadBytes(aucTmp, 2, ulBeginSectInfo, pFile)) {
00190                 return;
00191         }
00192         tSections = (size_t)usGetWord(0, aucTmp);
00193         NO_DBG_DEC(tSections);
00194 
00195         /* Read the Section Descriptors */
00196         tSectInfoLen = 10 * tSections;
00197         NO_DBG_DEC(tSectInfoLen);
00198         aucBuffer = xmalloc(tSectInfoLen);
00199         if (!bReadBytes(aucBuffer, tSectInfoLen, ulBeginSectInfo + 4, pFile)) {
00200                 aucBuffer = xfree(aucBuffer);
00201                 return;
00202         }
00203         NO_DBG_PRINT_BLOCK(aucBuffer, tSectInfoLen);
00204 
00205         /* Read the Section Properties */
00206         for (tIndex = 0; tIndex < tSections; tIndex++) {
00207                 ulTextOffset = ulGetLong(10 * tIndex, aucBuffer);
00208                 NO_DBG_HEX(ulTextOffset);
00209                 ulCharPos = ulBeginOfText + ulTextOffset;
00210                 NO_DBG_HEX(ulTextOffset);
00211                 ulSectPage = ulGetLong(10 * tIndex + 6, aucBuffer);
00212                 NO_DBG_HEX(ulSectPage);
00213                 if (ulSectPage == FC_INVALID ||         /* Must use defaults */
00214                     ulSectPage < 128 ||                 /* Should not happen */
00215                     ulSectPage >= ulBeginSectInfo) {    /* Should not happen */
00216                         DBG_HEX_C(ulSectPage != FC_INVALID, ulSectPage);
00217                         vDefault2SectionInfoList(ulCharPos);
00218                         continue;
00219                 }
00220                 /* Get the number of bytes to read */
00221                 if (!bReadBytes(aucTmp, 1, ulSectPage, pFile)) {
00222                         continue;
00223                 }
00224                 tBytes = 1 + (size_t)ucGetByte(0, aucTmp);
00225                 NO_DBG_DEC(tBytes);
00226                 if (tBytes > sizeof(aucFpage)) {
00227                         DBG_DEC(tBytes);
00228                         tBytes = sizeof(aucFpage);
00229                 }
00230                 /* Read the bytes */
00231                 if (!bReadBytes(aucFpage, tBytes, ulSectPage, pFile)) {
00232                         continue;
00233                 }
00234                 NO_DBG_PRINT_BLOCK(aucFpage, tBytes);
00235                 /* Process the bytes */
00236                 vGetDefaultSection(&tSection);
00237                 vGet0SectionInfo(aucFpage + 1, tBytes - 1, &tSection);
00238                 vAdd2SectionInfoList(&tSection, ulCharPos);
00239         }
00240         /* Clean up before you leave */
00241         aucBuffer = xfree(aucBuffer);
00242 } /* end of vGet0SepInfo */
00243 
00244 /*
00245  * Fill the style information block with information
00246  * from a Word for DOS file.
00247  */
00248 static void
00249 vGet0StyleInfo(int iFodo, const UCHAR *aucGrpprl, style_block_type *pStyle)
00250 {
00251         int     iBytes;
00252         UCHAR   ucTmp;
00253 
00254         fail(iFodo <= 0 || aucGrpprl == NULL || pStyle == NULL);
00255 
00256         pStyle->usIstdNext = ISTD_NORMAL;
00257 
00258         iBytes = (int)ucGetByte(iFodo, aucGrpprl);
00259         if (iBytes < 1) {
00260                 return;
00261         }
00262         /* stc if styled */
00263         ucTmp = ucGetByte(iFodo + 1, aucGrpprl);
00264         if ((ucTmp & BIT(0)) != 0) {
00265                 ucTmp >>= 1;
00266                 if (ucTmp >= 88 && ucTmp <= 94) {
00267                         /* Header levels 1 through 7 */
00268                         pStyle->usIstd = ucTmp - 87;
00269                         pStyle->ucNumLevel = 1;
00270                 }
00271         }
00272         if (iBytes < 2) {
00273                 return;
00274         }
00275         /* jc */
00276         ucTmp = ucGetByte(iFodo + 2, aucGrpprl);
00277         pStyle->ucAlignment = ucTmp & 0x02;
00278         if (iBytes < 3) {
00279                 return;
00280         }
00281         /* stc */
00282         ucTmp = ucGetByte(iFodo + 3, aucGrpprl);
00283         ucTmp &= 0x7f;
00284         if (ucTmp >= 88 && ucTmp <= 94) {
00285                 /* Header levels 1 through 7 */
00286                 pStyle->usIstd = ucTmp - 87;
00287                 pStyle->ucNumLevel = 1;
00288         }
00289         if (iBytes < 6) {
00290                 return;
00291         }
00292         /* dxaRight */
00293         pStyle->sRightIndent = (short)usGetWord(iFodo + 5, aucGrpprl);
00294         NO_DBG_DEC(pStyle->sRightIndent);
00295         if (iBytes < 8) {
00296                 return;
00297         }
00298         /* dxaLeft */
00299         pStyle->sLeftIndent = (short)usGetWord(iFodo + 7, aucGrpprl);
00300         NO_DBG_DEC(pStyle->sLeftIndent);
00301         if (iBytes < 10) {
00302                 return;
00303         }
00304         /* dxaLeft1 */
00305         pStyle->sLeftIndent1 = (short)usGetWord(iFodo + 9, aucGrpprl);
00306         NO_DBG_DEC(pStyle->sLeftIndent1);
00307         if (iBytes < 14) {
00308                 return;
00309         }
00310         /* dyaBefore */
00311         pStyle->usBeforeIndent = usGetWord(iFodo + 13, aucGrpprl);
00312         NO_DBG_DEC(pStyle->usBeforeIndent);
00313         if (iBytes < 16) {
00314                 return;
00315         }
00316         /* dyaAfter */
00317         pStyle->usAfterIndent = usGetWord(iFodo + 15, aucGrpprl);
00318         NO_DBG_DEC(pStyle->usAfterIndent);
00319 } /* end of vGet0StyleInfo */
00320 
00321 /*
00322  * Build the lists with Paragraph Information for Word for DOS files
00323  */
00324 void
00325 vGet0PapInfo(FILE *pFile, const UCHAR *aucHeader)
00326 {
00327         style_block_type        tStyle;
00328         ULONG   ulBeginParfInfo, ulCharPos, ulCharPosNext;
00329         int     iIndex, iRun, iFodo;
00330         UCHAR   aucFpage[128];
00331 
00332         fail(pFile == NULL || aucHeader == NULL);
00333 
00334         ulBeginParfInfo = 128 * (ULONG)usGetWord(0x12, aucHeader);
00335         NO_DBG_HEX(ulBeginParfInfo);
00336 
00337         do {
00338                 if (!bReadBytes(aucFpage, 128, ulBeginParfInfo, pFile)) {
00339                         return;
00340                 }
00341                 NO_DBG_PRINT_BLOCK(aucFpage, 128);
00342                 ulCharPosNext = ulGetLong(0, aucFpage);
00343                 iRun = (int)ucGetByte(0x7f, aucFpage);
00344                 NO_DBG_DEC(iRun);
00345                 for (iIndex = 0; iIndex < iRun; iIndex++) {
00346                         iFodo = (int)usGetWord(6 * iIndex + 8, aucFpage);
00347                         if (iFodo <= 0 || iFodo > 0x79) {
00348                                 DBG_DEC_C(iFodo != (int)0xffff, iFodo);
00349                                 continue;
00350                         }
00351                         vFillStyleFromStylesheet(0, &tStyle);
00352                         vGet0StyleInfo(iFodo, aucFpage + 4, &tStyle);
00353                         ulCharPos = ulCharPosNext;
00354                         ulCharPosNext = ulGetLong(6 * iIndex + 4, aucFpage);
00355                         tStyle.ulFileOffset = ulCharPos;
00356                         vAdd2StyleInfoList(&tStyle);
00357                 }
00358                 ulBeginParfInfo += 128;
00359         } while (ulCharPosNext == ulBeginParfInfo);
00360 } /* end of vGet0PapInfo */
00361 
00362 /*
00363  * Fill the font information block with information
00364  * from a Word for DOS file.
00365  */
00366 static void
00367 vGet0FontInfo(int iFodo, const UCHAR *aucGrpprl, font_block_type *pFont)
00368 {
00369         int     iBytes;
00370         UCHAR   ucTmp;
00371 
00372         fail(iFodo <= 0 || aucGrpprl == NULL || pFont == NULL);
00373 
00374         iBytes = (int)ucGetByte(iFodo, aucGrpprl);
00375         if (iBytes < 2) {
00376                 return;
00377         }
00378         /* fBold, fItalic, cFtc */
00379         ucTmp = ucGetByte(iFodo + 2, aucGrpprl);
00380         if ((ucTmp & BIT(0)) != 0) {
00381                 pFont->usFontStyle |= FONT_BOLD;
00382         }
00383         if ((ucTmp & BIT(1)) != 0) {
00384                 pFont->usFontStyle |= FONT_ITALIC;
00385         }
00386         pFont->ucFontNumber = ucTmp >> 2;
00387         NO_DBG_DEC(pFont->ucFontNumber);
00388         if (iBytes < 3) {
00389                 return;
00390         }
00391         /* cHps */
00392         pFont->usFontSize = (USHORT)ucGetByte(iFodo + 3, aucGrpprl);
00393         NO_DBG_DEC(pFont->usFontSize);
00394         if (iBytes < 4) {
00395                 return;
00396         }
00397         /* cKul, fStrike, fCaps, fSmallCaps, fVanish */
00398         ucTmp = ucGetByte(iFodo + 4, aucGrpprl);
00399         if ((ucTmp & BIT(0)) != 0 || (ucTmp & BIT(2)) != 0) {
00400                 pFont->usFontStyle |= FONT_UNDERLINE;
00401         }
00402         if ((ucTmp & BIT(1)) != 0) {
00403                 pFont->usFontStyle |= FONT_STRIKE;
00404         }
00405         if ((ucTmp & BIT(4)) != 0) {
00406                 pFont->usFontStyle |= FONT_CAPITALS;
00407         }
00408         if ((ucTmp & BIT(5)) != 0) {
00409                 pFont->usFontStyle |= FONT_SMALL_CAPITALS;
00410         }
00411         if ((ucTmp & BIT(7)) != 0) {
00412                 pFont->usFontStyle |= FONT_HIDDEN;
00413         }
00414         DBG_HEX(pFont->usFontStyle);
00415         if (iBytes < 6) {
00416                 return;
00417         }
00418         /* cIss */
00419         ucTmp = ucGetByte(iFodo + 6, aucGrpprl);
00420         if (ucTmp != 0) {
00421                 if (ucTmp < 128) {
00422                         pFont->usFontStyle |= FONT_SUPERSCRIPT;
00423                         DBG_MSG("Superscript");
00424                 } else {
00425                         pFont->usFontStyle |= FONT_SUBSCRIPT;
00426                         DBG_MSG("Subscript");
00427                 }
00428         }
00429         if (iBytes < 7) {
00430                 return;
00431         }
00432         /* cIco */
00433         ucTmp = ucGetByte(iFodo + 7, aucGrpprl);
00434         switch (ucTmp & 0x07) {
00435         case 0: pFont->ucFontColor = FONT_COLOR_BLACK; break;
00436         case 1: pFont->ucFontColor = FONT_COLOR_RED; break;
00437         case 2: pFont->ucFontColor = FONT_COLOR_GREEN; break;
00438         case 3: pFont->ucFontColor = FONT_COLOR_BLUE; break;
00439         case 4: pFont->ucFontColor = FONT_COLOR_CYAN; break;
00440         case 5: pFont->ucFontColor = FONT_COLOR_MAGENTA; break;
00441         case 6: pFont->ucFontColor = FONT_COLOR_YELLOW; break;
00442         case 7: pFont->ucFontColor = FONT_COLOR_WHITE; break;
00443         default:pFont->ucFontColor = FONT_COLOR_BLACK; break;
00444         }
00445         NO_DBG_DEC(pFont->ucFontColor);
00446 } /* end of vGet0FontInfo */
00447 
00448 /*
00449  * Build the lists with Character Information for Word for DOS files
00450  */
00451 void
00452 vGet0ChrInfo(FILE *pFile, const UCHAR *aucHeader)
00453 {
00454         font_block_type         tFont;
00455         ULONG   ulBeginCharInfo, ulCharPos, ulCharPosNext;
00456         int     iIndex, iRun, iFodo;
00457         UCHAR   aucFpage[128];
00458 
00459         fail(pFile == NULL || aucHeader == NULL);
00460 
00461         ulBeginCharInfo = ulGetLong(0x0e, aucHeader);
00462         NO_DBG_HEX(ulBeginCharInfo);
00463         ulBeginCharInfo = ROUND128(ulBeginCharInfo);
00464         NO_DBG_HEX(ulBeginCharInfo);
00465 
00466         do {
00467                 if (!bReadBytes(aucFpage, 128, ulBeginCharInfo, pFile)) {
00468                         return;
00469                 }
00470                 NO_DBG_PRINT_BLOCK(aucFpage, 128);
00471                 ulCharPosNext = ulGetLong(0, aucFpage);
00472                 iRun = (int)ucGetByte(0x7f, aucFpage);
00473                 NO_DBG_DEC(iRun);
00474                 for (iIndex = 0; iIndex < iRun; iIndex++) {
00475                         iFodo = (int)usGetWord(6 * iIndex + 8, aucFpage);
00476                         if (iFodo <= 0 || iFodo > 0x79) {
00477                                 DBG_DEC_C(iFodo != (int)0xffff, iFodo);
00478                                 continue;
00479                         }
00480                         vFillFontFromStylesheet(0, &tFont);
00481                         vGet0FontInfo(iFodo, aucFpage + 4, &tFont);
00482                         ulCharPos = ulCharPosNext;
00483                         ulCharPosNext = ulGetLong(6 * iIndex + 4, aucFpage);
00484                         tFont.ulFileOffset = ulCharPos;
00485                         vAdd2FontInfoList(&tFont);
00486                 }
00487                 ulBeginCharInfo += 128;
00488         } while (ulCharPosNext == ulBeginCharInfo);
00489 } /* end of vGet0ChrInfo */

Generated by  doxygen 1.6.2