examples/PIPS/antiword/src/wordole.c

00001 /*
00002  * wordole.c
00003  * Copyright (C) 1998-2004 A.J. van Os; Released under GPL
00004  *
00005  * Description:
00006  * Deal with the OLE internals of a MS Word file
00007  */
00008 
00009 #include <string.h>
00010 #include "antiword.h"
00011 
00012 /* Private type for Property Set Storage entries */
00013 typedef struct pps_entry_tag {
00014         ULONG   ulNext;
00015         ULONG   ulPrevious;
00016         ULONG   ulDir;
00017         ULONG   ulSB;
00018         ULONG   ulSize;
00019         int     iLevel;
00020         char    szName[32];
00021         UCHAR   ucType;
00022 } pps_entry_type;
00023 
/* Value of a PPS link (previous, next or dir) that refers to no entry */
#define PPS_NUMBER_INVALID      0xffffffffUL


/*
 * FREE_ALL - release everything iInitDocumentOLE may have allocated
 *
 * Destroys the small block list and frees the five local arrays, storing
 * the result of xfree back into each pointer. The macro refers to the
 * locals by name, so it can only be used where aulRootList, aulSbdList,
 * aulBbdList, aulSBD and aulBBD are in scope. Keeping it a macro makes
 * sure all such clean-up statements are identical.
 */
#define FREE_ALL() \
        do { \
                vDestroySmallBlockList(); \
                aulRootList = xfree(aulRootList); \
                aulSbdList = xfree(aulSbdList); \
                aulBbdList = xfree(aulBbdList); \
                aulSBD = xfree(aulSBD); \
                aulBBD = xfree(aulBBD); \
        } while (0)
00038 
00039 
00040 /*
00041  * ulReadLong - read four bytes from the given file and offset
00042  */
00043 static ULONG
00044 ulReadLong(FILE *pFile, ULONG ulOffset)
00045 {
00046         UCHAR   aucBytes[4];
00047 
00048         fail(pFile == NULL);
00049 
00050         if (!bReadBytes(aucBytes, 4, ulOffset, pFile)) {
00051                 werr(1, "Read long 0x%lx not possible", ulOffset);
00052         }
00053         return ulGetLong(0, aucBytes);
00054 } /* end of ulReadLong */
00055 
00056 /*
00057  * vName2String - turn the name into a proper string.
00058  */
00059 static void
00060 vName2String(char *szName, const UCHAR *aucBytes, size_t tNameSize)
00061 {
00062         char    *pcChar;
00063         size_t  tIndex;
00064 
00065         fail(aucBytes == NULL || szName == NULL);
00066 
00067         if (tNameSize < 2) {
00068                 szName[0] = '\0';
00069                 return;
00070         }
00071         for (tIndex = 0, pcChar = szName;
00072              tIndex < 2 * tNameSize;
00073              tIndex += 2, pcChar++) {
00074                 *pcChar = (char)aucBytes[tIndex];
00075         }
00076         szName[tNameSize - 1] = '\0';
00077 } /* end of vName2String */
00078 
00079 /*
00080  * tReadBlockIndices - read the Big/Small Block Depot indices
00081  *
00082  * Returns the number of indices read
00083  */
00084 static size_t
00085 tReadBlockIndices(FILE *pFile, ULONG *aulBlockDepot,
00086         size_t tMaxRec, ULONG ulOffset)
00087 {
00088         size_t  tDone;
00089         int     iIndex;
00090         UCHAR   aucBytes[BIG_BLOCK_SIZE];
00091 
00092         fail(pFile == NULL || aulBlockDepot == NULL);
00093         fail(tMaxRec == 0);
00094 
00095         /* Read a big block with BBD or SBD indices */
00096         if (!bReadBytes(aucBytes, BIG_BLOCK_SIZE, ulOffset, pFile)) {
00097                 werr(0, "Reading big block from 0x%lx is not possible",
00098                         ulOffset);
00099                 return 0;
00100         }
00101         /* Split the big block into indices, an index is four bytes */
00102         tDone = min(tMaxRec, (size_t)BIG_BLOCK_SIZE / 4);
00103         for (iIndex = 0; iIndex < (int)tDone; iIndex++) {
00104                 aulBlockDepot[iIndex] = ulGetLong(4 * iIndex, aucBytes);
00105                 NO_DBG_DEC(aulBlockDepot[iIndex]);
00106         }
00107         return tDone;
00108 } /* end of tReadBlockIndices */
00109 
00110 /*
00111  * bGetBBD - get the Big Block Depot indices from the index-blocks
00112  */
00113 static BOOL
00114 bGetBBD(FILE *pFile, const ULONG *aulDepot, size_t tDepotLen,
00115         ULONG *aulBBD, size_t tBBDLen)
00116 {
00117         ULONG   ulBegin;
00118         size_t  tToGo, tDone;
00119         int     iIndex;
00120 
00121         fail(pFile == NULL || aulDepot == NULL || aulBBD == NULL);
00122 
00123         DBG_MSG("bGetBBD");
00124 
00125         tToGo = tBBDLen;
00126         for (iIndex = 0; iIndex < (int)tDepotLen && tToGo != 0; iIndex++) {
00127                 ulBegin = (aulDepot[iIndex] + 1) * BIG_BLOCK_SIZE;
00128                 NO_DBG_HEX(ulBegin);
00129                 tDone = tReadBlockIndices(pFile, aulBBD, tToGo, ulBegin);
00130                 fail(tDone > tToGo);
00131                 if (tDone == 0) {
00132                         return FALSE;
00133                 }
00134                 aulBBD += tDone;
00135                 tToGo -= tDone;
00136         }
00137         return tToGo == 0;
00138 } /* end of bGetBBD */
00139 
00140 /*
00141  * bGetSBD - get the Small Block Depot indices from the index-blocks
00142  */
00143 static BOOL
00144 bGetSBD(FILE *pFile, const ULONG *aulDepot, size_t tDepotLen,
00145         ULONG *aulSBD, size_t tSBDLen)
00146 {
00147         ULONG   ulBegin;
00148         size_t  tToGo, tDone;
00149         int     iIndex;
00150 
00151         fail(pFile == NULL || aulDepot == NULL || aulSBD == NULL);
00152 
00153         DBG_MSG("bGetSBD");
00154 
00155         tToGo = tSBDLen;
00156         for (iIndex = 0; iIndex < (int)tDepotLen && tToGo != 0; iIndex++) {
00157                 fail(aulDepot[iIndex] >= ULONG_MAX / BIG_BLOCK_SIZE);
00158                 ulBegin = (aulDepot[iIndex] + 1) * BIG_BLOCK_SIZE;
00159                 NO_DBG_HEX(ulBegin);
00160                 tDone = tReadBlockIndices(pFile, aulSBD, tToGo, ulBegin);
00161                 fail(tDone > tToGo);
00162                 if (tDone == 0) {
00163                         return FALSE;
00164                 }
00165                 aulSBD += tDone;
00166                 tToGo -= tDone;
00167         }
00168         return tToGo == 0;
00169 } /* end of bGetSBD */
00170 
00171 /*
00172  * vComputePPSlevels - compute the levels of the Property Set Storage entries
00173  */
00174 static void
00175 vComputePPSlevels(pps_entry_type *atPPSlist, pps_entry_type *pNode,
00176                         int iLevel, int iRecursionLevel)
00177 {
00178         fail(atPPSlist == NULL || pNode == NULL);
00179         fail(iLevel < 0 || iRecursionLevel < 0);
00180 
00181         if (iRecursionLevel > 25) {
00182                 /* This removes the possibility of an infinite recursion */
00183                 DBG_DEC(iRecursionLevel);
00184                 return;
00185         }
00186         if (pNode->iLevel <= iLevel) {
00187                 /* Avoid entering a loop */
00188                 DBG_DEC(iLevel);
00189                 DBG_DEC(pNode->iLevel);
00190                 return;
00191         }
00192 
00193         pNode->iLevel = iLevel;
00194 
00195         if (pNode->ulDir != PPS_NUMBER_INVALID) {
00196                 vComputePPSlevels(atPPSlist,
00197                                 &atPPSlist[pNode->ulDir],
00198                                 iLevel + 1,
00199                                 iRecursionLevel + 1);
00200         }
00201         if (pNode->ulNext != PPS_NUMBER_INVALID) {
00202                 vComputePPSlevels(atPPSlist,
00203                                 &atPPSlist[pNode->ulNext],
00204                                 iLevel,
00205                                 iRecursionLevel + 1);
00206         }
00207         if (pNode->ulPrevious != PPS_NUMBER_INVALID) {
00208                 vComputePPSlevels(atPPSlist,
00209                                 &atPPSlist[pNode->ulPrevious],
00210                                 iLevel,
00211                                 iRecursionLevel + 1);
00212         }
00213 } /* end of vComputePPSlevels */
00214 
00215 /*
00216  * bGetPPS - search the Property Set Storage for three sets
00217  *
00218  * Return TRUE if the WordDocument PPS is found
00219  */
00220 static BOOL
00221 bGetPPS(FILE *pFile,
00222         const ULONG *aulRootList, size_t tRootListLen, pps_info_type *pPPS)
00223 {
00224         pps_entry_type  *atPPSlist;
00225         ULONG   ulBegin, ulOffset, ulTmp;
00226         size_t  tNbrOfPPS, tNameSize;
00227         int     iIndex, iStartBlock, iRootIndex;
00228         BOOL    bWord, bExcel;
00229         UCHAR   aucBytes[PROPERTY_SET_STORAGE_SIZE];
00230 
00231         fail(pFile == NULL || aulRootList == NULL || pPPS == NULL);
00232 
00233         DBG_MSG("bGetPPS");
00234 
00235         NO_DBG_DEC(tRootListLen);
00236 
00237         bWord = FALSE;
00238         bExcel = FALSE;
00239         (void)memset(pPPS, 0, sizeof(*pPPS));
00240 
00241         /* Read and store all the Property Set Storage entries */
00242 
00243         tNbrOfPPS = tRootListLen * BIG_BLOCK_SIZE / PROPERTY_SET_STORAGE_SIZE;
00244         atPPSlist = xcalloc(tNbrOfPPS, sizeof(pps_entry_type));
00245         iRootIndex = 0;
00246 
00247         for (iIndex = 0; iIndex < (int)tNbrOfPPS; iIndex++) {
00248                 ulTmp = (ULONG)iIndex * PROPERTY_SET_STORAGE_SIZE;
00249                 iStartBlock = (int)(ulTmp / BIG_BLOCK_SIZE);
00250                 ulOffset = ulTmp % BIG_BLOCK_SIZE;
00251                 ulBegin = (aulRootList[iStartBlock] + 1) * BIG_BLOCK_SIZE +
00252                                 ulOffset;
00253                 NO_DBG_HEX(ulBegin);
00254                 if (!bReadBytes(aucBytes, PROPERTY_SET_STORAGE_SIZE,
00255                                                         ulBegin, pFile)) {
00256                         werr(0, "Reading PPS %d is not possible", iIndex);
00257                         atPPSlist = xfree(atPPSlist);
00258                         return FALSE;
00259                 }
00260                 tNameSize = (size_t)usGetWord(0x40, aucBytes);
00261                 tNameSize = (tNameSize + 1) / 2;
00262                 vName2String(atPPSlist[iIndex].szName, aucBytes, tNameSize);
00263                 atPPSlist[iIndex].ucType = ucGetByte(0x42, aucBytes);
00264                 if (atPPSlist[iIndex].ucType == 5) {
00265                         iRootIndex = iIndex;
00266                 }
00267                 atPPSlist[iIndex].ulPrevious = ulGetLong(0x44, aucBytes);
00268                 atPPSlist[iIndex].ulNext = ulGetLong(0x48, aucBytes);
00269                 atPPSlist[iIndex].ulDir = ulGetLong(0x4c, aucBytes);
00270                 atPPSlist[iIndex].ulSB = ulGetLong(0x74, aucBytes);
00271                 atPPSlist[iIndex].ulSize = ulGetLong(0x78, aucBytes);
00272                 atPPSlist[iIndex].iLevel = INT_MAX;
00273                 if ((atPPSlist[iIndex].ulPrevious >= (ULONG)tNbrOfPPS &&
00274                      atPPSlist[iIndex].ulPrevious != PPS_NUMBER_INVALID) ||
00275                     (atPPSlist[iIndex].ulNext >= (ULONG)tNbrOfPPS &&
00276                      atPPSlist[iIndex].ulNext != PPS_NUMBER_INVALID) ||
00277                     (atPPSlist[iIndex].ulDir >= (ULONG)tNbrOfPPS &&
00278                      atPPSlist[iIndex].ulDir != PPS_NUMBER_INVALID)) {
00279                         DBG_DEC(iIndex);
00280                         DBG_DEC(atPPSlist[iIndex].ulPrevious);
00281                         DBG_DEC(atPPSlist[iIndex].ulNext);
00282                         DBG_DEC(atPPSlist[iIndex].ulDir);
00283                         DBG_DEC(tNbrOfPPS);
00284                         werr(0, "The Property Set Storage is damaged");
00285                         atPPSlist = xfree(atPPSlist);
00286                         return FALSE;
00287                 }
00288         }
00289 
00290 #if 0 /* defined(DEBUG) */
00291         DBG_MSG("Before");
00292         for (iIndex = 0; iIndex < (int)tNbrOfPPS; iIndex++) {
00293                 DBG_MSG(atPPSlist[iIndex].szName);
00294                 DBG_HEX(atPPSlist[iIndex].ulDir);
00295                 DBG_HEX(atPPSlist[iIndex].ulPrevious);
00296                 DBG_HEX(atPPSlist[iIndex].ulNext);
00297                 DBG_DEC(atPPSlist[iIndex].ulSB);
00298                 DBG_HEX(atPPSlist[iIndex].ulSize);
00299                 DBG_DEC(atPPSlist[iIndex].iLevel);
00300         }
00301 #endif /* DEBUG */
00302 
00303         /* Add level information to each entry */
00304         vComputePPSlevels(atPPSlist, &atPPSlist[iRootIndex], 0, 0);
00305 
00306         /* Check the entries on level 1 for the required information */
00307         NO_DBG_MSG("After");
00308         for (iIndex = 0; iIndex < (int)tNbrOfPPS; iIndex++) {
00309 #if 0 /* defined(DEBUG) */
00310                 DBG_MSG(atPPSlist[iIndex].szName);
00311                 DBG_HEX(atPPSlist[iIndex].ulDir);
00312                 DBG_HEX(atPPSlist[iIndex].ulPrevious);
00313                 DBG_HEX(atPPSlist[iIndex].ulNext);
00314                 DBG_DEC(atPPSlist[iIndex].ulSB);
00315                 DBG_HEX(atPPSlist[iIndex].ulSize);
00316                 DBG_DEC(atPPSlist[iIndex].iLevel);
00317 #endif /* DEBUG */
00318                 if (atPPSlist[iIndex].iLevel != 1 ||
00319                     atPPSlist[iIndex].ucType != 2 ||
00320                     atPPSlist[iIndex].szName[0] == '\0' ||
00321                     atPPSlist[iIndex].ulSize == 0) {
00322                         /* This entry can be ignored */
00323                         continue;
00324                 }
00325                 if (pPPS->tWordDocument.ulSize == 0 &&
00326                     STREQ(atPPSlist[iIndex].szName, "WordDocument")) {
00327                         pPPS->tWordDocument.ulSB = atPPSlist[iIndex].ulSB;
00328                         pPPS->tWordDocument.ulSize = atPPSlist[iIndex].ulSize;
00329                         bWord = TRUE;
00330                 } else if (pPPS->tData.ulSize == 0 &&
00331                            STREQ(atPPSlist[iIndex].szName, "Data")) {
00332                         pPPS->tData.ulSB = atPPSlist[iIndex].ulSB;
00333                         pPPS->tData.ulSize = atPPSlist[iIndex].ulSize;
00334                 } else if (pPPS->t0Table.ulSize == 0 &&
00335                            STREQ(atPPSlist[iIndex].szName, "0Table")) {
00336                         pPPS->t0Table.ulSB = atPPSlist[iIndex].ulSB;
00337                         pPPS->t0Table.ulSize = atPPSlist[iIndex].ulSize;
00338                 } else if (pPPS->t1Table.ulSize == 0 &&
00339                            STREQ(atPPSlist[iIndex].szName, "1Table")) {
00340                         pPPS->t1Table.ulSB = atPPSlist[iIndex].ulSB;
00341                         pPPS->t1Table.ulSize = atPPSlist[iIndex].ulSize;
00342                 } else if (pPPS->tSummaryInfo.ulSize == 0 &&
00343                            STREQ(atPPSlist[iIndex].szName,
00344                                                 "\005SummaryInformation")) {
00345                         pPPS->tSummaryInfo.ulSB = atPPSlist[iIndex].ulSB;
00346                         pPPS->tSummaryInfo.ulSize = atPPSlist[iIndex].ulSize;
00347                 } else if (pPPS->tDocSummaryInfo.ulSize == 0 &&
00348                            STREQ(atPPSlist[iIndex].szName,
00349                                         "\005DocumentSummaryInformation")) {
00350                         pPPS->tDocSummaryInfo.ulSB = atPPSlist[iIndex].ulSB;
00351                         pPPS->tDocSummaryInfo.ulSize = atPPSlist[iIndex].ulSize;
00352                 } else if (STREQ(atPPSlist[iIndex].szName, "Book") ||
00353                            STREQ(atPPSlist[iIndex].szName, "Workbook")) {
00354                         bExcel = TRUE;
00355                 }
00356         }
00357 
00358         /* Free the space for the Property Set Storage entries */
00359         atPPSlist = xfree(atPPSlist);
00360 
00361         /* Draw your conclusions */
00362         if (bWord) {
00363                 return TRUE;
00364         }
00365 
00366         if (bExcel) {
00367                 werr(0, "Sorry, but this is an Excel spreadsheet");
00368         } else {
00369                 werr(0, "This OLE file does not contain a Word document");
00370         }
00371         return FALSE;
00372 } /* end of bGetPPS */
00373 
00374 /*
00375  * vGetBbdList - make a list of the places to find big blocks
00376  */
00377 static void
00378 vGetBbdList(FILE *pFile, int iNbr, ULONG *aulBbdList, ULONG ulOffset)
00379 {
00380         int     iIndex;
00381 
00382         fail(pFile == NULL);
00383         fail(iNbr > 127);
00384         fail(aulBbdList == NULL);
00385 
00386         NO_DBG_DEC(iNbr);
00387         for (iIndex = 0; iIndex < iNbr; iIndex++) {
00388                 aulBbdList[iIndex] =
00389                         ulReadLong(pFile, ulOffset + 4 * (ULONG)iIndex);
00390                 NO_DBG_DEC(iIndex);
00391                 NO_DBG_HEX(aulBbdList[iIndex]);
00392         }
00393 } /* end of vGetBbdList */
00394 
00395 /*
00396  * bGetDocumentText - make a list of the text blocks of a Word document
00397  *
00398  * Return TRUE when succesful, otherwise FALSE
00399  */
00400 static BOOL
00401 bGetDocumentText(FILE *pFile, const pps_info_type *pPPS,
00402         const ULONG *aulBBD, size_t tBBDLen,
00403         const ULONG *aulSBD, size_t tSBDLen,
00404         const UCHAR *aucHeader, int iWordVersion)
00405 {
00406         ULONG   ulBeginOfText;
00407         ULONG   ulTextLen, ulFootnoteLen, ulEndnoteLen;
00408         ULONG   ulHdrFtrLen, ulMacroLen, ulAnnotationLen;
00409         ULONG   ulTextBoxLen, ulHdrTextBoxLen;
00410         UINT    uiQuickSaves;
00411         BOOL    bFarEastWord, bTemplate, bFastSaved, bEncrypted, bSuccess;
00412         USHORT  usIdent, usDocStatus;
00413 
00414         fail(pFile == NULL || pPPS == NULL);
00415         fail(aulBBD == NULL);
00416         fail(aulSBD == NULL);
00417 
00418         DBG_MSG("bGetDocumentText");
00419 
00420         /* Get the "magic number" from the header */
00421         usIdent = usGetWord(0x00, aucHeader);
00422         DBG_HEX(usIdent);
00423         bFarEastWord = usIdent == 0x8098 || usIdent == 0x8099 ||
00424                         usIdent == 0xa697 || usIdent == 0xa699;
00425         /* Get the status flags from the header */
00426         usDocStatus = usGetWord(0x0a, aucHeader);
00427         DBG_HEX(usDocStatus);
00428         bTemplate = (usDocStatus & BIT(0)) != 0;
00429         DBG_MSG_C(bTemplate, "This document is a Template");
00430         bFastSaved = (usDocStatus & BIT(2)) != 0;
00431         uiQuickSaves = (UINT)(usDocStatus & 0x00f0) >> 4;
00432         DBG_MSG_C(bFastSaved, "This document is Fast Saved");
00433         DBG_DEC_C(bFastSaved, uiQuickSaves);
00434         bEncrypted = (usDocStatus & BIT(8)) != 0;
00435         if (bEncrypted) {
00436                 werr(0, "Encrypted documents are not supported");
00437                 return FALSE;
00438         }
00439 
00440         /* Get length information */
00441         ulBeginOfText = ulGetLong(0x18, aucHeader);
00442         DBG_HEX(ulBeginOfText);
00443         switch (iWordVersion) {
00444         case 6:
00445         case 7:
00446                 ulTextLen = ulGetLong(0x34, aucHeader);
00447                 ulFootnoteLen = ulGetLong(0x38, aucHeader);
00448                 ulHdrFtrLen = ulGetLong(0x3c, aucHeader);
00449                 ulMacroLen = ulGetLong(0x40, aucHeader);
00450                 ulAnnotationLen = ulGetLong(0x44, aucHeader);
00451                 ulEndnoteLen = ulGetLong(0x48, aucHeader);
00452                 ulTextBoxLen = ulGetLong(0x4c, aucHeader);
00453                 ulHdrTextBoxLen = ulGetLong(0x50, aucHeader);
00454                 break;
00455         case 8:
00456                 ulTextLen = ulGetLong(0x4c, aucHeader);
00457                 ulFootnoteLen = ulGetLong(0x50, aucHeader);
00458                 ulHdrFtrLen = ulGetLong(0x54, aucHeader);
00459                 ulMacroLen = ulGetLong(0x58, aucHeader);
00460                 ulAnnotationLen = ulGetLong(0x5c, aucHeader);
00461                 ulEndnoteLen = ulGetLong(0x60, aucHeader);
00462                 ulTextBoxLen = ulGetLong(0x64, aucHeader);
00463                 ulHdrTextBoxLen = ulGetLong(0x68, aucHeader);
00464                 break;
00465         default:
00466                 werr(0, "This version of Word is not supported");
00467                 return FALSE;
00468         }
00469         DBG_DEC(ulTextLen);
00470         DBG_DEC(ulFootnoteLen);
00471         DBG_DEC(ulHdrFtrLen);
00472         DBG_DEC(ulMacroLen);
00473         DBG_DEC(ulAnnotationLen);
00474         DBG_DEC(ulEndnoteLen);
00475         DBG_DEC(ulTextBoxLen);
00476         DBG_DEC(ulHdrTextBoxLen);
00477 
00478         /* Make a list of the text blocks */
00479         switch (iWordVersion) {
00480         case 6:
00481         case 7:
00482                 if (bFastSaved) {
00483                         bSuccess = bGet6DocumentText(pFile,
00484                                         bFarEastWord,
00485                                         pPPS->tWordDocument.ulSB,
00486                                         aulBBD, tBBDLen,
00487                                         aucHeader);
00488                 } else {
00489                         bSuccess = bAddTextBlocks(ulBeginOfText,
00490                                 ulTextLen +
00491                                 ulFootnoteLen +
00492                                 ulHdrFtrLen +
00493                                 ulMacroLen + ulAnnotationLen +
00494                                 ulEndnoteLen +
00495                                 ulTextBoxLen + ulHdrTextBoxLen,
00496                                 bFarEastWord,
00497                                 IGNORE_PROPMOD,
00498                                 pPPS->tWordDocument.ulSB,
00499                                 aulBBD, tBBDLen);
00500                 }
00501                 break;
00502         case 8:
00503                 bSuccess = bGet8DocumentText(pFile,
00504                                 pPPS,
00505                                 aulBBD, tBBDLen, aulSBD, tSBDLen,
00506                                 aucHeader);
00507                 break;
00508         default:
00509                 werr(0, "This version of Word is not supported");
00510                 bSuccess = FALSE;
00511                 break;
00512         }
00513 
00514         if (bSuccess) {
00515                 vSplitBlockList(pFile,
00516                                 ulTextLen,
00517                                 ulFootnoteLen,
00518                                 ulHdrFtrLen,
00519                                 ulMacroLen,
00520                                 ulAnnotationLen,
00521                                 ulEndnoteLen,
00522                                 ulTextBoxLen,
00523                                 ulHdrTextBoxLen,
00524                                 !bFastSaved && iWordVersion == 8);
00525         } else {
00526                 vDestroyTextBlockList();
00527                 werr(0, "I can't find the text of this document");
00528         }
00529         return bSuccess;
00530 } /* end of bGetDocumentText */
00531 
00532 /*
00533  * vGetDocumentData - make a list of the data blocks of a Word document
00534  */
00535 static void
00536 vGetDocumentData(FILE *pFile, const pps_info_type *pPPS,
00537         const ULONG *aulBBD, size_t tBBDLen,
00538         const UCHAR *aucHeader, int iWordVersion)
00539 {
00540         options_type    tOptions;
00541         ULONG   ulBeginOfText;
00542         BOOL    bFastSaved, bHasImages, bSuccess;
00543         USHORT  usDocStatus;
00544 
00545         fail(pFile == NULL);
00546         fail(pPPS == NULL);
00547         fail(aulBBD == NULL);
00548 
00549         /* Get the options */
00550         vGetOptions(&tOptions);
00551 
00552         /* Get the status flags from the header */
00553         usDocStatus = usGetWord(0x0a, aucHeader);
00554         DBG_HEX(usDocStatus);
00555         bFastSaved = (usDocStatus & BIT(2)) != 0;
00556         bHasImages = (usDocStatus & BIT(3)) != 0;
00557 
00558         if (!bHasImages ||
00559             tOptions.eConversionType == conversion_text ||
00560             tOptions.eConversionType == conversion_fmt_text ||
00561             tOptions.eConversionType == conversion_xml ||
00562             tOptions.eImageLevel == level_no_images) {
00563                 /*
00564                  * No images in the document or text-only output or
00565                  * no images wanted, so no data blocks will be needed
00566                  */
00567                 vDestroyDataBlockList();
00568                 return;
00569         }
00570 
00571         /* Get length information */
00572         ulBeginOfText = ulGetLong(0x18, aucHeader);
00573         DBG_HEX(ulBeginOfText);
00574 
00575         /* Make a list of the data blocks */
00576         switch (iWordVersion) {
00577         case 6:
00578         case 7:
00579                 /*
00580                  * The data blocks are in the text stream. The text stream
00581                  * is in "fast saved" format or "normal saved" format
00582                  */
00583                 if (bFastSaved) {
00584                         bSuccess = bGet6DocumentData(pFile,
00585                                         pPPS->tWordDocument.ulSB,
00586                                         aulBBD, tBBDLen,
00587                                         aucHeader);
00588                 } else {
00589                         bSuccess = bAddDataBlocks(ulBeginOfText,
00590                                         (ULONG)LONG_MAX,
00591                                         pPPS->tWordDocument.ulSB,
00592                                         aulBBD, tBBDLen);
00593                 }
00594                 break;
00595         case 8:
00596                 /*
00597                  * The data blocks are in the data stream. The data stream
00598                  * is always in "normal saved" format
00599                  */
00600                 bSuccess = bAddDataBlocks(0, (ULONG)LONG_MAX,
00601                                 pPPS->tData.ulSB, aulBBD, tBBDLen);
00602                 break;
00603         default:
00604                 werr(0, "This version of Word is not supported");
00605                 bSuccess = FALSE;
00606                 break;
00607         }
00608 
00609         if (!bSuccess) {
00610                 vDestroyDataBlockList();
00611                 werr(0, "I can't find the data of this document");
00612         }
00613 } /* end of vGetDocumentData */
00614 
00615 /*
00616  * iInitDocumentOLE - initialize an OLE document
00617  *
00618  * Returns the version of Word that made the document or -1
00619  */
00620 int
00621 iInitDocumentOLE(FILE *pFile, long lFilesize)
00622 {
00623         pps_info_type   PPS_info;
00624         ULONG   *aulBBD, *aulSBD;
00625         ULONG   *aulRootList, *aulBbdList, *aulSbdList;
00626         ULONG   ulBdbListStart, ulAdditionalBBDlist;
00627         ULONG   ulRootStartblock, ulSbdStartblock, ulSBLstartblock;
00628         ULONG   ulStart, ulTmp;
00629         long    lMaxBlock;
00630         size_t  tBBDLen, tSBDLen, tNumBbdBlocks, tRootListLen;
00631         int     iWordVersion, iIndex, iToGo;
00632         BOOL    bSuccess;
00633         USHORT  usIdent, usDocStatus;
00634         UCHAR   aucHeader[HEADER_SIZE];
00635 
00636         fail(pFile == NULL);
00637 
00638         lMaxBlock = lFilesize / BIG_BLOCK_SIZE - 2;
00639         DBG_DEC(lMaxBlock);
00640         if (lMaxBlock < 1) {
00641                 return -1;
00642         }
00643         tBBDLen = (size_t)(lMaxBlock + 1);
00644         tNumBbdBlocks = (size_t)ulReadLong(pFile, 0x2c);
00645         DBG_DEC(tNumBbdBlocks);
00646         ulRootStartblock = ulReadLong(pFile, 0x30);
00647         DBG_DEC(ulRootStartblock);
00648         ulSbdStartblock = ulReadLong(pFile, 0x3c);
00649         DBG_DEC(ulSbdStartblock);
00650         ulAdditionalBBDlist = ulReadLong(pFile, 0x44);
00651         DBG_HEX(ulAdditionalBBDlist);
00652         ulSBLstartblock = ulReadLong(pFile,
00653                         (ulRootStartblock + 1) * BIG_BLOCK_SIZE + 0x74);
00654         DBG_DEC(ulSBLstartblock);
00655         tSBDLen = (size_t)(ulReadLong(pFile,
00656                         (ulRootStartblock + 1) * BIG_BLOCK_SIZE + 0x78) /
00657                         SMALL_BLOCK_SIZE);
00658         /* All to be xcalloc-ed pointers to NULL */
00659         aulRootList = NULL;
00660         aulSbdList = NULL;
00661         aulBbdList = NULL;
00662         aulSBD = NULL;
00663         aulBBD = NULL;
00664 /* Big Block Depot */
00665         aulBbdList = xcalloc(tNumBbdBlocks, sizeof(ULONG));
00666         aulBBD = xcalloc(tBBDLen, sizeof(ULONG));
00667         iToGo = (int)tNumBbdBlocks;
00668         vGetBbdList(pFile, min(iToGo, 109),  aulBbdList, 0x4c);
00669         ulStart = 109;
00670         iToGo -= 109;
00671         while (ulAdditionalBBDlist != END_OF_CHAIN && iToGo > 0) {
00672                 ulBdbListStart = (ulAdditionalBBDlist + 1) * BIG_BLOCK_SIZE;
00673                 vGetBbdList(pFile, min(iToGo, 127),
00674                                         aulBbdList + ulStart, ulBdbListStart);
00675                 ulAdditionalBBDlist = ulReadLong(pFile,
00676                                         ulBdbListStart + 4 * 127);
00677                 DBG_DEC(ulAdditionalBBDlist);
00678                 DBG_HEX(ulAdditionalBBDlist);
00679                 ulStart += 127;
00680                 iToGo -= 127;
00681         }
00682         if (!bGetBBD(pFile, aulBbdList, tNumBbdBlocks, aulBBD, tBBDLen)) {
00683                 FREE_ALL();
00684                 return -1;
00685         }
00686         aulBbdList = xfree(aulBbdList);
00687 /* Small Block Depot */
00688         aulSbdList = xcalloc(tBBDLen, sizeof(ULONG));
00689         aulSBD = xcalloc(tSBDLen, sizeof(ULONG));
00690         for (iIndex = 0, ulTmp = ulSbdStartblock;
00691              iIndex < (int)tBBDLen && ulTmp != END_OF_CHAIN;
00692              iIndex++, ulTmp = aulBBD[ulTmp]) {
00693                 if (ulTmp >= (ULONG)tBBDLen) {
00694                         DBG_DEC(ulTmp);
00695                         DBG_DEC(tBBDLen);
00696                         werr(1, "The Big Block Depot is damaged");
00697                 }
00698                 aulSbdList[iIndex] = ulTmp;
00699                 NO_DBG_HEX(aulSbdList[iIndex]);
00700         }
00701         if (!bGetSBD(pFile, aulSbdList, tBBDLen, aulSBD, tSBDLen)) {
00702                 FREE_ALL();
00703                 return -1;
00704         }
00705         aulSbdList = xfree(aulSbdList);
00706 /* Root list */
00707         for (tRootListLen = 0, ulTmp = ulRootStartblock;
00708              tRootListLen < tBBDLen && ulTmp != END_OF_CHAIN;
00709              tRootListLen++, ulTmp = aulBBD[ulTmp]) {
00710                 if (ulTmp >= (ULONG)tBBDLen) {
00711                         DBG_DEC(ulTmp);
00712                         DBG_DEC(tBBDLen);
00713                         werr(1, "The Big Block Depot is damaged");
00714                 }
00715         }
00716         if (tRootListLen == 0) {
00717                 werr(0, "No Rootlist found");
00718                 FREE_ALL();
00719                 return -1;
00720         }
00721         aulRootList = xcalloc(tRootListLen, sizeof(ULONG));
00722         for (iIndex = 0, ulTmp = ulRootStartblock;
00723              iIndex < (int)tBBDLen && ulTmp != END_OF_CHAIN;
00724              iIndex++, ulTmp = aulBBD[ulTmp]) {
00725                 if (ulTmp >= (ULONG)tBBDLen) {
00726                         DBG_DEC(ulTmp);
00727                         DBG_DEC(tBBDLen);
00728                         werr(1, "The Big Block Depot is damaged");
00729                 }
00730                 aulRootList[iIndex] = ulTmp;
00731                 NO_DBG_DEC(aulRootList[iIndex]);
00732         }
00733         fail(tRootListLen != (size_t)iIndex);
00734         bSuccess = bGetPPS(pFile, aulRootList, tRootListLen, &PPS_info);
00735         aulRootList = xfree(aulRootList);
00736         if (!bSuccess) {
00737                 FREE_ALL();
00738                 return -1;
00739         }
00740 /* Small block list */
00741         if (!bCreateSmallBlockList(ulSBLstartblock, aulBBD, tBBDLen)) {
00742                 FREE_ALL();
00743                 return -1;
00744         }
00745 
00746         if (PPS_info.tWordDocument.ulSize < MIN_SIZE_FOR_BBD_USE) {
00747                 DBG_DEC(PPS_info.tWordDocument.ulSize);
00748                 FREE_ALL();
00749                 werr(0, "I'm afraid the text stream of this file "
00750                         "is too small to handle.");
00751                 return -1;
00752         }
00753         /* Read the headerblock */
00754         if (!bReadBuffer(pFile, PPS_info.tWordDocument.ulSB,
00755                         aulBBD, tBBDLen, BIG_BLOCK_SIZE,
00756                         aucHeader, 0, HEADER_SIZE)) {
00757                 FREE_ALL();
00758                 return -1;
00759         }
00760         usIdent = usGetWord(0x00, aucHeader);
00761         DBG_HEX(usIdent);
00762         fail(usIdent != 0x8098 &&       /* Word 7 for oriental languages */
00763              usIdent != 0x8099 &&       /* Word 7 for oriental languages */
00764              usIdent != 0xa5dc &&       /* Word 6 & 7 */
00765              usIdent != 0xa5ec &&       /* Word 7 & 97 & 98 */
00766              usIdent != 0xa697 &&       /* Word 7 for oriental languages */
00767              usIdent != 0xa699);        /* Word 7 for oriental languages */
00768         iWordVersion = iGetVersionNumber(aucHeader);
00769         if (iWordVersion < 6) {
00770                 FREE_ALL();
00771                 werr(0, "This file is from a version of Word before Word 6.");
00772                 return -1;
00773         }
00774 
00775         /* Get the status flags from the header */
00776         usDocStatus = usGetWord(0x0a, aucHeader);
00777         if (usDocStatus & BIT(9)) {
00778                 PPS_info.tTable = PPS_info.t1Table;
00779         } else {
00780                 PPS_info.tTable = PPS_info.t0Table;
00781         }
00782         /* Clean the entries that should not be used */
00783         memset(&PPS_info.t0Table, 0, sizeof(PPS_info.t0Table));
00784         memset(&PPS_info.t1Table, 0, sizeof(PPS_info.t1Table));
00785 
00786         bSuccess = bGetDocumentText(pFile, &PPS_info,
00787                         aulBBD, tBBDLen, aulSBD, tSBDLen,
00788                         aucHeader, iWordVersion);
00789         if (bSuccess) {
00790                 vGetDocumentData(pFile, &PPS_info,
00791                         aulBBD, tBBDLen, aucHeader, iWordVersion);
00792                 vGetPropertyInfo(pFile, &PPS_info,
00793                         aulBBD, tBBDLen, aulSBD, tSBDLen,
00794                         aucHeader, iWordVersion);
00795                 vSetDefaultTabWidth(pFile, &PPS_info,
00796                         aulBBD, tBBDLen, aulSBD, tSBDLen,
00797                         aucHeader, iWordVersion);
00798                 vGetNotesInfo(pFile, &PPS_info,
00799                         aulBBD, tBBDLen, aulSBD, tSBDLen,
00800                         aucHeader, iWordVersion);
00801         }
00802         FREE_ALL();
00803         return bSuccess ? iWordVersion : -1;
00804 } /* end of iInitDocumentOLE */

Generated by  doxygen 1.6.2