examples/PIPS/antiword/src/datalist.c

00001 /*
00002  * datalist.c
00003  * Copyright (C) 2000-2002 A.J. van Os; Released under GPL
00004  *
00005  * Description:
00006  * Build, read and destroy a list of Word data blocks
00007  */
00008 
00009 #include <stdlib.h>
00010 #include <errno.h>
00011 #include "antiword.h"
00012 
00013 #if defined(__riscos)
00014 #define EIO             42
00015 #endif /* __riscos */
00016 
00017 
00018 /*
00019  * Private structure to hide the way the information
00020  * is stored from the rest of the program
00021  */
00022 typedef struct data_mem_tag {
00023         data_block_type         tInfo;
00024         struct data_mem_tag     *pNext;
00025 } data_mem_type;
00026 
00027 /* Variable to describe the start of the data block list */
00028 static data_mem_type    *pAnchor = NULL;
00029 /* Variable needed to read the data block list */
00030 static data_mem_type    *pBlockLast = NULL;
00031 /* Variable needed to read the data block list */
00032 static data_mem_type    *pBlockCurrent = NULL;
00033 static ULONG    ulBlockOffset = 0;
00034 static size_t   tByteNext = 0;
00035 /* Last block read */
00036 static UCHAR    aucBlock[BIG_BLOCK_SIZE];
00037 
00038 
00039 /*
00040  * vDestroyDataBlockList - destroy the data block list
00041  */
00042 void
00043 vDestroyDataBlockList(void)
00044 {
00045         data_mem_type   *pCurr, *pNext;
00046 
00047         DBG_MSG("vDestroyDataBlockList");
00048 
00049         pCurr = pAnchor;
00050         while (pCurr != NULL) {
00051                 pNext = pCurr->pNext;
00052                 pCurr = xfree(pCurr);
00053                 pCurr = pNext;
00054         }
00055         pAnchor = NULL;
00056         /* Reset all the control variables */
00057         pBlockLast = NULL;
00058         pBlockCurrent = NULL;
00059         ulBlockOffset = 0;
00060         tByteNext = 0;
00061 } /* end of vDestroyDataBlockList */
00062 
00063 /*
00064  * bAdd2DataBlockList - add an element to the data block list
00065  *
00066  * Returns TRUE when successful, otherwise FALSE
00067  */
00068 BOOL
00069 bAdd2DataBlockList(const data_block_type *pDataBlock)
00070 {
00071         data_mem_type   *pListMember;
00072 
00073         fail(pDataBlock == NULL);
00074         fail(pDataBlock->ulFileOffset == FC_INVALID);
00075         fail(pDataBlock->ulDataPos == CP_INVALID);
00076         fail(pDataBlock->ulLength == 0);
00077 
00078         NO_DBG_MSG("bAdd2DataBlockList");
00079         NO_DBG_HEX(pDataBlock->ulFileOffset);
00080         NO_DBG_HEX(pDataBlock->ulDataPos);
00081         NO_DBG_HEX(pDataBlock->ulLength);
00082 
00083         if (pDataBlock->ulFileOffset == FC_INVALID ||
00084             pDataBlock->ulDataPos == CP_INVALID ||
00085             pDataBlock->ulLength == 0) {
00086                 werr(0, "Software (datablock) error");
00087                 return FALSE;
00088         }
00089         /* Check for continuous blocks */
00090         if (pBlockLast != NULL &&
00091             pBlockLast->tInfo.ulFileOffset +
00092              pBlockLast->tInfo.ulLength == pDataBlock->ulFileOffset &&
00093             pBlockLast->tInfo.ulDataPos +
00094              pBlockLast->tInfo.ulLength == pDataBlock->ulDataPos) {
00095                 /* These are continous blocks */
00096                 pBlockLast->tInfo.ulLength += pDataBlock->ulLength;
00097                 return TRUE;
00098         }
00099         /* Make a new block */
00100         pListMember = xmalloc(sizeof(data_mem_type));
00101         /* Add the block to the data list */
00102         pListMember->tInfo = *pDataBlock;
00103         pListMember->pNext = NULL;
00104         if (pAnchor == NULL) {
00105                 pAnchor = pListMember;
00106         } else {
00107                 fail(pBlockLast == NULL);
00108                 pBlockLast->pNext = pListMember;
00109         }
00110         pBlockLast = pListMember;
00111         return TRUE;
00112 } /* end of bAdd2DataBlockList */
00113 
00114 /*
00115  * ulGetDataOffset - get the offset in the data block list
00116  *
00117  * Get the fileoffset the current position in the data block list
00118  */
00119 ULONG
00120 ulGetDataOffset(FILE *pFile)
00121 {
00122         return pBlockCurrent->tInfo.ulFileOffset + ulBlockOffset + tByteNext;
00123 } /* end of ulGetDataOffset */
00124 
00125 /*
00126  * bSetDataOffset - set the offset in the data block list
00127  *
00128  * Make the given fileoffset the current position in the data block list
00129  */
00130 BOOL
00131 bSetDataOffset(FILE *pFile, ULONG ulFileOffset)
00132 {
00133         data_mem_type   *pCurr;
00134         size_t  tReadLen;
00135 
00136         DBG_HEX(ulFileOffset);
00137 
00138         for (pCurr = pAnchor; pCurr != NULL; pCurr = pCurr->pNext) {
00139                 if (ulFileOffset < pCurr->tInfo.ulFileOffset ||
00140                     ulFileOffset >= pCurr->tInfo.ulFileOffset +
00141                      pCurr->tInfo.ulLength) {
00142                         /* The file offset is not in this block */
00143                         continue;
00144                 }
00145                 /* Compute the maximum number of bytes to read */
00146                 tReadLen = (size_t)(pCurr->tInfo.ulFileOffset +
00147                                 pCurr->tInfo.ulLength -
00148                                 ulFileOffset);
00149                 /* Compute the real number of bytes to read */
00150                 if (tReadLen > sizeof(aucBlock)) {
00151                         tReadLen = sizeof(aucBlock);
00152                 }
00153                 /* Read the bytes */
00154                 if (!bReadBytes(aucBlock, tReadLen, ulFileOffset, pFile)) {
00155                         return FALSE;
00156                 }
00157                 /* Set the control variables */
00158                 pBlockCurrent = pCurr;
00159                 ulBlockOffset = ulFileOffset - pCurr->tInfo.ulFileOffset;
00160                 tByteNext = 0;
00161                 return TRUE;
00162         }
00163         return FALSE;
00164 } /* end of bSetDataOffset */
00165 
00166 /*
00167  * iNextByte - get the next byte from the data block list
00168  */
00169 int
00170 iNextByte(FILE *pFile)
00171 {
00172         ULONG   ulReadOff;
00173         size_t  tReadLen;
00174 
00175         fail(pBlockCurrent == NULL);
00176 
00177         if (tByteNext >= sizeof(aucBlock) ||
00178             ulBlockOffset + tByteNext >= pBlockCurrent->tInfo.ulLength) {
00179                 if (ulBlockOffset + sizeof(aucBlock) <
00180                                         pBlockCurrent->tInfo.ulLength) {
00181                         /* Same block, next part */
00182                         ulBlockOffset += sizeof(aucBlock);
00183                 } else {
00184                         /* Next block, first part */
00185                         pBlockCurrent = pBlockCurrent->pNext;
00186                         ulBlockOffset = 0;
00187                 }
00188                 if (pBlockCurrent == NULL) {
00189                         /* Past the last part of the last block */
00190                         errno = EIO;
00191                         return EOF;
00192                 }
00193                 tReadLen = (size_t)
00194                                 (pBlockCurrent->tInfo.ulLength - ulBlockOffset);
00195                 if (tReadLen > sizeof(aucBlock)) {
00196                         tReadLen = sizeof(aucBlock);
00197                 }
00198                 ulReadOff = pBlockCurrent->tInfo.ulFileOffset + ulBlockOffset;
00199                 if (!bReadBytes(aucBlock, tReadLen, ulReadOff, pFile)) {
00200                         errno = EIO;
00201                         return EOF;
00202                 }
00203                 tByteNext = 0;
00204         }
00205         return (int)aucBlock[tByteNext++];
00206 } /* end of iNextByte */
00207 
00208 /*
00209  * usNextWord - get the next word from the data block list
00210  *
00211  * Read a two byte value in Little Endian order, that means MSB last
00212  *
00213  * All return values can be valid so errno is set in case of error
00214  */
00215 USHORT
00216 usNextWord(FILE *pFile)
00217 {
00218         USHORT  usLSB, usMSB;
00219 
00220         usLSB = (USHORT)iNextByte(pFile);
00221         if (usLSB == (USHORT)EOF) {
00222                 errno = EIO;
00223                 return (USHORT)EOF;
00224         }
00225         usMSB = (USHORT)iNextByte(pFile);
00226         if (usMSB == (USHORT)EOF) {
00227                 DBG_MSG("usNextWord: Unexpected EOF");
00228                 errno = EIO;
00229                 return (USHORT)EOF;
00230         }
00231         return (usMSB << 8) | usLSB;
00232 } /* end of usNextWord */
00233 
00234 /*
00235  * ulNextLong - get the next long from the data block list
00236  *
00237  * Read a four byte value in Little Endian order, that means MSW last
00238  *
00239  * All return values can be valid so errno is set in case of error
00240  */
00241 ULONG
00242 ulNextLong(FILE *pFile)
00243 {
00244         ULONG   ulLSW, ulMSW;
00245 
00246         ulLSW = (ULONG)usNextWord(pFile);
00247         if (ulLSW == (ULONG)EOF) {
00248                 errno = EIO;
00249                 return (ULONG)EOF;
00250         }
00251         ulMSW = (ULONG)usNextWord(pFile);
00252         if (ulMSW == (ULONG)EOF) {
00253                 DBG_MSG("ulNextLong: Unexpected EOF");
00254                 errno = EIO;
00255                 return (ULONG)EOF;
00256         }
00257         return (ulMSW << 16) | ulLSW;
00258 } /* end of ulNextLong */
00259 
00260 /*
00261  * usNextWordBE - get the next two byte value
00262  *
00263  * Read a two byte value in Big Endian order, that means MSB first
00264  *
00265  * All return values can be valid so errno is set in case of error
00266  */
00267 USHORT
00268 usNextWordBE(FILE *pFile)
00269 {
00270         USHORT usLSB, usMSB;
00271 
00272         usMSB = (USHORT)iNextByte(pFile);
00273         if (usMSB == (USHORT)EOF) {
00274                 errno = EIO;
00275                 return (USHORT)EOF;
00276         }
00277         usLSB = (USHORT)iNextByte(pFile);
00278         if (usLSB == (USHORT)EOF) {
00279                 DBG_MSG("usNextWordBE: Unexpected EOF");
00280                 errno = EIO;
00281                 return (USHORT)EOF;
00282         }
00283         return (usMSB << 8) | usLSB;
00284 } /* end of usNextWordBE */
00285 
00286 /*
00287  * ulNextLongBE - get the next four byte value
00288  *
00289  * Read a four byte value in Big Endian order, that means MSW first
00290  *
00291  * All return values can be valid so errno is set in case of error
00292  */
00293 ULONG
00294 ulNextLongBE(FILE *pFile)
00295 {
00296         ULONG   ulLSW, ulMSW;
00297 
00298         ulMSW = (ULONG)usNextWordBE(pFile);
00299         if (ulMSW == (ULONG)EOF) {
00300                 errno = EIO;
00301                 return (ULONG)EOF;
00302         }
00303         ulLSW = (ULONG)usNextWordBE(pFile);
00304         if (ulLSW == (ULONG)EOF) {
00305                 DBG_MSG("ulNextLongBE: Unexpected EOF");
00306                 errno = EIO;
00307                 return (ULONG)EOF;
00308         }
00309         return (ulMSW << 16) | ulLSW;
00310 } /* end of ulNextLongBE */
00311 
00312 /*
00313  * tSkipBytes - skip over the given number of bytes
00314  *
00315  * Returns the number of skipped bytes
00316  */
00317 size_t
00318 tSkipBytes(FILE *pFile, size_t tToSkip)
00319 {
00320         size_t  tToGo, tMaxMove, tMove;
00321 
00322         fail(pFile == NULL);
00323         fail(pBlockCurrent == NULL);
00324 
00325         tToGo = tToSkip;
00326         while (tToGo != 0) {
00327                 /* Goto the end of the current block */
00328                 tMaxMove = min(sizeof(aucBlock) - tByteNext,
00329                                 (size_t)(pBlockCurrent->tInfo.ulLength -
00330                                 ulBlockOffset - tByteNext));
00331                 tMove = min(tMaxMove, tToGo);
00332                 tByteNext += tMove;
00333                 tToGo -= tMove;
00334                 if (tToGo != 0) {
00335                         /* Goto the next block */
00336                         if (iNextByte(pFile) == EOF) {
00337                                 return tToSkip - tToGo;
00338                         }
00339                         tToGo--;
00340                 }
00341         }
00342         return tToSkip;
00343 } /* end of tSkipBytes */
00344 
00345 /*
00346  * Translate  a data position to an offset in the file.
00347  * Logical to physical offset.
00348  *
00349  * Returns:     FC_INVALID: in case of error
00350  *              otherwise: the computed file offset
00351  */
00352 ULONG
00353 ulDataPos2FileOffset(ULONG ulDataPos)
00354 {
00355         data_mem_type   *pCurr;
00356 
00357         fail(ulDataPos == CP_INVALID);
00358 
00359         for (pCurr = pAnchor; pCurr != NULL; pCurr = pCurr->pNext) {
00360                 if (ulDataPos < pCurr->tInfo.ulDataPos ||
00361                     ulDataPos >= pCurr->tInfo.ulDataPos +
00362                      pCurr->tInfo.ulLength) {
00363                         /* The data offset is not in this block, try the next */
00364                         continue;
00365                 }
00366                 /* The data offset is in the current block */
00367                 return pCurr->tInfo.ulFileOffset +
00368                                 ulDataPos -
00369                                 pCurr->tInfo.ulDataPos;
00370         }
00371         /* Passed beyond the end of the list */
00372         DBG_HEX_C(ulDataPos != 0, ulDataPos);
00373         return FC_INVALID;
00374 } /* end of ulDataPos2FileOffset */

Generated by  doxygen 1.6.2