examples/PIPS/antiword/src/stylelist.c

00001 /*
00002  * stylelist.c
00003  * Copyright (C) 1998-2005 A.J. van Os; Released under GNU GPL
00004  *
00005  * Description:
00006  * Build, read and destroy a list of Word style information
00007  */
00008 
00009 #include <stdlib.h>
00010 #include <stddef.h>
00011 #include <ctype.h>
00012 #include "antiword.h"
00013 
00014 
00015 /*
00016  * Private structure to hide the way the information
00017  * is stored from the rest of the program
00018  */
00019 typedef struct style_mem_tag {
00020         style_block_type        tInfo;
00021         ULONG                   ulSequenceNumber;
00022         struct style_mem_tag    *pNext;
00023 } style_mem_type;
00024 
00025 /* Variables needed to write the Style Information List */
00026 static style_mem_type   *pAnchor = NULL;
00027 static style_mem_type   *pStyleLast = NULL;
00028 /* The type of conversion */
00029 static conversion_type  eConversionType = conversion_unknown;
00030 /* The character set encoding */
00031 static encoding_type    eEncoding = encoding_neutral;
00032 /* Values for efficiency reasons */
00033 static const style_mem_type     *pMidPtr = NULL;
00034 static BOOL             bMoveMidPtr = FALSE;
00035 static BOOL             bInSequence = TRUE;
00036 
00037 
00038 /*
00039  * vDestroyStyleInfoList - destroy the Style Information List
00040  */
00041 void
00042 vDestroyStyleInfoList(void)
00043 {
00044         style_mem_type  *pCurr, *pNext;
00045 
00046         DBG_MSG("vDestroyStyleInfoList");
00047 
00048         /* Free the Style Information List */
00049         pCurr = pAnchor;
00050         while (pCurr != NULL) {
00051                 pNext = pCurr->pNext;
00052                 pCurr = xfree(pCurr);
00053                 pCurr = pNext;
00054         }
00055         pAnchor = NULL;
00056         /* Reset all control variables */
00057         pStyleLast = NULL;
00058         pMidPtr = NULL;
00059         bMoveMidPtr = FALSE;
00060         bInSequence = TRUE;
00061 } /* end of vDestroyStyleInfoList */
00062 
00063 /*
00064  * vConvertListCharacter - convert the list character
00065  */
00066 static void
00067 vConvertListCharacter(UCHAR ucNFC, USHORT usListChar, char *szListChar)
00068 {
00069         options_type    tOptions;
00070         size_t  tLen;
00071 
00072         fail(szListChar == NULL);
00073         fail(szListChar[0] != '\0');
00074 
00075         if (usListChar < 0x80 && isprint((int)usListChar)) {
00076                 DBG_CHR_C(isalnum((int)usListChar), usListChar);
00077                 szListChar[0] = (char)usListChar;
00078                 szListChar[1] = '\0';
00079                 return;
00080         }
00081 
00082         if (ucNFC != LIST_SPECIAL &&
00083             ucNFC != LIST_SPECIAL2 &&
00084             ucNFC != LIST_BULLETS) {
00085                 szListChar[0] = '.';
00086                 szListChar[1] = '\0';
00087                 return;
00088         }
00089 
00090         if (eConversionType == conversion_unknown ||
00091             eEncoding == encoding_neutral) {
00092                 vGetOptions(&tOptions);
00093                 eConversionType = tOptions.eConversionType;
00094                 eEncoding = tOptions.eEncoding;
00095         }
00096 
00097         switch (usListChar) {
00098         case 0x0000: case 0x00b7: case 0x00fe: case  0xf021: case 0xf043:
00099         case 0xf06c: case 0xf093: case 0xf0b7:
00100                 usListChar = 0x2022;    /* BULLET */
00101                 break;
00102         case 0x0096: case 0xf02d:
00103                 usListChar = 0x2013;    /* EN DASH */
00104                 break;
00105         case 0x00a8:
00106                 usListChar = 0x2666;    /* BLACK DIAMOND SUIT */
00107                 break;
00108         case 0x00de:
00109                 usListChar = 0x21d2;    /* RIGHTWARDS DOUBLE ARROW */
00110                 break;
00111         case 0x00e0: case 0xf074:
00112                 usListChar = 0x25ca;    /* LOZENGE */
00113                 break;
00114         case 0x00e1:
00115                 usListChar = 0x2329;    /* LEFT ANGLE BRACKET */
00116                 break;
00117         case 0xf020:
00118                 usListChar = 0x0020;    /* SPACE */
00119                 break;
00120         case 0xf041:
00121                 usListChar = 0x270c;    /* VICTORY HAND */
00122                 break;
00123         case 0xf066:
00124                 usListChar = 0x03d5;    /* GREEK PHI SYMBOL */
00125                 break;
00126         case 0xf06e:
00127                 usListChar = 0x25a0;    /* BLACK SQUARE */
00128                 break;
00129         case 0xf06f: case 0xf070: case 0xf0a8:
00130                 usListChar = 0x25a1;    /* WHITE SQUARE */
00131                 break;
00132         case 0xf071:
00133                 usListChar = 0x2751;    /* LOWER RIGHT SHADOWED WHITE SQUARE */
00134                 break;
00135         case 0xf075: case 0xf077:
00136                 usListChar = 0x25c6;    /* BLACK DIAMOND */
00137                 break;
00138         case 0xf076:
00139                 usListChar = 0x2756;    /* BLACK DIAMOND MINUS WHITE X */
00140                 break;
00141         case 0xf0a7:
00142                 usListChar = 0x25aa;    /* BLACK SMALL SQUARE */
00143                 break;
00144         case 0xf0d8:
00145                 usListChar = 0x27a2;    /* RIGHTWARDS ARROWHEAD */
00146                 break;
00147         case 0xf0e5:
00148                 usListChar = 0x2199;    /* SOUTH WEST ARROW */
00149                 break;
00150         case 0xf0f0:
00151                 usListChar = 0x21e8;    /* RIGHTWARDS WHITE ARROW */
00152                 break;
00153         case 0xf0fc:
00154                 usListChar = 0x2713;    /* CHECK MARK */
00155                 break;
00156         default:
00157                 if ((usListChar >= 0xe000 && usListChar < 0xf900) ||
00158                     (usListChar < 0x80 && !isprint((int)usListChar))) {
00159                         /*
00160                          * All remaining private area characters and all
00161                          * remaining non-printable ASCII characters to their
00162                          * default bullet character
00163                          */
00164                         DBG_HEX(usListChar);
00165                         DBG_FIXME();
00166                         if (ucNFC == LIST_SPECIAL || ucNFC == LIST_SPECIAL2) {
00167                                 usListChar = 0x2190;    /* LEFTWARDS ARROW */
00168                         } else {
00169                                 usListChar = 0x2022;    /* BULLET */
00170                         }
00171                 }
00172                 break;
00173         }
00174 
00175         if (eEncoding == encoding_utf_8) {
00176                 tLen = tUcs2Utf8(usListChar, szListChar, 4);
00177                 szListChar[tLen] = '\0';
00178         } else {
00179                 switch (usListChar) {
00180                 case 0x03d5: case 0x25a1: case 0x25c6: case 0x25ca:
00181                 case 0x2751:
00182                         szListChar[0] = 'o';
00183                         break;
00184                 case 0x2013: case 0x2500:
00185                         szListChar[0] = '-';
00186                         break;
00187                 case 0x2190: case 0x2199: case 0x2329:
00188                         szListChar[0] = '<';
00189                         break;
00190                 case 0x21d2:
00191                         szListChar[0] = '=';
00192                         break;
00193                 case 0x21e8: case 0x27a2:
00194                         szListChar[0] = '>';
00195                         break;
00196                 case 0x25a0: case 0x25aa:
00197                         szListChar[0] = '.';
00198                         break;
00199                 case 0x2666:
00200                         szListChar[0] = OUR_DIAMOND;
00201                         break;
00202                 case 0x270c:
00203                         szListChar[0] = 'x';
00204                         break;
00205                 case 0x2713:
00206                         szListChar[0] = 'V';
00207                         break;
00208                 case 0x2756:
00209                         szListChar[0] = '*';
00210                         break;
00211                 case 0x2022:
00212                 default:
00213                         vGetBulletValue(eConversionType, eEncoding,
00214                                         szListChar, 2);
00215                         break;
00216                 }
00217                 tLen = 1;
00218         }
00219         szListChar[tLen] = '\0';
00220 } /* end of vConvertListCharacter */
00221 
00222 /*
00223  * eGetNumType - get the level type from the given level number
00224  *
00225  * Returns the level type
00226  */
00227 level_type_enum
00228 eGetNumType(UCHAR ucNumLevel)
00229 {
00230         switch (ucNumLevel) {
00231         case  1: case  2: case  3: case  4: case  5:
00232         case  6: case  7: case  8: case  9:
00233                 return level_type_outline;
00234         case 10:
00235                 return level_type_numbering;
00236         case 11:
00237                 return level_type_sequence;
00238         case 12:
00239                 return level_type_pause;
00240         default:
00241                 return level_type_none;
00242         }
00243 } /* end of eGetNumType */
00244 
00245 /*
00246  * vCorrectStyleValues - correct style values that Antiword can't use
00247  */
00248 void
00249 vCorrectStyleValues(style_block_type *pStyleBlock)
00250 {
00251         if (pStyleBlock->usBeforeIndent > 0x7fff) {
00252                 pStyleBlock->usBeforeIndent = 0;
00253         } else if (pStyleBlock->usBeforeIndent > 2160) {
00254                 /* 2160 twips = 1.5 inches or 38.1 mm */
00255                 DBG_DEC(pStyleBlock->usBeforeIndent);
00256                 pStyleBlock->usBeforeIndent = 2160;
00257         }
00258         if (pStyleBlock->usIstd >= 1 &&
00259             pStyleBlock->usIstd <= 9 &&
00260             pStyleBlock->usBeforeIndent < HEADING_GAP) {
00261                 NO_DBG_DEC(pStyleBlock->usBeforeIndent);
00262                 pStyleBlock->usBeforeIndent = HEADING_GAP;
00263         }
00264 
00265         if (pStyleBlock->usAfterIndent > 0x7fff) {
00266                 pStyleBlock->usAfterIndent = 0;
00267         } else if (pStyleBlock->usAfterIndent > 2160) {
00268                 /* 2160 twips = 1.5 inches or 38.1 mm */
00269                 DBG_DEC(pStyleBlock->usAfterIndent);
00270                 pStyleBlock->usAfterIndent = 2160;
00271         }
00272         if (pStyleBlock->usIstd >= 1 &&
00273             pStyleBlock->usIstd <= 9 &&
00274             pStyleBlock->usAfterIndent < HEADING_GAP) {
00275                 NO_DBG_DEC(pStyleBlock->usAfterIndent);
00276                 pStyleBlock->usAfterIndent = HEADING_GAP;
00277         }
00278 
00279         if (pStyleBlock->sLeftIndent < 0) {
00280                 pStyleBlock->sLeftIndent = 0;
00281         }
00282         if (pStyleBlock->sRightIndent > 0) {
00283                 pStyleBlock->sRightIndent = 0;
00284         }
00285         vConvertListCharacter(pStyleBlock->ucNFC,
00286                         pStyleBlock->usListChar,
00287                         pStyleBlock->szListChar);
00288 } /* end of vCorrectStyleValues */
00289 
00290 /*
00291  * vAdd2StyleInfoList - Add an element to the Style Information List
00292  */
00293 void
00294 vAdd2StyleInfoList(const style_block_type *pStyleBlock)
00295 {
00296         style_mem_type  *pListMember;
00297 
00298         fail(pStyleBlock == NULL);
00299 
00300         NO_DBG_MSG("bAdd2StyleInfoList");
00301 
00302         if (pStyleBlock->ulFileOffset == FC_INVALID) {
00303                 NO_DBG_DEC(pStyleBlock->usIstd);
00304                 return;
00305         }
00306 
00307         NO_DBG_HEX(pStyleBlock->ulFileOffset);
00308         NO_DBG_DEC_C(pStyleBlock->sLeftIndent != 0,
00309                                         pStyleBlock->sLeftIndent);
00310         NO_DBG_DEC_C(pStyleBlock->sRightIndent != 0,
00311                                         pStyleBlock->sRightIndent);
00312         NO_DBG_DEC_C(pStyleBlock->bNumPause, pStyleBlock->bNumPause);
00313         NO_DBG_DEC_C(pStyleBlock->usIstd != 0, pStyleBlock->usIstd);
00314         NO_DBG_DEC_C(pStyleBlock->usStartAt != 1, pStyleBlock->usStartAt);
00315         NO_DBG_DEC_C(pStyleBlock->usAfterIndent != 0,
00316                                         pStyleBlock->usAfterIndent);
00317         NO_DBG_DEC_C(pStyleBlock->ucAlignment != 0, pStyleBlock->ucAlignment);
00318         NO_DBG_DEC(pStyleBlock->ucNFC);
00319         NO_DBG_HEX(pStyleBlock->usListChar);
00320 
00321         if (pStyleLast != NULL &&
00322             pStyleLast->tInfo.ulFileOffset == pStyleBlock->ulFileOffset) {
00323                 /*
00324                  * If two consecutive styles share the same
00325                  * offset, remember only the last style
00326                  */
00327                 fail(pStyleLast->pNext != NULL);
00328                 pStyleLast->tInfo = *pStyleBlock;
00329                 /* Correct the values where needed */
00330                 vCorrectStyleValues(&pStyleLast->tInfo);
00331                 return;
00332         }
00333 
00334         /* Create list member */
00335         pListMember = xmalloc(sizeof(style_mem_type));
00336         /* Fill the list member */
00337         pListMember->tInfo = *pStyleBlock;
00338         pListMember->pNext = NULL;
00339         /* Add the sequence number */
00340         pListMember->ulSequenceNumber =
00341                         ulGetSeqNumber(pListMember->tInfo.ulFileOffset);
00342         /* Correct the values where needed */
00343         vCorrectStyleValues(&pListMember->tInfo);
00344         /* Add the new member to the list */
00345         if (pAnchor == NULL) {
00346                 pAnchor = pListMember;
00347                 /* For efficiency */
00348                 pMidPtr = pAnchor;
00349                 bMoveMidPtr = FALSE;
00350                 bInSequence = TRUE;
00351         } else {
00352                 fail(pStyleLast == NULL);
00353                 pStyleLast->pNext = pListMember;
00354                 /* For efficiency */
00355                 if (bMoveMidPtr) {
00356                         pMidPtr = pMidPtr->pNext;
00357                         bMoveMidPtr = FALSE;
00358                 } else {
00359                         bMoveMidPtr = TRUE;
00360                 }
00361                 if (bInSequence) {
00362                         bInSequence = pListMember->ulSequenceNumber >
00363                                         pStyleLast->ulSequenceNumber;
00364                 }
00365         }
00366         pStyleLast = pListMember;
00367 } /* end of vAdd2StyleInfoList */
00368 
00369 /*
00370  * Get the record that follows the given recored in the Style Information List
00371  */
00372 const style_block_type *
00373 pGetNextStyleInfoListItem(const style_block_type *pCurr)
00374 {
00375         const style_mem_type    *pRecord;
00376         size_t  tOffset;
00377 
00378         if (pCurr == NULL) {
00379                 if (pAnchor == NULL) {
00380                         /* There are no records */
00381                         return NULL;
00382                 }
00383                 /* The first record is the only one without a predecessor */
00384                 return &pAnchor->tInfo;
00385         }
00386         tOffset = offsetof(style_mem_type, tInfo);
00387         /* Many casts to prevent alignment warnings */
00388         pRecord = (style_mem_type *)(void *)((char *)pCurr - tOffset);
00389         fail(pCurr != &pRecord->tInfo);
00390         if (pRecord->pNext == NULL) {
00391                 /* The last record has no successor */
00392                 return NULL;
00393         }
00394         return &pRecord->pNext->tInfo;
00395 } /* end of pGetNextStyleInfoListItem */
00396 
00397 /*
00398  * Get the next text style
00399  */
00400 const style_block_type *
00401 pGetNextTextStyle(const style_block_type *pCurr)
00402 {
00403         const style_block_type  *pRecord;
00404 
00405         pRecord = pCurr;
00406         do {
00407                 pRecord = pGetNextStyleInfoListItem(pRecord);
00408         } while (pRecord != NULL &&
00409                  (pRecord->eListID == hdrftr_list ||
00410                   pRecord->eListID == macro_list ||
00411                   pRecord->eListID == annotation_list));
00412         return pRecord;
00413 } /* end of pGetNextTextStyle */
00414 
00415 /*
00416  * usGetIstd - get the istd that belongs to the given file offset
00417  */
00418 USHORT
00419 usGetIstd(ULONG ulFileOffset)
00420 {
00421         const style_mem_type    *pCurr, *pBest, *pStart;
00422         ULONG   ulSeq, ulBest;
00423 
00424         ulSeq = ulGetSeqNumber(ulFileOffset);
00425         if (ulSeq == FC_INVALID) {
00426                 return ISTD_NORMAL;
00427         }
00428         NO_DBG_HEX(ulFileOffset);
00429         NO_DBG_DEC(ulSeq);
00430 
00431         if (bInSequence &&
00432             pMidPtr != NULL &&
00433             ulSeq > pMidPtr->ulSequenceNumber) {
00434                 /* The istd is in the second half of the chained list */
00435                 pStart = pMidPtr;
00436         } else {
00437                 pStart = pAnchor;
00438         }
00439 
00440         pBest = NULL;
00441         ulBest = 0;
00442         for (pCurr = pStart; pCurr != NULL; pCurr = pCurr->pNext) {
00443                 if (pCurr->ulSequenceNumber != FC_INVALID &&
00444                     (pBest == NULL || pCurr->ulSequenceNumber > ulBest) &&
00445                     pCurr->ulSequenceNumber <= ulSeq) {
00446                         pBest = pCurr;
00447                         ulBest = pCurr->ulSequenceNumber;
00448                 }
00449                 if (bInSequence && pCurr->ulSequenceNumber > ulSeq) {
00450                         break;
00451                 }
00452         }
00453         NO_DBG_DEC(ulBest);
00454 
00455         if (pBest == NULL) {
00456                 return ISTD_NORMAL;
00457         }
00458 
00459         NO_DBG_DEC(pBest->tInfo.usIstd);
00460         return pBest->tInfo.usIstd;
00461 } /* end of usGetIstd */
00462 
00463 /*
00464  * bStyleImpliesList - does style info implies being part of a list
00465  *
00466  * Decide whether the style information implies that the given paragraph is
00467  * part of a list
00468  *
00469  * Returns TRUE when the paragraph is part of a list, otherwise FALSE
00470  */
00471 BOOL
00472 bStyleImpliesList(const style_block_type *pStyle, int iWordVersion)
00473 {
00474         fail(pStyle == NULL);
00475         fail(iWordVersion < 0);
00476 
00477         if (pStyle->usIstd >= 1 && pStyle->usIstd <= 9) {
00478                 /* These are heading levels */
00479                 return FALSE;
00480         }
00481         if (iWordVersion < 8) {
00482                 /* Check for old style lists */
00483                 return pStyle->ucNumLevel != 0;
00484         }
00485         /* Check for new style lists */
00486         return pStyle->usListIndex != 0;
00487 } /* end of bStyleImpliesList */

Generated by  doxygen 1.6.2