Planeshift

wn.h

Go to the documentation of this file.
00001 /*
00002    
00003    wn.h - header file needed to use WordNet Run Time Library
00004 
00005    $Id: wn.h,v 1.4 2007/09/14 23:19:16 mgist Exp $
00006 
00007 */
00008 
00009 #ifndef _WN_
00010 #define _WN_    /* include guard.  NOTE(review): underscore + uppercase names are reserved for the implementation in C; left unchanged in case other code tests this macro */
00011 
00012 #include <stdio.h>
00013 
00014 /* Platform-specific path and filename specifications */
00015 /* The *FILE macros are printf-style templates; presumably the first %s is the dictionary directory (confirm against the code that formats them). */
00016 #ifdef _WINDOWS                 /* Windows build: backslash separators */
00017 #define DICTDIR         "\\dict"
00018 /* disabled alternative (absolute install path): DEFAULTPATH "C:\\Program Files\\WordNet\\2.1\\dict" */
00019 #define DEFAULTPATH     "data\\dict"    /* relative path */
00020 #define DATAFILE        "%s\\data.%s"
00021 #define INDEXFILE       "%s\\index.%s"
00022 #define SENSEIDXFILE    "%s\\index.sense"
00023 #define KEYIDXFILE      "%s\\index.key"
00024 #define REVKEYIDXFILE   "%s\\index.key.rev"
00025 #define VRBSENTFILE     "%s\\sents.vrb"
00026 #define VRBIDXFILE      "%s\\sentidx.vrb"
00027 #define CNTLISTFILE     "%s\\cntlist.rev"
00028 #else                           /* every other platform: forward slashes */
00029 #define DICTDIR         "/dict"
00030 /* disabled alternative (absolute install path): DEFAULTPATH "/usr/local/WordNet-2.1/dict" */
00031 #define DEFAULTPATH     "data/dict"     /* relative path */
00032 #define DATAFILE        "%s/data.%s"
00033 #define INDEXFILE       "%s/index.%s"
00034 #define SENSEIDXFILE    "%s/index.sense"
00035 #define KEYIDXFILE      "%s/index.key"
00036 #define REVKEYIDXFILE   "%s/index.key.rev"
00037 #define VRBSENTFILE     "%s/sents.vrb"
00038 #define VRBIDXFILE      "%s/sentidx.vrb"
00039 #define CNTLISTFILE     "%s/cntlist.rev"
00040 #endif /* _WINDOWS */
00041 
00042 /* Various buffer sizes */
00043 
00044 #define SEARCHBUF       ((long)(200*(long)1024))  /* 200K; long arithmetic so the product cannot overflow a 16-bit int */
00045 #define LINEBUF         (15*1024) /* 15K buffer to read index & data files */
00046 #define SMLINEBUF       (3*1024) /* small (3K) buffer for output lines */
00047 #define WORDBUF         (256)   /* buffer for one word or collocation */
00048 /* Limits and special argument values */
00049 #define ALLSENSES       0       /* pass to findtheinfo() if want all senses */
00050 #define MAXID           15      /* maximum id number in lexicographer file */
00051 #define MAXDEPTH        20      /* maximum tree depth - used to find cycles */
00052 #define MAXSENSE        75      /* maximum number of senses in database */
00053 #define MAX_FORMS       5       /* max # of different 'forms' word can have; sizes the SearchResults arrays */
00054 #define MAXFNUM         44      /* maximum number of lexicographer files */
00055 
00056 /* Pointer type and search type codes */
00057 /* Codes 1..LASTTYPE are pointer types; every later code is derived from LASTTYPE, so the resolved values noted below change if a pointer type is added. */
00058 /* Pointers (the trailing comment shows the symbol associated with each pointer type) */
00059 
00060 #define ANTPTR           1      /* ! */
00061 #define HYPERPTR         2      /* @ */
00062 #define HYPOPTR          3      /* ~ */
00063 #define ENTAILPTR        4      /* * */
00064 #define SIMPTR           5      /* & */
00065 
00066 #define ISMEMBERPTR      6      /* #m */
00067 #define ISSTUFFPTR       7      /* #s */
00068 #define ISPARTPTR        8      /* #p */
00069 
00070 #define HASMEMBERPTR     9      /* %m */
00071 #define HASSTUFFPTR     10      /* %s */
00072 #define HASPARTPTR      11      /* %p */
00073 
00074 #define MERONYM         12      /* % (not valid in lexicographer file) */
00075 #define HOLONYM         13      /* # (not valid in lexicographer file) */
00076 #define CAUSETO         14      /* > */
00077 #define PPLPTR          15      /* < */
00078 #define SEEALSOPTR      16      /* ^ */
00079 #define PERTPTR         17      /* \ */
00080 #define ATTRIBUTE       18      /* = */
00081 #define VERBGROUP       19      /* $ */
00082 #define DERIVATION      20      /* + */
00083 #define CLASSIFICATION  21      /* ; */
00084 #define CLASS           22      /* - */
00085 
00086 #define LASTTYPE        CLASS   /* = 22, highest pointer type code */
00087 
00088 /* Misc searches */
00089 
00090 #define SYNS            (LASTTYPE + 1)  /* 23 */
00091 #define FREQ            (LASTTYPE + 2)  /* 24 */
00092 #define FRAMES          (LASTTYPE + 3)  /* 25 */
00093 #define COORDS          (LASTTYPE + 4)  /* 26 */
00094 #define RELATIVES       (LASTTYPE + 5)  /* 27 */
00095 #define HMERONYM        (LASTTYPE + 6)  /* 28 */
00096 #define HHOLONYM        (LASTTYPE + 7)  /* 29 */
00097 #define WNGREP          (LASTTYPE + 8)  /* 30 */
00098 #define OVERVIEW        (LASTTYPE + 9)  /* 31 */
00099 
00100 #define MAXSEARCH       OVERVIEW        /* = 31 */
00101 
00102 #define CLASSIF_START    (MAXSEARCH + 1)        /* 32 */
00103 
00104 #define CLASSIF_CATEGORY (CLASSIF_START)        /* ;c  (32) */
00105 #define CLASSIF_USAGE    (CLASSIF_START + 1)    /* ;u  (33) */
00106 #define CLASSIF_REGIONAL (CLASSIF_START + 2)    /* ;r  (34) */
00107 
00108 #define CLASSIF_END      CLASSIF_REGIONAL       /* = 34 */
00109 
00110 #define CLASS_START      (CLASSIF_END + 1)      /* 35 */
00111 
00112 #define CLASS_CATEGORY   (CLASS_START)          /* -c  (35) */
00113 #define CLASS_USAGE      (CLASS_START + 1)      /* -u  (36) */
00114 #define CLASS_REGIONAL   (CLASS_START + 2)      /* -r  (37) */
00115 
00116 #define CLASS_END        CLASS_REGIONAL         /* = 37 */
00117 
00118 #define INSTANCE         (CLASS_END + 1)        /* @i  (38) */
00119 #define INSTANCES        (CLASS_END + 2)        /* ~i  (39) */
00120 
00121 #define MAXPTR          INSTANCES       /* = 39, highest code defined in this header */
00122 
00123 /* WordNet part of speech stuff */
00124 
00125 #define NUMPARTS        4       /* number of parts of speech (NOUN..ADV); the FILE* arrays below are sized NUMPARTS + 1 */
00126 #define NUMFRAMES       35      /* number of verb frames */
00127 
00128 /* Generic names for part of speech */
00129 
00130 #define NOUN            1
00131 #define VERB            2
00132 #define ADJ             3
00133 #define ADV             4
00134 #define SATELLITE       5       /* not really a part of speech (note: greater than NUMPARTS) */
00135 #define ADJSAT          SATELLITE       /* alias for SATELLITE */
00136 
00137 #define ALL_POS         0       /* passed to in_wn() to check all POS */
00138 
/* bit(n): unsigned int mask with only bit n set.
   The argument is parenthesized before it is cast, so a compound argument
   such as bit(a < b) or bit(a / b) is evaluated as a whole; previously the
   cast bound to the first operand only and could change the result.
   n must be non-negative and smaller than the width of unsigned int;
   a larger shift count is undefined behavior.
   NOTE(review): the search codes in this header go up to MAXPTR (39), so
   not every code fits where unsigned int is 32 bits - confirm that callers
   only pass codes that fit. */
#define bit(n) ((unsigned int)((unsigned int)1 << (unsigned int)(n)))
00140 
00141 /* Adjective markers */
00142 
00143 #define PADJ            1       /* (p) */
00144 #define NPADJ           2       /* (a) */
00145 #define IPADJ           3       /* (ip) */
00146 /* Descriptive aliases for the marker codes above, plus UNKNOWN_MARKER (0) */
00147 #define UNKNOWN_MARKER          0
00148 #define ATTRIBUTIVE             NPADJ   /* = 2, (a) */
00149 #define PREDICATIVE             PADJ    /* = 1, (p) */
00150 #define IMMED_POSTNOMINAL       IPADJ   /* = 3, (ip) */
00151 
00152 extern char *wnrelease;         /* WordNet release/version number, as a string */
00153 /* Lookup tables defined elsewhere (extern); their lengths are not visible from this header. */
00154 extern char *lexfiles[];        /* names of lexicographer files */
00155 extern char *ptrtyp[];          /* pointer characters (strings, since some symbols such as #m have two characters) */
00156 extern char *partnames[];       /* POS strings */
00157 extern char partchars[];        /* single chars for each POS */
00158 extern char *adjclass[];        /* adjective class strings */
00159 extern char *frametext[];       /* text of verb frames */
00160 
00161 /* Data structures used by search code functions. */
00162 
00163 /* Structure for index file entry (returned by index_lookup(), getindex() and parse_index(); released with free_index()) */
00164 typedef struct {
00165     long idxoffset;             /* byte offset of entry in index file */
00166     char *wd;                   /* word string */
00167     char *pos;                  /* part of speech (as a string) */
00168     int sense_cnt;              /* sense (collins) count; called the polysemy count elsewhere in this header */
00169     int off_cnt;                /* number of offsets (presumably the length of offset[] - confirm) */
00170     int tagged_cnt;             /* number of senses that are tagged */
00171     unsigned long *offset;      /* offsets of synsets containing word */
00172     int ptruse_cnt;             /* number of pointers used (presumably the length of ptruse[] - confirm) */
00173     int *ptruse;                /* pointers used */
00174 } Index;
00175 
00176 typedef Index *IndexPtr;        /* pointer form used by the prototypes in this header */
00177 
00178 /* Structure for data file synset (returned by read_synset() and parse_synset(); released with free_synset(), or free_syns() for a findtheinfo_ds() list) */
00179 typedef struct ss {
00180     long hereiam;               /* current file position */
00181     int sstype;                 /* type of ADJ synset */
00182     int fnum;                   /* file number that synset comes from */
00183     char *pos;                  /* part of speech (as a string) */
00184     int wcount;                 /* number of words in synset */
00185     char **words;               /* words in synset */
00186     int *lexid;                 /* unique id in lexicographer file */
00187     int *wnsns;                 /* sense number in wordnet */
00188     int whichword;              /* which word in synset we're looking for */
00189     int ptrcount;               /* number of pointers */
00190     int *ptrtyp;                /* pointer types (struct member; distinct from the global ptrtyp[] table) */
00191     long *ptroff;               /* pointer offsets */
00192     int *ppos;                  /* pointer part of speech */
00193     int *pto;                   /* pointer 'to' fields */
00194     int *pfrm;                  /* pointer 'from' fields */
00195     int fcount;                 /* number of verb frames */
00196     int *frmid;                 /* frame numbers */
00197     int *frmto;                 /* frame 'to' fields */
00198     char *defn;                 /* synset gloss (definition) */
00199     unsigned int key;           /* unique synset key */
00200 
00201     /* these fields are used if a data structure is returned
00202        instead of a text buffer */
00203 
00204     struct ss *nextss;          /* ptr to next synset containing searchword */
00205     struct ss *nextform;        /* ptr to list of synsets for alternate
00206                                    spelling of wordform */
00207     int searchtype;             /* type of search performed */
00208     struct ss *ptrlist;         /* ptr to synset list result of search */
00209     char *headword;             /* if pos is "s", this is cluster head word */
00210     short headsense;            /* sense number of headword */
00211 } Synset;
00212 
00213 typedef Synset *SynsetPtr;      /* pointer form used by the prototypes in this header */
00214 /* Sense index entry, singly linked through nextsi.  GetSenseIndex() returns and FreeSenseIndex() accepts a pointer to this type. */
00215 typedef struct si {
00216     char *sensekey;             /* sense key */
00217     char *word;                 /* word string */
00218     long loc;                   /* synset offset */
00219     int wnsense;                /* WordNet sense number */
00220     int tag_cnt;                /* number of semantic tags to sense */
00221     struct si *nextsi;          /* ptr to next sense index entry */
00222 } SnsIndex;
00223 
00224 typedef SnsIndex *SnsIndexPtr;  /* pointer form used by the prototypes in this header */
00225 /* Search results; this header declares one instance, the global wnresults.  The two count arrays have MAX_FORMS entries. */
00226 typedef struct {
00227     int SenseCount[MAX_FORMS];  /* number of senses word form has */
00228     int OutSenseCount[MAX_FORMS]; /* number of senses printed for word form */
00229     int numforms;               /* number of word forms searchword has */
00230     int printcnt;               /* number of senses printed by search */
00231     char *searchbuf;            /* buffer containing formatted results */
00232     SynsetPtr searchds;         /* data structure containing search results */
00233 } SearchResults;
00234 
00235 typedef SearchResults *SearchResultsPtr;        /* pointer form of SearchResults */
00236 
00237 /* Global variables and flags */
00238 
00239 extern SearchResults wnresults; /* structure containing results of search */
00240 extern int fnflag;              /* if set, print lex filename after sense */
00241 extern int dflag;               /* if set, print definitional glosses */
00242 extern int saflag;              /* if set, print SEE ALSO pointers */
00243 extern int fileinfoflag;        /* if set, print lex file info on synsets */
00244 extern int frflag;              /* if set, print verb frames after synset */
00245 extern int abortsearch;         /* if set, stop search algorithm */
00246 extern int offsetflag;          /* if set, print byte offset of each synset */
00247 extern int wnsnsflag;           /* if set, print WN sense # for each word */
00248 
00249 /* File pointers for database files */
00250 /* datafps and indexfps have NUMPARTS + 1 slots; presumably indexed by POS code (NOUN..ADV) with slot 0 unused - confirm */
00251 extern int OpenDB;              /* if non-zero, database files are open */
00252 extern FILE *datafps[NUMPARTS + 1], 
00253             *indexfps[NUMPARTS + 1],
00254             *sensefp,
00255             *cntlistfp,
00256             *keyindexfp, *revkeyindexfp,
00257             *vidxfilefp, *vsentfilefp;
00258 
00259 /* Method for interface to check for events while search is running */
00260 
00261 extern void (*interface_doevents_func)(void);  
00262                         /* callback for interruptible searches in */
00263                         /* single-threaded interfaces */
00264 
00265 /* General error message handler - can be defined by interface.
00266    Default function provided in library returns -1 */
00267 
00268 extern int default_display_message(char *);     /* the default handler provided by the library */
00269 extern int (*display_message)(char *);          /* function pointer holding the current handler */
00270 
00271 
00272 /* Make all the functions compatible with c++ files */
00273 #ifdef __cplusplus
00274 extern "C" {
00275 #endif
00276 
00277 /* External library function prototypes */
00278 /* Parameters are unnamed in these declarations; see the corresponding .c file for their meaning. */
00279 /*** Search and database functions (search.c) ***/
00280 
00281 /* Primary search algorithm for use with user interfaces */
00282 extern char *findtheinfo(char *, int, int, int);        
00283 
00284 /* Primary search algorithm for use with programs (returns data structure) */
00285 extern SynsetPtr findtheinfo_ds(char *, int, int, int); 
00286 
00287 /* Set bit for each search type that is valid for the search word
00288    passed and return bit mask. */
00289 extern unsigned int is_defined(char *, int); 
00290 
00291 /* Set bit for each POS that search word is in.  0 returned if
00292    word is not in WordNet. */
00293 extern unsigned int in_wn(char *, int); 
00294 
00295 /* Find word in index file and return parsed entry in data structure.
00296    Input word must be exact match of string in database. */
00297 extern IndexPtr index_lookup(char *, int); 
00298 
00299 /* 'smart' search of index file.  Find word in index file, trying different
00300    techniques - replace hyphens with underscores, replace underscores with
00301    hyphens, strip hyphens and underscores, strip periods. */
00302 extern IndexPtr getindex(char *, int);  
00303 extern IndexPtr parse_index(long, int, char *);         /* NOTE(review): not documented in this header */
00304 
00305 /* Read synset from data file at byte offset passed and return parsed
00306    entry in data structure. */
00307 extern SynsetPtr read_synset(int, long, char *);
00308 
00309 /* Read synset at current byte offset in file and return parsed entry
00310    in data structure. */
00311 extern SynsetPtr parse_synset(FILE *, int, char *); 
00312 
00313 /* Free a synset linked list allocated by findtheinfo_ds() */
00314 extern void free_syns(SynsetPtr);       
00315 
00316 /* Free a synset */
00317 extern void free_synset(SynsetPtr);     
00318 
00319 /* Free an index structure */
00320 extern void free_index(IndexPtr);       
00321 
00322 /* Recursive search algorithm to trace a pointer tree and return results
00323    in linked list of data structures. */
00324 SynsetPtr traceptrs_ds(SynsetPtr, int, int, int);       /* no 'extern' here, which makes no difference for a function declaration */
00325 
00326 /* Do requested search on synset passed, returning output in buffer. */
00327 extern char *do_trace(SynsetPtr, int, int, int);
00328 
00329 /*** Morphology functions (morph.c) ***/
00330 /* The two init functions are declared with (void) so they are real prototypes in C; the meaning is unchanged for C++ callers. */
00331 /* Open exception list files */
00332 extern int morphinit(void); 
00333 
00334 /* Close exception list files and reopen */
00335 extern int re_morphinit(void);      
00336 
00337 /* Try to find baseform (lemma) of word or collocation in POS. */
00338 extern char *morphstr(char *, int);     
00339 
00340 /* Try to find baseform (lemma) of individual word in POS. */
00341 extern char *morphword(char *, int);    
00342 
00343 /*** Utility functions (wnutil.c) ***/
00344 /* The three functions below are declared with (void) so they are real prototypes in C; the meaning is unchanged for C++ callers. */
00345 /* Top level function to open database files, initialize wn_filenames,
00346    and open exception lists. */
00347 extern int wninit(void);            
00348 
00349 /* Top level function to close and reopen database files, initialize
00350    wn_filenames and open exception lists. */
00351 extern int re_wninit(void);
00352 
00353 /* Top level function to close database files */
00354 extern int wnclose(void);           
00355 
00356 /* Count the number of underscore or space separated words in a string. */
00357 extern int cntwords(char *, char);              
00358 
00359 /* Convert string to lower case and remove trailing adjective marker if found */
00360 extern char *strtolower(char *);        
00361 
00362 /* Convert string passed to lower case */
00363 extern char *ToLowerCase(char *);       
00364 
00365 /* Replace all occurrences of 'from' with 'to' in 'str' */
00366 extern char *strsubst(char *, char, char);      
00367 
00368 /* Return pointer code for pointer type character passed. */
00369 extern int getptrtype(char *);  
00370 
00371 /* Return part of speech code for string passed */
00372 extern int getpos(char *);              
00373 
00374 /* Return synset type code for string passed. */
00375 extern int getsstype(char *);           
00376 
00377 /* Reconstruct synset from synset pointer and return ptr to buffer */
00378 extern char *FmtSynset(SynsetPtr, int); 
00379 
00380 /* Find string for 'searchstr' as it is in index file */
00381 extern char *GetWNStr(char *, int);
00382 
00383 /* Pass in string for POS, return corresponding integer value */
00384 extern int StrToPos(char *);
00385 
00386 /* Return synset for sense key passed. */
00387 extern SynsetPtr GetSynsetForSense(char *);
00388 
00389 /* Find offset of sense key in data file */
00390 extern long GetDataOffset(char *);
00391 
00392 /* Find polysemy (collins) count for sense key passed. */
00393 extern int GetPolyCount(char *);
00394 
00395 /* Return word part of sense key */
00396 extern char *GetWORD(char *);
00397 
00398 /* Return POS code for sense key passed. */
00399 extern int GetPOS(char *);
00400 
00401 /* Convert WordNet sense number passed of IndexPtr entry to sense key. */
00402 extern char *WNSnsToStr(IndexPtr, int);
00403 
00404 /* Search for string and/or baseform of word in database and return
00405    index structure for word if found in database. */
00406 extern IndexPtr GetValidIndexPointer(char *, int);
00407 
00408 /* Return sense number in database for word and lexsn passed. */
00409 int GetWNSense(char *, char *);
00410 
00411 SnsIndexPtr GetSenseIndex(char *);      /* NOTE(review): not documented here; its result type matches FreeSenseIndex()'s argument */
00412 void FreeSenseIndex(SnsIndexPtr);
00413 /* Key/offset conversions - NOTE(review): not documented in this header; described from the names only */
00414 char *GetOffsetForKey(unsigned int);
00415 unsigned int GetKeyForOffset(char *);
00416 /* Declared with (void): a real prototype in C, and unchanged in meaning for C++ callers */
00417 char *SetSearchdir(void);
00418 
00419 /* Return number of times sense is tagged */
00420 int GetTagcnt(IndexPtr, int);
00421 
00422 /*
00423 ** Wrapper functions for strstr that allow you to retrieve each
00424 ** occurrence of a word within a longer string, not just the first.
00425 **
00426 ** strstr_init is called with the same arguments as normal strstr,
00427 ** but does not return any value.
00428 **
00429 ** strstr_getnext returns the position offset (not a pointer, as does
00430 ** normal strstr) of the next occurrence, or -1 if none remain.
00431 */
00432 extern void strstr_init (char *, char *);
00433 extern int strstr_getnext (void);
00434 
00435 /*** Binary search functions (binsearch.c) ***/
00436 
00437 /* General purpose binary search function to search for key as first
00438    item on line in open file.  Item is delimited by space. */
00439 extern char *bin_search(char *, FILE *);
00440 extern char *read_index(long, FILE *);          /* NOTE(review): not documented in this header */
00441 
00442 /* Copy contents from one file to another. */
00443 extern void copyfile(FILE *, FILE *);
00444 
00445 /* Function to replace a line in a file.  Returns the original line,
00446    or NULL in case of error. */
00447 extern char *replace_line(char *, char *, FILE *);
00448 
00449 /* Find location to insert line at in file.  If line with this
00450    key is already in file, return NULL. */
00451 extern char *insert_line(char *, char *, FILE *);
00452 
00453 #ifdef __cplusplus
00454 }
00455 #endif
00456 
00457 extern char **helptext[NUMPARTS + 1];   /* NOTE(review): not documented here; NUMPARTS + 1 slots, presumably one per POS code - confirm */
00458 /*
00459 static char *license = "\
00460 This software and database is being provided to you, the LICENSEE, by  \n\
00461 Princeton University under the following license.  By obtaining, using  \n\
00462 and/or copying this software and database, you agree that you have  \n\
00463 read, understood, and will comply with these terms and conditions.:  \n\
00464   \n\
00465 Permission to use, copy, modify and distribute this software and  \n\
00466 database and its documentation for any purpose and without fee or  \n\
00467 royalty is hereby granted, provided that you agree to comply with  \n\
00468 the following copyright notice and statements, including the disclaimer,  \n\
00469 and that the same appear on ALL copies of the software, database and  \n\
00470 documentation, including modifications that you make for internal  \n\
00471 use or for distribution.  \n\
00472   \n\
00473 WordNet 2.1 Copyright 2005 by Princeton University.  All rights reserved.  \n\
00474   \n\
00475 THIS SOFTWARE AND DATABASE IS PROVIDED \"AS IS\" AND PRINCETON  \n\
00476 UNIVERSITY MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR  \n\
00477 IMPLIED.  BY WAY OF EXAMPLE, BUT NOT LIMITATION, PRINCETON  \n\
00478 UNIVERSITY MAKES NO REPRESENTATIONS OR WARRANTIES OF MERCHANT-  \n\
00479 ABILITY OR FITNESS FOR ANY PARTICULAR PURPOSE OR THAT THE USE  \n\
00480 OF THE LICENSED SOFTWARE, DATABASE OR DOCUMENTATION WILL NOT  \n\
00481 INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR  \n\
00482 OTHER RIGHTS.  \n\
00483   \n\
00484 The name of Princeton University or Princeton may not be used in  \n\
00485 advertising or publicity pertaining to distribution of the software  \n\
00486 and/or database.  Title to copyright in this software, database and  \n\
00487 any associated documentation shall at all times remain with  \n\
00488 Princeton University and LICENSEE agrees to preserve same.  \n"
00489 ;
00490 
00491 static char dblicense[] = "\
00492   1 This software and database is being provided to you, the LICENSEE, by  \n\
00493   2 Princeton University under the following license.  By obtaining, using  \n\
00494   3 and/or copying this software and database, you agree that you have  \n\
00495   4 read, understood, and will comply with these terms and conditions.:  \n\
00496   5   \n\
00497   6 Permission to use, copy, modify and distribute this software and  \n\
00498   7 database and its documentation for any purpose and without fee or  \n\
00499   8 royalty is hereby granted, provided that you agree to comply with  \n\
00500   9 the following copyright notice and statements, including the disclaimer,  \n\
00501   10 and that the same appear on ALL copies of the software, database and  \n\
00502   11 documentation, including modifications that you make for internal  \n\
00503   12 use or for distribution.  \n\
00504   13   \n\
00505   14 WordNet 2.1 Copyright 2005 by Princeton University.  All rights reserved.  \n\
00506   15   \n\
00507   16 THIS SOFTWARE AND DATABASE IS PROVIDED \"AS IS\" AND PRINCETON  \n\
00508   17 UNIVERSITY MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR  \n\
00509   18 IMPLIED.  BY WAY OF EXAMPLE, BUT NOT LIMITATION, PRINCETON  \n\
00510   19 UNIVERSITY MAKES NO REPRESENTATIONS OR WARRANTIES OF MERCHANT-  \n\
00511   20 ABILITY OR FITNESS FOR ANY PARTICULAR PURPOSE OR THAT THE USE  \n\
00512   21 OF THE LICENSED SOFTWARE, DATABASE OR DOCUMENTATION WILL NOT  \n\
00513   22 INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR  \n\
00514   23 OTHER RIGHTS.  \n\
00515   24   \n\
00516   25 The name of Princeton University or Princeton may not be used in  \n\
00517   26 advertising or publicity pertaining to distribution of the software  \n\
00518   27 and/or database.  Title to copyright in this software, database and  \n\
00519   28 any associated documentation shall at all times remain with  \n\
00520   29 Princeton University and LICENSEE agrees to preserve same.  \n"
00521 ; */
00522 
00523 #define DBLICENSE_SIZE  (sizeof(dblicense))     /* NOTE(review): dblicense is only defined inside the commented-out block above, so this macro compiles only where the includer declares its own dblicense */
00524 
00525 #endif /*_WN_*/