Planeshift
|
/*

   wn.h - header file needed to use WordNet Run Time Library

   $Id: wn.h,v 1.4 2007/09/14 23:19:16 mgist Exp $

*/

#ifndef _WN_
#define _WN_

#include <stdio.h>

/* Platform specific path and filename specifications.
   The *FILE macros are printf-style format strings: the first %s is the
   dictionary directory; DATAFILE and INDEXFILE take a second %s. */

#ifdef _WINDOWS
#define DICTDIR         "\\dict"
//#define DEFAULTPATH   "C:\\Program Files\\WordNet\\2.1\\dict"
#define DEFAULTPATH     "data\\dict"
#define DATAFILE        "%s\\data.%s"
#define INDEXFILE       "%s\\index.%s"
#define SENSEIDXFILE    "%s\\index.sense"
#define KEYIDXFILE      "%s\\index.key"
#define REVKEYIDXFILE   "%s\\index.key.rev"
#define VRBSENTFILE     "%s\\sents.vrb"
#define VRBIDXFILE      "%s\\sentidx.vrb"
#define CNTLISTFILE     "%s\\cntlist.rev"
#else
#define DICTDIR         "/dict"
//#define DEFAULTPATH   "/usr/local/WordNet-2.1/dict"
#define DEFAULTPATH     "data/dict"
#define DATAFILE        "%s/data.%s"
#define INDEXFILE       "%s/index.%s"
#define SENSEIDXFILE    "%s/index.sense"
#define KEYIDXFILE      "%s/index.key"
#define REVKEYIDXFILE   "%s/index.key.rev"
#define VRBSENTFILE     "%s/sents.vrb"
#define VRBIDXFILE      "%s/sentidx.vrb"
#define CNTLISTFILE     "%s/cntlist.rev"
#endif

/* Various buffer sizes */

#define SEARCHBUF       ((long)(200*(long)1024))
#define LINEBUF         (15*1024)   /* 15K buffer to read index & data files */
#define SMLINEBUF       (3*1024)    /* small buffer for output lines */
#define WORDBUF         (256)       /* buffer for one word or collocation */

#define ALLSENSES       0   /* pass to findtheinfo() if want all senses */
#define MAXID           15  /* maximum id number in lexicographer file */
#define MAXDEPTH        20  /* maximum tree depth - used to find cycles */
#define MAXSENSE        75  /* maximum number of senses in database */
#define MAX_FORMS       5   /* max # of different 'forms' word can have */
#define MAXFNUM         44  /* maximum number of lexicographer files */

/* Pointer type and search type counts */

/* Pointers */

#define ANTPTR           1  /* ! */
#define HYPERPTR         2  /* @ */
#define HYPOPTR          3  /* ~ */
#define ENTAILPTR        4  /* * */
#define SIMPTR           5  /* & */

#define ISMEMBERPTR      6  /* #m */
#define ISSTUFFPTR       7  /* #s */
#define ISPARTPTR        8  /* #p */

#define HASMEMBERPTR     9  /* %m */
#define HASSTUFFPTR     10  /* %s */
#define HASPARTPTR      11  /* %p */

#define MERONYM         12  /* % (not valid in lexicographer file) */
#define HOLONYM         13  /* # (not valid in lexicographer file) */
#define CAUSETO         14  /* > */
#define PPLPTR          15  /* < */
#define SEEALSOPTR      16  /* ^ */
#define PERTPTR         17  /* \ */
#define ATTRIBUTE       18  /* = */
#define VERBGROUP       19  /* $ */
#define DERIVATION      20  /* + */
#define CLASSIFICATION  21  /* ; */
#define CLASS           22  /* - */

#define LASTTYPE        CLASS

/* Misc searches */

#define SYNS            (LASTTYPE + 1)
#define FREQ            (LASTTYPE + 2)
#define FRAMES          (LASTTYPE + 3)
#define COORDS          (LASTTYPE + 4)
#define RELATIVES       (LASTTYPE + 5)
#define HMERONYM        (LASTTYPE + 6)
#define HHOLONYM        (LASTTYPE + 7)
#define WNGREP          (LASTTYPE + 8)
#define OVERVIEW        (LASTTYPE + 9)

#define MAXSEARCH       OVERVIEW

#define CLASSIF_START    (MAXSEARCH + 1)

#define CLASSIF_CATEGORY (CLASSIF_START)        /* ;c */
#define CLASSIF_USAGE    (CLASSIF_START + 1)    /* ;u */
#define CLASSIF_REGIONAL (CLASSIF_START + 2)    /* ;r */

#define CLASSIF_END      CLASSIF_REGIONAL

#define CLASS_START      (CLASSIF_END + 1)

#define CLASS_CATEGORY   (CLASS_START)          /* -c */
#define CLASS_USAGE      (CLASS_START + 1)      /* -u */
#define CLASS_REGIONAL   (CLASS_START + 2)      /* -r */

#define CLASS_END        CLASS_REGIONAL

#define INSTANCE         (CLASS_END + 1)        /* @i */
#define INSTANCES        (CLASS_END + 2)        /* ~i */

#define MAXPTR           INSTANCES

/* WordNet part of speech stuff */

#define NUMPARTS        4   /* number of parts of speech */
#define NUMFRAMES       35  /* number of verb frames */

/* Generic names for part of speech */

#define NOUN            1
#define VERB            2
#define ADJ             3
#define ADV             4
#define SATELLITE       5   /* not really a part of speech */
#define ADJSAT          SATELLITE

#define ALL_POS         0   /* passed to in_wn() to check all POS */

/* Mask with only bit number n set.  The argument is parenthesized so that
   the whole expression passed in - not just its first operand - is
   converted to unsigned int before the shift. */
#define bit(n) ((unsigned int)((unsigned int)1 << ((unsigned int)(n))))

/* Adjective markers */

#define PADJ            1   /* (p) */
#define NPADJ           2   /* (a) */
#define IPADJ           3   /* (ip) */

#define UNKNOWN_MARKER      0
#define ATTRIBUTIVE         NPADJ
#define PREDICATIVE         PADJ
#define IMMED_POSTNOMINAL   IPADJ

extern char *wnrelease;     /* WordNet release/version number */

extern char *lexfiles[];    /* names of lexicographer files */
extern char *ptrtyp[];      /* pointer characters */
extern char *partnames[];   /* POS strings */
extern char partchars[];    /* single chars for each POS */
extern char *adjclass[];    /* adjective class strings */
extern char *frametext[];   /* text of verb frames */

/* Data structures used by search code functions. */

/* Structure for index file entry */
typedef struct {
    long idxoffset;         /* byte offset of entry in index file */
    char *wd;               /* word string */
    char *pos;              /* part of speech */
    int sense_cnt;          /* sense (collins) count */
    int off_cnt;            /* number of offsets */
    int tagged_cnt;         /* number senses that are tagged */
    unsigned long *offset;  /* offsets of synsets containing word */
    int ptruse_cnt;         /* number of pointers used */
    int *ptruse;            /* pointers used */
} Index;

typedef Index *IndexPtr;

/* Structure for data file synset */
typedef struct ss {
    long hereiam;           /* current file position */
    int sstype;             /* type of ADJ synset */
    int fnum;               /* file number that synset comes from */
    char *pos;              /* part of speech */
    int wcount;             /* number of words in synset */
    char **words;           /* words in synset */
    int *lexid;             /* unique id in lexicographer file */
    int *wnsns;             /* sense number in wordnet */
    int whichword;          /* which word in synset we're looking for */
    int ptrcount;           /* number of pointers */
    int *ptrtyp;            /* pointer types */
    long *ptroff;           /* pointer offsets */
    int *ppos;              /* pointer part of speech */
    int *pto;               /* pointer 'to' fields */
    int *pfrm;              /* pointer 'from' fields */
    int fcount;             /* number of verb frames */
    int *frmid;             /* frame numbers */
    int *frmto;             /* frame 'to' fields */
    char *defn;             /* synset gloss (definition) */
    unsigned int key;       /* unique synset key */

    /* these fields are used if a data structure is returned
       instead of a text buffer */

    struct ss *nextss;      /* ptr to next synset containing searchword */
    struct ss *nextform;    /* ptr to list of synsets for alternate
                               spelling of wordform */
    int searchtype;         /* type of search performed */
    struct ss *ptrlist;     /* ptr to synset list result of search */
    char *headword;         /* if pos is "s", this is cluster head word */
    short headsense;        /* sense number of headword */
} Synset;

typedef Synset *SynsetPtr;

/* Structure for a sense index entry (singly linked through nextsi) */
typedef struct si {
    char *sensekey;         /* sense key */
    char *word;             /* word string */
    long loc;               /* synset offset */
    int wnsense;            /* WordNet sense number */
    int tag_cnt;            /* number of semantic tags to sense */
    struct si *nextsi;      /* ptr to next sense index entry */
} SnsIndex;

typedef SnsIndex *SnsIndexPtr;

/* Structure describing the results of a search */
typedef struct {
    int SenseCount[MAX_FORMS];      /* number of senses word form has */
    int OutSenseCount[MAX_FORMS];   /* number of senses printed for word form */
    int numforms;                   /* number of word forms searchword has */
    int printcnt;                   /* number of senses printed by search */
    char *searchbuf;                /* buffer containing formatted results */
    SynsetPtr searchds;             /* data structure containing search results */
} SearchResults;

typedef SearchResults *SearchResultsPtr;

/* Global variables and flags */

extern SearchResults wnresults; /* structure containing results of search */
extern int fnflag;              /* if set, print lex filename after sense */
extern int dflag;               /* if set, print definitional glosses */
extern int saflag;              /* if set, print SEE ALSO pointers */
extern int fileinfoflag;        /* if set, print lex file info on synsets */
extern int frflag;              /* if set, print verb frames after synset */
extern int abortsearch;         /* if set, stop search algorithm */
extern int offsetflag;          /* if set, print byte offset of each synset */
extern int wnsnsflag;           /* if set, print WN sense # for each word */

/* File pointers for database files */

extern int OpenDB;              /* if non-zero, database files are open */
extern FILE *datafps[NUMPARTS + 1],
            *indexfps[NUMPARTS + 1],
            *sensefp,
            *cntlistfp,
            *keyindexfp, *revkeyindexfp,
            *vidxfilefp, *vsentfilefp;

/* Method for interface to check for events while search is running */

extern void (*interface_doevents_func)(void);
                        /* callback for interruptible searches in */
                        /* single-threaded interfaces */

/* General error message handler - can be defined by interface.
   Default function provided in library returns -1 */

extern int default_display_message(char *);
extern int (*display_message)(char *);


/* Make all the functions compatible with c++ files */
#ifdef __cplusplus
extern "C" {
#endif

/* External library function prototypes */

/*** Search and database functions (search.c) ***/

/* Primary search algorithm for use with user interfaces */
extern char *findtheinfo(char *, int, int, int);

/* Primary search algorithm for use with programs (returns data structure) */
extern SynsetPtr findtheinfo_ds(char *, int, int, int);

/* Set bit for each search type that is valid for the search word
   passed and return bit mask. */
extern unsigned int is_defined(char *, int);

/* Set bit for each POS that search word is in.  0 returned if
   word is not in WordNet. */
extern unsigned int in_wn(char *, int);

/* Find word in index file and return parsed entry in data structure.
   Input word must be exact match of string in database. */
extern IndexPtr index_lookup(char *, int);

/* 'smart' search of index file.  Find word in index file, trying different
   techniques - replace hyphens with underscores, replace underscores with
   hyphens, strip hyphens and underscores, strip periods. */
extern IndexPtr getindex(char *, int);
extern IndexPtr parse_index(long, int, char *);

/* Read synset from data file at byte offset passed and return parsed
   entry in data structure. */
extern SynsetPtr read_synset(int, long, char *);

/* Read synset at current byte offset in file and return parsed entry
   in data structure. */
extern SynsetPtr parse_synset(FILE *, int, char *);

/* Free a synset linked list allocated by findtheinfo_ds() */
extern void free_syns(SynsetPtr);

/* Free a synset */
extern void free_synset(SynsetPtr);

/* Free an index structure */
extern void free_index(IndexPtr);

/* Recursive search algorithm to trace a pointer tree and return results
   in linked list of data structures. */
SynsetPtr traceptrs_ds(SynsetPtr, int, int, int);

/* Do requested search on synset passed, returning output in buffer. */
extern char *do_trace(SynsetPtr, int, int, int);

/*** Morphology functions (morph.c) ***/

/* Open exception list files */
extern int morphinit(void);

/* Close exception list files and reopen */
extern int re_morphinit(void);

/* Try to find baseform (lemma) of word or collocation in POS. */
extern char *morphstr(char *, int);

/* Try to find baseform (lemma) of individual word in POS. */
extern char *morphword(char *, int);

/*** Utility functions (wnutil.c) ***/

/* Top level function to open database files, initialize wn_filenames,
   and open exception lists. */
extern int wninit(void);

/* Top level function to close and reopen database files, initialize
   wn_filenames and open exception lists. */
extern int re_wninit(void);

/* Top level function to close database files */
extern int wnclose(void);

/* Count the number of underscore or space separated words in a string. */
extern int cntwords(char *, char);

/* Convert string to lower case remove trailing adjective marker if found */
extern char *strtolower(char *);

/* Convert string passed to lower case */
extern char *ToLowerCase(char *);

/* Replace all occurrences of 'from' with 'to' in 'str' */
extern char *strsubst(char *, char, char);

/* Return pointer code for pointer type character passed. */
extern int getptrtype(char *);

/* Return part of speech code for string passed */
extern int getpos(char *);

/* Return synset type code for string passed. */
extern int getsstype(char *);

/* Reconstruct synset from synset pointer and return ptr to buffer */
extern char *FmtSynset(SynsetPtr, int);

/* Find string for 'searchstr' as it is in index file */
extern char *GetWNStr(char *, int);

/* Pass in string for POS, return corresponding integer value */
extern int StrToPos(char *);

/* Return synset for sense key passed. */
extern SynsetPtr GetSynsetForSense(char *);

/* Find offset of sense key in data file */
extern long GetDataOffset(char *);

/* Find polysemy (collins) count for sense key passed. */
extern int GetPolyCount(char *);

/* Return word part of sense key */
extern char *GetWORD(char *);

/* Return POS code for sense key passed. */
extern int GetPOS(char *);

/* Convert WordNet sense number passed of IndexPtr entry to sense key. */
extern char *WNSnsToStr(IndexPtr, int);

/* Search for string and/or baseform of word in database and return
   index structure for word if found in database. */
extern IndexPtr GetValidIndexPointer(char *, int);

/* Return sense number in database for word and lexsn passed. */
int GetWNSense(char *, char *);

SnsIndexPtr GetSenseIndex(char *);
void FreeSenseIndex(SnsIndexPtr);

char *GetOffsetForKey(unsigned int);
unsigned int GetKeyForOffset(char *);

char *SetSearchdir(void);

/* Return number of times sense is tagged */
int GetTagcnt(IndexPtr, int);

/*
** Wrapper functions for strstr that allow you to retrieve each
** occurrence of a word within a longer string, not just the first.
**
** strstr_init is called with the same arguments as normal strstr,
** but does not return any value.
**
** strstr_getnext returns the position offset (not a pointer, as does
** normal strstr) of the next occurrence, or -1 if none remain.
*/
extern void strstr_init (char *, char *);
extern int strstr_getnext (void);

/*** Binary search functions (binsearch.c) ***/

/* General purpose binary search function to search for key as first
   item on line in open file.  Item is delimited by space. */
extern char *bin_search(char *, FILE *);
extern char *read_index(long, FILE *);

/* Copy contents from one file to another. */
extern void copyfile(FILE *, FILE *);

/* Function to replace a line in a file.  Returns the original line,
   or NULL in case of error. */
extern char *replace_line(char *, char *, FILE *);

/* Find location to insert line at in file.  If line with this
   key is already in file, return NULL. */
extern char *insert_line(char *, char *, FILE *);

#ifdef __cplusplus
}
#endif

extern char **helptext[NUMPARTS + 1];
/*
static char *license = "\
This software and database is being provided to you, the LICENSEE, by  \n\
Princeton University under the following license.  By obtaining, using  \n\
and/or copying this software and database, you agree that you have  \n\
read, understood, and will comply with these terms and conditions.:  \n\
  \n\
Permission to use, copy, modify and distribute this software and  \n\
database and its documentation for any purpose and without fee or  \n\
royalty is hereby granted, provided that you agree to comply with  \n\
the following copyright notice and statements, including the disclaimer,  \n\
and that the same appear on ALL copies of the software, database and  \n\
documentation, including modifications that you make for internal  \n\
use or for distribution.  \n\
  \n\
WordNet 2.1 Copyright 2005 by Princeton University.  All rights reserved.  \n\
  \n\
THIS SOFTWARE AND DATABASE IS PROVIDED \"AS IS\" AND PRINCETON  \n\
UNIVERSITY MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR  \n\
IMPLIED.  BY WAY OF EXAMPLE, BUT NOT LIMITATION, PRINCETON  \n\
UNIVERSITY MAKES NO REPRESENTATIONS OR WARRANTIES OF MERCHANT-  \n\
ABILITY OR FITNESS FOR ANY PARTICULAR PURPOSE OR THAT THE USE  \n\
OF THE LICENSED SOFTWARE, DATABASE OR DOCUMENTATION WILL NOT  \n\
INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR  \n\
OTHER RIGHTS.  \n\
  \n\
The name of Princeton University or Princeton may not be used in  \n\
advertising or publicity pertaining to distribution of the software  \n\
and/or database.  Title to copyright in this software, database and  \n\
any associated documentation shall at all times remain with  \n\
Princeton University and LICENSEE agrees to preserve same.  \n"
;

static char dblicense[] = "\
  1 This software and database is being provided to you, the LICENSEE, by  \n\
  2 Princeton University under the following license.  By obtaining, using  \n\
  3 and/or copying this software and database, you agree that you have  \n\
  4 read, understood, and will comply with these terms and conditions.:  \n\
  5   \n\
  6 Permission to use, copy, modify and distribute this software and  \n\
  7 database and its documentation for any purpose and without fee or  \n\
  8 royalty is hereby granted, provided that you agree to comply with  \n\
  9 the following copyright notice and statements, including the disclaimer,  \n\
  10 and that the same appear on ALL copies of the software, database and  \n\
  11 documentation, including modifications that you make for internal  \n\
  12 use or for distribution.  \n\
  13   \n\
  14 WordNet 2.1 Copyright 2005 by Princeton University.  All rights reserved.  \n\
  15   \n\
  16 THIS SOFTWARE AND DATABASE IS PROVIDED \"AS IS\" AND PRINCETON  \n\
  17 UNIVERSITY MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR  \n\
  18 IMPLIED.  BY WAY OF EXAMPLE, BUT NOT LIMITATION, PRINCETON  \n\
  19 UNIVERSITY MAKES NO REPRESENTATIONS OR WARRANTIES OF MERCHANT-  \n\
  20 ABILITY OR FITNESS FOR ANY PARTICULAR PURPOSE OR THAT THE USE  \n\
  21 OF THE LICENSED SOFTWARE, DATABASE OR DOCUMENTATION WILL NOT  \n\
  22 INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR  \n\
  23 OTHER RIGHTS.  \n\
  24   \n\
  25 The name of Princeton University or Princeton may not be used in  \n\
  26 advertising or publicity pertaining to distribution of the software  \n\
  27 and/or database.  Title to copyright in this software, database and  \n\
  28 any associated documentation shall at all times remain with  \n\
  29 Princeton University and LICENSEE agrees to preserve same.  \n"
;
*/

/* Size in bytes of the dblicense array.
   NOTE: this header only contains dblicense inside the commented-out block
   above, so this macro is usable only in a translation unit that has its
   own array named dblicense in scope at the point of use. */
#define DBLICENSE_SIZE          (sizeof(dblicense))

#endif /*_WN_*/