1
2
3
4
5
6
7
8
9
10 from urllib import quote_plus, unquote_plus
11 import itertools as it
12
13 from nltk import defaultdict
14 from nltk.wordnet import *
15 from nltk.wordnet.stemmer import _morphy
16 from nltk.wordnet.synset import _RELATION_TABLE
17
# Names exported by ``from <this module> import *``.
__all__ = ['get_static_index_page',
           'get_static_page_by_path',
           'new_word_and_body',
           'page_word',
           'relations_2',
           'uniq_cntr']


"""
Wordnet Browser Utilities.

This provides a backend to both wxbrowse and browserver.py.
"""
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
def relations_2(synsetObj, rel_name=None, word_match=False):
    """
    Return a dict of relations or a list of word match pairs for synsetObj.

    The dictionary keys are the names for the relations found.
    The dictionary items are lists of either synsets or words
    depending on the relation.

    If rel_name is specified, a list of either synsets or words for only
    that relation type is returned (an empty list if the synset has no
    such relation).

    If word_match is True a list of pairs (source,target) of matching
    word information is returned. Here source and target are tuples
    of form (synset,word_index), where 'word_index' is the 0-based index of
    the word in synset 'synset'.

    The computed table is cached on synsetObj (attributes '_relations' and
    '_word_match_last_used') and rebuilt only when word_match differs from
    the value used to build the cached table.

    @param synsetObj: The synset whose relations we are using
    @type synsetObj: synset
    @param rel_name: A relation name we are interested in
    @type rel_name: str
    @param word_match: Tells if we want word-level matching or not
    @type word_match: truth value
    @return: A relation dict or a list of word match pairs for synsetObj.
    @rtype: A dict or list
    """
    # Load the cache on first use, or when the caller switches between
    # synset-level and word-level results.
    if not hasattr(synsetObj, '_relations') or \
            word_match != synsetObj._word_match_last_used:
        relations = defaultdict(list)
        for (rel_type, offset, pos, indices) in synsetObj._pointerTuples:
            rel = _RELATION_TABLE[rel_type]
            # 'indices' holds two hex fields: the first two digits are the
            # source word number, the rest the target word number.  After
            # subtracting 1 a negative target means "the whole synset".
            source_ind = int(indices[0:2], 16) - 1
            target_ind = int(indices[2:], 16) - 1
            pos = normalizePOS(pos)
            offset = int(offset)
            synset = dictionary.synset(pos, offset)
            if target_ind >= 0:
                if word_match:
                    source_tuple = (synsetObj, source_ind)
                    target_tuple = (synset, target_ind)
                    relations[rel].append((source_tuple, target_tuple))
                else:
                    relations[rel].append(synset[target_ind])
            else:
                relations[rel].append(synset)
        # Store a plain dict so later lookups of missing relations do not
        # silently create empty entries.
        synsetObj._relations = dict(relations)
        synsetObj._word_match_last_used = word_match
    if rel_name is not None:
        return synsetObj._relations.get(rel_name, [])
    else:
        return synsetObj._relations
100
101
# One entry per part of speech: (POS object from nltk.wordnet, one-letter
# tag used in synset keys, POS name string).
_pos_tuples = [(N,'N','noun'), (V,'V','verb'), (ADJ,'J','adj'),
               (ADV,'R','adv')]

def _pos_match(pos_tuple):
    """
    Return the entry of _pos_tuples that matches pos_tuple.

    pos_tuple has the same layout as the entries of _pos_tuples; the
    comparison is done on its first field that is not None.

    @param pos_tuple: A partially filled tuple, e.g. (None,None,'noun')
    @type pos_tuple: tuple
    @return: The matching entry of _pos_tuples, or None if nothing matches
    @rtype: tuple or None
    """
    # Find the position of the first field the caller filled in.
    for n,x in enumerate(pos_tuple):
        if x is not None:
            break
    for pt in _pos_tuples:
        if pt[n] == pos_tuple[n]: return pt
    return None
112
# Display names of the relations for which page_word can open a section;
# a relation link whose name is not listed here is rejected there.
implemented_rel_names = \
    ['antonym',
     'attribute',
     'cause',
     'derivationally related form',
     'direct hypernym',
     'direct hyponym',
     'direct troponym',
     'domain category',
     'domain region',
     'domain term category',
     'domain term region',
     'domain term usage',
     'domain usage',
     'entailment',
     'full hyponym',
     'full troponym',
     'has instance',
     'inherited hypernym',
     'instance',
     'member holonym',
     'member meronym',
     'Overview',
     'part holonym',
     'part meronym',
     'participle',
     'pertainym',
     'phrasal verb',
     'see also',
     'sentence frame',
     'similar to',
     'sister term',
     'substance holonym',
     'substance meronym',
     'synset',
     'verb group'
     ]
150
151
152
153
154
# Pairs of (database relation name, display name) in the order used when
# listing a synset's relations.  A display name may contain several
# alternatives separated by '/'.
rel_order = \
    [(HYPONYM,'direct hyponym/full hyponym'),
     (HYPONYM,'direct troponym/full troponym'),
     (CLASS_REGIONAL,'domain term region'),
     (PART_HOLONYM,PART_MERONYM),
     (ATTRIBUTE,ATTRIBUTE),
     (SUBSTANCE_HOLONYM,SUBSTANCE_MERONYM),
     (SUBSTANCE_MERONYM,SUBSTANCE_HOLONYM),
     (MEMBER_MERONYM,MEMBER_HOLONYM),
     (MEMBER_HOLONYM,MEMBER_MERONYM),
     (VERB_GROUP,VERB_GROUP),
     (CLASSIF_CATEGORY, CLASSIF_CATEGORY),
     (INSTANCE_HYPONYM, 'has instance'),
     (CLASS_CATEGORY,'domain term category'),
     (CLASS_USAGE,'domain term usage'),
     (HYPERNYM,'direct hypernym/inherited hypernym/sister term'),
     (CLASSIF_REGIONAL, CLASSIF_REGIONAL),
     (CLASSIF_USAGE,'domain usage'),
     (PART_MERONYM,PART_HOLONYM),
     (INSTANCE_HYPERNYM, 'instance'),
     (CAUSE,CAUSE),
     (ALSO_SEE,'see also'),
     (ALSO_SEE,'phrasal verb'),
     (SIMILAR,'similar to'),
     (ENTAILMENT,ENTAILMENT),
     (PARTICIPLE_OF, 'participle'),
     (ANTONYM, 'antonym'),
     (FRAMES,'derivationally related form'),

     (PERTAINYM,PERTAINYM)
     ]
186
def _dispname_to_dbname(dispname):
    """
    Return the database relation name for a display name.

    @param dispname: One display name, i.e. one of the '/'-separated
        alternatives found in rel_order
    @type dispname: str
    @return: The database name of the first rel_order entry listing
        dispname, or None if no entry lists it
    """
    for dbn,dispn in rel_order:
        if dispname in dispn.split('/'):
            return dbn
    return None
192
194 for dbn,dispn in rel_order:
195 if dbn == dbname:
196 return dispn
197 return '???'
198
# Opening part of every generated page; the single %s is filled with the
# word being displayed (see the <title> element).
html_header = '''
<!DOCTYPE html PUBLIC '-//W3C//DTD HTML 4.01//EN'
'http://www.w3.org/TR/html4/strict.dtd'>
<html>
<head>
<meta name='generator' content=
'HTML Tidy for Windows (vers 14 February 2006), see www.w3.org'>
<meta http-equiv='Content-Type' content=
'text/html; charset=us-ascii'>
<title>NLTK Wordnet Browser display of: %s</title></head>
<body bgcolor='#F5F5F5' text='#000000'>
'''
# Closing part of every generated page.
html_trailer = '''
</body>
</html>
'''
215
# Search help text (HTML fragment) ending with a horizontal rule.
explanation = '''
<h3>Search Help</h3>
<ul><li>The display below the line is an example of the output the browser
shows you when you enter a search word. The search word was <b>green</b>.</li>
<li>The search result shows for different parts of speech the <b>synsets</b>
i.e. different meanings for the word.</li>
<li>All underlined texts are hypertext links. There are two types of links:
word links and others. Clicking a word link carries out a search for the word
in the Wordnet database.</li>
<li>Clicking a link of the other type opens a display section of data attached
to that link. Clicking that link a second time closes the section again.</li>
<li>Clicking <u>S:</u> opens a section showing the relations for that synset.
</li>
<li>Clicking on a relation name opens a section that displays the associated
synsets.</li>
<li>Type a search word in the <b>Word</b> field and start the search by the
<b>Enter/Return</b> key or click the <b>Search</b> button.</li>
</ul>
<hr width='100%'>
'''
236
237
238
# Small helpers that wrap a text fragment in one HTML element.

def _bold(txt):
    """Return txt wrapped in <b>...</b>."""
    return '<b>%s</b>' % txt

def _center(txt):
    """Return txt wrapped in <center>...</center>."""
    return '<center>%s</center>' % txt

def _hlev(n,txt):
    """Return txt as a level-n heading, i.e. <hn>...</hn>."""
    return '<h%d>%s</h%d>' % (n,txt,n)

def _italic(txt):
    """Return txt wrapped in <i>...</i>."""
    return '<i>%s</i>' % txt

def _li(txt):
    """Return txt wrapped in <li>...</li>."""
    return '<li>%s</li>' % txt
248
def pg(word, body):
    '''
    Return a HTML page of NLTK Browser format constructed from the
    word and body

    @param word: The word that the body corresponds to
    @type word: str
    @param body: The HTML body corresponding to the word
    @type body: str
    @return: a HTML page for the word-body combination
    @rtype: str
    '''
    # html_header carries one %s placeholder (the page title).
    return (html_header % word) + body + html_trailer
262
def _ul(txt):
    """Return txt wrapped in <ul>...</ul>."""
    return '<ul>' + txt + '</ul>'
264
265
267 return _center(_bold('<br>'*10 + '*'*10 + ' ' + txt + ' ' + '*'*10))
268
# Marker fragment; page_word removes it from the page when it is found
# right below a synset link that is being opened.
full_hyponym_cont_text = \
    _ul(_li(_italic('(has full hyponym continuation)'))) + '\n'
271
272
# Module-level state behind uniq_cntr().
_uniq_cntr = 0

def uniq_cntr():
    """
    Return a unique counter, a state is kept to ensure that the same
    counter is not provided multiple times.

    @return: A unique integer for this module instance.
    @rtype: int
    """
    global _uniq_cntr
    _uniq_cntr += 1
    return _uniq_cntr
286
291
def _collect_one(word, s_or_w, prev_synset_key):
    '''
    Returns the HTML string for one synset or word

    @param word: the current word
    @type word: str
    @param s_or_w: a tuple containing word information or a synset
    @type s_or_w: tuple or synset
    @param prev_synset_key: key of previous synset
    @type prev_synset_key: str
    @return: The HTML string built for this synset or word
    @rtype: str
    '''
    u_c = uniq_cntr()
    if isinstance(s_or_w, tuple):
        # Word entry: (form_str, (synset, (pos,offset,index), forms)).
        # The synset element is ignored; it is looked up again from
        # pos/offset below.
        form_str,(synset,oppo,forms) = s_or_w
        pos,offset,ind = oppo
        synset = dictionary.synset(pos, offset)
        synset_key = _pos_match((None,None,synset.pos))[1] + str(synset.offset)
        synset_key += ':' + str(ind) + ',' + prev_synset_key
        oppo = synset.words[ind]
        oppo = oppo.replace('_', ' ')
        typ = 'W'
    else:
        # Synset entry.
        synset = s_or_w
        typ = 'S'
        synset_key = _pos_match((None,None,synset.pos))[1] + str(synset.offset)
        synset_key += ',' + prev_synset_key
    # 'adj'/'adv' are shown in full, other POS names by first letter.
    if synset.pos.startswith('ad'):
        descr = synset.pos
    else:
        descr = synset.pos[0]
    # The leading "S:"/"W:" link; its href is word#synset_key#counter.
    s = '<li><a href="' + typ + quote_plus(word + '#' + synset_key + '#' + \
        str(u_c)) + '">' + typ + ':</a>' + ' (' + descr + ') '
    if isinstance(s_or_w, tuple):
        s += '<a href="M' + quote_plus(oppo + '#' + str(uniq_cntr())) + \
             '">' + oppo + '</a> ' + form_str
        for w in forms:
            pos,offset,ind = w
            w = dictionary.synset(pos, offset).words[ind]
            w = w.replace('_', ' ')
            s += '<a href="M' + quote_plus(w + '#' + str(uniq_cntr())) + \
                 '">' + w + '</a>, '
        # Replace the trailing ', ' with the closing bracket.
        s = s[:-2] + '] '
    else:
        for w in synset:
            w = w.replace('_', ' ')
            if w.lower() == word:
                # The searched word itself is shown bold, not as a link.
                s+= _bold(w) + ', '
            else:
                s += '<a href="M' + quote_plus(w + '#' + str(uniq_cntr())) + \
                     '">' + w + '</a>, '
        # Replace the trailing ', ' with the opening of the gloss.
        s = s[:-2] + ' ('

    # Format the gloss: parts up to the first quoted one are closed with
    # ')'; the quoted part and everything after it are set in italics.
    gl = ''
    hyph_not_found = True
    for g in synset.gloss.split('; '):
        if not g.startswith('"'):
            if gl: gl += '; '
            gl += g
        else:
            if hyph_not_found:
                gl += ') <i>'
                hyph_not_found = False
            else:
                gl += '; '
            gl += g
    if hyph_not_found:
        gl += ')'
    else:
        gl += '</i>'
    return s + gl + '</li>\n'
364
368
def _rel_ref(word, synset_keys, rel):
    """
    Return the HTML link for one relation name of a synset.

    The href is 'R' followed by the URL-quoted string
    word#synset_keys#rel#counter, where counter comes from uniq_cntr().

    @param word: The current word
    @type word: str
    @param synset_keys: synset keys for this and the previous synset
    @type synset_keys: str
    @param rel: The display name of the relation
    @type rel: str
    @return: The HTML anchor showing rel in italics
    @rtype: str
    """
    return '<a href="R%s"><i>%s</i></a>' % \
        (quote_plus('#'.join((word, synset_keys, rel, str(uniq_cntr())))),
         rel)
373
382
def _synset_relations(word, link_type, synset_keys):
    '''
    Builds the HTML string for the relations of a synset

    @param word: The current word
    @type word: str
    @param link_type: The link type, word or synset
    @type link_type: str
    @param synset_keys: synset keys for this and the previous synset
    @type synset_keys: str
    @return: The HTML for a synset's relations
    @rtype: str
    '''
    sk = synset_keys.split(',')[0]
    synset = _get_synset(sk.split(':')[0])
    # Work on a private list: entries are removed below and the dict
    # returned by relations_2 is a cache that must stay intact.
    rel_keys = list(relations_2(synset).keys())

    html = ''
    if link_type == 'W':
        rel_names = [(ANTONYM, 'antonym'),
                     (FRAMES,'derivationally related form')]
    else:
        rel_names = rel_order
    for rel in rel_names:
        db_name,disp_name = rel
        if db_name == ALSO_SEE:
            # 'phrasal verb' is offered for verbs only, 'see also' for
            # everything else.
            if synset.pos == 'verb' and disp_name != 'phrasal verb' or \
               synset.pos != 'verb' and disp_name == 'phrasal verb':
                continue
        if db_name == HYPONYM:
            # Verbs get the troponym wording, other POS the hyponym one.
            if synset.pos == 'verb':
                if disp_name.find('tropo') == -1:
                    continue
            else:
                if disp_name.find('hypo') == -1:
                    continue
        if synset[db_name] or \
                db_name == ANTONYM and _anto_or_similar_anto(synset):
            lst = [' <i>/</i> ' + _rel_ref(word, synset_keys, r)
                   for r in disp_name.split('/')]
            # Drop the first ' <i>/</i> ' separator (10 characters).
            html += ''.join(lst)[10:]
            html += '\n'
            if db_name in rel_keys: rel_keys.remove(db_name)
    if link_type == 'W':
        html += _rel_ref(word, synset_keys, 'Overview') + '\n'
        html += _rel_ref(word, synset_keys, 'synset') + '\n'
    else:
        # Relations present on the synset but not handled above.
        for rel in rel_keys:
            html += _rel_ref(word, synset_keys, rel) + '\n'
        if synset.pos == 'verb' and synset.verbFrameStrings:
            html += _rel_ref(word, synset_keys, 'sentence frame') + '\n'
    return html
435
449
455
def _word_ul_structure(word, synset, rel_name, synset_keys):
    """
    Build the HTML list items for a word-level relation of a synset.

    @param word: The current word
    @type word: str
    @param synset: The synset whose relation is displayed
    @type synset: synset
    @param rel_name: The display name of the relation
    @type rel_name: str
    @param synset_keys: synset keys for this and the previous synset
    @type synset_keys: str
    @return: The concatenated output of _collect_one for every related word
    @rtype: str
    """
    synset_key,prev_synset_key = synset_keys.split(',')
    rel_name = _dispname_to_dbname(rel_name)
    if rel_name == ANTONYM:
        rel_form = ' [Opposed to: '
    else:
        rel_form = ' [Related to: '
    s = ''
    rel = relations_2(synset, rel_name=rel_name, word_match=True)
    if rel:
        # (target, source) triples of (pos, offset, word index).
        hlp = [((s1.pos,s1.offset,i1),(s0.pos,s0.offset,i0))
               for ((s0,i0),(s1,i1)) in rel]
        if prev_synset_key:
            # Keep only the pairs whose source is the word named by the
            # 'key:index' part of synset_keys.
            sk0,sk1 = synset_key.split(':')
            syns = _get_synset(sk0)
            ind = int(sk1)
            hlp = [((s1.pos,s1.offset,i1),(s0.pos,s0.offset,i0))
                   for ((s0,i0),(s1,i1))
                   in rel
                   if (s0.pos == syns.pos)
                   and (s0.offset == syns.offset)
                   and (i0 == ind)]
        # The original code passed the variable 's1' left over from the
        # list comprehension above, i.e. the target synset of the last
        # pair in rel.  Name it explicitly instead of relying on the
        # comprehension variable leaking into this scope.
        last_target = rel[-1][1][0]
        # NOTE: groupby merges adjacent equal keys only; hlp is not sorted.
        hlp = it.groupby(hlp,key=lambda x:x[0])
        hlp_2 = []
        for h in hlp:
            forms = []
            for h2 in h[1]:
                forms.append(h2[1])
            forms.sort()
            # Prepending reverses the group order.
            hlp_2 = [(h[0],forms)] + hlp_2
        for h,f in hlp_2:
            s += _collect_one(word, (rel_form,(last_target,h,f)), synset_key)
    elif rel_name == ANTONYM:
        # No direct antonym: go through the 'similar to' synsets.
        similar = relations_2(synset, rel_name=SIMILAR)
        for simi in similar:
            anto = relations_2(simi, rel_name=ANTONYM, word_match=True)
            if anto:
                for a in anto:
                    ((s0,i0),(s1,i1)) = a
                    form = (s0.pos,s0.offset,i0)
                    oppo = (s1.pos,s1.offset,i1)
                    s += _collect_one(word, \
                        (' [Indirect via ',(s1,oppo,[form])), synset_key)
    return s
501
def _relation_section(rel_name, word, synset_keys):
    """
    Build the HTML section shown when a relation link is opened.

    @param rel_name: The display name of the relation
    @type rel_name: str
    @param word: The current word
    @type word: str
    @param synset_keys: synset keys for this and the previous synset
    @type synset_keys: str
    @return: The HTML for the relation's section
    @rtype: str
    """
    synset_key,prev_synset_key = synset_keys.split(',')
    synset = _get_synset(synset_key.split(':')[0])
    if rel_name == 'full hyponym' or rel_name == 'full troponym':
        if rel_name == 'full hyponym':
            # Limit the tree depth for synsets with a small min_depth().
            depth = synset.min_depth()
            if depth <= 2: depth = 1
            elif depth == 3: depth = 2
        else: depth = -1
        tree = synset.tree(HYPONYM, depth, cut_mark='...')
        html = '\n' + _hyponym_ul_structure(word, tree[1:]) + '\n'
        html += ''.join((_collect_one(word, x, '')
                         for x
                         in synset[INSTANCE_HYPONYM]))
        return _ul(html + '\n')
    elif rel_name == 'inherited hypernym':
        tree = synset.tree(HYPERNYM)
        return _hypernym_ul_structure(word, tree[1:][0])
    elif rel_name == 'sister term':
        # Every hypernym followed by a nested list of its hyponyms.
        s = ''
        for x in synset[HYPERNYM]:
            s += _collect_one(word, x, '')
            s += '<ul>'
            s += ''.join((_collect_one(word, y, '') for y in x[HYPONYM]))
            s += '\n</ul>'
        return _ul(s + '\n')
    elif rel_name == 'sentence frame':
        verb_frame_strings = [(VERB_FRAME_STRINGS[i] % _bold(word)) \
                              for i in synset.verbFrames]
        s = '\n'.join(['<li>' + vfs + '</li>' for vfs
                       in verb_frame_strings])
        return _ul(s + '\n')
    elif rel_name == 'Overview':
        ind = int(synset_key.split(':')[1])
        w,b = _w_b(synset.words[ind], True)
        if not w: return ''
        return _ul(b + '\n')
    elif rel_name == 'synset':
        s = _collect_one(word, synset, '')
        return _ul(s + '\n')
    elif rel_name == 'domain term region':
        rel = _dispname_to_dbname(rel_name)
        s = ''
        word_collection = []
        # Synsets are rendered right away, plain words are resolved below.
        for x in synset[rel]:
            if isinstance(x, basestring):
                word_collection.append(x)
            else:
                s += _collect_one(word, x, '')

        for wrd in word_collection:
            w = _pos_match((None,None,synset.pos))[0][wrd]
            oppo = None
            # Search the synsets of wrd for the word-level pointer that
            # leads back to this synset.
            for syns in w:
                for wlr in relations_2(syns, CLASSIF_REGIONAL,True):
                    if not isinstance(wlr, tuple):
                        continue
                    syn,i = wlr[1]
                    syns,j = wlr[0]
                    if syn == synset and syns.words[j] == wrd:
                        form = (syn.pos,syn.offset,i)
                        oppo = (syns.pos,syns.offset,j)
                        break
                if oppo: break
            if oppo:
                s += _collect_one(word, \
                    (' [Related to: ',(synset,oppo,[form])), synset_key)
        return _ul(s + '\n')
    else:
        rel = _dispname_to_dbname(rel_name)

        # Word-level relations (antonyms, or relations whose entries are
        # strings) are rendered per word, all others per synset.
        if rel == ANTONYM or \
                isinstance(relations_2(synset)[rel][0], basestring):
            s = _word_ul_structure(word, synset, rel_name, synset_keys)
            return _ul(s + '\n')
        else:
            s = ''.join((_collect_one(word, x, '') for x in synset[rel]))
            if rel == HYPONYM:
                s += ''.join((_collect_one(word, x, '')
                              for x
                              in synset[INSTANCE_HYPONYM]))
            return _ul(s + '\n')
584
def _w_b(word, overview):
    """
    Return a 2-tuple of the word and the HTML body of its synsets.

    @param word: One or more comma-separated words to look up
    @type word: str
    @param overview: If true, the per-POS headings are left out
    @type overview: truth value
    @return: The tuple (word,body); ("", message) if no word was given
    @rtype: Tuple (str,str)
    """
    pos_forms = defaultdict(list)
    words = [w.strip() for w in word.split(',')]
    words = [w for w in words if w != ""]
    if not words:
        # Nothing to search for.
        return "", "Please specify a word to search for."

    # Collect, per part of speech, the distinct forms that _morphy
    # yields for the given words, in order of appearance.
    for pos_str in ['noun', 'verb', 'adj', 'adv']:
        for w in words:
            for form in _morphy(w, pos=pos_str):
                if form not in pos_forms[pos_str]:
                    pos_forms[pos_str].append(form)
    body = ''
    for pos,pos_str,name in \
        ((N,'noun','Noun'), (V,'verb','Verb'),
         (ADJ,'adj','Adjective'), (ADV,'adv','Adverb')):
        if pos_str in pos_forms:
            if not overview:
                body += _hlev(3, name) + '\n'
            for w in pos_forms[pos_str]:
                # A form without an entry raises KeyError in the lookup;
                # such forms are skipped on purpose.
                try:
                    body += _collect_all(w, pos)
                except KeyError:
                    pass
    if not body:
        body = "The word '%s' was not found in the dictionary." % word
    return word,body
616
def new_word_and_body(word):
    '''
    Return a 2-tuple of a new word and the HTML body consisting of all the
    synsets for all the POS that the word was found in

    @param word: The word for which the HTML body is to be constructed
    @type word: str
    @return: The tuple (word,body)
    @rtype: Tuple (str,str)
    '''
    # Normalise the input: lower case, underscores shown as spaces.
    word = word.lower().replace('_', ' ')
    return _w_b(word, False)
629
def _ul_section_removed(page, index):
    '''Removes the first string <ul>...stuff...</ul> from the html page.

    The search starts at index. Note: ...stuff... may contain embedded
    <ul>-</ul> pairs but the search continues to the </ul> that is the
    pair of the starting <ul>

    The page is returned unchanged if no <ul> is found at or after index
    or if the starting <ul> has no matching </ul>.
    '''
    ind = page.find('<ul>', index)
    if ind == -1: return page
    ul_count = 1          # nesting depth of currently open <ul> tags
    ul_start = ind
    ind += 4
    while ul_count:
        # 'ul>' matches both '<ul>' and '</ul>'; the preceding character
        # tells them apart.
        ind = page.find('ul>', ind)
        if ind == -1: return page
        if page[ind - 1] == '/':
            ul_count -= 1
            ul_end = ind + 3
        else:
            ul_count += 1
        ind += 3
    return page[:ul_start] + page[ul_end:]
652
def page_word(page, word, href):
    '''
    Returns a tuple of the HTML page built and the new current word

    @param page: The currently active HTML page
    @type page: str
    @param word: The currently active word
    @type word: str
    @param href: The hypertext reference to be solved
    @type href: str
    @return: A tuple (page,word), where page is the new current HTML page
             to be sent to the browser and
             word is the new current word; (None,None) for a relation
             link whose relation is not in implemented_rel_names
    @rtype: A tuple (str,str)
    '''
    link_type = href[0]
    q_link = href[1:]
    u_link = unquote_plus(q_link)

    if link_type == 'M' or link_type == 'N':
        # Word link 'word#counter': search for this new word.
        word, u_c = u_link.split('#')
        word,body = new_word_and_body(word)
        return pg(word, body), word

    elif link_type == 'R':
        # Relation link: word#synset_keys#relation_name#counter
        word,synset_keys,rel_name,u_c = u_link.split('#')
        # Position right after the link's '">', i.e. at the link text.
        ind = page.find(q_link) + len(q_link) + 2

        # Bold link text means the section below the link is open and
        # the user wants to close it.
        if page[ind:ind+3] == '<b>':
            page = _ul_section_removed(page, ind)
            # Replace '<b><i>name</i></b>' (len(name) + 14 characters)
            # by the plain italic form.
            page = page[:ind] + '<i>' + rel_name + \
                   '</i>' + page[ind + len(rel_name) + 14:]
            return page, word
        else:
            # If another link on the same line is bold, remove its
            # boldness and close the section below it first.
            end = page.find('\n', ind)
            start = page.rfind('\n', 0, ind)
            start = page.find('<b>', start, end)
            if start != -1:
                page = _ul_section_removed(page, ind)
                end = page.find('</b>', start, end)
                page = page[:start] + page[start+3:end] + page[end+4:]

            # Make the clicked link bold and insert its section.
            if rel_name in implemented_rel_names:
                ind = page.find(q_link) + len(q_link) + 2
                # '<i>name</i>' is len(name) + 7 characters long.
                ind_2 = ind + len(rel_name) + 7
                page = page[:ind] + _bold(page[ind:ind_2]) + \
                       page[ind_2:]
                # The section goes to the start of the next line.
                ind = page.find('\n', ind) + 1
                section = \
                    _relation_section(rel_name, word, synset_keys)
                page = page[:ind] + section + page[ind:]
                return page, word
            else:
                return None, None
    else:
        # Word ('W') or synset ('S') link:
        # word#synset_key,prev_synset_key#counter
        l_t = link_type + ':'
        word,syns_keys,link_counter = u_link.split('#')
        ind = page.find(q_link) + len(q_link) + 2

        # Bold link text means the relation list is open: close it.
        if page[ind:ind+3] == '<b>':
            page = _ul_section_removed(page, ind)
            # '<b>S:</b>' is 9 characters long.
            page = page[:ind] + l_t + page[ind + 9:]
            return page, word
        else:
            # The user wants to see the relation names: make the link
            # text bold ...
            page = page[:ind] + _bold(l_t) + page[ind + 2:]
            # ... and insert the relation names on the next line.
            ind = page.find('\n', ind) + 1
            # First remove full_hyponym_cont_text if it is found here.
            # NOTE(review): the fixed offset 5 presumably skips markup at
            # the start of the line - confirm against the page format.
            if page[ind+5:].startswith(full_hyponym_cont_text):
                page = page[0:ind+5] + \
                       page[ind+5+len(full_hyponym_cont_text):]
            s_r = _synset_relations(word, link_type, syns_keys)
            s_r = s_r.split('\n')[:-1]
            s_r = [_li(sr) for sr in s_r]
            s_r = _ul('\n' + '\n'.join(s_r) + '\n') + '\n'
            page = page[:ind] + s_r + page[ind:]
            return page, word
744
745
def get_static_page_by_path(path):
    """
    Return a static HTML page from the path given.

    @param path: The name of the static page, e.g. "index.html"
    @type path: str
    @return: The page content; for an unknown path an error message
        string is returned instead of raising
    @rtype: str
    """
    # Every branch returns, so no file is ever read from disk here (the
    # original had unreachable open()/read() code after this chain).
    if path == "index_2.html":
        return get_static_index_page(False)
    elif path == "index.html":
        return get_static_index_page(True)
    elif path == "NLTK Wordnet Browser Database Info.html":
        return "Display of Wordnet Database Statistics is not supported"
    elif path == "start.html":
        return get_static_start_page()
    elif path == "upper_2.html":
        return get_static_upper_page(False)
    elif path == "upper.html":
        return get_static_upper_page(True)
    elif path == "web_help.html":
        return get_static_web_help_page()
    elif path == "wx_help.html":
        return get_static_wx_help_page()
    else:
        return "Internal error: Path for static page '%s' is unknown" % path
773
774
def get_static_web_help_page():
    """
    Return the static web help page.
    """
    return \
"""
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
<html>
<!-- Natural Language Toolkit: Wordnet Interface: Graphical Wordnet Browser
Copyright (C) 2007 - 2008 NLTK Project
Author: Jussi Salmela <[email protected]>
URL: <http://nltk.org>
For license information, see LICENSE.TXT -->
<head>
<meta http-equiv='Content-Type' content='text/html; charset=us-ascii'>
<title>NLTK Wordnet Browser display of: * Help *</title>
</head>
<body bgcolor='#F5F5F5' text='#000000'>
<h2>NLTK Wordnet Browser Help</h2>
<p>The NLTK Wordnet Browser is a tool to use in browsing the Wordnet database. It tries to behave like the Wordnet project's web browser but the difference is that the NLTK Wordnet Browser uses a local Wordnet database.
<p><b>You are using the Javascript client part of the NLTK Wordnet BrowseServer.</b> We assume your browser is in tab sheets enabled mode.</p>
<p>For background information on Wordnet, see the Wordnet project home page: <a href="http://wordnet.princeton.edu/"><b> http://wordnet.princeton.edu/</b></a>. For more information on the NLTK project, see the project home:
<a href="http://nltk.sourceforge.net/"><b>http://nltk.sourceforge.net/</b></a>. To get an idea of what the Wordnet version used by this browser includes choose <b>Show Database Info</b> from the <b>View</b> submenu.</p>
<h3>Word search</h3>
<p>The word to be searched is typed into the <b>New Word</b> field and the search started with Enter or by clicking the <b>Search</b> button. There is no uppercase/lowercase distinction: the search word is transformed to lowercase before the search.</p>
<p>In addition, the word does not have to be in base form. The browser tries to find the possible base form(s) by making certain morphological substitutions. Typing <b>fLIeS</b> as an obscure example gives one <a href="MfLIeS">this</a>. Click the previous link to see what this kind of search looks like and then come back to this page by using the <b>Alt+LeftArrow</b> key combination.</p>
<p>The result of a search is a display of one or more
<b>synsets</b> for every part of speech in which a form of the
search word was found to occur. A synset is a set of words
having the same sense or meaning. Each word in a synset that is
underlined is a hyperlink which can be clicked to trigger an
automatic search for that word.</p>
<p>Every synset has a hyperlink <b>S:</b> at the start of its
display line. Clicking that symbol shows you the name of every
<b>relation</b> that this synset is part of. Every relation name is a hyperlink that opens up a display for that relation. Clicking it another time closes the display again. Clicking another relation name on a line that has an opened relation closes the open relation and opens the clicked relation.</p>
<p>It is also possible to give two or more words or collocations to be searched at the same time separating them with a comma like this <a href="Mcheer up,clear up">cheer up,clear up</a>, for example. Click the previous link to see what this kind of search looks like and then come back to this page by using the <b>Alt+LeftArrow</b> key combination. As you could see the search result includes the synsets found in the same order than the forms were given in the search field.</p>
<p>
There are also word level (lexical) relations recorded in the Wordnet database. Opening this kind of relation displays lines with a hyperlink <b>W:</b> at their beginning. Clicking this link shows more info on the word in question.</p>
<h3>The Buttons</h3>
<p>The <b>Search</b> and <b>Help</b> buttons need no more explanation. </p>
<p>The <b>Show Database Info</b> button shows a collection of Wordnet database statistics.</p>
<p>The <b>Shutdown the Server</b> button is shown for the first client of the BrowServer program i.e. for the client that is automatically launched when the BrowServer is started but not for the succeeding clients in order to protect the server from accidental shutdowns.
</p></body>
</html>
"""
820
822 """
823 Return static WX help page.
824 """
825 return \
826 """
827 <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
828 <html>
829 <!-- Natural Language Toolkit: Wordnet Interface: Graphical Wordnet Browser
830 Copyright (C) 2007 - 2008 NLTK Project
831 Author: Jussi Salmela <[email protected]>
832 URL: <http://nltk.org>
833 For license information, see LICENSE.TXT -->
834 <head>
835 <meta http-equiv='Content-Type' content='text/html; charset=us-ascii'>
836 <title>NLTK Wordnet Browser display of: * Help *</title>
837 </head>
838 <body bgcolor='#F5F5F5' text='#000000'>
839 <h2>NLTK Wordnet Browser Help</h2>
840 <p>The NLTK Wordnet Browser is a tool to use in browsing the Wordnet database. It tries to behave like the Wordnet project's web browser but the difference is that the NLTK Wordnet Browser uses a local Wordnet database. The NLTK Wordnet Browser has only a part of normal browser functionality and it is <b>not</b> an Internet browser.</p>
841 <p>For background information on Wordnet, see the Wordnet project home page: <b>http://wordnet.princeton.edu/</b>. For more information on the NLTK project, see the project home: <b>http://nltk.sourceforge.net/</b>. To get an idea of what the Wordnet version used by this browser includes choose <b>Show Database Info</b> from the <b>View</b> submenu.</p>
842 <h3>The User Interface</h3>
843 <p>The user interface is a so called <b>notebook</b> interface. This
844 is familiar nowadays for almost everyone from Internet browsers,
845 for example. It consists of one or more independent pages often
846 (and here also) called <b>tabsheets</b>.</p>
847 <p>Every tabsheet contains its own search history which can be
848 browsed back and forth at will. The result of a new word search
849 will be shown on the currently active tabsheet if nothing else is
850 wanted. It is also possible to open a new tabsheet for the search
851 word given.</p>
852 <p>The position and size of the browser window as well as font size can be adjusted and the selections are retained between sessions.</p>
853 <h3>Word search</h3>
854 <p>The word to be searched is typed into the <b>Word(s):</b> field and the search started with Enter or by clicking the <b>Search the word(s)</b> button. There is no uppercase/lowercase distinction: the search word is transformed to lowercase before the search.</p>
855 <p>In addition, the word does not have to be in base form. The browser tries to find the possible base form(s) by making certain morphological substitutions. Typing <b>fLIeS</b> as an obscure example gives one <a href="MfLIeS">this</a>. Click the previous link to see what this kind of search looks like and then come back to this page by clicking the <b>Previous Page</b> button.</p>
856 <p>The result of a search is a display of one or more
857 <b>synsets</b> for every part of speech in which a form of the
858 search word was found to occur. A synset is a set of words
859 having the same sense or meaning. Each word in a synset that is
860 underlined is a hyperlink which can be clicked to trigger an
861 automatic search for that word.</p>
862 <p>Every synset has a hyperlink <b>S:</b> at the start of its
863 display line. Clicking that symbol shows you the name of every
864 <b>relation</b> that this synset is part of. Every relation name is a hyperlink that opens up a display for that relation. Clicking it another time closes the display again. Clicking another relation name on a line that has an opened relation closes the open relation and opens the clicked relation.</p>
865 <p>It is also possible to give two or more words or collocations to be searched at the same time separating them with a comma like this <a href="Mcheer up,clear up">cheer up,clear up</a>, for example. Click the previous link to see what this kind of search looks like and then come back to this page by clicking the <b>Previous Page</b> button. As you could see the search result includes the synsets found in the same order than the forms were given in the search field.</p>
866 <p>
867 There are also word level (lexical) relations recorded in the Wordnet database. Opening this kind of relation displays lines with a hyperlink <b>W:</b> at their beginning. Clicking this link shows more info on the word in question.</p>
868 <h3>Menu Structure</h3>
869 The browser has a menubar that you can use to invoke a set of
870 different operations. Most of the menu selections also have a
871 corresponding keyboard shortcut.
872 <h4>The File Menu</h4>
873 <p>Using the file menu you can <b>open</b> a previously saved NLTK
874 Wordnet Browser page. Note that only pages saved with this browser
875 can be read.</p>
876 <p>And as indicated above you can <b>save</b> a search page. The
877 resulting file is a normal HTML mode file which can be viewed,
878 printed etc. as any other HTML file.</p>
879 <p>You can also <b>print</b> a page and <b>preview</b> a page to be
880 printed. The selected printing settings are remembered during the
881 session.</p>
882 <h4>The Tabsheets Menu</h4>
883 <p>You can <b>open an empty tabsheet</b> and <b>close</b> the
884 currently active tabsheet.</p>
885 <p>When you enter a new search word in the search word field you
886 can make the search result be shown in a <b>new tabsheet</b>.</p>
887 <h4>Page History</h4>
888 You can browse the page history of the currently active tabsheet
889 either <b>forwards</b> or <b>backwards</b>. <b>Next Page</b>
890 browses towards the newer pages and <b>Previous Page</b> towards
891 the older pages.
892 <h4>The View Menu</h4>
893 <p>You can <b>increase</b>, <b>decrease</b> and <b>normalize</b>
894 the font size. The font size chosen is retained between
895 sessions.</p>
896 <p>You can choose <b>Show Database Info</b> to see the word, synset and relation counts by POS as well as one example word (as a hyperlink) for every relation&POS pair occuring.</p>
897 <p>You can view the <b>HTML source</b> of a page if you are
898 curious.</p>
899 <h4>The Help Menu</h4>
900 You can view this <b>help text</b> as you already know. The
901 <b>about</b> selection tells you something about the program.
902 <h3>The Keyboard Shortcuts</h3>
903 <p>The following keyboard shortcuts can be used to quickly launch
904 the desired operation.</p>
905 <table border="1" cellpadding="1" cellspacing="1" summary="">
906 <col align="center">
907 <col align="center">
908 <tr>
909 <th>Keyboard Shortcut</th>
910 <th>Operation</th>
911 </tr>
912 <tr>
913 <td>Ctrl+O</td>
914 <td>Open a file</td>
915 </tr>
916 <tr>
917 <td>Ctrl+S</td>
918 <td>Save current page as</td>
919 </tr>
920 <tr>
921 <td>Ctrl+P</td>
922 <td>Print current page</td>
923 </tr>
924 <tr>
925 <td>Ctrl+T</td>
926 <td>Open a new (empty) tabsheet</td>
927 </tr>
928 <tr>
929 <td>Ctrl+W</td>
930 <td>Close the current tabsheet</td>
931 </tr>
932 <tr>
933 <td>Ctrl+LinkClick</td>
934 <td>Open the link in a new unfocused tabsheet</td>
935 </tr>
936 <tr>
937 <td>Ctrl+Shift+LinkClick</td>
938 <td>Open the link in a new focused tabsheet</td>
939 </tr>
940 <tr>
941 <td>Alt+Enter (1)</td>
942 <td>Show the word in search word field in a new tabsheet</td>
943 </tr>
944 <tr>
945 <td>Alt+LeftArrow</td>
946 <td>Previous page in page history</td>
947 </tr>
948 <tr>
949 <td>Ctrl+LeftArrow (2)</td>
950 <td>Previous page in page history</td>
951 </tr>
952 <tr>
953 <td>Alt+RightArrow</td>
954 <td>Next page in page history</td>
955 </tr>
956 <tr>
957 <td>Ctrl+RightArrow (2)</td>
958 <td>Next page in page history</td>
959 </tr>
960 <tr>
961 <td>Ctrl++/Ctrl+Numpad+/Ctrl+UpArrow (3)</td>
962 <td>Increase font size</td>
963 </tr>
964 <tr>
965 <td>Ctrl+-/Ctrl+Numpad-/Ctrl+DownArrow (3)</td>
966 <td>Decrease font size</td>
967 </tr>
968 <tr>
969 <td>Ctrl+0 (4)</td>
970 <td>Normal font size</td>
971 </tr>
972 <tr>
973 <td>Ctrl+U</td>
974 <td>Show HTML source</td>
975 </tr>
976 </table>
977 <dl>
978 <dt>(1)</dt>
979 <dd>This works only when the search word field is active i.e. the
980 caret is in it.</dd>
981 <dt>(2)</dt>
982 <dd>These are nonstandard combinations, the usual ones being
983 Alt+LeftArrow and Alt+RightArrow. These are still functional because there used to be difficulties with the standard ones earlier in the life of this program. Use these if the standard combinations do not work properly for you.</dd>
984 <dt>(3)</dt>
985 <dd>There are so many of these combinations because the usual i.e.
986 Ctrl++/Ctrl+- combinations did not work on the author's laptop and
987 the Numpad combinations were cumbersome to use. Hopefully the first
988 ones work on the computers of others.</dd>
989 <dt>(4)</dt>
990 <dd>This combination Ctrl+0 is "Ctrl+zero" not "Ctrl+ou".</dd>
991 </dl>
992 </body>
993 </html>
994 """
995
996
998 """
999 Get the static start page.
1000 """
1001
1002 return \
1003 """
1004 <!DOCTYPE html PUBLIC '-//W3C//DTD HTML 4.01//EN' 'http://www.w3.org/TR/html4/strict.dtd'>
1005 <html>
1006 <!-- Natural Language Toolkit: Wordnet Interface: Graphical Wordnet Browser
1007 Copyright (C) 2007 - 2008 NLTK Project
1008 Author: Jussi Salmela <[email protected]>
1009 URL: <http://nltk.org>
1010 For license information, see LICENSE.TXT -->
1011 <head>
1012 <meta http-equiv='Content-Type' content=
1013 'text/html; charset=us-ascii'>
1014 <title>NLTK Wordnet Browser display of: green</title>
1015 </head>
1016 <body bgcolor='#F5F5F5' text='#000000'>
1017
1018 <h3>Search Help</h3>
1019 <ul><li>The display below the line is an example of the output the browser
1020 shows you when you enter a search word. The search word was <b>green</b>.</li>
1021 <li>The search result shows for different parts of speech the <b>synsets</b>
1022 i.e. different meanings for the word.</li>
1023 <li>All underlined texts are hypertext links. There are two types of links:
1024 word links and others. Clicking a word link carries out a search for the word
1025 in the Wordnet database.</li>
1026 <li>Clicking a link of the other type opens a display section of data attached
1027 to that link. Clicking that link a second time closes the section again.</li>
1028 <li>Clicking <u>S:</u> opens a section showing the relations for that synset.</li>
1029 <li>Clicking on a relation name opens a section that displays the associated
1030 synsets.</li>
1031 <li>Type a search word in the <b>Next Word</b> field and start the search by the
1032 <b>Enter/Return</b> key or click the <b>Search</b> button.</li>
1033 </ul>
1034 <hr width='100%'>
1035 <h3>Noun</h3>
1036 <ul><li><a href="Sgreen%23N4967191%2C%2347">S:</a> (n) <b>green</b>, <a href="Mgreenness%2348">greenness</a>, <a href="Mviridity%2349">viridity</a> (green color or pigment; resembling the color of growing grass)</li>
1037 <li><a href="Sgreen%23N8615374%2C%2350">S:</a> (n) <a href="Mpark%2351">park</a>, <a href="Mcommons%2352">commons</a>, <a href="Mcommon%2353">common</a>, <b>green</b> (a piece of open land for recreational use in an urban area) <i>"they went for a walk in the park"</i></li>
1038 <li><a href="Sgreen%23N11013324%2C%2354">S:</a> (n) <b>Green</b>, <a href="MWilliam+Green%2355">William Green</a> (United States labor leader who was president of the American Federation of Labor from 1924 to 1952 and who led the struggle with the Congress of Industrial Organizations (1873-1952))</li>
1039 <li><a href="Sgreen%23N10060904%2C%2356">S:</a> (n) <b>Green</b> (an environmentalist who belongs to the Green Party)</li>
1040 <li><a href="Sgreen%23N9294066%2C%2357">S:</a> (n) <b>Green</b>, <a href="MGreen+River%2358">Green River</a> (a river that rises in western Wyoming and flows southward through Utah to become a tributary of the Colorado River)</li>
1041 <li><a href="Sgreen%23N8579780%2C%2359">S:</a> (n) <b>green</b>, <a href="Mputting+green%2360">putting green</a>, <a href="Mputting+surface%2361">putting surface</a> (an area of closely cropped grass surrounding the hole on a golf course) <i>"the ball rolled across the green and into the bunker"</i></li>
1042 <li><a href="Sgreen%23N7709333%2C%2362">S:</a> (n) <a href="Mgreens%2363">greens</a>, <b>green</b>, <a href="Mleafy+vegetable%2364">leafy vegetable</a> (any of various leafy plants or their leaves and stems eaten as vegetables)</li>
1043 <li><a href="Sgreen%23N3606572%2C%2365">S:</a> (n) <a href="MK%2366">K</a>, <a href="Mjet%2367">jet</a>, <a href="Msuper+acid%2368">super acid</a>, <a href="Mspecial+K%2369">special K</a>, <a href="Mhoney+oil%2370">honey oil</a>, <b>green</b>, <a href="Mcat+valium%2371">cat valium</a>, <a href="Msuper+C%2372">super C</a> (street names for ketamine)</li>
1044
1045 </ul>
1046 <h3>Verb</h3>
1047 <ul><li><a href="Sgreen%23V521478%2C%2373">S:</a> (v) <b>green</b> (turn or become green) <i>"The trees are greening"</i></li>
1048
1049 </ul>
1050 <h3>Adjective</h3>
1051 <ul><li><a href="Sgreen%23J375969%2C%2374">S:</a> (adj) <b>green</b>, <a href="Mgreenish%2375">greenish</a>, <a href="Mlight-green%2376">light-green</a>, <a href="Mdark-green%2377">dark-green</a> (of the color between blue and yellow in the color spectrum; similar to the color of fresh grass) <i>"a green tree"; "green fields"; "green paint"</i></li>
1052 <li><a href="Sgreen%23J3069937%2C%2378">S:</a> (adj) <b>green</b> (concerned with or supporting or in conformity with the political principles of the Green Party)</li>
1053 <li><a href="Sgreen%23J1493897%2C%2379">S:</a> (adj) <b>green</b>, <a href="Munripe%2380">unripe</a>, <a href="Munripened%2381">unripened</a>, <a href="Mimmature%2382">immature</a> (not fully developed or mature; not ripe) <i>"unripe fruit"; "fried green tomatoes"; "green wood"</i></li>
1054 <li><a href="Sgreen%23J2545257%2C%2383">S:</a> (adj) <b>green</b> (looking pale and unhealthy) <i>"you're looking green"; "green around the gills"</i></li>
1055 <li><a href="Sgreen%23J2272485%2C%2384">S:</a> (adj) <a href="Mfleeceable%2385">fleeceable</a>, <b>green</b>, <a href="Mgullible%2386">gullible</a> (naive and easily deceived or tricked) <i>"at that early age she had been gullible and in love"</i></li>
1056
1057 </ul>
1058
1059 </body>
1060 </html>
1061 """
1062
def get_static_index_page(with_shutdown):
    """
    Build the frameset page that serves as the browser's index document.

    The page splits the window into a narrow 'header' frame and a 'body'
    frame.  The body frame always loads C{start.html}; the document loaded
    into the header frame depends on C{with_shutdown}.

    @param with_shutdown: When true the header frame points at
        C{upper.html}, otherwise at C{upper_2.html}.
    @type with_shutdown: truth value
    @return: The complete HTML source of the index page.
    @rtype: str
    """
    # Start from the variant used when no shutdown was requested and
    # switch to the other header document only when it was.
    header_src = "upper_2.html"
    if with_shutdown:
        header_src = "upper.html"

    # NOTE: the literal percent signs in the frameset 'rows' attribute are
    # doubled because the markup is filled in with the % operator below.
    frameset_markup = """
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Frameset//EN" "http://www.w3.org/TR/html4/frameset.dtd">
<HTML>
<!-- Natural Language Toolkit: Wordnet Interface: Graphical Wordnet Browser
Copyright (C) 2007 - 2008 NLTK Project
Author: Jussi Salmela <[email protected]>
URL: <http://nltk.org>
For license information, see LICENSE.TXT -->
<HEAD>
<TITLE>NLTK Wordnet Browser</TITLE>
</HEAD>

<frameset rows="7%%,93%%">
<frame src="%s" name="header">
<frame src="start.html" name="body">
</frameset>
</HTML>
"""
    return frameset_markup % header_src
1092
1093
def get_static_upper_page(with_shutdown):
    """
    Build the HTML page shown in the upper (header) frame.

    The page holds the search form, whose results are targeted at the
    'body' frame, and a link to the help page.  A 'Shutdown' link is
    appended after the help link only on request.

    @param with_shutdown: When true a 'Shutdown' link is included in the
        page; otherwise nothing is inserted in its place.
    @type with_shutdown: truth value
    @return: The complete HTML source of the upper frame page.
    @rtype: str
    """
    # Nothing is inserted unless the caller asked for the shutdown link.
    extra_link = ""
    if with_shutdown:
        # NOTE(review): the href is presumably matched literally by the
        # server's request handler -- confirm before changing it.
        extra_link = '<a href="SHUTDOWN THE SERVER">Shutdown</a>'

    page_markup = """
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
<html>
<!-- Natural Language Toolkit: Wordnet Interface: Graphical Wordnet Browser
Copyright (C) 2007 - 2008 NLTK Project
Author: Jussi Salmela <[email protected]>
URL: <http://nltk.org>
For license information, see LICENSE.TXT -->
<head>
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
<title>Untitled Document</title>
</head>
<body>
<form method="GET" action="search" target="body">
Current Word: <input type="text" id="currentWord" size="10" disabled>
Next Word: <input type="text" id="nextWord" name="nextWord" size="10">
<input name="searchButton" type="submit" value="Search">
</form>
<a target="body" href="web_help.html">Help</a>
%s

</body>
</html>
"""
    return page_markup % extra_link
1132