Package nltk :: Package wordnet :: Package browser :: Module browserver
[hide private]
[frames] | no frames]

Source Code for Module nltk.wordnet.browser.browserver

  1  #!/usr/bin/env python 
  2  # 
  3  # Natural Language Toolkit: Wordnet Interface: Graphical Wordnet Browser 
  4  # 
  5  # Copyright (C) 2007 - 2008 NLTK Project 
  6  # Author: Jussi Salmela <[email protected]> 
  7  #         Paul Bone <[email protected]> 
  8  # URL: <http://nltk.org> 
  9  # For license information, see LICENSE.TXT 
 10   
 11  """ 
 12  BrowServer is a server for browsing the NLTK Wordnet database. It first 
 13  launches a browser client to be used for browsing and then starts 
 14  serving the requests of that client and possibly other clients. 
 15   
 16  Usage:: 
 17   
 18      browserver.py -h 
 19      browserver.py [-l <file>] [-p <port>] [-s] 
 20   
 21  Options:: 
 22   
 23      -h or --help 
 24          Display this help message. 
 25   
 26      -l <file> or --logfile <file> 
 27          Log messages to the given file. If this option is not specified, 
 28          messages are silently dropped. 
 29   
 30      -p <port> or --port <port> 
 31          Run the web server on this TCP port, defaults to 8000. 
 32   
 33      -s or --server-mode 
 34          Do not start a web browser, and do not allow a user to 
 35          shut down the server through the web interface. 
 36  """ 
 37   
 38  import os 
 39  from sys import argv 
 40  from BaseHTTPServer import HTTPServer, BaseHTTPRequestHandler 
 41  from urllib import unquote_plus 
 42  import webbrowser 
 43  import datetime 
 44  import re 
 45  import threading 
 46  import time 
 47  import getopt 
 48  import base64 
 49   
 50  from util import page_word, uniq_cntr, html_header, html_trailer, \ 
 51      get_static_index_page, get_static_page_by_path 
 52   
 53  page = None 
 54  word = None 
 55  firstClient = True 
 56   
 57  # For storing the HTML pages 
 58  viewed_pages = {} 
 59  curr_page_num = 1 
 60  # For linking the unique counters to the numbers of the stored pages 
 61  uc_to_pn = {} 
 62   
 63  uc_pat = re.compile('(%23\d+">)') 
 64   
 65  # True if we're not also running a web browser.  The value f server_mode 
 66  # gets set by demo(). 
 67  server_mode = None  
 68   
 69  # If set this is a file object for writting log messages. 
 70  logfile = None 
 71   
 72   
def uc_updated_page(page, old_uc):
    '''
    Return C{page} with every stale unique counter replaced by a new one.

    The page is cut at each C{%23<digits>">} token.  A token whose number
    is below C{old_uc} is replaced by a freshly generated counter; all
    other text is copied through unchanged.
    '''
    rebuilt = []
    for piece in uc_pat.split(page):
        looks_like_counter = piece.startswith('%23') and piece.endswith('">')
        if looks_like_counter and int(piece[3:-2]) < old_uc:
            # Stale counter: swap in a newly generated one.
            rebuilt.append('%23' + str(uniq_cntr()) + '">')
        else:
            rebuilt.append(piece)
    return ''.join(rebuilt)
89
class MyServerHandler(BaseHTTPRequestHandler):
    """
    HTTP request handler that serves the Wordnet browser pages.

    The state that has to survive between requests (the current page and
    word, the stored pages and the unique-counter bookkeeping) is kept in
    module-level globals, not on the handler instance.
    """

    def do_HEAD(self):
        """
        Answer a HEAD request with a 200 status and no Content-type header.
        """
        self.send_head()

    def do_GET(self):
        """
        Serve the page selected by the request path.

        The path (without its leading slash) is dispatched as follows:

          - C{SHUTDOWN THE SERVER}: terminate the process, unless running
            in server mode, in which case a plain-text refusal is sent.
          - C{favicon.ico}: the embedded icon.
          - the empty path: the static index page.
          - C{*.html}: the database info page or another static page.
          - anything else: a word page, either looked up by the unique
            counter in the URL or generated for a C{search?...nextWord=}
            query.  A search query without a usable C{nextWord} parameter
            is answered with a 400 error.
        """
        global page, word, firstClient
        global uc_to_pn, curr_page_num, viewed_pages
        sp = self.path[1:]  # The request path without the leading '/'.
        if unquote_plus(sp) == 'SHUTDOWN THE SERVER':
            if server_mode:
                page = "Server must be killed with SIGTERM."
                content_type = "text/plain"
            else:
                print('Server shutting down!')
                os._exit(0)

        elif sp == 'favicon.ico':
            content_type = 'image/x-icon'
            page = favicon_data()

        elif sp == '':  # First request.
            content_type = 'text/html'
            uniq_cntr()  # Trigger the update of old uc:s
            if not server_mode and firstClient:
                firstClient = False
                page = get_static_index_page(True)
            else:
                page = get_static_index_page(False)
            word = 'green'

        elif sp.endswith('.html'):  # Trying to fetch a HTML file
            content_type = 'text/html'
            uniq_cntr()  # Trigger the update of old uc:s
            usp = unquote_plus(sp)
            if usp == 'NLTK Wordnet Browser Database Info.html':
                word = '* Database Info *'
                if os.path.isfile(usp):
                    # Close the file explicitly instead of leaving the
                    # handle to the garbage collector.
                    info_file = open(usp)
                    try:
                        page = info_file.read()
                    finally:
                        info_file.close()
                else:
                    page = (html_header % word) + \
                        '<p>The database info file:'\
                        '<p><b>' + usp + '</b>' + \
                        '<p>was not found. Run this:' + \
                        '<p><b>python dbinfo_html.py</b>' + \
                        '<p>to produce it.' + html_trailer
            else:
                # TODO Handle files here.
                word = sp
                page = get_static_page_by_path(usp)
        else:
            content_type = 'text/html'
            old_uc = uniq_cntr()  # Trigger the update of old uc:s

            # Handle search queries.
            if sp.startswith("search"):
                search_word = self._search_word(sp)
                if search_word is None:
                    # The URL comes from the network; reject a broken
                    # query instead of failing with an IndexError.
                    self.send_error(400, "Malformed search request")
                    return
                word = search_word
                sp = "M%s%%23%d" % (word, 0)

            uc = get_unique_counter_from_url(sp)
            # Page lookup needs not and cannot be done for the search words
            if uc:
                if uc in uc_to_pn and uc_to_pn[uc] in viewed_pages:
                    page = viewed_pages[uc_to_pn[uc]]
            page, word = page_word(page, word, sp)
            page = uc_updated_page(page, old_uc)
            new_uc = uniq_cntr()
            # Every counter handed out while building this page refers
            # back to the page stored below.
            for issued_uc in range(old_uc, new_uc):
                uc_to_pn[issued_uc] = curr_page_num
            viewed_pages[curr_page_num] = page
            curr_page_num += 1

        # Send result.
        self.send_head(content_type)
        self.wfile.write(page)

    def _search_word(self, sp):
        """
        Return the word requested by a search URL.

        @param sp: The request path, e.g. C{search?nextWord=dog}.
        @return: The text between the first and second C{=} of the first
            query parameter that starts with C{nextWord}, or C{None} if
            the path has no query part or no such parameter with a value.
        """
        pieces = sp.split("?")
        if len(pieces) < 2:
            return None
        for param in pieces[1].split("&"):
            if param.startswith("nextWord"):
                fields = param.split("=")
                if len(fields) > 1:
                    return fields[1]
                return None
        return None

    def send_head(self, type=None):
        """
        Send the 200 status line and the response headers.

        @param type: The MIME type for the Content-type header.  If it is
            C{None} the header is left out, so that the text "None" is
            never sent as a content type.
        """
        self.send_response(200)
        if type is not None:
            self.send_header('Content-type', type)
        self.end_headers()

    def log_message(self, format, *args):
        """
        Write a log entry to the module's log file, if one is open.

        Without an open log file the message is dropped.

        @param format: A C{%}-style format string.
        @param args: The values to interpolate into C{format}.
        """
        if logfile:
            logfile.write(
                "%s - - [%s] %s\n" %
                (self.address_string(),
                 self.log_date_time_string(),
                 format % args))
183 184 185 # This data was encoded with the following procedure
def encode_icon(path="favicon.ico"):
    """
    Print an icon file as a list of URL-safe base64 strings.

    The file is base64 encoded, the result is cut into chunks of at most
    72 characters, and the list of chunks is printed.

    @param path: The icon file to encode, defaults to C{favicon.ico} in
        the current directory.
    @type path: C{str}
    """
    f = open(path, "rb")
    try:
        s = f.read()
    finally:
        # Release the file handle even if the read fails.
        f.close()

    def split(s):
        # Cut iteratively: a recursive split needs one stack frame per
        # chunk and runs into the recursion limit on large files.
        # Empty input still yields a single (empty) chunk.
        return [s[i:i + 72] for i in range(0, len(s), 72)] or [s]

    print(split(base64.urlsafe_b64encode(s)))


# The favicon as URL-safe base64 text, in chunks of 72 characters.  Nine
# consecutive chunks consist of nothing but 'A' (zero bytes); they are
# generated rather than spelled out.
FAVICON_BASE64_DATA = \
    ['AAABAAEAEBAAAAAAAABoBQAAFgAAACgAAAAQAAAAIAAAAAEACAAAAAAAAAAAAAAAAAAAAAAA',
     'AAAAAAAAAAD___8A9___ANb3_wDO9_8AjPf_ALXv_wCc7_8AjO__AHvv_wBz7_8Aa-__AKXn',
     '_wCc5_8AlOf_AITn_wBz5_8Aa-f_AGPn_wBa5_8Ac97_AGve_wBj3v8AWt7_AFLe_wBK3v8A',
     'Qt7_AFrW_wBS1v8AStb_AELW_wA51v8AMdb_ACnO_wAhzv8AGM7_ABjG_wD___cA__f3APf3',
     '9wB73vcAUtb3AErW9wAhxvcAAMb3AFLO7wAYxu8AEMbvACG95wAYvecA9-fWAHPG1gBKvdYA',
     'Ob3WACG91gDv3s4Axt7OACm1zgCMtb0ASq29ACGlvQBStbUAUq21ADGttQA5pbUA3satAEqc',
     'rQDWvaUAY62lAOfGnADWvZwAtbWcAJStnADGrZQAzq2MAIycjABznIwAa5yMAN61hADWrXsA',
     'zq17AMalewCtpXsAa4x7AMaccwC9nHMAtZRzAISUcwBrjHMAzqVrALWUawCtlGsArYxrAHuE',
     'awBre2sAY3trAHuEYwBzhGMAc3tjAGt7YwDGlFoAvYxaAGNzWgBSa1oAxpRSAK2MUgDGjEoA',
     'vYxKAL2ESgC1hEoArYRKAIRzSgB7a0oAc2tKAGtrSgBaY0oAtYRCAK17QgCle0IApXM5AJxz',
     'OQCcazkAjGMxAIRaMQBzWjEAa1oxAIRaKQB7ShAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA'] + \
    ['A' * 72] * 9 + \
    ['AAAAAAAAAAAAAAAGFh4YLAYAAAAAAAAAAAAHGB4gIBYWBgAAAAAAAAAAFhgzQR45MixGMQAA',
     'AAAAABQYTF0WbzplWQAAAABFVEgKFExdFG59eywWCwBAdHRJCgpLXRhxe3IvIiIDT2p0VAdh',
     'fn5xbzplciAwFFNqanQ3BwoKChYYGB4gICxYanRqalRPWVRZRhMYHiAYTmlqdnZ2dnh5eX1G',
     'FhgeFEVjaT1SVithKzg7WhMYGAsATmM9UjgwXDt2eFsIFgcAAAAAFDRDLUo-bnhZAAAAAAAA',
     'AAgwRS1cO3Z2WgAAAAAAAAADUTZHbVJ0d0kAAAAAAAAAADFPY2pqZEgAAAAAAAAA__8AAP__',
     'AAD__wAAsaEAAE5eAABOXgAA__4AAPv_AAD__wAA__8AAM3-AADw_wAA__8AAML-AAD__wAA',
     'xf4=']
def favicon_data():
    """
    Decode the embedded favicon and return the raw image data.
    """
    encoded_icon = ''.join(FAVICON_BASE64_DATA)
    return base64.urlsafe_b64decode(encoded_icon)
235 236
def get_unique_counter_from_url(sp):
    """
    Extract the unique counter from the URL if it has one.

    The counter is the integer that follows the last URL-encoded '#'
    (C{%23}) in C{sp}.

    @param sp: The request path, still URL-encoded.
    @type sp: C{str}
    @return: The counter, or C{None} if C{sp} contains no C{%23} or the
        text after the last C{%23} is not an integer.
    @rtype: C{int} or C{None}
    """
    pos = sp.rfind('%23')
    if pos == -1:
        return None
    try:
        return int(sp[(pos + 3):])
    except ValueError:
        # The path is supplied by the client, so the text after the
        # marker may be anything; treat garbage like a missing counter.
        return None
247 248
def demo(port=8000, runBrowser=True, logfilename=None):
    """
    Run NLTK Wordnet Browser Server.

    @param port: The port number for the server to listen on, defaults to
                 8000
    @type port: C{int}

    @param runBrowser: True to start a web browser and point it at the web
                       server.
    @type runBrowser: C{boolean}

    @param logfilename: The file to append log messages to.  If it is not
                        given, log messages are dropped.
    @type logfilename: C{str}
    """
    # The webbrowser module is unpredictable, typically it blocks if it uses
    # a console web browser, and doesn't block if it uses a GUI webbrowser,
    # so we need to force it to have a clear correct behaviour.
    #
    # Normally the server should run for as long as the user wants.  They
    # should ideally be able to control this from the UI by closing the
    # window or tab.  Second best would be clicking a button to say
    # 'Shutdown' that first shuts down the server and closes the window or
    # tab, or exits the text-mode browser.  Both of these are unfeasible.
    #
    # The next best alternative is to start the server, have it close when
    # it receives SIGTERM (default), and run the browser as well.  The user
    # may have to shutdown both programs.
    #
    # Since webbrowser may block, and the webserver will block, we must run
    # them in separate threads.
    #
    global server_mode, logfile

    # This module only imports argv from sys, so the sys module itself
    # has to be brought into scope here.
    import sys

    server_mode = not runBrowser

    # Setup logging.
    if logfilename:
        try:
            logfile = open(logfilename, "a", 1)  # 1 means 'line buffering'
        except IOError:
            # sys.exc_info() is used instead of binding the exception in
            # the except clause because the binding syntax differs between
            # Python versions.
            e = sys.exc_info()[1]
            sys.stderr.write("Couldn't open %s for writing: %s\n" %
                             (logfilename, e))
            sys.exit(1)
    else:
        logfile = None

    url = 'http://localhost:' + str(port)

    # Start the server.  The socket is bound before the browser thread is
    # created: if binding fails (e.g. the port is in use) there must be no
    # thread left waiting forever for the server to become ready.
    server = HTTPServer(('', port), MyServerHandler)
    if logfile:
        logfile.write(
            'NLTK Wordnet browser server running serving: %s\n' % url)

    # Start the web browser.
    if runBrowser:
        server_ready = threading.Event()
        browser_thread = startBrowser(url, server_ready)
        server_ready.set()

    try:
        try:
            server.serve_forever()
        except KeyboardInterrupt:
            pass
    finally:
        # Release the listening socket.
        server.server_close()

    if runBrowser:
        browser_thread.join()
313 314
def startBrowser(url, server_ready):
    """
    Open C{url} in a web browser from a separate thread.

    The thread waits for the C{server_ready} event, pauses for another
    second and then asks the C{webbrowser} module to open the URL.

    @param url: The address to open.
    @param server_ready: The event to wait for before opening the browser.
    @return: The thread, already started.
    """
    def open_when_ready(address, ready_event):
        ready_event.wait()
        # Wait a little bit more, there's still the chance of a race
        # condition.
        time.sleep(1)
        webbrowser.open(address, new=2, autoraise=1)

    browser_thread = threading.Thread(target=open_when_ready,
                                      args=(url, server_ready))
    browser_thread.start()
    return browser_thread
def usage():
    """
    Print the command line help, which is this module's docstring.
    """
    print(__doc__)
def parse_options(args):
    """
    Parse and interpret the command line options.

    @param args: The command line arguments without the program name.
    @type args: C{list} of C{str}
    @return: The tuple C{(port, server_mode, help_mode, logfilename)};
        options that were not given are 8000, False, False and None.
    @raise getopt.GetoptError: If an option is unknown or lacks its
        argument.
    @raise ValueError: If the port is not an integer.
    """
    (opts, _) = getopt.getopt(args, "l:p:sh",
                              ["logfile=", "port=", "server-mode", "help"])
    port = 8000
    server_mode = False
    help_mode = False
    logfilename = None
    for (opt, value) in opts:
        if opt in ("-l", "--logfile"):
            logfilename = str(value)
        elif opt in ("-p", "--port"):
            port = int(value)
        elif opt in ("-s", "--server-mode"):
            server_mode = True
        elif opt in ("-h", "--help"):
            help_mode = True
    return port, server_mode, help_mode, logfilename


if __name__ == '__main__':
    # This module only imports argv from sys.
    import sys

    try:
        port, server_mode, help_mode, logfilename = parse_options(argv[1:])
    except (getopt.GetoptError, ValueError):
        # Report a bad command line together with the help text instead
        # of a traceback.
        sys.stderr.write("%s\n" % sys.exc_info()[1])
        usage()
        sys.exit(2)

    if help_mode:
        usage()
    else:
        demo(port, not server_mode, logfilename)