Package nltk :: Package classify :: Module mallet
[hide private]
[frames | no frames]

Source Code for Module nltk.classify.mallet

 1  # Natural Language Toolkit: Interface to Mallet Machine Learning Package 
 2  # 
 3  # Copyright (C) 2008 NLTK Project 
 4  # Author: Edward Loper <[email protected]> 
 5  # URL: <http://nltk.org> 
 6  # For license information, see LICENSE.TXT 
 7  # 
 8  # $Id: naivebayes.py 2063 2004-07-17 21:02:24Z edloper $ 
 9   
10  """ 
11  A set of functions used to interface with the external U{Mallet 
12  <http://mallet.cs.umass.edu/>} machine learning package.  Before 
13  C{mallet} can be used, you should tell NLTK where it can find the 
14  C{mallet} package, using the L{config_mallet()} function.  Typical 
15  usage: 
16   
17      >>> import nltk 
18      >>> nltk.config_mallet('.../path/to/mallet') 
19  """ 
20  __docformat__ = 'epytext en' 
21   
22  import os 
23  import os.path 
24   
25  from nltk.internals import find_binary, java 
26   
27  ###################################################################### 
28  #{ Configuration 
29  ###################################################################### 
30   
# Module-level configuration state, filled in by config_mallet().
# _mallet_home: the directory two levels above the located 'mallethon'
# binary (i.e. the parent of the directory that contains it); None until
# config_mallet() has run.
31  _mallet_home = None 
# _mallet_classpath: a single classpath string joining the paths of all
# '.jar' files found in <_mallet_home>/lib; None until config_mallet() has
# run.  call_mallet() tests this for None to decide whether it still needs
# to call config_mallet().
32  _mallet_classpath = None 
def config_mallet(mallet_home=None):
    """
    Configure NLTK's interface to the C{mallet} machine learning
    package.

    The C{mallethon} script is located with L{find_binary}; the parent
    of the directory containing it is recorded as the mallet home
    directory, and a classpath is built from every C{.jar} file in
    that directory's C{lib} subdirectory.  Both values are stored in
    the module-level variables C{_mallet_home} and
    C{_mallet_classpath}.

    @param mallet_home: The full path to the C{mallet} directory.  If
        not specified, then nltk will search the system for a
        C{mallet} directory; and if one is not found, it will raise a
        C{LookupError} exception.
    @type mallet_home: C{string}
    @raise ValueError: If the mallet home directory that was found
        does not contain a C{lib} subdirectory.
    """
    global _mallet_home, _mallet_classpath

    # We don't actually care about this binary -- we just use it to
    # make sure we've found the right directory.
    mallethon_bin = find_binary(
        'mallet', mallet_home,
        env_vars=['MALLET', 'MALLET_HOME'],
        binary_names=['mallethon'],
        url='http://mallet.cs.umass.edu')

    # Record the location where mallet lives: the binary sits in
    # <mallet_home>/<bin_dir>/mallethon, so go up two levels.
    bin_dir = os.path.dirname(mallethon_bin)
    _mallet_home = os.path.dirname(bin_dir)

    # Construct a classpath for using mallet.
    lib_dir = os.path.join(_mallet_home, 'lib')
    if not os.path.isdir(lib_dir):
        raise ValueError('While configuring mallet: directory %r '
                         'not found.' % lib_dir)

    # Sort the jar names so the resulting classpath is deterministic.
    jar_paths = [os.path.join(lib_dir, filename)
                 for filename in sorted(os.listdir(lib_dir))
                 if filename.endswith('.jar')]
    # Use the platform's path-list separator (':' on POSIX, ';' on
    # Windows) rather than a hard-coded ':', which Java on Windows
    # would not split correctly.
    _mallet_classpath = os.pathsep.join(jar_paths)
64 65
def call_mallet(cmd, classpath=None, stdin=None, stdout=None, stderr=None,
                blocking=True):
    """
    Call L{nltk.internals.java()} with the given command, and with the
    classpath extended to include all the C{.jar} files defined by
    Mallet.  If mallet has not been configured yet, then
    L{config_mallet()} is called first with no arguments.

    NOTE(review): earlier documentation also states that C{nltk.jar}
    is placed on the classpath; this function does not add it itself,
    so that is presumably done inside C{java()} -- confirm there.

    See L{nltk.internals.java()} for parameter and return value
    descriptions.

    @param classpath: An optional classpath string.  If given, the
        mallet classpath is appended to it, separated by the
        platform's path-list separator; otherwise the mallet
        classpath is used on its own.
    """
    # Lazily configure mallet the first time it is needed.
    if _mallet_classpath is None:
        config_mallet()

    # Set up the classpath
    if classpath is None:
        classpath = _mallet_classpath
    else:
        # os.pathsep is ':' on POSIX and ';' on Windows, matching the
        # separator that the java launcher expects on each platform.
        classpath += os.pathsep + _mallet_classpath

    # Delegate to java()
    return java(cmd, classpath, stdin, stdout, stderr, blocking)
86