1
2
3
4
5
6
7
8
9
10 """
11 A set of functions used to interface with the external U{Mallet
12 <http://mallet.cs.umass.edu/>} machine learning package. Before
13 C{mallet} can be used, you should tell NLTK where it can find the
14 C{mallet} package, using the L{config_mallet()} function. Typical
15 usage:
16
17 >>> import nltk
18 >>> nltk.config_mallet('.../path/to/mallet')
19 """
20 __docformat__ = 'epytext en'
21
22 import os
23 import os.path
24
25 from nltk.internals import find_binary, java
26
27
28
29
30
# Module-level state filled in by config_mallet().  Both values stay
# None until config_mallet() has run; call_mallet() checks
# _mallet_classpath to decide whether configuration is still needed.
_mallet_home = None
_mallet_classpath = None


def config_mallet(mallet_home=None):
    """
    Configure NLTK's interface to the C{mallet} machine learning
    package.

    On success, the module-level C{_mallet_home} is set to the mallet
    installation directory and C{_mallet_classpath} is set to a
    classpath string listing every C{.jar} file in its C{lib}
    subdirectory (in sorted order, joined with C{os.pathsep}).

    @param mallet_home: The full path to the C{mallet} directory.  If
        not specified, then nltk will search the system for a
        C{mallet} directory; and if one is not found, it will raise a
        C{LookupError} exception.
    @type mallet_home: C{string}
    @raise ValueError: If the located mallet directory does not
        contain a C{lib} subdirectory.
    """
    global _mallet_home, _mallet_classpath

    # The binary is never executed here; its path is only used to
    # work out where the mallet installation lives.
    mallethon_bin = find_binary(
        'mallet', mallet_home,
        env_vars=['MALLET', 'MALLET_HOME'],
        binary_names=['mallethon'],
        url='http://mallet.cs.umass.edu')

    # The binary sits in <mallet_home>/bin/, so go up two levels.
    bin_dir = os.path.dirname(mallethon_bin)
    _mallet_home = os.path.dirname(bin_dir)

    # Build a classpath from every jar shipped in <mallet_home>/lib.
    lib_dir = os.path.join(_mallet_home, 'lib')
    if not os.path.isdir(lib_dir):
        raise ValueError('While configuring mallet: directory %r '
                         'not found.' % lib_dir)
    jar_paths = [os.path.join(lib_dir, filename)
                 for filename in sorted(os.listdir(lib_dir))
                 if filename.endswith('.jar')]
    # os.pathsep (':' on POSIX, ';' on Windows) is the separator the
    # JVM expects; a literal ':' would clash with Windows drive letters.
    _mallet_classpath = os.pathsep.join(jar_paths)
64
65
def call_mallet(cmd, classpath=None, stdin=None, stdout=None, stderr=None,
                blocking=True):
    """
    Call L{nltk.internals.java()} with the given command, and with the
    classpath extended to include all the C{.jar} files found by
    L{config_mallet()}.  If mallet has not been configured yet,
    L{config_mallet()} is called first with no arguments.

    See L{nltk.internals.java()} for parameter and return value
    descriptions.

    @param classpath: Optional classpath string to put in front of
        the mallet jars.  If C{None}, only the mallet jars are used.
    """
    # Configure lazily on first use.
    if _mallet_classpath is None:
        config_mallet()

    # Append mallet's jars to any caller-supplied classpath, using the
    # platform's path-list separator (':' on POSIX, ';' on Windows).
    if classpath is None:
        classpath = _mallet_classpath
    else:
        classpath = os.pathsep.join([classpath, _mallet_classpath])

    return java(cmd, classpath, stdin, stdout, stderr, blocking)
86