1
2
3
4
5
6
7
8
9
10 import re
11
12 from api import *
13
15 """
16 A stemmer that uses regular expressions to identify morphological
17 affixes. Any substrings that match the regular expressions will
18 be removed.
19 """
21 """
22 Create a new regexp stemmer.
23
24 @type regexp: C{string} or C{regexp}
25 @param regexp: The regular expression that should be used to
26 identify morphological affixes.
27 @type min: int
28 @param min: The minimum length a word must have to be stemmed; shorter words are returned unchanged.
29 """
30
31 if not hasattr(regexp, 'pattern'):
32 regexp = re.compile(regexp)
33 self._regexp = regexp
34 self._min = min
35
def stem(self, word):
    """
    Remove every substring of C{word} that matches this stemmer's
    regular expression.

    @type word: C{string}
    @param word: The word to stem.
    @rtype: C{string}
    @return: C{word} unchanged if it is shorter than C{self._min};
        otherwise C{word} with all matches of C{self._regexp}
        replaced by the empty string.
    """
    # Words below the minimum length are passed through untouched.
    if len(word) < self._min:
        return word
    return self._regexp.sub('', word)
41
43 return '<RegexpStemmer: %r>' % self._regexp.pattern
44
58
59
# Call demo() only when this file is executed as a script, not when imported.
if __name__ == '__main__':
    demo()
61