| Home | Trees | Indices | Help |
|
|---|
|
|
1 # Natural Language Toolkit: POS Tag Simplification
2 #
3 # Copyright (C) 2001-2008 NLTK Project
4 # Author: Steven Bird <[email protected]>
5 # URL: <http://nltk.org>
6 # For license information, see LICENSE.TXT
7
8
9 ######################################################################
10 #{ Brown
11 ######################################################################
12
13 # http://khnt.hit.uib.no/icame/manuals/brown/INDEX.HTM
14
# Prefix table for lowercased Brown tags.  Keys are one- or two-character
# prefixes; the one-character prefix of a tag is tried before the
# two-character one.
brown_mapping1 = {
    'j': 'ADJ', 'p': 'PRO', 'm': 'MOD', 'q': 'DET',
    'w': 'WH', 'r': 'ADV', 'i': 'P',
    'u': 'UH', 'e': 'EX', 'o': 'NUM', 'b': 'V',
    'h': 'V', 'f': 'FW', 'a': 'DET', 't': 'TO',
    'cc': 'CNJ', 'cs': 'CNJ', 'cd': 'NUM',
    # The single-letter key 'd' used to be listed twice ('DET' and 'V'); a
    # dict keeps only the last value, so determiner tags came out as 'V'.
    # The two families are kept apart with two-character prefixes instead.
    'do': 'V', 'dt': 'DET',
    'nn': 'N', 'nr': 'N', 'np': 'NP', 'nc': 'N',
}

# Exact-match table, consulted only after the prefix table has failed.
brown_mapping2 = {
    'vb': 'V', 'vbd': 'VD', 'vbg': 'VG', 'vbn': 'VN',
}


# NOTE(review): the `def` line is absent from the listing this block was
# recovered from; the name follows the mapping names -- confirm with callers.
def simplify_brown_tag(tag):
    """Map a Brown corpus tag onto the simplified tagset.

    The tag is lowercased, then looked up by its first character and by
    its first two characters in ``brown_mapping1``.  If neither prefix is
    known, anything from the first hyphen onwards is dropped and the
    remainder is looked up in ``brown_mapping2``.  A tag that matches
    nothing is returned upper-cased, without its hyphenated suffix.

    An empty tag yields an empty string.  A tag that starts with a hyphen
    (so that nothing precedes the first hyphen) is kept whole.
    """
    tag = tag.lower()
    # Slices rather than tag[0] so that an empty tag does not raise.
    for prefix in (tag[:1], tag[:2]):
        if prefix in brown_mapping1:
            return brown_mapping1[prefix]
    # str.split is called on the tag with the hyphen as separator; the
    # reverse ('-'.split(tag)) always produced '-' for suffixed tags.
    base = tag.split('-')[0] or tag
    try:
        return brown_mapping2[base]
    except KeyError:
        return base.upper()
39
40 ######################################################################
41 #{ Wall Street Journal tags (Penn Treebank)
42 ######################################################################
43
# Lowercased Penn Treebank (Wall Street Journal) tags and their
# simplified equivalents.
wsj_mapping = {
    '-lrb-': '(', '-rrb-': ')', '-lsb-': '(',
    '-rsb-': ')', '-lcb-': '(', '-rcb-': ')',
    '-none-': '', 'cc': 'CNJ', 'cd': 'NUM',
    'dt': 'DET', 'ex': 'EX', 'fw': 'FW',  # existential "there", foreign word
    'in': 'P', 'jj': 'ADJ', 'jjr': 'ADJ',
    'jjs': 'ADJ', 'ls': 'L', 'md': 'MOD',  # list item marker
    'nn': 'N', 'nnp': 'NP', 'nnps': 'NP',
    'nns': 'N', 'pdt': 'DET', 'pos': '',
    'prp': 'PRO', 'prp$': 'PRO', 'rb': 'ADV',
    'rbr': 'ADV', 'rbs': 'ADV', 'rp': 'PRO',
    'sym': 'S', 'to': 'TO', 'uh': 'UH',
    'vb': 'V', 'vbd': 'VD', 'vbg': 'VG',
    'vbn': 'VN', 'vbp': 'V', 'vbz': 'V',
    'wdt': 'WH', 'wp': 'WH', 'wp$': 'WH',
    'wrb': 'WH',
    # additions for NPS Chat corpus
    'bes': 'V', 'hvs': 'V', 'prp^vbp': 'PRO',
}


# NOTE(review): the `def` line is absent from the listing this block was
# recovered from; the name follows the mapping name -- confirm with callers.
def simplify_wsj_tag(tag):
    """Map a Penn Treebank tag onto the simplified tagset.

    A single leading ``^`` is removed first.  The rest is looked up
    case-insensitively in ``wsj_mapping``; a tag with no entry is
    returned upper-cased.
    """
    if tag.startswith('^'):
        tag = tag[1:]
    return wsj_mapping.get(tag.lower(), tag).upper()
71
# Lowercased tags of the Indian-language corpus and their simplified
# equivalents.  Some keys reproduce irregular spellings verbatim.
indian_mapping = {
    'nn': 'N', 'vm': 'MOD', 'jj': 'ADJ', 'nnp': 'NP',
    'prp': 'PRO', 'prep': 'PRE', 'vaux': 'V', 'vfm': 'V',
    'cc': 'CNJ', 'nnpc': 'NP', 'nnc': 'N', 'qc': 'QC',
    'dem': 'DET', 'vrb': 'V', 'qfnum': 'NUM', 'rb': 'ADV',
    'qf': 'DET', 'punc': '.', 'rp': 'PRT', 'psp': 'PSP',
    'nst': 'N', 'nvb': 'N', 'vjj': 'V', 'neg': 'NEG',
    'vnn': 'V', 'xc': 'XC', 'intf': 'INTF', 'nloc': 'N',
    'jvb': 'ADJ', 'wq': 'WH', 'qw': 'WH', 'jj:?': 'ADJ',
    '"cc': 'CNJ', 'nnp,': 'NP', 'sym\xc0\xa7\xb7': 'SYM',
    'symc': 'SYM',
}


# NOTE(review): the `def` line is absent from the listing this block was
# recovered from; the name follows the mapping name -- confirm with callers.
def simplify_indian_tag(tag):
    """Map an Indian-corpus tag onto the simplified tagset.

    Everything from the first colon onwards is discarded, the remainder
    is looked up case-insensitively in ``indian_mapping``, and a tag with
    no entry is returned upper-cased.  A tag with nothing before its
    first colon is kept whole.
    """
    # str.split is called on the tag with the colon as separator; the
    # reverse (':'.split(tag)) turned every tag containing a colon into ':'.
    base = tag.split(':')[0] or tag
    return indian_mapping.get(base.lower(), base).upper()
92
93
94 ######################################################################
95 #{ Alpino tags
96 ######################################################################
97
# Alpino tags and their simplified equivalents.
alpino_mapping = {
    'noun': 'N', 'name': 'NP', 'vg': 'VG', 'punct': '.',
    'verb': 'V', 'pron': 'PRO', 'prep': 'P',
}
102
109
110 ######################################################################
111 #{ Default tag simplification
112 ######################################################################
113
# NOTE(review): the `def` line is absent from the listing this block was
# recovered from; the name is inferred from the "Default tag simplification"
# section heading -- confirm with callers.
def simplify_tag(tag):
    """Return the first character of ``tag``, upper-cased.

    An empty tag yields an empty string instead of raising ``IndexError``.
    """
    return tag[:1].upper()
116
| Home | Trees | Indices | Help |
|
|---|
| Generated by Epydoc 3.0beta1 on Wed Aug 27 15:09:08 2008 | http://epydoc.sourceforge.net |