Japanese WordNet (12.1.5) - Deutschina's Tech Diary

There are several Japanese thesauri, but many of them are not free of charge. As a free alternative, Japanese WordNet is available here:
http://nlpwww.nict.go.jp/wn-ja/

First, I downloaded the file "wnjpn-all.tab" and put it under "nltk_data/wnjpn". Then I created a file named "JapaneseWordNetCorpusReader.py" with the following source code.

[code language="python"]
from nltk.corpus.reader.wordnet import WordNetCorpusReader

class JapaneseWordNetCorpusReader(WordNetCorpusReader):
    """WordNet reader with an extra word -> synset index loaded from a tab file.

    On construction a UTF-8, tab-separated file is read. Each usable line
    has at least two cells: an ``<offset>-<pos>`` key followed by a word.
    The word is mapped to that offset and part of speech so that
    ``synset(word)`` can resolve it through the underlying WordNet data.
    """

    def __init__(self, root, filename):
        """Initialise the base reader and build the word index.

        Args:
            root: Passed unchanged to ``WordNetCorpusReader.__init__``.
            filename: Path of the tab-separated file to index. It is opened
                as given (it is not resolved against ``root``).

        Raises:
            ValueError: If the first cell of a line is not of the form
                ``<integer>-<pos>``.
        """
        # NOTE(review): some NLTK releases appear to require an additional
        # ``omw_reader`` argument here -- confirm against the installed version.
        WordNetCorpusReader.__init__(self, root)
        import codecs

        self._jword2offset = {}
        # The context manager closes the file even if a line fails to parse.
        with codecs.open(filename, encoding="utf-8") as f:
            for line in f:
                cells = line.strip().split('\t')
                # Blank or single-column lines carry no word; skip them
                # instead of failing with IndexError.
                if len(cells) < 2:
                    continue
                offset, pos = cells[0].split('-')
                # A word that occurs on several lines keeps only the entry
                # from the last such line.
                self._jword2offset[cells[1]] = {'offset': int(offset), 'pos': pos}

    def synset(self, word):
        """Return the synset indexed for ``word``, or ``None`` if it is unknown.

        Args:
            word: A word as it appears in the second cell of the indexed file.

        Returns:
            The result of ``_synset_from_pos_and_offset`` for the stored part
            of speech and offset, or ``None`` when ``word`` is not indexed.
        """
        entry = self._jword2offset.get(word)
        if entry is None:
            return None
        return WordNetCorpusReader._synset_from_pos_and_offset(
            self, entry['pos'], entry['offset']
        )
|