1
0
forked from mirror/misskey
mi.moris.day/src/tools/ai/categorizer.ts
2017-09-06 19:41:36 +09:00

90 lines
1.8 KiB
TypeScript

import * as fs from 'fs';
const bayes = require('bayes');
const MeCab = require('mecab-async');
import Post from '../../api/models/post';
/**
 * Naive-Bayes categorizer for posts.
 *
 * Text is tokenized with MeCab and fed to a `bayes` classifier whose state is
 * persisted as JSON at `categorizerDbFilePath`.
 */
export default class Categorizer {
  /** The `bayes` classifier instance; only set once `init()` has run. */
  classifier: any;

  /** Path of the JSON file the classifier state is loaded from and saved to. */
  categorizerDbFilePath: string;

  /** The `mecab-async` instance used for tokenization. */
  mecab: any;

  /**
   * @param categorizerDbFilePath Path of the classifier's JSON state file.
   * @param mecabCommand Command line assigned to the MeCab instance's `command`.
   */
  constructor(categorizerDbFilePath: string, mecabCommand: string = 'mecab -d /usr/share/mecab/dic/mecab-ipadic-neologd') {
    this.categorizerDbFilePath = categorizerDbFilePath;

    this.mecab = new MeCab();
    this.mecab.command = mecabCommand;

    // The tokenizer is handed to the classifier as a bare function reference,
    // so it has to be bound to keep `this.mecab` reachable.
    this.tokenizer = this.tokenizer.bind(this);
  }

  /**
   * Splits text into tokens by delegating to MeCab's `wakachiSync`.
   *
   * @param text Text to tokenize.
   * @returns Whatever `wakachiSync` returns for the text.
   */
  tokenizer(text: string) {
    return this.mecab.wakachiSync(text);
  }

  /**
   * Prepares the classifier.
   *
   * First tries to restore it from the JSON state file. If that fails for any
   * reason (e.g. the file cannot be read or `bayes.fromJson` throws), a fresh
   * classifier is trained from every post flagged `is_category_verified` and
   * the result is saved to the state file.
   */
  async init(): Promise<void> {
    try {
      const db = fs.readFileSync(this.categorizerDbFilePath, {
        encoding: 'utf8'
      });

      this.classifier = bayes.fromJson(db);
      // The tokenizer is re-attached after restoring from JSON.
      this.classifier.tokenizer = this.tokenizer;
    } catch (e) {
      // Deliberate best-effort fallback: rebuild the classifier from scratch.
      this.classifier = bayes({
        tokenizer: this.tokenizer
      });

      // Training data
      const verifiedPosts = await Post.find({
        is_category_verified: true
      });

      // Train
      for (const post of verifiedPosts) {
        this.classifier.learn(post.text, post.category);
      }

      this.save();
    }
  }

  /**
   * Marks a post as belonging to `category` (verified), teaches the classifier
   * that association, and persists the classifier state.
   *
   * @param id `_id` of the post.
   * @param category Category to assign and learn.
   * @throws Error when no post with the given id exists.
   */
  async learn(id, category): Promise<void> {
    const post = await this.fetchPost(id);

    // Await the write so failures reach the caller and the method does not
    // resolve before the post has actually been updated.
    await Post.update({ _id: id }, {
      $set: {
        category: category,
        is_category_verified: true
      }
    });

    this.classifier.learn(post.text, category);
    this.save();
  }

  /**
   * Classifies a post's text and stores the predicted category on the post.
   *
   * @param id `_id` of the post.
   * @throws Error when no post with the given id exists.
   */
  async categorize(id): Promise<void> {
    const post = await this.fetchPost(id);
    const category = this.classifier.categorize(post.text);

    // Await the write so failures reach the caller.
    await Post.update({ _id: id }, {
      $set: {
        category: category
      }
    });
  }

  /**
   * Classifies arbitrary text without touching the database.
   *
   * @param text Text to classify.
   * @returns The result of the classifier's `categorize` for the text.
   */
  async test(text) {
    return this.classifier.categorize(text);
  }

  /** Writes the classifier's JSON state to `categorizerDbFilePath`. */
  save() {
    fs.writeFileSync(this.categorizerDbFilePath, this.classifier.toJson(), {
      encoding: 'utf8'
    });
  }

  /** Looks a post up by `_id`, failing with a clear error when it is missing. */
  private async fetchPost(id) {
    const post = await Post.findOne({ _id: id });
    if (post == null) {
      throw new Error(`Post not found: ${id}`);
    }
    return post;
  }
}