/*******************************************************************************************************************
-- Title : [Py3.5] Naive Bayes Classifier 생성/저장/재사용 w/ NLTK
-- Reference : https://www.pythonprogramming.net/naive-bayes-classifier-nltk-tutorial/
-- Key word : nltk pickle dump 자연어처리 덤프 나이브베이즈 나이브 베이즈 naive bayes classifier
                  classification 분류기 재사용 reuse 
*******************************************************************************************************************/

■ Scripts

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
import nltk
from nltk.corpus import movie_reviews
import pandas as pd
import pickle
 
# https://www.pythonprogramming.net/naive-bayes-classifier-nltk-tutorial/
 
# =======================================
# -- Get dataset from dump
# =======================================

# Load the previously dumped feature sets from 'featuresets.dmp'.
# The file is opened in binary mode because pickle reads bytes.
# NOTE(security): pickle.load can execute arbitrary code while
# deserializing; only load dump files you created yourself or fully trust.
with open('featuresets.dmp', 'rb') as fp:
    featuresets = pickle.load(fp)

# Show the first entry as a quick sanity check of the loaded data.
# NOTE(review): presumably each entry is a (features, label) pair -- confirm
# against the script that produced the dump.
print(featuresets[0])
print("... features_sets", "." * 100, "\n")

# Show how many entries were loaded.
print(len(featuresets))
print("... len(featuresets)", "." * 100, "\n")
 
 
# =======================================
# -- Naive Bayes Classifier
# =======================================

# -- set training data: the first 1900 entries of featuresets
training_set = featuresets[:1900]

# -- set test data: every entry after the first 1900
testing_set = featuresets[1900:]

# -- define classifier: train a Naive Bayes model on the training slice
classifier = nltk.NaiveBayesClassifier.train(training_set)

# -- get accuracy on the held-out slice; the value is multiplied by 100
#    so it is printed as a percentage
print((nltk.classify.accuracy(classifier, testing_set)) * 100, '%')
print(",,, Classifier accuracy percent", "," * 100, "\n")

# Print the 50 most informative features of the trained model.
classifier.show_most_informative_features(50)
print(",,, show_most_informative_features", "," * 100, "\n")
 
 
# =======================================
# -- Save and load Classifiers with NLTK
# =======================================

# -- Save classifier with pickle
# A context manager guarantees the file is flushed and closed even if
# pickling raises.
with open("naivebayes.pickle", "wb") as save_classifier:
    pickle.dump(classifier, save_classifier)

# -- Load classifier from pickle
# NOTE(security): pickle.load can execute arbitrary code while
# deserializing; only load pickle files you created yourself or fully trust.
with open("naivebayes.pickle", "rb") as classifier_f:
    classifier_new = pickle.load(classifier_f)

# Confirm what kind of object came back from the pickle file.
print(type(classifier_new))
print(";;; Classifier type", ";" * 100, "\n")

# -- Reuse classifier
# NOTE: this scores the reloaded classifier on the complete featuresets
# list, which includes the entries the classifier was trained on, so the
# figure is not a held-out accuracy.
print((nltk.classify.accuracy(classifier_new, featuresets)) * 100, '%')
print(";;; New Classifier accuracy percent", ";" * 100, "\n")
cs


■ Files

naivebayes.pickle

featuresets.zip



저작자 표시 비영리 변경 금지
신고

+ Recent posts

티스토리 툴바