/*******************************************************************************************************************
-- Title : [Py3.5] Several Classifiers with Scikit-Learn, NLTK
-- Reference : pythonprogramming.net
-- Key word : nltk sklearn scikit-learn classifier classification naive bayes pickle multinomial naive bayes
                  bernoulli naive bayes logistic classifier stochastic gradient descent support vector classifier
                  linear support vector classifier svc sgd nusvc nu-svc 분류기 나이브베이즈 나이브 베이즈 
                  서포트 벡터 베르누이 
*******************************************************************************************************************/

■ Scripts

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
from nltk.classify.scikitlearn import SklearnClassifier
from sklearn.naive_bayes import MultinomialNB,BernoulliNB
import pickle
import nltk
 
# https://www.pythonprogramming.net/sklearn-scikit-learn-nltk-tutorial/?completed=/pickle-classifier-save-nltk-tutorial/
 
# =======================================
# -- Get dataset from dump
# =======================================
 
# Load the feature sets from the pickle dump in the working directory.
# NOTE(review): pickle can execute arbitrary code while loading, so only use a
# 'featuresets.dmp' that comes from a trusted source.
# Presumably each entry is a (feature_dict, label) pair as expected by the
# NLTK classifiers used further down -- confirm against the code that wrote the dump.
with open('featuresets.dmp', 'rb') as fp:
    featuresets = pickle.load(fp)

# Preview the first two entries as a sanity check of the loaded data.
print("features[0]:", featuresets[0])
print("features[1]:", featuresets[1])
print("... features_sets", "." * 100, "\n")

# Number of loaded entries.
print(len(featuresets))
print("... len(featuresets)", "." * 100, "\n")
 
 
# =======================================
# -- Train / test split for the classifiers
# =======================================
 
# -- the first 1900 entries are used for training; every entry after that is
# -- held out for testing
training_set, testing_set = featuresets[:1900], featuresets[1900:]
 
 
# =======================================
# -- Several Classifiers with Scikit-Learn, NLTK
# =======================================
 
from sklearn.linear_model import LogisticRegression, SGDClassifier
from sklearn.svm import SVC, LinearSVC, NuSVC
 
def _report_accuracy(label, classifier):
    """Print *label* followed by the classifier's accuracy on testing_set, times 100."""
    print(label, nltk.classify.accuracy(classifier, testing_set) * 100)


# -- NLTK's own Naive Bayes classifier
NLTKNB_classifier = nltk.NaiveBayesClassifier.train(training_set)
_report_accuracy("*** Original Naive Bayes accuracy percent:", NLTKNB_classifier)

# -- print the 15 most informative features of the NLTK model
NLTKNB_classifier.show_most_informative_features(15)

# -- scikit-learn Multinomial Naive Bayes, wrapped for NLTK
SML_MNB_classifier = SklearnClassifier(MultinomialNB())
SML_MNB_classifier.train(training_set)
_report_accuracy("*** MNB_classifier accuracy percent:", SML_MNB_classifier)

# -- scikit-learn Bernoulli Naive Bayes, wrapped for NLTK
SKL_BNB_classifier = SklearnClassifier(BernoulliNB())
SKL_BNB_classifier.train(training_set)
_report_accuracy("*** BernoulliNB_classifier accuracy percent:", SKL_BNB_classifier)

# -- scikit-learn Logistic Regression, wrapped for NLTK
LogisticRegression_classifier = SklearnClassifier(LogisticRegression())
LogisticRegression_classifier.train(training_set)
_report_accuracy(
    "*** LogisticRegression_classifier accuracy percent:",
    LogisticRegression_classifier,
)

# -- scikit-learn Stochastic Gradient Descent (SGD), wrapped for NLTK
SGDClassifier_classifier = SklearnClassifier(SGDClassifier())
SGDClassifier_classifier.train(training_set)
_report_accuracy(
    "*** SGDClassifier_classifier accuracy percent:",
    SGDClassifier_classifier,
)

# -- scikit-learn Support Vector Classifier (SVC), wrapped for NLTK
SVC_classifier = SklearnClassifier(SVC())
SVC_classifier.train(training_set)
_report_accuracy("*** SVC_classifier accuracy percent:", SVC_classifier)

# -- scikit-learn Linear Support Vector Classifier (L-SVC), wrapped for NLTK
LinearSVC_classifier = SklearnClassifier(LinearSVC())
LinearSVC_classifier.train(training_set)
_report_accuracy("*** LinearSVC_classifier accuracy percent:", LinearSVC_classifier)

# -- scikit-learn Nu Support Vector Classifier (Nu-SVC), wrapped for NLTK
NuSVC_classifier = SklearnClassifier(NuSVC())
NuSVC_classifier.train(training_set)
_report_accuracy("*** NuSVC_classifier accuracy percent:", NuSVC_classifier)
cs


■ Files

featuresets.zip


저작자 표시 비영리 변경 금지
신고

+ Recent posts