반응형
/*******************************************************************************************************************
-- Title : [Py3.5] Several Classifiers with Scikit-Learn, NLTK
-- Reference : pythonprogramming.net
-- Key word : nltk sklearn scikit-learn classifier classification naive bayes pickle multinomial naive bayes
bernoulli naive bayes logistic classifier stochastic gradient descent support vector classifier
linear support vector classifier svc sgd nusvc nu-svc 분류기 나이브베이즈 나이브 베이즈
서포트 벡터 베르누이
*******************************************************************************************************************/
■ Scripts
"""Train several NLTK / scikit-learn classifiers on one feature set and print each accuracy.

The feature sets are read from the pickle dump ``featuresets.dmp``. The first
``TRAIN_SIZE`` entries are used for training and the remainder for testing.
"""
import pickle

import nltk
from nltk.classify.scikitlearn import SklearnClassifier
from sklearn.linear_model import LogisticRegression, SGDClassifier
from sklearn.naive_bayes import MultinomialNB, BernoulliNB
from sklearn.svm import SVC, LinearSVC, NuSVC

# https://www.pythonprogramming.net/sklearn-scikit-learn-nltk-tutorial/?completed=/pickle-classifier-save-nltk-tutorial/

# Number of leading feature sets used for training; the rest are held out for testing.
TRAIN_SIZE = 1900


def _report_accuracy(label, classifier, test_data):
    """Print the accuracy of *classifier* on *test_data* as a percentage."""
    print("*** {} accuracy percent:".format(label),
          (nltk.classify.accuracy(classifier, test_data)) * 100)


def _train_sklearn(label, estimator, train_data, test_data):
    """Wrap *estimator* in an NLTK SklearnClassifier, train it, report it, and return it."""
    classifier = SklearnClassifier(estimator)
    classifier.train(train_data)
    _report_accuracy(label, classifier, test_data)
    return classifier


# =======================================
# -- Get dataset from dump
# =======================================
# Ref : dbrang.tistory.com/1239
# NOTE(security): pickle.load can execute arbitrary code while deserialising.
# Only load a dump that you created yourself or otherwise trust.
with open('featuresets.dmp', 'rb') as fp:
    featuresets = pickle.load(fp)

print("features[0]:", featuresets[0])
print("features[1]:", featuresets[1])
print("... features_sets", "." * 100, "\n")

print(len(featuresets))
print("... len(featuresets)", "." * 100, "\n")

# =======================================
# -- Train / test split
# =======================================
training_set = featuresets[:TRAIN_SIZE]
testing_set = featuresets[TRAIN_SIZE:]

# =======================================
# -- Several Classifier with Scikit-Learn, NLTK
# =======================================

# -- Naive Bayes with NLTK Classifier (NLTK's own trainer, not an sklearn wrapper)
NLTKNB_classifier = nltk.NaiveBayesClassifier.train(training_set)
_report_accuracy("Original Naive Bayes", NLTKNB_classifier, testing_set)

# -- show_most_informative_features
NLTKNB_classifier.show_most_informative_features(15)

# -- Multinomial Naive Bayes with SKLearn Classifier
SML_MNB_classifier = _train_sklearn(
    "MNB_classifier", MultinomialNB(), training_set, testing_set)

# -- Bernoulli Naive Bayes with SKLearn Classifier
SKL_BNB_classifier = _train_sklearn(
    "BernoulliNB_classifier", BernoulliNB(), training_set, testing_set)

# -- Logistic Regression with SKLearn Classifier
LogisticRegression_classifier = _train_sklearn(
    "LogisticRegression_classifier", LogisticRegression(), training_set, testing_set)

# -- Stochastic Gradient Descent (SGD) with SKLearn Classifier
SGDClassifier_classifier = _train_sklearn(
    "SGDClassifier_classifier", SGDClassifier(), training_set, testing_set)

# -- Support Vector Classifier (SVC) with SKLearn Classifier
SVC_classifier = _train_sklearn(
    "SVC_classifier", SVC(), training_set, testing_set)

# -- Linear Support Vector Classifier (L-SVC) with SKLearn Classifier
LinearSVC_classifier = _train_sklearn(
    "LinearSVC_classifier", LinearSVC(), training_set, testing_set)

# -- Nu Support Vector Classifier (Nu-SVC) with SKLearn Classifier
NuSVC_classifier = _train_sklearn(
    "NuSVC_classifier", NuSVC(), training_set, testing_set)
■ Files
반응형