반응형
/*******************************************************************************************************************
-- Title : [Py3.5] Naive Bayes Classifier 생성/저장/재사용 w/ NLTK
-- Reference : https://www.pythonprogramming.net/naive-bayes-classifier-nltk-tutorial/
-- Key word : nltk pickle dump 자연어처리 덤프 나이브베이즈 나이브 베이즈 naive bayes classifier
classification 분류기 재사용 reuse
*******************************************************************************************************************/
■ Scripts
"""Train, persist and reload an NLTK Naive Bayes classifier.

The script loads pre-built feature sets from ``featuresets.dmp``, trains a
``nltk.NaiveBayesClassifier`` on the first 1900 entries, reports accuracy on
the remainder, pickles the classifier to ``naivebayes.pickle``, loads it back
and evaluates the reloaded copy.

Tutorial: https://www.pythonprogramming.net/naive-bayes-classifier-nltk-tutorial/
"""
import pickle

import nltk
import pandas as pd
from nltk.corpus import movie_reviews

# =======================================
# -- Get dataset from dump
# =======================================
# Ref : dbrang.tistory.com/1239
# SECURITY NOTE: pickle.load can execute arbitrary code while unpickling.
# Only load dump files that this project produced itself.
with open('featuresets.dmp', 'rb') as fp:
    featuresets = pickle.load(fp)

print(featuresets[0])
print("... features_sets", "." * 100, "\n")

print(len(featuresets))
print("... len(featuresets)", "." * 100, "\n")

# =======================================
# -- Naive Bayes Classifier
# =======================================
# -- set training data (first 1900 entries)
# NOTE(review): presumably each entry is a (feature_dict, label) pair as
# built in the referenced post -- confirm against the dump's producer.
training_set = featuresets[:1900]

# -- set test data (everything after the first 1900 entries)
testing_set = featuresets[1900:]

# -- define classifier
classifier = nltk.NaiveBayesClassifier.train(training_set)

# -- get accuracy on the held-out slice
print((nltk.classify.accuracy(classifier, testing_set))*100, '%')
print(",,, Classifier accuracy percent", "," * 100, "\n")

classifier.show_most_informative_features(50)
print(",,, show_most_informative_features", "," * 100, "\n")

# =======================================
# -- Save and load Classifiers with NLTK
# =======================================
# -- Save classifier with pickle
# The context manager closes the file even if pickle.dump raises.
with open("naivebayes.pickle", "wb") as save_classifier:
    pickle.dump(classifier, save_classifier)

# -- Load classifier from pickle
# Same trust caveat as above: only unpickle files written by this script.
with open("naivebayes.pickle", "rb") as classifier_f:
    classifier_new = pickle.load(classifier_f)

print(type(classifier_new))
print(";;; Classifier type", ";" * 100, "\n")

# -- Reuse classifier
# NOTE(review): this scores the reloaded classifier on ALL feature sets,
# which includes the 1900 training entries, so the figure is not comparable
# with the held-out accuracy printed earlier.
print((nltk.classify.accuracy(classifier_new, featuresets))*100, '%')
print(";;; New Classifier accuracy percent", ";" * 100, "\n")
■ Files
반응형