반응형
/*******************************************************************************************************************
-- Title : [Py3.5] Stemming words w/ NLTK
-- Reference : pythonprogramming.net
-- Key word : nlp nltk stemming stemmer 자연어 처리 포터 스태머 스태밍 porter stemmer stem
word_tokenize sent_tokenize word tokenize sent tokenize
*******************************************************************************************************************/
-- Python
# Demo: stem a few hand-picked words and two tokenized sentences with NLTK's
# Porter stemmer, printing one stem per line and a numbered separator line
# after each section.
from nltk.stem import PorterStemmer
# sent_tokenize is not used below; the import is kept as in the original.
from nltk.tokenize import sent_tokenize, word_tokenize


def _print_stems(stemmer, tokens):
    """Print the stem of every token in ``tokens``, one per line.

    Args:
        stemmer: Object exposing ``stem(word)`` (here a ``PorterStemmer``).
        tokens: Iterable of word strings to stem.
    """
    for token in tokens:
        print(stemmer.stem(token))


# -- Create the Porter stemmer
ps = PorterStemmer()

example_words = ["python", "pythoner", "pythoning", "pythoned", "pythonly"]
_print_stems(ps, example_words)

print("[1] ", "*" * 100, "\n")

# -- Apply the Porter stemmer to word tokens
# NOTE(review): word_tokenize needs NLTK's Punkt tokenizer data to be
# installed; the exact resource name depends on the NLTK version - confirm.
new_text = (
    "It is important to by very pythonly while you are pythoning with python. "
    "All pythoners have pythoned poorly at least once."
)
words = word_tokenize(new_text)
_print_stems(ps, words)

print("[2] ", "&" * 100, "\n")

# -- Test
new_text2 = "installed packages failed: installing error occurred something."
words2 = word_tokenize(new_text2)
_print_stems(ps, words2)

print("[3] ", "$" * 100, "\n")
반응형