반응형
/*******************************************************************************************************************
-- Title : [Py3.5] Tokenizing, POS Tagging, Chunking, Lemmatizing w/ TextBlob
-- Reference : textblob.readthedocs.io/en/dev/quickstart.html
-- Key word : textblob tokenizing pos tagging chunking lemmatizing sentences words tags noun-phrases
lemmatize singularize pluralize 자연어 처리 품사 태깅 청킹 len
*******************************************************************************************************************/
■ TextBlob Script
# Walk-through of the basic TextBlob features: sentence/word tokenizing,
# POS tagging, noun-phrase chunking, and singularize/pluralize/lemmatize.
# Each section prints its result followed by a 100-character separator line.
from textblob import TextBlob

# TextBlob API : textblob.readthedocs.io/en/dev/api_reference.html#textblob.blob.TextBlob.noun_phrases

# ------------------------------
# -- Blob Raw Text
# ------------------------------
# Adjacent string literals are concatenated into one single-line text; the
# sentences are separated by single spaces.
# NOTE(review): the original post may have used line breaks inside the
# triple-quoted string -- confirm if the exact print(raw_blob) layout matters.
raw_blob = TextBlob(
    "Python is a high-level, general-purpose programming language. "
    "This Programm is made by DdolI in Netherland. "
    "Part-of-speech tags can be accessed through the tags property. "
    "octopi went"
)

print(raw_blob)
print("... blob_raw_text", "." * 100, "\n")

# ------------------------------
# -- Tokenizing
# ------------------------------
# -- sentence tokenizing
print(raw_blob.sentences)
print(",,, sent_tokenizing", "," * 100, "\n")

# -- word tokenizing
print(raw_blob.words)
print(",,, word_tokenizing", "," * 100, "\n")

# -- token words counting
print(len(raw_blob.words))
print(",,, word_token counting", "," * 100, "\n")

# ------------------------------
# -- POS Tagging and Chunking
# ------------------------------
# -- POS Tagging
print(raw_blob.tags)
print(";;; pos_tagging", ";" * 100, "\n")

# -- Noun Phrase Chunking
print(raw_blob.noun_phrases)
print(";;; np chunking", ";" * 100, "\n")

# ------------------------------
# -- Lemmatizing
# ------------------------------
# The indices below are tied to the sample text above; editing the text
# shifts them.
# NOTE(review): words[16] is presumably "tags" (plural) and words[6]
# "language" (singular) -- verify against the word_tokenizing output.
words = raw_blob.words

print("복수:", words[16], " 단수:", words[6])
print("---------------------------------------")
print("단수화:", words[16].singularize())
print("복수화:", words[6].pluralize())
print("")

# The last two tokens of the sample text are "octopi went": an irregular
# plural noun followed by an irregular past-tense verb.
print("과거형:", words[-1], " 복수:", words[-2])
print("---------------------------------------")
print("현재형:", words[-1].lemmatize('v'))  # 'v' lemmatizes the token as a verb
print("단수형:", words[-2].lemmatize())  # same as words[-2].lemmatize('n')
print("!!! lemmatizing", "!" * 100, "\n")
반응형