반응형

/*******************************************************************************************************************
-- Title : [Py3.5] Tokenizing, POS Tagging, Chunking, Lemmatizing w/ TextBlob
-- Reference : textblob.readthedocs.io/en/dev/quickstart.html
-- Key word : textblob tokenizing pos tagging chunking lemmatizing sentences words tags noun-phrases
                  lemmatize singularize pluralize 자연어 처리 품사 태깅 청킹 len
*******************************************************************************************************************/

■ TextBlob Script

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
from textblob import TextBlob
 
 
# ------------------------------
# -- Blob Raw Text
# ------------------------------
# Wrap the sample text in a TextBlob. The sections that follow all read from
# `raw_blob`. The last line of the text ("octopi went") supplies an irregular
# plural and an irregular past tense for the lemmatizing demo.
raw_blob = TextBlob("""Python is a high-level, general-purpose programming language.
This Programm is made by DdolI in Netherland.
Part-of-speech tags can be accessed through the tags property.
octopi went""")

# Show the blob itself before any processing.
print(raw_blob)
# Section footer: a label, a rule of 100 dots, and a trailing newline.
# The arguments must be comma-separated; without the commas a string literal
# directly follows the integer 100, which is a SyntaxError.
print("... blob_raw_text", "." * 100, "\n")
 
 
# ------------------------------
# -- Tokenizing
# ------------------------------
# Each step prints one view of `raw_blob`, followed by a footer made of a
# label, a rule of 100 commas, and a trailing newline. The footer arguments
# are comma-separated; without the commas each footer line is a SyntaxError.

# -- sentence tokenizing: the blob split into sentences
print(raw_blob.sentences)
print(",,, sent_tokenizing", "," * 100, "\n")

# -- word tokenizing: the blob split into word tokens
print(raw_blob.words)
print(",,, word_tokenizing", "," * 100, "\n")

# -- token words counting: number of word tokens in the blob
print(len(raw_blob.words))
print(",,, word_token counting", "," * 100, "\n")
 
 
# ------------------------------
# -- POS Tagging and Chunking
# ------------------------------
# Each step prints one property of `raw_blob`, followed by a footer made of a
# label, a rule of 100 semicolons, and a trailing newline. The footer
# arguments are comma-separated; without the commas each footer line is a
# SyntaxError.

# -- POS Tagging: part-of-speech tags via the `tags` property
print(raw_blob.tags)
print(";;; pos_tagging", ";" * 100, "\n")

# -- Noun Phrase Chunking: noun phrases via the `noun_phrases` property
print(raw_blob.noun_phrases)
print(";;; np chunking", ";" * 100, "\n")
 
 
# ------------------------------
# -- Lemmatizing
# ------------------------------
# NOTE(review): the word indexes below are hard-coded against the sample text
# held in `raw_blob`. Index 16 is expected to be the plural "tags" and index 6
# the singular "language"; the last two tokens are expected to be "octopi"
# and "went". Confirm against the printed word list if the sample text or the
# tokenizer changes.

# A plural and a singular word, then each converted to the other number.
print("복수:", raw_blob.words[16], "   단수:", raw_blob.words[6])
print("---------------------------------------")
print("단수화:", raw_blob.words[16].singularize())
print("복수화:", raw_blob.words[6].pluralize())
print("")

# A past-tense verb and an irregular plural noun, then each lemmatized.
print("과거형:", raw_blob.words[-1], "   복수:", raw_blob.words[-2])
print("---------------------------------------")
# 'v' asks for the word to be lemmatized as a verb.
print("현재형:", raw_blob.words[-1].lemmatize('v'))
# No argument: per the original author's note this is the same as
# lemmatize('n'), i.e. the word is lemmatized as a noun.
print("단수형:", raw_blob.words[-2].lemmatize())

# Section footer: a label, a rule of 100 exclamation marks, and a trailing
# newline. The arguments are comma-separated; without the commas this line is
# a SyntaxError.
print("!!! lemmatizing", "!" * 100, "\n")
 
 


반응형

+ Recent posts