반응형

/*******************************************************************************************************************
-- Title :  [NLP] Free e-book and Sample Corpus
-- Reference : googling
-- Key word : ebook e-book sample corpus text 텍스트 이북 코퍼스 파일 file 샘플 소스 source
*******************************************************************************************************************/

■ Project Gutenberg
    ㅇ 죄와 벌 | Crime and Punishment
        http://www.gutenberg.org/files/2554/2554.txt

    ㅇ 모비딕 | Moby Dick
        http://www.gutenberg.org/files/2701/2701-0.txt

    ㅇ 이상한 나라의 앨리스 | Alice's Adventures in Wonderland
        http://www.gutenberg.org/cache/epub/28885/pg28885.txt


■ Website Browsing
    ㅇ Reviews of a hotel obtained from Tripadvisor's website
        https://s3-ap-south-1.amazonaws.com/av-blog-media/wp-content/uploads/2017/04/04080929/...


■ Files

US20160049961A1.txt


source = "spaCy is an open-source software library for advanced Natural-Language-Processing, " \
         "written in the programming languages Python -Cython 4.5.678. " \
         "It offers the fastest syntactic parser in the world. " \
         "The library is published under the MIT license and currently supports English and German, " \
         "as well as tokenization for Chinese and several other languages. " \
         "Unlike NLTK, which is mainly * intended for teaching and research, " \
         "spaCy focuses on providing software for production usage. As of version 11.0, " \
         "spaCy also supports deep learning workflows that allow connecting-statistical-models trained " \
         "by popular machine learning libraries like TensorFlow, Keras or Scikit-learn. " \
         "spaCy's machine learning library, Thinc, is also available as a separate open-source Python library."


출처: http://dbrang.tistory.com/ [dBRang]
source = "spaCy is an open-source software library for advanced Natural-Language-Processing, " \
         "written in the programming languages Python -Cython 4.5.678. " \
         "It offers the fastest syntactic parser in the world. " \
         "The library is published under the MIT license and currently supports English and German, " \
         "as well as tokenization for Chinese and several other languages. " \
         "Unlike NLTK, which is mainly * intended for teaching and research, " \
         "spaCy focuses on providing software for production usage. As of version 11.0, " \
         "spaCy also supports deep learning workflows that allow connecting-statistical-models trained " \
         "by popular machine learning libraries like TensorFlow, Keras or Scikit-learn. " \
         "spaCy's machine learning library, Thinc, is also available as a separate open-source Python library."


출처: http://dbrang.tistory.com/ [dBRang]
source = "spaCy is an open-source software library for advanced Natural-Language-Processing, " \
         "written in the programming languages Python -Cython 4.5.678. " \
         "It offers the fastest syntactic parser in the world. " \
         "The library is published under the MIT license and currently supports English and German, " \
         "as well as tokenization for Chinese and several other languages. " \
         "Unlike NLTK, which is mainly * intended for teaching and research, " \
         "spaCy focuses on providing software for production usage. As of version 11.0, " \
         "spaCy also supports deep learning workflows that allow connecting-statistical-models trained " \
         "by popular machine learning libraries like TensorFlow, Keras or Scikit-learn. " \
         "spaCy's machine learning library, Thinc, is also available as a separate open-source Python library."


출처: http://dbrang.tistory.com/ [dBRang]
source = "spaCy is an open-source software library for advanced Natural-Language-Processing, " \
         "written in the programming languages Python -Cython 4.5.678. " \
         "It offers the fastest syntactic parser in the world. " \
         "The library is published under the MIT license and currently supports English and German, " \
         "as well as tokenization for Chinese and several other languages. " \
         "Unlike NLTK, which is mainly * intended for teaching and research, " \
         "spaCy focuses on providing software for production usage. As of version 11.0, " \
         "spaCy also supports deep learning workflows that allow connecting-statistical-models trained " \
         "by popular machine learning libraries like TensorFlow, Keras or Scikit-learn. " \
         "spaCy's machine learning library, Thinc, is also available as a separate open-source Python library."


출처: http://dbrang.tistory.com/ [dBRang]

■ Source

source = "spaCy is an open-source software library for advanced Natural-Language-Processing, " \
            "written in the programming languages Python -Cython 4.5.678. " \
            "It offers the fastest syntactic parser in the world. " \
            "The library is published under the MIT license and currently supports English and German, " \
            "as well as tokenization for Chinese and several other languages. " \
            "Unlike NLTK, which is mainly * intended for teaching and research, " \
            "spaCy focuses on providing software for production usage. As of version 11.0, " \
            "spaCy also supports deep learning workflows that allow connecting-statistical-models trained " \
            "by popular machine learning libraries like TensorFlow, Keras or Scikit-learn. " \
            "spaCy's machine learning library, Thinc, is also available as a separate open-source Python library."


반응형

+ Recent posts