|
|
- import shutil
-
- from shutil import copyfile
-
- copyfile(src = "../input/cleantext/cleantext.py", dst = "../working/cleantext.py")
-
- # Star-import every public name from cleantext; `process` (used below) presumably comes from here.
- from cleantext import *
-
- #!pylint cleantext
-
- import pandas as pd
- from sklearn.feature_extraction.text import CountVectorizer
-
- training = [
- " I am master of all",
- "I am a absolute learner"
- ]
-
- generalization = [
- "I am absolute learner learner"
- ]
-
- vectorization = CountVectorizer(
- stop_words = "english",
- preprocessor = process.master_clean_text)
-
- vectorization.fit(training)
-
- build_vocab = {
- value:key
- for key , value in vectorization.vocabulary_.items()
- }
-
- vocab = [build_vocab[i] for i in range(len(build_vocab))]
-
- pd.DataFrame(
- data = vectorization.transform(generalization).toarray(),
- index=["generalization"],
- columns=vocab
- )
|