# Bag-of-words demo: fit a CountVectorizer on a tiny training corpus and show
# how an unseen ("generalization") sentence is encoded against that vocabulary.

import shutil
from shutil import copyfile

# Copy the helper module from the read-only input directory into the working
# directory so it can be imported below.
# NOTE(review): presumably "../working" is the current working directory (and
# therefore on sys.path) -- confirm for the environment this runs in.
copyfile(src="../input/cleantext/cleantext.py", dst="../working/cleantext.py")

# Import all our functions. This must run after the copy above. `process`,
# used further down, is expected to be provided by this wildcard import.
from cleantext import *
#!pylint cleantext

# Kept after the wildcard import (as in the original) so these explicit names
# take precedence over anything `cleantext` might export under the same name.
import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer

# Corpus used to learn the vocabulary.
training = [
    " I am master of all",
    "I am a absolute learner",
]

# Unseen sentence encoded with the already-fitted vocabulary.
generalization = [
    "I am absolute learner learner",
]

# English stop words are dropped; each raw document is first passed through
# the custom preprocessor taken from `cleantext`.
vectorization = CountVectorizer(
    stop_words="english",
    preprocessor=process.master_clean_text,
)
vectorization.fit(training)

# `vocabulary_` maps term -> column index; invert it to column index -> term.
build_vocab = {index: term for term, index in vectorization.vocabulary_.items()}

# Terms listed in column order, so they line up with the columns of the
# transformed count matrix.
vocab = [term for _, term in sorted(build_vocab.items())]

# Final expression: in a notebook this renders the count table for the
# generalization sentence (one row, one column per vocabulary term).
pd.DataFrame(
    data=vectorization.transform(generalization).toarray(),
    index=["generalization"],
    columns=vocab,
)