# jrtechs / jrtechs-RandomScripts
# mirror of https://github.com/jrtechs/RandomScripts.git


import re as clear
class process:
    """Namespace for text clean-up helpers."""

    @staticmethod
    def master_clean_text(text):
        """Return *text* lower-cased with HTML tags and punctuation removed.

        Processing order:

        1. Anything matching ``<.*?>`` is deleted. ``.`` does not match a
           newline here, so a tag that spans several lines is not removed
           as a tag.
        2. Backslashes, single quotes and double quotes are deleted outright
           (no space is left behind, so ``you're`` becomes ``youre``).
        3. The text is stripped of surrounding whitespace and lower-cased.
        4. Every remaining punctuation character, tab and newline is
           replaced by a single space.

        Because stripping happens before step 4, the result may still start
        or end with spaces and may contain runs of spaces; call ``.split()``
        on the result to obtain clean tokens.

        Args:
            text: The raw string to clean.

        Returns:
            The cleaned string.
        """
        # Clean up all the HTML tags (non-greedy so each tag is matched alone).
        text = clear.sub(r'<.*?>', '', text)

        # Remove backslashes and quote characters in a single pass instead of
        # three separate substitutions.
        text = clear.sub(r"[\\'\"]", "", text)

        # Convert to lowercase to reduce complexity for later comparisons.
        text = text.strip().lower()

        # Map each unwanted character to a space. str.maketrans() with two
        # equal-length strings builds the one-to-one replacement table.
        unwanted = '!"\'#$%&()*+,-./:;<=>?@[\\]^_`{|}~\t\n'
        mapping_trans = str.maketrans(unwanted, " " * len(unwanted))
        return text.translate(mapping_trans)

    # Example usage:
    # process.master_clean_text("<a> Say youre scrapping a text from you'r website !! WEll it might be swap CASE or unevened you wanna remove all the punctation's into separate WOrd !!!!</a>").split()