# Tokenization pre-processing for the train/test data.
# Run this script before applying the algorithm: it copies train.json to
# clean_train.json, replacing every space that lies inside a double-quoted
# string with an underscore so that each quoted value becomes a single token.
# Everything outside double quotes is copied through unchanged.


def convert_chunks(chunks):
    """Yield each text chunk with spaces inside double quotes turned into '_'.

    The quote/escape state is carried from one chunk to the next, so the
    input may be split at arbitrary positions (even in the middle of a
    quoted string or right after a backslash).

    chunks -- iterable of strings that together form the input text.
    """
    in_string = False  # True while we are between an opening and closing '"'
    escaped = False    # True when the previous char was an unescaped '\' in a string
    for chunk in chunks:
        converted = []
        for ch in chunk:
            if ch == '"' and not escaped:
                # Only an unescaped quote opens or closes a string; an
                # escaped one (\") is part of the string's content.
                in_string = not in_string
            elif ch == " " and in_string:
                ch = "_"
            # A backslash escapes the next character only inside a string,
            # and only if it is not itself escaped (as in "\\").
            escaped = in_string and ch == "\\" and not escaped
            converted.append(ch)
        yield "".join(converted)


def underscore_quoted_spaces(text):
    """Return text with every space inside double quotes replaced by '_'."""
    return "".join(convert_chunks([text]))


def clean_file(source_path="train.json", target_path="clean_train.json",
               chunk_size=65536):
    """Write a copy of source_path to target_path with quoted spaces as '_'.

    The file is streamed in blocks of chunk_size characters rather than one
    character at a time, and both files are closed even if an error occurs.
    """
    with open(source_path, "r") as src, open(target_path, "w") as dst:
        # iter(callable, sentinel) keeps reading until read() returns "" (EOF).
        blocks = iter(lambda: src.read(chunk_size), "")
        for converted in convert_chunks(blocks):
            dst.write(converted)


if __name__ == "__main__":
    clean_file()