Trying to remove null bytes but deleting all my text - python

So I'm trying to remove null bytes from some text.
I wrote three functions that I think do the same thing.
They all end up giving me blank files and erasing all input.
Here is the example input with a null byte:
T: 14/01/2015 22:27:05**\00**||||END_OF_RECORD <- ** so you can see it (I can see it in my ubuntu text editor)
T: 14/01/2015 22:27:05 ||||END_OF_RECORD <- what my IDE shows is a box there
Here is the code I wrote to try and fix the files, but they just end up blank.
from pathlib import Path
# Removes null bytes from the txt files
def removeNULLBytes():
for p in workspace.glob('*.csv'):
new = Path(workspace, p.name)
new = new.with_suffix('.csv')
with p.open() as infile, new.open('wb') as outfile:
fileName = infile.name
with open(fileName, 'rb') as in_file:
data = in_file.read()
# data = str(data, encoding='utf8', errors='ignore')
data = (data.replace(b'\x00', b''))
outfile.write(data)
def removeNULLs():
for p in workspace.glob('*.csv'):
new = Path(workspace, p.name)
new = new.with_suffix('.csv')
with p.open() as infile, new.open('w') as outfile:
fileName = infile.name
with open(fileName, 'r') as in_file:
data = in_file.read()
# data = str(data, encoding='utf8', errors='ignore')
data = (data.replace(u"\u0000", ""))
outfile.write(data)
def removeNull():
for p in workspace.glob('*.csv'):
new = Path(workspace, p.name)
new = new.with_suffix('.csv')
with p.open() as infile, new.open('w') as outfile:
for line in infile.read():
newline = ''.join([i if not u"\u0000" else "" for i in line])
data = (line.replace(line, newline))
outfile.writelines(data)
if __name__ == '__main__':
workspace = Path('/home/')
# removeNULLBytes()
removeNull()
# removeNULLs()
Any advice is much appreciated. Thanks!

Related

How to convert a text file to json file?

I have this ".txt" file image so I want to convert it to a JSON file using python
I've tried a lot of solutions but It didn't work because of the format of the file.
can anyone help me, please!
can I convert it so it will be easy to manipulate it?
This is my file
Teste: 89
IGUAL
{
"3C:67:8C:E7:F5:C8": ["b''", "-83"],
"64:23:15:3D:25:FC": ["b'HUAWEI-B311-25FC'", "-83"],
"98:00:6A:1D:6F:CA": ["b'WE'", "-83"],
"64:23:15:3D:25:FF": ["b''", "-83"],
"D4:6B:A6:C7:36:24": ["b'Wudi'", "-51"],
"00:1E:2A:1B:A5:74": ["b'NETGEAR'", "-54"],
"3C:67:8C:63:70:54": ["b'Vodafone_ADSL_2018'", "-33"],
"90:F6:52:67:EA:EE": ["b'Akram'", "-80"],
"04:C0:6F:1F:07:40": ["b'memo'", "-60"],
"80:7D:14:5F:A7:FC": ["b'WIFI 1'", "-49"]
}
and this is the code I tried
import json
filename = 'data_strength/dbm-2021-11-21_12-11-47.963190.txt'
dict1 = {}
with open(filename) as fh:
for line in fh:
command, description = line.strip().split(None, 10)
dict1[command] = description.strip()
out_file = open('test1.json', "w")
json.dump(dict1, out_file, indent=4, sort_key=False)
out_file.close()
The JSON structure in your file starts at the first occurrence of a left brace. Therefore, you can just do this:
import json
INPUT = 'igual.txt'
OUTPUT = 'igual.json'
with open(INPUT) as igual:
contents = igual.read()
if (idx := contents.find('{')) >= 0:
d = json.loads(contents[idx:])
with open(OUTPUT, 'w') as jout:
json.dump(d, jout, indent=4)

writing into a csv file with python

heres my little program. at the end i want to write the names and passwords
into csv file like this:
Jack,9978
Sara,1647
but i cant!? my program output is correct but when i write it into csv it goes like:
Jack9978,Sara1674
how will you fix it?
import hashlib
import csv
answer = []
usr_pas = []
with open('...', 'r') as f:
reader = csv.reader(f)
for word in reader:
usr_pas.append(word)
for i in range(999, 10000):
num = str(i)
m = hashlib.sha256()
m.update(num.encode('utf-8'))
hsh = m.hexdigest()
hash_dict = {hsh: num}
for key in list(hash_dict.items()):
for value in usr_pas:
if key[0] == value[1]:
answer.append(value[0] +','+ key[1])
file = open("...", 'w', newline='')
with file:
writer = csv.writer(file)
writer.writerow(i.strip().replace(',', '') for i in answer)
file.close()
what did i wrong!?
Try this (lines with comments are changed):
import hashlib
import csv
answer = []
usr_pas = []
with open('...', 'r') as f:
reader = csv.reader(f)
for word in reader:
usr_pas.append(word)
for i in range(999, 10000):
num = str(i)
m = hashlib.sha256()
m.update(num.encode('utf-8'))
hsh = m.hexdigest()
hash_dict = {hsh: num}
for key in list(hash_dict.items()):
for value in usr_pas:
if key[0] == value[1]:
answer.append(value[0] +','+ key[1] + '\n') #added '\n' at the end
file = open("...", 'w', newline='')
with file:
writer = csv.writer(file)
writer.writerow(i for i in answer) #removed i.replace
file.close()
I guess you want a csv file with multiple lines instead of one. If so, my suggestion is to use csv.csvwriter.writerows instead of csv.csvwriter.writerow. The latter is designed to write a single row. See the official document here. Indeed multiple lines might be created with \n manipulator, it means a single line with multiple elements that contains "new line", which seems awkward.
Since we can use the default delimiter (comma), we just need to manage each element in the line as a tuple (or a list). Answers should be added into list answer like this:
answer.append((value[0], key[1]))
while we write rows in this way:
writer.writerows(answer)
Let's put them together:
import hashlib
import csv
answer = []
usr_pas = []
with open('...', 'r') as f:
reader = csv.reader(f)
for word in reader:
usr_pas.append(word)
for i in range(999, 10000):
num = str(i)
m = hashlib.sha256()
m.update(num.encode('utf-8'))
hsh = m.hexdigest()
hash_dict = {hsh: num}
for key in list(hash_dict.items()):
for value in usr_pas:
if key[0] == value[1]:
# answer.append(value[0] +','+ key[1])
answer.append((value[0], key[1]))
file = open("...", 'w', newline='')
with file:
writer = csv.writer(file)
# writer.writerow(i.strip().replace(',', '') for i in answer)
writer.writerows(answer)
file.close()

Unable to get text file output in python 2.7

I have taken input from Numbers.txt file and want to write output in out.txt file,
can anyone guide what is going wrong.
import num2word_EN as s
text = open("C:\\Users\\eanaaks\\Desktop\\Python Practice Program\\Numbers.txt","r")
outfile = open("C:\\Users\\eanaaks\\Desktop\\Python Practice Program\\out.txt", "w")
for line in text:
line = line.rstrip()
num = int(line)
print line
x = s.to_card(num)
print (x)
outfile.write("%s\n"%(line));
outfile.close()
text.close()
Here's an improved version of your code:
import num2word_EN as s
input_file = 'C:\Users\eanaaks\Desktop\Python Practice Program\Numbers.txt'
output_file = 'C:\Users\eanaaks\Desktop\Python Practice Program\out.txt'
with open(input_file, 'r') as fin
with open(output_file, 'w') as fout:
for line in fin:
num = int(line)
print(line)
x = s.to_card(num)
print(x)
# What's the data type of x? int? string?
# This will write the original data and the processed data separated by tab.
fout.write('%s\t%s\n' % (line.rstrip(), x));

How to split text file by id in python

I have a bunch of text files containing tab separated tables. The second column contains an id number, and each file is already sorted by that id number. I want to separate each file into multiple files by the id number in column 2. Here's what I have.
readpath = 'path-to-read-file'
writepath = 'path-to-write-file'
for filename in os.listdir(readpath):
with open(readpath+filename, 'r') as fh:
lines = fh.readlines()
lastid = 0
f = open(writepath+'checkme.txt', 'w')
f.write(filename)
for line in lines:
thisid = line.split("\t")[1]
if int(thisid) <> lastid:
f.close()
f = open(writepath+thisid+'-'+filename,'w')
lastid = int(thisid)
f.write(line)
f.close()
What I get is simply a copy of all the read files with the first id number from each file in front of the new filenames. It is as if
thisid = line.split("\t")[1]
is only done once in the loop. Any clue to what is going on?
EDIT
The problem was my files used \r rather than \r\n to terminate lines. Corrected code (simply adding 'rU' when opening the read file and swapping != for <>):
readpath = 'path-to-read-file'
writepath = 'path-to-write-file'
for filename in os.listdir(readpath):
with open(readpath+filename, 'rU') as fh:
lines = fh.readlines()
lastid = 0
f = open(writepath+'checkme.txt', 'w')
f.write(filename)
for line in lines:
thisid = line.split("\t")[1]
if int(thisid) != lastid:
f.close()
f = open(writepath+thisid+'-'+filename,'w')
lastid = int(thisid)
f.write(line)
f.close()
If you're dealing with tab delimited files, then you can use the csv module, and take advantage of the fact that itertools.groupby will do the previous/current tracking of the id for you. Also utilise os.path.join to make sure your filenames end up joining correctly.
Untested:
import os
import csv
from itertools import groupby
readpath = 'path-to-read-file'
writepath = 'path-to-write-file'
for filename in os.listdir(readpath):
with open(os.path.join(readpath, filename)) as fin:
tabin = csv.reader(fin, delimiter='\t')
for file_id, rows in groupby(tabin, lambda L: L[1]):
with open(os.path.join(writepath, file_id + '-' + filename), 'w') as fout:
tabout = csv.writer(fout, delimiter='\t')
tabout.writerows(rows)

Python: Issue when trying to read and write multiple files

This script reads and writes all the individual html files in a directory. The script reiterates, highlight and write the output.The issue is, after highlighting the last instance of the search item, the script removes all the remaining contents after the last search instance in the output of each file. Any help here is appreciated.
import os
import sys
import re
source = raw_input("Enter the source files path:")
listfiles = os.listdir(source)
for f in listfiles:
filepath = os.path.join(source+'\\'+f)
infile = open(filepath, 'r+')
source_content = infile.read()
color = ('red')
regex = re.compile(r"(\b in \b)|(\b be \b)|(\b by \b)|(\b user \b)|(\bmay\b)|(\bmight\b)|(\bwill\b)|(\b's\b)|(\bdon't\b)|(\bdoesn't\b)|(\bwon't\b)|(\bsupport\b)|(\bcan't\b)|(\bkill\b)|(\betc\b)|(\b NA \b)|(\bfollow\b)|(\bhang\b)|(\bbelow\b)", re.I)
i = 0; output = ""
for m in regex.finditer(source_content):
output += "".join([source_content[i:m.start()],
"<strong><span style='color:%s'>" % color[0:],
source_content[m.start():m.end()],
"</span></strong>"])
i = m.end()
outfile = open(filepath, 'w')
outfile.seek(0, 2)
outfile.write(output)
print "\nProcess Completed!\n"
infile.close()
outfile.close()
raw_input()
After your for loop is over, you need to include whatever is left after the last match:
...
i = m.end()
output += source_content[i:]) # Here's the end of your file
outfile = open(filepath, 'w')
...

Categories