Content mismatch - Python

I am using a Python script to copy the data of one file into another:
input_file = open('blind_willie.MP3', 'rb')
contents = input_file.read()
output_file = open('f2.txt', 'wb')
output_file.write(contents)
When I open f2 in a text editor I see symbols like these:
ÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿù`‘~ +Pg]Nñòs
Is there a way to see the binary content of the f2 file?

Yes, there is a way to see the binary content of the f2 file, and you have discovered it. Those symbols represent the binary content of the file.
If you'd like to see a human-readable interpretation of the binary content, you'll need something like a hex dump program or hex editor.
On Linux, I use the hd or od -t x1 command.
If you'd like to write your own hex dump command, you might start with one of these:
scapy hexdump()
contents.encode("hex") (Python 2 only; on Python 3, bytes objects have a .hex() method instead)
http://code.activestate.com/recipes/577243-hex-dump/
http://code.activestate.com/recipes/576945/
Or you could use this code (Python 2):
def hd(data):
    """ str --> hex dump """
    def printable(c):
        import string
        return c in string.printable and not c.isspace()
    result = ""
    for i in range(0, len(data), 16):
        line = data[i:i+16]
        result += '{0:05x} '.format(i)
        result += ' '.join(c.encode("hex") for c in line)
        result += " " * (50 - len(line) * 3)
        result += ''.join(c if printable(c) else '.' for c in line)
        result += "\n"
    return result

input_file = open('blind_willie.MP3', 'rb')
contents = input_file.read()
output_file = open('f2.txt', 'wb')
output_file.write(hd(contents))
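If you are on Python 3, read() from a file opened in 'rb' mode returns bytes, so c.encode("hex") will not work there. Here is a minimal Python 3 sketch of the same idea (filenames taken from the question; everything else is just an illustration):
import string

def hd(data):
    """ bytes --> hex dump (Python 3 sketch) """
    printable = set(string.printable) - set(string.whitespace)
    result = ""
    for i in range(0, len(data), 16):
        line = data[i:i + 16]
        result += '{0:05x} '.format(i)
        result += ' '.join('{0:02x}'.format(b) for b in line)
        result += " " * (50 - len(line) * 3)
        result += ''.join(chr(b) if chr(b) in printable else '.' for b in line)
        result += "\n"
    return result

with open('blind_willie.MP3', 'rb') as input_file:
    contents = input_file.read()
with open('f2.txt', 'w') as output_file:  # text mode, since hd() returns str
    output_file.write(hd(contents))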

Related

Problem with not printing entire Hexadecimal for input file ... missing partial header

I'm new to Python, so any assistance is greatly appreciated.
I selected a PNG image and noticed the hexadecimal output doesn't include the full PNG header.
Code:
[linked photo: beginning hexadecimal of the file]
[output: beginning hexadecimal, with the header hex missing]
import sys
from tkinter import filedialog as fd

# Converts file to a binary .bin file
def binary_convert(filex):
    f = open(filex, 'rb')
    for b in f:
        # creates and prints to Output.txt (need to change to temp)
        sys.stdout = open('OutputX.bin', 'w')
        bytes = 0
        line = []
        fileContents = f.read()
        for b in fileContents:
            bytes = bytes + 1
            line.append(b)
            print("{0:0{1}x}".format(b, 2), end=" ")
            if bytes % 16 == 0:
                print("", end="")
                line = []
        print("")

# Main Program
# Open file via Windows Explorer
file1 = fd.askopenfilename()
# Converts the one file to binary
binary_convert(file1)
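A likely cause, offered as a hedged guess: the outer for b in f: loop reads the first line of the binary file before f.read() is called, so the bytes up to the first newline byte (which include the PNG header) never reach the hex output. A minimal sketch of the reading part without that loop (filenames kept from the question, output written directly instead of redirecting sys.stdout):
def binary_convert(filex):
    # Read the whole file first, so the header bytes are included
    with open(filex, 'rb') as f:
        fileContents = f.read()
    with open('OutputX.bin', 'w') as out:
        for count, b in enumerate(fileContents, start=1):
            out.write("{0:02x} ".format(b))
            if count % 16 == 0:
                out.write("\n")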

Removing phrases from a file based on another file (Python)

How do I do this in python?
badphrases.txt contains
Go away
Don't do that
Stop it
allphrases.txt contains
I don't know why you do that. Go away.
I was wondering what you were doing.
You seem nice
I want allphrases.txt to be clean of the lines in badphrases.txt.
It's trivial in bash
cat badfiles.txt | while read b
do
cat allphrases.txt | grep -v "$b" > tmp
cat tmp > allphrases.txt
done
Oh, you thought I hadn't looked or tried. I searched for over an hour.
Here's my code:
import os

# Files
ttv = "/tmp/tv.dat"
tmp = "/tmp/tempfile"
bad = "/tmp/badshows"
# badfiles already exists
# ...code right here creates ttv

# Function grep_v
def grep_v(f, str):
    file = open(f, "r")
    for line in file:
        if line in str:
            return True
    return False

t = open(tmp, 'w')
tfile = open(ttv, "r")
for line in tfile:
    if not grep_v(bad, line):
        t.write(line)
tfile.close()
t.close()
os.rename(tmp, ttv)
First, google how to read a file in Python. You will probably get something like this: How do I read a file line-by-line into a list?
Use that to read both files into lists:
with open('badphrases.txt') as f:
    content = f.readlines()
badphrases = [x.strip() for x in content]

with open('allphrases.txt') as f:
    content = f.readlines()
allphrases = [x.strip() for x in content]
Now you have both contents in lists.
Iterate over allphrases and check if phrases from badphrases are present in it.
At this point you might consider googling:
how to iterate over a list python
how to check if string present in another string python
Take the code from those places and build a brute-force algorithm like this:
for line in allphrases:
    flag = True
    for badphrase in badphrases:
        if badphrase in line:
            flag = False
            break
    if flag:
        print(line)
If you understand this code, you will notice you need to replace print with output to a file.
Now google how to print to a file in Python.
Then think about how to improve the algorithm. All the best.
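For reference, a minimal sketch of that print-to-file step, assuming an output file named cleaned.txt (the filename is not part of the question):
with open('cleaned.txt', 'w') as out:
    for line in allphrases:
        # keep only lines that contain none of the bad phrases
        if not any(badphrase in line for badphrase in badphrases):
            out.write(line + '\n')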
UPDATE:
@COLDSPEED suggested you can simply google
- how to replace lines in a file in python:
You might get something like this: Search and replace a line in a file in Python
Which also works.
Solution (not too bad):
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import feedparser, os, re

# Files
h = os.environ['HOME']
ttv = h + "/WEB/Shows/tv.dat"
old = h + "/WEB/Shows/old.dat"
moo = h + "/WEB/Shows/moo.dat"
tmp = h + "/WEB/Shows/tempfile"
bad = h + "/WEB/Shows/badshows"

# Function not_present
def not_present(f, str):
    file = open(f, "r")
    for line in file:
        if str in line:
            return False
    return True

# Sources (shortened)
sources = ['http://predb.me/?cats=tv&rss=1']

# Grab all the feeds and put them into ttv and old
k = open(old, 'a')
f = open(ttv, 'a')
for h in sources:
    d = feedparser.parse(h)
    for post in d.entries:
        if not_present(old, post.link):
            f.write(post.title + "|" + post.link + "\n")
            k.write(post.title + "|" + post.link + "\n")
f.close()
k.close()

# Remove shows without [Ss][0-9] and put them in moo
m = open(moo, 'a')
t = open(tmp, 'w')
file = open(ttv, "r")
for line in file:
    if re.search(r's[0-9]', line, re.I) is None:
        m.write(line)
        # print("moo", line)
    else:
        t.write(line)
        # print("tmp", line)
t.close()
m.close()
os.rename(tmp, ttv)

# Remove badshows
t = open(tmp, 'w')
with open(bad) as f:
    content = f.readlines()
bap = [x.strip() for x in content]
with open(ttv) as f:
    content = f.readlines()
all = [x.strip() for x in content]
for line in all:
    flag = True
    for b in bap:
        if b in line:
            flag = False
            break
    if flag:
        t.write(line + "\n")
t.close()
os.rename(tmp, ttv)

replace line if found or append - python

I have text that is key-value pairs separated by '='. I would like to replace the line if the key matches. If not, I would like to append it at the bottom. I've tried several ways, including:
def split_command_key_and_value(command):
    if '=' in command:
        command2 = command.split('=')
        return command2

def test(command, path):
    command2 = split_command_key_and_value(command)
    pattern = command2[0]
    myfile = open(path, 'r')  # open file handle for read
    # use r'', you don't need to replace '\' with '/'
    result = open(path, 'w')  # open file handle for write
    for line in myfile:
        line = line.strip()  # it's always a good habit to strip what you read from files
        if pattern in line:
            line = command  # if match, replace line
        result.write(line)  # write every line
    myfile.close()  # don't forget to close file handle
    result.close()
I know the above is just to replace text, but it deletes the text in the file, and I can't see why. Could someone point me in the right direction?
Thanks
Update:
I'm almost there, but some of my lines have similar keys, so multiple lines are matching when only one should. I've tried to incorporate a regex boundary in my loop with no luck. My code is below. Does anyone have a suggestion?
There is some text in the file that isn't key-value, so I would like to skip that.
def modify(self, name, value):
    comb = name + ' ' + '=' + ' ' + value + '\n'
    with open('/file/', 'w') as tmpstream:
        with open('/file/', 'r') as stream:
            for line in stream:
                if setting_name in line:
                    tmpstream.write(comb)
                else:
                    tmpstream.write(line)
I think I got it. See code below.
def modify(self, name, value):
    comb = name + ' ' + '=' + ' ' + value + '\n'
    mylist = []
    with open('/file/', 'w') as tmpstream:
        with open('/file/', 'r') as stream:
            for line in stream:
                a = line.split()
                b = re.compile('\\b' + name + '\\b')
                if len(a) > 0:
                    if b.search(a[0]):
                        tmpstream.write(comb)
                    else:
                        tmpstream.write(line)
I spoke too soon. It stops at the key-value I provide. So, it only writes one line, and doesn't write the lines that don't match.
def modify(name, value):
    comb = name + ' ' + '=' + ' ' + value + '\n'
    mylist = []
    with open('/file1', 'w') as tmpstream:
        with open('/file2', 'r') as stream:
            for line in stream:
                a = line.split()
                b = re.compile('\\b' + name + '\\b')
                if len(a) > 0:
                    if b.search(a[0]):
                        tmpstream.write(comb)
                else:
                    tmpstream.write(line)
Can anyone see the issue?
Because when you open a file for writing
result = open(path, 'w')  # open file handle for write
you erase its content. Try writing to a different file and, after all the work is done, replacing the old file with the new one. Or read all the data into memory, then process it and write it back to the file:
with open(path) as f:
    data = f.read()
with open(path, 'w') as f:
    for line in data.splitlines():
        # do the work on each line here, then write it back, e.g.:
        f.write(line + '\n')
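A hedged completion of that read-into-memory idea for the question's replace-or-append task (the function name and the exact key-matching rule are illustrative, not the answerer's code):
def replace_or_append(path, command):
    key = command.split('=')[0].strip()
    with open(path) as f:
        lines = f.read().splitlines()
    replaced = False
    with open(path, 'w') as f:
        for line in lines:
            # compare the key exactly, so similar keys do not both match
            if '=' in line and line.split('=')[0].strip() == key:
                f.write(command + '\n')
                replaced = True
            else:
                f.write(line + '\n')
        if not replaced:
            f.write(command + '\n')  # key not found: append at the bottom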
First of all, you are reading and writing the same file. You could first read it all and then write it back line by line:
with open(path, 'r') as f:
    myfile = f.read()  # read everything into the variable "myfile"
result = open(path, 'w')  # open file handle for write
for line in myfile.splitlines():  # process the original file content 1 line at a time
    # as before
I strongly recommend reading python's documentation on how to read and write files.
If you open an existing file in write-mode open(path, 'w'), its content will be erased:
mode can be (...) 'w' for only writing (an existing file with the same name will be erased)
To replace a line in python you can have a look at this: Search and replace a line in a file in Python
Here is one of the solutions provided there, adapted to your context (tested with Python 3):
from tempfile import mkstemp
from shutil import move
from os import close

def test(filepath, command):
    # Split command into key/value
    key, _ = command.split('=')
    matched_key = False
    # Create a temporary file
    fh, tmp_absolute_path = mkstemp()
    with open(tmp_absolute_path, 'w') as tmp_stream:
        with open(filepath, 'r') as stream:
            for line in stream:
                if key in line:
                    matched_key = True
                    tmp_stream.write(command + '\n')
                else:
                    tmp_stream.write(line)
        if not matched_key:
            tmp_stream.write(command + '\n')
    close(fh)
    move(tmp_absolute_path, filepath)
Note that with the code above every line that matches key (key=blob or blob=key) will be replaced.
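A hypothetical usage example (the filename and key=value pair are made up for illustration):
test('settings.conf', 'timeout=30')  # replaces the timeout line if present, otherwise appends it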

Save output from biopython object into a file?

Here I have code written to extract the "locus_tag" of a gene using its "id". How can I save the output from this into a file in tab-separated format? Code adopted and modified from https://www.biostars.org/p/110284/
from Bio import SeqIO

foo = open("geneid.txt")
lines = foo.read().splitlines()
genbank_file = open("example.gbk")
for record in SeqIO.parse(genbank_file, "genbank"):
    for f in record.features:
        if f.type == "CDS" and "protein_id" in f.qualifiers:
            protein_id = f.qualifiers["protein_id"][0]
            if protein_id in lines:
                print f.qualifiers["protein_id"][0], f.qualifiers["locus_tag"][0]
Try adding something like this, but you will need to make certain the indentation is correct relative to the code you have already written:
with open(your_outputFileName, 'a') as outputFile:  # 'a', so earlier matches are not overwritten
    string = '\t'.join([f.qualifiers['protein_id'][0], f.qualifiers['locus_tag'][0]])
    outputFile.write(string + '\n')
You should also consider opening your initial file using "with". This will automatically close the file when you are done with it; otherwise, be certain to close the file yourself (e.g., foo.close()).
for record in SeqIO.parse(genbank_file, 'genbank'):
    for f in record.features:
        if f.type == 'CDS' and 'protein_id' in f.qualifiers:
            protein_id = f.qualifiers['protein_id'][0]
            if protein_id in lines:
                print f.qualifiers['protein_id'][0], f.qualifiers['locus_tag'][0]
                with open('your_outputFileName', 'a') as outputFile:  # 'a': keep every match
                    string = '\t'.join([f.qualifiers['protein_id'][0], f.qualifiers['locus_tag'][0]]) + '\n'
                    outputFile.write(string)
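And a hedged sketch of the "with" suggestion for the input files (same filenames as the question; SeqIO.parse is consumed into a list before the file closes):
from Bio import SeqIO

with open("geneid.txt") as foo:
    lines = foo.read().splitlines()
with open("example.gbk") as genbank_file:
    records = list(SeqIO.parse(genbank_file, "genbank"))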

Python: Issue when trying to read and write multiple files

This script reads and writes all the individual HTML files in a directory. The script iterates over them, highlights matches, and writes the output. The issue is that after highlighting the last instance of the search term, the script drops everything that comes after that last match in the output of each file. Any help here is appreciated.
import os
import sys
import re

source = raw_input("Enter the source files path:")
listfiles = os.listdir(source)
for f in listfiles:
    filepath = os.path.join(source + '\\' + f)
    infile = open(filepath, 'r+')
    source_content = infile.read()
    color = ('red')
    regex = re.compile(r"(\b in \b)|(\b be \b)|(\b by \b)|(\b user \b)|(\bmay\b)|(\bmight\b)|(\bwill\b)|(\b's\b)|(\bdon't\b)|(\bdoesn't\b)|(\bwon't\b)|(\bsupport\b)|(\bcan't\b)|(\bkill\b)|(\betc\b)|(\b NA \b)|(\bfollow\b)|(\bhang\b)|(\bbelow\b)", re.I)
    i = 0; output = ""
    for m in regex.finditer(source_content):
        output += "".join([source_content[i:m.start()],
                           "<strong><span style='color:%s'>" % color[0:],
                           source_content[m.start():m.end()],
                           "</span></strong>"])
        i = m.end()
    outfile = open(filepath, 'w')
    outfile.seek(0, 2)
    outfile.write(output)
    print "\nProcess Completed!\n"
    infile.close()
    outfile.close()
raw_input()
After your for loop is over, you need to include whatever is left after the last match:
    ...
    i = m.end()
output += source_content[i:]  # Here's the end of your file
outfile = open(filepath, 'w')
...
