Hopefully someone can help me out. This seems like a very simple question that I just can't find the answer to. I am trying to create a tempfile, and then using this same tempfile, I'd like to write into it using the dd command. And then open that same file and time the time it takes to read the file.
So basically:
create tempfile
then, dd if=/dev/zero of=tempfile
and finally, time how long it takes to open and read tempfile
I'm not sure why, but this is the error I'm getting. TypeError: coercing to Unicode: need string or buffer, instance found. I think it's because I've got the same file open at the same time, but not sure. Any ideas?
Here's the code:
import time
import tempfile
import subprocess
import argparse
def readfile(size, block_size, path):
with tempfile.NamedTemporaryFile(prefix='iospeeds-', dir=path, delete=True) as tf:
cmd = ['dd', 'if=/dev/zero', 'of={}'.format(tf), 'bs={}'.format(block_size), 'count={}'.format(size/block_size)]
subprocess.call(cmd, stderr=subprocess.STDOUT)
start_time = time.time()
with open(tf, 'rb') as read_file:
end_time = time.time()
total_time = start_time - end_time
print total_time
return total_time
def parse_args():
parser = argparse.ArgumentParser()
parser.add_argument('--size', type=int, default=1048576)
parser.add_argument('--block-size', type=int, default=4096)
parser.add_argument('--path', default='./')
return parser.parse_args()
def main():
args=parse_args()
size = args.size
block_size = args.block_size
path = args.path
readfile(size, block_size, path)
if __name__ == "__main__":
main()
Here's the Traceback:
Traceback (most recent call last):
File "./rd.py", line 38, in <module>
main()
File "./rd.py", line 35, in main
readfile(size, block_size, path)
File "./rd.py", line 14, in readfile
with open(tf, 'rb') as read_file:
Thanks!
You're trying to open a file with a file type in the name spot, basically you're trying to do open(file, 'rb') instead of open(filename, 'rb'). Try:
with open(tf.name, 'rb') as read_file:
Related
I have a huge zip file with a large number of files. Parsing all these files takes a lot of time, so I thought about using multiprocessing to speed things up. I am not sure how to approach it, as a zipfile.ZipFile in Python is not an iterable.
I am aware that I could extract all contents from the zip file and then iterate over the list of filenames, however, I'd prefer to not have to keep extra free space to hold the extracted data and would like to operate on the ZipFile.
Maybe there is any other solution to this this problem, so I am open to suggestions.
EDIT:
Using the below code technically works, but the problem is that each time the get_content() function runs, it seems the large zip file that I have is being opened again, ultimately taking as long as 15 seconds to reach each file.
import multiprocessing
from zipfile import ZipFile
from multiprocessing import Pool
import time
path = 'zipfile.zip'
def get_file_list(zip_path):
with ZipFile(zip_path, 'r') as zipObj:
listOfiles = zipObj.namelist()
return listOfiles
def get_content(file_name):
start_time = time.time()
with ZipFile(path, 'r') as zipObject:
with zipObject.open(file_name) as file:
content = file.read()
end_time = time.time()
print(f"It took {end_time - start_time} to open this file")
return content
def parse_files():
file_list = get_file_list(path)
with Pool(multiprocessing.cpu_count()) as p:
contents = p.map(get_content, file_list)
print(contents)
parse_files()
import os
import shutil
from zipfile import ZipFile
from multiprocessing import Pool
def create_dummy_zip():
os.mkdir("dummy")
for i in range(100):
with open(f"dummy/{i}.file", "w") as f:
f.write(f"Content: {i}")
shutil.make_archive("dummy", 'zip', "dummy")
shutil.rmtree('dummy')
def delete_dummy():
try:
os.remove("dummy.zip")
shutil.rmtree('dummy')
except:
pass
def get_file_list(zip_path):
with ZipFile(zip_path, 'r') as zipObj:
listOfiles = zipObj.namelist()
return listOfiles
def get_content(file_name):
with ZipFile("dummy.zip", 'r') as zipObject:
with zipObject.open(file_name) as file:
content = file.read()
return content
if __name__ == '__main__':
try:
create_dummy_zip()
file_list = get_file_list("dummy.zip")
with Pool(5) as p:
contents = p.map(get_content, file_list)
print(contents)
delete_dummy()
except:
delete_dummy()
I have a python script that reads in data from a .csv file and uses it to make mathematical calculation on the data. When I run it, I get this error:
Traceback (most recent call last):
File "HW1_PythonTemplate.py", line 120, in <module>
print ','.join(map(str,calculate(args.data, args.i)))
File "HW1_PythonTemplate.py", line 56, in calculate
with open(file, 'r') as csvfile:
TypeError: coercing to Unicode: need string or buffer, type found
My code looks like:
import argparse
import csv
import sys
def calculate( dataFile, ithAttr):
numObj, minValue, maxValue, mean, stdev, Q1, median, Q3, IQR = [0,"inf","-inf",0,0,0,0,0,0]
rows = []
with open(file, 'r') as csvfile:
csvreader = csv.reader(csvfile)
for row in csvreader:
rows.append(row)
columniStr = [row[ithAttr-1] for row in rows]
columniFloat = []
for value in columniStr:
try:
columniFloat.append(float(value))
except ValueError:
pass
In the calculate function, everything past that is just arbitrary math.
My main looks like:
if __name__ == "__main__":
parser = argparse.ArgumentParser(description='calc')
parser.add_argument('--i', type=int,
help="ith attribute of the dataset (2 <= i <= 29)",
default=5,
choices=range(2,30),
required=True)
parser.add_argument("--data", type=str,
help="Location of the dataset file",
default="energydata_complete.csv",
required=True)
args = parser.parse_args()
print ','.join(map(str,calculate(args.data, args.i)))
with open(file
You mis-spelled dataFile.
file is the built-in Python datatype for file objects, so you're accidentally trying to open a type.
I have a folder called pdfs. I first obtain a list of the files and print them:
import ghostscript, os
from os import listdir
from os.path import isfile, join
def get_files(path):
input_files = [f for f in listdir(path) if isfile(join(path, f))]
return input_files
def pdf2jpeg(pdf_input_path, jpeg_output_path):
args = ["pdf2jpeg", # actual value doesn't matter
"-dNOPAUSE",
"-sDEVICE=jpeg",
"-dJPEGQ=95",
"-r600x600",
"-sOutputFile=" + jpeg_output_path,
pdf_input_path]
ghostscript.Ghostscript(*args)
if __name__ == '__main__':
input_files = get_files("pdfs")
# pdf2jpeg("pdfs/test1.pdf", "jpgs/test1.jpg")
for input_file in input_files:
input_file_name = str("pdfs/"+str(input_file))
output_file_name = str('jpgs/'+str(input_file).replace(" ", "_").replace("pdf", "jpg"))#split(".")[0]
print input_file_name
print output_file_name
# pdf2jpeg(input_file_name, output_file_name)
OUTPUT:
pdfs/test1 (5th copy).pdf
jpgs/test1_(5th_copy).jpg
pdfs/test1 (copy).pdf
jpgs/test1_(copy).jpg
pdfs/test1 (4th copy).pdf
jpgs/test1_(4th_copy).jpg
pdfs/test1 (3rd copy).pdf
jpgs/test1_(3rd_copy).jpg
pdfs/test1 (another copy).pdf
jpgs/test1_(another_copy).jpg
Also when i execute pdf2jpeg("pdfs/test1.pdf", "jpgs/test1.jpg") the code works and I get the converted jpg.
Now when I want to loop through the list and uncoment the last line:pdf2jpeg(input_file_name, output_file_name)
if __name__ == '__main__':
input_files = get_files("pdfs")
# pdf2jpeg("pdfs/test1.pdf", "jpgs/test1.jpg")
for input_file in input_files:
input_file_name = str("pdfs/"+str(input_file))
output_file_name = str('jpgs/'+str(input_file).replace(" ", "_").replace("pdf", "jpg"))#split(".")[0]
print input_file_name
print output_file_name
pdf2jpeg(input_file_name, output_file_name)
I GET THIS ERROR:
Traceback (most recent call last):
File "gsPdf2Jpg.py", line 28, in <module>
pdf2jpeg(input_file_name, output_file_name)
File "gsPdf2Jpg.py", line 17, in pdf2jpeg
ghostscript.Ghostscript(*args)
File "/home/trackstarz/prohealth/phenv/local/lib/python2.7/site-packages/ghostscript/__init__.py", line 157, in Ghostscript
stderr=kw.get('stderr', None))
File "/home/trackstarz/prohealth/phenv/local/lib/python2.7/site-packages/ghostscript/__init__.py", line 72, in __init__
rc = gs.init_with_args(instance, args)
File "/home/trackstarz/prohealth/phenv/local/lib/python2.7/site-packages/ghostscript/_gsprint.py", line 177, in init_with_args
raise GhostscriptError(rc)
ghostscript._gsprint.GhostscriptError: limitcheck
I went through and changed the loop to only go through individual input_files[0], input_files[1] and they work, the moment I loop them all they stop working. The only thing I can think of is that I have to clear something from the memory, or disconnect from the file. I am just taking wild guesses here.
I have the input file that looks like this (infile.txt):
a x
b y
c z
I want to implement a program that enable user to write to STDOUT or file depending on the command:
python mycode.py infile.txt outfile.txt
Will write to file.
And with this
python mycode.py infile.txt #2nd case
Will write to STDOUT.
I'm stuck with this code:
import sys
import csv
nof_args = len(sys.argv)
infile = sys.argv[1]
print nof_args
outfile = ''
if nof_args == 3:
outfile = sys.argv[2]
# for some reason infile is so large
# so we can't save it to data structure (e.g. list) for further processing
with open(infile, 'rU') as tsvfile:
tabreader = csv.reader(tsvfile, delimiter=' ')
with open(outfile, 'w') as file:
for line in tabreader:
outline = "__".join(line)
# and more processing
if nof_args == 3:
file.write(outline + "\n")
else:
print outline
file.close()
When using 2nd case it produces
Traceback (most recent call last):
File "test.py", line 18, in <module>
with open(outfile, 'w') as file:
IOError: [Errno 2] No such file or directory: ''
What's the better way to implement it?
You can try this:
import sys
if write_to_file:
out = open(file_name, 'w')
else:
out = sys.stdout
# or a one-liner:
# out = open(file_name, 'w') if write_to_file else sys.stdout
for stuff in data:
out.write(stuff)
out.flush() # cannot close stdout
# Python deals with open files automatically
You can also use this instead of out.flush():
try:
out.close()
except AttributeError:
pass
This looks a bit ugly to me, so, flush will be just well.
i'm trying to dump RAM for my virtualbox using python script but it give me an error and I just can it solve it
this my code :
import os
from struct import *
import optparse
import random
import string
machine_name = "OSC-2016"
def dump_ram():
output_file = "test.elf"
ram_file_name = "ram.bin"
dump_cmd = "vboxmanage debugvm %s dumpvmcore --filename %s" %(machine_name, output_file)
os.system(dump_cmd)
popen_cmd = "readelf --program-headers %s |grep -m1 -A1 LOAD" %(output_file)
file_info = os.popen(popen_cmd).read()
#print file_info
file_info = " ".join(file_info.split()) #remove duplicate spaces
file_info = file_info.split() #create a list by splitting on spaces
ram_start = int(file_info[1], 16)
ram_size = int(file_info[4], 16)
print "RAM SIZE is ", ram_size
ram = open(output_file, "rb").read()[ram_start:ram_start + ram_size]
ram_file_var = open(ram_file_name, "wb")
ram_file_var.write(ram)
ram_file_var.close()
def main():
dump_ram()
if __name__ == '__main__':
main()
And the error as it shown
Traceback (most recent call last):
File "C:/Users/ABC/Desktop/dumpRAM.py", line 42, in <module> main()
File "C:/Users/ABC/Desktop/dumpRAM.py", line 39, in main dump_ram()
File "C:/Users/ABC/Desktop/dumpRAM.py", line 27, in dump_ram
ram_start = int(file_info[1], 16)
IndexError: list index out of range
WHAT TO DO ?
it for my homework in operating system course and tomorrow is the final day , so I will be so helpful if you help , and i'm writing this cuz the stack overflow won't publish my question cuz it have too much code and less details and it requiring more details hhhhhhhh