I have the input file that looks like this (infile.txt):
a x
b y
c z
I want to implement a program that enable user to write to STDOUT or file depending on the command:
python mycode.py infile.txt outfile.txt
Will write to file.
And with this
python mycode.py infile.txt #2nd case
Will write to STDOUT.
I'm stuck with this code:
import sys
import csv
nof_args = len(sys.argv)
infile = sys.argv[1]
print nof_args
outfile = ''
if nof_args == 3:
outfile = sys.argv[2]
# for some reason infile is so large
# so we can't save it to data structure (e.g. list) for further processing
with open(infile, 'rU') as tsvfile:
tabreader = csv.reader(tsvfile, delimiter=' ')
with open(outfile, 'w') as file:
for line in tabreader:
outline = "__".join(line)
# and more processing
if nof_args == 3:
file.write(outline + "\n")
else:
print outline
file.close()
When using 2nd case it produces
Traceback (most recent call last):
File "test.py", line 18, in <module>
with open(outfile, 'w') as file:
IOError: [Errno 2] No such file or directory: ''
What's the better way to implement it?
You can try this:
import sys
if write_to_file:
out = open(file_name, 'w')
else:
out = sys.stdout
# or a one-liner:
# out = open(file_name, 'w') if write_to_file else sys.stdout
for stuff in data:
out.write(stuff)
out.flush() # cannot close stdout
# Python deals with open files automatically
You can also use this instead of out.flush():
try:
out.close()
except AttributeError:
pass
This looks a bit ugly to me, so, flush will be just well.
Related
I'm working through https://testdriven.io/developing-an-asynchronous-task-queue-in-python . I've also taken a look at sys.argv[1] meaning in script for clarification on sys.argv
From the former I have:
def save_file(filename, data):
random_str = uuid.uuid4().hex
outfile = f'{filename}_{random_str}.txt'
with open(os.path.join(OUTPUT_DIRECTORY, outfile), 'w') as outfile:
outfile.write(data)
def get_word_counts(filename):
wordcount = collections.Counter()
# get counts
with open(os.path.join(DATA_DIRECTORY, filename), 'r') as f:
for line in f:
wordcount.update(line.split())
for word in set(COMMON_WORDS):
del wordcount[word]
# save file
save_file(filename, json.dumps(dict(wordcount.most_common(20))))
# simulate long-running task
time.sleep(2)
proc = os.getpid()
print(f'Processed {filename} with process id: {proc}')
if __name__ == '__main__':
print(sys.argv, len(sys.argv))
# print(sys.argv[1], len(sys.argv))
get_word_counts(sys.argv[1])
When I run it directly with I get:
$ python tasks.py
['tasks.py'] 1
Traceback (most recent call last):
File "tasks.py", line 46, in <module>
get_word_counts(sys.argv[1])
IndexError: list index out of range
Given that you can see there is only one element in the list, why did the author write the the code in this way?
get_word_counts(sys.argv[1])
Should be
get_word_counts(sys.argv[0])
Indexes start at zero in most languages (including python)
can anyone tell me what is wrong with my code?
def count_letters(in_file, out_file):
in_file = open("in_file.txt", "r")
for line in in_file:
for x in sorted(line):
count = x.count()
out_file = open("out_file.txt", "w")
print(x, " ", count)
in_file.close()
out_file.close()
print(out_file)
it's supposed to
Takes two filenames (in_file and out_file)as arguments
Opens and reads the input file specified by in_file, and counts the number of occurrences of each letter (in a case-insensitive manner)
Writes the result in the output file specified by out_file
when I run it, it tells me that "in_file" is not defined, but I have defined it, and made an "in_file.txt."
any help would be appreciated!
You want pass "in_file.txt" as in_file variable and use it like:
in_f=open(in_file,"r")
similarly for out_file otherwise, what is the point of making a function?
But i think your error relates of not having "in_file.txt" in your current working directory, you can check with
import os
print os.getcwd()
this will show what files code can see
in_file = open(in_file, "r") # you must do the way so got an error
try this function:
def count_letters(in_file, out_file):
in_file_fd = open(in_file, "r") # open in file
out_file_fd = open(out_file, "w") # open out file
for line in in_file_fd:
for x in sorted(line):
count = line.count(x)
res = x + " " + str(count) + '\n'
print (res)
out_file_fd.write(res) # write the data into out file
in_file_fd.close() # close in_file
out_file_fd.close() # close out_file
count_letters('test.py', 'out_file')
It works for me, hope helpful.
I am quite new to Python and even newer to stdin stdout method. Nevertheless I need to make my script usable for UNIX commands, in order to make it possible for example to process 2 input files at once with my script.
This script works perfectly well with command line arguments:
newlist = []
def f1()
....
def f2(input_file):
vol_id = sys.argv[3]
for line in input_file:
if ... :
line = line.replace('abc','def')
line = line.replace('id', 'id'+vol_id)
....
newlist.append(line)
return newlist
def main():
if len(sys.argv) < 4:
print 'usage: ./myscript.py [file_in... file_out... volume_id]'
sys.exit(1)
else:
filename = sys.argv[1]
filename_out = sys.argv[2]
tree = etree.parse(filename)
extract(tree)
input_file = open(filename, 'rU')
change_class(input_file)
file_new = open(filename_out, 'w')
for x in newlist:
if '\n' in x:
x = x.replace('\n', '')
print>>file_new, x
When I tried to add stdin stdout to it, I first had a problem with reading the same input file first, and for this reason made some chages so that it would be actually open only once. Here is my modified main():
filename = sys.argv[1]
filename_out = sys.argv[2]
if filename == '-':
filename = sys.stdin
else:
input_file = open(filename, 'rU')
if filename_out == '-':
filename_out = sys.stdout
file_new = filename_out
else:
file_new = open(filename_out, 'w')
input_file = open(filename, 'rU')
tree = etree.fromstring(input_file)
extract(tree)
change_class(input_file)
for x in newlist:
if '\n' in x:
x = x.replace('\n', '')
print>>file_new, x
Then I ran my script like this:
./myscript.py - - volumeid < inputfile > outputfile
And I got this error message:
Traceback (most recent call last):
File "./myscript.py", line 191, in <module>
main()
File "./myscript.py", line 175, in main
input_file = open(filename, 'rU')
TypeError: coercing to Unicode: need string or buffer, file found
What am I doing wrong?
You are trying to use an open file object as a filename:
filename = sys.stdin
# ...
input_file = open(filename, 'rU')
You cannot re-read from sys.stdin anyway; you need to read all of the file into memory, then process it twice:
if filename == '-':
input_file = sys.stdin
else:
input_file = open(filename, 'rU')
input_data = input_file.read()
tree = etree.fromstring(input_data)
extract(tree)
change_class(input_data)
mwhere you'll have to alter change_class to handle a string, not an open file object.
I want this to recursively call the next file, listed in a manually inputted file. It is the first word listed in the file.
The current error messege is:
OSError: [Errno 22] Invalid argument: 'file1.txt\n'.
This is my current code:
import os
def crawl(fname):
infile = open(fname, 'r')
if os.stat(fname)[6]==0:
return "Visiting {}".format(fname)
infile.close()
else:
print ("Visiting {}".format(fname))
lines = infile.read().splitlines()
nextfile = lines[0].strip()
for line in lines:
crawl(nextfile)
Try:
import os
def crawl(fname):
with open(fname, "r") as infile:
print("Visiting {}".format(fname))
if os.stat(fname).st_size:
lines = infile.read().splitlines()
for line in lines:
crawl(line)
I'm pretty sure the problem is that you're getting a newline at the end of the filename you are reading from the first file. You can easily fix it, by using the strip method to remove the newline:
nextfile = lines[0].strip()
If I try executing the following code
f = file('test','rb')
fout = file('test.out','wb')
for i in range(10):
a = f.read(1)
fout.write(a)
f.close()
f = fout
f.seek(4)
print f.read(4)
Where 'test' is any arbitrary file, I get:
Traceback (most recent call last):
File "testbad.py", line 12, in <module>
print f.read(4)
IOError: [Errno 9] Bad file descriptor
If however, I change just the fout line to use a temporary file:
import tempfile
f = file('test','rb')
fout = tempfile.NamedTemporaryFile()
for i in range(10):
a = f.read(1)
fout.write(a)
f.close()
f = fout
f.seek(4)
print f.read(4)
There are no errors. Does anyone know why this is? I would have expected the first case to work, but I must be doing something wrong.
Thanks in advance for any help!
you've only opened the file fout for writing, not reading. To open for both use
fout = file('test.out','r+b')