I wrote a class to deal with large files and I want to make a "write" method for the class so that I can easily make changes to the data in the file and then write out a new file.
What I want to be able do is:
1.) Read in the original file
sources = Catalog(<filename>)
2.) Make changes on the data contained in the file
for source in sources:
source['blah1'] = source['blah1'] + 4
3.) Write out the updated value to a new file
sources.catalog_write(<new_filename>)
To this end I wrote a fairly straightforward generator,
class Catalog(object):
def __init__(self, fname):
self.data = open(fname, 'r')
self.header = ['blah1', 'blah2', 'blah3']
def next(self):
line = self.data.readline()
line = line.lstrip()
if line == "":
self.data.close()
raise StopIteration()
cols = line.split()
if len(cols) != len(self.header):
print "Input catalog is not valid."
raise StopIteration()
for element, col in zip(self.header, cols):
self.__dict__.update({element:float(col)})
return self.__dict__.copy()
def __iter__(self):
return self
This is my attempt at a write method:
def catalog_write(self, outname):
with open(outname, "w") as out:
out.write(" ".join(self.header) + "\n")
for source in self:
out.write(" ".join(map(str, source)) + "\n")
But I get the following error when I try to call that class method,
File "/Catalogs.py", line 53, in catalog_write
for source in self:
File "/Catalogs.py", line 27, in next
line = self.data.readline()
ValueError: I/O operation on closed file
I realize that this is because generators are generally a one time deal but I know that there are workarounds to this (like this question and this post but I'm not sure what the best way to do this is. These files are quite large and I'd like their read in and use to be as efficient as possible (both time-wise and memory-wise). Is there a pythonic way to do this?
Assumptions made:
Input File: [ infile ]
1.2 3.4 5.6
4.5 6.7 8.9
Usage:
>>> a = Catalog('infile')
>>> a.catalog_write('outfile')
Now Output File: [ outfile ]
blah1 blah2 blah3
1.2 3.4 5.6
4.5 6.7 8.9
Writing it again to another file: [ outfile2 ]
>>> a.catalog_write('outfile2')
Now Output File: [ outfile2 ]
blah1 blah2 blah3
1.2 3.4 5.6
4.5 6.7 8.9
So from what you have posted, looks like you need to reopen your data [ Assuming it is the file object with file name as self.fname ].
Modify your __init__ to save the fname as an attribute
Create a data object initially [ I am not opening it at __init__ stage, so that you could open and close when needed all inside your next() method ] I have just created the data as an object so that it can have an attribute closed like a file object, so that you could check whether self.data.closed is True and reopen the same from inside your next() method and read from the same.
def __init__(self, fname):
self.fname = fname
self.data = object()
self.data = lambda: None
self.data.closed = True
self.header = ['blah1', 'blah2', 'blah3']
Now the next method is modified as follows :
def next(self):
if self.data.closed:
self.data = open(self.fname, "r")
line = self.data.readline()
line = line.lstrip()
if line == "":
if not self.data.closed:
self.data.close()
raise StopIteration()
cols = line.split()
if len(cols) != len(self.header):
print "Input catalog is not valid."
if not self.data.closed:
self.data.close()
raise StopIteration()
for element, col in zip(self.header, cols):
self.__dict__.update({element:float(col)})
return self.__dict__.copy()
Your catalog_write method should be as follows :
Note that any modifications to data must be done within the for loop as shown.
def catalog_write(self, outname):
with open(outname, "w") as out:
out.write(" ".join(self.header) + "\n")
for source in self:
source['blah1'] = 444 # Data modified.
out.write(" ".join(map(str, [source[self.header[i]] for i in range(len(self.header)) ])) + "\n")
I assumed that you want the updated values of the headers written as a column in the outname file.
Related
As the title says, I made a file editing program with python.
Here is the code that I'm have a problem with:
#fileEditing.py
def fileError(file):
raise OSError("file {} does not exist".format(file))
class AccessFile():
def fileExists(self, file):
import os
return bool(os.path.exists(file))
def filecreate(self, file):
if not self.fileExists(file):
with open(file, "w") as f:
f.close()
else: raise OSError("file {} already exists".format(file))
def filedelete(self, file):
import os
if self.fileExists(file):
os.remove(file)
else: fileError(file)
def fileread(self, file):
#check if file exists
if self.fileExists(file):
#detect length of file
with open(file, "r") as f:
line = " "
x = 0
while line != "":
line = f.readline()
x += 1
#piece lines together in a list
filelines = []
with open(file, "r") as f:
for i in range(x - 1):
filelines.append(str(f.readline()))
#return a tuple
return tuple(filelines)
else: fileError(file)
def filewrite(self, file, line, text):
''' BUG: apparently this either overwrites the line its writing or appends
to the line its writing... make up your mind!'''
if self.fileExists(file):
#get file contents
filelines = list(self.fileread(file))
#see if line parameter is out of range or not
try:
filelines[line] = text
except IndexError:
for i in range(line - len(filelines)):
filelines.append("")
filelines.append(str(text) + "\n")
#apply changes
with open(file, "w") as f:
f.write("") #delete contents
with open(file, "w") as f:
for l in filelines:
f.write(l)
else: fileError(file)
def fileoverwrite(self, file, data):
#if there is no file to delete, it will make a new one
try:
self.filedelete(file)
except:
pass
self.filecreate(file)
x = 0
for line in data:
print(line)
self.filewrite(file, x, line)
x += 1
accessfile = AccessFile()
The bug is in the filewrite(self, file, line, text) function. When called, it either writes a new line (which is what I want it to do), appends to the line its supposed to replace, or just doesn't write any lines at all.
Say I want to write a python file with this program:
#pytesting.py
from fileEditing import *
file = "/Users/ashton/Desktop/Atom/Python/FileEditing/FileManager.py"
data = [
"from fileEditing import *",
"",
"class FileEditing():",
" def __init__(options, immutable_files):",
" self.options, self.immutable_files = options, immutable_files",
" ",
" def prompt():",
" ",
"",
"while True:",
" pass"
]
accessfile.fileoverwrite(file, data)
When I run it, it makes a file with accessfile.fileoverwrite(file, data), like its supposed to.
But thats where things get whacky.
(FileManager.py below)
from fileEditing import *
class FileEditing():
def __init__(options, immutable_files): self.options, self.immutable_files = options, immutable_files
def prompt():
while True:
If you know how to fix the filewrite(self, file, line, text), please let me know.
(I use python 2.7 but python 3 is fine)
So this is definitely a Python 3.x solution but you said that it is fine, don't know if it will work in Python 2.x but it is so simple it should:
def file_overwrite(self, file, data):
with open(file, 'w') as file:
file.write('\n'.join(data))
And you seemingly also need to fix that data list because it is missing a few commas. Also the fact that this is all in a class is a bit weird, you do nothing with the instance, they all might as well be separate functions or #classmethods or #staticmethods. Also several things could be improved with your other functions. For example you shouldn't open the file twice and count its lines to read it. Just do file.readlines() at it will return a list of all lines:
def fileread(self, file):
if self.fileExists(file):
with open(file) as file:
return file.readlines()
else:
fileError(file)
Then also import os once at the start of the file, you don't need to import it in every function where you use os, also:
with open(file, "w") as f:
f.close()
f.close() is completely pointless because the context manger closes the file anyways and also there is mode "x" which is specifically made for file creation and will raise an error if the file already exists: https://www.w3schools.com/python/python_file_handling.asp
I'm trying to utilise list comprehension for sorting data from a very large file. The file structure is like so:
THING
info1
info2
info3
THING
info1
info2
info3
... and so on.
Basically trying to collect all info1 into a list and all info2 into another list. I have a previous script which does this, but it's slow. I'm also trying to make it object oriented so I can use the data more efficiently.
Old script:
info1_data = []
info2_data = []
with open(myfile) as f:
for line in f:
if re.search('THING',line):
line=next(f)
info1_data.append(line)
line=next(f)
info2_data.append(line)
New script:
def __init__(self, file):
self.file = file
def sort_info1(self):
with self.file as f:
info1_data = [next(f) for line in f if re.search('THING',line)]
return info1_data
def sort_info2(self):
with self.file as f:
info2_data = [next(f).next(f) for line in f if re.search('THING',line)]
return info2_data
The new script works for getting info1_data as a list. However, to get info2_data I can't find anything for skipping 2 lines using this method. I guessed at next(f).next(f). It runs but doesn't produce anything.
Is this possible?
Many thanks.
Following help from Moses I've this solution. The islice is very confusing though and I don't fully understand it, even after reading the python.docs. Does the iterable get the data (i.e., info1 or info2) or do the start, stop and step dictate what data is extracted?
islice(iterable, start, stop[, step])
from itertools import islice
import re
class SomeClass(object):
def __init__(self, file):
self.file = file
def search(self, word, i):
self.file.seek(0) # seek to start of file
for line in self.file:
if re.search(word, line) and i == 0:
line = next(self.file)
yield line
elif re.search(word, line) and i == 1:
line = next(self.file)
line = next(self.file)
yield line
def sort_info1(self):
return list(islice(self.search('THING',0), 0, None, 2))
def sort_info2(self):
return list(islice(self.search('THING',1), 2, None, 2))
info1 = SomeClass(open("test.dat")).sort_info1()
info2 = SomeClass(open("test.dat")).sort_info2()
You should the seek the file back to the start in order to repeat the search from the beginning of the file. Also, you could use a generator function to decouple the search operation from the production of the data. Then use itertools.islice to step over lines:
from itertools import islice
class SomeClass(object):
def __init__(self, file):
self.file = file
def search(self, word):
self.file.seek(0) # seek to start of file
for line in self.file:
if re.search(word, line):
# yield next two lines
yield next(self.file)
yield next(self.file)
def sort_info1(self):
return list(islice(self.search('THING'), 0, None, 2))
def sort_info2(self):
return list(islice(self.search('THING'), 1, None, 2))
However instead of passing the file, I'll suggest you pass the path to the file instead so the file can be closed after each use, to avoid holding up resources when they are not (or not yet) needed.
You can do like this:
def sort_info2(self):
with self.file as f:
info2_data = [(next(f),next(f))[1] for line in f if re.search('THING',line)]
return info2_data
But it looks a little weird way!
I'm trying to write a program by change an open file, and I need to add a new line in the print.
In the open txt.file, it shows like this (I use"_" replace blank):
Name_____Height(m)_____Weight(kg)
Bill________1.58__________58
Mary_____1.65__________43
...
And now I want to add a new row like this:
Name_____Height(m)_____Weight(kg)_____Age(year)<---The new vertical line
Bill________1.58__________58_____________15
Mary_____1.65__________43_____________17
And my code it's:
data_file = open("file.txt", "r")
print(data_file.read())
data_file.close()
So, how could I add another vertical line in the open file? Moreover, If I want to add more rows, how can I do this?
One more thing, I use the python 3.5
I wrote a little class to do everything you asked for and more. Implementation examples are done at the bottom. Let me know if this works for you.
class Feed(object):
def __init__(self, file_name, sep, naming_convention=None):
self.file_name = file_name
self.feed_item_naming = naming_convention
self.sep = sep
self.feed = self.load_feed()
def get_head(self, file=None):#lmao...
'''
Get the header
'''
if not file:
head = open(self.file_name).readline().split(self.sep)
else:
head = file[0].split(self.sep)
return head
def __repr__(self):
return repr(self.feed)
def load_feed(self):
'''
load a feed object
set the key of each item to the naming convention
if we have multiple item names we increment the name bill becomes bill_2 and then bill_3 etc...
'''
#first we open the file and grab the headers
file = [x.rstrip() for x in open(self.file_name).readlines()]
self.header = self.get_head(file)
if not self.feed_item_naming and self.feed_item_naming not in self.header:
self.feed_item_naming = self.header[0]
data = {}
for line in file[1:]:
if line != '':
line = line.split(self.sep)
pos = line[self.header.index(self.feed_item_naming)]
while pos in data:
try:
ending = int(pos[-1])+1
pos.replace(pos[-1], ending)
except:
pos = pos+'_2'
data[pos] = {}
for item in self.header:
data[pos][item] = line[self.header.index(item)]
return data
def unload_feed(self, file_name=None, sep=None):
'''
write the modified feed back out to a data file
'''
if not file_name:
file_name = self.file_name
if not sep:
sep = self.sep
with open(file_name, 'wb') as file:
for i in self.header:
if i != self.header[-1]:
file.write(i+sep)
else:
file.write(i)
file.write('\n')
for i in self.feed:
for x in self.header:
if x != self.header[-1]:
file.write(str(self.feed[i][x])+sep)
else:
file.write(str(self.feed[i][x]))
file.write('\n')
def add_key(self, key, default_value=None):
'''
Add a key to each of the items
'''
if key not in self.header:
for i in self.feed:
self.feed[i][key]=default_value
self.header.append(key)
def get_key_value(self, item, key):
'''
get the value for the items key
'''
return self.feed[item][key]
def get_item(self, item):
'''
get an individual item
'''
return self.feed[item]
def set_key_value(self, item, key, value):
'''
set the value of each items key
{item:{key:value, key:value}, item...etc}
'''
self.feed[item][key] = value
def set_key_values(self, item, key_value_dict):
'''
set multiple key values for an item
'''
for k,v in key_value_dict.iteritems():
self.set_key_value(item, k, v)
def add_item(self, item):
'''
Add a new item
'''
while item in self.feed:
try:
end = str(int(item[-1])+1)
item = item.replace(item[-1], end)
except:
item = item+'_2'
self.feed[item] = {}
self.feed[item][self.feed_item_naming] = item
for i in self.header:
if i != self.feed_item_naming:
self.feed[item][i] = None
f = Feed('file.txt', '_____', 'Name') #initialize a new feed object, make sure that all seperators are the same for each item in your file
f.add_item('Angela') #add a new item
f.set_key_values('Angela', {'Height(m)':5, 'Weight(kg)':123}) #set the new items height and weight
f.add_key('Position')#create a new key for each item
f.unload_feed() #write the feed back to the file
print(f)
If by "add a new vertical line" you mean "add a new column" to your file, you can do this with the help of the csv module.
The code below works by reading the contents of your file as a list, making the changes, and then writing the updated list back to the file. You can add rows to your file this way, as well.
import csv
with open('file.txt', 'r') as f:
reader = list(csv.reader(f, delimiter=' ')) # if your file is delimited by spaces, tabs, etc.
# include that value here. It appears that
# your file is space-delimited, but that's
# just a guess based on the info in your question.
for i,row in enumerate(reader):
if i == 0:
row.append('Age(year)')
if i == 1:
row.append('15')
if i == 2:
row.append('17')
with open('file.txt','w') as f:
wr = csv.writer(f, delimiter=' ')
for row in reader:
wr.writerow(row)
# file.txt output:
# Name Height(m) Weight(kg) Age(year)
# Bill 1.58 58 15
# Mary 1.6 43 17
This code also uses with statements when working with your file. Using either with or close() (like you included in your question) is the correct practice when working with files. A with statement is easy to use because it closes your file automatically.
I want to create a class for storing attributes of the many data files that my script has to process. The attributes are values that are found in the datafiles, or values that are calculated from other values that are found in the data files.
Unfortunately, I'm not understanding the output of the code that I've written to accomplish that goal. What I think this should do is: print the name of the file being processed and a value seqlength from that file. The actual output is given below the code.
class SrcFile:
def __init__(self, which):
self.name = which
def seqlength(self):
with open(self.name) as file:
linecounter = 0
for line in file:
linecounter += 1
if linecounter == 3:
self.seqlength = int(line.split()[0])
break
for f in files:
file = SrcFile(f)
print(file.name, file.seqlength)
This prints file.name as expected, but for file.seqlength it returns a value that I don't understand.
../Testdata/12_indels.ss <bound method SrcFile.seqlength of <__main__.SrcFile object at 0x10066cad0>>
It's clear to me that I'm not understanding something fundamental about classes and functions. Is it clear to you what I'm missing here?
.seqlength is a method and needs (), but you are also not returning anything from it. Try this instead:
def seqlength(self):
with open(self.name) as file:
linecounter = 0
for line in file:
linecounter += 1
if linecounter == 3:
return int(line.split()[0])
And then calling it:
for f in files:
file = SrcFile(f)
print(file.name, file.seqlength())
Thats because .seqlength is a method.
Try doing
print(filename, file.seqlength())
I want to learn Python so I started writing my first program which is a phone book directory.
It has the options to add a name and phone number, remove numbers, and search for them.
Ive been stuck on the remove part for about 2 days now and just can't get it working correctly. I've been in the Python IRC and everything, but haven't been able to figure it out.
Basically, my program stores the numbers to a list in a file. I cannot figure out how to remove a particular line in the file but keep the rest of the file intact. Can someone please help me with this?
Some people have advised that it will be easier to do if I create a temp file, remove the line, then copy the remaining lines from the original file over to the temp file. Then write over the original file over with the temp file. So I have been trying this...
if ui == 'remove':
coname = raw_input('What company do you want to remove? ') # company name
f = open('codilist.txt', 'r') # original phone number listing
f1 = open('codilist.tmp', 'a') # open a tmp file
for line in f:
if line.strip() != coname.strip():
for line in f:
f1.write(line)
break # WILL LATER OVERWRITE THE codilist.txt WITH THE TMP FILE
else:
f1.write(line)
else:
print 'Error: That company is not listed.'
f1.close()
f.close()
continue
I assume your file contains something like <name><whitespace><number> on each line? If that's the case, you could use something like this for your if statement (error handling not included!):
name, num = line.strip().split()
if name != coname.strip():
# write to file
Suggestion:
Unless there is some specific reason for you to use a custom format, the file format json is quite good for this kind of task. Also note the use of the 'with' statement in these examples, which saves you having to explicitly close the file.
To write the information:
import json
# Somehow build a dict of {coname: num,...}
info = {'companyA': '0123456789', 'companyB': '0987654321'}
with open('codilist.txt', 'w') as f:
json.dump(info, f, indent=4) # Using indent for prettier files
To read/amend the file:
import json
with open('codilist.txt', 'r+') as f:
info = json.load(f)
# Remove coname
if coname in info:
info.pop(coname)
else:
print 'No record exists for ' + coname
# Add 'companyC'
info['companyC'] = '0112233445'
# Write back to file
json.dump(info, f, indent=4)
You'll need python2.6 or later for these examples. If you're on 2.5, you'll need these imports:
import simplejson as json
from __future__ import with_statement
Hope that helps!
Here is a pretty extensively rewritten version:
all the phone data is wrapped into a Phonebook class; data is kept in memory (instead of being saved and reloaded for every call)
it uses the csv module to load and save data
individual actions are turned into short functions or methods (instead of One Big Block of Code)
commands are abstracted into a function-dispatch dictionary (instead of a cascade of if/then tests)
This should be much easier to understand and maintain.
import csv
def show_help():
print('\n'.join([
"Commands:",
" help shows this screen",
" load [file] loads the phonebook (file name is optional)",
" save [file] saves the phonebook (file name is optional)",
" add {name} {number} adds an entry to the phonebook",
" remove {name} removes an entry from the phonebook",
" search {name} displays matching entries",
" list show all entries",
" quit exits the program"
]))
def getparam(val, prompt):
if val is None:
return raw_input(prompt).strip()
else:
return val
class Phonebook(object):
def __init__(self, fname):
self.fname = fname
self.data = []
self.load()
def load(self, fname=None):
if fname is None:
fname = self.fname
try:
with open(fname, 'rb') as inf:
self.data = list(csv.reader(inf))
print("Phonebook loaded")
except IOError:
print("Couldn't open '{}'".format(fname))
def save(self, fname=None):
if fname is None:
fname = self.fname
with open(fname, 'wb') as outf:
csv.writer(outf).writerows(self.data)
print("Phonebook saved")
def add(self, name=None, number=None):
name = getparam(name, 'Company name? ')
number = getparam(number, 'Company number? ')
self.data.append([name,number])
print("Company added")
def remove(self, name=None):
name = getparam(name, 'Company name? ')
before = len(self.data)
self.data = [d for d in self.data if d[0] != name]
after = len(self.data)
print("Deleted {} entries".format(before-after))
def search(self, name=None):
name = getparam(name, 'Company name? ')
found = 0
for c,n in self.data:
if c.startswith(name):
found += 1
print("{:<20} {:<15}".format(c,n))
print("Found {} entries".format(found))
def list(self):
for c,n in self.data:
print("{:<20} {:<15}".format(c,n))
print("Listed {} entries".format(len(self.data)))
def main():
pb = Phonebook('phonebook.csv')
commands = {
'help': show_help,
'load': pb.load,
'save': pb.save,
'add': pb.add,
'remove': pb.remove,
'search': pb.search,
'list': pb.list
}
goodbyes = set(['quit','bye','exit'])
while True:
# get user input
inp = raw_input("#> ").split()
# if something was typed in
if inp:
# first word entered is the command; anything after that is a parameter
cmd,args = inp[0],inp[1:]
if cmd in goodbyes:
# exit the program (can't be delegated to a function)
print 'Goodbye.'
break
elif cmd in commands:
# "I know how to do this..."
try:
# call the appropriate function, and pass any parameters
commands[cmd](*args)
except TypeError:
print("Wrong number of arguments (type 'help' for commands)")
else:
print("I didn't understand that (type 'help' for commands)")
if __name__=="__main__":
main()
Something simple like this will read all of f, and write out all the lines that don't match:
for line in f:
if line.strip() != coname.strip():
f1.write(line)
Ned's answer looks like it should work. If you haven't tried this already, you can set python's interactive debugger above the line in question. Then you can print out the values of line.strip() and coname.strip() to verify you are comparing apples to apples.
for line in f:
import pdb
pdb.set_trace()
if line.strip() != coname.strip():
f1.write(line)
Here's a list of pdb commands.
You probably don't want to open the temp file in append ('a') mode:
f1 = open('codilist.tmp', 'a') # open a tmp file
also, be aware that
for line in f:
...
f1.write(line)
will write everything to the file without newlines.
The basic structure you want is:
for line in myfile:
if not <line-matches-company>:
tmpfile.write(line + '\n') # or print >>tmpfile, line
you'll have to implement <line-matches-company> (there isn't enough information in the question to know what it should be -- perhaps if you showed a couple of lines from your data file..?)
I got this working...
if ui == 'remove':
coname = raw_input('What company do you want to remove? ') # company name
f = open('codilist.txt')
tmpfile = open('codilist.tmp', 'w')
for line in f:
if coname in line:
print coname + ' has been removed.'
else:
tmpfile.write(line)
f.close()
tmpfile.close()
os.rename('codilist.tmp', 'codilist.txt')
continue