I am refactoring C++ code using the Python bindings of the Clang compiler (cindex). Using that, I analyze the AST and prepare changes. I end up with a list of operations similar to the following:
DELETE line 10
INSERT line 5 column 32: << "tofu"
REPLACE from line 31 column 6 to line 33 column 82 with: std::cout << "Thanks SO"
...
My question is how to turn these into actual file changes.
Doing it directly with python seems tedious: patches need to be applied in the right order and checked for consistency. It looks quite hard and error-prone.
I also can’t find a good library to help (clang does have something called a Rewriter, but it isn't wrapped in Python. I'd really like to avoid C++ for refactoring if possible).
Maybe an idea could be to generate patches and apply them with git? But even that seems a bit tedious.
Any ideas?
So I rolled my own. The code is almost certainly buggy and not very pretty, but I'm posting it in the hope it might help someone until a better solution is found.
def format_place(place):
    """Render a source location as "line:column" (both 1-based, clang-style)."""
    return "{0}:{1}".format(place.line, place.column)


def format_extent(extent):
    """Render a source range as "start-end"."""
    return "{0}-{1}".format(format_place(extent.start), format_place(extent.end))


class PatchRecord(object):
    """Record patches, validate them, order them, and apply them.

    Locations follow clang conventions: .line and .column are 1-based.
    Extents are treated as half-open; the end column is exclusive.
    """

    def __init__(self):
        # Cache of readlines() output per patched file (edited in place).
        self.lines = {}
        # List of pending Patch objects per patched file.
        self.patches = {}

    class Patch(object):
        """Abstract base class for editing operations."""

        def __init__(self, filename, start, end):
            self.filename = filename
            self.start = start
            self.end = end

        def __repr__(self):
            # BUGFIX: the format string contained a literal "(unknown)" where
            # the {filename} placeholder belonged (the kwarg was passed but unused).
            return "{op}: {filename} {start}/{end} {what}".format(
                op=self.__class__.__name__.upper(),
                filename=self.filename,
                start=format_place(self.start),
                end=format_place(self.end),
                what=getattr(self, "what", ""))

        def apply(self, lines):
            """Subclasses edit *lines* (a list of strings) in place."""
            print("Warning: applying no-op patch")

    class Delete(Patch):
        """Remove the text covered by an extent."""

        def __init__(self, filename, extent):
            super(PatchRecord.Delete, self).__init__(
                filename, extent.start, extent.end)
            print("DELETE: {file} {extent}".format(
                file=self.filename, extent=format_extent(extent)))

        def apply(self, lines):
            # BUGFIX: convert the 1-based end column with "- 1" so Delete
            # agrees with Replace about where an extent stops; the original
            # deleted one character too many.
            lines[self.start.line - 1:self.end.line] = [
                lines[self.start.line - 1][:self.start.column - 1] +
                lines[self.end.line - 1][self.end.column - 1:]]

    class Insert(Patch):
        """Insert text at a single location (a zero-width extent)."""

        def __init__(self, filename, start, what):
            super(PatchRecord.Insert, self).__init__(filename, start, start)
            self.what = what
            print("INSERT {where} {what}".format(
                what=what, where=format_place(self.start)))

        def apply(self, lines):
            # BUGFIX: columns are 1-based, so the insertion point is index
            # column - 1; the original inserted one column too late.
            line = lines[self.start.line - 1]
            cut = self.start.column - 1
            lines[self.start.line - 1] = "%s%s%s" % (
                line[:cut], self.what, line[cut:])

    class Replace(Patch):
        """Replace the text covered by an extent with new text."""

        def __init__(self, filename, extent, what):
            super(PatchRecord.Replace, self).__init__(
                filename, extent.start, extent.end)
            self.what = what
            print("REPLACE: {where} {what}".format(
                what=what, where=format_extent(extent)))

        def apply(self, lines):
            lines[self.start.line - 1:self.end.line] = [
                lines[self.start.line - 1][:self.start.column - 1] +
                self.what +
                lines[self.end.line - 1][self.end.column - 1:]]

    # Convenience functions for creating patches
    def delete(self, filename, extent):
        self.patches.setdefault(filename, []).append(
            self.Delete(filename, extent))

    def insert(self, filename, where, what):
        self.patches.setdefault(filename, []).append(
            self.Insert(filename, where, what))

    def replace(self, filename, extent, what):
        self.patches.setdefault(filename, []).append(
            self.Replace(filename, extent, what))

    def _pos_to_tuple(self, position):
        """Convert a source location to a tuple for use as a sorting key."""
        return (position.line, position.column)

    def sort(self, filename):
        """Sort patches by extent start."""
        self.patches[filename].sort(key=lambda p: self._pos_to_tuple(p.start))

    def validate(self, filename):
        """Try to ensure patches are consistent (sorted, non-overlapping)."""
        print("Checking patches for %s" % filename)
        self.sort(filename)
        patches = self.patches[filename]
        # BUGFIX: compare each patch's start against the previous patch's
        # *end*; the original only compared starts, so overlapping extents
        # passed validation.
        for previous, current in zip(patches, patches[1:]):
            assert (self._pos_to_tuple(current.start) >=
                    self._pos_to_tuple(previous.end)), \
                "overlapping patches: %r / %r" % (previous, current)

    def _apply(self, filename):
        # Apply bottom-up (reverse order) so the coordinates of patches
        # earlier in the file stay valid while later ones are applied.
        self.sort(filename)
        lines = self._getlines(filename)
        for p in reversed(self.patches[filename]):
            print(p)
            p.apply(lines)

    def _getlines(self, filename):
        """Get source file lines for editing (cached per file)."""
        if filename not in self.lines:
            with open(filename) as f:
                self.lines[filename] = f.readlines()
        return self.lines[filename]

    def apply(self):
        """Validate and apply all recorded patches, rewriting each file."""
        for filename in self.patches:
            self.validate(filename)
            self._apply(filename)
            with open(filename, "w") as output:
                output.write("".join(self._getlines(filename)))
Just create a PatchRecord object, add changes using the insert, replace and delete methods, and apply them with apply when you're ready.
Related
I need to do a streaming upload (i.e., not load the full file part in memory) of a slice of a large (multi-GB) file, using python-requests.
I've looked around in the doc and on Stack Overflow, and haven't found a working way to do it (again, without loading the full slice in memory).
Here's the code I have:
class FileSlice(AbstractContextManager):
    """
    File-like object that only reads a slice of a file.

    Opens the file on __enter__, seeks to *seek_from*, and exposes at most
    *read_limit* bytes through read() and iteration. Suitable as a streaming
    body for requests because it provides both __len__ and read().

    Inspired by stackoverflow.com/a/29838711/593036, but actually works.
    """

    def __init__(self, filepath: str, seek_from: int, read_limit: int):
        self.filepath = filepath
        self.seek_from = seek_from
        self.read_limit = read_limit
        self.n_seen = 0  # bytes handed out so far

    def __enter__(self):
        self.f = open(self.filepath, "rb")
        self.f.seek(self.seek_from)
        return self

    def __len__(self):
        # Length of the slice, clipped at end-of-file.
        total_length = os.fstat(self.f.fileno()).st_size
        return min(self.read_limit, total_length - self.seek_from)

    def read(self, n=-1):
        """Read up to *n* bytes (all remaining if n < 0), bounded by the slice."""
        if self.n_seen >= self.read_limit:
            return b""
        remaining_amount = self.read_limit - self.n_seen
        n_to_read = remaining_amount if n < 0 else min(n, remaining_amount)
        data = self.f.read(n_to_read)
        # BUGFIX: count the bytes actually read, not the bytes requested, so
        # a slice extending past end-of-file is not over-counted.
        self.n_seen += len(data)
        return data

    def __iter__(self):
        # BUGFIX: the original yielded a single buffer-sized chunk and then
        # stopped; stream chunks until the slice is exhausted.
        while True:
            chunk = self.read(io.DEFAULT_BUFFER_SIZE)
            if not chunk:
                return
            yield chunk

    def __exit__(self, *args):
        self.f.close()
Then the actual request:
# Stream one chunk: FileSlice bounds what requests reads, so the whole
# slice is never held in memory.
with FileSlice(filepath, seek_from=i * chunk_size, read_limit=chunk_size) as data:
    r = requests.put(presigned_url, data=data)
    r.raise_for_status()  # surface HTTP 4xx/5xx instead of failing silently
This seems pretty complex, so I'm wondering:
if I'm missing a simpler way
if my approach is correct.
Thank you!
I have the following base class:
class ClientRepo(Repository):
    """In-memory repository holding a list of ClientDAO objects."""

    def __init__(self) -> None:
        self.__clientList = []

    def hasClientWithId(self, clientId):
        """Return True if any stored client carries the given id."""
        return any(stored.getId() == clientId for stored in self.__clientList)

    def addClient(self, client):
        """Store *client* under a fresh id; reject wrong types and duplicates."""
        if type(client).__name__ != 'ClientDAO':
            raise TypeError
        if self.hasClientWithId(client.getId()):
            raise ObjectAlreadyInCollectionException
        client.setClientId(self.__maximumIndexInClientList() + 1)
        self.__clientList.append(client)
which basically only holds a list and can add a ClientDAO to it.
And the following, which derives from it:
class ClientFileRepository(ClientRepo):
    """ClientRepo variant that keeps the client list in sync with a text file."""

    def __init__(self, fileName) -> None:
        super().__init__()
        self.__fileName = fileName
        self.__file = None

    def hasClientWithId(self, clientId):
        # Load from disk, query the in-memory parent, then drop the data.
        self.__loadRepo()
        found = super().hasClientWithId(clientId)
        super().clean()
        return found

    def addClient(self, client):
        # Load, delegate to the parent, persist, then drop the data.
        self.__loadRepo()
        super().addClient(client)
        self.__storeRepo()
        super().clean()

    def __loadFileReadMode(self):
        self.__file = open(self.__fileName, "r")

    def __loadFileWriteMode(self):
        self.__file = open(self.__fileName, "w")

    def __closeFile(self):
        self.__file.close()

    def __loadRepo(self):
        """Parse "<id> <name>" lines from the file into the parent repo."""
        self.__loadFileReadMode()
        for record in self.__file:
            fields = record.split()
            clientToAdd = ClientDAO(fields[1])
            clientToAdd.setClientId(int(fields[0]))
            super().addClientWithId(clientToAdd)
        self.__closeFile()

    def __storeRepo(self):
        """Rewrite the whole file from the parent's current client list."""
        self.__loadFileWriteMode()
        self.__file.write("")
        for client in super().getList():
            self.__file.write(self.clientToString(client))
        self.__closeFile()

    def clientToString(self, clientDAO):
        """Serialize one client as "<id> <name>\\n"."""
        return str(clientDAO.getId()) + " " + clientDAO.getName() + "\n"
a class which should load the list from a file, call addClient from parent, and store the updated list in the file. The problem is that after child class loads the file in addClient, it calls the method in the parent, which calls hasClientWithId, from the child, again. But I want it to call hasClientWithId, from the parent, that is, the context it is in. Can I achieve that?
I can think of several ways to achieve your goal. I ranked them from worst to best
1. Exactly what you asked for
You wanted that ClientRepo.addClient calls ClientRepo.hasClientWithId instead of ClientFileRepository.hasClientWithId. It is possible to enforce that:
class ClientRepo(Repository):
    def addClient(self, client):
        # Call the base-class implementation explicitly (unbound, passing
        # self) so dynamic dispatch is bypassed: a subclass override of
        # hasClientWithId is deliberately NOT used here.
        if type(client).__name__ == 'ClientDAO':
            if not ClientRepo.hasClientWithId(self, client.getId()):
                client.setClientId(self.__maximumIndexInClientList() + 1)
                self.__clientList.append(client)
            else:
                raise ObjectAlreadyInCollectionException
        else:
            raise TypeError
This is not a good approach, because it's unintuitive and breaks the principles of OOP. Any other programmer writing a subclass of ClientRepo that overrides hasClientWithId would expect that this will have an effect for every call to hasClientWithId, even inside of addClient.
2. Let ClientFileRepository decide which function to use
Add a variable
self.__isFileOpen = False
in ClientFileRepository.__init__, set it to True when you open the file and to False when you close the file. Then change the hasClientWithId within ClientFileRepository to
def hasClientWithId(self, clientId):
    """Load the repo from file unless a caller higher in the stack already did."""
    if self.__isFileOpen:
        # Re-entrant call from within this class: data is already loaded.
        return super().hasClientWithId(clientId)
    self.__loadRepo()
    result = super().hasClientWithId(clientId)
    super().clean()
    return result
to avoid opening the same file again. This works, but it is pretty difficult to write new functions for this class, because you always need to be aware if the function call is a call from within your class or from somewhere else. Also this seems pretty inefficient, because you read and write the entire file, even when you only add one client.
3. Read the file only once and modify the underlying ClientRepo
class ClientFileRepository(ClientRepo):
    """Loads the file once at construction; rewrites it after each addClient."""

    def __init__(self, fileName) -> None:
        super().__init__()
        self.__fileName = fileName
        self.__loadRepo()

    # No hasClientWithId override needed: the in-memory list is authoritative.

    def addClient(self, client):
        super().addClient(client)
        self.__storeRepo()

    def __loadRepo(self):
        """Parse "<id> <name>" lines into the parent repo."""
        # BUGFIX: was self.__filename (wrong case), which never matches the
        # attribute assigned in __init__ (self.__fileName) -> AttributeError.
        with open(self.__fileName) as file:
            for line in file:
                splitLine = line.split()
                clientToAdd = ClientDAO(splitLine[1])
                clientToAdd.setClientId(int(splitLine[0]))
                super().addClientWithId(clientToAdd)

    def __storeRepo(self):
        """Rewrite the whole file from the parent's current client list."""
        # BUGFIX: same __filename/__fileName case mismatch as __loadRepo.
        with open(self.__fileName, "w") as file:
            file.write("")
            for client in super().getList():
                # NOTE(review): clientToString is not defined in this class;
                # presumably inherited/added elsewhere -- verify.
                file.write(self.clientToString(client))
This obviously assumes that the file is not changed by someone else between calls to addClient and the program still overwrites the entire file for every addClient. If this is a problem for you it is best to be explicit and make loadRepo and storeRepo public. Then the programmer using this class can decide when loading and saving are necessary and useful. You can use context managers for this.
Extra: Read and save the file for every method
You can use function decorators to use solution 2 without writing the same code for every function:
import functools


def loadAndStore(function):
    """Decorator: load the repo before *function* runs and store/clean after,
    unless a call higher in the stack already has the file "open"."""
    # BUGFIX: the decorator line was rendered as a comment with a typo
    # ("#functoools.wraps"); it must be @functools.wraps so the wrapped
    # function keeps its name and docstring.
    @functools.wraps(function)
    def wrappedFunction(self, *args, **kwargs):
        # NOTE: single-underscore attribute/method names are used throughout
        # because double-underscore names assigned inside the class body are
        # name-mangled (_ClientFileRepository__...) and therefore unreachable
        # from this module-level decorator.
        if self._isFileOpen:
            return function(self, *args, **kwargs)
        self._isFileOpen = True
        self._loadRepo()
        try:
            return function(self, *args, **kwargs)
        finally:
            # Persist and reset even if *function* raised; the original's
            # "except Exception: raise" was a no-op and was removed.
            self._storeRepo()
            self.clear()  # some cleanup -- assumes the repo defines clear(); verify
            self._isFileOpen = False
    return wrappedFunction


class ClientFileRepository(ClientRepo):
    """ClientRepo variant whose public methods sync with a file via @loadAndStore."""

    def __init__(self, fileName) -> None:
        super().__init__()
        self._fileName = fileName
        self._isFileOpen = False  # guards against re-entrant load/store

    # BUGFIX: "#loadAndStore" was a comment; decorators use @ syntax.
    @loadAndStore
    def hasClientWithId(self, clientId):
        return super().hasClientWithId(clientId)

    @loadAndStore
    def addClient(self, client):
        super().addClient(client)

    def _loadRepo(self):
        """Parse "<id> <name>" lines into the parent repo."""
        # BUGFIX: was self.__filename, which matched neither the attribute
        # set in __init__ nor the mangled name visible to the decorator.
        with open(self._fileName) as file:
            for line in file:
                splitLine = line.split()
                clientToAdd = ClientDAO(splitLine[1])
                clientToAdd.setClientId(int(splitLine[0]))
                super().addClientWithId(clientToAdd)

    def _storeRepo(self):
        """Rewrite the whole file from the parent's current client list."""
        with open(self._fileName, "w") as file:
            file.write("")
            for client in super().getList():
                file.write(self.clientToString(client))
Be careful here, using this is not very intuitive. For example self.__isFileOpen is defined in __init__, but none of the methods below directly use it. Instead its use is hidden in the loadAndStore decorator.
Some quick hints at the end:
type(client).__name__ == 'ClientDAO' is bad practice. Use isinstance(client, ClientDAO) to fully adopt OOP
If this is not part of a bigger project with given naming conventions use the python style guide
Using private variables like __fileName is generally considered unnecessary, just prefix the variable with one underscore to indicate "internal use". The same is true for functions.
I would like to stack filters around an open() function. These filters are supposed, for example, to change every encountered a characters into b in the stream read from the file.
For example, here is a code sample:
def filter (stream):
    # Generator: lazily rewrite each line, turning every 'a' into 'b'.
    # NOTE: shadows the builtin filter().
    for line in stream:
        yield line.replace('a', 'b')

def add_filter(filter, file):
    # BUG (the subject of this question): TextIOWrapper expects a readable
    # binary buffer, but filter(file) is a generator, which has no
    # .readable() method -- hence the AttributeError quoted below.
    return io.TextIOWrapper(filter(file))

def processing_file(f):
    import sys
    for line in f:
        sys.stdout.write("aa: " + line)

f = open('./example.txt', 'r')
f = add_filter(filter, f)
processing_file(f)
I guess that the add_filter() function should return a TextIOWrapper to mimic the result of an open() function. But, I keep having the following error message:
AttributeError: 'generator' object has no attribute 'readable'
In fact, I understand the error, but I do not know how to work around and make it work properly.
You can iterate directly over the filter generator:
# The generator is already iterable -- no TextIOWrapper is needed when you
# only want the filtered lines.
with open('./example.txt', 'r') as f:
    for line in filter(f):
        sys.stdout.write("aa: " + line)
I came with a solution to my own question... I, first, have to admit that my question was not totally well formed and may have lacked of precision. So, I do not blame anybody to have discarded it.
My original intention was to come out with a stackable framework of filters over a stream (open()). Trying to make it easy to use, also.
I mainly found inspiration in this answer on StackOverflow which was solving about 90% of my problem.
So, imagine we have two filters (which are coded as generators):
def tab_filter(stream):
    """Yield each line of *stream* with every TAB turned into eight spaces."""
    tab_replacement = ' ' * 8
    for row in stream:
        yield row.replace('\t', tab_replacement)
def a_filter(stream):
    """Yield each line of *stream* with every 'a' turned into 'z'."""
    for row in stream:
        yield row.replace('a', 'z')
Then, we have this class allowing to wrap a generator inside a stream:
class IterStream(object):
"File-like streaming iterator."
def __init__(self, generator):
self.generator = generator
self.iterator = iter(generator)
self.leftover = ''
def __len__(self):
return self.generator.__len__()
def __iter__(self):
return self.iterator
def next(self):
return self.iterator.next()
def read(self, size):
data = self.leftover
count = len(self.leftover)
try:
while count < size:
chunk = self.next()
data += chunk
count += len(chunk)
except StopIteration:
self.leftover = ''
return data
if count > size:
self.leftover = data[size:]
return data[:size]
Using it in the code will be as follow:
import sys

# Each IterStream wraps a generator filter so the next stage can treat it
# like a file; the stages are stacked inside-out.
f = IterStream(a_filter(IterStream(tab_filter(open('Example.txt', 'r')))))
for line in f:
    sys.stdout.write("aa: " + line)
But, this is not yet totally satisfactory because we need a lot of useless function stacking. So, I decided to wrap it inside a decorator:
def streamfilter(filter):
    """Decorator turning a generator-based line filter into a function that
    returns a file-like IterStream."""
    def wrapped(iostream):
        # Wrap the filtered generator so it exposes read()/iteration.
        return IterStream(filter(iostream))
    return wrapped
# BUGFIX: the decorator lines were rendered as comments ("#streamfilter");
# decorators use the @ syntax.
@streamfilter
def tab_filter(stream):
    """Expand every TAB to eight spaces (wrapped as an IterStream)."""
    for line in stream:
        yield line.replace ('\t', ' ' * 8)

@streamfilter
def a_filter(stream):
    """Replace every 'a' with 'z' (wrapped as an IterStream)."""
    for line in stream:
        yield line.replace ('a', 'z')
Then, using the code is much easier now:
import sys

# The @streamfilter decorator already wraps each stage in an IterStream,
# so the filters compose directly.
f = a_filter(tab_filter(open('Example.txt', 'r')))
for line in f:
    sys.stdout.write("aa: " + line)
I hope that some of you will find this few lines useful.
I wrote a simple program to read through a log and to parse through and obtain the lowest beginning number (the head) and to print it. I am now editing that program and combining it with a class I wrote to parse an actual logfile. Essentially, as opposed to sorting based off of the simple number from the log from my previous program, I now need to reference the parsed information from one class into another class. I was wondering what the most convenient way to do this. I am a beginner programmer in python and don't know if I can explicitly reference the class.
Here are the classes.
Parser
class LogLine:
    """Parses one log line into time / severity / filename / line / message."""

    # Severity names; index in this list is used as the numeric level.
    SEVERITIES = ['EMERG','ALERT','CRIT','ERR','WARNING','NOTICE','INFO','DEBUG']
    severity = 1

    def __init__(self, line):
        try:
            # NOTE(review): repeated capture groups such as
            # (\s?[a-zA-Z0-9\.])+ keep only their *last* repetition, so
            # self.filename and self.message receive a single character.
            # The groups likely need an outer capture around a (?:...)+
            # repetition -- TODO confirm against a real log sample.
            m = re.match(r"^(\d{4}-\d{2}-\d{2}\s*\d{2}:\d{2}:\d{2}),?(\d{3}),?(\s+\[(?:[^\]]+)\])+\s+[A-Z]+\s+(\s?[a-zA-Z0-9\.])+\s?(\((?:\s?\w)+\))\s?(\s?.)+", line)
            timestr, msstr, sevstr, self.filename, linestr, self.message = m.groups()
            # NOTE(review): linestr is captured with surrounding parentheses,
            # so int(linestr) will raise -- verify the intended group.
            self.line = int(linestr)
            self.sev = self.SEVERITIES.index(sevstr)
            self.time = float(calendar.timegm(time.strptime(timestr, "%Y-%m-%d %H:%M:%S,%f"))) + float(msstr)/1000.0
            # NOTE(review): 't' is undefined here (NameError); presumably
            # 'timestr' was intended. The exception is swallowed below.
            dt = datetime.strptime(t, "%Y-%m-%d %H:%M:%S,%f")
        except Exception:
            # NOTE(review): if re.match failed, self.filename was never set,
            # so this print itself raises AttributeError.
            print 'error',self.filename

    def get_time(self):
        return self.time

    def get_severity(self):
        return self.sev

    def get_message(self):
        return self.message

    def get_filename(self):
        return self.filename

    def get_line(self):
        return self.line
Sorter
class LogFile:
    """Wraps an open log file and buffers one look-ahead line ("head")."""

    def __init__(self,filepath):
        self.logfile = open(filepath, "r")
        self.head = None  # buffered look-ahead line, or None when empty

    def __str__(self):
        # NOTE(review): self.x / self.y are never defined on this class --
        # this looks like a copy-paste leftover and will raise AttributeError.
        return "x=" + str(self.x) + "y="+str(self.y)

    def readline(self):
        # Return the buffered head if present, otherwise the next raw line.
        # NOTE(review): rstrip(' ') only strips spaces; presumably '\n' was
        # intended -- verify.
        if self.head != None:
            h = self.head
            self.head = None
            return h
        else:
            return self.logfile.readline().rstrip(' ')

    def get_line(self):
        # NOTE(review): self.head holds a *string* (from readline), and
        # ".get.line()" is a typo for ".get_line()". Per the question's
        # intent, head should instead become a LogLine instance (see the
        # answer's version of this class).
        if self.head == None:
            self.head = self.readline().rstrip(' ')
            return self.head.get.line()
        else:
            return self.head.get.line()

    def close (self):
        self.logfile.close()
I have begun to edit my second class by adding the get_line function. Don't know if I'm on the right track.
In simpler terms, I need the head to become "LogLine"
It is okay to use one class from another class. You have one class that parses a single line from a log file and builds an object that represents the line; and you have another class that reads lines from a log file. It would be very natural for the second class to call the first class.
Here is a very simple class that reads all lines from a log file and builds a list:
class LogFile(object):
    """Eagerly parse an entire log file into a list of LogLine objects."""

    def __init__(self, filepath):
        # The file is consumed line by line and closed as soon as the
        # list has been built.
        with open(filepath, "r") as f:
            self.lst = list(map(LogLine, f))
You can see that self.lst is being set to a list of lines from the input log file, but not just the text of the line; the code is calling LogLine(line) to store instances of LogLine. If you want, you can sort the list after you build it:
self.lst.sort(key=LogLine.get_line)
If the log files are very large, it might not be practical to build the list. You have a .get_line() method function, and we can use that:
class LogFile(object):
    """Lazily hand out one parsed LogLine per call to get_line()."""

    def __init__(self, filepath):
        self.logfile = open(filepath, "r")

    def get_line(self):
        """Return the next LogLine, or None once the file is exhausted."""
        try:
            # File objects are iterators; next() yields the next raw line and
            # raises StopIteration at end of file.
            return LogLine(next(self.logfile))
        except StopIteration:
            return None

    def close(self):
        self.logfile.close()
An open file object (returned by the open() function) can be iterated. We can call next() on this object and it will give us the next input line. When the end of file is reached, Python will raise StopIteration to signal the end of the file.
Here the code will catch the StopIteration exception and return None when the end of the log file is reached. But I think this isn't the best way to handle this problem. Let's make the LogFile class work in for loops and such:
class LogFile(object):
    """Iterable over a log file, yielding one LogLine per input line."""

    def __init__(self, filepath):
        self.f = open(filepath)

    def __iter__(self):
        # BUGFIX: for-loops call iter() on the object first; without
        # __iter__, "for ll in LogFile(...)" raises TypeError in both
        # Python 2 and 3. The object is its own iterator, so return self.
        return self

    def __next__(self):  # Python 3.x needs this to be named "__next__"
        try:
            line = next(self.f)
            return LogLine(line)
        except StopIteration:
            # When we reach the end of input, close the file object and
            # re-raise so the for-loop terminates normally.
            self.f.close()
            raise

    next = __next__  # Python 2.x needs this to be named "next"
A for loop in Python will repeatedly call the .__next__() method function (Python 3.x) or else the .next() method function (Python 2.x) until the StopIteration exception is raised. Here we have defined both method function names so this code should work in Python 2.x or in Python 3.x.
Now you can do this:
# LogFile is its own iterator, so the loop yields parsed LogLine objects.
for ll in LogFile("some_log_file"):
    ... # do something with ll, which will always be a LogLine instance
I am trying to write a program using python-fuse, but I can't get file writing down.
my file_class looks like this
class FuseFile(object):
    """python-fuse file handle: buffers writes in a local temporary file and
    serves reads from a remote file fetched via apiCall."""

    def __init__(self, path, flags, *mode):
        debug(path)
        # Back the handle with an anonymous temporary file.
        self.file = tempfile.TemporaryFile(*mode)
        self.fd = self.file.fileno()
        self.path = path

    def write(self, buf, offset):
        """Write *buf* at *offset*; FUSE expects the byte count back."""
        self.file.seek(offset)
        self.file.write(buf)
        return len(buf)

    def read(self, length, offset):
        """Return up to *length* bytes starting at *offset* of the remote file."""
        # NOTE(review): this re-fetches the whole remote file on every read;
        # consider caching -- verify against expected file sizes.
        data = apiCall("readfile", {"file": self.path}).read()
        slen = len(data)
        # BUGFIX: the original tested "length < slen", used the undefined
        # name "size", and returned the whole file instead of the slice.
        # This is the canonical FUSE read-bounds pattern.
        if offset < slen:
            if offset + length > slen:
                length = slen - offset
            buf = data[offset:offset + length]
        else:
            buf = ''
        return buf

    def ftruncate(self, len):
        self.file.truncate(len)

    def release(self, flags):
        self.file.close()

    def flush(self):
        self._fflush()

    def fsync(self, isfsyncfile):
        self._fflush()
        # Prefer fdatasync (skips metadata) where the OS provides it.
        if isfsyncfile and hasattr(os, 'fdatasync'):
            os.fdatasync(self.fd)
        else:
            os.fsync(self.fd)

    def _fflush(self):
        # Only flush handles that were opened for writing.
        if 'w' in self.file.mode or 'a' in self.file.mode:
            self.file.flush()
but when I try and edit the file in a editor like VIM I get this:
"mnt/stuff.txt" E514: write error (file system full?)
WARNING: Original file may be lost or damaged
don't quit the editor until the file is successfully written!
[EDIT] I found the problem, I didn't have a open method, but even so, I eventually took out the file_class to implement the methods in the main FUSE class, because that seems to work better
I eventually found out that the problem was that I hadn't created an open() or create() method in my file class, but eventually I settled with implementing all the methods in the main FUSE class because the file_class didn't seem to be working for me.