I wrote a small Python Django program that parses data from a JSON API call and saves it into Parse, using ParsePy.
I have a python file that collects the data and saves it into a Parse app DB. The Python file also passes some data into a different file that should save the passed data into a different Parse app.
In pseudocode:
File1.py
register('key1', 'restKey1')
file2.class1(passedData)
file1.saveData
File2.py
register('key2','restKey2')
file2.saveData
When I run the files individually, the code works perfectly. However, when I execute the program through the first file, the data is all getting saved into the first Parse app database instead of the second one.
I think you can use pattern like this:
#!/usr/bin/python
class SourceInterface(object):
def get_data(self):
raise NotImplementedError("Subclasses should implement this!")
class DestinationInterface(object):
def put_data(self, data):
raise NotImplementedError("Subclasses should implement this!")
class FileSource(SourceInterface):
def __init__(self, filename):
self.filename = filename
def get_data(self):
lines = None
with open(self.filename, 'r') as f:
lines = f.readlines()
if lines:
with open(self.filename, 'w') as f:
if lines[1:]:
f.writelines(lines[1:])
return lines[0]
class FileDestination(DestinationInterface):
def __init__(self, filename):
self.filename = filename
def put_data(self, data):
print 'put data', data
with open(self.filename, 'a+') as f:
f.write(data)
class DataProcessor(object):
sources_list = []
destinitions_list = []
def register_source(self, source):
self.sources_list.append(source)
def register_destinition(self, destinition):
self.destinitions_list.append(destinition)
def process(self):
for source in self.sources_list:
data = source.get_data()
if data:
for destinition in self.destinitions_list:
destinition.put_data(data)
if __name__ == '__main__':
processor = DataProcessor()
processor.register_source(FileSource('/tmp/source1.txt'))
processor.register_source(FileSource('/tmp/source2.txt'))
processor.register_destinition(FileDestination('/tmp/destinition1.txt'))
processor.register_destinition(FileDestination('/tmp/destinition2.txt'))
processor.process()
Just define you own Source and Destination classes
Related
as said, I'd like to open a json file and make it into a list, in order to append new elements to it and then dump all back into the json file.
Here is my code(the commented part is what I previously tried):
class Carta:
def __init__(self,filename):
self.__filename = filename
self.__lista = []
# try:
# f = open(self.__filename,"r")
# except:
# f = open(self.__filename, "w")
# f.close()
# f = open(self.__filename, "r")
with open(self.__filename) as file:
self.__lista = json.load(file)
# read=json.load(f)
# for c in leggi:
# self.__lista.append(c)
# print(self.__lista)
# f.close()
def add(self, c):
self.__lista.append(c)
def save(self):
f = open(self.__filename, "w")
for c in self.__lista:
f.write("%s\n" % str(c))
f.close()
It wouldn't work if you read from a JSON file, json list and then write custom string. Because next time you read the JSON file it's gonna fail.
So, during write/save you should make it json itself. Here's the code the explains how to do it.
import json
class Carta:
def __init__(self, filename):
self.__filename = filename
self.__lista = list()
self.read_from_json_file()
def read_from_json_file(self):
with open(self.__filename) as file:
self.__lista = json.load(file)
def write_to_json_file(self):
with open(self.__filename, 'w') as f:
json.dump(self.__lista, f)
def add(self, value):
self.__lista.append(value)
The reason you should use with open(filename, mode) as f: instead of f = open(filename) is because at the end of with block the file is automatically closed. Otherwise you've to call f.close() every time you open a file.
json.load - reads json data from file, converts to python data type/structure.
json.dump - read python data type/structure, converts it into string and stores it in the file (file handle) and saves the file.
Using pdb to trace errors
import json
import pdb
class Carta:
def __init__(self, filename):
self.__filename = filename
self.__lista = list()
self.read_from_json_file()
def read_from_json_file(self):
pdb.set_trace() # to pause execution and start debugger
# When paused,
# type n to continue to next line,
# type c to continue execution or to continue to the next loop
# type b <file_name>:<line_number> to add another break point, where <file_name> and <line_number> are place holders
# Example, b /home/username/hello.py:43, will add breakpoint at 43 line of hello.py in /home/username path
# type q to quit debugger and halt execution
with open(self.__filename) as file:
self.__lista = json.load(file)
def write_to_json_file(self):
with open(self.__filename, 'w') as f:
json.dump(self.__lista, f)
def add(self, value):
# Second breakpoint
pdb.set_trace()
self.__lista.append(value)
Or just run your file with
python -m pdb file.py and then add breakpoints. It will pause in the first line itself and return you a (pdb) console where you can add breakpoint.
import json
#read from file
with open("demofile.txt", "r") as f: x = f.read()
#parse
y = json.loads(x)
#edit
y["user"] = { "fname": "John", "lname": "Who"}
#save to file
with open("demofile.txt", "w") as f: f.write(json.dumps(y))
https://repl.it/#KrzysztofPecyna/PythonJsonExample
To read JSON from a file:
import json
with open('data.txt') as json_file:
data = json.load(json_file)
To add new data:
data['key'] = "value"
To write JSON to a file:
with open('data.txt', 'w') as outfile:
json.dump(data, outfile)
Trying to wrap my head around decorators in Python
I am trying to write a class which contains 2 functions:
A function which takes a pandas DataFrame as an argument and writes it to a text file, using tabulate.
def text_file(filename, df):
table = tabulate(filename, tablefmt="grid", headers=df.columns)
with open(filename, 'w') as f:
f.write(table)
A function to upload files to Slack and delete it after 5 seconds
slack = Slacker(api_token)
def upload(func):
#functools.wraps(func)
def upload_wrapper(*args, **kwargs):
slack.files.upload(file,
channels=channel,
title=head,
initial_comment=comment)
time.sleep(5)
os.remove(file)
return upload_wrapper(*args, **kwargs)
I am getting tripped up on understanding how to use decorators and how to use args and kwargs.
The end result of my structure would be something like:
from slacker import Slacker
from tabulate import tabulate
import functools
import time, os
class Slack:
def __init__(self, api_token, channel):
self.Slacker = Slacker(api_token)
self.channel = channel
#functools.wraps(func)
def upload(func):
def upload_wrapper(self, *args, **kwargs):
self.slack.files.upload(file,
channels=self.channel,
title=head,
initial_comment=comment)
time.sleep(5)
os.remove(file)
return upload_wrapper(self, *args, **kwargs)
def generate_text_file(self, filename, df):
table = tabulate(filename, tablefmt="grid", headers=df.columns)
with open(filename, 'w') as f:
f.write(table)
And my desired result would be to call a function which prepares a file for me, then uploads it to Slack and deletes it so no files are kept on the system (I would have this running via a cronjob).
workspace_name = Slack(api_token, channel=channel_name)
df = some_pandas_function()
#upload
workspace_name.generate_text_file("filename.txt", df)
Any help would be much appreciated ...
I'm trying to organize my code I already have by implementing classes and execute methods on classes instantiations. I have put some hours into figuring out how to use classes, but still haven't figured it out. Could someone help me?
This is the original code:
def readSignalAcquisitionData(fileName):
f = open(fileName, 'r')
# dummy read
f.readline()
timeStamps = []
dataInput = []
for ln in f:
# parse info
timeStr, dataStr = ln.split(',')
timeStamps.append(float(timeStr))
dataInput.append(float(dataStr))
f.close()
return timeStamps, dataInput
And this is what I currently have:
class SignalDataIOUnit:
def __init__(self, fileName):
self.fileName = fileName
def readSignalAcquisitionData(self):
f = open(self.fileName, 'r')
self.timeStamps = []
self.dataInput = []
for ln in f:
# parse info
self.timeStr, self.dataStr = ln.split(',')
self.timeStamps.append(float(self.timeStr))
self.dataInput.append(float(self.dataStr))
f.close()
return self.timeStamps, self.dataInput
def writeFilteredData(self, fileName, timeStamps, dataOut):
pass
fileName="LabsWeek03_inputData.csv"
timeStamps, dataInput = SignalDataIOUnit.readSignalAcquisitionData(fileName)
print(timeStamps)
When I try running it through the terminal I get these error messages:
Traceback (most recent call last):
File "SignalDataEvaluationUnit_OOP.py", line 26, in <module>
timeStamps, dataInput = SignalDataIOUnit.readSignalAcquisitionData(fileName)
File "SignalDataEvaluationUnit_OOP.py", line 7, in readSignalAcquisitionData
f = open(self.fileName, 'r')
AttributeError: 'str' object has no attribute 'fileName'
As #decezeā¦ says in comment, you haven't instantiated the class SignalDataIOUnit, that's why it doesn't work.
To make it work, you have 2 choices:
Instantiating SignalDataIOUnit object and call the method readSignalAcquisitionData:
timeStamps, dataInput = SignalDataIOUnit(fileName).readSignalAcquisitionData()
Use Python's #staticmethod decorator:
class SignalDataIOUnit:
def __init__(self, fileName):
self.fileName = fileName
#staticmethod
def readSignalAcquisitionData(fileName):
...
then just call it as usual
timeStamps, dataInput = SignalDataIOUnit.readSignalAcquisitionData(fileName)
yes, you should use like this
fileName="LabsWeek03_inputData.csv"
timeStamps, dataInput = SignalDataIOUnit(fileName).readSignalAcquisitionData()
print(timeStamps)
I am trying to open a file in a class and close it on exit in this manner.
class PlanetaryImage(object):
#classmethod
def open(cls, filename):
with open(filename, 'rb') as fp:
return cls(fp, filename)
def __init__(self, stream, filename=None, memory_layout='DISK'):
self.filename = filename
self._parse_data(stream)
def _parse_data(self, stream):
data_stream = stream
try:
if self.data_filename is not None:
dirpath = os.path.dirname(self.filename)
data_file = os.path.abspath(
os.path.join(dirpath, self.data_filename))
data_stream = open(data_file, 'rb')
data_stream.seek(self.start_byte)
if self.format in self.BAND_STORAGE_TYPE:
return getattr(self, self.BAND_STORAGE_TYPE[self.format])(data_stream)
raise Exception('Unkown format (%s)' % self.format)
finally:
data_stream.close()
There are certain cases where I am having to use open one more file in _parse_data function. I wanted to use with but the if statements make it difficult. Any suggestions on how to make the try section more pythonic.
There's no reason for _parse_data to try to open a file. It should be the caller's responsibility to either use PlanetaryImage.open with a file name or to provide an open file handle to __init__. _parse_data should do just one thing: parse the data from its stream argument.
class PlanetaryImage(object):
#classmethod
def open(cls, filename):
with open(filename, 'rb') as fp:
return cls(fp, filename)
def __init__(self, stream, memory_layout='DISK'):
self._parse_data(stream)
def _parse_data(self, data_stream):
try:
data_stream.seek(self.start_byte)
if self.format in self.BAND_STORAGE_TYPE:
return getattr(self, self.BAND_STORAGE_TYPE[self.format])(data_stream)
raise Exception('Unkown format (%s)' % self.format)
finally:
data_stream.close()
Now, there are simply two options for using the class:
with open(filename, 'rb') as fp:
x = PlanetaryImage(fp)
...
or
x = PlanetaryImage(filename)
....
I am trying to upload a csv file in a django form:
class CSVUploadForm(forms.Form):
csv_file = forms.FileField(label='Select a CSV file to import:',)
def clean(self):
file_csv = self.cleaned_data['csv_file']
records = csv.reader(open('/mypath/'+file_csv.name, 'rU'), dialect=csv.excel_tab)
I need to open the file in universal new line mode. I can do that with "open" method above, but that will not work for this form because the file I am dealing with is an in memory uploaded version of the csv.
How do I pass the universal new line mode flag rU to something like this:
records = csv.reader(file_csv, dialect=csv.excel_tab)
?
You can use str.splitlines() -- which automatically splits on universale line-breaks -- in the following manner:
def clean(self):
file_csv = self.cleaned_data['csv_file']
lines = file_csv.read().splitlines()
records = csv.reader(lines, dialect=csv.excel_tab)
If you are worried about the memory cost of creating the lines variable, you can force Django to save the file to a local file on disk changing the FILE_UPLOAD_MAX_MEMORY_SIZE variable in settings.py (more on this variable here):
# add to your settings.py
FILE_UPLOAD_MAX_MEMORY_SIZE = 0
FILE_UPLOAD_TEMP_DIR = '/tmp'
Then to process the file from it's tmp folder using universal mode:
def clean(self):
file_csv = open(self.cleaned_data['csv_file'].temporary_file_path, 'rU')
records = csv.reader(file_csv, dialect=csv.excel_tab)
Problem with the solution above is that it reads the whole file all at once, make it not acceptable when processing large csv file. For small CSV files files will be saved to disk instead of being kept in memory which is also not so great.
I've created a class to handle new lines
class FileWithUniversalNewLine(object):
def __init__(self, file_obj):
self.file = file_obj
def lines(self):
buff = "" # In case of reading incomplete line, buff will temporarly keep the incomplete line
while True:
line = self.file.read(2048)
if not line:
if buff:
yield buff
raise StopIteration
# Convert all new lines into linux new line
line = buff + line.replace("\r\n", "\n").replace("\r", "\n")
lines = line.split("\n")
buff = lines.pop()
for sline in lines:
yield sline
def close(self):
self.file.close()
def __exit__(self, *args, **kwargs):
return self.file.__exit__(*args, **kwargs)
def __enter__(self, *args, **kwargs):
return self
def __iter__(self):
return self.lines()
Usage:
csvfile = FileWithUniversalNewLine(file_csv)
records = csv.reader(csvfile, dialect=csv.excel_tab)