Appending csv, no overwriting - python

How can I append a csv file with new information without overwriting the existing results? I tried, but it overwrites:
def save_jobs_to_excel(jobs_list, filename):
    path = r'\Users\Alberto\OneDrive\Documents\Big Data\pirple\jobs'
    jobs = pd.DataFrame(jobs_list)
    if filename not in path:
        jobs.to_csv(os.path.join(path, f'{filename}.csv'))
    elif filename in path:
        with open(filename, 'a') as f:
            f.write(jobs.to_csv(os.path.join(path, f'{filename}.csv')))
    else:
        pass

I think you are double-opening your file and then having jobs.to_csv() overwrite it. Maybe something like this, appending when the file already exists and writing it fresh otherwise:
def save_jobs_to_excel(jobs_list, filename):
    path = r'\Users\Alberto\OneDrive\Documents\Big Data\pirple\jobs'
    jobs = pd.DataFrame(jobs_list)
    filepath = os.path.join(path, f'{filename}.csv')
    if os.path.exists(filepath):
        # Append to the existing file; skip the header so it isn't repeated.
        jobs.to_csv(filepath, mode='a', header=False)
    else:
        # First run: create the file, header included.
        jobs.to_csv(filepath)
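
For reference, a minimal self-contained sketch of the append-or-create pattern (the filename and columns here are made up for illustration):

import os
import pandas as pd

def append_csv(df, filepath):
    # Write the header only when the file does not exist yet, so
    # repeated calls grow one CSV instead of overwriting it.
    df.to_csv(filepath, mode='a', header=not os.path.exists(filepath), index=False)

append_csv(pd.DataFrame([{'title': 'dev', 'company': 'acme'}]), 'jobs.csv')
append_csv(pd.DataFrame([{'title': 'qa', 'company': 'acme'}]), 'jobs.csv')  # row appended, not overwritten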


How to not overwrite a file in Python in a different os path

So I want to avoid overwriting a file name that already exists, but I don't know how to combine that code with my code. Please help me.
Here's my code for writing the file:
def filepass(f):
    print(f)
    with open('media/pass/' + 'filepass.txt', 'a') as fo:
        fo.write(f)
        fo.close()
    return fo
And here's the code that creates the number in the filepass name:
def build_filename(name, num=0):
    root, ext = os.path.splitext(name)
    print(root)
    return '%s%d%s' % (root, num, ext) if num else name

def find_next_filename(name, max_tries=20):
    if not os.path.exists(name):
        return name
    else:
        for i in range(max_tries):
            test_name = build_filename(name, i + 1)
            if not os.path.exists(test_name):
                return test_name
        return None
All I want is to create the filenames filepass.txt, filepass1.txt, filepass2.txt, ...
Something like this?
def filepass(f):
    print(f)
    filename = find_next_filename('media/pass/filepass.txt')
    with open(filename, 'a') as fo:
        fo.write(f)
    # you don't need to close when you use "with open";
    # but then it doesn't make sense to return a closed file handle,
    # so maybe let's return the filename instead
    return filename
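
Assuming the media/pass/ directory already exists, repeated calls then produce exactly the numbered series the question asks for, since find_next_filename returns a name that does not exist yet on each call:

for secret in ['a', 'b', 'c']:
    filepass(secret)
# creates media/pass/filepass.txt, then filepass1.txt, then filepass2.txt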

increment csv file number after loop completion

I am executing a loop and saving data in a CSV file. To store the data I create a path and directory and append the CSV file. As it is, the code executes fine and generates one CSV file upon completion. I would like the CSV file number to increment each time I run the code, so I don't have to delete or overwrite the previous one: file_0 for run 1, file_1 for run 2, and so on. I inserted `# fileName = "{}/{}_{}.csv".format(file_path[0], self.file_name, file_path[1])` but this saves each point in a separate file. Any suggestions welcome. Thanks.
import csv
import time
import os
from datetime import datetime
from random import random

# create folder timestamped
class CreateFile():
    def __init__(self, procedure):
        self.procedure = procedure  # Get the procedure name.
        self.createfile()           # Call create file function.

    def createfile(self):
        date = datetime.now().date()
        PARENT_DIR = r"C:\test/".format(date)      # Get the path.
        DIR = '{}'.format(self.procedure)          # Get procedure name.
        self.PATH = os.path.join(PARENT_DIR, DIR)  # Form a full path.
        try:
            if not os.path.exists(self.PATH):  # If the given path does not exist.
                os.makedirs(self.PATH)         # Make a directory.
        except OSError:  # OSError occurred, don't make the directory.
            print("Creation of the directory [%s] failed." % DIR)  # Print message.
        else:  # Successfully created the directory, print the message.
            print("Successfully created the directory %s " % DIR)

    def get_file(self):
        file_list = os.listdir(self.PATH)  # Load path into list directory.
        file_count = len(file_list)        # Check the number of file(s) under the given path.
        return [self.PATH, file_count]     # Return full path and file count under this folder.

# initialization and setpoints list
startpoint = 300
setpoint = 310
step = 10
temp_list = []
for x in range(startpoint, setpoint + 1, step):
    temp_list.append(x)
print(temp_list)

class save_data1():
    def __init__(self, file_name):
        self.file_name = file_name
        file_path_count = CreateFile(file_name).get_file()
        self.fileName = "{}/{}.csv".format(file_path_count[0], file_name)

    def record_csv(self, fileName, now, ep1):
        with open(fileName, 'a', newline='') as csvfile:
            header = ["Timestamp", 'Temp', "ep1"]
            writer = csv.DictWriter(csvfile, fieldnames=header)
            if csvfile.tell() == 0:
                writer.writeheader()
            writer.writerow(
                {
                    "Timestamp": now,
                    'Temp': temp,
                    "ep1": ep1
                }
            )

    def test(self):
        file_path = CreateFile(self.file_name).get_file()
        # fileName = "{}/{}_{}.csv".format(file_path[0], self.file_name, file_path[1])
        fileName = "{}/{}.csv".format(file_path[0], self.file_name)
        now = datetime.now()
        ep1 = random()  # here just a random number instead of the instrument
        self.record_csv(fileName, now, ep1)

# Set setpoint in Temp list
for temp in temp_list:
    # print a header
    print('')
    hdr = '______ T ______ __________ H __________\t______ Ext Param ______'
    print(hdr)
    time.sleep(0.5)
    print('setpoint:', temp)
    if temp == 300:
        save_data1('meas1').test()
    else:
        print('Waiting ')
        save_data1('meas1').test()
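
No answer is attached to this question here, but the commented-out naming line is essentially the right idea; the catch is that test() re-reads the directory on every call, so the file count (and hence the filename) grows with every point written. A minimal sketch of the usual fix, assuming the file_count returned by get_file() is meant to serve as the run index: freeze the index once, at construction time, and build the writer once per run.

class save_data1():
    def __init__(self, file_name):
        self.file_name = file_name
        file_path_count = CreateFile(file_name).get_file()
        # Freeze the run index here, so every point in this run goes to
        # the same file: meas1_0.csv for run 1, meas1_1.csv for run 2, ...
        self.fileName = "{}/{}_{}.csv".format(
            file_path_count[0], file_name, file_path_count[1])

    def test(self):
        now = datetime.now()
        ep1 = random()
        self.record_csv(self.fileName, now, ep1)  # reuse the frozen name

# Build the writer once per run, not once per point:
writer = save_data1('meas1')
for temp in temp_list:
    writer.test()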

How to read files from two folders and avoid duplicates in Python

I have the following folders that I read SQL files from and save them as variables:
++folder1
-1.sql
-2.sql
-3.sql
++folder2
-2.sql
The following code does the job well for a single folder. How can I modify this code to read not just from one folder but from two, with the rule that if a file exists in folder2 then don't read the file with the same name from folder1?
folder1 = '../folder1/'
for filename in os.listdir(folder1):
    path = os.path.join(folder1, filename)
    if os.path.isdir(path):
        continue
    with open(folder1 + filename, 'r') as myfile:
        data = myfile.read()
    query_name = filename.replace(".sql", "")
    exec(query_name + " = data")
You can try something like the following:
folders = ['../folder2/', '../folder1/']
checked = []
for folder in folders:
    for filename in os.listdir(folder):
        if filename not in checked:
            checked.append(filename)
            path = os.path.join(folder, filename)
            if os.path.isdir(path):
                continue
            with open(folder + filename, 'r') as myfile:
                data = myfile.read()
            query_name = filename.replace(".sql", "")
            exec(query_name + " = data")
The answer to this is simple: Do two listdir calls, then skip over the files in folder1 that are also in folder2.
One way to do this is with set operations: the set difference a - b means all elements in a that are not also in b, which is exactly what you want.
files1 = set(os.listdir(folder1))
files2 = set(os.listdir(folder2))
files1 -= files2

paths1 = [os.path.join(folder1, file) for file in files1]
paths2 = [os.path.join(folder2, file) for file in files2]

for path in paths1 + paths2:
    if os.path.isdir(path):
        # etc.
As a side note, dynamically creating a bunch of variables like this is almost always a very bad idea, and doing it with exec instead of globals or setattr is an even worse idea. It's usually much better to store everything in, e.g., a dict. For example:
queries = {}
for path in paths1 + paths2:
    if os.path.isdir(path):
        continue
    name = os.path.splitext(os.path.basename(path))[0]
    with open(path) as f:
        queries[name] = f.read()
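
With the folder layout above, the dict then replaces the dynamically created variables; for example (assuming the files contain plain SQL text):

print(queries['2'])     # contents of folder2/2.sql, which shadows folder1/2.sql
print(sorted(queries))  # ['1', '2', '3']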

Rename and extract zipfile in python

I want to extract .zip files one by one. Before extracting, I need to rename each file.
myzip = zipfile.ZipFile(source, 'r')
for zib_e in myzip.namelist():
    filename = os.path.basename(zib_e)
    if not filename:
        continue
    print zib_e
    myzip.extract(zib_e, "/tmp/")
myzip.close()
The above code extracts all files into /tmp/. But I want to rename each file and save it in the destination directory, i.e. /tmp/, without the zipped structure.
After including the read function, I can manipulate the file name:
import os
import uuid
import zipfile

def guid1():
    uniqueid = uuid.uuid4()
    guid = str(uniqueid)
    return guid

def zipextract(source, destination):
    myzip = zipfile.ZipFile(source, 'r')
    for zib_e in myzip.namelist():
        filename = os.path.basename(zib_e)
        if not filename:
            continue
        print destination
        data = myzip.read(zib_e)
        output = open(destination + guid1() + ".txt", 'wb')  # exporting to given location one by one
        output.write(data)
        output.close()
    myzip.close()
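
One small tweak worth considering (an assumption about intent, since the self-answer hard-codes .txt): keep each member's original extension while still randomizing the name, inside the loop, something like:

root, ext = os.path.splitext(filename)
output = open(destination + guid1() + (ext or ".txt"), 'wb')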

How do I fix this file_tracker that reads/writes using JSON dictionaries?

I am trying to write a script that tracks for changes made in directories/files set to multiple file paths created by an installer. I found Thomas Sileo's DirTools project on git, modified it, but am now running into some issues when writing/reading from JSON:
1) First, I believe that I am writing to JSON incorrectly and am finding that my create_state() function is only writing the last path I need.
2) Even when writing appears to work, I am unable to read/parse the file like before; I usually get `ValueError: Extra data` errors.
Code below:
import os
import json
import getpass

files = []
subdirs = []

USER = getpass.getuser()
pathMac = ['/Applications/',
           '/Users/' + USER + '/Documents/']

def create_dir_index(path):
    files = []
    subdirs = []
    for root, dirs, filenames in os.walk(path):
        for subdir in dirs:
            subdirs.append(os.path.relpath(os.path.join(root, subdir), path))
        for f in filenames:
            files.append(os.path.relpath(os.path.join(root, f), path))
    return dict(files=files, subdirs=subdirs)

def create_state():
    for count in xrange(len(pathMac)):
        dir_state = create_dir_index(pathMac[count])
        out_file = open("Manifest.json", "w")
        json.dump(dir_state, out_file)
        out_file.close()

def compare_states(dir_base, dir_cmp):
    '''
    return a comparison of two manifest json files
    '''
    data = {}
    data['deleted'] = list(set(dir_cmp['files']) - set(dir_base['files']))
    data['created'] = list(set(dir_base['files']) - set(dir_cmp['files']))
    data['deleted_dirs'] = list(set(dir_cmp['subdirs']) - set(dir_base['subdirs']))
    data['created_dirs'] = list(set(dir_base['subdirs']) - set(dir_cmp['subdirs']))
    return data
if __name__ == '__main__':
    response = raw_input("Would you like to Compare or Create? ")
    if response == "Create":
        # CREATE MANIFEST json file
        create_state()
        print "Manifest file created."
    elif response == "Compare":
        # create the CURRENT state of all indexes in pathMac and write to json file
        for count in xrange(len(pathMac)):
            dir_state = create_dir_index(pathMac[count])
            out_file = open("CurrentState.json", "w")
            json.dump(dir_state, out_file)
            out_file.close()
        # Open and Load the contents from the file into dictionaries
        manifest = json.load(open("Manifest.json", "r"))
        current = json.load(open("CurrentState.json", "r"))
        print compare_states(current, manifest)
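
No answer is attached here, but both symptoms point at the same mistake: each loop iteration reopens the manifest with mode "w", so only the last path's index survives, and if the file is instead opened once and json.dump is called repeatedly, it ends up holding several JSON documents back to back, which is exactly what raises ValueError: Extra data on load. A minimal sketch of one fix, keeping the question's Python 2 style: collect all the indexes into a single list and dump it once.

def create_state():
    # One JSON document containing the index of every path in pathMac.
    states = [create_dir_index(p) for p in pathMac]
    with open("Manifest.json", "w") as out_file:
        json.dump(states, out_file)

# Comparing then pairs each current index with its saved counterpart:
manifest = json.load(open("Manifest.json", "r"))
current = [create_dir_index(p) for p in pathMac]
for now_state, saved in zip(current, manifest):
    print compare_states(now_state, saved)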
