Add one more value to csv from text file in python - python

I am converting multiple text files to a csv file. My text file looks like this:
ITEM: TIMESTEP
55000
ITEM: NUMBER OF ATOMS
4365
ITEM: BOX BOUNDS ff ff ff
-0.3 0.3
-0.6 0.6
-0.6 0.6
ITEM: ATOMS id type x y z vx vy vz fx fy fz omegax omegay omegaz radius
4356 1 -0.0885288 -0.0101421 -0.48871 -0.000941682 0.778688 -0.0153902 -0.00720861 -0.0533703 0.0104717 0.35581 -0.0601358 -0.436049 0.01
4227 1 0.0157977 0.00542603 -0.488429 -0.00996111 0.784119 0.00813807 -0.000491847 0.0144889 -0.0120111 1.08208 -0.0671177 0.369492 0.01
3973 1 0.0179724 0.0256167 -0.48799 -0.00582994 0.772455 0.0394544 0.0109589 -0.0187232 -0.00111718 -0.0586513 -0.162943 1.12784 0.01
4300 1 0.0900919 0.0248592 -0.488025 -0.000455483 0.769978 0.0388239 -0.00364509 0.0409803 -0.00269227 3.94355 -0.0249566 -0.223111 0.01
4200 1 -0.0230223 0.0329911 -0.483108 -0.00238 0.778547 0.0500186 0.0421189 -0.021588 0.05607 0.112989 -0.0813771 -1.09981 0.015
4339 1 0.00143577 0.0368542 -0.488107 0.000587848 0.784672 0.0593572 0.00385562 -0.00475113 -0.00710483 -0.201196 0.158512 -5.63826 0.01
4106 1 0.0648392 0.0269728 -0.483248 -0.00365836 0.766081 0.0395827 0.0418642 0.1802 0.0547313 -0.0578358 0.124205 -0.96464 0.015
4104 1 -0.084453 0.0507114 -0.482726 -0.000596577 0.75636 0.0806599 0.000817826 0.0119286 -0.0150014 -0.0864852 -0.103877 0.198773 0.015
Right now my csv file contains value after line 9 (in python code line 8).
I want to include line 2 (Header - TIMESTEP) also in csv along with all the values after 9.
I tried to edit my code but couldn't succeed. Can I get some help:
My code is here:
import numpy as np
import pandas as pd
import csv
import glob
import time


def main():
    """Merge every ./all/*dump*.data LAMMPS dump into a single all.csv.

    Each output row is one atom record with the file's TIMESTEP value
    appended as the last column (the behaviour the asker wants).
    """
    start = time.time()
    data_folder = "./all/"  # folder name
    files = glob.glob(data_folder + '*dump*.data')
    print("Total files:", len(files))

    # Build the header once from one sample file: line 9 holds
    # "ITEM: ATOMS id type x y z ..." -> drop the first two tokens.
    with open('all/dump46000.data', 'r') as f:
        for _ in range(8):
            next(f)  # skip lines 1-8 (TIMESTEP block, atom count, box bounds)
        header = ','.join(f.readline().split()[2:]) + ',TIMESTEP\n'
    print(header)

    with open('all.csv', 'a') as g:  # note the 'a': repeated runs append
        g.write(header)  # write the header exactly once, not per file
        for file in files:
            with open(file, 'r') as f:
                next(f)                           # "ITEM: TIMESTEP"
                timestep = f.readline().strip()   # the timestep value itself
                for _ in range(7):
                    next(f)                       # rest of the preamble
                for line in f:
                    # One atom per line; append this file's timestep.
                    g.write(line.rstrip().replace(' ', ',') + ',' + timestep + '\n')
    print(time.time() - start)


if __name__ == "__main__":
    main()
My folder all contains more than 600 files:
['./all/dump501000.data',
'./all/dump307000.data',
'./all/dump612000.data',
'./all/dump369000.data',
'./all/dump23000.data',
'./all/dump470000.data',
'./all/dump235000.data',
'./all/dump6000.data',
'./all/dump568000.data',
'./all/dump506000.data',
'./all/dump623000.data',
'./all/dump329000.data',
'./all/dump220000.data',
.....................
....................
I want this csv file from text file:
id type x y z vx vy vz fx fy fz omegax omegay omegaz radius TIMESTEP
But I am getting this csv
id type x y z vx vy vz fx fy fz omegax omegay omegaz radius
Thank you

Here is something you can try to add TIMESTEP alongside your data in the csv. I am just wondering whether you really need to print the header for each file. My understanding is that you can print the header once at the top; if you want it for each file instead, move that write into the for loop.
import numpy as np
import pandas as pd
import csv
import glob
import time


def main():
    """Merge every ./all/*dump*.data file into all.csv, prefixing each
    row with the TIMESTEP of the file it came from."""
    start = time.time()
    data_folder = "./all/"  # folder name
    files = glob.glob(data_folder + '*dump*.data')
    print("Total files:", len(files))

    # Build the header once from one sample file:
    # line 1 is "ITEM: TIMESTEP"        -> keep the word TIMESTEP,
    # line 9 is "ITEM: ATOMS id type .."-> keep the column names.
    header = []
    with open('all/dump46000.data', 'r') as f:
        header.extend(f.readline().split()[1:])   # ['TIMESTEP']
        for _ in range(7):
            next(f)  # skip lines 2-8 (timestep value, atom count, box bounds)
        header.extend(f.readline().split()[2:])   # atom column names
    print(header)
    headerString = ','.join(header)

    with open('all.csv', 'a') as g:  # note the 'a': repeated runs append
        # Write the header exactly once (the original rewrote it for
        # every input file, duplicating it throughout the CSV).
        g.write(headerString + '\n')
        for file in files:
            with open(file, 'r') as f:
                next(f)                          # "ITEM: TIMESTEP"
                timeStep = f.readline().split()  # e.g. ['55000']
                for _ in range(7):
                    next(f)                      # rest of the preamble
                for line in f:
                    file_line = line.split()
                    file_line.insert(0, timeStep[0])  # timestep as column 0
                    g.write(','.join(file_line) + '\n')
    print(time.time() - start)


if __name__ == "__main__":
    main()

Based on what you want, here's what should work
import numpy as np
import pandas as pd
import csv
import glob
import time


def main():
    """Append each ./all/*dump*.data file to all.csv as a per-file
    section: TIMESTEP header, the timestep value, the column names,
    then one CSV row per atom."""
    start = time.perf_counter()
    data_folder = "./all/"  # folder name
    files = glob.glob(data_folder + '*dump*.data')
    print("Total files:", len(files))
    for file in files:
        with open(file, 'r') as f, open('all.csv', 'a') as g:  # note the 'a'
            header = f.readline().split("ITEM: ")[1]  # "TIMESTEP\n" (no extra blank line)
            timestep = f.readline()                   # the timestep value
            print(header)
            g.write(header)
            g.write(timestep)
            for _ in range(6):
                next(f)  # skip atom count and box bounds (lines 3-8)
            # Line 9 is "ITEM: ATOMS id type x y ..." -> drop the two
            # "ITEM: ATOMS" tokens so the CSV column header is clean
            # (the original wrote the raw line, spaces and all).
            g.write(','.join(f.readline().split()[2:]) + '\n')
            for line in f:
                g.write(line.rstrip().replace(' ', ',') + '\n')
    print(time.perf_counter() - start)


if __name__ == "__main__":
    main()
Let me know if you need any other syntax or something else in the final CSV.
Also, to time something, always use time.perf_counter — it's more accurate.

Related

Take in values from CSV to use in a function

I have written a function that takes in the normal force, mass, acceleration, and coefficient of friction and calculates the applied force. I have the values of the parameter for which I need the applied force to be calculated. How do I take in the value from CSV and calculate the applied force. I have tried many times but could not figure it out. Here's my code:
import matplotlib.pyplot as plt
import csv
import math
def forceAppliedCalc(mass, acceleration, normalForce, muVal):
    """Return the magnitude of the applied force for the given mass,
    acceleration, normal force and coefficient of friction."""
    # Vertical component: weight reaction (9.8*m) minus the normal force.
    force_y = 9.8 * mass - normalForce
    # Horizontal component: net accelerating force plus friction.
    force_x = mass * acceleration + muVal * normalForce
    return math.hypot(force_x, force_y)
# Read Data.csv into a dict mapping column name -> list of cell strings.
data = dict()
headers = []
headerRead = False
with open("Data.csv") as file:  # context manager: the original leaked the handle
    reader = csv.reader(file, delimiter=",")
    for row in reader:
        if not headerRead:
            # First row: column names become the dict keys.
            for name in row:
                data[name] = []
            headers = row
            headerRead = True
        else:
            # Subsequent rows: append each cell to its column's list.
            for i in range(len(row)):
                data[headers[i]].append(row[i])
And, here's the CSV file I am working with:
Normal,Acceleration,Mass,Mu,Name,Guess
300,0.333,40,0.525,Alf,150
300,0.333,40,0.525,Benny,160
300,0.333,40,0.525,Claire,170
250,0.2,50,0.3,Claire,250
250,0.2,50,0.3,Alf,265
250,0.2,50,0.3,Benny,255
260,0.4,55,0.32,Claire,280
260,0.4,55,0.32,Alf,284
260,0.4,55,0.32,Benny,300
280,0.3,60,0.4,Benny,340
280,0.3,60,0.4,Claire,360
280,0.3,60,0.4,Alf,330
210,0.14,90,0.6,Alf,700
210,0.14,90,0.6,Benny,800
210,0.14,90,0.6,Claire,600
140,0.167,45,0.144,Claire,300
140,0.167,45,0.144,Alf,145
140,0.167,45,0.144,Benny,167
60,1.2,130,0.178,Claire,1225
60,1.2,130,0.178,Alf,1444
60,1.2,130,0.178,Benny,1467
625,0.9,50,0.35,Benny,200
625,0.9,50,0.35,Claire,250
625,0.9,50,0.35,Alf,213
266,0.12,57,0.787,Alf,370
266,0.12,57,0.787,Benny,567
266,0.12,57,0.787,Claire,809
267,0.268,115,0.235,Benny,900
267,0.268,115,0.235,Claire,905
267,0.268,115,0.235,Alf,1020
Thanks in advance
You can try using pandas, a well-known library for data processing.
Sample code:
import math
import pandas as pd
def forceAppliedCalc(mass, acceleration, normalForce, muVal):
    """Applied-force magnitude from mass, acceleration, normal force
    and friction coefficient (Pythagorean sum of the components)."""
    vertical = -(-9.8 * mass) - normalForce
    horizontal = (mass * acceleration) + (muVal * normalForce)
    return math.sqrt(vertical ** 2 + horizontal ** 2)
# Load the measurements and append a computed "force" column.
frame = pd.read_csv('abcd.csv')
# Row-wise: unpack Mass, Acceleration, Normal, Mu into the calculator.
inputs = frame[['Mass', 'Acceleration', 'Normal', 'Mu']]
frame['force'] = inputs.apply(lambda row: forceAppliedCalc(*row), axis=1)
print(frame.head())
Output
Normal Acceleration Mass Mu Name Guess force
0 300 0.333 40 0.525 Alf 150 194.019258
1 300 0.333 40 0.525 Benny 160 194.019258
2 300 0.333 40 0.525 Claire 170 194.019258
3 250 0.200 50 0.300 Claire 250 254.607541
4 250 0.200 50 0.300 Alf 265 254.607541
In case you don't want to use pandas, you can achieve your goal via a complicated python zip, list and map, for example:
# Notice that data is a dictionary of (string: list of string)
force = [forceAppliedCalc(*map(float, params)) for params in zip(data['Mass'], data['Acceleration'], data['Normal'], data['Mu'])]
Output:
[194.01925780705378, 194.01925780705378, 194.01925780705378, 254.60754112948035, 254.60754112948035, 254.60754112948035, 298.1745126599522, 298.1745126599522, 298.1745126599522, 334.3112322372672, 334.3112322372672, 334.3112322372672, 686.1442705437394, 686.1442705437394, 686.1442705437394, 302.269590969717, 302.269590969717, 302.269590969717, 1225.3890086009421, 1225.3890086009421, 1225.3890086009421, 296.29219108845916, 296.29219108845916, 296.29219108845916, 363.79859417540365, 363.79859417540365, 363.79859417540365, 865.0747997861225, 865.0747997861225, 865.0747997861225]
First, welcome to SOF!
I think a simple approach to what you are asking could be the following script (trying to keep it as simple and as close to your original code as possible):
import csv
import math
def force_applied_calc(mass, acceleration, normal_force, mu_val):
    """Return the applied-force magnitude for the given parameters."""
    fy = -(-9.8 * mass) - normal_force           # vertical component
    fx = (mass * acceleration) + (mu_val * normal_force)  # horizontal component
    return math.sqrt(fx * fx + fy * fy)
if __name__ == '__main__':
    data = []
    headers = []
    save_data = False  # toggle: set True to rewrite Data.csv with the Force column
    with open('Data.csv', 'r') as read_obj:
        csv_dict_reader = csv.DictReader(read_obj)
        headers = csv_dict_reader.fieldnames
        for csv_dict in csv_dict_reader:
            # Compute the force for this row and attach it under "Force".
            # Cells arrive as strings, so convert before the arithmetic.
            csv_dict.update(
                {
                    "Force": force_applied_calc(
                        int(csv_dict['Mass']),
                        float(csv_dict['Acceleration']),
                        int(csv_dict['Normal']),
                        float(csv_dict['Mu'])
                    )
                }
            )
            data.append(csv_dict)
            print(csv_dict)
    # Overwrite file with new data (only when save_data is enabled and
    # the Force column isn't already present).
    if save_data and 'Force' not in headers:
        headers.append('Force')
        with open('Data.csv', 'w', newline='') as write_obj:
            csv_dict_writer = csv.DictWriter(write_obj, delimiter=',', fieldnames=headers)
            csv_dict_writer.writeheader()
            csv_dict_writer.writerows(data)
Note: #tandat it's a really good answer.
Something like this would help.
import csv

# NOTE(review): forceAppliedCalc must be defined/imported elsewhere.
# Context managers close both files (the original never closed output.csv,
# risking lost buffered rows).
with open('file.csv', 'r') as file, open('output.csv', 'a', newline='') as final_file:
    writer = csv.writer(final_file)
    # strip the newline so the last header cell isn't "Guess\n".
    header = next(file).rstrip('\n').split(",")  # exclude header
    header.append("appliedForce")
    writer.writerow(header)  # add header to new output file
    reader = csv.reader(file, delimiter=',')
    for row in reader:
        # Cells come back as strings: convert before doing arithmetic
        # (the original passed strings straight into the calculator).
        appliedForce = forceAppliedCalc(float(row[2]), float(row[1]),
                                        float(row[0]), float(row[3]))
        row.append(appliedForce)
        writer.writerow(row)

How can i align replaced text file in python?

I changed part of the column data, but after I replaced it in the new text file, the columns are not aligned.
i send to you the files (input, output files), also you can see the following image.
Thank you for you answer!
Note: I read two columns in datar.csv
You can see the files: https://www.shorturl.at/luFX7
import pandas as pd

df = pd.read_csv('datar.csv')

# Replace, line by line, each original value (data_o) with its
# replacement (data_r); csv row j pairs with line j of input.txt.
# NOTE(review): assumes datar.csv has at least as many rows as
# input.txt has lines — confirm, otherwise a KeyError is raised.
with open("input.txt", "rt") as fin, open("output.txt", "wt") as fout:
    for j, line in enumerate(fin):
        ori = str(df["data_o"][j])
        ree = str(df["data_r"][j])
        fout.write(line.replace(ori, ree))
I want it to be like this picture
Maybe add an default width:
# e.g: 6 characters lenght
# [...]
line = line.replace(ori, ree) # get line after convert
line_original_width = len(line) # get lenght of line
if line_original_width < 6: # check if fill is necessary
fill = 6 - line_original_width # get amount of necessary fill
line = (" " * fill) + line # fill the left with space
fout.write(line) # write all in the file
# [...]
Else, you can use the f-string to do the same things. But I can't show you an example because I'm not very good at.
Ok,
There's a more complicated way for your code, but I'm sure it will work.
This program will make a default width for each column.
import pandas as pd

df = pd.read_csv('datar.csv')
j = 0
# NOTE(review): j is never advanced, so every line uses csv row 0 — the
# asker's first script incremented j per line; confirm which is intended.
with open("input.txt", "rt") as fino:
    fin = fino.read().split("\n")

# One row (list of fields) per input line.
# BUG FIX: the original used "[[]] * len(fin)", which repeats the SAME
# list object, and then appended each fields-list into that shared list;
# the padding step then crashed on list-typed cells.
out = [None] * len(fin)

# --- decode part: replace values and split each line into fields ---
for iter_count, line in enumerate(fin):
    ori = str(df["data_o"][j])
    ree = str(df["data_r"][j])
    line = line.replace(ori, ree)
    fields = line.split(" ")
    while "" in fields:
        fields.remove("")          # drop empty tokens from repeated spaces
    out[iter_count] = fields       # was: out[iter_count].append(line)

# --- padding part: right-align each column to its widest cell + 1 ---
column_count = len(out[0])
row_count = len(out)
for column in range(column_count):
    maxl = max(len(out[row][column]) for row in range(row_count)) + 1
    for row in range(row_count):
        pad = " " * (maxl - len(out[row][column]))
        out[row][column] = pad + out[row][column]

# --- writing part ---
with open("output.txt", "wt") as fout:   # also ensures the file is closed
    for row in out:
        for num in row:
            fout.write(num)
        fout.write("\n")
Watch-out, I modified the top part of your original code.
Mention me if you publish.

How to calculate moving average for temperature?

This is the output I need:
Temperature anomaly filename:SacramentoTemps.csv
Enter window size:60
1940,-0.2331
1941,-0.2169
1942,-0.2150
1943,-0.2228
1944,-0.2107
1945,-0.1796
1946,-0.1667
1947,-0.1582
1948,-0.1585
1949,-0.1492
1950,-0.1711
1951,-0.1688
1952,-0.1490
1953,-0.1556
1954,-0.1548
1955,-0.1580
1956,-0.1420
1957,-0.1101
1958,-0.1017
This is my code:
filename = input("Temperature anomaly filename:")
k = int(input("Enter window size:"))

with open(filename, "r") as infile:  # context manager replaces open/close
    infile.readline()  # skip the header line
    temp_list = []
    for line in infile:
        year, temp = line.strip().split(",")
        temp_list.append(float(temp))

# Trailing moving average: the value printed for a year is the mean of
# the k observations before it (first printed year = 1880 + k).
# BUG FIX: the original divided by (2*index+1) instead of k, and its
# loop range shrank from both ends, cutting the output short.
for index in range(k, len(temp_list)):
    year = 1880 + index
    ave = sum(temp_list[index - k:index]) / k
    print(str(year) + "," + "{:.4f}".format(ave))
My code currently prints out up until the year 1957 and it prints out the wrong averages for each year. What do I need to fix?
filename = "SacramentoTemps.csv"
k = int(input("Enter window size:"))

with open(filename, "r") as infile:  # context manager replaces open/close
    years = []
    temp_list = []
    for line in infile:
        year, temp = line.strip().split(",")
        years.append(year)
        temp_list.append(float(temp))

# BUG FIX: the original seeded the window sum with `temp` and then added
# temp_list[i] again (double-counting the first element), and printed a
# single un-averaged sum after the loop instead of one line per window.
moving_average = []
for i, temp in enumerate(temp_list):
    if len(temp_list) - i < k:
        break  # not enough values left for a full window
    average = sum(temp_list[i:i + k]) / k
    moving_average.append(average)
    print(years[i] + "," + "{:.4f}".format(average))
I coded this with the aim of modifying your code as little as possible.
One thing to note: your file needs to be longer than the window size.
Using pandas would be most sane way to go:
import pandas as pd

filename = "SacramentoTemps.csv"
window = 2  # moving-average window size (rows)

data = pd.read_csv(filename)
# Rolling mean over `window` rows; the first window-1 entries come back
# NaN, so fall back to the raw temperature there.
data.temperature.rolling(window = window).mean().fillna(data.temperature)

pandas data-frame continuously update

please see the pandas based Patten scanner, here i am using csv as data source and loading the same in to data.
since data is loading from csv file, i have to reload/rerun the script every 5 min to read the updated csv file hence repeating the plot every 5min.
is there any way to use df.update to avoid reloading of the script and prevent the reloading of plot again and again.
import pandas as pd
import numpy as np
from scipy.signal import argrelextrema
import matplotlib.pyplot as plt
from harmonic_functions import *
import uuid
from csv import DictReader

# Load the whole price history from csv (the asker re-runs this script
# every 5 minutes to pick up new rows and wants to avoid that).
data = pd.read_csv('temp.csv')
data.time = pd.to_datetime(data.time,format='%d.%m.%Y %H:%M:%S.%f')
data.index = data['time']
# data = data.drop_duplicates(keep=False)
price = data.close.copy()

err_allowed = 10.0/100  # tolerance passed to the is_* pattern tests (10%)

pnl = []
trade_dates=[]
correct_pats=0  # NOTE(review): never updated below
pats=0          # running count of detected patterns

# plt.ion()

# Walk forward bar by bar; at step i only the first i closes are visible.
for i in range (100,len(price)):
    # NOTE(review): peak_detect comes from harmonic_functions; it appears
    # to return swing-point indices/values plus a plot window — confirm
    # against that module.
    current_idx,current_pat,start,end = peak_detect(price.values[:i],order=7)

    # The four legs of the candidate XABCD pattern.
    XA = current_pat[1] - current_pat[0]
    AB = current_pat[2] - current_pat[1]
    BC = current_pat[3] - current_pat[2]
    CD = current_pat[4] - current_pat[3]
    moves = [XA,AB,BC,CD]

    # Each detector presumably returns 1 (bullish) / -1 (bearish) or
    # something else on no match — TODO confirm in harmonic_functions.
    gart = is_gartley(moves,err_allowed)
    butt = is_butterfly(moves,err_allowed)
    bat = is_bat(moves,err_allowed)
    crab = is_crab(moves,err_allowed)
    shark = is_shark(moves,err_allowed)
    trio = is_trio(moves,err_allowed)
    cyph = is_cyph(moves,err_allowed)
    three_dives = is_3dives(moves, err_allowed)
    fivezero = is_50(moves, err_allowed)
    altbat = is_altbat(moves, err_allowed)
    deepcrab = is_deepcrab(moves, err_allowed)
    dragon = is_dragon(moves, err_allowed)
    snorm = is_snorm(moves, err_allowed)

    harmonics = np.array([gart,butt,bat,crab,shark,trio,cyph,three_dives,fivezero,altbat,deepcrab,dragon,snorm])
    labels = ['Garterly','Butterfly','Bat','Crab','Shark','Trio','Cypher','3Dives','5Zero','AltBat','DeepCrab','Dragon','Snorm']

    if np.any(harmonics == 1) or np.any(harmonics == -1):
        for j in range (0,len(harmonics)):
            if harmonics[j] == 1 or harmonics[j]==-1:
                pats+=1
                sense = 'Bearish ' if harmonics[j]==-1 else 'Bullish '
                label = sense + labels[j] + ' found'
                print(label)
                print(price.values[start])

                # Plot the detected pattern and save under a short
                # random filename.
                plt.title(label)
                plt.plot(np.arange(start,i+5),price.values[start:i+5])
                plt.scatter(current_idx,current_pat,c='r')
                filename = str(uuid.uuid1())[:8]
                print(current_pat)
                print(current_idx)

                # with open('temp.csv', mode='r') as csv_file:
                #     file = DictReader(csv_file, delimiter=',')
                #     close = str(current_pat[4])
                #     print(current_pat)
                #     rows = [row for row in file if row['close'] in close]
                #     closetime = rows[-1]['ID']
                #     print(closetime)

                # De-duplicate: append this pattern's signature to the
                # "datadb" file only if an identical line isn't already
                # there (the for/else fires at EOF when no line matched).
                write1 = str(current_idx)
                write2 = str(current_pat)
                write = write1 + ',' + write2
                print(write)
                with open("datadb", "r+") as file:
                    for line in file:
                        if write in line:
                            break
                    else: # not found, we are at the eof
                        file.write(f"{write}\n") # append missing data
                print(filename)
                plt.savefig(filename)
                plt.close(filename)
                # plt.show()
                plt.clf()

Rewind the file pointer to the beginning of the previous line

I am doing text processing and using 'readline()' function as follows:
ifd = open(...)
for line in ifd:
while (condition)
do something...
line = ifd.readline()
condition = ....
#Here when the condition becomes false I need to rewind the pointer so that the 'for' loop read the same line again.
ifd.seek() followed by readline() is giving me a '\n' character. How do I rewind the pointer so that the whole line is read again? (Python file objects have seek(), not fseek().)
>>> ifd.seek(-1,1)
>>> line = ifd.readline()
>>> line
'\n'
Here is my code
# NOTE(review): this is Python 2 code (print statement, cmp()); the
# flush-left indentation in the question was reconstructed here and
# should be confirmed against the asker's original file.
labtestnames = sorted(tmp)

# Now read each line in the inFile and write into outFile
ifd = open(inFile, "r")
ofd = open(outFile, "w")

# read the header
header = ifd.readline() # Do nothing with this line. Skip

# Write header into the output file: fixed id columns followed by one
# column per known lab-test name.
nl = "mrn\tspecimen_id\tlab_number\tlogin_dt\tfluid"
offset = len(nl.split("\t"))  # index of the first lab-test column
nl = nl + "\t" + "\t".join(labtestnames)
ofd.write(nl+"\n")
lenFields = len(nl.split("\t"))
print "Reading the input file and converting into modified file for further processing (correlation analysis etc..)"
prevTup = (0,0,0)   # (mrn, lab_number, specimen_id) of the previous row
rowComplete = 0
k=0
for line in ifd:
    k=k+1
    if (k==200): break  # debugging cap: only process the first 200 lines
    items = line.rstrip("\n").split("\t")
    if((items[0] =='')):
        continue  # skip rows with an empty mrn
    newline= list('' for i in range(lenFields))
    newline[0],newline[1],newline[3],newline[2],newline[4] = items[0], items[1], items[3], items[2], items[4]
    ltests = []
    ltvals = []
    # Consume consecutive lines belonging to the same record, saving the
    # file position before each read so it can be rewound afterwards.
    while(cmp(prevTup, (items[0], items[1], items[3])) == 0): # If the same mrn, lab_number and specimen_id then fill the same row. else create a new row.
        ltests.append(items[6])
        ltvals.append(items[7])
        pos = ifd.tell()        # remember where the next line starts
        line = ifd.readline()
        prevTup = (items[0], items[1], items[3])
        items = line.rstrip("\n").split("\t")
        rowComplete = 1
    if (rowComplete == 1): #If the row is completed, prepare newline and write into outfile
        indices = [labtestnames.index(x) for x in ltests]
        j=0
        ifd.seek(pos)  # rewind so the line that ended the group is re-read
        for i in indices:
            newline[i+offset] = ltvals[j]
            j=j+1
    if (rowComplete == 0): #
        currTup = (items[0], items[1], items[3])
        ltests = items[6]
        ltvals = items[7]
        pos = ifd.tell()
        line = ifd.readline()
        items = line.rstrip("\n").split("\t")
        newTup = (items[0], items[1], items[3])
        if(cmp(currTup, newTup) == 0):
            prevTup = currTup
            ifd.seek(pos)  # rewind: the lookahead line belongs to the group
            continue
        else:
            indices = labtestnames.index(ltests)
            newline[indices+offset] = ltvals
            # NOTE(review): newline is a list, so this write raises
            # TypeError as written — presumably "\t".join(newline) was
            # intended; kept as in the question.
            ofd.write(newline+"\n")
The problem can be handled more simply using itertools.groupby. groupby can cluster all the contiguous lines that deal with the same mrn, specimen_id, and lab_num.
The code that does this is
for key, group in IT.groupby(reader, key = mykey):
where reader iterates over the lines of the input file, and mykey is defined by
def mykey(row):
    """Grouping key for itertools.groupby: (mrn, specimen_id, lab_num)."""
    return tuple(row[field] for field in ('mrn', 'specimen_id', 'lab_num'))
Each row from reader is passed to mykey, and all rows with the same key are clustered together in the same group.
While we're at it, we might as well use the csv module to read each line into a dict (which I call row). This frees us from having to deal with low-level string manipulation like line.rstrip("\n").split("\t") and instead of referring to columns by index numbers (e.g. row[3]) we can write code that speaks in higher-level terms such as row['lab_num'].
import itertools as IT
import csv
inFile = 'curious.dat'
outFile = 'curious.out'
def mykey(row):
    """Return the (mrn, specimen_id, lab_num) tuple used to cluster
    contiguous rows of the same record."""
    key_fields = ('mrn', 'specimen_id', 'lab_num')
    return tuple(row[name] for name in key_fields)
fieldnames = 'mrn specimen_id date lab_num Bilirubin Lipase Calcium Magnesium Phosphate'.split()

# Python 2 csv idiom: open the files in binary mode ('rb'/'wb').
with open(inFile, 'rb') as ifd:
    reader = csv.DictReader(ifd, delimiter = '\t')
    with open(outFile, 'wb') as ofd:
        writer = csv.DictWriter(
            ofd, fieldnames, delimiter = '\t', lineterminator = '\n', )
        writer.writeheader()
        # Contiguous input rows sharing (mrn, specimen_id, lab_num)
        # collapse into one output row.
        for key, group in IT.groupby(reader, key = mykey):
            new = {}
            row = next(group)
            # Copy the identifying columns from the group's first row.
            for key in ('mrn', 'specimen_id', 'date', 'lab_num'):
                new[key] = row[key]
            new[row['labtest']] = row['result_val']
            # Every remaining row contributes one labtest -> result cell.
            for row in group:
                new[row['labtest']] = row['result_val']
            writer.writerow(new)
yields
mrn specimen_id date lab_num Bilirubin Lipase Calcium Magnesium Phosphate
4419529 1614487 26.2675 5802791G 0.1
3319529 1614487 26.2675 5802791G 0.3 153 8.1 2.1 4
5713871 682571 56.0779 9732266E 4.1
This seems to be a perfect use case for yield expressions. Consider the following example that prints lines from a file, repeating some of them at random:
def buflines(fp):
    """Yield lines from *fp*; a value handed back via .send(value) is
    replayed as the next yielded item (the send itself returns None)."""
    pushed = None
    while True:
        # Yield the pushed-back value if there is one, else the next line.
        pushed = yield (pushed or next(fp))
        if pushed:
            yield None  # acknowledge the send(); the replay happens above
from random import randint

# Demo (Python 2 print syntax): echo the file, and ~20% of the time push
# the line back into the generator so the next iteration yields the very
# same line again.
with open('filename') as fp:
    buf = buflines(fp)
    for line in buf:
        print line
        if randint(1, 100) > 80:
            print 'ONCE AGAIN::'
            # send() hands the line back; buflines replies with a
            # throwaway None, then re-yields the line on the next pass.
            buf.send(line)
Basically, if you want to process an item once again, you send it back to the generator. On the next iteration you will be reading the same item once again.

Categories