Take in values from a CSV to use in a function - Python
I have written a function that takes in the normal force, mass, acceleration, and coefficient of friction and calculates the applied force. I have the parameter values in a CSV file and need the applied force calculated for each row. How do I read the values from the CSV and pass them to the function? I have tried many times but could not figure it out. Here's my code:
import matplotlib.pyplot as plt
import csv
import math

def forceAppliedCalc(mass, acceleration, normalForce, muVal):
    forceYcomp = -(-9.8 * mass) - normalForce
    forceXcomp = (mass * acceleration) + (muVal * normalForce)
    return math.sqrt(math.pow(forceXcomp, 2) + math.pow(forceYcomp, 2))

file = open("Data.csv")
reader = csv.reader(file, delimiter=",")
data = dict()
headerRead = False
headers = []
for row in reader:
    if headerRead == False:
        for i in range(len(row)):
            data[row[i]] = []
        headers = row
        headerRead = True
    else:
        for i in range(len(row)):
            data[headers[i]].append(row[i])
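For clarity, here is what the loop above leaves me with. Every value in data is still a string, which I suspect is part of my problem:

# What the parsing loop produces: lists of strings, keyed by header.
print(data['Mass'][:3])        # ['40', '40', '40']
print(type(data['Mass'][0]))   # <class 'str'> -- needs converting before any math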
And, here's the CSV file I am working with:
Normal,Acceleration,Mass,Mu,Name,Guess
300,0.333,40,0.525,Alf,150
300,0.333,40,0.525,Benny,160
300,0.333,40,0.525,Claire,170
250,0.2,50,0.3,Claire,250
250,0.2,50,0.3,Alf,265
250,0.2,50,0.3,Benny,255
260,0.4,55,0.32,Claire,280
260,0.4,55,0.32,Alf,284
260,0.4,55,0.32,Benny,300
280,0.3,60,0.4,Benny,340
280,0.3,60,0.4,Claire,360
280,0.3,60,0.4,Alf,330
210,0.14,90,0.6,Alf,700
210,0.14,90,0.6,Benny,800
210,0.14,90,0.6,Claire,600
140,0.167,45,0.144,Claire,300
140,0.167,45,0.144,Alf,145
140,0.167,45,0.144,Benny,167
60,1.2,130,0.178,Claire,1225
60,1.2,130,0.178,Alf,1444
60,1.2,130,0.178,Benny,1467
625,0.9,50,0.35,Benny,200
625,0.9,50,0.35,Claire,250
625,0.9,50,0.35,Alf,213
266,0.12,57,0.787,Alf,370
266,0.12,57,0.787,Benny,567
266,0.12,57,0.787,Claire,809
267,0.268,115,0.235,Benny,900
267,0.268,115,0.235,Claire,905
267,0.268,115,0.235,Alf,1020
Thanks in advance
You can try using pandas, a well-known library for data processing.
Sample code:
import math
import pandas as pd

def forceAppliedCalc(mass, acceleration, normalForce, muVal):
    forceYcomp = -(-9.8 * mass) - normalForce
    forceXcomp = (mass * acceleration) + (muVal * normalForce)
    return math.sqrt(math.pow(forceXcomp, 2) + math.pow(forceYcomp, 2))

csv = pd.read_csv('abcd.csv')
csv['force'] = csv[['Mass', 'Acceleration', 'Normal', 'Mu']].apply(lambda x: forceAppliedCalc(*x), axis=1)
print(csv.head())
Output
Normal Acceleration Mass Mu Name Guess force
0 300 0.333 40 0.525 Alf 150 194.019258
1 300 0.333 40 0.525 Benny 160 194.019258
2 300 0.333 40 0.525 Claire 170 194.019258
3 250 0.200 50 0.300 Claire 250 254.607541
4 250 0.200 50 0.300 Alf 265 254.607541
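As a side note, the same column can be computed without apply at all, using vectorized arithmetic on whole columns; a minimal sketch, assuming the same csv DataFrame as above and the formula from forceAppliedCalc:

import numpy as np

# Whole-column version of the row-wise apply() above.
fy = 9.8 * csv['Mass'] - csv['Normal']                               # forceYcomp per row
fx = csv['Mass'] * csv['Acceleration'] + csv['Mu'] * csv['Normal']   # forceXcomp per row
csv['force'] = np.sqrt(fx ** 2 + fy ** 2)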
In case you don't want to use pandas, you can achieve your goal with a combination of zip, map, and a list comprehension, for example:
# Notice that data is a dictionary of (string: list of string)
force = [forceAppliedCalc(*map(float, params)) for params in zip(data['Mass'], data['Acceleration'], data['Normal'], data['Mu'])]
Output:
[194.01925780705378, 194.01925780705378, 194.01925780705378, 254.60754112948035, 254.60754112948035, 254.60754112948035, 298.1745126599522, 298.1745126599522, 298.1745126599522, 334.3112322372672, 334.3112322372672, 334.3112322372672, 686.1442705437394, 686.1442705437394, 686.1442705437394, 302.269590969717, 302.269590969717, 302.269590969717, 1225.3890086009421, 1225.3890086009421, 1225.3890086009421, 296.29219108845916, 296.29219108845916, 296.29219108845916, 363.79859417540365, 363.79859417540365, 363.79859417540365, 865.0747997861225, 865.0747997861225, 865.0747997861225]
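If the one-liner is hard to read, an expanded loop that does the same thing (a sketch over the same data dict of string lists from the question) is:

# Step-by-step equivalent of the comprehension above.
force = []
for mass, acc, normal, mu in zip(data['Mass'], data['Acceleration'], data['Normal'], data['Mu']):
    # every CSV field arrives as a string, so cast to float before the math
    force.append(forceAppliedCalc(float(mass), float(acc), float(normal), float(mu)))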
First, welcome to Stack Overflow!
I think a simple approach to what you are asking is the following script (kept as close as possible to your original code):
import csv
import math

def force_applied_calc(mass, acceleration, normal_force, mu_val):
    force_y_comp = -(-9.8 * mass) - normal_force
    force_x_comp = (mass * acceleration) + (mu_val * normal_force)
    return math.sqrt(math.pow(force_x_comp, 2) + math.pow(force_y_comp, 2))

if __name__ == '__main__':
    data = []
    headers = []
    save_data = False
    with open('Data.csv', 'r') as read_obj:
        csv_dict_reader = csv.DictReader(read_obj)
        headers = csv_dict_reader.fieldnames
        for csv_dict in csv_dict_reader:
            csv_dict.update(
                {
                    "Force": force_applied_calc(
                        int(csv_dict['Mass']),
                        float(csv_dict['Acceleration']),
                        int(csv_dict['Normal']),
                        float(csv_dict['Mu'])
                    )
                }
            )
            data.append(csv_dict)
            print(csv_dict)

    # Overwrite file with new data.
    if save_data and 'Force' not in headers:
        headers.append('Force')
        with open('Data.csv', 'w', newline='') as write_obj:
            csv_dict_writer = csv.DictWriter(write_obj, delimiter=',', fieldnames=headers)
            csv_dict_writer.writeheader()
            csv_dict_writer.writerows(data)
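Note that save_data is False by default, so the script only prints each computed row; set save_data = True to add the Force column to the headers and overwrite Data.csv with the enriched rows.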
Note: @tandat's answer is a really good one.
Something like this would help (reusing forceAppliedCalc from the question, and casting the CSV strings to float before doing the math):

import csv
import math

# forceAppliedCalc as defined in the question
def forceAppliedCalc(mass, acceleration, normalForce, muVal):
    forceYcomp = -(-9.8 * mass) - normalForce
    forceXcomp = (mass * acceleration) + (muVal * normalForce)
    return math.sqrt(math.pow(forceXcomp, 2) + math.pow(forceYcomp, 2))

final_file = open('output.csv', 'a', newline='')
writer = csv.writer(final_file)

with open('file.csv', 'r') as file:
    header = next(file).strip().split(",")  # read (and strip) the header row
    header.append("appliedForce")
    writer.writerow(header)  # add the header to the new output file
    reader = csv.reader(file, delimiter=',')
    for row in reader:
        # csv yields strings, so convert them before calling the function
        appliedForce = forceAppliedCalc(float(row[2]), float(row[1]), float(row[0]), float(row[3]))
        row.append(appliedForce)
        writer.writerow(row)

final_file.close()
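As a sanity check, for the first data row (normal 300, acceleration 0.333, mass 40, mu 0.525) the components are forceY = 9.8 * 40 - 300 = 92 and forceX = 40 * 0.333 + 0.525 * 300 = 170.82, so the applied force is sqrt(92^2 + 170.82^2) ≈ 194.02, matching the pandas output above.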
Related
Why a "While True" loop could stop out of nowhere?
I'm sensing the vibrations of a machine live with Arduino. For that I use an accelerometer, an Arduino Uno board, and a Python script that I wrote. The code is intended to read the data of the sensor from the serial port (g), calculate the root mean square of the sequence (RMS) and save the data in a csv file. The problem I have is that my script stops showing and saving the data out of nowhere. Do you see any mistakes in the code? I couldn't identify any relationship between the different incidents, because each one happened at a different time. Here it is:

import csv
from math import sqrt
import serial
from itertools import count
import os

ruta = 'C:/Users/jabde/OneDrive/Documentos/Juan/PhD/Ensayos/Acelerómetro/archivo.csv'
nombre_archivo = input("Ingrese el nombre del archivo: ")
ruta_completa = os.path.join(os.path.dirname(ruta), nombre_archivo + '.csv')
os.chdir(os.path.dirname(ruta))

arduinoData = serial.Serial('com3', 115200)
fieldnames = ["t", "g", "RMS"]
i = 0
t = 0
g = 0.15
RMS = 0.1425
suma_cuadrados = 0

with open(ruta_completa, 'w', newline='') as csv_file:
    csv_writer = csv.DictWriter(csv_file, fieldnames=fieldnames)
    csv_writer.writeheader()

while True:
    with open(ruta_completa, 'a', newline='') as csv_file:
        csv_writer = csv.DictWriter(csv_file, fieldnames=fieldnames)
        try:
            g = arduinoData.readline()
            g = float(g) / 800
            t = t + 0.2
            t = round(t, 1)
            i = i + 1
            cuadrados = g * g
            suma_cuadrados = suma_cuadrados + cuadrados
            RMS = suma_cuadrados / i
            RMS = sqrt(RMS)
            info = {
                "g": g,
                "t": t,
                "RMS": RMS
            }
            with open(ruta_completa, 'a', newline='') as csv_file:
                csv_writer = csv.DictWriter(csv_file, fieldnames=fieldnames)
                csv_writer.writerow(info)
            print(t, g, RMS)
            time.sleep(0.2)
        except:
            pass

Thanks in advance! PS: this is my first post, I'm a newbie :) I thought it could be a space problem on my laptop, so I changed the route where it was saving the CSV file, but nothing happened.
I would start with something like this and see where it got me. Note that some of your code is commented out and/or mocked for testing on my part. The idea is to handle exceptions via an outer loop while doing expected work in the inner loop.

import csv
import math
#import serial
#import os
import time
import random

#ruta = 'C:/Users/jabde/OneDrive/Documentos/Juan/PhD/Ensayos/Acelerómetro/archivo.csv'
#nombre_archivo = input("Ingrese el nombre del archivo: ")
#ruta_completa = os.path.join(os.path.dirname(ruta), nombre_archivo + '.csv')
ruta_completa = "out.csv"
#os.chdir(os.path.dirname(ruta))
#arduinoData = serial.Serial('com3', 115200)

class arduinoData:
    readline = lambda : 100_000 * random.random()

start_time = int(time.time())
suma_cuadrados = 0
i = 1

with open(ruta_completa, 'w', newline='') as csv_file:
    csv_writer = csv.writer(csv_file)
    csv_writer.writerow(["t", "g", "RMS"])

while True:
    try:
        with open(ruta_completa, 'a', newline='') as csv_file:
            csv_writer = csv.writer(csv_file)
            while True:
                if random.randint(0, 10) == 10:
                    raise Exception("Test Exception")
                t = round(time.time() - start_time, 1)
                g = float(arduinoData.readline()) / 800
                suma_cuadrados += (g * g)
                RMS = math.sqrt(suma_cuadrados / i)
                row = [g, t, RMS]
                csv_writer.writerow([g, t, RMS])
                print(row)
                i += 1
                time.sleep(0.2)
    except Exception as e:
        print(f"Error: {e}")
        print("\tTrying again in 5 seconds...")
        time.sleep(5)
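The design choice here is that the inner loop does only the expected work while the outer loop owns the error handling, so every failure is reported and retried after 5 seconds instead of being silently swallowed the way a bare except: pass would.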
Comparing Thousands of datetimes in a reasonable amount of time?
Long story short, I'm trying to calibrate a thermometer. I have a CSV from my reference thermometer with 16k records (CSV-A), and a CSV from the thermometer to be calibrated with about 52k records (CSV-B). I need to compare the records from CSV-B to CSV-A to get the times where CSV-B is closest to CSV-A and store the time and value in a different array. I believe that I have gotten the basic logic of checking the datetimes in place, but the problem seems to be the fact that I have to iterate through an array of 52,000 items 16,000 times. I've tried implementing both multiprocessing and multithreading, but the script has yet to finish running.

import numpy as np, csv, multiprocessing as mp
from datetime import datetime as d
from multiprocessing import Process, Pool

d_rt_t = []
d_rh_t = []
d_dt = []
d_dh = []
d_rt = []
d_rh = []
nts = d.now().timestamp()

with open(f"calib_temp-{nts}.csv", 'w') as ctw:
    pass
with open(f"calib_humid-{nts}.csv", 'w') as chw:
    pass

def find_nearest(array, value):
    nearest = min(array, key=lambda x: abs(d.strptime(x[1], '%m/%d/%Y %H:%M:%S:%f') - d.strptime(value, '%Y-%m-%d %H:%M:%S')))
    return nearest

def comp_d_rt_t():
    for row in d_rt:
        pool = Pool()
        d_rt_t.append([pool.map(find_nearest, d_dt, row[1]), row[1]])

def comp_d_rh_t():
    for row in d_rh:
        d_rh_t.append([pool.map(find_nearest, d_dt, row[1]), row[1]])

#str2date = lambda x: d.strptime(x.decode("utf-8"), '%m/%d/%Y %H:%M:%S:%f')
#str2date2 = lambda x: d.strptime(x.decode("utf-8"), '%Y-%m-%d %H:%M:%S')

with open("dht-temp.csv", 'r', newline='') as ddt:
    fr_dt = csv.reader(ddt, delimiter=',')
    for row in fr_dt:
        d_dt.append([row[0], row[1]])
    ddt.close
with open("dht-humid.csv", 'r', newline='') as ddh:
    fr_dh = csv.reader(ddh, delimiter=',')
    for row in fr_dh:
        d_dh.append([row[0], row[1]])
    ddh.close
with open("ref-temp.csv", 'r', newline='') as drt:
    fr_rt = csv.reader(drt, delimiter=',')
    for row in fr_rt:
        d_rt.append([row[0], row[1]])
    drt.close
with open("ref-humid.csv", 'r', newline='') as drh:
    fr_rh = csv.reader(drh, delimiter=',')
    for row in fr_rh:
        d_rh.append([row[0], row[1]])
    drh.close

p1 = Process(target=comp_d_rt_t, args=(d_dt, row[1]))
p2 = Process(target=comp_d_rh_t, args=(d_dh, row[1]))
p1.start()
p2.start()
p1.join()
p2.join()
print(d_rt_t)

with open(f"calib_temp-{nts}.csv", 'a', newline='') as ct:
    c = csv.writer(ct, delimiter=',')
    for row in d_rt_t:
        dt = np.where(d_dt == row[1])
        rt = np.where(d_rt == row[1])
        print(rt)
        c.writerow([dt[0], rt[0]])
with open(f"calib_humid-{nts}.csv", 'a', newline='') as ch:
    c = csv.writer(ch, delimiter=',')
    for row in d_rh_t:
        dh = np.where(d_dh == row[1])
        print(dh)
        rh = np.where(d_rh == row[1])
        print(rh)
        c.writerow([dh[0], rh[0]])

I moved the for loops around a bit, but before they just called numpy append, which called the find_nearest method.
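One way to avoid the quadratic scan, sketched under the assumption that the timestamp formats are exactly the two used in find_nearest above: parse and sort the 52k records once, then binary-search with bisect, so each of the 16k lookups costs O(log n) instead of O(n).

from bisect import bisect_left
from datetime import datetime as d

# Parse and sort the large list once, pairing each parsed time with its original row.
pairs = sorted((d.strptime(x[1], '%m/%d/%Y %H:%M:%S:%f'), x) for x in d_dt)
times = [p[0] for p in pairs]

def find_nearest_fast(value):
    t = d.strptime(value, '%Y-%m-%d %H:%M:%S')
    i = bisect_left(times, t)
    # the nearest record is one of the two neighbours of the insertion point
    return min(pairs[max(i - 1, 0):i + 1], key=lambda p: abs(p[0] - t))[1]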
Add one more value to a csv from a text file in Python
I am converting multiple text files to a csv file. My text file looks like this:

ITEM: TIMESTEP
55000
ITEM: NUMBER OF ATOMS
4365
ITEM: BOX BOUNDS ff ff ff
-0.3 0.3
-0.6 0.6
-0.6 0.6
ITEM: ATOMS id type x y z vx vy vz fx fy fz omegax omegay omegaz radius
4356 1 -0.0885288 -0.0101421 -0.48871 -0.000941682 0.778688 -0.0153902 -0.00720861 -0.0533703 0.0104717 0.35581 -0.0601358 -0.436049 0.01
4227 1 0.0157977 0.00542603 -0.488429 -0.00996111 0.784119 0.00813807 -0.000491847 0.0144889 -0.0120111 1.08208 -0.0671177 0.369492 0.01
3973 1 0.0179724 0.0256167 -0.48799 -0.00582994 0.772455 0.0394544 0.0109589 -0.0187232 -0.00111718 -0.0586513 -0.162943 1.12784 0.01
4300 1 0.0900919 0.0248592 -0.488025 -0.000455483 0.769978 0.0388239 -0.00364509 0.0409803 -0.00269227 3.94355 -0.0249566 -0.223111 0.01
4200 1 -0.0230223 0.0329911 -0.483108 -0.00238 0.778547 0.0500186 0.0421189 -0.021588 0.05607 0.112989 -0.0813771 -1.09981 0.015
4339 1 0.00143577 0.0368542 -0.488107 0.000587848 0.784672 0.0593572 0.00385562 -0.00475113 -0.00710483 -0.201196 0.158512 -5.63826 0.01
4106 1 0.0648392 0.0269728 -0.483248 -0.00365836 0.766081 0.0395827 0.0418642 0.1802 0.0547313 -0.0578358 0.124205 -0.96464 0.015
4104 1 -0.084453 0.0507114 -0.482726 -0.000596577 0.75636 0.0806599 0.000817826 0.0119286 -0.0150014 -0.0864852 -0.103877 0.198773 0.015

Right now my csv file contains the values after line 9 (in the Python code, line 8). I want to include line 2 (the TIMESTEP header) in the csv as well, along with all the values after line 9. I tried to edit my code but couldn't succeed. Can I get some help? My code is here:

import numpy as np
import pandas as pd
import csv
import glob
import time

def main():
    start = time.time()
    data_folder = "./all/"  # folder name
    files = glob.glob(data_folder + '*dump*.data')
    print("Total files:", len(files))

    # get header from one of the files
    #header = []
    with open('all/dump46000.data', 'r') as f:
        #lines = f.readlines()
        for _ in range(8):
            next(f)  # skip first 8 lines
        header = ','.join(f.readline().split()[2:]) + '\n'
        headers = ','.join(f.readline().split()[2:])
        #header.append(headers)
        #header.append('timestep')
    print(header)

    for file in files:
        with open(file, 'r') as f, open(f'all.csv', 'a') as g:  # note the 'a'
            g.write(header)  # write the header
            for _ in range(9):
                next(f)  # skip first 9 lines
            for line in f:
                g.write(line.rstrip().replace(' ', ',') + '\n')
    print(time.time() - start)

if __name__ == "__main__":
    main()

My folder all contains more than 600 files:

['./all/dump501000.data', './all/dump307000.data', './all/dump612000.data', './all/dump369000.data', './all/dump23000.data', './all/dump470000.data', './all/dump235000.data', './all/dump6000.data', './all/dump568000.data', './all/dump506000.data', './all/dump623000.data', './all/dump329000.data', './all/dump220000.data', ...]

I want this csv file from the text file:

id type x y z vx vy vz fx fy fz omegax omegay omegaz radius TIMESTEP

But I am getting this csv:

id type x y z vx vy vz fx fy fz omegax omegay omegaz radius

Thank you
Here is something you can try to add TIMESTEP with your data in the csv. I am just wondering if you need to print the header for each file; my understanding is that you only need to write the header once, at the top. If you want to print it for each file, bring it into the for loop.

import numpy as np
import pandas as pd
import csv
import glob
import time

def main():
    start = time.time()
    data_folder = "./all/"  # folder name
    files = glob.glob(data_folder + '*dump*.data')
    print("Total files:", len(files))

    # get header from one of the files
    header = []
    with open('all/dump46000.data', 'r') as f:
        #lines = f.readlines()
        header.extend(f.readline().split()[1:])
        timeStep = f.readline().split()
        for _ in range(6):
            next(f)  # skip the remaining header lines
        header.extend(f.readline().split()[2:])
    print(header)
    headerString = ','.join(header)

    for file in files:
        with open(file, 'r') as f, open(f'all.csv', 'a') as g:  # note the 'a'
            next(f)
            g.write(headerString + '\n')  # write the header
            timeStep = f.readline().split()
            for _ in range(7):
                next(f)
            for line in f:
                file_line = line.split()
                file_line.insert(0, timeStep[0])
                data = ','.join(file_line)
                g.write(data + '\n')
    print(time.time() - start)

if __name__ == "__main__":
    main()
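With this change the header becomes TIMESTEP,id,type,x,...,radius and each data row gets its file's timestep prepended, e.g. 55000,4356,1,-0.0885288,... for the sample file above.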
Based on what you want, here's what should work:

import numpy as np
import pandas as pd
import csv
import glob
import time

def main():
    start = time.perf_counter()
    data_folder = "./all/"  # folder name
    files = glob.glob(data_folder + '*dump*.data')
    print("Total files:", len(files))

    for file in files:
        with open(file, 'r') as f, open(f'all.csv', 'a') as g:  # note the 'a'
            header = f.readline().split("ITEM: ")[1] + '\n'
            headers = f.readline()
            print(header)
            g.write(header)
            g.write(headers)
            for _ in range(6):
                next(f)
            for line in f:
                g.write(line.rstrip().replace(' ', ',') + '\n')
    print(time.perf_counter() - start)

if __name__ == "__main__":
    main()

Let me know if you need any other syntax or something else in the final CSV. Also, to time something, always use time.perf_counter; it's more accurate.
Continuously update a pandas DataFrame
Please see the pandas based pattern scanner below. Here I am using a csv file as the data source and loading it into data. Since the data is loaded from a csv file, I have to reload/rerun the script every 5 minutes to read the updated csv file, hence repeating the plot every 5 minutes. Is there any way to use df.update to avoid reloading the script and prevent redrawing the plot again and again?

import pandas as pd
import numpy as np
from scipy.signal import argrelextrema
import matplotlib.pyplot as plt
from harmonic_functions import *
import uuid
from csv import DictReader

data = pd.read_csv('temp.csv')
data.time = pd.to_datetime(data.time, format='%d.%m.%Y %H:%M:%S.%f')
data.index = data['time']
# data = data.drop_duplicates(keep=False)

price = data.close.copy()

err_allowed = 10.0/100

pnl = []
trade_dates = []
correct_pats = 0
pats = 0

# plt.ion()

for i in range(100, len(price)):
    current_idx, current_pat, start, end = peak_detect(price.values[:i], order=7)

    XA = current_pat[1] - current_pat[0]
    AB = current_pat[2] - current_pat[1]
    BC = current_pat[3] - current_pat[2]
    CD = current_pat[4] - current_pat[3]

    moves = [XA, AB, BC, CD]

    gart = is_gartley(moves, err_allowed)
    butt = is_butterfly(moves, err_allowed)
    bat = is_bat(moves, err_allowed)
    crab = is_crab(moves, err_allowed)
    shark = is_shark(moves, err_allowed)
    trio = is_trio(moves, err_allowed)
    cyph = is_cyph(moves, err_allowed)
    three_dives = is_3dives(moves, err_allowed)
    fivezero = is_50(moves, err_allowed)
    altbat = is_altbat(moves, err_allowed)
    deepcrab = is_deepcrab(moves, err_allowed)
    dragon = is_dragon(moves, err_allowed)
    snorm = is_snorm(moves, err_allowed)

    harmonics = np.array([gart, butt, bat, crab, shark, trio, cyph, three_dives, fivezero, altbat, deepcrab, dragon, snorm])
    labels = ['Garterly', 'Butterfly', 'Bat', 'Crab', 'Shark', 'Trio', 'Cypher', '3Dives', '5Zero', 'AltBat', 'DeepCrab', 'Dragon', 'Snorm']

    if np.any(harmonics == 1) or np.any(harmonics == -1):
        for j in range(0, len(harmonics)):
            if harmonics[j] == 1 or harmonics[j] == -1:
                pats += 1
                sense = 'Bearish ' if harmonics[j] == -1 else 'Bullish '
                label = sense + labels[j] + ' found'
                print(label)
                print(price.values[start])

                plt.title(label)
                plt.plot(np.arange(start, i+5), price.values[start:i+5])
                plt.scatter(current_idx, current_pat, c='r')

                filename = str(uuid.uuid1())[:8]
                print(current_pat)
                print(current_idx)

                # with open('temp.csv', mode='r') as csv_file:
                #     file = DictReader(csv_file, delimiter=',')
                #     close = str(current_pat[4])
                #     print(current_pat)
                #     rows = [row for row in file if row['close'] in close]
                #     closetime = rows[-1]['ID']
                #     print(closetime)

                write1 = str(current_idx)
                write2 = str(current_pat)
                write = write1 + ',' + write2
                print(write)

                with open("datadb", "r+") as file:
                    for line in file:
                        if write in line:
                            break
                    else:  # not found, we are at the eof
                        file.write(f"{write}\n")  # append missing data

                print(filename)
                plt.savefig(filename)
                plt.close(filename)
                # plt.show()
    plt.clf()
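One common pattern for this (a minimal sketch, assuming temp.csv only ever grows and its column layout never changes): remember how many rows are already loaded and skip them on the next read, so only the new rows are appended instead of reloading everything. Note that df.update overwrites existing cells by index; for rows appended to the file, concat is the usual tool.

import time
import pandas as pd

data = pd.read_csv('temp.csv')

while True:
    time.sleep(300)  # poll every 5 minutes
    # skip the data rows already loaded (row 0 is the header)
    new = pd.read_csv('temp.csv', skiprows=range(1, len(data) + 1))
    if not new.empty:
        data = pd.concat([data, new], ignore_index=True)
        # ... re-run the pattern scan on the updated frame here ...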
Python Code Speed Up
My code should compare two vectors saved as dictionaries (two pickle files) and save the result into a pickle file too. This works, but very slowly. For one compare result I'm waiting about 7:20 min. Because I have a lot of videos (exactly 2033), this program will run for about 10 days. This is too long. How can I speed up my code for Python 2.7?

import math
import csv
import pickle
from itertools import izip

global_ddc_file = 'E:/global_ddc.p'
io = 'E:/AV-Datensatz'
v_source = ''

def dot_product(v1, v2):
    return sum(map(lambda x: x[0] * x[1], izip(v1, v2)))  # izip('ABCD', 'xy') --> Ax By

def cosine_measure(v1, v2):
    prod = dot_product(v1, v2)
    len1 = math.sqrt(dot_product(v1, v1))
    len2 = math.sqrt(dot_product(v2, v2))
    if (len1 * len2) <> 0:
        out = prod / (len1 * len2)
    else:
        out = 0
    return out

def findSource(v):
    v_id = "/" + v[0].lstrip("<http://av.tib.eu/resource/video").rstrip(">")
    v_source = io + v_id
    v_file = v_source + '/vector.p'
    source = [v_id, v_source, v_file]
    return source

def getVector(v, vectorCol):
    with open(v, 'rb') as f:
        try:
            vector_v = pickle.load(f)
        except:
            print 'file couldnt be loaded'
    tf_idf = []
    tf_idf = [vec[1][vectorCol] for vec in vector_v]
    return tf_idf

def compareVectors(v1, v2, vectorCol):
    v1_source = findSource(v1)
    v2_source = findSource(v2)
    V1 = getVector(v1_source[2], vectorCol)
    V2 = getVector(v2_source[2], vectorCol)
    sim = [v1_source[0], v2_source[0], cosine_measure(V1, V2)]
    return sim

#with open('videos_av_portal_cc_3.0_nur2bspStanford.csv', 'rb') as dataIn:
with open('videos_av_portal_cc_3.0_vollstaendig.csv', 'rb') as dataIn:
#with open('videos_av_portal_cc_3.0.csv', 'rb') as dataIn:
    try:
        reader = csv.reader(dataIn)
        v_source = []
        for row in reader:
            v_source.append(findSource(row))
        #print v_source

        for one in v_source:
            print one[1]
            compVec = []
            for another in v_source:
                if one <> another:
                    compVec.append(compareVectors(one, another, 3))
            compVec_sort = sorted(compVec, key=lambda cosim: cosim[2], reverse=True)
            # save vector file for each video
            with open(one[1] + '/compare.p', 'wb') as f:
                pickle.dump(compVec_sort, f)
    finally:
        dataIn.close()
Split the code into parts:
1. Load the dictionaries into vectors.
2. Compare the two dictionaries using multiprocessing (a sketch follows below; see also the multiprocessing examples in the Python docs).
3. Launch processes simultaneously according to memory availability and end each process after 8 minutes, then update the third dictionary.
4. Relaunch processes on the next set of data, follow step 3, and continue until the dictionary is exhausted.
This should reduce the total turnaround time. Let me know if you need the full code.
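A minimal sketch of step 2, using multiprocessing.Pool to spread the pairwise comparisons across cores. The function and variable names (compareVectors, v_source) are the ones from the question; pool size and chunking are left at their defaults and would need tuning:

from multiprocessing import Pool
import pickle

def compare_one(args):
    one, others = args
    # compare a single video against all the others, as in the question's inner loop
    comp_vec = [compareVectors(one, another, 3) for another in others if one != another]
    return one, sorted(comp_vec, key=lambda cosim: cosim[2], reverse=True)

if __name__ == '__main__':
    pool = Pool()  # defaults to one worker per CPU core
    jobs = ((one, v_source) for one in v_source)
    for one, comp_vec_sort in pool.imap_unordered(compare_one, jobs):
        with open(one[1] + '/compare.p', 'wb') as f:
            pickle.dump(comp_vec_sort, f)
    pool.close()
    pool.join()

Independently of multiprocessing, caching the unpickled vectors so each vector.p file is loaded once, instead of once per pair, would remove most of the repeated I/O.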