Long story short, I'm trying to calibrate a thermometer. I have a CSV from my reference thermometer with 16k records (CSV-A), and a CSV from the thermometer to be calibrated with about 52k records (CSV-B). I need to compare the records from CSV-B to the CSV-A to get the times where CSV-B is closest to CSV-A and store the time and value in a different array.
I believe that I have gotten the basic logic of checking the datetimes in place, but the problem seems to be the fact that I have to iterate through an array of 52,000 items 16,000 times. I've tried implementing both multiprocessing and multithreading, but the script has yet to finish running.
import numpy as np, csv, multiprocessing as mp
from datetime import datetime as d
from multiprocessing import Process, Pool
# Result lists: comp_d_rt_t appends to d_rt_t and comp_d_rh_t appends to
# d_rh_t (one [match, reference_timestamp] entry per reference row).
d_rt_t = []
d_rh_t = []
# Rows loaded from dht-temp.csv and dht-humid.csv (first two columns only).
d_dt = []
d_dh = []
# Rows loaded from ref-temp.csv and ref-humid.csv (first two columns only).
d_rt = []
d_rh = []
# POSIX timestamp (a float) of "now"; embedded in the output file names so
# each run writes to its own pair of files.
nts = d.now().timestamp()
# Create (or truncate) both output files up front; nothing is written yet.
with open(f"calib_temp-{nts}.csv", 'w') as ctw:
pass
with open(f"calib_humid-{nts}.csv", 'w') as chw:
pass
def find_nearest(array, value):
    """Return the row of *array* whose timestamp is closest to *value*.

    Args:
        array: iterable of rows; ``row[1]`` must be a timestamp string in
            the format ``'%m/%d/%Y %H:%M:%S:%f'``.
        value: target timestamp string in the format ``'%Y-%m-%d %H:%M:%S'``.

    Returns:
        The row (unchanged) with the smallest absolute time difference to
        *value*. On a tie the first such row in *array* is returned.

    Raises:
        ValueError: if *array* is empty or a timestamp does not match its
            expected format.
    """
    # Parse the target once; it is the same for every candidate row.
    target = d.strptime(value, '%Y-%m-%d %H:%M:%S')
    return min(
        array,
        key=lambda row: abs(d.strptime(row[1], '%m/%d/%Y %H:%M:%S:%f') - target),
    )
def comp_d_rt_t():
    """Match every reference temperature row to its nearest DHT temperature row.

    For each row of the module-level ``d_rt`` list, ``find_nearest`` looks up
    the row of ``d_dt`` whose timestamp is closest to ``row[1]``; the pair
    ``[nearest_row, row[1]]`` is appended to the module-level ``d_rt_t`` list.
    """
    from functools import partial  # local import keeps this block self-contained

    ref_times = [row[1] for row in d_rt]
    # One pool for the whole run, closed by the ``with`` block, instead of a
    # new never-closed pool per row. ``Pool.map`` accepts a single iterable
    # (its third positional argument is ``chunksize``), so the search list is
    # bound with ``partial`` and the reference timestamps are what gets mapped.
    with Pool() as pool:
        nearest_rows = pool.map(partial(find_nearest, d_dt), ref_times)
    d_rt_t.extend(
        [nearest, ref_time] for nearest, ref_time in zip(nearest_rows, ref_times)
    )
def comp_d_rh_t():
    """Match every reference humidity row to its nearest DHT humidity row.

    For each row of the module-level ``d_rh`` list, ``find_nearest`` looks up
    the row of ``d_dh`` whose timestamp is closest to ``row[1]``; the pair
    ``[nearest_row, row[1]]`` is appended to the module-level ``d_rh_t`` list.
    """
    from functools import partial  # local import keeps this block self-contained

    ref_times = [row[1] for row in d_rh]
    # The original referenced an undefined ``pool`` and searched ``d_dt``;
    # humidity readings are matched against the DHT humidity rows here.
    # NOTE(review): confirm d_dh (not d_dt) is the intended search list.
    # ``Pool.map`` accepts a single iterable, so the search list is bound
    # with ``partial`` and the reference timestamps are what gets mapped.
    with Pool() as pool:
        nearest_rows = pool.map(partial(find_nearest, d_dh), ref_times)
    d_rh_t.extend(
        [nearest, ref_time] for nearest, ref_time in zip(nearest_rows, ref_times)
    )
#str2date = lambda x: d.strptime(x.decode("utf-8"), '%m/%d/%Y %H:%M:%S:%f')
#str2date2 = lambda x: d.strptime(x.decode("utf-8"), '%Y-%m-%d %H:%M:%S')
# Load the first two columns of each of the four input CSV files into the
# module-level lists d_dt, d_dh, d_rt and d_rh.
with open("dht-temp.csv", 'r', newline='') as ddt:
fr_dt = csv.reader(ddt, delimiter=',')
for row in fr_dt:
d_dt.append([row[0],row[1]])
# NOTE(review): `ddt.close` only references the method, it does not call it.
# The `with` statement already closes the file, so the line has no effect
# (same for the three `.close` lines below).
ddt.close
with open("dht-humid.csv", 'r', newline='') as ddh:
fr_dh = csv.reader(ddh, delimiter=',')
for row in fr_dh:
d_dh.append([row[0],row[1]])
ddh.close
with open("ref-temp.csv", 'r', newline='') as drt:
fr_rt = csv.reader(drt, delimiter=',')
for row in fr_rt:
d_rt.append([row[0],row[1]])
drt.close
with open("ref-humid.csv", 'r', newline='') as drh:
fr_rh = csv.reader(drh, delimiter=',')
for row in fr_rh:
d_rh.append([row[0],row[1]])
drh.close
# Run the temperature and humidity matching in two separate processes.
# NOTE(review): comp_d_rt_t and comp_d_rh_t are defined without parameters,
# so passing `args=(...)` makes each call fail with a TypeError inside the
# child process. `row` here is whatever the last CSV loop left behind.
# NOTE(review): presumably the lists appended to inside a child process are
# not visible in this parent process, so d_rt_t / d_rh_t would stay empty
# here; confirm against the multiprocessing documentation.
p1 = Process(target=comp_d_rt_t, args=(d_dt,row[1]))
p2 = Process(target=comp_d_rh_t, args=(d_dh,row[1]))
p1.start()
p2.start()
p1.join()
p2.join()
print(d_rt_t)
# Append one output row per matched temperature entry.
with open(f"calib_temp-{nts}.csv", 'a', newline='') as ct:
c = csv.writer(ct, delimiter = ',')
for row in d_rt_t:
# NOTE(review): d_dt and d_rt are plain Python lists, so `d_dt == row[1]`
# is a single boolean rather than an element-wise comparison; np.where
# then returns index arrays, not readings. Confirm the intended lookup.
dt = np.where(d_dt == row[1])
rt = np.where(d_rt == row[1])
print(rt)
c.writerow([dt[0], rt[0]])
# Append one output row per matched humidity entry (same pattern as above).
with open(f"calib_humid-{nts}.csv", 'a', newline='') as ch:
c = csv.writer(ch, delimiter = ',')
for row in d_rh_t:
dh = np.where(d_dh == row[1])
print(dh)
rh = np.where(d_rh == row[1])
print(rh)
c.writerow([dh[0], rh[0]])
I moved the for loops around a bit, but before they just called numpy append which called the find_nearest method.
I have written a function that takes in the normal force, mass, acceleration, and coefficient of friction and calculates the applied force. The parameter values for which I need the applied force are stored in a CSV file. How do I read those values from the CSV and calculate the applied force for each row? I have tried many times but could not figure it out. Here's my code:
import matplotlib.pyplot as plt
import csv
import math
def forceAppliedCalc(mass, acceleration, normalForce, muVal, gravity=9.8):
    """Return the magnitude of the applied force.

    The force is assembled from two perpendicular components:

    * vertical:   ``gravity * mass - normalForce``
    * horizontal: ``mass * acceleration + muVal * normalForce``

    Args:
        mass: mass of the object.
        acceleration: horizontal acceleration of the object.
        normalForce: normal force acting on the object.
        muVal: coefficient of friction.
        gravity: gravitational acceleration; defaults to 9.8, the constant
            that was previously hard-coded.

    Returns:
        The Euclidean norm of the two components, as a float.
    """
    forceYcomp = gravity * mass - normalForce
    forceXcomp = mass * acceleration + muVal * normalForce
    # hypot computes sqrt(x*x + y*y) without intermediate overflow.
    return math.hypot(forceXcomp, forceYcomp)
# Read Data.csv into a column-oriented dict: header name -> list of the
# (string) cell values found under that header, in file order.
data = dict()
headers = []
with open("Data.csv", newline="") as file:  # closed automatically on exit
    reader = csv.reader(file, delimiter=",")
    # The first row holds the column names; an empty file leaves `data` empty.
    headers = next(reader, [])
    for name in headers:
        data[name] = []
    for row in reader:
        # zip pairs each cell with its column name; blank lines yield no
        # pairs and cells beyond the last header are ignored.
        for name, cell in zip(headers, row):
            data[name].append(cell)
And, here's the CSV file I am working with:
Normal,Acceleration,Mass,Mu,Name,Guess
300,0.333,40,0.525,Alf,150
300,0.333,40,0.525,Benny,160
300,0.333,40,0.525,Claire,170
250,0.2,50,0.3,Claire,250
250,0.2,50,0.3,Alf,265
250,0.2,50,0.3,Benny,255
260,0.4,55,0.32,Claire,280
260,0.4,55,0.32,Alf,284
260,0.4,55,0.32,Benny,300
280,0.3,60,0.4,Benny,340
280,0.3,60,0.4,Claire,360
280,0.3,60,0.4,Alf,330
210,0.14,90,0.6,Alf,700
210,0.14,90,0.6,Benny,800
210,0.14,90,0.6,Claire,600
140,0.167,45,0.144,Claire,300
140,0.167,45,0.144,Alf,145
140,0.167,45,0.144,Benny,167
60,1.2,130,0.178,Claire,1225
60,1.2,130,0.178,Alf,1444
60,1.2,130,0.178,Benny,1467
625,0.9,50,0.35,Benny,200
625,0.9,50,0.35,Claire,250
625,0.9,50,0.35,Alf,213
266,0.12,57,0.787,Alf,370
266,0.12,57,0.787,Benny,567
266,0.12,57,0.787,Claire,809
267,0.268,115,0.235,Benny,900
267,0.268,115,0.235,Claire,905
267,0.268,115,0.235,Alf,1020
Thanks in advance
You can try using pandas, a well-known library for data processing.
Sample code:
import math
import pandas as pd
def forceAppliedCalc(mass, acceleration, normalForce, muVal, gravity=9.8):
    """Compute the magnitude of the applied force from its two components.

    Vertical component:   ``gravity * mass - normalForce``
    Horizontal component: ``mass * acceleration + muVal * normalForce``

    Args:
        mass: mass of the object.
        acceleration: horizontal acceleration of the object.
        normalForce: normal force acting on the object.
        muVal: coefficient of friction.
        gravity: gravitational acceleration (default 9.8, the value that
            used to be hard-coded in the formula).

    Returns:
        float: ``sqrt(horizontal**2 + vertical**2)``.
    """
    vertical = gravity * mass - normalForce
    horizontal = mass * acceleration + muVal * normalForce
    # math.hypot is the library routine for the Euclidean norm.
    return math.hypot(horizontal, vertical)
# Load the table, then compute one force value per row from the four
# parameter columns, passed in the order forceAppliedCalc expects them.
csv = pd.read_csv('abcd.csv')
param_columns = (csv['Mass'], csv['Acceleration'], csv['Normal'], csv['Mu'])
csv['force'] = [forceAppliedCalc(*params) for params in zip(*param_columns)]
print(csv.head())
Output
Normal Acceleration Mass Mu Name Guess force
0 300 0.333 40 0.525 Alf 150 194.019258
1 300 0.333 40 0.525 Benny 160 194.019258
2 300 0.333 40 0.525 Claire 170 194.019258
3 250 0.200 50 0.300 Claire 250 254.607541
4 250 0.200 50 0.300 Alf 265 254.607541
In case you don't want to use pandas, you can achieve your goal via a complicated python zip, list and map, for example:
# `data` maps each column name to a list of strings, so every value is
# converted to float before it is handed to forceAppliedCalc.
force = [
    forceAppliedCalc(float(mass), float(acceleration), float(normal), float(mu))
    for mass, acceleration, normal, mu in zip(
        data['Mass'], data['Acceleration'], data['Normal'], data['Mu']
    )
]
Output:
[194.01925780705378, 194.01925780705378, 194.01925780705378, 254.60754112948035, 254.60754112948035, 254.60754112948035, 298.1745126599522, 298.1745126599522, 298.1745126599522, 334.3112322372672, 334.3112322372672, 334.3112322372672, 686.1442705437394, 686.1442705437394, 686.1442705437394, 302.269590969717, 302.269590969717, 302.269590969717, 1225.3890086009421, 1225.3890086009421, 1225.3890086009421, 296.29219108845916, 296.29219108845916, 296.29219108845916, 363.79859417540365, 363.79859417540365, 363.79859417540365, 865.0747997861225, 865.0747997861225, 865.0747997861225]
First, welcome to SOF!
I think a simple approach to what you are asking is the following script (I tried to keep it as simple and as close to your original code as possible):
import csv
import math
def force_applied_calc(mass, acceleration, normal_force, mu_val, gravity=9.8):
    """Return the magnitude of the applied force.

    Args:
        mass: mass of the object.
        acceleration: horizontal acceleration of the object.
        normal_force: normal force acting on the object.
        mu_val: coefficient of friction.
        gravity: gravitational acceleration; defaults to 9.8, the constant
            that was previously hard-coded.

    Returns:
        The Euclidean norm of the vertical component
        (``gravity * mass - normal_force``) and the horizontal component
        (``mass * acceleration + mu_val * normal_force``).
    """
    force_y_comp = gravity * mass - normal_force
    force_x_comp = mass * acceleration + mu_val * normal_force
    return math.hypot(force_x_comp, force_y_comp)
if __name__ == '__main__':
    # Read Data.csv, add a computed "Force" value to every row, print each
    # row, and (only when save_data is enabled) write the result back.
    data = []
    save_data = False

    # newline='' is what the csv module expects for files it reads.
    with open('Data.csv', 'r', newline='') as read_obj:
        csv_dict_reader = csv.DictReader(read_obj)
        # Copy the header list so appending 'Force' below does not mutate
        # the reader's own fieldnames; an empty file yields no headers.
        headers = list(csv_dict_reader.fieldnames or [])
        for csv_dict in csv_dict_reader:
            # float() accepts both "40" and "40.5"; int() rejected the latter.
            csv_dict["Force"] = force_applied_calc(
                float(csv_dict['Mass']),
                float(csv_dict['Acceleration']),
                float(csv_dict['Normal']),
                float(csv_dict['Mu']),
            )
            data.append(csv_dict)
            print(csv_dict)

    # Overwrite file with new data (skipped when the file already has a
    # 'Force' column). Done after the input file has been closed.
    if save_data and 'Force' not in headers:
        headers.append('Force')
        with open('Data.csv', 'w', newline='') as write_obj:
            csv_dict_writer = csv.DictWriter(write_obj, delimiter=',', fieldnames=headers)
            csv_dict_writer.writeheader()
            csv_dict_writer.writerows(data)
Note: @tandat's answer is a really good one.
Something like this would help.
import csv
# Copy file.csv to output.csv, adding an "appliedForce" column to each row.
# Both files are closed automatically when the `with` block ends.
with open('file.csv', 'r', newline='') as file, \
        open('output.csv', 'a', newline='') as final_file:
    reader = csv.reader(file, delimiter=',')
    writer = csv.writer(final_file)

    # Let the csv reader parse the header so the last column name does not
    # keep its trailing newline. An empty input file produces no output.
    header = next(reader, None)
    if header is not None:
        header.append("appliedForce")
        writer.writerow(header)  # add header to new output file

        for row in reader:
            if not row:  # skip blank lines
                continue
            # Cells are strings; convert before doing arithmetic. Columns
            # 2, 1, 0, 3 are passed as mass, acceleration, normal force, mu.
            appliedForce = forceAppliedCalc(
                float(row[2]), float(row[1]), float(row[0]), float(row[3])
            )
            row.append(appliedForce)
            writer.writerow(row)
There is a csv file, say A.csv, having content:
Place,Hotel,Food,Fare
Norway,Regal,NonVeg,5000
Poland,Jenny,Italiano,6000
Norway,Suzane,Vegeterian,4000
Norway,Regal,NonVeg,5000
I have to parse this csv and obtain an output by passing arguments in command prompt.
Example 1:
mycode.py Place
Desired output is:
Place,Fare
Norway,14000
Poland,6000
Example 2:
mycode.py Place Hotel
Desired output is:
Place,Hotel,Fare
Norway,Regal,10000
Poland,Jenny,6000
Norway,Suzane,4000
So it is clear from the above example that no matter what you pass as argument it gives you the sum of the Fare header for the common ones.
Below is my code and I am able to pass arguments and get an output, but I am stuck in sum of Fare. Can any one help me with this.
import sys
import csv
import collections
# Sum the 'Fare' column of A.csv, grouped by the column names given on the
# command line, and write the result to output.csv.
# The file modes ('rb' / 'wb') follow the Python 2 csv module convention
# used by this script.

# Columns to group by come from the command line; 'Fare' is always last.
Argvs = sys.argv[1:] + ['Fare']
Data = []
out = {}

try:
    with open('A.csv', 'rb') as file:
        Data = list(csv.reader(file))

    header = Data[0]
    # Position of every requested column in the header row. An unknown
    # column name raises ValueError, which is reported below.
    positions = [header.index(name) for name in Argvs]
    key_positions = positions[:-1]
    fare_position = positions[-1]

    # Group on ALL requested columns (the key is a tuple), not just the
    # first one; `order` remembers first-appearance order for the output.
    order = []
    for row in Data[1:]:
        key = tuple(row[i] for i in key_positions)
        if key not in out:
            out[key] = 0
            order.append(key)
        out[key] += int(row[fare_position])

    with open("output.csv", "wb") as csv_file:
        writer = csv.writer(csv_file, dialect='excel', delimiter=',')
        writer.writerow(Argvs)
        for key in order:
            # One cell per grouping column, followed by the summed fare.
            writer.writerow(list(key) + [out[key]])
except Exception as e:
    print(str(e))
I edited the code and tried to group the rows. Now I am able to get the aggregate of the Fare, but not the desired output.
So when I am passing:
mycode.py Place Hotel
Instead of:
Place,Hotel,Fare
Norway,Regal,10000
Poland,Jenny,6000
Norway,Suzane,4000
I am getting:
Place,Hotel,Fare
Norway,14000
Poland,6000
Finally, I managed to get my desired output.
Below I am sharing the final code.
import sys
import csv
# Sum the 'Fare' column of A.csv for every distinct combination of the
# columns named on the command line, then write the totals to output.csv.
# The file modes ('rb' / 'wb') follow the Python 2 csv module convention
# used by this script.

Data = []
Argvs = list(sys.argv[1:])
Argvs.append('Fare')  # the summed column is always the last output column
out = {}

try:
    with open('A.csv', 'rb') as file:
        for row in csv.reader(file):
            Data.append(row)

    column_names = Data[0]
    # Look up where each requested column sits in the header row; an
    # unknown name raises ValueError, which is reported below.
    wanted = [column_names.index(name) for name in Argvs]
    group_columns, fare_column = wanted[:-1], wanted[-1]

    # Totals keyed by the tuple of grouping values; `seen_order` keeps the
    # groups in the order they first appear in the input.
    seen_order = []
    for record in Data[1:]:
        group = tuple(record[i] for i in group_columns)
        if group not in out:
            seen_order.append(group)
        out[group] = out.get(group, 0) + int(record[fare_column])

    with open("output.csv", "wb") as csv_file:
        # Each grouping value is written as its own cell, so no quotechar
        # workaround is needed and no stray spaces end up in the output.
        writer = csv.writer(csv_file, delimiter=',')
        writer.writerow(Argvs)
        for group in seen_order:
            writer.writerow(list(group) + [out[group]])
except Exception as e:
    print(str(e))
So I have a program, that reads through a bunch of files and appends the necessary data that I need. I need to now take those particular data and show them as a list. To be more specific, these are the parameters I have:
a = Source, b = luminosity, c = luminosity error, d = HST, e = XRS, f = gmag, g = z, and h = rh
I want to display this in a list, each defining a particular column. I just don't know where exactly I should insert the print statement among the various for loops I've done to do this.
I would appreciate any help! Here's the program (the main focus is in the for loops done and how they iterate through the data, and don't worry about indentations, the program so far works I just need to display the data appended in columns):
import sys
import os
import re
import urllib
import urllib2
from os.path import basename
import urlparse
import shutil
# Python 2 script: reads Sources.txt and, for each non-blank line, opens the
# list files named in its '|'-separated fields and collects selected columns.
# NOTE(review): the original indentation is not visible here, so the nesting
# of the sections below cannot be confirmed from this text.
base_dirname = '/projects/XRB_Web/apmanuel/499/'
base_sourcefile = base_dirname + 'Sources.txt'
# NOTE(review): if the open fails only a message is printed; execution then
# continues to the loop below without a freshly opened file.
try:
file = open(base_sourcefile, 'r')
except IOError:
print 'Cannot open: '+base_sourcefile
# NOTE(review): Source starts as a list but is rebound to a string below;
# Finallist, ACS and SRC are not used in the code shown here.
Source = []
Finallist = []
ACS = []
SRC = []
for line in file:
data_line_check = (line.strip())
# Only non-blank lines are processed.
if data_line_check:
# Collapse every run of whitespace to one space, then split into fields.
line = re.sub(r'\s+', ' ', line)
point = line.split('|')
# Field 0: source name, with whitespace replaced by underscores.
temp_source = (point[0]).strip()
# NOTE(review): this requires exactly 3 fields, yet point[3] and point[4]
# are read further down; confirm the expected field count.
if temp_source and len(point) == 3:
Source = (point[0]).strip()
Source = re.sub(r'\s', '_', Source)
print Source+"\n"
# Field 1: name of the luminosity list file under .../Lists/.
temp_finallist = (point[1]).strip()
if temp_finallist:
Finallistaddress = (point[1]).strip()
Finallistaddress = re.sub(r'\s', '_', Finallistaddress)
Luminositybase_dirname1 = '/projects/XRB_Web/apmanuel/499/Lists/' + Finallistaddress
# NOTE(review): as above, a failed open only prints a message and execution
# falls through to the read loop (same pattern for file3, file4, file5).
try:
file2 = open(Luminositybase_dirname1, 'r')
except IOError:
print 'Cannot open: '+Luminositybase_dirname1
source = []
luminosity = []
luminosityerr = []
# Whitespace-split columns 0, 5 and 6 of every line are collected.
for line in file2:
pointy = line.split()
a = int(pointy[0])
b = float(pointy[5])
c = float(pointy[6])
source.append(a)
luminosity.append(b)
luminosityerr.append(c)
# Field 2: name of the HST list file; column 0 of every line is collected.
temp_HST = (point[2]).strip()
if temp_HST:
HSTaddress = (point[2]).strip()
HSTaddress = re.sub(r'\s', '_', HSTaddress)
HSTbase_dirname2 = '/projects/XRB_Web/apmanuel/499/Lists/' + HSTaddress
try:
file3 = open(HSTbase_dirname2, 'r')
except IOError:
print 'Cannot open: '+HSTbase_dirname2
HST = []
for line in file3:
pointy2 = line.split()
d = int(pointy2[0])
HST.append(d)
# Field 3: name of the XRS list file; column 0 of every line is collected.
temp_XRS = (point[3]).strip()
if temp_XRS:
XRSaddress = (point[3]).strip()
XRSaddress =re.sub(r'\s', '_', XRSaddress)
XRSbase_dirname3 = '/projects/XRB_Web/apmanuel/499/Lists/' + XRSaddress
try:
file4 = open(XRSbase_dirname3, 'r')
except IOError:
print 'Cannot open: '+XRSbase_dirname3
XRS = []
for line in file4:
pointy3 = line.split()
e = int(pointy3[0])
XRS.append(e)
# Field 4: name of a further list file; columns 3, 5 and 7 are collected.
temp_others = (point[4]).strip()
if temp_others:
othersaddress = (point[4]).strip()
othersaddress =re.sub(r'\s', '_', othersaddress)
othersbase_dirname4 = '/projects/XRB_Web/apmanuel/499/Lists/' + othersaddress
try:
file5 = open(othersbase_dirname4, 'r')
except IOError:
print 'Cannot open: '+othersbase_dirname4
gmag = []
z = []
rh = []
for line in file5:
pointy4 = line.split()
f = float(pointy4[3])
g = float(pointy4[5])
h = float(pointy4[7])
# NOTE(review): the accompanying description labels f = gmag, g = z and
# h = rh, but f goes into rh, g into gmag and h into z here; confirm
# which mapping is intended.
rh.append(f)
gmag.append(g)
z.append(h)
This function returns one column from a list of rows. Note that it requires every row to have an element at the column index you are trying to access, though it would be relatively simple to change this if you need to.
_MISSING = object()  # sentinel: tells "no default given" apart from default=None


def getcolumn(matrix, index, default=_MISSING):
    """Return column *index* of *matrix* as a list.

    Args:
        matrix: iterable of rows, each row being an indexable sequence.
        index: which column to extract. Like all other list indexes this
            starts from 0, not 1.
        default: optional value to use for rows that have no element at
            *index*. When omitted, such a row raises IndexError.

    Returns:
        A new list with one entry per row of *matrix*, in row order.

    Raises:
        IndexError: if a row is too short and no *default* was given.
    """
    if default is _MISSING:
        return [row[index] for row in matrix]

    column = []
    for row in matrix:
        try:
            column.append(row[index])
        except IndexError:
            # Row is too short for this column: fill in the caller's default.
            column.append(default)
    return column