Take in values from CSV to use in a function - python

I have written a function that takes in the mass, acceleration, normal force, and coefficient of friction, and calculates the applied force. I have the parameter values for which the applied force needs to be calculated. How do I read the values from the CSV file and calculate the applied force? I have tried many times but could not figure it out. Here's my code:
import matplotlib.pyplot as plt
import csv
import math

def forceAppliedCalc(mass, acceleration, normalForce, muVal):
    forceYcomp = -(-9.8 * mass) - normalForce
    forceXcomp = (mass * acceleration) + (muVal * normalForce)
    return math.sqrt(math.pow(forceXcomp, 2) + math.pow(forceYcomp, 2))

file = open("Data.csv")
reader = csv.reader(file, delimiter=",")
data = dict()
headerRead = False
headers = []
for row in reader:
    if headerRead == False:
        for i in range(len(row)):
            data[row[i]] = []
        headers = row
        headerRead = True
    else:
        for i in range(len(row)):
            data[headers[i]].append(row[i])
And here's the CSV file I am working with:
Normal,Acceleration,Mass,Mu,Name,Guess
300,0.333,40,0.525,Alf,150
300,0.333,40,0.525,Benny,160
300,0.333,40,0.525,Claire,170
250,0.2,50,0.3,Claire,250
250,0.2,50,0.3,Alf,265
250,0.2,50,0.3,Benny,255
260,0.4,55,0.32,Claire,280
260,0.4,55,0.32,Alf,284
260,0.4,55,0.32,Benny,300
280,0.3,60,0.4,Benny,340
280,0.3,60,0.4,Claire,360
280,0.3,60,0.4,Alf,330
210,0.14,90,0.6,Alf,700
210,0.14,90,0.6,Benny,800
210,0.14,90,0.6,Claire,600
140,0.167,45,0.144,Claire,300
140,0.167,45,0.144,Alf,145
140,0.167,45,0.144,Benny,167
60,1.2,130,0.178,Claire,1225
60,1.2,130,0.178,Alf,1444
60,1.2,130,0.178,Benny,1467
625,0.9,50,0.35,Benny,200
625,0.9,50,0.35,Claire,250
625,0.9,50,0.35,Alf,213
266,0.12,57,0.787,Alf,370
266,0.12,57,0.787,Benny,567
266,0.12,57,0.787,Claire,809
267,0.268,115,0.235,Benny,900
267,0.268,115,0.235,Claire,905
267,0.268,115,0.235,Alf,1020
Thanks in advance

You can try using pandas, a well-known library for data processing.
Sample code:
import math
import pandas as pd

def forceAppliedCalc(mass, acceleration, normalForce, muVal):
    forceYcomp = -(-9.8 * mass) - normalForce
    forceXcomp = (mass * acceleration) + (muVal * normalForce)
    return math.sqrt(math.pow(forceXcomp, 2) + math.pow(forceYcomp, 2))

df = pd.read_csv('abcd.csv')
df['force'] = df[['Mass', 'Acceleration', 'Normal', 'Mu']].apply(lambda x: forceAppliedCalc(*x), axis=1)
print(df.head())
Output:
   Normal  Acceleration  Mass     Mu    Name  Guess       force
0     300         0.333    40  0.525     Alf    150  194.019258
1     300         0.333    40  0.525   Benny    160  194.019258
2     300         0.333    40  0.525  Claire    170  194.019258
3     250         0.200    50  0.300  Claire    250  254.607541
4     250         0.200    50  0.300     Alf    265  254.607541
In case you don't want to use pandas, you can achieve the same thing in plain Python with zip, map, and a list comprehension, for example:
# Notice that data is a dictionary of (string: list of string)
force = [forceAppliedCalc(*map(float, params)) for params in zip(data['Mass'], data['Acceleration'], data['Normal'], data['Mu'])]
Output:
[194.01925780705378, 194.01925780705378, 194.01925780705378, 254.60754112948035, 254.60754112948035, 254.60754112948035, 298.1745126599522, 298.1745126599522, 298.1745126599522, 334.3112322372672, 334.3112322372672, 334.3112322372672, 686.1442705437394, 686.1442705437394, 686.1442705437394, 302.269590969717, 302.269590969717, 302.269590969717, 1225.3890086009421, 1225.3890086009421, 1225.3890086009421, 296.29219108845916, 296.29219108845916, 296.29219108845916, 363.79859417540365, 363.79859417540365, 363.79859417540365, 865.0747997861225, 865.0747997861225, 865.0747997861225]
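If you want to keep the computed forces alongside the parsed columns, you can attach the list back to the dictionary built in the question's reader loop (assuming data maps each header to a list of strings, as above):

data['Force'] = force
print(data['Name'][0], data['Force'][0])  # Alf 194.01925780705378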

First, welcome to SOF!
I think a simple approach to what you are asking could be the following script (trying to stay as close as possible to your original code):
import csv
import math

def force_applied_calc(mass, acceleration, normal_force, mu_val):
    force_y_comp = -(-9.8 * mass) - normal_force
    force_x_comp = (mass * acceleration) + (mu_val * normal_force)
    return math.sqrt(math.pow(force_x_comp, 2) + math.pow(force_y_comp, 2))

if __name__ == '__main__':
    data = []
    headers = []
    save_data = False
    with open('Data.csv', 'r') as read_obj:
        csv_dict_reader = csv.DictReader(read_obj)
        headers = csv_dict_reader.fieldnames
        for csv_dict in csv_dict_reader:
            csv_dict.update(
                {
                    "Force": force_applied_calc(
                        int(csv_dict['Mass']),
                        float(csv_dict['Acceleration']),
                        int(csv_dict['Normal']),
                        float(csv_dict['Mu'])
                    )
                }
            )
            data.append(csv_dict)
            print(csv_dict)
    # Overwrite the file with the new data.
    if save_data and 'Force' not in headers:
        headers.append('Force')
        with open('Data.csv', 'w', newline='') as write_obj:
            csv_dict_writer = csv.DictWriter(write_obj, delimiter=',', fieldnames=headers)
            csv_dict_writer.writeheader()
            csv_dict_writer.writerows(data)
Note: @tandat's answer is a really good one.

Something like this would help.
import csv

# Assumes forceAppliedCalc from the question is already defined.
final_file = open('output.csv', 'a', newline='')
writer = csv.writer(final_file)

with open('file.csv', 'r') as file:
    header = next(file).rstrip().split(",")  # consume the header line
    header.append("appliedForce")
    writer.writerow(header)  # add the extended header to the new output file
    reader = csv.reader(file, delimiter=',')
    for row in reader:
        # the reader yields strings, so convert the values to float first
        appliedForce = forceAppliedCalc(float(row[2]), float(row[1]), float(row[0]), float(row[3]))
        row.append(appliedForce)
        writer.writerow(row)

final_file.close()


Why a "While True" loop could stop out of nowhere?

I'm sensing the vibrations of a machine live with an Arduino. For that I use an accelerometer, an Arduino Uno board, and a Python script that I wrote. The code is intended to read the sensor data from the serial port (g), calculate the root mean square of the sequence (RMS), and save the data to a CSV file. The problem I have is that my script stops showing and saving the data out of nowhere. Do you see any mistakes in the code? I couldn't identify any relationship between the different incidents, because each one happened at a different time. Here it is:
import csv
from math import sqrt
import serial
from itertools import count
import os
import time  # needed for time.sleep below

ruta = 'C:/Users/jabde/OneDrive/Documentos/Juan/PhD/Ensayos/Acelerómetro/archivo.csv'
nombre_archivo = input("Ingrese el nombre del archivo: ")
ruta_completa = os.path.join(os.path.dirname(ruta), nombre_archivo + '.csv')
os.chdir(os.path.dirname(ruta))

arduinoData = serial.Serial('com3', 115200)

fieldnames = ["t", "g", "RMS"]
i = 0
t = 0
g = 0.15
RMS = 0.1425
suma_cuadrados = 0

with open(ruta_completa, 'w', newline='') as csv_file:
    csv_writer = csv.DictWriter(csv_file, fieldnames=fieldnames)
    csv_writer.writeheader()

while True:
    with open(ruta_completa, 'a', newline='') as csv_file:
        csv_writer = csv.DictWriter(csv_file, fieldnames=fieldnames)
        try:
            g = arduinoData.readline()
            g = float(g) / 800
            t = t + 0.2
            t = round(t, 1)
            i = i + 1
            cuadrados = g * g
            suma_cuadrados = suma_cuadrados + cuadrados
            RMS = suma_cuadrados / i
            RMS = sqrt(RMS)
            info = {
                "g": g,
                "t": t,
                "RMS": RMS
            }
            with open(ruta_completa, 'a', newline='') as csv_file:
                csv_writer = csv.DictWriter(csv_file, fieldnames=fieldnames)
                csv_writer.writerow(info)
            print(t, g, RMS)
            time.sleep(0.2)
        except:
            pass
Thanks in advance!
PS: this is my first post, I'm a newbie :)
I thought it could be a disk-space problem on my laptop, so I changed the path where the CSV file was being saved, but nothing changed.
I would start with something like this and see where it got me. Note that some of your code is commented out and/or mocked for testing on my part.
The idea is to handle exceptions via an outer loop while doing expected work in the inner loop.
import csv
import math
#import serial
#import os
import time
import random

#ruta = 'C:/Users/jabde/OneDrive/Documentos/Juan/PhD/Ensayos/Acelerómetro/archivo.csv'
#nombre_archivo = input("Ingrese el nombre del archivo: ")
#ruta_completa = os.path.join(os.path.dirname(ruta), nombre_archivo + '.csv')
ruta_completa = "out.csv"
#os.chdir(os.path.dirname(ruta))

#arduinoData = serial.Serial('com3', 115200)
class arduinoData:
    readline = lambda: 100_000 * random.random()

start_time = int(time.time())
suma_cuadrados = 0
i = 1

with open(ruta_completa, 'w', newline='') as csv_file:
    csv_writer = csv.writer(csv_file)
    csv_writer.writerow(["t", "g", "RMS"])

while True:
    try:
        with open(ruta_completa, 'a', newline='') as csv_file:
            csv_writer = csv.writer(csv_file)
            while True:
                if random.randint(0, 10) == 10:
                    raise Exception("Test Exception")
                t = round(time.time() - start_time, 1)
                g = float(arduinoData.readline()) / 800
                suma_cuadrados += (g * g)
                RMS = math.sqrt(suma_cuadrados / i)
                row = [g, t, RMS]
                csv_writer.writerow(row)
                print(row)
                i += 1
                time.sleep(0.2)
    except Exception as e:
        print(f"Error: {e}")
        print("\tTrying again in 5 seconds...")
        time.sleep(5)

Comparing Thousands of datetimes in a reasonable amount of time?

Long story short, I'm trying to calibrate a thermometer. I have a CSV from my reference thermometer with 16k records (CSV-A), and a CSV from the thermometer to be calibrated with about 52k records (CSV-B). I need to compare the records from CSV-B to the CSV-A to get the times where CSV-B is closest to CSV-A and store the time and value in a different array.
I believe that I have gotten the basic logic of checking the datetimes in place, but the problem seems to be the fact that I have to iterate through an array of 52,000 items 16,000 times. I've tried implementing both multiprocessing and multithreading, but the script has yet to finish running.
import numpy as np, csv, multiprocessing as mp
from datetime import datetime as d
from multiprocessing import Process, Pool

d_rt_t = []
d_rh_t = []
d_dt = []
d_dh = []
d_rt = []
d_rh = []

nts = d.now().timestamp()
with open(f"calib_temp-{nts}.csv", 'w') as ctw:
    pass
with open(f"calib_humid-{nts}.csv", 'w') as chw:
    pass

def find_nearest(array, value):
    nearest = min(array, key=lambda x: abs(d.strptime(x[1], '%m/%d/%Y %H:%M:%S:%f') - d.strptime(value, '%Y-%m-%d %H:%M:%S')))
    return nearest

def comp_d_rt_t():
    for row in d_rt:
        pool = Pool()
        d_rt_t.append([pool.map(find_nearest, d_dt, row[1]), row[1]])

def comp_d_rh_t():
    for row in d_rh:
        d_rh_t.append([pool.map(find_nearest, d_dt, row[1]), row[1]])

#str2date = lambda x: d.strptime(x.decode("utf-8"), '%m/%d/%Y %H:%M:%S:%f')
#str2date2 = lambda x: d.strptime(x.decode("utf-8"), '%Y-%m-%d %H:%M:%S')

with open("dht-temp.csv", 'r', newline='') as ddt:
    fr_dt = csv.reader(ddt, delimiter=',')
    for row in fr_dt:
        d_dt.append([row[0], row[1]])
with open("dht-humid.csv", 'r', newline='') as ddh:
    fr_dh = csv.reader(ddh, delimiter=',')
    for row in fr_dh:
        d_dh.append([row[0], row[1]])
with open("ref-temp.csv", 'r', newline='') as drt:
    fr_rt = csv.reader(drt, delimiter=',')
    for row in fr_rt:
        d_rt.append([row[0], row[1]])
with open("ref-humid.csv", 'r', newline='') as drh:
    fr_rh = csv.reader(drh, delimiter=',')
    for row in fr_rh:
        d_rh.append([row[0], row[1]])

p1 = Process(target=comp_d_rt_t, args=(d_dt, row[1]))
p2 = Process(target=comp_d_rh_t, args=(d_dh, row[1]))
p1.start()
p2.start()
p1.join()
p2.join()
print(d_rt_t)

with open(f"calib_temp-{nts}.csv", 'a', newline='') as ct:
    c = csv.writer(ct, delimiter=',')
    for row in d_rt_t:
        dt = np.where(d_dt == row[1])
        rt = np.where(d_rt == row[1])
        print(rt)
        c.writerow([dt[0], rt[0]])
with open(f"calib_humid-{nts}.csv", 'a', newline='') as ch:
    c = csv.writer(ch, delimiter=',')
    for row in d_rh_t:
        dh = np.where(d_dh == row[1])
        print(dh)
        rh = np.where(d_rh == row[1])
        print(rh)
        c.writerow([dh[0], rh[0]])
I moved the for loops around a bit, but before they just called numpy append which called the find_nearest method.
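One way to get this down from 16,000 × 52,000 comparisons is to parse each timestamp once, sort one of the lists, and binary-search it with bisect instead of calling min() over the whole array for every record. Here is a minimal sketch of the idea, not a drop-in fix; it assumes the same [value, timestamp] rows and timestamp formats as in the code above:

import csv
from bisect import bisect_left
from datetime import datetime

def load_pairs(path, fmt):
    # parse each [value, timestamp] row once and sort by timestamp
    with open(path, 'r', newline='') as f:
        return sorted((datetime.strptime(r[1], fmt), r[0]) for r in csv.reader(f))

d_dt = load_pairs("dht-temp.csv", '%m/%d/%Y %H:%M:%S:%f')  # thermometer under test
d_rt = load_pairs("ref-temp.csv", '%Y-%m-%d %H:%M:%S')     # reference
dt_times = [t for t, _ in d_dt]

def find_nearest(when):
    # binary search for the insertion point, then keep the closer neighbour
    i = bisect_left(dt_times, when)
    candidates = d_dt[max(i - 1, 0):i + 1]
    return min(candidates, key=lambda pair: abs(pair[0] - when))

# one O(log n) lookup per reference record instead of a full scan
matches = [(when, value, find_nearest(when)) for when, value in d_rt]

With sorted data the whole match runs in roughly (n + m) log n steps, which should finish in seconds rather than hours, without any multiprocessing.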

Add one more value to csv from text file in python

I am converting multiple text files to a csv file. My text file looks like this:
ITEM: TIMESTEP
55000
ITEM: NUMBER OF ATOMS
4365
ITEM: BOX BOUNDS ff ff ff
-0.3 0.3
-0.6 0.6
-0.6 0.6
ITEM: ATOMS id type x y z vx vy vz fx fy fz omegax omegay omegaz radius
4356 1 -0.0885288 -0.0101421 -0.48871 -0.000941682 0.778688 -0.0153902 -0.00720861 -0.0533703 0.0104717 0.35581 -0.0601358 -0.436049 0.01
4227 1 0.0157977 0.00542603 -0.488429 -0.00996111 0.784119 0.00813807 -0.000491847 0.0144889 -0.0120111 1.08208 -0.0671177 0.369492 0.01
3973 1 0.0179724 0.0256167 -0.48799 -0.00582994 0.772455 0.0394544 0.0109589 -0.0187232 -0.00111718 -0.0586513 -0.162943 1.12784 0.01
4300 1 0.0900919 0.0248592 -0.488025 -0.000455483 0.769978 0.0388239 -0.00364509 0.0409803 -0.00269227 3.94355 -0.0249566 -0.223111 0.01
4200 1 -0.0230223 0.0329911 -0.483108 -0.00238 0.778547 0.0500186 0.0421189 -0.021588 0.05607 0.112989 -0.0813771 -1.09981 0.015
4339 1 0.00143577 0.0368542 -0.488107 0.000587848 0.784672 0.0593572 0.00385562 -0.00475113 -0.00710483 -0.201196 0.158512 -5.63826 0.01
4106 1 0.0648392 0.0269728 -0.483248 -0.00365836 0.766081 0.0395827 0.0418642 0.1802 0.0547313 -0.0578358 0.124205 -0.96464 0.015
4104 1 -0.084453 0.0507114 -0.482726 -0.000596577 0.75636 0.0806599 0.000817826 0.0119286 -0.0150014 -0.0864852 -0.103877 0.198773 0.015
Right now my csv file contains the values after line 9 (line 8 in the Python code).
I want to include line 2 (the TIMESTEP header) in the csv as well, along with all the values after line 9.
I tried to edit my code but couldn't succeed. Can I get some help?
My code is here:
import numpy as np
import pandas as pd
import csv
import glob
import time

def main():
    start = time.time()
    data_folder = "./all/"  # folder name
    files = glob.glob(data_folder + '*dump*.data')
    print("Total files:", len(files))

    # get header from one of the files
    #header = []
    with open('all/dump46000.data', 'r') as f:
        #lines = f.readlines()
        for _ in range(8):
            next(f)  # skip first 8 lines
        header = ','.join(f.readline().split()[2:]) + '\n'
        headers = ','.join(f.readline().split()[2:])
        #header.append(headers)
        #header.append('timestep')
        print(header)

    for file in files:
        with open(file, 'r') as f, open(f'all.csv', 'a') as g:  # note the 'a'
            g.write(header)  # write the header
            for _ in range(9):
                next(f)  # skip first 9 lines
            for line in f:
                g.write(line.rstrip().replace(' ', ',') + '\n')

    print(time.time() - start)

if __name__ == "__main__":
    main()
My folder all contains more than 600 files:
['./all/dump501000.data',
'./all/dump307000.data',
'./all/dump612000.data',
'./all/dump369000.data',
'./all/dump23000.data',
'./all/dump470000.data',
'./all/dump235000.data',
'./all/dump6000.data',
'./all/dump568000.data',
'./all/dump506000.data',
'./all/dump623000.data',
'./all/dump329000.data',
'./all/dump220000.data',
.....................
....................
I want this CSV header from the text file:
id type x y z vx vy vz fx fy fz omegax omegay omegaz radius TIMESTEP
But I am getting this:
id type x y z vx vy vz fx fy fz omegax omegay omegaz radius
Thank you
Here is something you can try to add TIMESTEP to your data in the csv. I am just wondering whether you need to print the header for each file; my understanding is that you can print the header once at the top. If you want to print it for each file, bring it into the for loop.
import numpy as np
import pandas as pd
import csv
import glob
import time

def main():
    start = time.time()
    data_folder = "./all/"  # folder name
    files = glob.glob(data_folder + '*dump*.data')
    print("Total files:", len(files))

    # get header from one of the files
    header = []
    with open('all/dump46000.data', 'r') as f:
        #lines = f.readlines()
        header.extend(f.readline().split()[1:])   # picks up 'TIMESTEP'
        timeStep = f.readline().split()
        for _ in range(6):
            next(f)  # skip the next 6 lines
        header.extend(f.readline().split()[2:])   # picks up the column names
    print(header)
    headerString = ','.join(header)

    for file in files:
        with open(file, 'r') as f, open(f'all.csv', 'a') as g:  # note the 'a'
            next(f)
            g.write(headerString + '\n')  # write the header
            timeStep = f.readline().split()
            for _ in range(7):
                next(f)
            for line in f:
                file_line = line.split()
                file_line.insert(0, timeStep[0])
                data = ','.join(file_line)
                g.write(data + '\n')

    print(time.time() - start)

if __name__ == "__main__":
    main()
Based on what you want, here's what should work
import numpy as np
import pandas as pd
import csv
import glob
import time

def main():
    start = time.perf_counter()
    data_folder = "./all/"  # folder name
    files = glob.glob(data_folder + '*dump*.data')
    print("Total files:", len(files))

    for file in files:
        with open(file, 'r') as f, open(f'all.csv', 'a') as g:  # note the 'a'
            header = f.readline().split("ITEM: ")[1] + '\n'
            headers = f.readline()
            print(header)
            g.write(header)
            g.write(headers)
            for _ in range(6):
                next(f)
            for line in f:
                g.write(line.rstrip().replace(' ', ',') + '\n')

    print(time.perf_counter() - start)

if __name__ == "__main__":
    main()
Let me know if you need any other syntax or something else in the final CSV.
Also, to time something, always use time.perf_counter; it's more accurate.

pandas data-frame continuously update

Please see the pandas-based pattern scanner below; I am using a csv file as the data source and loading it into data.
Since the data is loaded from a csv file, I have to reload/rerun the script every 5 minutes to read the updated file, which redraws the plot every 5 minutes.
Is there any way to use df.update to avoid rerunning the script and prevent the plot from reloading again and again?
import pandas as pd
import numpy as np
from scipy.signal import argrelextrema
import matplotlib.pyplot as plt
from harmonic_functions import *
import uuid
from csv import DictReader

data = pd.read_csv('temp.csv')
data.time = pd.to_datetime(data.time, format='%d.%m.%Y %H:%M:%S.%f')
data.index = data['time']
# data = data.drop_duplicates(keep=False)
price = data.close.copy()

err_allowed = 10.0 / 100

pnl = []
trade_dates = []
correct_pats = 0
pats = 0
# plt.ion()

for i in range(100, len(price)):
    current_idx, current_pat, start, end = peak_detect(price.values[:i], order=7)

    XA = current_pat[1] - current_pat[0]
    AB = current_pat[2] - current_pat[1]
    BC = current_pat[3] - current_pat[2]
    CD = current_pat[4] - current_pat[3]
    moves = [XA, AB, BC, CD]

    gart = is_gartley(moves, err_allowed)
    butt = is_butterfly(moves, err_allowed)
    bat = is_bat(moves, err_allowed)
    crab = is_crab(moves, err_allowed)
    shark = is_shark(moves, err_allowed)
    trio = is_trio(moves, err_allowed)
    cyph = is_cyph(moves, err_allowed)
    three_dives = is_3dives(moves, err_allowed)
    fivezero = is_50(moves, err_allowed)
    altbat = is_altbat(moves, err_allowed)
    deepcrab = is_deepcrab(moves, err_allowed)
    dragon = is_dragon(moves, err_allowed)
    snorm = is_snorm(moves, err_allowed)

    harmonics = np.array([gart, butt, bat, crab, shark, trio, cyph, three_dives, fivezero, altbat, deepcrab, dragon, snorm])
    labels = ['Garterly', 'Butterfly', 'Bat', 'Crab', 'Shark', 'Trio', 'Cypher', '3Dives', '5Zero', 'AltBat', 'DeepCrab', 'Dragon', 'Snorm']

    if np.any(harmonics == 1) or np.any(harmonics == -1):
        for j in range(0, len(harmonics)):
            if harmonics[j] == 1 or harmonics[j] == -1:
                pats += 1
                sense = 'Bearish ' if harmonics[j] == -1 else 'Bullish '
                label = sense + labels[j] + ' found'
                print(label)
                print(price.values[start])

                plt.title(label)
                plt.plot(np.arange(start, i + 5), price.values[start:i + 5])
                plt.scatter(current_idx, current_pat, c='r')
                filename = str(uuid.uuid1())[:8]
                print(current_pat)
                print(current_idx)

                # with open('temp.csv', mode='r') as csv_file:
                #     file = DictReader(csv_file, delimiter=',')
                #     close = str(current_pat[4])
                #     print(current_pat)
                #     rows = [row for row in file if row['close'] in close]
                #     closetime = rows[-1]['ID']
                #     print(closetime)

                write1 = str(current_idx)
                write2 = str(current_pat)
                write = write1 + ',' + write2
                print(write)

                with open("datadb", "r+") as file:
                    for line in file:
                        if write in line:
                            break
                    else:  # not found, we are at the eof
                        file.write(f"{write}\n")  # append missing data

                print(filename)
                plt.savefig(filename)
                plt.close(filename)
                # plt.show()
                plt.clf()
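df.update only merges one DataFrame into another; it won't watch the file for you. A simple alternative is to poll the csv's modification time and rerun the scan only when the file actually changes, so the script keeps running instead of being restarted. A minimal sketch of the idea (scan() is a hypothetical wrapper around the pattern-detection loop above):

import os
import time
import pandas as pd
import matplotlib.pyplot as plt

CSV_PATH = 'temp.csv'

def watch(scan, interval=5):
    # re-read the file only when its modification time changes
    last_mtime = None
    plt.ion()  # interactive mode: figures update without blocking
    while True:
        mtime = os.path.getmtime(CSV_PATH)
        if mtime != last_mtime:
            last_mtime = mtime
            data = pd.read_csv(CSV_PATH)
            data.time = pd.to_datetime(data.time, format='%d.%m.%Y %H:%M:%S.%f')
            data.index = data['time']
            scan(data)      # run the pattern scan on the fresh frame
            plt.pause(0.1)  # let the open figure redraw
        time.sleep(interval)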

Python Code Speed Up

My code compares two vectors saved as dictionaries (two pickle files) and saves the result into a pickle file too. This works, but very slowly: one comparison result takes about 7:20 min. Because I have a lot of videos (exactly 2033), the program would run for about 10 days. This is too long. How can I speed up my code for Python 2.7?
import math
import csv
import pickle
from itertools import izip

global_ddc_file = 'E:/global_ddc.p'
io = 'E:/AV-Datensatz'
v_source = ''

def dot_product(v1, v2):
    return sum(map(lambda x: x[0] * x[1], izip(v1, v2)))  # izip('ABCD', 'xy') --> Ax By

def cosine_measure(v1, v2):
    prod = dot_product(v1, v2)
    len1 = math.sqrt(dot_product(v1, v1))
    len2 = math.sqrt(dot_product(v2, v2))
    if (len1 * len2) <> 0:
        out = prod / (len1 * len2)
    else:
        out = 0
    return out

def findSource(v):
    v_id = "/" + v[0].lstrip("<http://av.tib.eu/resource/video").rstrip(">")
    v_source = io + v_id
    v_file = v_source + '/vector.p'
    source = [v_id, v_source, v_file]
    return source

def getVector(v, vectorCol):
    with open(v, 'rb') as f:
        try:
            vector_v = pickle.load(f)
        except:
            print 'file couldnt be loaded'
    tf_idf = []
    tf_idf = [vec[1][vectorCol] for vec in vector_v]
    return tf_idf

def compareVectors(v1, v2, vectorCol):
    v1_source = findSource(v1)
    v2_source = findSource(v2)
    V1 = getVector(v1_source[2], vectorCol)
    V2 = getVector(v2_source[2], vectorCol)
    sim = [v1_source[0], v2_source[0], cosine_measure(V1, V2)]
    return sim

#with open('videos_av_portal_cc_3.0_nur2bspStanford.csv', 'rb') as dataIn:
with open('videos_av_portal_cc_3.0_vollstaendig.csv', 'rb') as dataIn:
#with open('videos_av_portal_cc_3.0.csv', 'rb') as dataIn:
    try:
        reader = csv.reader(dataIn)
        v_source = []
        for row in reader:
            v_source.append(findSource(row))
        #print v_source
        for one in v_source:
            print one[1]
            compVec = []
            for another in v_source:
                if one <> another:
                    compVec.append(compareVectors(one, another, 3))
            compVec_sort = sorted(compVec, key=lambda cosim: cosim[2], reverse=True)
            # save vector file for each video
            with open(one[1] + '/compare.p', 'wb') as f:
                pickle.dump(compVec_sort, f)
    finally:
        dataIn.close()
Split the code into parts:
1. Load the dictionaries into vectors.
2. Compare two dictionaries using multiprocessing (a sketch follows below).
3. Launch processes simultaneously according to memory availability and end each process after 8 minutes, then update the third dictionary.
4. Relaunch the process on the next set of data, follow step 3, and continue until the whole dictionary has been processed.
This should reduce the total turnaround time.
Let me know if you need code .
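For illustration, a minimal Python 2.7 sketch of step 2 (not the poster's actual code; it assumes the tf-idf vectors have already been loaded once into a list of (video_id, vector) pairs):

import math
from multiprocessing import Pool
from itertools import izip

def cosine(pair):
    # pair is ((id1, v1), (id2, v2)); vectors are plain lists of floats
    (id1, v1), (id2, v2) = pair
    dot = sum(a * b for a, b in izip(v1, v2))
    len1 = math.sqrt(sum(a * a for a in v1))
    len2 = math.sqrt(sum(b * b for b in v2))
    sim = dot / (len1 * len2) if len1 * len2 != 0 else 0
    return [id1, id2, sim]

def compare_all(vectors):
    # fan the pairwise comparisons for each video out to worker processes
    pool = Pool()  # one worker per CPU core by default
    results = []
    for one in vectors:
        pairs = [(one, other) for other in vectors if other is not one]
        results.append(sorted(pool.map(cosine, pairs),
                              key=lambda s: s[2], reverse=True))
    pool.close()
    pool.join()
    return results

Note that loading every vector once up front already avoids the repeated pickle.load calls that the original code makes for every pair, which is likely where much of the runtime goes.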
