Nested while loop only iterates once - python

I have written some code which takes data from a CSV file, stores it in lists, then iterates over the data, returning only the information I need.
I had it working for single lists:
# Import modules
import csv
import datetime
# import numpy as np
import matplotlib.pyplot as plt

# Time code (as slow to run)
tin = []
tout = []
tin = datetime.datetime.now()  # tic
plt.close()

# Assign variables
pktime = []
pkey1 = []
pkey2 = []
pkey3 = []
pkey4 = []
pkey5 = []
pkey6 = []
pkeys = [pkey1, pkey2, pkey3, pkey4, pkey5, pkey6]
delt1 = []
delt2 = []
delt3 = []
delt4 = []
delt5 = []
delt6 = []
delts = [delt1, delt2, delt3, delt4, delt5, delt6]
pkey1full = []
pkey2full = []
pkey3full = []
pkey4full = []
pkey5full = []
pkey6full = []
pkeyfull = [pkey1full, pkey2full, pkey3full, pkey4full, pkey5full, pkey6full]

# Read in PK weight/deltaT/time values
with open('PKweight.csv') as pkweight:
    red = csv.reader(pkweight)
    for t, pk1, pk2, pk3, pk4, pk5, pk6, dt1, dt2, dt3, dt4, dt5, dt6 in red:
        pktime.append(datetime.datetime.strptime(t, '%H:%M:%S'))
        pkey1.append(float(pk1))
        pkey2.append(float(pk2))
        pkey3.append(float(pk3))
        pkey4.append(float(pk4))
        pkey5.append(float(pk5))
        pkey6.append(float(pk6))
        delt1.append(float(dt1))
        delt2.append(float(dt2))
        delt3.append(float(dt3))
        delt4.append(float(dt4))
        delt5.append(float(dt5))
        delt6.append(float(dt6))
# Calculate the pkweight for each cell, then append it to pkey*full
def pkweight1_calc():
    i = 1
    while i <= (len(pkey1) - 1):
        if pkey1[i] == 0.0 and pkey1[i-1] != 0.0:
            pkey1full.append(pkey1[i-2])
        i += 1
    pkey1full.reverse()
    return pkey1full

pkweight1_calc()
I had this code written out six times to run the calculation for each of the sets of data (1-6), but I want to have it all as one function. I have tried nesting the while loop inside a for loop; however, it only fills one of the lists, whichever one the initial value of j pointed at:
def pkweight_calc():
    i = 1
    for j in range(0, 5):
        while i <= (len(pkeys[j]) - 1):
            if pkeys[j][i] == 0.0 and pkeys[j][i-1] != 0.0:
                pkeyfull[j].append(pkeys[j][i-2])
            i += 1
        pkeyfull[j].reverse()

pkweight_calc()
Can anyone help me with this? Thanks in advance!!
EDIT- updated indenting, Sorry!

Thanks for the help, I managed to find someone at work who could help me. He wasn't sure why, but changing the while loop
while i <= (len(pkeys[j]) - 1):
to a for loop:
for i in range(2, len(pkeys[j])):
solved it. Not sure why, but it did!
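The reason, for anyone who finds this later: i = 1 is executed once, before the for loop over j. After the first pass, the while loop leaves i equal to len(pkeys[0]), so the while condition is immediately false for every later j and those lists stay empty. The for loop works because it rebinds i at the start of each j iteration. Two further notes: range(0, 5) only covers indices 0-4, so pkey6 is never processed, and starting the range at 2 avoids pkeys[j][i-2] silently reading index -1 (the last element) when i is 1. A minimal corrected sketch, using the same names as above:

def pkweight_calc():
    for j in range(len(pkeys)):  # covers all six lists, unlike range(0, 5)
        for i in range(2, len(pkeys[j])):  # i restarts for every j
            if pkeys[j][i] == 0.0 and pkeys[j][i-1] != 0.0:
                pkeyfull[j].append(pkeys[j][i-2])
        pkeyfull[j].reverse()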

Related

Python iterate over each 100 elements

I don't know if this is a good way to optimize, but basically I am using Python inside a 3D app to create random colors per object. The code I have works well with objects within 10k polygons, but it crashes at 100k polygons. Is there a way to process the loop in chunks? Basically I have a for loop and use an if statement to filter the first 100 elements, but then I need the next 100, and the next 100, and so on. How can I write that? Maybe with a time sleep between each chunk. It won't be faster, but at least it might not crash the program. Thanks.
for i, n in enumerate(uvShellIds):
    # code can only perform well within sets of 100 elements
    limit = 100  # ?
    if 0 <= i <= 100:
        # do something
        print(n)
    # now I need it to work on a new set of 100 elements
    # if 101 <= i <= 200:
    # (...keep going between sets of 100...)
My current code:
import maya.OpenMaya as om
import maya.cmds as cmds
import random

def getUvShelList(name):
    selList = om.MSelectionList()
    selList.add(name)
    selListIter = om.MItSelectionList(selList, om.MFn.kMesh)
    pathToShape = om.MDagPath()
    selListIter.getDagPath(pathToShape)
    meshNode = pathToShape.fullPathName()
    uvSets = cmds.polyUVSet(meshNode, query=True, allUVSets=True)
    allSets = []
    for uvset in uvSets:
        shapeFn = om.MFnMesh(pathToShape)
        shells = om.MScriptUtil()
        shells.createFromInt(0)
        # shellsPtr = shells.asUintPtr()
        nbUvShells = shells.asUintPtr()
        uArray = om.MFloatArray()    # array for U coords
        vArray = om.MFloatArray()    # array for V coords
        uvShellIds = om.MIntArray()  # the container for the uv shell Ids
        shapeFn.getUVs(uArray, vArray)
        shapeFn.getUvShellsIds(uvShellIds, nbUvShells, uvset)
        # shellCount = shells.getUint(shellsPtr)
        shells = {}
        for i, n in enumerate(uvShellIds):
            # print(i, n)
            limit = 100
            if i <= limit:
                if n in shells:
                    # shells[n].append([uArray[i], vArray[i]])
                    shells[n].append('%s.map[%i]' % (name, i))
                else:
                    # shells[n] = [[uArray[i], vArray[i]]]
                    shells[n] = ['%s.map[%i]' % (name, i)]
        allSets.append({uvset: shells})
    for shell in shells:
        selection_shell = shells.get(shell)
        cmds.select(selection_shell)
        # print(shells.get(shell))
        facesSel = cmds.polyListComponentConversion(fromUV=True, toFace=True)
        cmds.select(facesSel)
        r = [random.random() for i in range(3)]
        cmds.polyColorPerVertex(facesSel, rgb=(r[0], r[1], r[2]), cdo=1)
        cmds.select(deselect=1)

getUvShelList('polySurface359')
You can use islice from itertools to process the list in chunks.
from itertools import islice

uvShellIds = list(range(1000))
iterator = iter(uvShellIds)
while True:
    chunk = list(islice(iterator, 100))
    if not chunk:
        break
    print(chunk)  # chunk contains 100 elements you can process
I don't know how well it fits into your current code, but below is how you can process the chunks:
from itertools import islice

uvShellIds = list(range(1000))
iterator = iter(uvShellIds)
offset = 0
while True:
    chunk = list(islice(iterator, 100))
    if not chunk:
        break
    # Processing chunk items
    for i, n in enumerate(chunk):
        # offset + i gives the right index referring to the uvShellIds variable
        # Then, perform your actions
        if n in shells:
            # shells[n].append([uArray[i], vArray[i]])
            shells[n].append('%s.map[%i]' % (name, offset + i))
        else:
            # shells[n] = [[uArray[i], vArray[i]]]
            shells[n] = ['%s.map[%i]' % (name, offset + i)]
    offset += 100
    # Your sleep can come here
The snippet above should replace your for i, n in enumerate(uvShellIds): block.
As @David Culbreth's answer stated, I'm not sure the sleep will be of help, but I left a comment showing where you can place it.
I use this generator to "chunkify" my long-running operations in python into smaller batches:
def chunkify_list(items, chunk_size):
    for i in range(0, len(items), chunk_size):
        yield items[i:i+chunk_size]
With this defined, you can write your program something like this:
items = [1,2,3,4,5 ...]
for chunk in chunkify_list(items, 100):
    for item in chunk:
        process_item(item)
    sleep(delay)
Now, I'm not going to guarantee that sleep will actually solve your problems, but this lets you handle your data one chunk at a time.
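One caveat if you drop this into the Maya code above: chunkify_list loses the global index that the '%s.map[%i]' strings need. A hypothetical variant (the name chunkify_with_offset is mine, not from the thread) that also yields each chunk's start index:

def chunkify_with_offset(items, chunk_size):
    # Yields (start_index, chunk) so the caller can recover the global
    # index as start_index + i, as in the islice answer above.
    for start in range(0, len(items), chunk_size):
        yield start, items[start:start + chunk_size]

for start, chunk in chunkify_with_offset(list(range(1000)), 100):
    for i, n in enumerate(chunk):
        global_index = start + i  # index into the original list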

Numpy `searchsorted` far slower than my binary search function

I was experimenting with binary search, and when I got my version working I figured I would compare its speed to NumPy's. I was fairly surprised at the results, for two reasons:
1. I know that binary search should grow as log n, which mine did, but NumPy's grew linearly.
2. Not only that, but NumPy was just plain slower -- at the start and certainly at the end.
I attached a graph of the results. Orange is NumPy and blue is mine. To the left is the time in milliseconds it took to find the last item in the list (items[-1]) and the bottom shows the length of the list. I have also checked to make sure that my code is returning the correct value and it is.
In case I wasn't clear, my questions are basically "why?" to both #1 and #2.
# binary_search.py
from typing import Iterable
from numba import njit        # imported but unused in this snippet
from numba.typed import List  # imported but unused in this snippet

def _find(items: Iterable[int], to_find: int):
    min = -1
    max = len(items)
    while True:
        split = int((max + min) / 2)
        item = items[split]
        if item == to_find:
            return split
        elif max == min:
            print(min, max)
            print(items)
            print(to_find)
            print(split)
            exit()
        elif item > to_find:
            max = split - 1
        elif item < to_find:
            min = split + 1

def findsorted(_items: Iterable[int], to_find: int):
    items = _items
    return _find(items, to_find)
# graph_results.py
import binary_search as bs
import sys
import time
import numpy as np
from matplotlib import pyplot as plt

iterations = int(sys.argv[1])
items = [0, 1]
lx = []
ly = []
nx = []
ny = []
for i in range(2, iterations):
    l_avg_times = []
    n_avg_times = []
    items.append(items[-1] + 1)
    for _ in range(0, 100):
        to_find = items[-1]
        lstart = time.time()
        bs.findsorted(items, to_find)
        lend = time.time()
        nstart = lend
        np.searchsorted(items, to_find)
        nend = time.time()
        ltotal = lend - lstart
        ntotal = nend - nstart
        l_avg_times.append(ltotal)
        n_avg_times.append(ntotal)
    # note: round(x, 1000) keeps all decimals, so this is effectively x * 1000
    ly.append(round(sum(l_avg_times) / len(l_avg_times), 1000) * 1000)
    lx.append(i)
    ny.append(round(sum(n_avg_times) / len(n_avg_times), 1000) * 1000)
    nx.append(i)
plt.plot(lx, ly)
plt.plot(nx, ny)
plt.show()
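One likely explanation worth checking here, though it wasn't confirmed in the thread: items is a plain Python list, and np.searchsorted converts its input to an ndarray on every call, an O(n) copy that swamps the O(log n) search and would produce exactly the linear growth described above. A minimal sketch of a fairer timing:

import time
import numpy as np

items_arr = np.asarray(items)  # pay the O(n) conversion cost once, outside the timed region
nstart = time.time()
np.searchsorted(items_arr, to_find)  # now only the binary search itself is timed
nend = time.time()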

How can I optimize the groupby.apply(function) in Python?

I have a function that uses collections.deque to track daily stock-in based on FIFO. An order is fulfilled if possible and subtracted from stock accordingly. I use the function in groupby.apply(my_function).
I am struggling with where to place the second loop. Both loops work properly when run on their own, but I cannot get them working combined.
The dataset is about 1.5 million rows.
Thanks.
from collections import deque
import numpy as np

DOS = 7
WIP = 1
df_fin['list_stock'] = 0
df_fin['stock_new'] = 0

def create_stocklist(x):
    x['date_diff'] = x['dates'] - x['dates'].shift()
    x['date_diff'] = x['date_diff'].fillna(0)
    x['date_diff'] = (x['date_diff'] / np.timedelta64(1, 'D')).astype(int)
    x['list_stock'] = x['list_stock'].astype(object)
    x['stock_new'] = x['stock_new'].astype(object)
    var_stock = DOS * [0]
    sl = deque([0], maxlen=DOS)
    for i in x.index:
        order = x['order_bin'][i]
        if x['date_diff'][i] > 0:
            for p in range(0, x['date_diff'][i]):
                if p == WIP:
                    sl.appendleft(x.return_bin[i-1])
                else:
                    sl.appendleft(0)
            sl_list = list(sl)
            sl_list.reverse()
            new_list = []
        # From here the loop does not work as I wanted it to work.
        # I want to loop over the created sl_list
        # and then start the loop above with the outcome of the loop below.
        for elem in sl_list:
            while order > 0:
                val = max(0, elem - order)
                order = abs(min(0, elem - order))
                new_list.append(val)
                break
            else:
                new_list.append(elem)
        new_list.reverse()
        x.at[i, 'list_stock'] = new_list
        sl = deque(new_list)
    return x

df_fin.groupby(by=['ID']).apply(create_stocklist)
You do not have access to sl_list inside the second loop whenever date_diff is 0, because sl_list is only ever assigned inside the if block. You should define it in the upper scope, for example right at the top of the outer for loop:
for i in x.index:
    # define it just here
    sl_list = []
    order = x['order_bin'][i]
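Putting that together, a minimal sketch of the corrected loop skeleton (same names as the question; note that new_list has the same scoping problem and moves out of the if block as well):

for i in x.index:
    sl_list = []   # defined unconditionally, so the second loop always sees it
    new_list = []
    order = x['order_bin'][i]
    if x['date_diff'][i] > 0:
        # ... appendleft logic from the question ...
        sl_list = list(sl)
        sl_list.reverse()
    for elem in sl_list:  # safe even when date_diff == 0
        # ... order-fulfilment logic from the question ...
        pass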

How do I switch this MATLAB loop to a Python loop

So I have a challenge at hand that I am trying to accomplish. I have MATLAB code that works fine; however, I want to write the same code in Python, and I don't get the same results.
I have tried using a different for loop than the one in MATLAB. Although these should give the same results, I fail at some point in the loop, and I couldn't figure out where the mistake is.
for ii = 1:100  % MATLAB code
    healthy = 2*randn(100,1000)+5;
    patient = 2*randn(100,1000)+7;
    threshold = mu_healthy-sd_healthy:0.1:mu_patient+sd_patient;
    for i = 1:length(threshold)
        TP(i) = sum(patient>=threshold(i));
        FP(i) = sum(healthy>=threshold(i));
        TN(i) = sum(healthy<threshold(i));
        FN(i) = sum(patient<threshold(i));
    end
    FPR(ii,:) = FP/1000;
    TPR(ii,:) = TP/1000;
end
def appending():  # Python code
    for n in range(0, 50):
        for x in range(0, 1000):
            for a in range(0, 61):
                if Apatient[x,n] >= newthreshold[a]:
                    TP[a].append(Apatient[x,n])
                elif Ahealthy[x,n] >= newthreshold[a]:
                    FP.append(Ahealthy[x,n])
                elif Apatient[x,n] < newthreshold[a]:
                    TN.append(Apatient[x,n])
                elif Ahealthy[x,n] < newthreshold[a]:
                    FN.append(Ahealthy[x,n])
If you run this in MATLAB, you will see FN and TN with 61 values in each column. I want the same to happen in my loop as well; however, I get far more elements when I run this code. Thanks.
Just following the MATLAB script, here is an attempted translation:
import numpy as np

mu_healthy = 5
sd_healthy = 2
mu_patient = 7
sd_patient = 2
threshold = np.arange(mu_healthy - sd_healthy, mu_patient + sd_patient + 0.1, 0.1)
L = len(threshold)
TP = np.zeros([L, 1])
FP = np.zeros([L, 1])
TN = np.zeros([L, 1])
FN = np.zeros([L, 1])
FPR = np.zeros([100, L])
TPR = np.zeros([100, L])
for ii in range(0, 100):
    # equivalent to MATLAB's 2*randn(100,1000)+5 and 2*randn(100,1000)+7
    healthy = np.random.normal(mu_healthy, sd_healthy, [100, 1000])
    patient = np.random.normal(mu_patient, sd_patient, [100, 1000])
    for i in range(0, L):
        TP[i] = np.sum(patient >= threshold[i])
        FP[i] = np.sum(healthy >= threshold[i])
        TN[i] = np.sum(healthy < threshold[i])
        FN[i] = np.sum(patient < threshold[i])
    FPR[ii, :] = FP[:, 0]
    TPR[ii, :] = TP[:, 0]
FPR = FPR / 1000
TPR = TPR / 1000
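As a side note, not part of the original answer: the inner threshold loop can be collapsed into a single broadcast comparison, which is usually much faster than looping in Python. A sketch assuming the healthy, patient, and threshold arrays defined above:

# Rows index thresholds, so TP[i] matches the loop version above.
TP = (patient.ravel()[None, :] >= threshold[:, None]).sum(axis=1)
FP = (healthy.ravel()[None, :] >= threshold[:, None]).sum(axis=1)
TN = (healthy.ravel()[None, :] < threshold[:, None]).sum(axis=1)
FN = (patient.ravel()[None, :] < threshold[:, None]).sum(axis=1)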

How to Split Multiple Arrays Simultaneously

I have 3 lists, all of the same length. One list holds numbers representing days, and the other two hold data corresponding to those days, e.g.
day = [1,1,1,1,1,1,2,2,2,2,2,3,3,3,3,3,4,4,4,4....]
data1 = [1,2,3,4,5,6,1,2,3,4,5,1,2,3,4,5,1,2,3,4....] # (effectively random numbers)
data2 = [1,2,3,4,5,6,1,2,3,4,5,1,2,3,4,5,1,2,3,4....] # (again, effectively random numbers)
What I need to do is to take data1 and data2 for day 1, perform operations on it, and then repeat the process for day 2, day 3, day 4 and so on.
I currently have:
def sortfile(day, data1, data2):
    x = []
    y = []
    date = []
    temp1 = []
    temp2 = []
    i = 00
    for i in range(0, len(day) - 1):
        if day[i] == day[i+1]:
            x.append(data1[i])
            y.append(data2[i])
            i += 1
            # print x, y
        else:
            for i in range(len(x)):
                temp1.append(x)
            for i in range(len(y)):
                temp2.append(y)
            date.append(day[i])
            x = []
            y = []
            i += 1
    while i != (len(epoch) - 1):  # note: epoch is not defined in this snippet
        x.append(data1[i])
        y.append(data2[i])
        i += 1
    date.append(day[i])
    return date, temp1, temp2
This is supposed to append to the x array whilst the day stays the same, and then if it changes append all the data from the x array to the temp1 array, then clear the x array. It will then perform operations on temp1 and temp2. However, when I run this as a check (I'm aware that I'm not clearing temp1 and temp2 at any point), temp1 just fills with the full list of days and temp2 is empty. I'm not sure why this is and am open to completely restarting if a better way is suggested!
Thanks in advance.
Just zip the lists:
x = []
y = []
date = []
temp1 = []
temp2 = []
day = [1,1,1,1,1,1,2,2,2,2,2,3,3,3,3,3,4,4,4,4]
data1 = [1,2,3,4,5,6,1,2,3,4,5,1,2,3,4,5,1,2,3,4]
data2 = [1,2,3,4,5,6,1,2,3,4,5,1,2,3,4,5,1,2,3,4]
zipped = list(zip(day, data1, data2))  # list() so it can be indexed in Python 3
for ind, (dy, dt1, dt2) in enumerate(zipped[:-1]):  # parentheses needed to unpack the tuple
    if zipped[ind+1][0] == dy:
        x.append(dt1)
        y.append(dt2)
    else:
        temp1 += x
        temp2 += y
        x = []
        y = []
Not sure what your while loop is doing, as it is outside the for loop, and you don't actually return or use x and y, so that code seems irrelevant and may well be the reason your code is not returning what you expect.
groupby and zip are a good solution for this problem. groupby lets you group bits of sorted data together, and zip allows you to access the elements at each index of day, data1, and data2 together as a tuple.
from operator import itemgetter
from itertools import groupby

day = [1,1,1,1,1,1,2,2,2,2,2,3,3,3,3,3,4,4,4,4]
data1 = [1,2,3,4,5,6,1,2,3,4,5,1,2,3,4,5,1,2,3,4]
data2 = [1,2,3,4,5,6,1,2,3,4,5,1,2,3,4,5,1,2,3,4]
x = []
y = []
for day_num, data in groupby(zip(day, data1, data2), itemgetter(0)):
    data = list(data)
    data1_total = sum(d[1] for d in data)
    x.append(data1_total)
    data2_total = sum(d[2] for d in data)
    y.append(data2_total)
itemgetter is just a function that tells groupby to group the tuples by their first element (the day value).
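For a quick sanity check, with the sample lists above each entry of x is that day's sum of data1 values:

print(x)  # [21, 15, 15, 10] for days 1-4
print(y)  # [21, 15, 15, 10], since data2 equals data1 here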
Another option is to use defaultdict and simply iterate over days adding data as we go:
from collections import defaultdict

d1 = defaultdict(list)
d2 = defaultdict(list)
for n, d in enumerate(day):
    d1[d].append(data1[n])
    d2[d].append(data2[n])
This creates two dicts like {day: [value1, value2...]...}. Note that this solution doesn't require days to be sorted.
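The per-day values can then be pulled out directly, for example:

print(d1[1])  # [1, 2, 3, 4, 5, 6] -- all data1 values for day 1
print(d2[3])  # [1, 2, 3, 4, 5]    -- all data2 values for day 3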
Running several threads is similar to running several different programs concurrently sharing the same data; you could start a thread for each array.
Read more in the threading documentation.
