Applying Function on a pandas dataframe - python

I am using various search algorithm and I want to use it on data frame using other column here is the code.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import time as t
def linear_search(arr, x):
for i in range(len(arr)):
# comparing the array against the given parameter
if arr[i] == x:
# Return the parameter if the paramerS is true
return x
return -1;
def linear_search_while(arr,x):
found = -1
i = 0
while i < len(arr) and found == -1:
if arr[i] == x:
return x
i = i + 1
return -1
def binary_search_while(alist, item):
first = 0
last = len(alist)-1
found = -1
while first<=last and not found:
midpoint = (first + last)//2
if alist[midpoint] == item:
found = item
else:
if item < alist[midpoint]:
last = midpoint-1
else:
first = midpoint+1
return found
def binary_search_rec(arr, x):
if len(arr) == 0 or (len(arr) == 1 and arr[0]!= x):
return -1
mid = arr[len(arr)// 2]
if x == mid: return x
if x < mid: return binary_search_rec(arr[:len(arr)// 2], x)
if x > mid: return binary_search_rec(arr[len(arr)//2+1:], x)
def selectionSort(a):
len_a = len(a) # getting the length of the array
for i in range (0,len_a-1):
minIndex = i
for j in range(i+1, len_a):
array
if a[j] < a[minIndex]:
minIndex = j
temp = a[i]
a[i] = a[minIndex]
a[minIndex] = temp
return a
def timer_Linear_Search(arr, x):
arr = selectionSort(arr)
start = t.clock()
linear_search_while(arr,x)
stop = t.clock()
timer = stop - start
return timer
def timer_Binary_Search(arr, x):
arr = selectionSort(arr)
start = t.clock()
binary_search_while(arr,x)
stop = t.clock()
timer = stop - start
return timer
def timer_Sort(arr):
start = t.clock()
selectionSort(arr)
stop = t.clock()
timer = stop - start
return timer
def timer_Linear_S_Search(arr, x):
start = t.clock()
arr = selectionSort(arr)
linear_search_while(arr,x)
stop = t.clock()
timer = stop - start
return timer
def timer_Binary_S_Search(arr, x):
start = t.clock()
arr = selectionSort(arr)
binary_search_while(arr,x)
stop = t.clock()
timer = stop - start
return timer
calculation_df = pd.DataFrame()
calculation_df['Size'] = [512,1024, 2048,4096,8192]
*I want to use all the above functions such that they use thev value of Size and create a random array and number to calculate the rest of the columns.
[The final data should look something like this][1]
currently i am using manual calculation and appending the data to the dataframe
My question is how can I apply custom function in python code
I am currently manually adding all these calculation to the dataframe*

You can use the apply method of DataFrame (see this):
def linear_search(arr, x):
for i in range(len(arr)):
if arr[i] == x:
return x
return -1;
df = pd.DataFrame(data=[[1,2],[3,4],[5,6]], columns=["a", "b"])
df.apply(lambda arr: linear_search(arr, 1))
Output:
a 1
b -1

Related

results of Iterative and recursion benchmark of QuickSort should looks like this?

I just create benchmark to compare speed of two implementation of Quick sort.
Iterative and recursion.
I expected than recursive will be slower, but I got that plot (blue is rec):
It's possible that recursion is faster? Maybe I just do some mistake in my code?
Just in case I pase my code.
import time
import random
import sys
arrayList = []
arr = [random.randint(1,15000) for _ in range(1000)]
numbersList = [100000, 300000, 500000, 900000, 1000000, 1500000]
numbersForBenchmark = []
for i in range(len(numbersList)):
arr = [random.randint(1,15000) for _ in range(numbersList[i])]
numbersForBenchmark.append(arr)
print(numbersForBenchmark)
recursionTimeArray = []
iterationTimeArray = []
arrRe = arr
arrIt = arr
def partition(lst, start, end):
pos = start
for i in range(start, end):
if lst[i] < lst[end]: # in your version it always goes from 0
lst[i],lst[pos] = lst[pos],lst[i]
pos += 1
lst[pos],lst[end] = lst[end],lst[pos] # you forgot to put the pivot
# back in its place
return pos
def quick_sort_recursive(lst, start, end):
if start < end: # this is enough to end recursion
pos = partition(lst, start, end)
quick_sort_recursive(lst, start, pos - 1)
quick_sort_recursive(lst, pos + 1, end)
#print(lst)
def iter(arr,l,h):
i = ( l - 1 )
x = arr[h]
for j in range(l , h):
if arr[j] <= x:
# increment index of smaller element
i = i+1
arr[i],arr[j] = arr[j],arr[i]
arr[i+1],arr[h] = arr[h],arr[i+1]
return (i+1)
def quickSortIterative(arr,l,h):
size = h - l + 1
stack = [0] * (size)
top = -1
top = top + 1
stack[top] = l
top = top + 1
stack[top] = h
while top >= 0:
# Pop h and l
h = stack[top]
top = top - 1
l = stack[top]
top = top - 1
p = iter( arr, l, h )
if p-1 > l:
top = top + 1
stack[top] = l
top = top + 1
stack[top] = p - 1
if p+1 < h:
top = top + 1
stack[top] = p + 1
top = top + 1
stack[top] = h
for i in range(len(numbersForBenchmark)):
arrRe = numbersForBenchmark[i][:]
arrIt = numbersForBenchmark[i][:]
n = len(arrIt)
start = time.time()
quickSortIterative(arrIt, 0, n-1)
end = time.time()
ITime = end - start
iterationTimeArray.append(ITime)
try:
n = len(arrRe)
start = time.time()
quick_sort_recursive(arrRe,0,n-1)
end = time.time()
rekTime = end - start
recursionTimeArray.append(rekTime)
except RecursionError as re:
print('Sorry but this maze solver was not able to finish '
'analyzing the maze: {}'.format(re.args[0]))
print("REK time", recursionTimeArray)
print("ITER TIME", iterationTimeArray)
# evenly sampled time at 200ms intervals
import matplotlib.pyplot as plt
plt.plot([10,100,500,1000,5000,8000 ], recursionTimeArray,[10,100,500,1000,5000,8000], iterationTimeArray)
plt.show()
The plots look OK, but I expected a completely different result. Hence my doubts about the results.

Insertion sort using binary search in python

I need a little help with the code below, I have been tasked with changing the insertion sort to use the given binary search function.
def binarySearch(aList, start, end, value):
#Return the position where value is or should be inserted.
while start <= end:
mid = (start + end) // 2
if aList[mid] == value:
return mid
if value < aList[mid]:
end = mid - 1
else:
start = mid + 1
return start
def insertionSort(aList):
#Sort aList in ascending order.
for index in range(1, len(aList)):
key = aList[index]
pos = index
while pos > 0 and aList[pos - 1] > key:
aList[pos] = aList[pos - 1]
pos = pos - 1
aList[pos] = key
The code I am using to test the function is:
numbers = [71, 32, 22, 19, 18, 1, 15, 40]
insertionSort(numbers)
print(numbers)
Any help is appreciated as I am having a mind blank
def binary_search(arr, val, start, end):
if start == end: #折半到最后start和end可能会相等,递归退出条件之一
if arr[start] > val:
return start
else:
return start+1
if start + 1 == end: #折半到最后start和end相差1,递归退出条件之一
return start+1
mid = (start+end)//2
if arr[mid] < val:
return binary_search(arr,val,mid, end)
elif arr[mid] > val:
return binary_search(arr, val, start, mid)
else:
return mid #折半遇到mid值与val相等,递归退出条件之一
def insertion_sort(arr:list):
for i in range(1, len(arr)):
if arr[i] <= arr[0]:
index = 0
elif arr[i] >= arr[i-1]:
continue
else:
index = binary_search(arr[0:i],arr[i],0, i-1)
arr = arr[:index]+ [arr[i]] + arr[index:i] + arr[i+1:]
return arr
arr = [30,20,80,40,50,10,60,70,90]
insertion_sort(arr)

Quicksort algorithm Python error

I am trying to implement Quicksort using Python.
This is my code:
import random
def quickSort(lst):
randomIndex = random.randint(0,len(lst)-1)
pivot = lst[randomIndex]
greater = []
less = []
equal = []
if len(lst) > 1:
for num in lst:
if num > pivot:
greater.append(num)
elif num == pivot:
equal.append(num)
else:
less.append(num)
return quickSort(less)+equal+quickSort(greater)
else:
return lst
def main():
lst = [1000000,100000,1000,10000,100,10]
sortedLst = quickSort(lst)
print("Quicksorted List: ", sortedLst)
main()
How come when I run my code, it says that it runs into this error:
ValueError: empty range for randrange() (0,0, 0)
The only problem is that you try to select randomIndex even, when lst is empty, just move your initializations into if condition where you are sure that they are non empty
import random
def quickSort(lst):
if len(lst) > 1:
randomIndex = random.randint(0,len(lst)-1)
pivot = lst[randomIndex]
greater = []
less = []
equal = []
for num in lst:
if num > pivot:
greater.append(num)
elif num == pivot:
equal.append(num)
else:
less.append(num)
return quickSort(less)+equal+quickSort(greater)
else:
return lst
def main():
lst = [1000000,100000,1000,10000,100,10]
sortedLst = quickSort(lst)
print("Quicksorted List: ", sortedLst)
main()

Why is the code (Python) giving error?

import numpy as np
import matplotlib.pyplot as plt
class Prisoners_Dilemma:
def __init__(self,n,p):
self.n = n
self.p = p
def decision_array(self):
self.dict_dict = {}
for i in range(1,self.n + 1):
self.dict_dict[i] = []
list_list = []
for j in range(1,self.n):
#np.random.seed(j)
self.r = np.random.uniform(0,1)
if self.r > self.p:
q = 0
else:
q = 1
list_list.append(q)
self.dict_dict[i] = list_list
return self.dict_dict
def payoff(self):
self.dict_dict_2 = {}
for i in range(1,self.n + 1):
self.dict_dict_2[i] = []
list_list_2 = []
list_list_3=[]
for j in range(1, i):
list_list_2.append(self.dict_dict[j][i-2])
for j in range(i + 1, self.n + 1):
list_list_2.append(self.dict_dict[j][i-1])
list_list_2_np = np.array(list_list_2)
against_i = np.sum(list_list_2_np)
for_i = np.sum(self.dict_dict[i])
if against_i == 0 and for_i == 0:
payoff_i = 2
elif against_i == 0 and for_i != 0:
payoff_i = 5
elif against_i != 0 and for_i == 0:
payoff_i = -5
else:
payoff_i = -2
list_list_3.append(payoff_i)
self.dict_dict_2[i]=list_list_3
return self.dict_dict_2
def gameplay(self, N, initial_count):
self.counter = initial_count
for i in range(N):
for j in range(1, self.n + 1):
z = self.dict_dict_2[j]
x = np.array(z)
self.counter += np.sum(z)
return self.counter
y = Prisoners_Dilemma(15,0.015)
print (y.gameplay(20,100))
In the above code, the compiler gives the error that instance has no attribute as dict_dict_2 even though its prefixed with self. Moreover, it is perfectly fine with dict_dict. For the sake of completeness I have included the whole code but the problem lies only in payoff and gameplay methods?
dict_dict_2 is only created in payoff(), therefore you must call it before attempting to call gameplay().
The issue is that you are only creating self.dict_dict_2 variable in the payoff function, but in your logic where you are calling gameplay() function , you are not calling the payoff() function before accessing dict_dict_2 , from the looks of it you are not calling that function anywhere at all.
Not sure what dict_dict_2 holds, but the above is the reason why you are getting the issue, maybe you can move the initialization part of dict_dict_2 to __init__() function , though that would not fix the complete issue, since you would still be trying to access dict_dict_1[j] which can error out if j is not a key in dict_dict_2 .

Python: Recursion problems

I am trying to make a sudoku solver that solves boards very quickly. At the moment my solver works on easy boards but never terminates on harder boards. I believe it has something to do with my recursion because easy boards do not require recursion and hard boards do. Any help is appreciated.
import sys
def rowno(i):
return i // 9
def colno(i):
return i % 9
def boxno(i):
return (i // 9 // 3 )*3 + (i // 3) % 3
def isNeighbor(i, j):
if rowno(i) == rowno(j) or colno(i) == colno(j) or boxno(i) == boxno(j):
return True
else:
return False
def getFileName():
if sys.platform == "win32":
filename = input("Filename? ")
else:
filename = sys.argv[-1]
return filename
solutionlist = []
class Board(object):
def __init__(self, puzzle):
self.puzzle = puzzle
self.board = [Cell(int(value), idx) for idx, value in enumerate(puzzle)]
self.change = False
def printAll(self):
print [cell.candidates for cell in self.board]
#return str(" ")
def update(self):
self.change = False
l = [cell for cell in self.board if len(cell.candidates) == 1]
for i in l:
for j in xrange(81):
if isNeighbor(i.dex, j) and i.dex != j:
old = self.board[j].candidates
self.board[j].delCandidate(i.value)
if len(old) != len(self.board[j].candidates):
self.change = True
def toString(self):
str1 = ''.join(str(e.value) for e in self.board)
return str1
def solved(self):
for cell in self.board:
if len(cell.candidates) != 1:
return False
return True
def solve(self):
self.change = True
while self.change == True:
self.update()
if self.solved():
solutionlist.append(self.board)
return
l = [cell for cell in self.board if len(cell.candidates) > 1]
for i in l:
for j in i.candidates:
newBoard = Board(self.toString())
curLen = 12
curCell = -1
for u in l:
if len(u.candidates)<curLen:
curLen=len(u.candidates)
curCell = u.dex
for c in newBoard.board[curCell].candidates:
newBoard.board[curCell].candidates = [int(c)]
newBoard.board[curCell].value = int(c)
newBoard.solve()
return
def __repr__(self):
l = [cell.value for cell in self.board]
return str(l)
class Cell(object):
def __init__(self, value, dex):
self.value = value
self.dex = dex
if value == 0:
self.candidates = [1,2,3,4,5,6,7,8,9]
else:
self.candidates = [int(value)]
def __str__(self):
return str(self.value)
def delCandidate(self, value):
# deletes value from candidate list
#return self.candidate.remove(value);
self.candidates = [x for x in self.candidates if x != value]
if len(self.candidates) == 1:
self.value = self.candidates[0]
easy = "700583006006001405052006083300200958500078060648010300060802500003150072215600030"
twosol = "000805200800000401705040009000100702040000000006430000030900000010006080000000000"
hard = "040090008000000070060000120030020000005839060080600700050170600000043000003000200"
#easy solution: 794583216836721495152496783371264958529378164648915327967832541483159672215647839
b = Board(hard)
print b
b.solve()
print "end of the line"
for i in solutionlist:
print [cell.value for cell in i]
print "\n"
One major issue is the line for i in l: in the solve method. Since you're recursing, you only need to fill in one cell - the recursion will take care of the rest. So instead of for i in l:, just recurse on the one cell that is the best candidate (curCell):
l = [cell for cell in self.board if len(cell.candidates) > 1]
if len(l) > 0:
newBoard = Board(self.toString())
curLen = 12
curCell = -1
for u in l:
if len(u.candidates)<curLen:
curLen=len(u.candidates)
curCell = u.dex
for c in newBoard.board[curCell].candidates:
newBoard.board[curCell].candidates = [int(c)]
newBoard.board[curCell].value = int(c)
newBoard.solve()

Categories