Numpy Arrays: Searching for subarrays - python

Basically, I have a bunch of numpy arrays each with a list of websites in a larger array. I wanted to, with input from the user, basically return the arrays where the input of the user is the first element of the array. It would return that, the user would input another website, and it would be the second element of the arrays that matched the first time. So for example:
bigarray = [['website1','website2', 'website3', 'website4'],
['website1', 'website7', 'website9', 'website3'],
['website1','website2','website5', 'website9','website24','website36']]
basically if someone were to input 'website1' it would return
{'website2':2, 'website7':1}
after if they were to input website 2 it would output
{'website3':1,"website5":1}
and so on. I hope I was clear, if not, please comment and I'll make it more clear. I don't know how to make this efficient and quick, I've been brainstorming but I can only come up with inefficient methods. Please help,
This is what I have so far, but it doesn't do a dictionary with frequencies. I can't figure out how to get frequencies in the dictionary, nor can I figure out how to get the second third fourth etc. elements searching. This only works for the first element.
import numpy as np
import cherrypy as cp
def initialize(self):
pagearray = np.load("pagearray.npy")
def submit(self, input):
for i in pagearray:
if input==i[0]:
subpagearray += [i[1:]]
possibilities +=i[0]
return possibilities
Thanks,
F

You could use a data structure that is better suited to your problem. Here you can find some options in Python. Try to avoid premature optimization, and keep your code as simple as you can.

Figured it out... this is what I was going for:
import numpy as np
import simplejson as json
import cherrypy as cp
import operator
global pagearray
def initialize(self):
    """Load the saved page paths from ``pagearray.npy`` into the module-level ``pagearray``."""
    global pagearray
    # NOTE(review): if the file stores an object array (e.g. ragged lists of
    # sites), recent NumPy releases need allow_pickle=True here -- confirm
    # against how pagearray.npy was saved.
    pagearray = np.load("pagearray.npy")
    #return os.path
def getUserPath(self, input):
    """Advance one step along the user's path and rank the possible next sites.

    Every path in the global ``pagearray`` whose first element equals
    *input* is kept with that first element stripped, and the global is
    replaced by those remaining tails so the next call matches on the
    following position.

    Returns a ``(sites, weights)`` pair: the candidate next sites ordered
    by descending frequency, and for each one its count as a percentage of
    the number of remaining paths.
    """
    global pagearray
    remaining_paths = []
    next_sites = []
    for path in pagearray:
        try:
            if input == path[0]:
                # The tail is stored before path[1] is read, so a path that
                # consists only of the matched site still contributes an
                # (empty) tail even though it offers no next site.
                remaining_paths.append(path[1:])
                next_sites.append(path[1])
        except IndexError:
            pass
    ranked = sorted(build_dict(next_sites).items(),
                    key=operator.itemgetter(1), reverse=True)
    pagearray = remaining_paths
    total = len(pagearray)
    returnvaluelist = [site for site, _ in ranked]
    weight = [(hits / (total * 1.0)) * 100 for _, hits in ranked]
    return returnvaluelist, weight
def build_dict(a_list=None):
    """Return a dict mapping each item of *a_list* to its number of occurrences.

    ``None`` (the default) is treated as an empty list and yields ``{}``.
    """
    counts = {}
    for entry in (a_list if a_list is not None else []):
        counts[entry] = counts.get(entry, 0) + 1
    return counts

def build_dict(a_list=None):
    """Tally how often each site occurs in *a_list* and return the tally as a dict.

    Calling it without an argument (or with ``None``) returns an empty dict.
    """
    tally = {}
    if a_list is None:
        return tally
    for name in a_list:
        if name in tally:
            tally[name] += 1
        else:
            tally[name] = 1
    return tally
This is how you make a dictionary but I'm not sure what you're going for so you can use this as a template
I figured out what you're going for, I think. Let me know if this is it:
def advanced_dict(a_list=None):
    """Count, position by position, how often each site occurs across sub-lists.

    *a_list* is a list of sub-lists of sites. The result is a dict of
    dicts: the outer key is ``'website<N>'`` where ``N`` is the zero-based
    position inside a sub-list, and the inner dict maps every site seen at
    that position to the number of sub-lists holding it there.

    ``None`` (the default) is treated as an empty list and yields ``{}``.
    """
    if a_list is None:
        a_list = []
    site_dict = {}  # contains a dict of dicts
    # Walk the argument itself: the earlier version ignored *a_list* and read
    # a global named ``big_array`` that is not defined anywhere.
    for sub_arr in a_list:
        for position, site in enumerate(sub_arr):
            # Create the per-position dict on first use.
            per_position = site_dict.setdefault('website' + str(position), {})
            per_position[site] = per_position.get(site, 0) + 1
    return site_dict

Related

Anybody know how to use pyresttest's 'fixed_sequence' generator?

I'm trying to use pyresttest's benchmarking framework to generate a sequence of entries in my flask_sqlalchemy-based database. I would like to read input values from a pre-defined list as advertised by this framework's benchmarking generator type 'fixed_sequence', but it's only picking up the first element of the list.
Here is the issue that explains my problem in detail, with an example: https://github.com/svanoort/pyresttest/issues/264
Any pointer in the right direction will be greatly appreciated
I looked into the code; it is just a bug. This feature was never used by anyone.
https://github.com/svanoort/pyresttest/blob/master/pyresttest/generators.py#L100
instead of:
```
def factory_fixed_sequence(values):
""" Return a generator that runs through a list of values in order, looping after end """
def seq_generator():
my_list = list(values)
i = 0
while(True):
yield my_list[i]
if i == len(my_list):
i = 0
return seq_generator
```
It should be:
```
def factory_fixed_sequence(values):
    """ Build a generator function that cycles through *values* in order forever.

    The returned callable takes no arguments; each call produces a fresh
    generator that starts again from the first value and wraps around to
    it after the last one.
    """
    def seq_generator():
        items = list(values)
        position = 0
        while True:
            yield items[position]
            # Step forward, wrapping to the start after the final element.
            position = (position + 1) % len(items)
    return seq_generator
```
The i += 1 is missing

How can I get a counter to solve a sudoku puzzle?(python)

import os
from collections import counter
cwd = os.getcwd()
filename1 = cwd + "/sudoku1.txt"
grid1 = []
with open(filename1) as f:
for line in f:
grid1.append([int(i) for i in line.split()])
cwd = os.getcwd()
filename2 = cwd + "/sudoku2.txt"
grid2 = []
with open(filename2) as f:
for line in f:
grid2.append([int(i) for i in line.split()])
cwd = os.getcwd()
filename3 = cwd + "/sudoku3.txt"
grid3 = []
with open(filename3) as f:
for line in f:
grid3.append([int(i) for i in line.split()])
def allDifferent1D(l):
    """Return True when no non-zero value occurs more than once in the list *l*.

    Zeros are ignored, so any number of them may be present.
    """
    return not any(value != 0 and l.count(value) > 1 for value in l)
def allDifferent2D(l):
    """Return True when every row and every column of the grid *l* passes allDifferent1D.

    Rows are checked first, then columns; the column pass reads
    ``len(l)`` entries from each of the ``len(l)`` rows.
    """
    if not all(allDifferent1D(row) for row in l):
        return False
    size = len(l)
    for c in range(size):
        column = [l[r][c] for r in range(size)]
        if not allDifferent1D(column):
            return False
    return True
def checkAll3By3s(grid):
    """Return True when each of the nine 3x3 sub-grids of *grid* passes check3By3.

    Sub-grids are taken with their top-left corner at rows and columns
    0, 3 and 6, scanning row band by row band.
    """
    for top in (0, 3, 6):
        for left in (0, 3, 6):
            box = [grid[top + offset][left:left + 3] for offset in range(3)]
            if not check3By3(box):
                return False
    return True
def check3By3(grid):
    """Return True when the 3x3 *grid* holds no repeated value.

    Every value from 0 to 9 is tracked, so a repeated 0 also counts as a
    duplicate; a value outside that range raises ``KeyError``.
    """
    seen = dict.fromkeys(range(10), False)
    for row_index in range(3):
        for col_index in range(3):
            value = grid[row_index][col_index]
            if seen[value]:
                return False
            seen[value] = True
    return True
def isValidSudoku(grid):
    """Return True when *grid* passes the row/column check and the 3x3 sub-grid check.

    The row/column check runs first; the sub-grid check is only reached
    when it succeeds.
    """
    for check in (allDifferent2D, checkAll3By3s):
        if not check(grid):
            return False
    return True
def complete(grid):
    """Return True when *grid* is fully filled in and passes all uniqueness checks.

    Any cell that is zero or negative makes the grid incomplete, and that
    test takes priority over the row/column and 3x3 checks.
    """
    has_blank_or_negative = any(cell <= 0 for row in grid for cell in row)
    if has_blank_or_negative:
        return False
    return bool(allDifferent2D(grid) and checkAll3By3s(grid))
def compatableValue(grid,k):
# creates a dictionary for each row/column that, for the purpose of this function, takes k and compares it with other values in the row/column, giving it a value of 1 if it is unique for the grid[i][j] value solveSudoku is iterating over
for i in range(len(grid)):
seenValues=dict()
for j in range(len(grid[i])):
a=collections.counter(grid[i][j])
if k != 0 and k in seenValues:
return False
seenValues[k] += 1
return seenValues[k]
def solveSudoku(grid):
#if the grid isnt a sudoku solution, the function sets out to fill in blank spaces(as pre filled in spots are the conditions for the grid and thus necessary
if complete(grid)==True:
return(grid)
for i in range(0,9):
for j in range(0,9):
#only proceeds to change a value if it is zero. Calls compatableValue to see if each prospective value has been used
if grid[i][j]==0:
for k in range(1,10):
if compatableValue(grid,k)==1:
grid[i][j]=k
print(grid)
result=solveSudoku(grid)
if result != False:
solveSudoku(grid)
#changes values back to zero for next attempt at solving the problem
grid[i][j]=0
return False
return True
print(solveSudoku(grid2))
I am trying to solve the sudoku puzzle in which empty spaces are represented with zeros, and fill them in based on whether or not a counter has found them already in the row/column/3by3 grid. I am using python 3.4.1 and the counter does not work. I do not know what I am doing wrong.
Your import is:
from collections import counter
but then you try to use collections.counter instead. You never imported collections, so that will be a NameError exception. To fix it, change your import to
import collections
Also, as @DSM mentions in a comment, Counter must be spelled with an uppercase C.
I believe you have many other mistakes in that long, extremely repetitive code, for example you're trying to do a=collections.counter(grid[i][j]) -- counter called with a number makes no sense (and will fail), and then you ignore a anyway, I believe.
But the number of bugs per question should be low, so by fixing one I think I've done my part for now:-)
From the python docs:
c = Counter() # a new, empty counter
c = Counter('gallahad') # a new counter from an iterable
c = Counter({'red': 4, 'blue': 2}) # a new counter from a mapping
c = Counter(cats=4, dogs=8) # a new counter from keyword args
Counter() returns a counter object, and you can pass it either nothing, an iterable, a mapping or multiple named amounts. The idea behind a counter object is that it will count how many times a value is added to it. Say, if I want to count fruits in a bowl, I can do things like:
bowl = Counter()
bowl['banana'] = 3
bowl['banana'] += 4
Now in your code, you seem to be passing the content of a single sudoku cell to the Counter's constructor. I'm not sure about what you're trying to do with the counter, but I don't think you need one in the first place. You're not even using the counter after its failed creation. And I don't understand what the seenValues dict is used for. Perhaps you should try writing what you're trying to do in English first so we can understand what you're trying to achieve.

Hash Function in Python generating error

So I am trying to get a grasp on Hash Functions and how exactly they work. I have the following code but I keep getting an error when I try and run the code.
import sys
def part_one():
foo = open('input_table.txt')
for line in foo:
id, make, model, year = line.split(",")
print(make, model)
tuple_list = (make+model,)
return tuple_list
def hash_one(num_buffers, tuple_list):
#part_one()
# A being the first constant prime number to multiply by
# B being the prime number that we add to A*sum_of_chars
tuple_list = part_one()
A = 3
B = 5
count = 0
for item in tuple_list:
for char in item:
# sum_of_chars is the total of each letter in the word
count = ord(char)
count = count + tuple_list
index = ((A * sum_of_chars + B)) % num_buffers
return index
if __name__ == '__main__':
input_table = sys.argv[1]
num_buffers = int(sys.argv[2])
chars_per_buffer = int(sys.argv[3])
sys.argv[4] = 'make'
sys.argv[5] = 'model'
lst = []
for item in range(4, len(sys.argv)):
lst.append(sys.argv[item])
print(lst)
hash_one(lst)
What is wrong with my code that is causing the error? Can anyone help me?
1
You're calling hash() with no arguments, you have to hash something.
A hash of a number will just return the same number though, so it's not very interesting. It's for hashing things like strings.
2
part_one returns nothing, therefore when you call tuple_list = part_one(), its value is set to None, and you can't iterate through it.
3
Passing in a list through an argument then overwriting it doesn't make any sense anyway. If you want to return a list then use a return statement.
4
It's odd to set argument variables in code, they're for reading things from the command line.
5
(Not an error, but...)
You can use a slice (lst = sys.argv[4:]) as an easier way to get a sub-section of a list.

Is there better way to swallow StopIteration exception generated by yield in python?

I am now trying to learn more about yield in Python, and I found some code that uses yield to implement the Reservoir Sampling algorithm, as follows:
def RandomSelect(knum, rand=None):
    ''' (int, func) -> list
    Reservoir Sampling implementation

    Coroutine: advance it once to the first ``yield``, then ``send()`` the
    items one at a time. The first *knum* items fill the reservoir; each
    later item replaces a random slot with decreasing probability.

    *rand* is any object with a ``randint(a, b)`` method returning an
    integer in ``[a, b]``; a fresh ``Random()`` is used when it is None.
    '''
    selection = None
    k_elems_list = []
    count = 0  # number of items received before the current one
    if rand is None:
        rand = Random()
    while True:
        item = yield selection
        if len(k_elems_list) < knum:
            k_elems_list.append(item)
        else:
            # Randomly replace elements in the reservoir with a decreasing probability
            r = rand.randint(0, count)
            if r < knum:
                k_elems_list[r] = item
        # Count every item, including those that filled the reservoir.
        # Counting only after the reservoir was full made the first draws
        # randint(0, 0), randint(0, 1), ... and biased the sample.
        count += 1
    print(k_elems_list)
In order to break the while loop, I just add some codes after item = yield selection
if item == -1: # reach to the end of data, just break
break
Question 1, Is there any better way to break out the while loop?
To call the function RandomSelect,
myList = [1,2,3,4,5,6,7,8,-1]
cr = RandomSelect(3);
cr.next() # advance to the yield statement, otherwise I can't call send
try:
for val in myList:
cr.send(val)
except StopIteration:
pass
finally:
del cr
I have to catch the StopIteration exception explicitly.
Question 2, is there any better way to swallow the StopIteration in the codes?
I think a slightly cleaner way to accomplish what is being done — which addresses both your questions — would be to explicitly close the generator by calling its close() method to terminate it and break out of the loop. Doing so also means a StopIteration doesn't need to be "swallowed". Another benefit is it's no longer necessary to add the -1 sentinel value at the end of the list.
def RandomSelect(knum, rand=None):
    ''' (int, func) -> list
    Reservoir Sampling implementation

    Coroutine: advance it once to the first ``yield``, then ``send()`` the
    items one at a time and finish with ``close()``. Closing raises
    ``GeneratorExit`` at the ``yield``; it is caught to leave the loop and
    print the final reservoir.

    *rand* is any object with a ``randint(a, b)`` method returning an
    integer in ``[a, b]``; a fresh ``Random()`` is used when it is None.
    '''
    selection = None
    k_elems_list = []
    count = 0  # number of items received before the current one
    if rand is None:
        rand = Random()
    while True:
        try:
            item = yield selection
        except GeneratorExit:
            break
        if len(k_elems_list) < knum:
            k_elems_list.append(item)
        else:
            # Randomly replace elements in the reservoir with a decreasing probability
            r = rand.randint(0, count)
            if r < knum:
                k_elems_list[r] = item
        # Count every item, including those that filled the reservoir.
        # Counting only after the reservoir was full made the first draws
        # randint(0, 0), randint(0, 1), ... and biased the sample.
        count += 1
    print(k_elems_list)
myList = [1,2,3,4,5,6,7,8]
cr = RandomSelect(3)
cr.next() # advance to the yield statement, otherwise I can't call send
for val in myList:
cr.send(val)
cr.close()
del cr
A minor additional enhancement (about something you didn't ask about) would be to make it so it wasn't necessary to manually advance to the yield statement before calling send(). A good way to accomplish that would be with a decorator function similar to the one named consumer() David Beazley described in his Generator Tricks For Systems Programmers presentation at PyCon 2008:
def coroutine(func):
    """ Decorator that takes care of starting a coroutine automatically.

    The wrapped generator function is called and advanced to its first
    ``yield``, so callers can ``send()`` to the result straight away.
    """
    import functools

    @functools.wraps(func)  # keep the wrapped function's name and docstring
    def start(*args, **kwargs):
        cr = func(*args, **kwargs)
        # Built-in next() works on Python 2.6+ and 3; the .next() method
        # exists on Python 2 generators only.
        next(cr)
        return cr
    return start
@coroutine
def RandomSelect(knum, rand=None):
.
.
.
print k_elems_list
myList = [1,2,3,4,5,6,7,8]
cr = RandomSelect(3)
#cr.next() # NO LONGER NECESSARY
for val in myList:
cr.send(val)
cr.close()
del cr

Yielding from sorted iterators in sorted order in Python?

Is there a better way to merge/collate a bunch of sorted iterators into one so that it yields the items in sorted order? I think the code below works but I feel like there is a cleaner, more concise way of doing it that I'm missing.
def sortIters(*iterables, **kwargs):
key = kwargs.get('key', lambda x : x)
nextElems = {}
currentKey = None
for g in iterables:
try:
nextElems[g] = g.next()
k = key(nextElems[g])
if currentKey is None or k < currentKey:
currentKey = k
except StopIteration:
pass #iterator was empty
while nextElems:
minKey = None
stoppedIters = set()
for g, item in nextElems.iteritems():
k = key(item)
if k == currentKey:
yield item
try:
nextElems[g] = g.next()
except StopIteration:
stoppedIters.add(g)
minKey = k if minKey is None else min(k, minKey)
currentKey = minKey
for g in stoppedIters:
del nextElems[g]
The use case for this is that I have a bunch of csv files that I need to merge according to some sorted field. They are big enough that I don't want to just read them all into a list and call sort(). I'm using python2.6, but if there's a solution for python3 I'd still be interested in seeing it.
yes, you want heapq.merge() which does exactly one thing; iterate over sorted iterators in order
def sortkey(row, column=5):
    """Return ``(row[column], row)`` so that rows order by the chosen field.

    *column* is the index of the field to sort on. It defaults to 5, the
    index that used to be hard-coded, so existing one-argument calls keep
    their behaviour.
    """
    return (row[column], row)
def unwrap(key):
    """Return the row from a two-item ``(sort field, row)`` pair such as sortkey() builds."""
    _, row = key
    return row
import csv
import heapq

# Open every input file up front; open() and generator expressions are used
# instead of file()/imap() so the snippet runs on both Python 2.6+ and 3.
FILE_LIST = [open(name) for name in ['foo.csv', 'bar.csv']]
# One lazily decorated row stream per file. csv.reader() (there is no
# csv.csvreader) parses a file, and sortkey() must be applied to each ROW of
# each reader -- mapping it over the readers themselves would index a reader
# object instead of a row.
input_iters = [(sortkey(row) for row in csv.reader(f)) for f in FILE_LIST]
# heapq.merge() interleaves the already-sorted streams lazily; unwrap() then
# strips the sort field again, leaving plain rows in sorted order.
output_iter = (unwrap(pair) for pair in heapq.merge(*input_iters))

Categories