Getting specific values from ASCII table - python

I'm currently creating a genetic algorithm and am trying to only get certain values from the ASCII table so the runtime of the algorithm can be a bit faster. In the code below I get the values between 9-127 but I only need the values 9-10, and 32-127 from the ASCII table and I'm not sure on how to exactly only get those specific values. Code below is done in python.
import numpy as np
TARGET_PHRASE = """The smartest and fastest Pixel yet.
Google Tensor: Our first custom-built processor.
The first processor designed by Google and made for Pixel, Tensor makes the new Pixel phones our most powerful yet.
The most advanced Pixel Camera ever.
Capture brilliant color and vivid detail with Pixels best-in-class computational photography and new pro-level lenses.""" # target DNA
POP_SIZE = 4000 # population size
CROSS_RATE = 0.8 # mating probability (DNA crossover)
MUTATION_RATE = 0.00001 # mutation probability
N_GENERATIONS = 100000
DNA_SIZE = len(TARGET_PHRASE)
TARGET_ASCII = np.fromstring(TARGET_PHRASE, dtype=np.uint8) # convert string to number
ASCII_BOUND = [9, 127]
class GA(object):
def __init__(self, DNA_size, DNA_bound, cross_rate, mutation_rate, pop_size):
self.DNA_size = DNA_size
DNA_bound[1] += 1
self.DNA_bound = DNA_bound
self.cross_rate = cross_rate
self.mutate_rate = mutation_rate
self.pop_size = pop_size
self.pop = np.random.randint(*DNA_bound, size=(pop_size, DNA_size)).astype(np.int8) # int8 for convert to ASCII
def translateDNA(self, DNA): # convert to readable string
return DNA.tostring().decode('ascii')
def get_fitness(self): # count how many character matches
match_count = (self.pop == TARGET_ASCII).sum(axis=1)
return match_count
def select(self):
fitness = self.get_fitness() # add a small amount to avoid all zero fitness
idx = np.random.choice(np.arange(self.pop_size), size=self.pop_size, replace=True, p=fitness/fitness.sum())
return self.pop[idx]
def crossover(self, parent, pop):
if np.random.rand() < self.cross_rate:
i_ = np.random.randint(0, self.pop_size, size=1) # select another individual from pop
cross_points = np.random.randint(0, 2, self.DNA_size).astype(np.bool) # choose crossover points
parent[cross_points] = pop[i_, cross_points] # mating and produce one child
return parent
def mutate(self, child):
for point in range(self.DNA_size):
if np.random.rand() < self.mutate_rate:
child[point] = np.random.randint(*self.DNA_bound) # choose a random ASCII index
return child
def evolve(self):
pop = self.select()
pop_copy = pop.copy()
for parent in pop: # for every parent
child = self.crossover(parent, pop_copy)
child = self.mutate(child)
parent[:] = child
self.pop = pop
if __name__ == '__main__':
ga = GA(DNA_size=DNA_SIZE, DNA_bound=ASCII_BOUND, cross_rate=CROSS_RATE,
mutation_rate=MUTATION_RATE, pop_size=POP_SIZE)
for generation in range(N_GENERATIONS):
fitness = ga.get_fitness()
best_DNA = ga.pop[np.argmax(fitness)]
best_phrase = ga.translateDNA(best_DNA)
print('Gen', generation, ': ', best_phrase)
if best_phrase == TARGET_PHRASE:
break
ga.evolve()

You need a customed method to generate random samples in range 9-10, and 32-127, like
def my_rand(pop_size, DNA_size):
bold1=[9,10]
bold2=list(range(32,127))
bold=bold1+bold2
pop = np.random.choice(bold,(pop_size,DNA_size)).astype(np.int8)
return pop
then call this method to replace the line 29, like
delete -- self.pop = np.random.randint(*DNA_bound, size=(pop_size, DNA_size)).astype(np.int8) # int8 for convert to ASCII
call ---self.pop = my_rand(pop_size, DNA_size)

Related

Time limit exceeded when finding tree height

this is my code to find the height of a tree of up to 10^5 nodes. May I know why I get the following error?
Warning, long feedback: only the beginning and the end of the feedback message is shown, and the middle was replaced by " ... ". Failed case #18/24: time limit exceeded
Input:
100000
Your output:
stderr:
(Time used: 6.01/3.00, memory used: 24014848/2147483648.)
Is there a way to speed up this algo?
This is the exact problem description:
Problem Description
Task. You are given a description of a rooted tree. Your task is to compute and output its height. Recall
that the height of a (rooted) tree is the maximum depth of a node, or the maximum distance from a
leaf to the root. You are given an arbitrary tree, not necessarily a binary tree.
Input Format. The first line contains the number of nodes 𝑛. The second line contains 𝑛 integer numbers
from βˆ’1 to 𝑛 βˆ’ 1 β€” parents of nodes. If the 𝑖-th one of them (0 ≀ 𝑖 ≀ 𝑛 βˆ’ 1) is βˆ’1, node 𝑖 is the root,
otherwise it’s 0-based index of the parent of 𝑖-th node. It is guaranteed that there is exactly one root.
It is guaranteed that the input represents a tree.
Constraints. 1 ≀ 𝑛 ≀ 105
Output Format. Output the height of the tree.
# python3
import sys, threading
from collections import deque, defaultdict
sys.setrecursionlimit(10**7) # max depth of recursion
threading.stack_size(2**27) # new thread will get stack of such size
class TreeHeight:
def read(self):
self.n = int(sys.stdin.readline())
self.parent = list(map(int, sys.stdin.readline().split()))
def compute_height(self):
height = 0
nodes = [[] for _ in range(self.n)]
for child_index in range(self.n):
if self.parent[child_index] == -1:
# child_index = child value
root = child_index
nodes[0].append(root)
# do not add to index
else:
parent_index = None
counter = -1
updating_child_index = child_index
while parent_index != -1:
parent_index = self.parent[updating_child_index]
updating_child_index = parent_index
counter += 1
nodes[counter].append(child_index)
# nodes[self.parent[child_index]].append(child_index)
nodes2 = list(filter(lambda x: x, nodes))
height = len(nodes2)
return(height)
def main():
tree = TreeHeight()
tree.read()
print(tree.compute_height())
threading.Thread(target=main).start()
First, why are you using threading? Threading isn't good. It is a source of potentially hard to find race conditions and confusing complexity. Plus in Python, thanks to the GIL, you often don't get any performance win.
That said, your algorithm essentially looks like this:
for each node:
travel all the way to the root
record its depth
If the tree is entirely unbalanced and has 100,000 nodes, then for each of 100,000 nodes you have to visit an average of 50,000 other nodes for roughly 5,000,000,000 operations. This takes a while.
What you need to do is stop constantly traversing the tree back to the root to find the depths. Something like this should work.
import sys
class TreeHeight:
def read(self):
self.n = int(sys.stdin.readline())
self.parent = list(map(int, sys.stdin.readline().split()))
def compute_height(self):
height = [None for _ in self.parent]
todo = list(range(self.n))
while 0 < len(todo):
node = todo.pop()
if self.parent[node] == -1:
height[node] = 1
elif height[node] is None:
if height[self.parent[node]] is None:
# We will try again after doing our parent
todo.append(node)
todo.append(self.parent[node])
else:
height[node] = height[self.parent[node]] + 1
return max(height)
if __name__ == "__main__":
tree = TreeHeight()
tree.read()
print(tree.compute_height())
(Note, I switched to a standard indent, and then made that indent 4. See this classic study for evidence that an indent in the 2-4 range is better for comprehension than an indent of 8. And, of course, the pep8 standard for Python specifies 4 spaces.)
Here is the same code showing how to handle accidental loops, and hardcode a specific test case.
import sys
class TreeHeight:
def read(self):
self.n = int(sys.stdin.readline())
self.parent = list(map(int, sys.stdin.readline().split()))
def compute_height(self):
height = [None for _ in self.parent]
todo = list(range(self.n))
in_redo = set()
while 0 < len(todo):
node = todo.pop()
if self.parent[node] == -1:
height[node] = 1
elif height[node] is None:
if height[self.parent[node]] is None:
if node in in_redo:
# This must be a cycle, lie about its height.
height[node] = -100000
else:
in_redo.add(node)
# We will try again after doing our parent
todo.append(node)
todo.append(self.parent[node])
else:
height[node] = height[self.parent[node]] + 1
return max(height)
if __name__ == "__main__":
tree = TreeHeight()
# tree.read()
tree.n = 5
tree.parent = [-1, 0, 4, 0, 3]
print(tree.compute_height())

Spawning objects in groups when the first object of the group was spawned randomly Python

I'm currently doing a project, and in the code I have, I'm trying to get trees .*. and mountains .^. to spawn in groups around the first tree or mountain which is spawned randomly, however, I can't figure out how to get the trees and mountains to spawn in groups around a single randomly generated point. Any help?
grid = []
def draw_board():
row = 0
for i in range(0,625):
if grid[i] == 1:
print("..."),
elif grid[i] == 2:
print("..."),
elif grid[i] == 3:
print(".*."),
elif grid[i] == 4:
print(".^."),
elif grid[i] == 5:
print("[T]"),
else:
print("ERR"),
row = row + 1
if row == 25:
print ("\n")
row = 0
return
There's a number of ways you can do it.
Firstly, you can just simulate the groups directly, i.e. pick a range on the grid and fill it with a specific figure.
def generate_grid(size):
grid = [0] * size
right = 0
while right < size:
left = right
repeat = min(random.randint(1, 5), size - right) # *
right = left + repeat
grid[left:right] = [random.choice(figures)] * repeat
return grid
Note that the group size need not to be uniformly distributed, you can use any convenient distribution, e.g. Poisson.
Secondly, you can use a Markov Chain. In this case group lengths will implicitly follow a Geometric distribution. Here's the code:
def transition_matrix(A):
"""Ensures that each row of transition matrix sums to 1."""
copy = []
for i, row in enumerate(A):
total = sum(row)
copy.append([item / total for item in row])
return copy
def generate_grid(size):
# Transition matrix ``A`` defines the probability of
# changing from figure i to figure j for each pair
# of figures i and j. The grouping effect can be
# obtained by setting diagonal entries A[i][i] to
# larger values.
#
# You need to specify this manually.
A = transition_matrix([[5, 1],
[1, 5]]) # Assuming 2 figures.
grid = [random.choice(figures)]
for i in range(1, size):
current = grid[-1]
next = choice(figures, A[current])
grid.append(next)
return grid
Where the choice function is explained in this StackOverflow answer.

Parallel Processing - Pool - Python

I'm trying to learn how to use multiprocessing in Python.
I read about multiprocessing, and I trying to do something like this:
I have the following class(partial code), which has a method to produce voronoi diagrams:
class ImageData:
def generate_voronoi_diagram(self, seeds):
"""
Generate a voronoi diagram with *seeds* seeds
:param seeds: the number of seed in the voronoi diagram
"""
nx = []
ny = []
gs = []
for i in range(seeds):
# Generate a cell position
pos_x = random.randrange(self.width)
pos_y = random.randrange(self.height)
nx.append(pos_x)
ny.append(pos_y)
# Save the f(x,y) data
x = Utils.translate(pos_x, 0, self.width, self.range_min, self.range_max)
y = Utils.translate(pos_y, 0, self.height, self.range_min, self.range_max)
z = Utils.function(x, y)
gs.append(z)
for y in range(self.height):
for x in range(self.width):
# Return the Euclidean norm
d_min = math.hypot(self.width - 1, self.height - 1)
j = -1
for i in range(seeds):
# The distance from a cell to x, y point being considered
d = math.hypot(nx[i] - x, ny[i] - y)
if d < d_min:
d_min = d
j = i
self.data[x][y] = gs[j]
I have to generate a large number of this diagrams, so, this consumes a lot of time, so I thought this is a typical problem to be parallelized.
I was doing this, in the "normal" approach, like this:
if __name__ == "__main__":
entries = []
for n in range(images):
entry = ImD.ImageData(width, height)
entry.generate_voronoi_diagram(seeds)
entry.generate_heat_map_image("ImagesOutput/Entries/Entry"+str(n))
entries.append(entry)
Trying to parallelize this, I tried this:
if __name__ == "__main__":
entries = []
seeds = np.random.poisson(100)
p = Pool()
entry = ImD.ImageData(width, height)
res = p.apply_async(entry.generate_voronoi_diagram,(seeds))
entries.append(entry)
entry.generate_heat_map_image("ImagesOutput/Entries/EntryX")
But, besides it doesn't work not even to generate a single diagram, I don't know how to specify that this have to be made N times.
Any help would be very appreciated.
Thanks.
Python's multiprocessing doesn't share memory (unless you explicitly tell it to). That means that you won't see "side effects" of any function that gets run in a worker processes. Your generate_voronoi_diagram method works by adding data to an entry value, which is a side effect. In order to see the results, you need to be passing it back as a return values from your function.
Here's one approach that handles the entry instance as an argument and return value:
def do_voroni(entry, seeds):
entry.generate_voronoi_diagram(seeds)
return entry
Now, you can use this function in your worker processes:
if __name__ == "__main__":
entries = [ImD.ImageData(width, height) for _ in range(images)]
seeds = numpy.random.poisson(100, images) # array of values
pool = multiprocessing.Pool()
for i, e in enumerate(pool.starmap_async(do_voroni, zip(entries, seeds))):
e.generate_heat_map_image("ImagesOutput/Entries/Entry{:02d}".format(i))
The e values in the loop are not references to the values in the entries list. Rather, they're copies of those objects, which have been passed out to the worker process (which added data to them) and then passed back.
I might be wrong, but I think you should use
res = p.apply_async(entry.generate_voronoi_diagram,(seeds))
res.get(timeout=1)
you may get Can't pickle type 'instancemethod'
i think the easiest way is something like
import random
from multiprocessing import Pool
class ImageData:
def generate_voronoi_diagram(self, seeds):
ooxx
def generate_heat_map_image(self, path):
ooxx
def allinone(obj, seeds, path):
obj.generate_voronoi_diagram(seeds)
obj.generate_heat_map_image(path)
if __name__ == "__main__":
entries = []
seeds = random.random()
p = Pool()
entry = ImageData()
res = p.apply_async(allinone, (entry, seeds, 'tmp.txt'))
res.get(timeout=1)

Quickly counting particles in grid

I've written some python code to calculate a certain quantity from a cosmological simulation. It does this by checking whether a particle in contained within a box of size 8,000^3, starting at the origin and advancing the box when all particles contained within it are found. As I am counting ~2 million particles altogether, and the total size of the simulation volume is 150,000^3, this is taking a long time.
I'll post my code below, does anybody have any suggestions on how to improve it?
Thanks in advance.
from __future__ import division
import numpy as np
def check_range(pos, i, j, k):
a = 0
if i <= pos[2] < i+8000:
if j <= pos[3] < j+8000:
if k <= pos[4] < k+8000:
a = 1
return a
def sigma8(data):
N = []
to_do = data
print 'Counting number of particles per cell...'
for k in range(0,150001,8000):
for j in range(0,150001,8000):
for i in range(0,150001,8000):
temp = []
n = []
for count in range(len(to_do)):
n.append(check_range(to_do[count],i,j,k))
to_do[count][1] = n[count]
if to_do[count][1] == 0:
temp.append(to_do[count])
#Only particles that have not been found are
# searched for again
to_do = temp
N.append(sum(n))
print 'Next row'
print 'Next slice, %i still to find' % len(to_do)
print 'Calculating sigma8...'
if not sum(N) == len(data):
return 'Error!\nN measured = {0}, total N = {1}'.format(sum(N), len(data))
else:
return 'sigma8 = %.4f, variance = %.4f, mean = %.4f' % (np.sqrt(sum((N-np.mean(N))**2)/len(N))/np.mean(N), np.var(N),np.mean(N))
I'll try to post some code, but my general idea is the following: create a Particle class that knows about the box that it lives in, which is calculated in the __init__. Each box should have a unique name, which might be the coordinate of the bottom left corner (or whatever you use to locate your boxes).
Get a new instance of the Particle class for each particle, then use a Counter (from the collections module).
Particle class looks something like:
# static consts - outside so that every instance of Particle doesn't take them along
# for the ride...
MAX_X = 150,000
X_STEP = 8000
# etc.
class Particle(object):
def __init__(self, data):
self.x = data[xvalue]
self.y = data[yvalue]
self.z = data[zvalue]
self.compute_box_label()
def compute_box_label(self):
import math
x_label = math.floor(self.x / X_STEP)
y_label = math.floor(self.y / Y_STEP)
z_label = math.floor(self.z / Z_STEP)
self.box_label = str(x_label) + '-' + str(y_label) + '-' + str(z_label)
Anyway, I imagine your sigma8 function might look like:
def sigma8(data):
import collections as col
particles = [Particle(x) for x in data]
boxes = col.Counter([x.box_label for x in particles])
counts = boxes.most_common()
#some other stuff
counts will be a list of tuples which map a box label to the number of particles in that box. (Here we're treating particles as indistinguishable.)
Using list comprehensions is much faster than using loops---I think the reason is that you're basically relying more on the underlying C, but I'm not the person to ask. Counter is (supposedly) highly-optimized as well.
Note: None of this code has been tested, so you shouldn't try the cut-and-paste-and-hope-it-works method here.

Fitness proportionate selection (roulette wheel selection) in Python

I have a list of objects (Chromosome) which have an attribute fitness (chromosome.fitness is between 0 and 1)
Given a list of such objects, how can I implement a function which returns a single chromosome whose chance of being selected is proportional to its fitness? That is, a chromosome with fitness 0.8 is twice as likely to be selected as one with fitness 0.4.
I've found a few Python and pseudocode implementations, but they are too complex for this requirement: the function needs only a list of chromosomes. Chromosomes store their own fitness as an internal variable.
The implementation I already wrote was before I decided to allow chromosomes to store their own fitness, so was a lot more complicated and involved zipping lists and things.
----------------------------EDIT----------------------------
Thanks Lattyware. The following function seems to work.
def selectOne(self, population):
max = sum([c.fitness for c in population])
pick = random.uniform(0, max)
current = 0
for chromosome in population:
current += chromosome.fitness
if current > pick:
return chromosome
Use numpy.random.choice.
import numpy.random as npr
def selectOne(self, population):
max = sum([c.fitness for c in population])
selection_probs = [c.fitness/max for c in population]
return population[npr.choice(len(population), p=selection_probs)]
There is a very simple way to select a weighted random choice from a dictionary:
def weighted_random_choice(choices):
max = sum(choices.values())
pick = random.uniform(0, max)
current = 0
for key, value in choices.items():
current += value
if current > pick:
return key
If you don't have a dictionary at hand, you could modify this to suit your class (as you haven't given more details of it, or generate a dictionary:
choices = {chromosome: chromosome.fitness for chromosome in chromosomes}
Presuming that fitness is an attribute.
Here is an example of the function modified to take an iterable of chromosomes, again, making the same presumption.
def weighted_random_choice(chromosomes):
max = sum(chromosome.fitness for chromosome in chromosomes)
pick = random.uniform(0, max)
current = 0
for chromosome in chromosomes:
current += chromosome.fitness
if current > pick:
return chromosome
I'd prefer fewer lines:
import itertools
def choose(population):
bounds = list(itertools.accumulate(chromosome.fitness for chromosome in population))
pick = random.random() * bounds[-1]
return next(chromosome for chromosome, bound in zip(population, bounds) if pick < bound)
def Indvs_wieght(Indvs): # to comput probality of selecting each Indvs by its fitness
s=1
s=sum(i.fitness for i in Indvs)
wieghts = list()
for i in range(len(Indvs)) :
wieghts.append(Indvs[i].fitness/s)
return wieghts
def select_parents(indvs,indvs_wieghts,number_of_parents=40): # Roulette Wheel Selection method #number of selected parent
return np.random.choice(indvs,size=number_of_parents,p=indvs_wieghts)
from __future__ import division
import numpy as np
import random,pdb
import operator
def roulette_selection(weights):
'''performs weighted selection or roulette wheel selection on a list
and returns the index selected from the list'''
# sort the weights in ascending order
sorted_indexed_weights = sorted(enumerate(weights), key=operator.itemgetter(1));
indices, sorted_weights = zip(*sorted_indexed_weights);
# calculate the cumulative probability
tot_sum=sum(sorted_weights)
prob = [x/tot_sum for x in sorted_weights]
cum_prob=np.cumsum(prob)
# select a random a number in the range [0,1]
random_num=random.random()
for index_value, cum_prob_value in zip(indices,cum_prob):
if random_num < cum_prob_value:
return index_value
if __name__ == "__main__":
weights=[1,2,6,4,3,7,20]
print (roulette_selection(weights))
weights=[1,2,2,2,2,2,2]
print (roulette_selection(weights))
import random
def weighted_choice(items):
total_weight = sum(item.weight for item in items)
weight_to_target = random.uniform(0, total_weight)
for item in items:
weight_to_target -= item.weight
if weight_to_target <= 0:
return item

Categories