python queue from multiprocessing is empty - python

So I'm trying to assign values to a Queue from the module multiprocessing.
My code is:
from multiprocessing import Queue
fileBlocks = Queue()
filesDic = dict()
cont = 0
def eDiv(files, k):
global fileBlocks
listLinesAllWithIdx = []
index = 0
bytesCount = 0
blockLines = []
blockLinesAll = []
allBytes = 0
n = 0
lineCount = 0
for file in files:
with open(file) as f:
filesDic[file] = index
for line in f:
listLinesAllWithIdx.append((index,line, utf8len(line)))
allBytes += utf8len(line)
index += 1
while n < allBytes:
bytesCount = 0
blockLines = []
while bytesCount <= k and lineCount < len(listLinesAllWithIdx):
blockLines.append(listLinesAllWithIdx[lineCount])
bytesCount += listLinesAllWithIdx[lineCount][2]
lineCount += 1
n += bytesCount
blockLinesAll.append(blockLines)
for i in blockLinesAll:
fileBlocks.put(i)
print(fileBlocks.empty())
def utf8len(s):
return len(s.encode('utf-8'))
eDiv(["file1.txt","file2.txt"], 10)
I'm expecting fileBlocks.empty() = True but I'm getting False, and I don't understand why this is happening.

Related

Looking for resistance genes in water sample using kmers [Python]

I need some help with my code. I need to look for the presence of resistance genes in a water sample. That translates in having a huge file of reads coming from the water sample and a file of resistances genes. My problem is making the code run under 5 minutes, a thing that is not happening right now. Probably the issue relays on discarting reads as fast as possible, on having a smart method to only analyze meaningful reads. Do you have any suggestion? I cannot use any non standard python library
This is my code
import time
def build_lyb(TargetFile):
TargetFile = open(TargetFile)
res_gen = {}
for line in TargetFile:
if line.startswith(">"):
header = line[:-1]
res_gen[header] = ""
else:
res_gen[header] += line[:-1]
return res_gen
def build_kmers(sequence, k_size):
kmers = []
n_kmers = len(sequence) - k_size + 1
for i in range(n_kmers):
kmer = sequence[i:i + k_size]
kmers.append(kmer)
return kmers
def calculation(kmers, g):
matches = []
for i in range(0, len(genes[g])):
matches.append(0)
k = 0
while k < len(kmers):
if kmers[k] in genes[g]:
pos = genes[g].find(kmers[k])
for i in range(pos, pos+19):
matches[i] = 1
k += 19
else:
k += 1
return matches
def coverage(matches, g):
counter = 0
for i in matches[g]:
if i >= 1:
counter += 1
cov = counter/len(res_genes[g])*100
return cov
st = time.time()
genes = build_lyb("resistance_genes.fsa")
infile = open('test2.txt', 'r')
res_genes = {}
Flag = False
n_line = 0
for line in infile:
n_line += 1
if line.startswith("+"):
Flag = False
if Flag:
kmers = build_kmers(line[:-1], 19)
for g in genes:
counter = 18
k = 20
while k <= 41:
if kmers[k] in genes[g]:
counter += 19
k += 19
else:
k += 1
if counter >= 56:
print(n_line)
l1 = calculation(kmers, g)
if g in res_genes:
l2 = res_genes[g]
lr = [sum(i) for i in zip(l1, l2)]
res_genes[g] = lr
else:
res_genes[g] = l1
if line.startswith('#'):
Flag = True
for g in res_genes:
print(g)
for i in genes[g]:
print(i, " ", end='')
print('')
for i in res_genes[g]:
print(i, " ", end='')
print('')
print(coverage(res_genes, g))
et = time.time()
elapsed_time = et-st
print("Execution time:", elapsed_time, "s")

Program doesn't work after making exe file by pyinstaller

I made a Python program using numpy, sympy, sys which worked well in IDE. Here is the code...
import numpy as np
from sympy import *
import sys
f = open("result.txt", 'w+')
for line in sys.stdin:
f.write(line)
f.close()
f = open("result.txt", 'r')
data = f.read().split("\n")
i = 0
while i < len(data):
data[i] = data[i].split(' ')
i += 1
print(data)
amount = int(data[0][0])
print(amount)
matrix = np.zeros((amount, amount))
i = 0
j = 0
while i < amount:
while j < amount:
matrix[i, j] = int(data[i + 1][j])
j += 1
j = 0
i += 1
i = 0
j = 0
counter = 0
formula = 1
while i < amount:
while j < amount:
if matrix[i, j] == 1:
x = symbols(str(i + 1))
y = symbols(str(j + 1))
if counter == 0:
formula = (~x | ~y)
counter += 1
else:
formula = formula & (~x | ~y)
j += 1
j = 0
i += 1
formula_to_string = pycode(simplify_logic(formula, form='dnf', force=True))
massive_to_parse = formula_to_string.split("or")
k = 1
i = 0
while i < len(massive_to_parse):
print("{", end='')
while k < amount + 1:
try:
massive_to_parse[i].index(str(k))
except ValueError:
print("V",k, sep='', end='')
finally:
k += 1
print("}-максимальное внутренне устойчивое множество")
k = 1
i += 1
Then I made an exe file using pyinstaller with this command...
pyinstaller main.py
but when i launch it, this errors appear.
How can I fix this problem? Because I need exe file for university to launch it from other program on C

knapsack branch and bound wrong result

I have converted the code given at this link into a python version. The code is supposed to calculate the correct value of maximum value to be filled in knapsack of weight W. I have attached the code below:
#http://www.geeksforgeeks.org/branch-and-bound-set-2-implementation-of-01-knapsack/
from queue import Queue
class Node:
def __init__(self):
self.level = None
self.profit = None
self.bound = None
self.weight = None
def __str__(self):
return "Level: %s Profit: %s Bound: %s Weight: %s" % (self.level, self.profit, self.bound, self.weight)
def bound(node, n, W, items):
if(node.weight >= W):
return 0
profit_bound = int(node.profit)
j = node.level + 1
totweight = int(node.weight)
while ((j < n) and (totweight + items[j].weight) <= W):
totweight += items[j].weight
profit_bound += items[j].value
j += 1
if(j < n):
profit_bound += (W - totweight) * items[j].value / float(items[j].weight)
return profit_bound
Q = Queue()
def KnapSackBranchNBound(weight, items, total_items):
items = sorted(items, key=lambda x: x.value/float(x.weight), reverse=True)
u = Node()
v = Node()
u.level = -1
u.profit = 0
u.weight = 0
Q.put(u)
maxProfit = 0;
while not Q.empty():
u = Q.get()
if u.level == -1:
v.level = 0
if u.level == total_items - 1:
continue
v.level = u.level + 1
v.weight = u.weight + items[v.level].weight
v.profit = u.profit + items[v.level].value
if (v.weight <= weight and v.profit > maxProfit):
maxProfit = v.profit;
v.bound = bound(v, total_items, weight, items)
if (v.bound > maxProfit):
Q.put(v)
v.weight = u.weight
v.profit = u.profit
v.bound = bound(v, total_items, weight, items)
if (v.bound > maxProfit):
# print items[v.level]
Q.put(v)
return maxProfit
if __name__ == "__main__":
from collections import namedtuple
Item = namedtuple("Item", ['index', 'value', 'weight'])
input_data = open("test.data").read()
lines = input_data.split('\n')
firstLine = lines[0].split()
item_count = int(firstLine[0])
capacity = int(firstLine[1])
print "running from main"
items = []
for i in range(1, item_count+1):
line = lines[i]
parts = line.split()
items.append(Item(i-1, int(parts[0]), float(parts[1])))
kbb = KnapSackBranchNBound(capacity, items, item_count)
print kbb
The program is supposed to calculate value of 235 for following items inside file test.data:
5 10
40 2
50 3.14
100 1.98
95 5
30 3
The first line shows number of items and knapsack weight. Lines below first line shows the value and weight of those items. Items are made using a namedtuple and sorted according to value/weight. For this problem I am getting 135 instead of 235. What am I doing wrong here?
EDIT:
I have solved the problem of finding correct items based on branch and bound. If needed, one can check it here
The problem is that you're inserting multiple references to the same Node() object into your queue. The fix is to initialize two new v objects in each iteration of the while-loop as follows:
while not Q.empty():
u = Q.get()
v = Node() # Added line
if u.level == -1:
v.level = 0
if u.level == total_items - 1:
continue
v.level = u.level + 1
v.weight = u.weight + items[v.level].weight
v.profit = u.profit + items[v.level].value
if (v.weight <= weight and v.profit > maxProfit):
maxProfit = v.profit;
v.bound = bound(v, total_items, weight, items)
if (v.bound > maxProfit):
Q.put(v)
v = Node() # Added line
v.level = u.level + 1 # Added line
v.weight = u.weight
v.profit = u.profit
v.bound = bound(v, total_items, weight, items)
if (v.bound > maxProfit):
# print(items[v.level])
Q.put(v)
Without these reinitializations, you're modifying the v object that you already inserted into the queue.
This is different from C++ where the Node objects are values that are implicitly copied into the queue to avoid aliasing problems such as these.

Python stuck in a single thread of a multi-threaded program

I'm currently writing a program that is attempting to synchronize a visitor, car, pump, and gas station thread at a zoo where guests arrive, wait for an available car, take a tour, then exit, the cars must refuel every 5 rides, and the gas station must refuel every time 3 cars are refilled. My tests are with 35 visitors, 6 cars, 5 gas pumps, and a time interval of 2. The program reads a text file with the number of guests, cars, pumps, and a time interval between visitors boarding vehicles, then uses the data to fill a class. I create methods for each thread, then create the threads themselves in main. My problem is that my program gets stuck at 6 vehicles, which is the number specified by the text file, after running my first thread method, visitor_thread, and I cannot tell if any other threads are even running concurrently. I am a total novice at multithreading and python alike, and I'm not sure what the problem is. I have worked on the project for twelve hours straight and have been stuck at this point for the past four hours. In theory, visitor_thread, car_thread, and gas_station_thread should all run concurrently from main, and visitor_thread should have vehicles to work with again after some visitors finish their ride, but I just get stuck with six full cars in an infinite loop within visitor_thread. What is causing this infinite loop and how can I make sure all of my threads are actually running?
My code:
from threading import Thread
from threading import Lock
from threading import Event
event = Event()
lock = Lock()
done = Event()
is_done = 0
class Arguments:
def __init__(self, m_visitors, n_cars, k_pumps, t_time, thread_num, _done):
self.m_visitors = m_visitors
self.n_cars = n_cars
self.k_pumps = k_pumps
self.t_time = t_time
self.thread_num = thread_num
self.done = _done
class Car:
def __init__(self, control, rides, time, in_service, int_cus, in_queue):
self.control = control
self.rides = rides
self.time = time
self.in_service = in_service
self.int_cus = int_cus
self.in_queue = in_queue
class Pump:
def __init__(self, control, in_use, car_num, time):
self.control = control
self.in_use = in_use
self.car_num = car_num
self.time = time
class PumpQueue:
def __init__(self, pQueue, MAXsize, front, back, size):
self.q = Lock()
self.pQueue = pQueue
self.MAXsize = MAXsize
self.front = front
self.back = back
self.size = size
def visitor_thread(_visitor):
global is_done
visitor = _visitor
v = visitor.m_visitors
c = visitor.n_cars
p = visitor.k_pumps
t = visitor.t_time
id = visitor.thread_num
i = 0
j = 0
while i < v:
for j in range(0, c):
lock.acquire()
if cars[j].in_service is False and cars[j].rides < 5:
print('\nVisitor %d is currently in car %d' % (i+1, j+1))
cars[j].in_service = True
i += 1
print('\n%d customers waiting for a ride.' % (v - i))
lock.release()
break
lock.release()
lock.acquire()
is_done += 1
lock.release()
def car_thread(_car):
global is_done
carThread = _car
cars_done = 0
v = carThread.m_visitors
c = carThread.n_cars
p = carThread.k_pumps
t = carThread.t_time
id = carThread.thread_num
i = 0
while cars_done == 0:
cars_in_service = 0
while i < c:
lock.acquire()
if cars[i].in_service is True and cars[i].rides < 5:
# Car still being used, add more time
cars[i].time += 1
if cars[i].time == t:
cars[i].in_service = False
cars[i].rides += 1
cars[i].time = 0
if cars[i].rides == 5 and cars[i].in_queue is False:
push(i)
cars[i].in_queue = True
if cars[i].in_service is False:
cars_in_service += 1
i += 1
lock.release()
if cars_in_service == c and is_done >= 1:
cars_done = 1
lock.acquire()
is_done += 1
lock.release()
def gas_station_thread(_gas_station):
global is_done
gas_station = _gas_station
truck = False
cars_filled = 0
v = gas_station.m_visitors
c = gas_station.n_cars
p = gas_station.k_pumps
t = gas_station.t_time
id = gas_station.thread_num
gas_done = 0
pumps_in_service = 0
j = 0
while gas_done == 0:
while j < p:
lock.acquire()
if pumps[j].in_use is True:
pumps[j].time += 1
if pumps[j].time == 3:
lock.acquire()
cars[j].in_service = 0
cars[j].rides = 0
cars[j].time = 0
cars[j].in_queue = False
lock.release()
pumps[j].time = 0
pumps[j].in_use = False
cars_filled += 1
pumps_in_service -= 1
if truck is True and pumps[j].in_use is False:
truck = False
print('Fuel Truck is currently filling up the gas station.')
elif pumps[j].in_use is True and pump_line.size > 0:
pumps_in_service += 1
pumps[j].in_use = True
pumps[j].car_num.pop()
print('Car %d, pump %d' % (pumps[j].car_num + 1, i + 1))
pumps[j].time = 0
j += 1
lock.release()
if cars_filled > 3:
print('The Fuel Truck is on its way')
truck = True
cars_filled = 0
if pumps_in_service == 0 and is_done == 2:
gas_done = True
lock.acquire()
is_done += 1
lock.release()
def pop():
lock.acquire()
fr = pump_line.front
f = pump_line.pQueue[fr]
print("\n%d cars are waiting for pumps" % int(pump_line.size - 1))
if fr < (pump_line.MAXsize - 1):
pump_line.front += 1
else:
pump_line.front = 0
lock.release()
return f
def push(_c):
c =_c
lock.acquire()
b = pump_line.back
pump_line.pQueue[b] = c
print("\n%d cars are waiting for pumps" % int(pump_line.size + 1))
if b < (pump_line.MAXsize - 1):
pump_line.back += 1
else:
pump_line.back = 0
lock.release()
def isEmpty():
lock.acquire()
if (pump_line.front == pump_line.back):
boolean = True
else:
boolean = False
lock.release()
return boolean
if __name__ == "__main__":
arguments = Arguments(0, 0, 0, 0, 0, False)
main = Arguments(0, 0, 0, 0, 0, False)
thread = []
round_number = 0
io_control = 0
input_file = []
number_of_threads = 3
print("Enter the name of the text file to use as input:")
while io_control == 0:
try:
filename = input()
input_file = open(filename)
io_control = 1
except IOError:
print("Specified file does not exist, enter a different text file:")
# parse file into lines, separating by endlines
file_lines = []
num_lines = 0
for line in input_file:
line = line.replace(",", " ")
line = line.split()
num_lines += 1
if line:
line = [int(i) for i in line]
file_lines.append(line)
while main.done is False and round_number < num_lines:
main.m_visitors = int(file_lines[round_number][0])
main.n_cars = int(file_lines[round_number][1])
main.k_pumps = int(file_lines[round_number][2])
main.t_time = int(file_lines[round_number][3])
print("\nRound Number: %d" % (round_number + 1))
if main.n_cars == 0:
print('No data to read in this round, press enter to continue:')
input()
print("Number of Visitors: %d" % main.m_visitors)
print("Number of Cars: %d" % main.n_cars)
print("Number of Pumps: %d" % main.k_pumps)
print("Units of Time: %d" % main.t_time)
M = main.m_visitors
N = main.n_cars
K = main.k_pumps
T = main.t_time
thread_info = []
cars = []
pumps = []
for i in range(0, 3):
temp = Arguments(M, N, K, T, i, False)
thread_info.append(temp)
for i in range(0, N):
temp = Car(0, 0, 0, False, 0, False)
cars.append(temp)
for i in range(0, K):
temp = Pump(0, False, 0, 0)
pumps.append(temp)
pump_line = PumpQueue(0, 0, 0, 0, N)
visitorThread = Thread(target=visitor_thread, args=(thread_info[0],))
thread.append(visitorThread)
carsThread = Thread(target=car_thread, args=(thread_info[1],))
thread.append(carsThread)
gasThread = Thread(target=gas_station_thread, args=(thread_info[2],))
thread.append(gasThread)
visitorThread.start()
carsThread.start()
gasThread.start()
visitorThread.join()
carsThread.join()
gasThread.join()
round_number += 1
My output:
Round Number: 1 Number of Visitors: 35 Number of Cars: 6 Number of
Pumps: 5 Units of Time: 2
Visitor 1 is currently in car 1
34 customers waiting for a ride.
Visitor 2 is currently in car 2
33 customers waiting for a ride.
Visitor 3 is currently in car 3
32 customers waiting for a ride.
Visitor 4 is currently in car 4
31 customers waiting for a ride.
Visitor 5 is currently in car 5
30 customers waiting for a ride.
Visitor 6 is currently in car 6
29 customers waiting for a ride.
I think there's an index problem here:
if cars[j].in_service is False and cars[i].rides < 5:
should be
if cars[j].in_service is False and cars[j].rides < 5:
Index for cars is j not i

Parsing Data from live website in Python Enumerate problem!

The following script is supposed to fetch a specific line number and parse it from a live website. It works for like 30 loops but then it seems like enumerate(f) stops working correctly... the "i" in the for loop seems to stop at line 130 instead of like 200 something. Could this be due to the website I'm trying to fetch data from or something else? Thanks!!
import sgmllib
class MyParser(sgmllib.SGMLParser):
"A simple parser class."
def parse(self, s):
"Parse the given string 's'."
self.feed(s)
self.close()
def __init__(self, verbose=0):
"Initialise an object, passing 'verbose' to the superclass."
sgmllib.SGMLParser.__init__(self, verbose)
self.divs = []
self.descriptions = []
self.inside_div_element = 0
def start_div(self, attributes):
"Process a hyperlink and its 'attributes'."
for name, value in attributes:
if name == "id":
self.divs.append(value)
self.inside_div_element = 1
def end_div(self):
"Record the end of a hyperlink."
self.inside_div_element = 0
def handle_data(self, data):
"Handle the textual 'data'."
if self.inside_div_element:
self.descriptions.append(data)
def get_div(self):
"Return the list of hyperlinks."
return self.divs
def get_descriptions(self, check):
"Return a list of descriptions."
if check == 1:
self.descriptions.pop(0)
return self.descriptions
def rm_descriptions(self):
"Remove all descriptions."
self.descriptions.pop()
import urllib
import linecache
import sgmllib
tempLine = ""
tempStr = " "
tempStr2 = ""
myparser = MyParser()
count = 0
user = ['']
oldUser = ['none']
oldoldUser = [' ']
array = [" ", 0]
index = 0
found = 0
k = 0
j = 0
posIndex = 0
a = 0
firstCheck = 0
fCheck = 0
while a < 1000:
print a
f = urllib.urlopen("SITE")
a = a+1
for i, line in enumerate(f):
if i == 187:
print i
tempLine = line
print line
myparser.parse(line)
if fCheck == 1:
result = oldUser[0] is oldUser[1]
u1 = oldUser[0]
u2 = oldUser[1]
tempStr = oldUser[1]
if u1 == u2:
result = 1
else:
result = user is oldUser
fCheck = 1
user = myparser.get_descriptions(firstCheck)
tempStr = user[0]
firstCheck = 1
if result:
array[index+1] = array[index+1] +0
else:
j = 0
for z in array:
k = j+2
tempStr2 = user[0]
if k < len(array) and tempStr2 == array[k]:
array[j+3] = array[j+3] + 1
index = j+2
found = 1
break
j = j+1
if found == 0:
array.append(tempStr)
array.append(0)
oldUser = user
found = 0
print array
elif i > 200:
print "HERE"
break
print array
f.close()
Perhaps the number of lines on that web page are fewer than you think? What does this give you?:
print max(i for i, _ in enumerate(urllib.urlopen("SITE")))
Aside: Your indentation is stuffed after the while a < 1000: line. Excessive empty lines and one-letter names don't assist the understanding of your code.
enumerate is not broken. Instead of such speculation, inspect your data. Suggestion: replace
for i, line in enumerate(f):
by
lines = list(f)
print "=== a=%d linecount=%d === % (a, len(lines))
for i, line in enumerate(lines):
print " a=%d i=%d line=%r" % (a, i, line)
Examine the output carefully.

Categories