Index out of Range while counting inversions - python

I am trying to count the number of inversions in a .txt file given as an argument in a command line input. When ever it gets to line that actually checks if there is an inversion I get a index out of range error. I have tried writing down the place and value in i and j for each loop but I can't figure out how to stop it from going out of range. Here is the error
File "./counting_inversions.py", line 31, in sortAndCountSplit
if (l[i] <= r[j]):
IndexError: list index out of range
Does any one else know a solution?
import argparse
def readFile():
arg_parser = argparse.ArgumentParser(description='Print the given input file.')
arg_parser.add_argument('filename', help='path to a file')
args = arg_parser.parse_args()
with open(args.filename, 'r') as in_file:
n = int(in_file.readline())
vals = [int(val) for val in in_file.readlines()]
return([n, vals])
def sortAndCount(invList):
if (len(invList) == 1):
return (invList, 0)
else:
midpoint = int(len(invList) / 2)
left, lc = sortAndCount(invList[:midpoint])
right, rc = sortAndCount(invList[midpoint:])
arr, sc = sortAndCountSplit(left, right)
return (arr, (lc + rc + sc))
def sortAndCountSplit(l, r):
s = []
i = j = inversions = 0
for k in range((len(l) + len(r))):
if ((i < len(l)) and (l[i] <= r[j]) or j >= len(r)):
s.append(l[i])
i += 1
else:
s.append(r[j])
j += 1
inversions += len(l) - i
return (s, inversions)
def main():
file = readFile()
print(sortAndCount(file[1]))
main()

Related

Rewrite Python code for compilation it in PyPy

I am stuck in rewriting Python code to compile it in PyPy. My task is to calculate number of inversions in a list using modification of merge sort. Input data brings from input.txt, the result of the code is written in output.txt. My last attempt to do this looks like:
import sys
def merge(a, b, inverse_num):
n, m = len(a), len(b)
i = 0
j = 0
c = []
while i < n or j < m:
if j == m:
c.append(a[i])
i += 1
# inverse_num += 1
elif i < n and a[i] <= b[j]:
c.append(a[i])
i += 1
elif i < n and a[i] > b[j]:
c.append(b[j])
j += 1
inverse_num += n - i
if j == m and i == n - 1:
c.append(a[i])
i += 1
else:
c.append(b[j])
j += 1
return c, inverse_num
def merge_sort(arr, inverse_num=0):
n = len(arr)
if n == 1:
return arr, inverse_num
l = arr[:(n//2)]
r = arr[(n//2):n]
l, inverse_num_l = merge_sort(l, inverse_num)
r, inverse_num_r = merge_sort(r, inverse_num)
inverse_num = inverse_num_l + inverse_num_r
return merge(l, r, inverse_num)
def main():
with open('input.txt') as f:
n = int(f.readline().split()[0])
in_list = list(map(int, f.readline().split()))
output_file = open('output.txt', 'w')
sorted_arr, inverse_num = merge_sort(in_list, inverse_num=0)
output_file.write(str(inverse_num))
output_file.close()
return 0
def target(*args):
return entry_point, None
def entry_point(argv):
main()
return 0
if __name__ == "__main__":
entry_point(sys.argv)
After compilation it with command pypy "C:/pypy_compile/pypy-src/translate.py" Task2.py
error appears:
[translation:ERROR] TypeError: method_split() takes at least 2 arguments (1 given)
Processing block:
block#27[v4...] is a <class 'rpython.flowspace.flowcontext.SpamBlock'>
in (Task2:44)main
containing the following operations:
v6 = getattr(v5, ('split'))
v7 = simple_call(v6)
--end--
Thanks in advance for helping.

Run the Same Code for Multiple Files Python

I've written a code as you can see (I know it could have written better). So my question is how can I run this code for multiple files in a folder?. Can I use "for loop" for this? My files are in a folder called task1 and to be honest I couldn't figure out the way to do it. Thank you for spending your time to read it.
bands = list()
filename = "file1000"
with open (filename) as fin:
for line in fin:
bands.append(line.strip())
def partition(bands, start, end):
pivot = bands[start]
low = start + 1
high = end
while True:
while low <= high and bands[high] >= pivot:
high = high - 1
while low <= high and bands[low] <= pivot:
low = low + 1
if low <= high:
bands[low], bands[high] = bands[high], bands[low]
else:
break
bands[start], bands[high] = bands[high], bands[start]
return high
def quick_sort(array, start, end):
if start >= end:
return
p = partition(array, start, end)
quick_sort(array, start, p-1)
quick_sort(array, p+1, end)
def heapify(bands, n, i):
largest = i
l = 2 * i + 1
r = 2 * i + 2
if l < n and bands[i] < bands[l]:
largest = l
if r < n and bands[largest] < bands[r]:
largest = r
if largest != i:
bands[i], bands[largest] = bands[largest], bands[i]
heapify(bands, n, largest)
def heapSort(bands):
n = len(bands)
for i in range(n, -1, -1):
heapify(bands, n, i)
for i in range(n - 1, 0, -1):
bands[i], bands[0] = bands[0], bands[i]
heapify(bands, i, 0)
def mergeSort(bands):
if len(bands) > 1:
mid = len(bands) // 2
L = bands[:mid]
R = bands[mid:]
mergeSort(L)
mergeSort(R)
i = j = k = 0
while i < len(L) and j < len(R):
if L[i] < R[j]:
bands[k] = L[i]
i += 1
else:
bands[k] = R[j]
j += 1
k += 1
while i < len(L):
bands[k] = L[i]
i += 1
k += 1
while j < len(R):
bands[k] = R[j]
j += 1
k += 1
def insertionSort(bands):
for i in range(1, len(bands)):
key = bands[i]
j = i - 1
while j >= 0 and key < bands[j]:
bands[j + 1] = bands[j]
j -= 1
bands[j + 1] = key
import time
start_time = time.time()
quick_sort(bands, 0, len(bands) - 1)
file = open("time.txt","a")
file.write(str(time.time() - start_time))
file.write(" ")
file.close()
start_time = time.time()
heapSort(bands)
file = open("time.txt","a")
file.write(str(time.time() - start_time))
file.write(" ")
file.close()
start_time = time.time()
mergeSort(bands)
file = open("time.txt","a")
file.write(str(time.time() - start_time))
file.write(" ")
file.close()
start_time = time.time()
insertionSort(bands)
file = open("time.txt","a")
file.write(str(time.time() - start_time))
file.write(" ")
file.close()
You can use os.listdir(folder) to get all names in folder (it will be names of files and subfolders) and then you can use for-loop to run your code with every filename. listdir() gives only filenames and you need os.path.join() to create full path to file. You can also use if to filter names.
import os
folder = "/path/to/assignment"
for name in os.listdir(folder):
if name.startswith("file"): # use it if you have to filter files by name
filename = os.path.join(folder, name)
print(filename)
# ... your code ...
Eventually you can use glob for this. It can be useful if you wan to filter names. For all names use *. To filer you can use ie *.txt or file*, etc.
import glob
#for filename in glob.glob("/path/to/assignment/file*.txt"):
for filename in glob.glob("/path/to/assignment/*"):
print(filename)
# ... your code ...
If you will need to get also in subfolders then you can use os.walk(folder)
import os
folder = "/path/to/assignment"
for root, dirs, files in os.walk(folder):
for name in files:
if name.startswith("file"): # use it if you have to filter files by name
filename = os.path.join(root, name)
print(filename)
# ... your code ...
You can use:
d="**Provide the directory here**"
files=os.listdir(d)
file=[i[:-4] for i in files] #To store the csv file name as DataFrame name without the '.csv' part
a=[]
for i in range(len(files)):
exec("%s=pd.read_csv(d+files[i])"%file[i])
a.append(file[i])
Now you the list of DataFrames in 'a'. You can iterate for each of them and pass it to your function.

list assignment index out of range by code python?

I keep getting an
IndexError: list assignment index out of range.
The error on line 78
This code is written to find motif DNA to bioinformatics
How we can solve this error or the problem ?
Here is my code:
from math import log
class MotifMedianFinding(object):
def __init__(self, input_file):
super(MotifMedianFinding, self).__init__()
self.input_lines = open("C:\\Users\\A.Khassawneh\\Desktop\\fasta.txt")
def output(self):
#main method to call both functions
sequences = {}
for line in self.input_lines:
if '>' in line:
sequences[line] = self.input_lines.next()
for label, seq in sequences.iteritems():
print "DNA:" + seq + "\n\n\n\n\n"
median = self.median_string(seq, 5,5, len(seq))
self.motif(seq, median,5,len(seq))
def median_string(self, dna, t, n, l):
#bound and search method of calulating median string
start_pos = start_pos = [1,1,1,1,1]
best_dist = 1000000000
i = 1
while i > 0:
if i < l:
prefix = str(start_pos)
opt_dist = self.hamming_score(prefix, dna)
if opt_dist > best_dist:
s,i = self.bypass(start_pos,i,l,4)
else:
s,i = self.next_vertex(start_pos,i,l,4)
else:
word = str(s)
if self.hamming_score(word, dna) < best_dist:
best_dist = self.hamming_score(word, dna)
bestword = word
s,i = self.next_vertex(start_pos,i,l,4)
print "Best Word: %s (tot_dis = %s)" % (bestword,best_dist)
return bestword
def motif(self, dna, t, n, l):
#bound and search method of calculating motif
start_pos = [1,1,1,1,1]
best_score = 0
i = 1
while 1 > 0:
if i < t:
opt_score = Score(s, i, dna) + (t-1) * l
if opt_score < best_score:
start_pos, i = self.bypass(start_pos, i, t, n-l+1)
else:
start_pos, i = self.next_vertex(start_pos, i, t, n-l+1)
else:
if self.score(start_pos, dna) > best_score:
best_score = self.score(start_pos)
best_motif = str(s)
start_pos, i = self.next_vertex(start_pos, i, t, n-l+1)
print "motif consensus string: %s (consensus_score = %s) " % (best_motif, best_score)
print "motif positions/string s=(s1..st): %s" % ', '.join(start_pos)
return best_motif
def bypass(vertex, level, l, k):
#skip uncessary calculations in the tree
j = level
for ind in xrange(j,1,-1):
if a[j] < k:
a[j] = a[j] + 1
return vertex, j
return vertex, 0
def next_vertex(self, vertex, level, L, k):
#transverse the tree of a strand of genes
if level <L:
vertex[level+1] = 1
return vertex,level+1
else:
j = L
for ind in xrange(j,1,-1):
if vertex[ind] < k:
vertex[j] = vertex[j] + 1
return vertex, j
return vertex, 0
def score(start_pos):
# biggest score of motif
total = 0
for i in start_pos:
total += i
return total
def hamming_score(self, s, dna):
pass
motif_median = MotifMedianFinding('HMP-part.fa')
motif_median.output()
xrange(x,y) goes from x to y-1 (x, x+1.... y-1). In your code, it would have been fine to do xrange(1,j), because that wouldn't have included j. But if you swap it to xrange(j,1,-1), you go (j, j-1.... 2).
Basically, you probably need to change it to xrange(j-1,0,-1) depending on your intended range.

How to get position of opening/ending HTML tag in Python

How to solve this on Python3, using what lib, and using what sample code?
I have html file, at position Line:Col I have middle of html tag
<table ......>; how to get position of <table> tag edges (brackets < >) and position of its </table> tag edges?
(note: several table tags may be one inside another).
Like said in this SO answer, you should not use regex to parse an HTML file as the standard is highly irregular. You should instead use an HTML parsing library like html.parser : This library offers you HTMLParser.getpos() which returns you the line number and offset of the tag.
This gets you the coordinates of each tag with html.parser, where I monkeypatch the goahead function with a simple modification, calling the custom method get_endpos:
from html.parser import HTMLParser, starttagopen
from html import unescape
class MyHTMLParser(HTMLParser):
def __init__(self):
HTMLParser.__init__(self)
self.start_tags = []
self.end_tags = []
self.last_append = []
def handle_starttag(self, tag, attrs):
self.start_tags.append((tag, (self.getpos()[0], self.getpos()[1]),))
self.last_append = self.start_tags
def handle_endtag(self, tag):
self.end_tags.append((tag, (self.getpos()[0], self.getpos()[1]),))
self.last_append = self.end_tags
def get_endpos(self):
self.last_append[-1] = self.last_append[-1] + ((self.getpos()[0], self.getpos()[1]),)
def get_tags(self):
return self.start_tags, self.end_tags
def _reset(self):
HTMLParser.reset(self)
self.start_tags = []
self.end_tags = []
parser = MyHTMLParser()
# Internal -- handle data as far as reasonable. May leave state
# and data to be processed by a subsequent call. If 'end' is
# true, force handling all data as if followed by EOF marker.
def goahead(self, end):
rawdata = self.rawdata
i = 0
n = len(rawdata)
while i < n:
if self.convert_charrefs and not self.cdata_elem:
j = rawdata.find('<', i)
if j < 0:
# if we can't find the next <, either we are at the end
# or there's more text incoming. If the latter is True,
# we can't pass the text to handle_data in case we have
# a charref cut in half at end. Try to determine if
# this is the case before proceeding by looking for an
# & near the end and see if it's followed by a space or ;.
amppos = rawdata.rfind('&', max(i, n-34))
if (amppos >= 0 and
not re.compile(r'[\s;]').search(rawdata, amppos)):
break # wait till we get all the text
j = n
else:
match = self.interesting.search(rawdata, i) # < or &
if match:
j = match.start()
else:
if self.cdata_elem:
break
j = n
if i < j:
if self.convert_charrefs and not self.cdata_elem:
self.handle_data(unescape(rawdata[i:j]))
else:
self.handle_data(rawdata[i:j])
i = self.updatepos(i, j)
if i == n: break
startswith = rawdata.startswith
if startswith('<', i):
if starttagopen.match(rawdata, i): # < + letter
k = self.parse_starttag(i)
elif startswith("</", i):
k = self.parse_endtag(i)
elif startswith("<!--", i):
k = self.parse_comment(i)
elif startswith("<?", i):
k = self.parse_pi(i)
elif startswith("<!", i):
k = self.parse_html_declaration(i)
elif (i + 1) < n:
self.handle_data("<")
k = i + 1
else:
break
if k < 0:
if not end:
break
k = rawdata.find('>', i + 1)
if k < 0:
k = rawdata.find('<', i + 1)
if k < 0:
k = i + 1
else:
k += 1
if self.convert_charrefs and not self.cdata_elem:
self.handle_data(unescape(rawdata[i:k]))
else:
self.handle_data(rawdata[i:k])
i = self.updatepos(i, k)
self.get_endpos() # only modification: gets end position of tags
elif startswith("&#", i):
match = charref.match(rawdata, i)
if match:
name = match.group()[2:-1]
self.handle_charref(name)
k = match.end()
if not startswith(';', k-1):
k = k - 1
i = self.updatepos(i, k)
continue
else:
if ";" in rawdata[i:]: # bail by consuming &#
self.handle_data(rawdata[i:i+2])
i = self.updatepos(i, i+2)
break
elif startswith('&', i):
match = entityref.match(rawdata, i)
if match:
name = match.group(1)
self.handle_entityref(name)
k = match.end()
if not startswith(';', k-1):
k = k - 1
i = self.updatepos(i, k)
continue
match = incomplete.match(rawdata, i)
if match:
# match.group() will contain at least 2 chars
if end and match.group() == rawdata[i:]:
k = match.end()
if k <= i:
k = n
i = self.updatepos(i, i + 1)
# incomplete
break
elif (i + 1) < n:
# not the end of the buffer, and can't be confused
# with some other construct
self.handle_data("&")
i = self.updatepos(i, i + 1)
else:
break
else:
assert 0, "interesting.search() lied"
# end while
if end and i < n and not self.cdata_elem:
if self.convert_charrefs and not self.cdata_elem:
self.handle_data(unescape(rawdata[i:n]))
else:
self.handle_data(rawdata[i:n])
i = self.updatepos(i, n)
self.rawdata = rawdata[i:]
MyHTMLParser.goahead = goahead
parser.feed(your_html_file_as_a_string)
print(parser.get_tags())

python 2d array condition

i'm working on 8 queen(Genetic Algorithm) program with python 3.4
i use a matrix for keep queens position. but i have an error in sort() function,i dont underestand this error.
please help me ...
my code:
from random import randrange
__author__ = 'Moein'
class NQueen:
NUM_COLS = 8
POPULATIONS = 100
current = [[]]
def __init__(self):
self.current = [[0 for col in range(self.NUM_COLS + 1)] for row in range(self.POPULATIONS)]
# generate first Generation
for i in range(0, self.POPULATIONS):
for j in range(0, self.NUM_COLS):
self.current[i][j] = randrange(self.NUM_COLS)
count = 0
condition = True
while condition:
self.crossover()
self.mutation()
self.fitness()
self.sort()
count += 1
print(self.current)
# print(self.current[0])
if self.current[0][self.NUM_COLS] == 0:
condition = False
print(self.current[0])
pass
def fitness(self):
count = 0
for i in range(0, self.POPULATIONS):
for j in range(0, self.NUM_COLS):
for x in range(j + 1, self.NUM_COLS):
if self.current[i][j] == self.current[i][x]:
count += 1
if abs(j - x) == abs(self.current[i][j] - self.current[i][x]):
count += 1
self.current[i][self.NUM_COLS] = count
count = 0
pass
def sort(self):
for i in range(0, self.POPULATIONS - 1):
for j in range(i + 1, self.POPULATIONS):
if self.current[i][self.NUM_COLS] > self.current[j][self.NUM_COLS]:
for x in range(0, self.NUM_COLS + 1):
temp = self.current[i][x]
self.current[i][x] = self.current
self.current[j][x] = temp
pass
def crossover(self):
_new = [[0 for x in range(self.NUM_COLS + 1)] for x in range(self.POPULATIONS)]
for i in range(0, int(self.POPULATIONS / 2)):
for j in range(0, int(self.NUM_COLS / 2)):
_new[i + 49][j] = self.current[i][j]
_new[i + 49 + 1][j] = self.current[i + 1][j]
for j in range(int(self.NUM_COLS / 2), self.NUM_COLS):
_new[i + 49][j] = self.current[i][j]
_new[i + 49 + 1][j] = self.current[i + 1][j]
self.current = _new
pass
def mutation(self):
for i in range(0, self.POPULATIONS):
self.current[i][randrange(self.NUM_COLS)] = randrange(self.NUM_COLS)
pass
nQueen = NQueen()
print(nQueen.current[0])
and my error:
Traceback (most recent call last):
File "C:/Users/Moein/PycharmProjects/NQueen/project.py", line 81, in <module>
nQueen = NQueen()
File "C:/Users/Moein/PycharmProjects/NQueen/project.py", line 27, in __init__
self.sort()
File "C:/Users/Moein/PycharmProjects/NQueen/project.py", line 54, in sort
if self.current[i][self.NUM_COLS] > self.current[j][self.NUM_COLS]:
TypeError: unorderable types: list() > int()
self.current[i][x] = self.current
I guess that its this line causing the problem, since
self.current
is a list, so you are setting
self.current[i][x]
to be a list instead of an int. So at this point:
if self.current[i][self.NUM_COLS] > self.current[j][self.NUM_COLS]:
when you try to compare those values it might happen, that you compare
an int with a list, which causes the error.
TypeError: unorderable types: list() > int()
Cheers
EDIT:
I just tried it out.
Replacing
self.current
with an int for example 2 prevents the Exception from occurring.

Categories