ERROR: File "mtrand.pyx", line 902, in numpy.random.mtrand.RandomState.choice

ERROR: File "mtrand.pyx", line 902, in numpy.random.mtrand.RandomState.choice - python

..can.. Hi all, can someone take a look on this code, I have one problem but I don't know what is?
I'm working on generating various length and shapes of text on image, and when segmentated area is enough big then text is placed, but when the length of text a little bigger then this error shows me. Example, when the text has 1-8 words then the output is fine, but when the length is bigger then it shows me this error, but on some images works fine because it have bigger area to render the text. So I don't know what to do?
Terminal shows me these errors:
File "/..../text_utils.py", line 679, in sample
return self.fdict[kind](nline_max,nchar_max)
File "/..../text_utils.py", line 725, in sample_para
lines = self.get_lines(nline, nword, nchar_max, f=0.35)
File "/..../text_utils.py", line 657, in get_lines
lines = h_lines(niter=100)
File "/..../text_utils.py", line 649, in h_lines
line_start = np.random.choice(len(self.txt)-nline)
File "mtrand.pyx", line 902, in numpy.random.mtrand.RandomState.choice
ValueError: a must be greater than 0 unless no samples are taken
I saw this on this link: https://github.com/numpy/numpy/blob/main/numpy/random/mtrand.pyx there is some statement at 902 line but I don't understand.
And this is my code:
def get_lines(self, nline, nword, nchar_max, f=0.35, niter=100):
def h_lines(niter=100):
lines = ['']
iter = 0
while not np.all(self.is_good(lines,f)) and iter < niter:
iter += 1
**649 ---->** line_start = np.random.choice(len(self.txt)-nline)
lines = [self.txt[line_start+i] for i in range(nline)]
return lines
lines = ['']
iter = 0
while not np.all(self.is_good(lines,f)) and iter < niter:
iter += 1
**657 ---->** lines = h_lines(niter=100)
# get words per line:
nline = len(lines)
for i in range(nline):
words = lines[i].split()
dw = len(words)-nword[i]
if dw > 0:
first_word_index = random.choice(range(dw+1))
lines[i] = ' '.join(words[first_word_index:first_word_index+nword[i]])
while len(lines[i]) > nchar_max: #chop-off characters from end:
if not np.any([ch.isspace() for ch in lines[i]]):
lines[i] = ''
else:
lines[i] = lines[i][:len(lines[i])-lines[i][::-1].find(' ')].strip()
if not np.all(self.is_good(lines,f)):
return #None
else:
return lines
def sample(self, nline_max,nchar_max,kind='WORD'):
**679 ---->** return self.fdict[kind](nline_max,nchar_max)
def sample_para(self,nline_max,nchar_max):
# get number of lines in the paragraph:
nline = nline_max*sstat.beta.rvs(a=self.p_para_nline[0], b=self.p_para_nline[1])
nline = max(1, int(np.ceil(nline)))
# get number of words:
nword = [self.p_para_nword[2]*sstat.beta.rvs(a=self.p_para_nword[0], b=self.p_para_nword[1])
for _ in range(nline)]
nword = [max(1,int(np.ceil(n))) for n in nword]
**725 ---->** lines = self.get_lines(nline, nword, nchar_max, f=0.35)
if lines is not None:
# center align the paragraph-text:
if np.random.rand() < self.center_para:
lines = self.center_align(lines)
return '\n'.join(lines)
else:
return []

Related

Python: read line and modify it (if needed)

let's say I have a file Example.txt like this:
alpha_1 = 10
%alpha_2 = 20
Now, I'd like to have a python script which performs these tasks:
If the line containing alpha_1 parameter is not commented (% symbol), to rewrite the line adding %, like it is with alpha_2
To perform the task in 1. independently of the line order
To leave untouched the rest of the file Example.txt
The file I wrote is:
with open('Example.txt', 'r+') as config:
while 1:
line = config.readline()
if not line:
break
# remove line returns
line = line.strip('\r\n')
# make sure it has useful data
if (not "=" in line) or (line[0] == '%'):
continue
# split across equal sign
line = line.split("=",1)
this_param = line[0].strip()
this_value = line[1].strip()
for case in switch(this_param):
if case("alpha1"):
string = ('% alpha1 =', this_value )
s = str(string)
config.write(s)
Up to now the output is the same Example.txt with a further line (%alpha1 =, 10) down the original line alpha1 = 10.
Thanks everybody

I found the solution after a while. Everything can be easily done writing everything on another file and substituting it at the end.
configfile2 = open('Example.txt' + '_temp',"w")
with open('Example.txt', 'r') as configfile:
while 1:
line = configfile.readline()
string = line
if not line:
break
# remove line returns
line = line.strip('\r\n')
# make sure it has useful data
if (not "=" in line) or (line[0] == '%'):
configfile2.write(string)
else:
# split across equal sign
line = line.split("=",1)
this_param = line[0].strip()
this_value = line[1].strip()
#float values
if this_param == "alpha1":
stringalt = '% alpha1 = '+ this_value + ' \r\n'
configfile2.write(stringalt)
else:
configfile2.write(string)
configfile.close()
configfile2.close()
# the file is now replaced
os.remove('Example.txt' )
os.rename('Example.txt' + '_temp','Example.txt' )

When I open a text file, it only reads the last line

Say customPassFile.txt has two lines in it. First line is "123testing" and the second line is "testing321". If passwordCracking = "123testing", then the output would be that "123testing" was not found in the file (or something similar). If passwordCracking = "testing321", then the output would be that "testing321" was found in the file. I think that the for loop I have is only reading the last line of the text file. Any solutions to fix this?
import time
import linecache
def solution_one(passwordCracking):
print("Running Solution #1 # " + time.strftime("%Y-%m-%d %H:%M:%S",time.localtime()))
startingTimeSeconds = time.time()
currentLine = 1
attempt = 1
passwordFound = False
wordListFile = open("customPassFile.txt", encoding="utf8")
num_lines = sum(1 for line in open('customPassFile.txt'))
while(passwordFound == False):
for i, line in enumerate(wordListFile):
if(i == currentLine):
line = line
passwordChecking = line
if(passwordChecking == passwordCracking):
passwordFound = True
endingTimeSeconds = time.time()
overallTimeSeconds = endingTimeSeconds - startingTimeSeconds
print("~~~~~~~~~~~~~~~~~")
print("Password Found: {}".format(passwordChecking))
print("ATTEMPTS: {}".format(attempt))
print("TIME TO FIND: {} seconds".format(overallTimeSeconds))
wordListFile.close()
break
elif(currentLine == num_lines):
print("~~~~~~~~~~~~~~~~~")
print("Stopping Solution #1 # " + time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()))
print("REASON: Password could not be cracked")
print("ATTEMPTS: {}".format(attempt))
break
else:
attempt = attempt + 1
currentLine = currentLine + 1
continue

The main problem with your code is that you open the file and you read it multiple times. The first time the file object position goes to the end and stays there. Next time you read the file nothing happens, since you are already at the end of the file.
Example
Sometimes an example is worth more than lots of words.
Take the file test_file.txt with the following lines:
line1
line2
Now open the file and read it twice:
f = open('./test_file.txt')
f.tell()
>>> 0
for l in f:
print(l, end='')
else:
print('nothing')
>>> line1
>>> line2
>>> nothing
f.tell()
>>> 12
for l in f:
print(l, end='')
else:
print('nothing')
>>> nothing
f.close()
The second time nothing happen, as the file object is already at the end.
Solution
Here you have two options:
you read the file only once and save all the lines in a list and then use the list in your code. It should be enough to replace
wordListFile = open("customPassFile.txt", encoding="utf8")
num_lines = sum(1 for line in open('customPassFile.txt'))
with
with open("customPassFile.txt", encoding="utf8") as f:
wordListFile = f.readlines()
num_lines = len(wordListFile)
you reset the file object position after you read the file using seek. It would be something along the line:
for i, line in enumerate(wordListFile):
if(i == currentLine):
line = line
wordListFile.seek(0)
I would go with option 1., unless you have memory constraint (e.g. the file is bigger than memory)
Notes
I have a few extra notes:
python starts counters with 0 (like c/c++) and not 1 (like fortran). So probably you want to set:
currentLine = 0
when you read a file, the new line character \n is not stripped, so you have to do it (with strip) or account for it when comparing strings (using e.g. startswith). As example:
passwordChecking == passwordCracking
will likely always return False as passwordChecking contains \n and passwordCracking very likely doesn't.
Disclamer
I haven't tried the code, nor my suggestions, so there might be other bugs lurking around.

**I will delete this answer after OP understands the problem in indentation of I understand his intention of his code.*
for i, line in enumerate(wordListFile):
if(i == currentLine):
line = line
passwordChecking = line
#rest of the code.
Here your code is outside of for loop so only last line is cached.
for i, line in enumerate(wordListFile):
if(i == currentLine):
line = line
passwordChecking = line
#rest of the code.

How do I count the number of lines that are full-line comments in python?

I'm trying to create a function that accepts a file as input and prints the number of lines that are full-line comments (i.e. the line begins with #followed by some comments).
For example a file that contains say the following lines should print the result 2:
abc
#some random comment
cde
fgh
#another random comment
So far I tried along the lines of but just not picking up the hash symbol:
infile = open("code.py", "r")
line = infile.readline()
def countHashedLines(filename) :
while line != "" :
hashes = '#'
value = line
print(value) #here you will get all
#if(value == hashes): tried this but just wasn't working
# print("hi")
for line in value:
line = line.split('#', 1)[1]
line = line.rstrip()
print(value)
line = infile.readline()
return()
Thanks in advance,
Jemma

I re-worded a few statements for ease of use (subjective) but this will give you the desired output.
def countHashedLines(lines):
tally = 0
for line in lines:
if line.startswith('#'): tally += 1
return tally
infile = open('code.py', 'r')
all_lines = infile.readlines()
num_hash_nums = countHashedLines(all_lines) # <- 2
infile.close()
...or if you want a compact and clean version of the function...
def countHashedLines(lines):
return len([line for line in lines if line.startswith('#')])

I would pass the file through standard input
import sys
count = 0
for line in sys.stdin: """ Note: you could also open the file and iterate through it"""
if line[0] == '#': """ Every time a line begins with # """
count += 1 """ Increment """
print(count)

Here is another solution that uses regular expressions and will detect comments that have white space in front.
import re
def countFullLineComments(infile) :
count = 0
p = re.compile(r"^\s*#.*$")
for line in infile.readlines():
m = p.match(line)
if m:
count += 1
print(m.group(0))
return count
infile = open("code.py", "r")
print(countFullLineComments(infile))

Multiple loops logic and speed optimization in Python?

Here is two python functions to transfer data from one file to another file. Both source file and target file have the same number of objects but with different data.
def getBlock(rigObj, objName):
rigObj.seek(0)
Tag = False
block = ""
for line in rigObj:
if line.find("ObjectAlias " + str(objName) + "\n") != -1:
for line in rigObj:
if line.find("BeginKeyframe") != -1:
Tag = True
elif line.lstrip().startswith("0.000 ") and line.rstrip().endswith("default"):
Tag = False
break
elif Tag:
block += line
return (block)
def buildScene(sceneObj, rigObj, objList):
sceneObj.seek(0)
rigObj.seek(0)
newscene = ""
for line in sceneObj:
newscene += line
for obj in objList:
if line.find("ObjectAlias " + obj + "\n") != -1:
Tag = True
for line in sceneObj:
if line.find("BeginKeyframe") != -1:
newscene += line
newscene += getBlock(rigObj, obj)
Tag = False
elif line.lstrip().startswith("0.000 ") and line.rstrip().endswith("default"):
newscene += line
Tag = True
break
elif Tag:
newscene += line
return (newscene)
getBlock is a sub-function for getting data from rigobj;
buildScene is my main function, it has three parameters:
First parameter(sceneobj) is the file that I want to put data into;
Second parameter(rigobj) is the file that I get the data from;
Third parameter(objlist) is a list of what object's data to be transfered.
So far, the function does its job, the only problem is a bit of slow(sceneobj<10MB, rigobj<2MB, objlist<10 objects), I am not sure if there are a logic problem in the code, should I loop the sceneObj first or loop the objList first? Does it affect the speed?
UPDATE:
Both sceneObj and rigObj have similar data like this:
lines
BeginObject
lines
ObjectAlias xxx #--> object in transfer list
lines
BeginKeyframe 10 12
-9.000 4095 default #--> transfer begins
lines #--> transfer from rigObj to sceneObj and override lines in sceneObj
-8.000 63 default #--> same
lines #--> same
-7.000 63 default #--> same
lines #--> same
-1.000 63 default #--> same
lines #--> transfer ends
0.000 -1 default
lines
EndKeyframe
EndMotion
lines
EndObject
The data want to be transfered and overrided is only lines bewteen BeginKeyframe and 0.000 -1 default of any specified objects(by objList)

Most obvious optimization is to index data for getBlock function, so you will able to seek to needed position instead of always parsing full file from beginning.
like so:
def create_rig_index(rig_obj):
""" This function creates dict of offsets for specific ObjectAlias
Example:
data:
line with offset 100: ObjectAlias xxx
more lines
line with offset 200: ObjectAlias yyy
more lines
line with offset 300: ObjectAlias xxx
more lines
result will be:
xxx: [100, 300]
yyy: [200]
"""
idx = defaultdict( list )
position = 0
for line in rig_obj:
strip_line = line.strip()
if strip_line.startswith( "ObjectAlias" ):
obj_name = strip_line.split()[1]
idx[ obj_name ].append( position )
# unfortunately python prevent `tell` calls during iteration.
position += len( bytes( line, 'utf-8' ) )
# if data guaranteed to be ascii only its possible to use len( line )
# or you can write custom line generator on top of read function.
return idx;
def getBlock(rigObj, rigIdx, objName):
""" same as your getBlock, but uses precalculated offsets"""
block = ""
for idx in rigIdx[ objName ]:
rigObj.seek( idx )
Tag = False
for line in rigObj:
if line.find("BeginKeyframe") != -1:
Tag = True
elif line.lstrip().startswith("0.000 ") and line.rstrip().endswith("default"):
break
elif Tag:
block += line
return (block)
In buildScene method you should create rig_index before running for loop, and use this index in getBlock function.

appending values to a list in python

i am doing this:
def GetDistinctValues(theFile, theColumn):
lines=theFile.split('\n')
allValues=[]
for line in lines:
allValues.append(line[theColumn-1])
return list(set(allValues))
i am getting string index out of range on this line:
allValues.append(line[theColumn-1])
does anyone know what i am doing wrong?
here's the complete code if needed:
import hashlib
def doStuff():
createFiles('together.csv')
def readFile(fileName):
a=open(fileName)
fileContents=a.read()
a.close()
return fileContents
def GetDistinctValues(theFile, theColumn):
lines=theFile.split('\n')
allValues=[]
for line in lines:
allValues.append(line[theColumn-1])
return list(set(allValues))
def createFiles(inputFile):
inputFileText=readFile(inputFile)
b = inputFileText.split('\n')
r = readFile('header.txt')
DISTINCTCOLUMN=12
dValues = GetDistinctValues(inputFileText,DISTINCTCOLUMN)
for uniqueValue in dValues:
theHash=hashlib.sha224(uniqueValue).hexdigest()
for x in b:
if x[DISTINCTCOLUMN]==uniqueValue:
x = x.replace(', ',',').decode('latin-1','ignore')
y = x.split(',')
if len(y) < 3:
break
elif len(y) > 3:
desc = ' '.join(y[3:])
else:
desc = 'No description'
# Replacing non-XML-allowed characters here (add more if needed)
y[2] = y[2].replace('&','&')
desc = desc.replace('&','&')
r += '\n<Placemark><name>'+y[2].encode('utf-8','xmlcharrefreplace')+'</name>' \
'\n<description>'+desc.encode('utf-8','xmlcharrefreplace')+'</description>\n' \
'<Point><coordinates>'+y[0]+','+y[1]+'</coordinates></Point>\n</Placemark>'
r += readFile('footer.txt')
f = open(theHash,'w')
f.write(r)
f.close()

The error isn't caused by append(), It's because the line isn't long enough. Maybe your file has a empty line at the end. You could try
def GetDistinctValues(theFile, theColumn):
lines=theFile.split('\n')
allValues=[]
for line in lines:
if line:
allValues.append(line[theColumn-1])
return list(set(allValues))
otherwise an exception handler can help find what's going wrong
def GetDistinctValues(theFile, theColumn):
lines=theFile.split('\n')
allValues=[]
for line in lines:
try:
allValues.append(line[theColumn-1])
except IndexError:
print "line: %r"%line
return list(set(allValues))

That is happening because line doesn't have as many elements as the code is assuming. Try the following:
for line in lines:
if len(line) < theColumn:
print "This line doesn't have enough elements:\n" + line
else:
allValues.append(line[theColumn-1])
return list(set(allValues))
That will give you a hint, that is the type of error you expect when trying to access an element out of the range of a list i. e. a non existent element.

line[theColumn-1])
This will of course raise the mentioned error if the string(line) is shorted then 'theColumn'.
What else would you expect?

We Keep Coding

Python is a programming language that lets you work quickly and integrate systems more effectively.

ERROR: File "mtrand.pyx", line 902, in numpy.random.mtrand.RandomState.choice - python

Related

Python: read line and modify it (if needed)

When I open a text file, it only reads the last line

How do I count the number of lines that are full-line comments in python?

Multiple loops logic and speed optimization in Python?

appending values to a list in python

Categories

Resources