When I try to execute one particular python file. I am getting
following errors
Traceback (most recent call last):
File "<pyshell#4>", line 1, in <module>
g.stem(u"തുറക്കുക")
File "/usr/local/lib/python2.7/dist-packages/indicstemmer-0.1-py2.7.egg/indicstemmer/core.py", line 48, in stem
self.rulesDict = self.LoadRules()
File "/usr/local/lib/python2.7/dist-packages/indicstemmer-0.1-py2.7.egg/indicstemmer/core.py", line 81, in LoadRules
errors='ignore')
File "/usr/lib/python2.7/codecs.py", line 881, in open
file = __builtin__.open(filename, mode, buffering)
IOError: [Errno 2] No such file or directory: '/usr/local/lib/python2.7/dist-packages/indicstemmer-0.1-py2.7.egg/indicstemmer/stemmer_ml.rules'
What is the problem here?
class Stemmer:
"""
Instantiate class to get the methods
"""
def __init__(self):
self.rules_file = os.path.join(os.path.dirname(__file__), \
'stemmer_ml.rules')
self.rulesDict = None
self.normalizer = normalizer.getInstance()
def stem(self, text):
"""
:param text: unicode encoded malayalam string
:returns: dictionary with words as the key and the stemmer result
as the values. stems all the words in the given text and
returns a dictionary
"""
text = self.normalizer.normalize(text)
if self.rulesDict is None:
self.rulesDict = self.LoadRules()
words = text.split(" ")
word_count = len(words)
result_dict = dict()
word_iter = 0
word = ""
while word_iter < word_count:
word = words[word_iter]
word = self.trim(word)
word = word.strip('!,.?:')
word_length = len(word)
suffix_pos_itr = 2
word_stemmed = ""
while suffix_pos_itr < word_length:
suffix = word[suffix_pos_itr:word_length]
if suffix in self.rulesDict:
word_stemmed = word[0:suffix_pos_itr] + \
self.rulesDict[suffix]
break
suffix_pos_itr = suffix_pos_itr+1
word_iter = word_iter+1
if(word_stemmed == ""):
word_stemmed = word
result_dict[ word ] = word_stemmed
print result_dict[2]
return result_dict
def LoadRules(self):
#print "Loading the rules..."
rules_dict = dict()
line = []
line_number = 0
rule_number = 0
rules_file = codecs.open(self.rules_file, encoding='utf-8', \
errors='ignore')
while 1:
line_number = line_number +1
text = unicode( rules_file.readline())
if text == "":
break
if text[0] == '#':
continue #this is a comment - ignore
text = text.split("#")[0] #remove the comment part of the line
line_number = line_number +1
line = text.strip() # remove unwanted space
if(line == ""):
continue
if(len(line.split("=")) != 2):
print "[Error] Syntax Error in the Rules. Line number: ", \
line_number
print "Line: "+ text
continue
lhs = line.split("=")[0].strip()
rhs = line.split("=")[1].strip()
if(len(rhs)>0):
if(lhs[0] == '"'):
lhs = lhs[1:len(lhs)] # if the string is "quoted"
if(lhs[len(lhs)-1] == '"'):
lhs = lhs[0:len(lhs)-1] # if the string is "quoted"
if(len(rhs)>0):
if(rhs[0] == '"'):
rhs = rhs[1:len(rhs)] # if the string is "quoted"
if(rhs[len(rhs)-1] == '"'):
rhs = rhs[0:len(rhs)-1] # if the string is "quoted"
rule_number = rule_number+1
rules_dict[lhs] = rhs
#print "[", rule_number ,"] " +lhs + " : " +rhs
#print "Found ",rule_number, " rules."
return rules_dict
setup file
from setuptools import setup, find_packages
name = "indicstemmer"
setup(
name=name,
version="0.1",
license="LGPL-3.0",
description="Malayalam word stemmer",
long_description="""This application helps you to stem the words
in the given text. Currently supports only
Note that this is very experimental and uses a rule based approach.
""",
packages=find_packages(),
include_package_data=True,
setup_requires=['setuptools-git'],
install_requires=['setuptools','normalizer'],
test_suite="tests",
zip_safe=False,
)
Test
import unittest
from indicstemmer import getInstance
class TestIndicStemmer(unittest.TestCase):
def setUp(self):
self.instance = getInstance()
def test_stemmer(self):
self.assertEqual(u"തുറക്കുക",self.instance.stem(u"തുറക്കുന്ന")[u"തുറക്കുന്ന"])
def main():
suite = unittest.TestLoader().loadTestsFromTestCase(TestIndicStemmer)
unittest.TextTestRunner(verbosity=2).run(suite)
if __name__ == "__main__":
main()
I am using Ubuntu 12.04 desktop version
The significant line of the error message is
File "/usr/lib/python2.7/codecs.py", line 881, in open
file = __builtin__.open(filename, mode, buffering)
IOError: [Errno 2] No such file or directory: '/usr/local/lib/python2.7/dist-packages/indicstemmer-0.1-py2.7.egg/indicstemmer/stemmer_ml.rules'
This implies that the indicstemmer modules had not been installed properly because the required file stemmer_ml.rules could not be found.
check that you do not need to set up defaults before you call gstem() and that the permissions in the python library allow you to get to the rules. Other than that the library package my need to be reinstalled. I have noticed that if different versions of python exist, sometimes packages get installed in the wrong version. However, I doubt it in this case because it got all the way to the rules file before crashing.
Related
I need to create a simplified version of grep in python which will print a line when a keyword is used such as using this command "python mygrep.py duck animals.txt" and getting the output, "The duck goes quack". I have a file where it contains different outputs but I'm not sure how to get it to print the line that contains the "keyword" such as the line with "duck" in it. Im suppose to only use "import sys" and not "re" since its suppose to be a simple version.
import sys
def main():
if len(sys.argv) != 3:
exit('Please pass 2 arguments.')
search_text = sys.argv[1]
filename = sys.argv[2]
with open("animals.txt", 'r') as f:
text = f.read()
for line in text:
print(line)
if __name__ == '__main__':
main()
The operator 'in' should be sufficient.
for line in text:
if search_text in line:
print(line)
Here is a an implementation of grep in python with after/before feature:
def _fetch_logs(self, data, log_file, max_result_size, current_result_size):
after = data.get("after", 0)
before = data.get("before", 0)
exceeded_max = False
result = []
before_counter = 0
frame = []
found = False
for line in log_file:
frame.append(line)
match_patterns = all(self._search_in(data, pattern, line) for pattern in data["patterns"])
if match_patterns:
before_counter = len(frame)
found = True
if not found and len(frame) > before:
frame.pop(0)
if found and len(frame) >= before_counter + after:
found = False
before_counter = 0
result += frame
frame = []
if current_result_size + len(result) >= max_result_size:
exceeded_max = True
break
if found:
result += frame
return exceeded_max, result
I am trying to write a code in python that goes through a text file and creates a word index of every word in the file. However, It's not executing properly. So far this is what I have:
import sys
import re
line = sys.stdin.readline()
pattern = re.compile("[a-zA-Z0-9]+")
while line:
def build_word_index(txt):
out = {}
for i, line in enumerate(txt.split("\n")):
for word in line.strip().split(" "):
if word not in out:
out[word] = [i + 1]
else:
out[word].append(i + 1)
return out
You never call the function!
import sys
import re
pattern = re.compile("[a-zA-Z0-9]+")
def build_word_index(txt):
out = {}
for i, line in enumerate(txt.split("\n")):
for word in line.strip().split(" "):
if word not in out:
out[word] = [i + 1]
else:
out[word].append(i + 1)
return out
while True:
line = sys.stdin.readline()
if not line:
break
out = build_word_index(line)
print(out)
I am getting an error when I want to run a python script:
The error is following one:
The code is given below:
#!/usr/bin/python
import subprocess
code_dir = "code"
title = "Stanford ACM-ICPC Team Notebook"
def get_sections():
sections = []
section_name = None
with open('contents.txt', 'r') as f:
for line in f:
if '#' in line: line = line[:line.find('#')]
line = line.strip()
if len(line) == 0: continue
if line[0] == '[':
section_name = line[1:-1]
subsections = []
if section_name is not None:
sections.append((section_name, subsections))
else:
tmp = line.split('\t', 1)
if len(tmp) == 1:
raise ValueError('Subsection parse error: %s' % line)
filename = tmp[0]
subsection_name = tmp[1]
if subsection_name is None:
raise ValueError('Subsection given without section')
subsections.append((filename, subsection_name))
return sections
def get_style(filename):
ext = filename.lower().split('.')[-1]
if ext in ['c', 'cc', 'cpp']:
return 'cpp'
elif ext in ['java']:
return 'java'
elif ext in ['py']:
return 'py'
else:
return 'txt'
# TODO: check if this is everything we need
def texify(s):
#s = s.replace('\'', '\\\'')
#s = s.replace('\"', '\\\"')
return s
def get_tex(sections):
tex = ''
for (section_name, subsections) in sections:
tex += '\\section{%s}\n' % texify(section_name)
for (filename, subsection_name) in subsections:
tex += '\\subsection{%s}\n' % texify(subsection_name)
tex += '\\raggedbottom\\lstinputlisting[style=%s]{%s/%s}\n' % (get_style(filename), code_dir, filename)
tex += '\\hrulefill\n'
tex += '\n'
return tex
if __name__ == "__main__":
sections = get_sections()
tex = get_tex(sections)
with open('contents.tex', 'w') as f:
f.write(tex)
latexmk_options = ["latexmk", "-pdf", "notebook.tex"]
subprocess.call(latexmk_options)
I have already tried to install latexmk, But didn't succeed.
Can you help me about the detailed instruction of installation latexmk. I have already googled much. And for copyright thats not even my code. Its a code from stanford acm to make their own. Now I want to use to make my own.
Make sure that latexmk is accessible from your command line. You can check this by typing latexmk -version from your command line. If it is not accessible from command line then you need to add the latexmk path to environment variable.
If latexmk is not installed follow this link to properly install the latexmk.
I think following these steps might fix your problem.
I am having an issue getting the train function to work correctly in python. I can not modify the def function. I am at the point where I need to get the second file to read lines one at a time for PosList and i need to match the value of movieWordCount[z] in OpenPos. If the file is there, then I am good to incrment column 2 by one of t hat line (segmented by a space). If it is not, then I need the else to append it to the file end. It does not work. It does not append the values if it is missing and I am not sure if it will find the value if it is there. I have been stuck getting thsi to work for two days.
Here is my code segment I am working with:
with open("PosList") as OpenPos:
lines = OpenPos.readlines()
print lines
if movieWordCount[z] in lines:
print "found"
#Now use tokenize to split it apart by space and set to new array for me to call column2
else:
print "not found"
lines.append(movieWordCount[z] + " 1" + "\n")
Here is my full code:
#!/usr/bin/python
#Import Counter
import collections
from collections import Counter
#Was already here but pickle is used for data input and export
import math, os, pickle, re
class Bayes_Classifier:
def __init__(self, trainDirectory = "movie_reviews/"):
#If file listing exists skip to train
if os.path.isfile('iFileList'):
print "file found"
self.train()
#self.classify()
#If file listing does not exist skip to train
if not os.path.isfile('iFileList'):
print "no file"
newfile = 'iFileList'
tempList = set()
subDir = './movie_reviews'
for filenames in os.listdir(subDir):
my_sub_path = os.path.join(os.sep,subDir,filenames)
tempList.add(filenames)
self.save("filenames", "try3")
f = []
for fFileObj in os.walk("movie_reviews/"):
f.extend(fFileObj)
break
pickle.dump(f, open( "save.p", "wb" ))
self.save(f, "try4")
with open(newfile, 'wb') as fi:
pickle.dump(tempList, fi)
#print tempList
self.train()
#self.classify()
def train(self):
'''Trains the Naive Bayes Sentiment Classifier.'''
print "File ready for training"
#Open iFileList to use as input for opening movie files
x = 0
OpenIFileList = open('iFileList','r')
print "iFileList now Open"
#Loop through the file
for line in OpenIFileList:
#print "Ready to read lines"
#print "reading line " + line
if x > 4:
if x % 2 == 0:
#print line
s = line
if '-' in s:
comp = s.split("'")
#print comp[2]
print comp[1] #This is What you need for t he movie file
compValue1 = comp[1]
#Determine Positive/Negative.
#compType is the variable I am storing it to.
compType = compValue1.split("-",2)[1]
#print compType #Prints that middle value like 5 or 1
# This will do the work based on the value.
if compType == '5':
# print "you have a five" #Confirms the loop I am in.
#If file does not exists create it
if not os.path.exists('PosList'):
print "no file"
file('PosList', 'w').close()
#Open file that needs to be reviewed for word count
compValue2 = "movie_reviews/" + compValue1
print compValue2 #Prints the directory and file path
OpenMovieList = open(compValue2,'r')
for commentLine in OpenMovieList:
commentPositive = commentLine.split(" ")
commentPositiveCounter = Counter(commentPositive)
#print commentPositiveCounter # " Comment Pos goes here"
#if commentLine != '' or commentLine != ' ':
#Get first word, second word, ....
if commentLine and (not commentLine.isspace()):
movieWordCount = self.tokenize(commentLine)
y = len(movieWordCount) #determines length of string
print y
z = 0
#print movieWordCount[0] # Shows the zero position in the file.
while z < y:
print "position " + str(z) + " word is " + movieWordCount[z] # Shows the word we are at and position id
with open("PosList") as OpenPos:
lines = OpenPos.readlines()
print lines
if movieWordCount[z] in lines:
print "found"
else:
print "not found"
lines.append(movieWordCount)
z = z + 1
#Close the files
OpenMovieList.close()
OpenPos.close()
x += 1
#for line2 in OpenIFileList.readlines():
#for line in open('myfile','r').readlines():
#do_something(line)
#Save results
#Close the File List
OpenIFileList.close()
def loadFile(self, sFilename):
'''Given a file name, return the contents of the file as a string.'''
f = open(sFilename, "r")
sTxt = f.read()
f.close()
return sTxt
def save(self, dObj, sFilename):
'''Given an object and a file name, write the object to the file using pickle.'''
f = open(sFilename, "w")
p = pickle.Pickler(f)
p.dump(dObj)
f.close()
def load(self, sFilename):
'''Given a file name, load and return the object stored in the file.'''
f = open(sFilename, "r")
u = pickle.Unpickler(f)
dObj = u.load()
f.close()
return dObj
def tokenize(self, sText):
'''Given a string of text sText, returns a list of the individual tokens that
occur in that string (in order).'''
lTokens = []
sToken = ""
for c in sText:
if re.match("[a-zA-Z0-9]", str(c)) != None or c == "\'" or c == "_" or c == '-':
sToken += c
else:
if sToken != "":
lTokens.append(sToken)
sToken = ""
if c.strip() != "":
lTokens.append(str(c.strip()))
if sToken != "":
lTokens.append(sToken)
return lTokens
To open a file for writing, you can use
with open('PosList', 'w') as Open_Pos
As you are using the with form, you do not need to close the file; Python will do that for you at the end of the with-block.
So assuming that the way you add data to the lines variable is correct, you could remove the superfluous code OpenMovieList.close() and OpenPos.close(), and append 2 lines to your code:
with open("PosList") as OpenPos:
lines = OpenPos.readlines()
print lines
if movieWordCount[z] in lines:
print "found"
else:
print "not found"
lines.append(movieWordCount)
with open("PosList", "w") as OpenPos:
OpenPos.write(lines)
I have to figure out a way to take a code that was already given and improving it by making it a class that is object oriented.
This code: was already given and we use it for our new code. the file 'students2txt' is being extracted line by line (being split based on a ':') and the StudentFileReader class is imported into the new class StudentReport(object). And so the finished project is supposed give a student list that has ID numbers, first and last names, gpa's (all information is give in the 'students2.txt' I just have to make the code print all of the info.
filereader.py:
class StudentFileReader:
def __init__(self, inputSrc):
self._inputSrc = inputSrc
self._inputFile = None
def open(self):
self._inputFile = open(self._inputSrc, 'r')
def close(self):
self._inputFile.close()
self._inputFile = None
def fetchRecord(self):
line = self._inputFile.readline()
if line == "":
return None
record = StudentRecord()
#change
record.idNum = int(line)
record.firstName = self._inputFile.readline().rstrip().rsplit(':')
record.lastName = self._inputFile.readline().rstrip().rsplit(':')
record.classCode = int(self._inputFile.readline())
record.gpa = float(self._inputFile.readline())
return record
class StudentRecord:
def __init__(self):
self.idNum = 0
self.firstName = ""
self.lastName = ""
self.classCode = 0
self.gpa = 0.0
New file:
from filereader import StudentFileReader
class StudentReport(object):
def __init__(self):
self._theList = None
def loadRecords(self, filename):
self.reader = StudentFileReader(filename)
self.reader.open()
theList = []
record = self.reader.fetchRecord()
while record is not None:
theList.append(record)
record = self.reader.fetchRecord()
reader.close()
return theList
def sortByid(self):
self._studentList.sort(key = lambda rec: rec.idNum)
def sortByName(self):
pass
def __str__(self):
classNames = [ "", "Freshman", "Sophomore", "Junior", "Senior" ]
print( "LIST OF STUDENTS".center(50) )
print( "" )
print( "%-5s %-25s %-10s %-4s" % ('ID', 'NAME', 'CLASS', 'GPA'))
print( "%5s %25s %10s %4s" % ('-' * 5, '-' * 25, '-' * 10, '-' * 4))
# Print the body.
for record in theList :
print( "%5d %-25s %-10s %4.2f" % \
(record.idNum, \
record.lastName + ', ' + record.firstName,
classNames[record.classCode], record.gpa) )
# Add a footer.
print( "-" * 50 )
print( "Number of students:", len(theList) )
if __name__ == "__main__":
s = StudentReport()
s.loadRecords('students2.txt')
s.sortByName()
print str(s)
This code was taken from the textbook Data Structures and Algorithms Using Python. I'm supposed to make an object oriented class. I've started the StudentRecord class and written the __init__ but I'm not really sure what to do after that. When I try to run anything it gives me a invalid literal for int() with base 10 error. I'm very new to python so I'm not sure how to make any class object oriented easily..
edit: yes, the error came from the fetchRecord function
Traceback (most recent call last):
File "C:\Users\...\studentreport.py", line 24, in <module>
s.loadRecords('students2.txt')
File "C:\Users\...\studentreport.py", line 13, in loadRecords
record = self.reader.fetchRecord()
File "C:\Users\...\filereader.py", line 22, in fetchRecord
record.idNum = int(line)
ValueError: invalid literal for int() with base 10: '10015:John:Smith:2:3.01\n'
Your line parsing code doesn't match the format of the file.
You are trying to interpret the whole line as an integer, but the line contains more.
Perhaps you wanted to split the line first? That one line contains all elements of the record:
parts = line.strip().split(':')
record.idNum = int(parts[0])
record.firstName = parts[1]
record.lastName = parts[2]
record.classCode = parts[3]
record.gpa = float(parts[4])
You can override the original StudentFileReader.fetchRecord()) method by subclassing the class in your own code:
class MyStudentFileReader(StudentFileReader):
def fetchRecord(self):
line = self._inputFile.readline()
if not line:
return None
record = StudentRecord()
parts = line.strip().split(':')
record.idNum = int(parts[0])
record.firstName = parts[1]
record.lastName = parts[2]
record.classCode = parts[3]
record.gpa = float(parts[4])
return record
Then use MyStudentFileReader() instead of StudentFileReader().
You need to split your line before you start trying to convert the pieces into the formats you want for your individual data items. Right now, you're calling readline repeatedly, so each of the values you're calculating for a student comes from a separate line from the file.
Instead, try splitting and unpacking the result directly into local variables:
idNum, firstName, lastName, classCode, GPA = line.rstrip().split(':')
Then do whatever conversions each of those need (e.g. record.idNum = int(idNum)).