Python and Comparing File Changes - python

d = feedparser.parse('somerssfeed/rss.xml')
message = {}
smessage = {}
for post in d.entries:
message[post.link] = post.title
fwrite = open("db.txt", "a")
for k, v in message.items():
if k in open("db.txt", "r"):
print("already exists")
else:
fwrite.write("\n" + "{0}".format(k) + "\n")
smessage[k] = v
What i want to achieve is parsing RSS feeds and write their links in to a text file. But the problem is when i run the script next time it should't return old rss items so i compare them via text file except it's failing. On the first run it writes all links, second run it should return empty because all of links are the same but it writes again the same links
EDIT:
after a whole day of trial and error this worked:
for k, v in message.items():
if k in open('db.txt').read():
print('already exists')
else:
smessage[k] = v
fwrite = open("db.txt", "a")
fwrite.write('\n{0}\n'.format(k))
fwrite.close()

You aren't using the correct syntax to open the file. Use this :
g = open("db.txt","r")
lines = xml_file.readlines()
if k in lines:
print ("already exists");

Related

Python - Write each item from a dict's key to a new line

I think this should be fairly simple but I can't find the write solution for my situation. I have dict where each key has a list. For each key in the dict, I want to open a csv file and write each item in each list to a new line. As the program loops through the dict, a new csv file is created for each key, lines for each item in the list for that key are written to a csv, and then the csv closes so the next csv can be created for the next key:
for k, v in Dict.items():
name = "coQ" + str(k) + ".csv"
cPath = r"C:\Path"
coQ = os.path.join(cPath, name)
company_file = open(coQ, 'w')
for i in v:
company_file.write(str(i))
company_file.close()
This writes the list to a csv but all the list items are on the same line in the csv outputs. I've tried opening with append 'a' but I get the same result and I don't think newline will work as it's not a new line but an item in a list that needs to be written.
Try:
for k, v in Dict.items():
name = "coQ" + str(k) + ".csv"
cPath = r"C:\Path"
coQ = os.path.join(cPath, name)
company_file = open(coQ, 'w')
for i in v:
company_file.write("\n")
company_file.write(str(i))
company_file.close()
Or as patrick said:
for k, v in Dict.items():
name = "coQ" + str(k) + ".csv"
cPath = r"C:\Path"
coQ = os.path.join(cPath, name)
company_file = open(coQ, 'w')
for i in v:
company_file.write(str(i)+'\n')
company_file.close()

reading and deleting lines in python

I am trying to read all the lines in a specific file, and it prints the number of the line as an index.
What I am trying to do is to delete the line by inputting the number of the line by the user.
As far as it is now, it prints all the lines with the number of that line, but when I enter the number of the line to be deleted, it's not deleted.
This is the code of the delete function:
def deleteorders ():
index = 0
fh = open ('orders.txt', 'r')
lines = fh.readlines()
for line in lines:
lines = fh.readlines()
index = index+1
print (str(index) + ' ' + line)
try:
indexinp = int(input('Enter the number of the order to be deleted, or "B" to go back: '))
if indexinp == 'B':
return
else:
del line[indexinp]
print (line)
fh = open ('orders.txt', 'w')
fh.writelines(line)
fh.close()
except:
print ('The entered number is not in the range')
return
This should work (you'll need to add the error handling back in):
lines = enumerate(open('orders.txt'))
for i, line in lines:
print i, line
i = int(input(">"))
open('orders.txt', 'w').write(''.join((v for k, v in lines if k != i)))

Python coding for opening and saving data to a file

I am having an issue getting the train function to work correctly in python. I can not modify the def function. I am at the point where I need to get the second file to read lines one at a time for PosList and i need to match the value of movieWordCount[z] in OpenPos. If the file is there, then I am good to incrment column 2 by one of t hat line (segmented by a space). If it is not, then I need the else to append it to the file end. It does not work. It does not append the values if it is missing and I am not sure if it will find the value if it is there. I have been stuck getting thsi to work for two days.
Here is my code segment I am working with:
with open("PosList") as OpenPos:
lines = OpenPos.readlines()
print lines
if movieWordCount[z] in lines:
print "found"
#Now use tokenize to split it apart by space and set to new array for me to call column2
else:
print "not found"
lines.append(movieWordCount[z] + " 1" + "\n")
Here is my full code:
#!/usr/bin/python
#Import Counter
import collections
from collections import Counter
#Was already here but pickle is used for data input and export
import math, os, pickle, re
class Bayes_Classifier:
def __init__(self, trainDirectory = "movie_reviews/"):
#If file listing exists skip to train
if os.path.isfile('iFileList'):
print "file found"
self.train()
#self.classify()
#If file listing does not exist skip to train
if not os.path.isfile('iFileList'):
print "no file"
newfile = 'iFileList'
tempList = set()
subDir = './movie_reviews'
for filenames in os.listdir(subDir):
my_sub_path = os.path.join(os.sep,subDir,filenames)
tempList.add(filenames)
self.save("filenames", "try3")
f = []
for fFileObj in os.walk("movie_reviews/"):
f.extend(fFileObj)
break
pickle.dump(f, open( "save.p", "wb" ))
self.save(f, "try4")
with open(newfile, 'wb') as fi:
pickle.dump(tempList, fi)
#print tempList
self.train()
#self.classify()
def train(self):
'''Trains the Naive Bayes Sentiment Classifier.'''
print "File ready for training"
#Open iFileList to use as input for opening movie files
x = 0
OpenIFileList = open('iFileList','r')
print "iFileList now Open"
#Loop through the file
for line in OpenIFileList:
#print "Ready to read lines"
#print "reading line " + line
if x > 4:
if x % 2 == 0:
#print line
s = line
if '-' in s:
comp = s.split("'")
#print comp[2]
print comp[1] #This is What you need for t he movie file
compValue1 = comp[1]
#Determine Positive/Negative.
#compType is the variable I am storing it to.
compType = compValue1.split("-",2)[1]
#print compType #Prints that middle value like 5 or 1
# This will do the work based on the value.
if compType == '5':
# print "you have a five" #Confirms the loop I am in.
#If file does not exists create it
if not os.path.exists('PosList'):
print "no file"
file('PosList', 'w').close()
#Open file that needs to be reviewed for word count
compValue2 = "movie_reviews/" + compValue1
print compValue2 #Prints the directory and file path
OpenMovieList = open(compValue2,'r')
for commentLine in OpenMovieList:
commentPositive = commentLine.split(" ")
commentPositiveCounter = Counter(commentPositive)
#print commentPositiveCounter # " Comment Pos goes here"
#if commentLine != '' or commentLine != ' ':
#Get first word, second word, ....
if commentLine and (not commentLine.isspace()):
movieWordCount = self.tokenize(commentLine)
y = len(movieWordCount) #determines length of string
print y
z = 0
#print movieWordCount[0] # Shows the zero position in the file.
while z < y:
print "position " + str(z) + " word is " + movieWordCount[z] # Shows the word we are at and position id
with open("PosList") as OpenPos:
lines = OpenPos.readlines()
print lines
if movieWordCount[z] in lines:
print "found"
else:
print "not found"
lines.append(movieWordCount)
z = z + 1
#Close the files
OpenMovieList.close()
OpenPos.close()
x += 1
#for line2 in OpenIFileList.readlines():
#for line in open('myfile','r').readlines():
#do_something(line)
#Save results
#Close the File List
OpenIFileList.close()
def loadFile(self, sFilename):
'''Given a file name, return the contents of the file as a string.'''
f = open(sFilename, "r")
sTxt = f.read()
f.close()
return sTxt
def save(self, dObj, sFilename):
'''Given an object and a file name, write the object to the file using pickle.'''
f = open(sFilename, "w")
p = pickle.Pickler(f)
p.dump(dObj)
f.close()
def load(self, sFilename):
'''Given a file name, load and return the object stored in the file.'''
f = open(sFilename, "r")
u = pickle.Unpickler(f)
dObj = u.load()
f.close()
return dObj
def tokenize(self, sText):
'''Given a string of text sText, returns a list of the individual tokens that
occur in that string (in order).'''
lTokens = []
sToken = ""
for c in sText:
if re.match("[a-zA-Z0-9]", str(c)) != None or c == "\'" or c == "_" or c == '-':
sToken += c
else:
if sToken != "":
lTokens.append(sToken)
sToken = ""
if c.strip() != "":
lTokens.append(str(c.strip()))
if sToken != "":
lTokens.append(sToken)
return lTokens
To open a file for writing, you can use
with open('PosList', 'w') as Open_Pos
As you are using the with form, you do not need to close the file; Python will do that for you at the end of the with-block.
So assuming that the way you add data to the lines variable is correct, you could remove the superfluous code OpenMovieList.close() and OpenPos.close(), and append 2 lines to your code:
with open("PosList") as OpenPos:
lines = OpenPos.readlines()
print lines
if movieWordCount[z] in lines:
print "found"
else:
print "not found"
lines.append(movieWordCount)
with open("PosList", "w") as OpenPos:
OpenPos.write(lines)

retrieving multiple json data in for loop using python

Thanks.
Am trying to get the particular key value from json data using python. logic is to call the function and function should return key (json output sometimes will be just 1 index data or sometime morethan 1 index data)
I was able to get the data and print them, it works fine in inside for loop but when I return back to main then am getting only one value. not sure if something to do with for loop.
json data :
[{
id: "587e569472",
hostname: "I-56BXX",
env: "Beta",
site: "I",
version: "2.0.0.38-1"},
{
id: "587e64472",
hostname: "I-56AXX",
env: "Beta",
site: "I",
version: "2.0.0.39-1"}]
main script :
def get_jsondata(url, hosts):
u = urllib2.urlopen(url)
json_object = json.load(u)
u.close
indexcount = len(json_object)
#print indexcount
#for i in json_object:
#print i['hostname']
if json_object == []:
print 'No Data!'
else:
for rows in json_object:
#print 'hostname:' + rows['hostname']
#print 'env:' + rows['env']
print 'hostname and its env:' + rows['hostname'] + " " + rows['env']
#return rows['hostname']
hosts = rows['hostname']
#print hosts
return (hosts)
#if __name__ == '__main__':
# main()
#main section
url = 'http://api.com/AppData/'
hosts = ""
hosts = get_jsondata(url, hosts)
#print "The required hostname " + str(hostname) + " and its env is " + str(venue)
print(hosts)
After running the script am getting output as :
hostname and its env:I-56BXX I
I-56BXX
I was trying to get both hostname return back to main so, output would be like
hostname and its env:I-56BXX I
hostname and its env:I-56AXX I
I-56BXX
I-56AXX
first 2 line from above output is from print stmt inside for loop and next 2 lines are from return data.
Well, your return statement is inside the loop, so it return on the first iteration, you can use thing like yield or stocking your result into a list you would return at the end of the function
someting like
return [row['hostname'] for row in json_object]
The reason you are only printing one record in json_object is because you are returning hosts too soon. If you remove the line return (hosts) from inside the for loop, then all records from json_object will be printed.
The format that you are hoping to end up with will require a little more work. The format that will be printed will look like this:
hostname and its env:I-56BXX I
I-56BXX
hostname and its env:I-56AXX I
I-56AXX
If you would like to print in your stated format, you should have one for loop that prints the first message and a second for loop to print the second message.
Create an empty list, append to the list, then move your return outside the loop. The other answers are cleaner and great, but here is the most direct fix of your code:
def get_jsondata(url, hosts):
u = urllib2.urlopen(url)
json_object = json.load(u)
u.close
indexcount = len(json_object)
#print indexcount
#for i in json_object:
#print i['hostname']
if json_object == []:
print 'No Data!'
else:
hosts = []
for rows in json_object:
#print 'hostname:' + rows['hostname']
#print 'env:' + rows['env']
print 'hostname and its env:' + rows['hostname'] + " " + rows['env']
#return rows['hostname']
hosts.append(rows['hostname'])
#print hosts
return (hosts)
Please do not post code with comment on StackOverflow: it's hard to read.
Keep your snippets simple. Plus, your code is not very clean (parenthesis at return statement, inconsistency with parenthesis on print statements, missing parenthesis on function calls u.close()...)
With that said, get_jsondata only output one value because of your return statement in your for loop. The return instruction breaks the loop during the first iteration.
Now I didn't get what the get_jsondata is supposed to return. By reading your code I guess it's a list of hostnames. In order to return the list of all the hostnames in your json data, you can use one powerful feature of python: list comprehension.
e.g
[x['hostname'] for x in json_object]

Python: loop doing the same thing to one item n-times as opposed doing it once to n-items

Here is the example:
I am trying to grab a series of XML pages, and then extract data from them.
It downloads each individual page, as the while loop was designed to do but the tester() function prints the data from the first file it downloads V number of times despite it downloading and clearing the file after each time it loops through.
This is killing me what am I doing wrong?
def tester():
with open('raw.txt') as myFile:
test = linecache.getline('raw.txt', 12)
print test
test = ""
myFile.close
def grab_data(Year, rcvote):
link = "XXX/%s/roll%s.xml" % (Year, rc)
site = urllib2.urlopen(link)
localFile = open('raw.txt', 'w')
localFile.write(site.read(100000))
localFile.close()
tester()
while (V !=0):
rc = str(V)
if (len(rc) == 2):
rc = "0%s" % (rc)
elif (len(rc) == 1):
rc = "00%s" % (rc)
else:
rc = rc
grab_data(Year, rc)
V = V - 1
The problem is the linecache module. It assumes that same-named files are the same.
But why write the data to a file just to read it again anyway?
def tester(text):
line12 = text.splitlines()[11]
print line12
def grab_data(year, rcvote):
link = "XXX/%s/roll%03d.xml" % (year, rcvote)
site = urllib2.urlopen(link)
tester(site.read(100000))
while v:
grab_data(year, rc)
v -= 1

Categories