calling citeparser function in python - python

I wrote the following function:
def CiteParser():
with open("/tmp/content.txt") as myfile:
soup = BeautifulSoup(myfile)
for cite in soup.find_all('cite'):
print cite.string
Now I want to call it in my program like this:
result = open("/tmp/result.txt", "a+")
res = CiteParser()
result.write(str(res))
result.close()
I have also another function that appends url content to /tmp/content.txt and I put CiteParser into a loop.
But it always returns the same result for me.
Am I calling CiteParser correctly? if not how is it possible?
Thank you

Instead of printing strings, you need to return it.
def CiteParser():
    """Return the text of every <cite> tag in /tmp/content.txt.

    Returns:
        One string with the text of each <cite> tag on its own line
        (an empty string when no <cite> tag is found).
    """
    with open("/tmp/content.txt") as myfile:
        soup = BeautifulSoup(myfile)
    # Tag.string is None when a tag has more than one child, and a None in
    # the sequence would make str.join raise TypeError. get_text() always
    # returns a string, so it is safe to join.
    return '\n'.join(cite.get_text() for cite in soup.find_all('cite'))
Otherwise, the function returns nothing; it implicitly returns None.

Related

Read file and format it into dictionary

How to capture the string in function.py and track the def step1() and its following function create() and login() into dictionary format? (The format i want to achieve is below)
function.py
#!C:\Python\Python39\python.exe
# print ('Content-type: text/html\n\n')
def step1():
create()
login()
def step2():
authenticate()
def step3():
send()
Expected output
thisdict = {
'def step1()': ['create(),login()'],
'def step2():':['authenticate()'],
'def step3():': ['send()']
}
You can read the file function.py, split it in order to separate the different functions, and then for each function, split it once more to get the signature as key and the commands as values:
with open('function.py', 'r') as inFile:
    # Chunks are separated by blank lines; the first chunk is dropped
    # (presumably the shebang/comment header -- verify against your file).
    funcs = inFile.read().split('\n\n')[1:]

result = {}
for elem in funcs:
    # Split on the FIRST colon only: a colon inside the function body
    # (dict literal, slice, nested block) would otherwise yield more than
    # two parts and break the two-name unpacking.
    sign, commands = elem.split(':', 1)
    # The first item is the remainder of the signature line, so skip it.
    commands = list(map(str.strip, commands.split('\n')))[1:]
    result[sign] = commands
print(result)
This will return:
{'def step1()': ['create()', 'login()'], 'def step2()': ['authenticate()'], 'def step3()': ['send()']}
You could use a regex that would find each method and content (def \w+\(.*\):)((?:\n[ \t]+.+)+)
(def \w+\(.*\):) for the method definition
\n[ \t]+.+ for each method row (with the previous \n)
import json
import re

# Read the whole source file so the regex can match across lines.
with open("function.py") as fic:
    content = fic.read()

# Each match is (definition line, indented body lines that follow it).
groups = re.findall(r"(def \w+\(.*\):)((?:\n[ \t]+.+)+)", content)

result = {}
for key, val in groups:
    body_lines = [row.strip() for row in val.strip().splitlines()]
    # The body is stored as a single comma-joined string in a one-item list.
    result[key] = [",".join(body_lines)]

print(json.dumps(result, indent=4))
you can do something like that:
with open('function.py', 'r') as f:
    lines = f.readlines()

thisdict = {}
temp = []
a = '_start_'  # key for any lines that come before the first function
for line in lines:
    # str.startswith is all lowercase; 'startsWith' raises AttributeError.
    if line.startswith('def'):  # You might want to add something for the spacing
        # A new function begins: store what was collected under the
        # previous key, then start collecting for this one.
        thisdict[a] = temp
        a = line[3:]
        temp = []
    else:
        temp.append(line)
# Store the lines collected for the last function.
thisdict[a] = temp
print(thisdict)
this clearly isn't the best code but it's easy to understand and easy to implement :)

python for loop and its generator issue

I am trying to generate some link.
NOTE: THERE IS PROBLEM WITH return vs print.
When I write the code with return, it only returns one link:
run this code:
import requests
import re
wikiurl = 'https://en.wikipedia.org/wiki/List_of_states_and_territories_of_the_United_States'
state_pat = re.compile(r'title=\"(\w+)\">')
def get_page_content(url):
response = requests.get(url)
return response.text
def link_generator(wikiurl):
content = get_page_content(wikiurl)
names = state_pat.findall(content)
for i in names:
return 'https://www.local.com/business/results/listing.cfm?s=tile+and+grout+cleaning&ar=' + i + '%2CNY&gsp=ZFZWU1RaU09zWGNYdjFEV1l2ZHFLNVZUUFRPT3c3a21lbFVCbERQOU5VS3p6ai9DRXNMa29PcVZ0ZVV0TXZLM01wUVFUUHZYK2lrMnB5VGJyMHZJeUNoK1dXaUoxZ1NKT3AxbVlJOGN1aVBEb1NRMzlCemdDVHh5aGd3eU5DYUpKWDRtNFVQR0llOFJibUhQR3pSV3ppWFR4ekJoRVltL29UdFQ0MW9KUS9IenJrcjVBMUt3bkErRnlSVnFjRnZ0TjhRWEdET0FuZWRVUGNkemdxUlkzOUYyUjZXbHBzQWRMY3hEUTY4WmtnYkRsSkEvazBrVVY5d0NmSVVMaWp0WnNDNmFsZFNzMitWeHZDYTg2YmJwRGQzSisvOUJaYWNBaFdUd21LaWJpNk9veS9OT1N1VE5DV3RUNDIxdkY5NmZ4bWFVcWtLc1BlVkNRNlEvSG4ydER1T1ZkcXk4Um5BWU5kUU9UZnVOUE9BPQ%253D%253D&lwfilter=&wsrt=&wpn='
a = link_generator(wikiurl)
print(a)
And if I run this code with a print inside the function, it prints all the links. Why? I need all the links with return.
run this code: you will see different:
import requests
import re
wikiurl = 'https://en.wikipedia.org/wiki/List_of_states_and_territories_of_the_United_States'
state_pat = re.compile(r'title=\"(\w+)\">')
def get_page_content(url):
response = requests.get(url)
return response.text
def link_generator(wikiurl):
content = get_page_content(wikiurl)
names = state_pat.findall(content)
for i in names:
print('https://www.local.com/business/results/listing.cfm?s=tile+and+grout+cleaning&ar=' + i + '%2CNY&gsp=ZFZWU1RaU09zWGNYdjFEV1l2ZHFLNVZUUFRPT3c3a21lbFVCbERQOU5VS3p6ai9DRXNMa29PcVZ0ZVV0TXZLM01wUVFUUHZYK2lrMnB5VGJyMHZJeUNoK1dXaUoxZ1NKT3AxbVlJOGN1aVBEb1NRMzlCemdDVHh5aGd3eU5DYUpKWDRtNFVQR0llOFJibUhQR3pSV3ppWFR4ekJoRVltL29UdFQ0MW9KUS9IenJrcjVBMUt3bkErRnlSVnFjRnZ0TjhRWEdET0FuZWRVUGNkemdxUlkzOUYyUjZXbHBzQWRMY3hEUTY4WmtnYkRsSkEvazBrVVY5d0NmSVVMaWp0WnNDNmFsZFNzMitWeHZDYTg2YmJwRGQzSisvOUJaYWNBaFdUd21LaWJpNk9veS9OT1N1VE5DV3RUNDIxdkY5NmZ4bWFVcWtLc1BlVkNRNlEvSG4ydER1T1ZkcXk4Um5BWU5kUU9UZnVOUE9BPQ%253D%253D&lwfilter=&wsrt=&wpn=')
a = link_generator(wikiurl)
print(a)
When you issue a return statement in a function it doesn't execute any further lines and returns to its caller. If you want to iteratively return items in a generator you can replace return with yield. Alternatively collect the results as a list and return the list.
You then need to change your final line when you're calling this to:
a = list(link_generator(wikiurl))
to unpack your generator

Try statement not running as I expect

I have three functions: readHeader, which reads the header of a txt file; readFile, which reads the contents of the file; and exceptionNH, which compares the file name and the header and raises an exception if the two are not compatible (e.g. if the date in the name is not the same as in the header).
Here are the three functions and a txt example:
def readHeader(fileName):
fileIn = open(fileName, "r")
fileIn.readline()
day = fileIn.readline().replace("\n", "")
fileIn.readline()
time = fileIn.readline().replace("\n", "")
fileIn.readline()
company = fileIn.readline().replace("\n", "")
scope = fileIn.readline().replace(":", "").replace("\n", "")
fileIn.close()
return (day, time, company, scope)
def readFile(fileName):
expertsList = []
expertsList.append(readHeader(fileName))
fileIn = open(fileName, "r")
for line_counter in range(LNHEADER):
fileIn.readline()
fileInE.close()
return expertsList
def exceptionNH(fileName):
try:
assert fileName[10:17] == readFile(fileName)[3][0].lower().replace(":", "")
except AssertionError:
print("Error in input file: inconsistent name and header in file", fileName,".")
exit()
fileName = "file.txt"
exceptionNH("2018y03m28experts10h30.txt")
2018y03m28experts10h30.txt:
Day:
2018-03-28
Time:
10:30
Company:
XXX
Experts:
...
...
My problem here is that in the try statement I expect the assert to see the comparison as True and skip the except clause, but this is not happening.
I suspect that the .lower() is not working but I can't understand why.
If you see other things that could be better feel free to share, as I'm a new at python and want to improve myself.
I've found the error. I was thinking that when I want to get the first element from the first tuple inside a list, I would need to write list[position of item][position of tuple], instead of its inverse.
Following the mkrieger1's advice, I printed fileName[10:17] and readFile(fileName)[3][0].lower().replace(":", ""), the first was good but the second was not showing the third item of the first tuple (that's from readHeader) but the first item of the third tuple.
I've changed from readFile(fileName)[3][0].lower().replace(":", "") to readFile(fileName)[0][3].lower().replace(":", "") and it's working now, thank you for the help.

Python - How to check if the text is in a file txt?

I have a function that checks if the text is in file.txt or not.
The function works like this: If the text is contained in the file, the file is closed. If the text is not contained in the file, it is added.
But it doesn't work.
import urllib2, re
from bs4 import BeautifulSoup as BS
def SaveToFile(fileToSave, textToSave):
datafile = file(fileToSave)
for line in datafile:
if textToSave in line:
datafile.close()
else:
datafile.write(textToSave + '\n')
datafile.close()
urls = ['url1', 'url2'] # i dont want to public the links.
patGetTitle = re.compile(r'<title>(.*)</title>')
for url in urls:
u = urllib2.urlopen(url)
webpage = u.read()
title = re.findall(patGetTitle, webpage)
SaveToFile('articles.txt', title)
# so here. If the title of the website is already in articles.txt
# the function should close the file.
# But if the title is not found in articles.txt the function should add it.
You can change the SaveToFile function like this
Your title is a list and not a string so you should call it like this SaveToFile('articles.txt', title[0]) to get the first element of the list
def SaveToFile(fileToSave, textToSave):
    """Write textToSave as a new line unless some line already contains it.

    The file is opened in "r+" mode, so it must already exist.
    """
    with open(fileToSave, "r+") as datafile:
        # any() stops at the first matching line, like the for/break form.
        already_saved = any(textToSave in row for row in datafile)
        if not already_saved:
            datafile.write(textToSave + '\n')
Notes:
Since you were looping over an empty file, the loop did not even run once.
i.e.)
for i in []:
print i # This will print nothing since it is iterating over empty list same as yours
You have passed a list and not a string since re.findall returns a list object you have to pass the first element of the list to the function.
I have used for..else here if the loop is not terminated properly the else case will work.
i.e.)
for i in []:
print i
else:
print "Nooooo"
Output:
Nooooo
Just use r+ mode like this:
def SaveToFile(fileToSave, textToSave):
    """Append textToSave as a line if the file does not contain it yet.

    The file is opened in 'r+' mode, so it must already exist.
    """
    with open(fileToSave, 'r+') as datafile:
        existing = datafile.read()
        if textToSave in existing:
            return
        # read() left the stream at end of file, so this write appends.
        datafile.write(textToSave + '\n')
About that file mode, from this answer:
``r+'' Open for reading and writing. The stream is positioned at the
beginning of the file.
And re.findall() always returns a list, so if you try to write a list instead of a string you'll get an error.
So you could use:
def SaveToFile(fileToSave, textToSave):
    """Append the first candidate to the file unless it is already there.

    Args:
        fileToSave: Path of an existing text file (opened in 'r+' mode).
        textToSave: List of candidate strings, e.g. the result of
            re.findall(); only the first element is used.

    Returns:
        None. Nothing is written when textToSave is empty.
    """
    # An empty list means there is nothing to save.
    # (The original '=>' is not a Python operator; '>=' was intended.)
    if not textToSave:
        return
    textToSave = textToSave[0]
    with open(fileToSave, 'r+') as datafile:
        # read() leaves the stream at end of file, so the write appends.
        if textToSave not in datafile.read():
            datafile.write(textToSave + '\n')
You should refactor your SaveToFile function to like this.
def SaveToFile(fileToSave, titleList):
    """Append each title in titleList that the file does not contain yet.

    Args:
        fileToSave: Path of the text file; created if it does not exist.
        titleList: Iterable of title strings to save.
    """
    with open(fileToSave, 'a+') as f:
        # 'a+' opens with the stream positioned at end of file, so rewind
        # first; otherwise read() returns '' and every title is re-appended.
        f.seek(0)
        data = f.read()
        for titleText in titleList:
            if titleText not in data:
                # In append mode writes always go to the end of the file.
                f.write(titleText + '\n')
    # No explicit close(): the with-statement closes the file.
This function reads the contents of the file (creating the file if it does not exist) and, for each title in titleList, checks whether it is already in the file contents. A title that is already there is skipped; otherwise it is written to the file.
This seems closer to your problem.
This checks if the text in the file:
def is_text_in_file(file_name, text):
    """Return True if any line of the file contains text, else False."""
    with open(file_name) as fobj:
        # any() stops reading at the first line that contains the text.
        return any(text in row for row in fobj)
This uses the function above to check, and writes the text to the end of the file if it is not in the file yet.
def save_to_file(file_name, text):
    """Append text as a new line to file_name unless the file contains it.

    Args:
        file_name: Path of the text file to check and append to.
        text: String to look for and, if absent, write followed by '\\n'.
    """
    # Call the checker. The original tested whether the function object was
    # a member of the tuple (file_name, text), which is never true, so the
    # text was written on every call.
    if not is_text_in_file(file_name, text):
        with open(file_name, 'a') as fobj:
            fobj.write(text + '\n')

Is it possible to print a next line in a code?

Is it possible to make a method, which prints a next line of a code?
def print_next_line():
sth
import fxx
print 'XXX'
print_next_line()
file.split('/')
....
>>> 'XXX'
>>> 'file.split('/')'
I was thinking that It could be somewhere in the stack, but I'm not sure because it is next, not previous line.
Straight approach. I use inspect module to determine file and line where print_next_line was called. Later I read the file to find next string. You might want to add some error handling here (what if there is no next line in a file? and so on)
def print_next_line():
def get_line(f, lineno):
with open(f) as fp:
lines = fp.readlines()
return lines[lineno-1]
import inspect
callerframerecord = inspect.stack()[1]
frame = callerframerecord[0]
info = inspect.getframeinfo(frame)
line_ = info.lineno
file_ = info.filename
print get_line(file_, line_ + 1)
print 'XXX'
a = 1
print_next_line()
b = a*2
All you need is a profiling tool or just a debugger.
Use Python's inspect module:
import inspect
def print_next_line():
    """Print the source line that follows the call to this function.

    The line is read from the caller's own source file, so this also works
    when the function is imported from another module. If the call is on
    the last line of the file, or the source cannot be read, an empty line
    is printed.
    """
    import linecache

    caller = inspect.currentframe().f_back
    # Use the caller's file rather than __file__: __file__ names the module
    # that defines this function, which is the wrong file when the call
    # comes from elsewhere.
    # f_lineno and linecache.getline are both 1-based, so +1 is the next
    # line; getline returns '' instead of raising when it is out of range.
    next_line = linecache.getline(caller.f_code.co_filename, caller.f_lineno + 1)
    print(next_line.rstrip())
Well you could open() your .py file and iterate to find specific line, then print it.

Categories