When I run this function in Django, my output is None. What is wrong with the news() function?
Code:
import feedparser
from django.http import HttpResponse

def news():
    YahooContent = feedparser.parse("http://news.yahoo.com/rss/")
    for feed in YahooContent.entries:
        print feed.published
        print feed.title
        print feed.link + "\n"
    return

def html(request):
    html = "<html><body> %s </body></html>" % news()
    return HttpResponse(html)
Error:
webpage shows None
You are printing the results, not returning them. A bare return statement returns None, just like any function that has no return statement at all.
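For illustration, here is that behaviour in miniature:

def news():
    print 'something'
    return  # same as: return None

print news()  # prints "something", then "None"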
You should build the string in your method itself, like this:
def html(request):
    head = '<html><body>'
    foot = '</body></html>'
    entries = []
    for entry in feedparser.parse("http://news.yahoo.com/rss/").entries:
        entries.append('{}<br />{}<br />{}<br />'.format(
            entry.published, entry.title, entry.link))
    return HttpResponse(head + ''.join(entries) + foot)
Can you explain your code a little bit?
Imagine you have a list of "entries", like this:
entries = [a, b, c]
Each entry has .published, .title, and .link attributes that you want to print as a list in HTML.
You can do this easily by looping through it and using the print statement:
print('<ul>')
for entry in entries:
    print('<li>{0.published} - {0.title} - {0.link}</li>'.format(entry))
print('</ul>')
However, what we need here is to send this data to the browser as an HTML response. We can build the HTML string by replacing each print with concatenation onto a string that we keep adding to, like this:
result = '<ul>'
for entry in entries:
    result += '<li>{0.published} - {0.title} - {0.link}</li>'.format(entry)
result += '</ul>'
This will work, but it is inefficient and slow: each += builds a brand-new string. It is better to collect the strings in a list and join them together once at the end. This is what I did in my original answer:
result = ['<ul>']
for entry in entries:
    result.append('<li>{0.published} - {0.title} - {0.link}</li>'.format(entry))
result.append('</ul>')
Then I joined all the strings in the list together (the separator is the empty string, not a space), wrapped the result in a header and a footer, and returned it as the response:
return HttpResponse('<html><body>'+''.join(result)+'</body></html>')
Obviously, your news() method returns nothing (None).
The right way should be:
def news():
    YahooContent = feedparser.parse("http://news.yahoo.com/rss/")
    result = ''
    for feed in YahooContent.entries:
        result += feed.published + '\n'
        result += feed.title + '\n'
        result += feed.link + "\n"
    return result

def html(request):
    html = "<html><body> %s </body></html>" % news()
    return HttpResponse(html)
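Note that the '\n' separators will collapse to single spaces when the browser renders the HTML; if you want visible line breaks on the page, use <br /> tags as in the answer above.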
This is part of the Udacity course Web Search Engine. The goal of this quiz is to write a program which extracts all links from a web page. The output must contain only the links, but in my case the program returns all links plus "None" twice. I know the error is in the second part of the program, after the while and after the else, but I don't know what I should write there.
def get_next_target(page):
    start_link = page.find('<a href=')
    if start_link == -1:
        return None, 0
    else:
        start_quote = page.find('"', start_link)
        endquo = page.find('"', start_quote + 1)
        url = page[(start_quote + 1):endquo]
        return url, endquo

page = 'i know what you doing summer <a href="Udasity".i know what you doing summer <a href="Georgia" i know what you doing summer '

def ALLlink(page):
    url = 1
    while url != None:
        url, endquo = get_next_target(page)
        if url:
            print url
            page = page[endquo:]
        else:
            print ALLlink(page)
First, you can remove your else statement in your ALLlink() function since it's not doing anything.
Also, when comparing to None, you should use is not instead of !=:
while url != None:      # bad
while url is not None:  # good
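An illustrative (contrived) sketch of why the identity check is safer: != dispatches to a class's __ne__ method, which can be overridden, whereas is not compares object identity and cannot be fooled:

class Weird(object):
    def __ne__(self, other):
        return False  # claims it differs from nothing

w = Weird()
print w != None      # False -- the overridden __ne__ lies
print w is not None  # True  -- identity cannot be overridden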
That said, I think your error is in your last line:
print ALLlink(page)
You basically have two print statements. The first is inside your function and the second is on the last line of your script. Really, you don't need the last print statement there because you're already printing in your ALLlink() function. So if you change the line to just ALLlink(page), I think it'll work.
If you do want to print there, you could modify your function to store the URLs in an array, and then print that array. Something like this:
def ALLlink(page):
    urls = []
    url = 1
    while url is not None:
        url, endquo = get_next_target(page)
        if url:
            urls.append(url)
            page = page[endquo:]
    return urls

print ALLlink(page)
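With the sample page string from the question, this should print both extracted targets: ['Udasity', 'Georgia'].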
I am returning a bunch of strings to the front end using a Flask RESTful API. Each string is generated after an ML algorithm runs on the input string and classifies it to a predefined answer. The back end is MongoDB.
The code given below was working fine earlier. Ever since I inserted the lines marked with * it has not been returning any value. After debugging I found the built-in error: the view function could not return a list.
Below is my code (only relevant part)
# Imports implied by the snippet below (not shown in the original):
from flask import Flask, request, render_template
from fuzzywuzzy import process
import json, re, random
import pandas as pd

app = Flask(__name__)

@app.route('/')
def index():
    return render_template('index.html')

@app.route('/response/', methods=['GET', 'POST'])
def response():
    if request.method == 'POST':
        text_org = request.json['foo']
        text = json.loads(json.dumps(text_org))
        text = text.lower()
        if len(text.split()) == 0:
            return 'Please ask a question'
        elif len(text.split()) <= 3:
            resp = ' '.join(['What more would you like to know about', clean_text(text), '? Please be little more specific..'])
            return resp
        else:
            df_to_match = pd.DataFrame({'Slot_Value': tbl_df['slot_value'], 'User_In': [clean_text(text)] * len(tbl_df['slot_value'])})
            is_match = [process.extract(i, df_to_match['Slot_Value'], limit=3) for i in df_to_match['User_In']]
            is_match_ratio = [i for w in is_match for i in w]
            if list(is_match_ratio[0]).pop(1) > 65:
                #tup = [w for (w,i,n) in is_match_ratio]
                x = model.predict([text])
                x_t = le.inverse_transform(x)  # * new line
                x_t = x_t.tolist()  # * new line
                x_f = ' '.join(x_t)
                x_f = re.sub('[^A-Za-z0-9,]', ' ', x_f)  # * new line
                y = x_f.replace(' ', '').split(',')
                slot_value = tbl_df.loc[tbl_df.Combi.str.contains(y), 'slot_value']
                text_clean = clean_text(text)  # user-defined function for text preprocessing
                df_exact = pd.DataFrame({'Slot_Value': slot_value, 'User_input': [text_clean] * len(slot_value)})
                slot_exact = [process.extract(i, df_exact['Slot_Value'], limit=1) for i in df_exact['User_input']]
                slot_exact = ' '.join([n[0] for n in slot_exact[0]])
                for i in db_col.find({"slot_value": slot_exact}, {"Answer": 1, "_id": 0}):  # querying MongoDB; only 'Answer' shall be retrieved
                    m = json.dumps(i).split('Answer', 1)[-1]
                    resp = m.replace('\\n', '<br>').replace('"', '').replace(':', '').replace('}', '')
                    resp_final = email_url(resp)  # * new line
                    return resp_final
            else:
                resp = ' '.join(['Sorry I have not understood this, please rephrase the question or'])
                return resp
    else:
        resp = [random.choices(msg)]
        return resp

if __name__ == "__main__":
    print("**Starting Server...")
    app.run(host='0.0.0.0', port=5002, debug=True)
email_url is a UDF that performs some regex and returns the variable resp with HTML tags. What I have figured out is that all the exceptional cases, like len(text.split()) <= 3 and ' '.join(['Sorry I have not understood ..... ']), are working fine. So the problem must be with the new lines marked with *; maybe I am missing something?
Edit: a few more details
le: LabelEncoder
model: MultinomialNB()
The above model is used to predict the class for new input received via text_org = request.json['foo'].
I got the solution. In the line below:
slot_value = tbl_df.loc[tbl_df.Combi.str.contains(y), 'slot_value']
instead of y, which is a list, I changed it to ','.join(y).
To enlighten others: le.inverse_transform always produces an array, which needs to be converted to a list. In the above code snippet, y is that list, hence the error was occurring.
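To illustrate with a minimal, hypothetical example (tbl stands in for tbl_df): Series.str.contains expects a string (or compiled regex) pattern, so handing it a list raises a TypeError, while joining the list into a single pattern string works:

import pandas as pd

tbl = pd.DataFrame({'Combi': ['price,cost', 'delivery,time'],
                    'slot_value': ['price', 'delivery']})
y = ['price', 'cost']  # a list, like the one built from the model output

# tbl.Combi.str.contains(y)  # TypeError: a list is not a valid pattern
matches = tbl.loc[tbl.Combi.str.contains(','.join(y)), 'slot_value']
print(matches)  # matches the first row only: 'price'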
I wrote some code that grabs the numbers I need from this website, but I don't know what to do next.
It grabs the numbers from the table at the bottom. The ones under calving ease, birth weight, weaning weight, yearling weight, milk and total maternal.
#!/usr/bin/python
import urllib2
from bs4 import BeautifulSoup
import pyperclip

def getPageData(url):
    if not ('abri.une.edu.au' in url):
        return -1
    webpage = urllib2.urlopen(url).read()
    soup = BeautifulSoup(webpage, "html.parser")
    # This finds the epd tree and saves it as a searchable list
    pedTreeTable = soup.find('table', {'class': 'TablesEBVBox'})
    # This puts all of the epds into a list.
    # It looks for anything in pedTreeTable with a td tag.
    pageData = pedTreeTable.findAll('td')
    pageData.pop(7)
    return pageData

def createPedigree(animalPageData):
    '''Make animalPageData much more useful. Strip the text out and put it in a dict.'''
    animals = []
    for animal in animalPageData:
        animals.append(animal.text)
    prettyPedigree = {
        'calving_ease': animals[18],
        'birth_weight': animals[19],
        'wean_weight': animals[20],
        'year_weight': animals[21],
        'milk': animals[22],
        'total_mat': animals[23]
    }
    for animalKey in prettyPedigree:
        if animalKey != 'year_weight' and animalKey != 'dam':
            prettyPedigree[animalKey] = stripRegNumber(prettyPedigree[animalKey])
    return prettyPedigree

def stripRegNumber(animal):
    '''Returns the animal with its registration number stripped.'''
    lAnimal = animal.split()
    strippedAnimal = ""
    for word in lAnimal:
        if not word.isdigit():
            strippedAnimal += word + " "
    return strippedAnimal

def prettify(pedigree):
    '''Takes the pedigree and prints it out in a usable format.'''
    s = ''
    pedString = ""
    # This is also ugly, but it was the only way I found to format with a variable
    cFormat = '{{:^{}}}'
    rFormat = '{{:>{}}}'
    # row 1 of string
    s += rFormat.format(len(pedigree['calving_ease'])).format(
        pedigree['calving_ease']) + '\n'
    # row 2 of string
    s += rFormat.format(len(pedigree['birth_weight'])).format(
        pedigree['birth_weight']) + '\n'
    # row 3 of string
    s += rFormat.format(len(pedigree['wean_weight'])).format(
        pedigree['wean_weight']) + '\n'
    # row 4 of string
    s += rFormat.format(len(pedigree['year_weight'])).format(
        pedigree['year_weight']) + '\n'
    # row 5 of string
    s += rFormat.format(len(pedigree['milk'])).format(
        pedigree['milk']) + '\n'
    # row 6 of string
    s += rFormat.format(len(pedigree['total_mat'])).format(
        pedigree['total_mat']) + '\n'
    return s

if __name__ == '__main__':
    while True:
        url = raw_input('Input a url you want to use to make life easier: \n')
        pageData = getPageData(url)
        s = prettify(createPedigree(pageData))
        pyperclip.copy(s)
        if len(s) > 0:
            print 'the easy string has been copied to your clipboard'
I've just been using this code for easy copying and pasting. All I have to do is insert the URL, and it saves the numbers to my clipboard.
Now I want to use this code on my website: I want to be able to enter a URL and have these numbers displayed in a table on my page.
My questions are as follows:
How do I use the python code on the website?
How do I insert collected data into a table with HTML?
It sounds like you would want to use something like Django. Although the learning curve is a bit steep, it is worth it, and it (of course) supports Python.
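As a rough sketch only (the view name pedigree_view and the url query parameter are made up for illustration), a single Django view could reuse the getPageData() and createPedigree() functions above, assuming they are importable from your Django app, and build the HTML table in Python:

from django.http import HttpResponse

def pedigree_view(request):
    # Read the target URL from a query parameter, e.g. /pedigree/?url=...
    url = request.GET.get('url', '')
    if not url:
        return HttpResponse('<html><body>No URL supplied.</body></html>')
    pedigree = createPedigree(getPageData(url))
    # Build one <tr> per trait, then wrap all the rows in a <table>.
    rows = ''.join('<tr><td>{}</td><td>{}</td></tr>'.format(k, v)
                   for k, v in pedigree.items())
    return HttpResponse('<html><body><table>{}</table></body></html>'.format(rows))

In a real project you would render a Django template instead of concatenating HTML strings, but the flow is the same: the view fetches and parses the remote page, then hands the numbers to the HTML.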
I am trying to gather a bunch of links using XPath, which then need to be scraped from the next page. However, I keep getting the error "can only parse strings". I tried looking at the type of lk, and it was a string after I cast it. What seems to be wrong?
# Imports implied by the snippet below (not shown in the original):
import unicodedata
import urllib2
from lxml import etree

def unicode_to_string(types):
    try:
        types = unicodedata.normalize("NFKD", types).encode('ascii', 'ignore')
        return types
    except:
        return types

def getData():
    req = "http://analytical360.com/access-points"
    page = urllib2.urlopen(req)
    tree = etree.HTML(page.read())
    i = 0
    for lk in tree.xpath('//a[@class="sabai-file sabai-file-image sabai-file-type-jpg "]//@href'):
        print "Scraping Vendor #" + str(i)
        trees = etree.HTML(urllib2.urlopen(unicode_to_string(lk)))
        for ll in trees.xpath('//table[@id="archived"]//tr//td//a//@href'):
            final = etree.HTML(urllib2.urlopen(unicode_to_string(ll)))
You should pass strings to etree.HTML(), not the file-like object returned by urllib2.urlopen().
Perhaps change the code like so:
trees = etree.HTML(urllib2.urlopen(unicode_to_string(lk)).read())
for i, ll in enumerate(trees.xpath('//table[@id="archived"]//tr//td//a//@href')):
    final = etree.HTML(urllib2.urlopen(unicode_to_string(ll)).read())
Also, you don't seem to increment i anywhere; the enumerate() call above takes care of that counter for you.
For those familiar with imageboards: an OP post may or may not contain a 'subject' and a 'comment'.
I wrote this to search all pages of a given board for thread subjects and OP posts.
If my search term exists in one of them but the other key is nonexistent, the thread will not get appended to my res list.
So how do I search JSON keys where one key or the other may not exist?
import urllib, json, HTMLParser

def s4Chan(board, search):
    logo = '3::54chan'
    res = []
    p = HTMLParser.HTMLParser()
    catalog = json.load(urllib.urlopen('https://api.4chan.org/%s/catalog.json' % board))
    for i in catalog:
        for j in i['threads']:
            try:
                if search.lower() in j['sub'].lower() or search.lower() in j['com'].lower():
                    subject = j['sub']
                    post = p.unescape(str(j['com'])).replace('<br>', ' ')
                    if len(post) > 300:
                        post = post[0:300]
                        post = post + '...'
                    text = str('%s /%s/ %s | %s | %s (R:%s, I:%s)' % (logo, board, subject, post, 'https://4chan.org/%s/res/%s' % (board, j['no']), j['replies'], j['images']))
                    res.append(text)
            except KeyError:
                continue
    return res
json.load returns objects as Python dictionaries. You can, for example, use the get method of dict:
if search.lower() in j.get('sub', '').lower() or search.lower() in j.get('com', '').lower():
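As a small illustration with a hypothetical thread dict: get falls back to the supplied default instead of raising KeyError, so a thread missing one of the keys simply fails to match rather than being skipped by the except clause:

j = {'sub': 'Board games'}      # 'com' key missing from this thread
print j.get('sub', '').lower()  # prints: board games
print j.get('com', '').lower()  # prints an empty string; no KeyError raised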