How can I implement dynamic routing in Python?

I'm attempting to implement dynamic routing for a web framework. At the moment, the goal is to pass arguments into a function by way of the URL. So, if a user requests a URL of "/page/23", the route function should extract the "23", which will then be used as a parameter for the page function. However, I am getting a KeyError.
import re

routing_table = {}
url = "/page/23"

def route(url, func):
    # split the pattern into its static prefix and the <param> placeholders
    key = re.findall(r"(.+?)/<[a-zA-Z_][a-zA-Z0-9_]*>", url)
    if key:
        params = re.findall(r"<([a-zA-Z_][a-zA-Z0-9_]*)>", url)
        routing_table[key[0]] = [params, func]
    else:
        routing_table[url] = func

def find_path(url):
    if url in routing_table:
        return routing_table[url]
    else:
        return None

def page(page_id):
    return "this is page %d" % page_id

route("/page/<page_id>", page)
print(routing_table[url])

When you called route, you used a url equal to "/page/<page_id>", but in the last line url is the global variable equal to "/page/23". route() stored the handler under the key "/page" (the static prefix captured by the regex), so routing_table["/page/23"] raises a KeyError.
It looks like there are other problems; replace your last line with
print(routing_table)
to see what you're doing.
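To actually dispatch "/page/23" to page(23), the lookup has to match the stored pattern against the concrete URL instead of using the URL as a literal dictionary key. Here is a minimal sketch of one way find_path could do that, building on the routing_table above (the single-parameter regex match and the int() conversion are assumptions for illustration, not part of the original code):

import re

def find_path(url):
    # exact (static) match first
    if url in routing_table:
        return routing_table[url], []
    # otherwise try each stored dynamic prefix: "/page" should match "/page/23"
    for prefix, entry in routing_table.items():
        if isinstance(entry, list):  # dynamic route, stored as [params, func]
            params, func = entry
            match = re.fullmatch(re.escape(prefix) + r"/([^/]+)", url)
            if match:
                return func, [match.group(1)]
    return None, []

func, args = find_path("/page/23")
if func:
    print(func(int(args[0])))  # -> this is page 23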

Related

Get parameters in a GET request in Django REST framework?

I want to get the parameters sent to my REST API: I want to obtain the parameters, use them to call another API, and return the third-party API's response. But for name and comic I get None. I am sending this to
http://127.0.0.1:8000/searchComics/
{name:"3-D Man","comics":12}
This is my view:
import json
import requests
from rest_framework import status
from rest_framework.response import Response
from rest_framework.views import APIView

class MarvelApi(APIView):
    def get(self, request):
        private_key = "88958f2d87bd2c0c2fa07b7ea654bcdf9f0389b3"
        public_key = "8d415ffcc9add56b0a47c0a7c851afc3"
        ts = 1
        md5_hash = "46ecbbd63108b0561b8778a57823bd34"
        query_params = self.request.query_params
        name = query_params.get('kword', None)
        comic = query_params.get('comic', None)
        end_point = f"https://gateway.marvel.com:443/v1/public/characters?ts={ts}&apikey={public_key}&hash={md5_hash}&name={name}&comic={comic}"
        response = requests.get(end_point)
        response_json = json.loads(response.text)
        return Response(status=status.HTTP_200_OK, data=response_json)
I think the problem is these two lines:
name = query_params.get('kword', None)
comic = query_params.get('comic', None)
They do not capture the values correctly. Do you know how to solve this?
You wanted to get them from the GET method, but instead you gave a dictionary, so I guess you sent it via POST. Instead of posting a dictionary you should put the parameters in the URL:
http://127.0.0.1:8000/searchComics/?name=3-D+Man&comic=12
You also probably have typos: the view reads 'kword' while the URL sends 'name', and you send the plural "comics" while the view looks for the singular "comic".
And if you want to send the data with the POST method instead, just change def get(...) to def post(...).
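For illustration, a minimal sketch of a view that reads the two query parameters from the corrected URL above (the parameter names follow that URL; the echo response body is just for demonstration):

from rest_framework.response import Response
from rest_framework.views import APIView

class MarvelApi(APIView):
    def get(self, request):
        # request.query_params holds everything after '?' in the URL, so
        # /searchComics/?name=3-D+Man&comic=12 yields:
        name = request.query_params.get('name')    # "3-D Man"
        comic = request.query_params.get('comic')  # "12" (always a string)
        return Response(data={"name": name, "comic": comic})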

REST API requests - using concurrent.futures in the right way

The code below is a sample from my complete program; I have tried to make it understandable.
It sends requests to a REST API. It starts with a URL and the number of pages for this specific search, and tries to fetch the content of each page.
Each page has several results. Each result becomes a FinalObject.
Because there are as many API requests as there are pages, I decided to use multi-threading and the concurrent.futures module.
=> It works, but as I'm new to coding and Python, I still have these 2 questions:
How to use ThreadPoolExecutor sequentially in this case, i.e. keep the results in page order (see the sketch after the code),
Is there a better way to handle multi-threading in this case?
from concurrent.futures import ThreadPoolExecutor
from requests import get as re_get

def main_function(global_page_number, headers, url_request):
    # create a list of page numbers
    pages_numbers_list = [i for i in range(global_page_number)]
    # for each page, call the page_handler (multi-threading)
    with ThreadPoolExecutor(max_workers=10) as executor:
        for item in pages_numbers_list:
            executor.submit(
                page_handler,
                item,
                url_request,
                headers
            )

def page_handler(page_number, url_request, headers):
    # we change the page number in the url request
    url_request = change_page(url_request, page_number)
    # new request with the new url
    result = re_get(url_request, headers=headers)
    result = result.json()
    # in the result we find the list of dicts used to create the
    # final objects
    final_object_creation(result['results_list'])

def change_page(url_request, new_page_number):
    "to increment the value of the 'page=' attribute in the url"
    current_nb_page = ''
    start_nb = url_request.find("page=") + len('page=')
    # collect every digit of the current page number; the index must
    # advance, otherwise the loop never terminates
    while start_nb < len(url_request) and url_request[start_nb].isdigit():
        current_nb_page += url_request[start_nb]
        start_nb += 1
    new_url_request = url_request.replace("page=" + current_nb_page,
                                          "page=" + str(new_page_number))
    return new_url_request

def final_object_creation(results_list):
    'builds the final objects from the requests.get() results'
    global current_id_decision, dict_decisions
    # each item in the results list should become an instance of the final object
    for item in results_list:
        # set the identifier of the new Decision object
        # (note: incrementing a shared global from several threads is not
        # thread-safe without a lock)
        current_id_decision += 1
        new_id = current_id_decision
        # create the Decision object and add it to the decisions dict
        dict_decisions[new_id] = FinalObject(item)

class FinalObject:
    def __init__(self, content):
        self.content = content

current_id_decision = 0
dict_decisions = {}
# headers must be a dict, not a string; empty here for the sample
main_function(1000, {}, "https://api/v1.0/search?page=0&query=test")
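On the first question: executor.map returns results in the order the tasks were submitted, even though the underlying calls run concurrently, so it is a natural fit when pages must be processed in sequence. A minimal sketch, assuming a fetch_page helper that stands in for the real request (it is not part of the original program):

from concurrent.futures import ThreadPoolExecutor

def fetch_page(page_number):
    # placeholder for the real GET request
    return "content of page %d" % page_number

with ThreadPoolExecutor(max_workers=10) as executor:
    # map() yields results in submission order: page 0, 1, 2, ...
    for page_content in executor.map(fetch_page, range(5)):
        print(page_content)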

How to load web scraped data using Pandas and Beautifulsoup into Dataframe?

I have this code, which scrapes the Hacker News website with beautifulsoup4, and I am looking for a way to save the results into a DataFrame using Pandas. I have already imported pandas in the code below, but I do not know how to save the results into a DataFrame. Right now it only scrapes the top Hacker News posts, but that can be changed.
import pandas as pd
from requests import get
from requests.exceptions import RequestException
from contextlib import closing
from bs4 import BeautifulSoup
from math import ceil
import json, sys, argparse, validators

MAX_NUM_POSTS = 100

class HackerNewsScraper:
    URL = 'https://news.ycombinator.com/news'

    def __init__(self, posts):
        self._total_posts = posts
        self._total_pages = int(ceil(posts / 30))
        self._stories = []

    def scrape_stories(self):
        """
        Fetches all HTML data.
        Each page is limited to 30 stories; this function will ensure enough pages are fetched.
        """
        page = 1
        while page <= self._total_pages:  # makes sure to visit a sufficient number of pages
            url = '{}?p={}'.format(self.URL, page)
            html = get_html(url)
            self.parse_stories(html)
            page += 1

    def parse_stories(self, html):
        """
        Given a BeautifulSoup nested data structure, html, parse_stories(html) will parse the data and select the desired fields.
        After getting title, uri, author, comments, points, and rank, it will save them in dictionary form in self._stories.
        """
        for storytext, subtext in zip(html.find_all('tr', {'class': 'athing'}),
                                      html.find_all('td', {'class': 'subtext'})):
            storylink = storytext.find_all('a', {'class': 'storylink'})
            sublink = subtext.select('a')
            # All requested data being saved in the dictionary story below
            TITLE = storylink[0].text.strip()
            LINK = storylink[0]['href']
            AUTHOR = sublink[0].text
            COMMENTS = sublink[-1].text
            POINTS = subtext.select('span')[0].text
            RANK = storytext.select('span.rank')[0].text.strip('.')
            story = {
                'title': TITLE,
                'uri': LINK,
                'author': AUTHOR,
                'points': POINTS,
                'comments': COMMENTS,
                'rank': RANK
            }
            # Make sure data satisfies requirements
            story = validate_story(story)
            # self._stories is a list of dictionaries holding the requested number of stories
            self._stories.append(story)
            # If the required number of stories is met, stop parsing
            if len(self._stories) >= self._total_posts:
                return

    def print_stories(self):
        """
        Outputs the stories from list-of-dictionary format to JSON on STDOUT.
        """
        json.dump(self._stories, sys.stdout, indent=4)

    def get_stories(self):
        """
        Returns the scraped stories to the user in list-of-dictionary format.
        Used for testing purposes.
        """
        return self._stories

def get_html(url):
    """
    Runs the HTML data through BeautifulSoup to get a BeautifulSoup object, a nested data structure.
    """
    response = get_response(url)
    if response is not None:
        html = BeautifulSoup(response, 'html.parser')
        return html

def validate_story(story):
    """
    Ensures that all the story data is valid according to the task.
    Will return valid data for each field.
    """
    story['title'] = story['title'][:256]
    if not valid_title(story['title']):
        story['title'] = 'Valid title not found'
    story['author'] = story['author'][:256]
    if not valid_author(story['author']):
        story['author'] = 'Valid author not found'
    if not valid_uri(story['uri']):
        story['uri'] = 'Valid URI not found'
    story['comments'] = validate_number(story['comments'])
    story['points'] = validate_number(story['points'])
    story['rank'] = validate_number(story['rank'])
    return story

def valid_title(title):
    """
    Ensures that title is a non-empty string with <= 256 characters.
    """
    return (len(title) <= 256 and title)

def valid_author(author):
    """
    Ensures that author is a non-empty string with <= 256 characters.
    Solved the issue of not finding an author by checking the fetched data against HN username rules.
    """
    if author.find(' ') > -1:  # Hacker News usernames don't support whitespace
        return False
    return (len(author) <= 256 and author)

def valid_uri(url):
    """
    To be able to find the scraped stories, we need their URL.
    If data is not a valid URL, return False.
    """
    if validators.url(url):
        return True
    return False

def validate_number(numString):
    """
    Will make sure that the returned number is an integer.
    Will strip any non-digits from the input and return the first number.
    """
    if numString.find('ago') > -1:  # if not found, 'time since posted' would replace points, for example
        return 0
    digits = [int(s) for s in numString.split() if s.isdigit()]
    if len(digits) > 0:
        return digits[0]
    return 0

def get_response(url):
    """
    Attempts to get the content at 'url' by making an HTTP GET request.
    If the content-type of the response is some kind of HTML/XML, return the
    text content, otherwise return None.
    """
    try:
        with closing(get(url, stream=True)) as resp:
            if is_good_response(resp):
                return resp.content
            else:
                return None
    except RequestException as e:
        log_error('Error during requests to {0} : {1}'.format(url, str(e)))
        return None

def is_good_response(resp):
    """
    Returns True if the response seems to be HTML, False otherwise.
    """
    content_type = resp.headers['Content-Type'].lower()
    return (resp.status_code == 200
            and content_type is not None
            and content_type.find('html') > -1)

def log_error(e):
    """
    Log the errors. Currently just printing them out to the user.
    """
    print(e)

def validate_input(arg, arg_max):
    """
    Validate the user input. Makes sure it is less than or equal to 100 posts.
    """
    error_msg = 'Posts cannot exceed {}'.format(arg_max)
    if arg > arg_max:
        raise argparse.ArgumentTypeError(error_msg)

def parse_arguments():
    """
    Parses the argument input from the user. Default is 10.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--posts', '-p', metavar='n', type=int, default=1, help='number of posts (max 100)')
    args = parser.parse_args()
    validate_input(args.posts, MAX_NUM_POSTS)
    return args.posts

def main():
    """
    If user input is valid, will create a scraper, fetch the requested number of posts and print them to the user.
    """
    try:
        posts = parse_arguments()
        hnews_scraper = HackerNewsScraper(posts)
        hnews_scraper.scrape_stories()
        hnews_scraper.print_stories()
    except argparse.ArgumentTypeError as ex:
        log_error(ex)

if __name__ == '__main__':
    main()
Try This:
Don't forget to import Pandas. Each scraped story is already a dictionary, so the simplest way is to build the DataFrame straight from the list of story dictionaries, e.g. after scraping:

hnews_scraper = HackerNewsScraper(posts)
hnews_scraper.scrape_stories()
df = pd.DataFrame(hnews_scraper.get_stories(),
                  columns=['title', 'uri', 'author', 'points', 'comments', 'rank'])
print(df)

Note that zipping the single TITLE, LINK, ... strings would iterate over their characters, so zip() is not the right tool here; a list of dictionaries maps cleanly onto DataFrame rows.
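If the results should also be persisted, the same DataFrame can be written out directly; a one-line sketch (the filename is an assumption):

df.to_csv('hn_stories.csv', index=False)  # 'hn_stories.csv' is a hypothetical output file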

How to create a dynamic URL with = and question marks in Flask

How do I create a URL in Flask that looks like:
localhost/something/info?userID=123&itemID=456
I am interested in how to generate the info?userID=123&itemID=456 part.
Is it:
@app.route('something/info<userID>&<itemID>/')
def do_something(userID, itemID):
    return
And I have seen something about "next" being used, but I can't find any good example.
Edit
I looked at the other question but I didn't understand the example there.
In your case userID and itemID are GET parameters, not part of the route.
A Flask request has view_args (which are used as arguments to the view) and args (other parameters that are not passed to the view function, but which you can still use).
Here is an example which explains how it works.
from flask import Flask, request, url_for

app = Flask(__name__)

@app.route('/<string:route_arg>/')
def route(route_arg):
    message = [
        # route_arg - part of the route and an argument of our view function
        'route_arg = %s ' % route_arg,
        # we can also get route_arg from the request
        'route_arg from request = %s ' % request.view_args.get('route_arg'),
        # request.args - parameters after '?' in the url
        'userID = %s' % request.args.get('userID'),
        'itemID = %s' % request.args.get('itemID'),
        # example of a generated url
        'url with args = %s' % url_for('route', route_arg='info', userID=123, itemID=456)
    ]
    return '<br/>'.join(message)
Let's open /info/; you will see the following result:
route_arg = info  # because it is part of the route and an argument
route_arg from request = info
# because we didn't set the query parameters
userID = None
itemID = None
url with args = /info/?itemID=456&userID=123
Let's open /info/?itemID=456&userID=123:
route_arg = info
route_arg from request = info
# not part of the route, but part of the query string (?itemID=456&userID=123)
userID = 123
itemID = 456
url with args = /info/?itemID=456&userID=123
So, in your case the parameters will not be used as arguments to the view; you should work with them using request.args, as in the sketch below.
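Applied to the URL from the question, a minimal sketch (the endpoint name do_something and the path come from the question; the response body is an assumption for illustration):

from flask import Flask, request, url_for

app = Flask(__name__)

@app.route('/something/info')
def do_something():
    # userID and itemID arrive as query parameters, not route arguments
    user_id = request.args.get('userID')
    item_id = request.args.get('itemID')
    return 'userID=%s itemID=%s' % (user_id, item_id)

with app.test_request_context():
    # url_for appends unknown keyword arguments as query parameters
    print(url_for('do_something', userID=123, itemID=456))
    # -> /something/info?userID=123&itemID=456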

Python script to get data from Merriam-Webster

How can I get a specific word's definition from Merriam-Webster using a Python script?
I have a window with a text box and a button, and I want to print the word's definition on the screen.
Thanks.
# Import libraries/modules
import logging
logging.basicConfig(level=logging.INFO)
import json
import requests

# write custom definition
def connect_mw_dictionary(api_key, word):
    logging.info("Connecting...")
    # the word goes in the path; the API key is the only query parameter
    URL = "https://www.dictionaryapi.com/api/v3/references/sd2/json/" + word
    PARAMS = {'key': api_key}
    r = requests.get(url=URL, params=PARAMS)
    r.encoding = 'utf-8'
    if r.status_code == 200:
        logging.info("Connection successful.")
        return True, r.json()
    logging.info("Connection failed.")
    # return a pair so the caller can always unpack two values
    return False, None

def fetch_dictionary_result(res, lang, loc, audio_format, word):
    # first sense of the first entry; '{bc}' is a Merriam-Webster formatting token
    dict_txt = str(res[0]['def'][0]['sseq'][0][0][1]['dt'][0][1]).replace("{bc}", "")
    return dict_txt

api_key = "xxxxxxxxxxxx"  # replace with your API key
word = "keyword"  # replace with the word to look up
lang = "en"
loc = "us"
audio_format = "mp3"

status, result = connect_mw_dictionary(api_key, word)
if status:
    dictionary_result = fetch_dictionary_result(result, lang, loc, audio_format, word)
    print(dictionary_result)
I am assuming Merriam-Webster is a website. Check if they have an API; if so, you can use it to achieve your task. If they do not have an API, I don't see how you can achieve this without some fairly involved crawling. My suggestion is, as it appears you are trying to develop a dictionary-type app, research dictionary websites that have open APIs.
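Since the question mentions a window with a text box and a button, here is a minimal tkinter sketch wiring such a UI to the lookup functions above (the widget layout and labels are assumptions, not part of the original post):

import tkinter as tk

def on_lookup():
    # read the word from the text box and look it up via the functions above
    word = entry.get().strip()
    ok, result = connect_mw_dictionary(api_key, word)
    if ok:
        output.config(text=fetch_dictionary_result(result, lang, loc, audio_format, word))
    else:
        output.config(text="Lookup failed.")

root = tk.Tk()
entry = tk.Entry(root)
entry.pack()
tk.Button(root, text="Define", command=on_lookup).pack()
output = tk.Label(root, wraplength=400)
output.pack()
root.mainloop()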
