I'm curious if there's a simpler way to remove a particular parameter from a url. What I came up with is the following. This seems a bit verbose. Libraries to use or a more pythonic version appreciated.
parsed = urlparse(url)
if parsed.query != "":
params = dict([s.split("=") for s in parsed.query.split("&")])
if params.get("page"):
del params["page"]
url = urlunparse((parsed.scheme,
None,
parsed.path,
None,
urlencode(params.items()),
parsed.fragment,))
parsed = urlparse(url)
Use urlparse.parse_qsl() to crack the query string. You can filter this in one go:
params = [(k,v) for (k,v) in parse_qsl(parsed.query) if k != 'page']
I've created a small helper class to represent a url in a structured way:
import cgi, urllib, urlparse
class Url(object):
def __init__(self, url):
"""Construct from a string."""
self.scheme, self.netloc, self.path, self.params, self.query, self.fragment = urlparse.urlparse(url)
self.args = dict(cgi.parse_qsl(self.query))
def __str__(self):
"""Turn back into a URL."""
self.query = urllib.urlencode(self.args)
return urlparse.urlunparse((self.scheme, self.netloc, self.path, self.params, self.query, self.fragment))
Then you can do:
u = Url(url)
del u.args['page']
url = str(u)
More about this: Web development peeve.
Related
I am using a class based service in python and I get error whenever I want to use it. Unable to figure out the reason.
#!/usr/bin/python
# -*- coding: utf-8 -*-
from xml.dom import minidom
from pysimplesoap.client import SoapClient
from pysimplesoap.helpers import sort_dict
MEDIA_ROOT = '/User/sunand/documents/resumes/'
parser = ResumeParser()
names = parser.get_names(MEDIA_ROOT)
print names
class ParserClient(SoapClient):
""" Extends the soap client to encode the response with utf-8 encoding.
"""
def wsdl_call(
self,
method,
*args,
**kwargs
):
""" Override wsdl_call method to make sure unmarshall is not called.
"""
operation = self.get_operation(method)
# get i/o type declarations:
inp = operation['input']
header = operation.get('header')
if 'action' in operation:
self.action = operation['action']
# construct header and parameters
if header:
self.__call_headers = sort_dict(header, self.__headers)
(method, params) = self.wsdl_call_get_params(method, inp,
*args, **kwargs)
response = self.call(method, *params)
return response
def send(self, method, xml):
""" Overrides the send method to get the actual xml content.
"""
content = super(ParserClient, self).send(method, xml)
self.result = content
return content
class ResumeParser(object):
""" Connects to the Resume Parser's XML api to get parsed data.
"""
def __init__(self, simple=True, timeout=60):
""" Initializes the ResumeParser class.
"""
self.wsdl = \
'http://jobsite.onlineresumeparser.com/rPlusParseResume.asmx?WSDL'
self.secret = 'my-secret-key' # Enter key here
self.encoding = 'base64'
self.simple = simple
self.client = ParserClient(wsdl=self.wsdl, timeout=timeout)
self.names = []
def get_file_content(self, file_path):
""" Return the encoded content for the given file.
"""
file_obj = open(os.path.abspath(file_path), 'r')
content = file_obj.read().encode(self.encoding)
file_obj.close()
return content
def get_names(self, path):
"""
Given a path to a folder that contains resume files this method
will parse the resumes and will return the names of the candidates
as a list.
"""
opt = os.path
resumes = [opt.join(path, r) for r in os.listdir(path)
if opt.isfile(opt.join(path, r))]
# Parse information for each resume.
for resume in resumes:
try:
xml_data = self.get_xml(resume)
name = self.get_name_from_xml(xml_data)
if name:
self.names.append(name)
except Exception, err:
# print name
print 'Error parsing resume: %s' % str(err)
return list(set(self.names))
def get_name_from_xml(self, data):
""" Returns the full name from the xml data given.
"""
xmldata = minidom.parseString(data)
name = xmldata.getElementsByTagName('CANDIDATE_FULL_NAME')
name = name[0].childNodes[0].data.title()
return name
def get_xml(self, filepath):
""" Fetches and returns the xml for the given file from the api.
"""
filename = os.path.basename(filepath)
extension = os.path.splitext(filepath)[1]
base64 = self.get_file_content(filepath)
filedata = {
'B64FileZippedContent': base64,
'FileName': filename,
'InputType': extension,
'UserID': 1,
'secretKey': self.secret,
}
get = \
(self.client.GetSimpleXML if self.simple else self.client.getHRXML)
get(**filedata)
return self.process_raw_xml()
def process_raw_xml(self, data=None):
""" Processes and returns the clean XML.
"""
raw = (data if data else self.client.result)
parsed = minidom.parseString(raw)
result = parsed.getElementsByTagName('GetSimpleXMLResult')[0]
text_node = result.childNodes[0]
data = text_node.data.encode('UTF-8')
return data
Upon running the code I am getting an error
TypeError: wsdl_call_get_params() got an unexpected keyword argument 'secretKey'
What am I doing wrong?
It looks like you are incorrectly overriding wsdl_call.
Firstly, we can see that SoapClient (which you extend in ParserClient), has a __getattr__ function that fetches pseudo-attributes of the SoapClient.
def __getattr__(self, attr):
"Return a pseudo-method that can be called"
if not self.services: # not using WSDL?
return lambda self=self, *args, **kwargs: self.call(attr,*args,**kwargs)
else: # using WSDL:
return lambda *args, **kwargs: self.wsdl_call(attr,*args,**kwargs)
You can see that this function is using wsdl_call to help it map functions to unknown attributes.
The specific pseudo-method that is causing the problem is in your code (or appears to be):
filedata = {
'B64FileZippedContent': base64,
'FileName': filename,
'InputType': extension,
'UserID': 1,
'secretKey': self.secret, # <-- the secretKey key word argument
}
get = \
(self.client.GetSimpleXML if self.simple else self.client.getHRXML)
get(**filedata)
# here client is an instance of your `ParserClient` (and `SoapClient`).
This above bit took me a while to track down. With a full stack trace I would have found it much quicker. Please always post stack traces (when there is one) in future when asking for help.
How to solve this
Provide a concrete implementation of GetSimpleXML and getHRXML. This will solve the immediate problem, but not the larger problem.
Rewrite wsdl_call
The rewritten section of code should check the value of the method argument and either do what you want, or delegate to the SoapClient implementation.
eg.
def wsdl_call(self, method, *args, **kwargs):
if method == "some_method":
return self._my_wsdl_call(method, *args, **kwargs)
else:
return super(ParserClient, self).wsdl_call(method, *args, **kwargs)
def _my_wsdl_call(self, method, *args, **kwargs):
...
How can I get specific word's definition from merriam-webster using python's script?
I have window with text box and button, and I want to print word's definition on the screen.
thanks
# Import libraries/ modules
import logging
logging.basicConfig(level = logging.INFO)
import json
import requests
# write custom definition
def connect_mw_dictionary(api_key, word):
logging.info("Connecting...")
URL = "https://www.dictionaryapi.com/api/v3/references/sd2/json/"+word+"?
key="+api_key
PARAMS = {'word': word,'key': api_key}
r = requests.get(url = URL, params = PARAMS)
r.encoding = 'utf-8'
if r.status_code == 200:
logging.info("Connection successful.")
return True, r.json()
logging.info("Connection failed.")
return False
def fetch_dictionary_result(res, lang, loc, audio_format, word):
dict_txt = str(res[0]['def'][0]['sseq'][0][0][1]['dt'][0][1]).replace("
{bc}","")
return dict_txt
api_key = "xxxxxxxxxxxx" # replace with you api key
word = "keyword" # replace with word to lookup
lang = "en"
loc = "us"
audio_format = "mp3"
status, result = connect_mw_dictionary(api_key, word)
if status == True:
status, dictionary_result = fetch_dictionary_result(result, lang, loc,
audio_format, word)
dictionary_result
I am assuming merriam-webster is a website. Check if they an API. If so you can use it to achieve your task. If they do not have an API, I don't see how you can achieve your task without some highly advanced hacking, crawling algorithm. My suggestion is, as it appears you are trying to develop a dictionary type app, research dictionary websites that have open APIs.
I have the following code in Python:
def post(self):
example = self.request.get("example")
other = self.request.get("other")
How do I get all post data if unknown? I'm very new to Python but something along the lines of:
def post(self):
array = self.request.get()
myJSON = MagicArrayToJson(array)
self.response.headers['Content-Type'] = 'application/json'
self.response.write(myJSON)
It may depend on the framework you use. But i suppose that there all have pretty same notation like self.request.data or self.request.body for post request.
Try self.request.data like this
def post(self):
data = self.request.data
# If data is in querystring, convert it to dict
# urlparse lib is convenient for this
myJSON = MagicArrayToJson(array)
self.response.headers['Content-Type'] = 'application/json'
self.response.write(myJSON)
I'm attempting to implement dynamic routing for a web framework. At the moment, the goal is to pass arguments into a function by way of the url. So, if user offers a url of "/page/23", then the route function will extract the "23" which will then be used as a parameter for the page function. I am getting a "keyerror", however.
import re
routing_table = {}
url = "/page/23"
def route(url, func):
key = url
key = re.findall(r"(.+?)/<[a-zA-Z_][a-zA-Z0-9_]*>", url)
if key:
params = re.findall(r"<([a-zA-Z_][a-zA-Z0-9_]*)>", url)
routing_table[key[0]] = [params, func]
else:
routing_table[url] = func
def find_path(url):
if url in routing_table:
return routing_table[url]
else:
return None
def page(page_id):
return "this is page %d" % page_id
route("/page/<page_id>", page)
print(routing_table[url])
When you called route, you used a url equal to "/page/<page_id>", but in the last line, url is a global variable equal to "/page/23".
It looks like there are other problems: replace your last line with
print(routing_table)
to see what you're doing.
everyone : )
I have A database that can search like "http://key.xxxxxx.org:10093/?q="
when type the keyword in url (behind the =mark), it can show lots of json data.
Now i write a simple webapp on GAE use webapp2 : on the /search page, when i type the keyword,
it can parse the jsondata content on the /result page. Here is my part of code :
# -*- coding: utf-8 -*-
import os
import re
import httplib
import json
import urllib
from string import letters
import webapp2
import jinja2
template_dir = os.path.join(os.path.dirname(__file__), 'templates')
jinja_env = jinja2.Environment(loader = jinja2.FileSystemLoader(template_dir),
autoescape = True)
def render_str(template, **params):
t = jinja_env.get_template(template)
return t.render(params)
def renderJson(query):
conn = httplib.HTTPConnection('xxx.xxxxx.org:10093')
conn.request('GET', '/?q=%s' % urllib.quote_plus(query))
res = conn.getresponse()
data = res.read()
j = json.loads(data)
results = []
result = j['data']['group'][0]['resultitem']
for l in result:
for k,v in l.items():
if k == 'word':
results.append(l[k])
return results
class BaseHandler(webapp2.RequestHandler):
def render(self, template, **kw):
self.response.out.write(render_str(template, **kw))
def write(self, *a, **kw):
self.response.out.write(*a, **kw)
class Search(BaseHandler):
def get(self):
self.render("search.html")
def post(self):
q = self.request.get('query')
if q:
self.redirect('/result/?q='+query)
class Result(BaseHandler):
def get(self):
q = self.request.get('query')
self.render('result.html', query = q, result = renderJson(q))
app = webapp2.WSGIApplication([
('/search', Search),
('/result', Result)
],
debug=True)
in the search.html, i wrote
<input type="text" name="q" value="{{query}}"></input>
and in result.html
<p>{{result}}</p>
For getting the parameter from the url you need to use:
q = self.request.get('query')
See the Getting Started guide and the webapp improved documentation.
Then you would pass that parameter to renderJson as renderJson(q), also in renderJson
the second line will need to quote the query string first (and pass the q argument)
conn.request('GET', '/?q=%s' % urllib.quote(query))
Finally for rendering the final output please read the Getting Started Guide.