Using Python to search a string where a number iterates

I'm trying to write a script that searches Google for a string, loops while incrementing a number inside the string, and prints the top links. I have this:
import urllib.parse
import urllib.request
import json as m_json

for x in range(3, 5):
    query = '"Amazon Best Sellers Rank: #' + str(x) + ' in Kitchen & Dining": Amazon.com'
    query = urllib.parse.urlencode({'q': query})
    response = urllib.request.urlopen('http://ajax.googleapis.com/ajax/services/search/web?v=1.0&' + query).read().decode()
    json = m_json.loads(response)
    results = json['responseData']['results']
    for result in results:
        title = result['title']
        url = result['url']  # was URL in the original and that threw a name error exception
        print(title + '; ' + url)
I'm getting this error on the results = ... line:
TypeError: 'NoneType' object is not subscriptable

You posted the same question two months ago, and now you are posting a link to it again.
The answer to your question has also already been provided on Stack Overflow.
But I am posting the code for you once more. Using your code, I get the desired result in Python 2.7:
import urllib
import json as m_json

for x in range(3, 5):
    query = 'x mile run'
    query = urllib.urlencode({'q': query})
    response = urllib.urlopen('http://ajax.googleapis.com/ajax/services/search/web?v=1.0&' + query).read()
    json = m_json.loads(response)
    results = json['responseData']['results']
    for result in results:
        title = result['title']
        url = result['url']
        print(title + '; ' + url)
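A likely cause of the original TypeError: the legacy Google AJAX Search API was deprecated long ago and often answers with "responseData": null, so indexing it fails. A minimal defensive sketch for the Python 3 version (the responseDetails field name is an assumption drawn from that API's error responses):

import urllib.parse
import urllib.request
import json as m_json

query = urllib.parse.urlencode({'q': 'x mile run'})
raw = urllib.request.urlopen(
    'http://ajax.googleapis.com/ajax/services/search/web?v=1.0&' + query).read().decode()
data = m_json.loads(raw)

payload = data.get('responseData')
if payload is None:
    # The deprecated API frequently returns "responseData": null;
    # responseDetails usually carries the reason (assumption).
    print('No responseData returned:', data.get('responseDetails'))
else:
    for result in payload['results']:
        print(result['title'] + '; ' + result['url'])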

Related

Error 400 "{limit} is not a number" from the OpenWeatherMap API

I'm trying to use OpenWeatherMap and I'm following their docs to build the calls.
I don't know why, but I always get this error:
{'cod': '400', 'message': '{limit} is not a number'}
This is my code:
import requests
import json
API_KEY = "49edcdeb737a08b5371c42f85fb4ce3d"
weather_url = "http://api.openweathermap.org/geo/1.0/direct?q={city_name},{country_code}&limit={limit}&appid="
final_url = weather_url + API_KEY
limit = "1"
city_name = "Brindisi"
country_code = "3166"
weather_data = requests.get(final_url).json()
print(weather_data)
You are not replacing the query parameters with actual values: q={city_name},{country_code}&limit={limit} is hard-coded in the URL, which makes it invalid.
You can use an f-string in Python to substitute the placeholders with their actual values:
limit = "1"
city_name = "Brindisi"
country_code = "3166"
weather_url = f"http://api.openweathermap.org/geo/1.0/direct?q={city_name},{country_code}&limit={limit}&appid="
final_url = weather_url + API_KEY
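Putting it together, a minimal sketch of the corrected request (the placeholder key is an assumption, and note that the geocoding endpoint expects an ISO 3166 country code such as "IT" rather than the literal string "3166"):

import requests

API_KEY = "<your API key>"   # assumption: use your own key here
limit = "1"
city_name = "Brindisi"
country_code = "IT"          # ISO 3166-1 alpha-2 code for Italy, not "3166"

weather_url = (f"http://api.openweathermap.org/geo/1.0/direct"
               f"?q={city_name},{country_code}&limit={limit}&appid=")
final_url = weather_url + API_KEY

weather_data = requests.get(final_url).json()
print(weather_data)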

Search on Splunk via Python SDK

I'm trying to run a simple search via the Python SDK (Python 3.8.5, splunk-sdk 1.6.14). The examples presented on dev.splunk.com are clear, but something goes wrong when I run a search with my own parameters.
The code is as simple as this:
search_kwargs_params = {
    "exec_mode": "blocking",
    "earliest_time": "2020-09-04T06:57:00.000-00:00",
    "latest_time": "2020-11-08T07:00:00.000-00:00",
}
search_query = 'search index=qwe1 trace=111-aaa-222 action=Event.OpenCase'
job = self.service.jobs.create(search_query, **search_kwargs_params)
for result in results.ResultsReader(job.results()):
    print(result)
But the search returns no results. When I run the same query manually in the Splunk web GUI it works fine.
I've also tried putting all the parameters into the 'search_kwargs_params' dictionary and widening the search time period; that returned some results, but they don't match what I get in the GUI.
Can someone advise?
This worked for me. You may also try this:
import requests
import time
import json

scheme = 'https'
host = '<your host>'
username = '<your username>'
password = '<your password>'

unique_id = '2021-03-22T18-43-00'  # You may give any unique identifier here
search_query = 'search <your splunk query>'

post_data = {
    'id': unique_id,
    'search': search_query,
    'earliest_time': '1',
    'latest_time': 'now',
}
# 'earliest_time' : '1', 'latest_time' : 'now'
# This will run the search query for all time

splunk_search_base_url = scheme + '://' + host + '/servicesNS/{}/search/search/jobs'.format(username)
resp = requests.post(splunk_search_base_url, data=post_data, verify=False, auth=(username, password))
print(resp.text)

is_job_completed = ''
while is_job_completed != 'DONE':
    time.sleep(5)
    get_data = {'output_mode': 'json'}
    job_status_base_url = scheme + '://' + host + '/servicesNS/{}/search/search/jobs/{}'.format(username, unique_id)
    resp_job_status = requests.post(job_status_base_url, data=get_data, verify=False, auth=(username, password))
    resp_job_status_data = resp_job_status.json()
    is_job_completed = resp_job_status_data['entry'][0]['content']['dispatchState']
    print("Current job status is {}".format(is_job_completed))

splunk_summary_base_url = scheme + '://' + host + '/servicesNS/{}/search/search/jobs/{}/results?count=0'.format(username, unique_id)
splunk_summary_results = requests.get(splunk_summary_base_url, data=get_data, verify=False, auth=(username, password))
splunk_summary_data = splunk_summary_results.json()

# Print the results in Python format (strings will be in single quotes)
for data in splunk_summary_data['results']:
    print(data)

print('status code...')
print(splunk_summary_results.status_code)
print('raise for status...')
print(splunk_summary_results.raise_for_status())

print('Results as JSON : ')
# Print the results in valid JSON format (strings will be in double quotes)
# To get complete json data:
print(json.dumps(splunk_summary_data))
# To get only the relevant json data:
print(json.dumps(splunk_summary_data['results']))
Cheers!
You may also like to have a look at this very handy tutorial. https://www.youtube.com/watch?v=mmTzzp2ldgU
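For reference, a minimal sketch of the same search done with the asker's splunk-sdk client (host, credentials and the default management port 8089 are assumptions; the query and time bounds are the ones from the question):

import splunklib.client as client
import splunklib.results as results

# Assumed connection details; 8089 is Splunk's default management port
service = client.connect(host='<your host>', port=8089,
                         username='<your username>', password='<your password>')

job = service.jobs.create(
    'search index=qwe1 trace=111-aaa-222 action=Event.OpenCase',
    exec_mode='blocking',                             # wait until the job is done
    earliest_time='2020-09-04T06:57:00.000-00:00',
    latest_time='2020-11-08T07:00:00.000-00:00',
)

# count=0 requests all result rows instead of the default page size
for result in results.ResultsReader(job.results(count=0)):
    print(result)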

How to return unique values from Google Places API?

I'm making a request to the Google Places API with the query "Hotels in Brazil", using the following code:
# url variable store url
url = "https://maps.googleapis.com/maps/api/place/textsearch/json?"

# The text string on which to search
query = input('Search query: ')

# get method of requests module
# return response object
r = requests.get(url + 'query=' + query +
                 '&key=' + api_key +
                 '&maxResults=1000')

# json method of response object convert
x = r.json()

# store the value of result key in variable y
y = x['results']

# Getting information from other pages with "next_page_token" param
places = []
params = {
    'query': query,
    'key': api_key,
    'maxResults': '1000'
}
while "next_page_token" in x:
    params['pageToken'] = x['next_page_token'],
    res = requests.get(url, params=params)
    z = json.loads(res.content)
    places.extend(z['results'])

print(places)
But it's taking too long to return the results (more than one day). So I stopped the execution, and when I printed the list I saw many repeated names: about 16K places, but only 26 unique ones.
Is it possible to return unique values from the Google Places API?
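Note that the loop above never updates x, so it keeps requesting the same page indefinitely, which would explain both the runtime and the duplicates. A hedged sketch of a reworked pagination loop (assumptions: the text-search page parameter is pagetoken, each result carries a place_id usable for de-duplication, and a short pause is needed before a fresh next_page_token becomes valid):

import time
import requests

api_key = "<your API key>"           # assumption: your own Places API key
url = "https://maps.googleapis.com/maps/api/place/textsearch/json"
params = {'query': 'Hotels in Brazil', 'key': api_key}

places = {}                          # keyed by place_id, so each place is kept only once
while True:
    resp = requests.get(url, params=params).json()
    for result in resp.get('results', []):
        places[result['place_id']] = result
    token = resp.get('next_page_token')
    if not token:
        break
    time.sleep(2)                    # a new next_page_token needs a moment before it is valid
    params = {'pagetoken': token, 'key': api_key}

print(len(places), "unique places")

Also keep in mind that a single text search query returns at most about 60 results (three pages of 20), so harvesting thousands of hotels requires issuing many narrower queries.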

How to convert an encoded URL into normal form using Python

I just used the Google API to search via Python, with the script below:
import urllib
import json as m_json

query = raw_input('Query: ')
query = urllib.urlencode({'q': query})
response = urllib.urlopen('http://ajax.googleapis.com/ajax/services/search/web?v=1.0&' + query).read()
json = m_json.loads(response)
results = json['responseData']['results']
for result in results:
    url = result['url']  # was URL in the original and that threw a name error exception
    print(url)
and after that I got the result below:
Query: inurl:"readnews.php?id="
http://www.idmantv.az/readnews.php%3Fid%3D14999
http://www.kanda.com/readnews.php%3Fid%3D9
http://www.dcfever.com/news/readnews.php%3Fid%3D12573
http://www.thegrower.org/readnews.php%3Fid%3D6c0p5n0e8i6b
but I want these URLs in normal form, like
http://www.idmantv.az/readnews.php?id=14999
How can I do that with Python?
Use urllib.unquote or urllib.unquote_plus to decode a percent-encoded string:
>>> urllib.unquote('http://www.idmantv.az/readnews.php%3Fid%3D14999')
'http://www.idmantv.az/readnews.php?id=14999'
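In Python 3 the same function lives in urllib.parse, so the equivalent would be:

from urllib.parse import unquote

print(unquote('http://www.idmantv.az/readnews.php%3Fid%3D14999'))
# http://www.idmantv.az/readnews.php?id=14999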

Python web scraping with requests - Got only a small part of data in the response

I'm trying to get some financial data from this URL:
http://www.casablanca-bourse.com/bourseweb/en/Negociation-History.aspx?Cat=24&IdLink=225
My code works only for a very short date interval (less than 19 days), even though the web site allows fetching 3 years of data.
My code is as follows:
import requests
import string
import csv
from bs4 import BeautifulSoup

# a simple helper function
def formatIt(s):
    output = ''
    for i in s:
        if i in string.printable:
            output += i
    return output

# default url
uri = "http://www.casablanca-bourse.com/bourseweb/en/Negociation-History.aspx?Cat=24&IdLink=225"

def get_viewState_and_symVal(symbolName, session):
    #session = requests.Session()
    r = session.get(uri)
    soup = BeautifulSoup(r.content)  # soup = BeautifulSoup(r.text)

    # let's get the viewstate value
    viewstate_val = soup.find('input', attrs={"id": "__VIEWSTATE"})['value']

    # let's get the symbol value
    selectSymb = soup.find('select', attrs={"name": "HistoriqueNegociation1$HistValeur1$DDValeur"})
    for i in selectSymb.find_all('option'):
        if i.text == symbolName:
            symbol_val = i['value']

    # simple sanity check before return !
    try:
        symbol_val
    except:
        raise NameError("Symbol Name not found !!!")
    else:
        return (viewstate_val, symbol_val)

def MainFun(symbolName, dateFrom, dateTo):
    session = requests.Session()
    request1 = get_viewState_and_symVal(symbolName, session)
    viewstate = request1[0]
    symbol = request1[1]

    payload = {
        'TopControl1$ScriptManager1': r'HistoriqueNegociation1$UpdatePanel1|HistoriqueNegociation1$HistValeur1$Image1',
        '__VIEWSTATE': viewstate,
        'HistoriqueNegociation1$HistValeur1$DDValeur': symbol,
        'HistoriqueNegociation1$HistValeur1$historique': r'RBSearchDate',
        'HistoriqueNegociation1$HistValeur1$DateTimeControl1$TBCalendar': dateFrom,
        'HistoriqueNegociation1$HistValeur1$DateTimeControl2$TBCalendar': dateTo,
        'HistoriqueNegociation1$HistValeur1$DDuree': r'6',
        'hiddenInputToUpdateATBuffer_CommonToolkitScripts': r'1',
        'HistoriqueNegociation1$HistValeur1$Image1.x': r'27',
        'HistoriqueNegociation1$HistValeur1$Image1.y': r'8'
    }

    request2 = session.post(uri, data=payload)
    soup2 = BeautifulSoup(request2.content)

    ops = soup2.find_all('table', id="arial11bleu")
    for i in ops:
        try:
            i['class']
        except:
            rslt = i
            break

    output = []
    for i in rslt.find_all('tr')[1:]:
        temp = []
        for j in i.find_all('td'):
            sani = j.text.strip()
            if not sani in string.whitespace:
                temp.append(formatIt(sani))
        if len(temp) > 0:
            output.append(temp)

    with open("output.csv", "wb") as f:
        writer = csv.writer(f, delimiter=';')
        writer.writerows(output)

    return writer

# working example
MainFun("ATLANTA", "1/1/2014", "30/01/2014")

# not working example
MainFun("ATLANTA", "1/1/2014", "30/03/2014")
It may be that the site automatically detects scrapers and blocks you. Try adding a small sleep statement somewhere to give their server some time to breathe. This is generally a polite thing to do while scraping anyway.
from time import sleep
sleep(1) # pauses 1 second
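A tiny hedged helper showing one way to fold that pause into the requests session used above (polite_get is a hypothetical name, not part of the original code):

import time
import requests

def polite_get(session, url, delay=1.0):
    # GET the page, then pause briefly so the server is not hammered
    resp = session.get(url)
    time.sleep(delay)
    return resp

# usage sketch with the question's URL
session = requests.Session()
r = polite_get(session, "http://www.casablanca-bourse.com/bourseweb/en/Negociation-History.aspx?Cat=24&IdLink=225")
print(r.status_code)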
It seems like there is something wrong with my Windows environment. The code works fine in a Debian-based virtual machine and under a Python virtualenv.
