Add key-value in JSON when calling an API - python

I am calling an API to get a list of properties. I obtain the JSON results under 'listings' (https://api.nestoria.es/show_example?name=search_listings_es&syntax=1).
I am searching by zip code and I need to add its zip code to each property. At the end I transform the JSON into a CSV. I don't know how to add the zip code to each property (so I would need to add a new key-value pair to each result from the API call).
Thanks!
Here is the code:
from requests import get
import json
import pandas as pd
import time
import datetime
import csv

def get_nestoria(type):
    # Call the API
    api = 'http://api.nestoria.es/api?action=search_listings'
    place = '&place_name=' + area_name
    listing_type = '&listing_type=' + type
    json_es = '&encoding=json&pretty=1&country=es'
    page = '&page='
    api_input = api + place + listing_type + json_es
    response = get(api_input)

    # Check if the API call has worked
    if response.status_code == 200:
        print("API called successfully")
    elif response.status_code == 400:
        print("Wrong request for " + area_name + ". Check this area is searchable")
    elif response.status_code == 403:
        print("Forbidden API call. Maximum number of calls reached.")
    else:
        print("Wrong code", response.status_code)

    content_as_string = response.content.decode()
    # Decode JSON
    content = json.loads(content_as_string)
    content_response = content['response']

    # Number of total web pages needed for the area
    web_pages = content_response['total_pages']
    print('Number of pages in that area: ', web_pages)
    print("Number of total properties " + area_name, content_response['total_results'])

    # 2nd call to the API, one request per page
    homes = pd.DataFrame()
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.0; WOW64; rv:24.0) Gecko/20100101 Firefox/24.0'}
    for i in range(1, web_pages + 1):
        api_input = api + place + listing_type + json_es + page + str(i)
        response = get(api_input, headers=headers)
        content_as_string = response.content.decode()
        content = json.loads(content_as_string)
        content_response = content['response']
        listings = content_response['listings']
        listings = pd.DataFrame(listings)
        if i == 1:
            homes = listings
        else:
            homes = homes.append(listings, sort=True)
        time.sleep(3)

    if homes.empty:
        homes = homes
    else:
        homes = homes[['bathroom_number', 'bedroom_number', 'car_spaces', 'commission', 'construction_year', 'datasource_name', 'img_height', 'img_url', 'img_width', 'floor',
                       'keywords', 'latitude', 'lister_url', 'listing_type', 'location_accuracy', 'longitude', 'price', 'price_currency', 'price_formatted', 'price_high', 'price_low',
                       'property_type', 'room_number', 'size', 'size_type', 'size_unit', 'summary', 'thumb_height', 'thumb_url', 'thumb_width', 'title', 'updated_in_days', 'updated_in_days_formatted']]
    return homes

homes = pd.DataFrame()
codigos_postales = ['01008']
today = datetime.date.today()  # used to name the output file
for i in codigos_postales:
    area_name = i
    temp = get_nestoria('buy')
    if i == 0:
        homes = temp
    else:
        homes = homes.append(temp, sort=True)

print('Number of extracted properties ', len(homes))
print(homes.head())
homes.to_csv('D:\\a000Master Big Data\\Prácticas\\Web scrapping\\Nestoria\\GranadaVenta' + str(today) + '.csv')

data = response.json()
Here, data is the parsed response you get from sending the request.
You can then update the listings like this:
for i in data['listings']:
    i['ZipCode'] = zipcode
with zipcode being the one you want to assign, i.e. the one you sent in the request.
You can either convert the data object into a DataFrame and then call DataFrame.to_csv, or use Python's built-in csv.writer.
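If you go the DataFrame route, a minimal sketch could look like this (it reuses the data and zipcode names from above and assumes the loop adding 'ZipCode' has already run; the output file name is just an example):
import pandas as pd

# data['listings'] now carries the added ZipCode key from the loop above
df = pd.DataFrame(data['listings'])
df.to_csv('listings_with_zip.csv', index=False)  # example file name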
If you don't have the zip code you can use the Google Maps Geocoding API to get it:
http://maps.googleapis.com/maps/api/geocode/json?address=valencia&sensor=true_or_false&key=YOUR_API_KEY
You will have to sign up to get an API key, and then you can read the zip code from the JSON response.
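For example, a rough sketch of pulling a postal code out of the geocoding response (the get_zipcode helper and the key placeholder are assumptions; the fields follow the standard Geocoding JSON layout):
import requests

def get_zipcode(address, api_key):
    # Query the Maps Geocoding endpoint for the address
    resp = requests.get(
        "https://maps.googleapis.com/maps/api/geocode/json",
        params={"address": address, "key": api_key},
    )
    results = resp.json().get("results", [])
    if not results:
        return None
    # The postal code is one of the address components of the first result
    for comp in results[0]["address_components"]:
        if "postal_code" in comp["types"]:
            return comp["long_name"]
    return None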
UPDATE:
Here is an example on how to use it.
api = 'http://api.nestoria.es/api?action=search_listings'
place = '&place_name=' + area_name
listing_type = '&listing_type=' + type
json_es = '&encoding=json&pretty=1&country=es'
page = '&page='
api_input = api + place + listing_type + json_es
response = get(api_input)
update = response.json()['response']['listings']
for i in update:
    i['Zipcode'] = zipcode

Related

Python | NameError: name 'null' is not defined

I have a JSON response as a string inside a JSON list, as you can see in the picture.
I am trying to get the value inside the string. I tried to use eval(), but the output shows me this error: NameError: name 'null' is not defined.
I can't read the JSON values when they are a string.
This is my code:
url = "https://api.pipedream.com/v1/sources/code/event_summaries?
expand=event"
headers = {"Authorization": "Bearer hash "}
response = requests.get(url, headers=headers)
data = response.text
datas = json.loads(data)
darts = datas['data']
for i in darts:
trake = i['event']['body']
for docz in trake:
open_time = open_time = docz['open_time']
print(open_time)
The problem is that the JSON values are strings, so I cannot access them.
By the way, the Bearer authorization token is just a demo.
The data you need is inside a dict key. So, you need to use the .keys() attribute to retrieve it and then use json.loads() to convert it to a dictionary.
Please check the below code:
import requests
import http.client
import json
from ast import literal_eval as evall

url = "https://api.pipedream.com/v1/sources/code/event_summaries?expand=event"
headers = {"Authorization": "Bearer hash"}
response = requests.get(url, headers=headers)
data = response.text
datas = json.loads(data)
darts = datas['data']

for i in darts:
    trake = i['event']['body']
    for docz in trake:
        print(docz)

for tracks in darts:
    # The body dict's only key is itself a JSON string, so parse it again
    tracks = json.loads(list(tracks['event']['body'].keys())[0])
    print(tracks)
    open_time = tracks['event']['trade'].get('open_time', '')
    close_time = tracks['event']['trade'].get('close_time', '')
    Lots = tracks['event']['trade'].get('lots', '')
    balance = tracks['event']['account'].get('balance', '')
    symbol = tracks['event']['trade'].get('symbol', '')
    profit = tracks['event']['trade'].get('profit', '')
    total_profit = tracks['event']['trade'].get('total_profit', '')
    msg = """
    Open time : """ + open_time + """
    Close time : """ + close_time + """
    Symbol : """ + symbol + """
    lots : """ + Lots + """
    Balance : """ + balance + """
    """
    print(msg)

print("success")

Using a python web crawler to scrape twitter accounts

I'm writing this program for my A-Level Computer Science coursework, and I am trying to get a crawler to scrape all the users found in a given user's followers/following lists.
The start of the script is as follows:
import requests
# import database as db
from bs4 import BeautifulSoup

debug = True

def getStartNode():  # Get the Twitter profile of the starting node
    global startNodeFollowing  # Declare the node vars as global for use in external functions
    global startNodeFollowers
    global startNodeLink
    if not debug:  # If debugging == False, allow the user to enter any starting node Twitter profile
        startNodeLink = input("Enter a link to the starting users Twitter profile\n[URL]: ")[:-1]  # Get profile link, remove the last char from input (space char, needed to enter link in terminal)
    else:  # If debugging == True, use a predetermined starting node to save time during development
        startNodeLink = ("https://twitter.com/ckjellberg03")
    startNodeFollowers = (startNodeLink + "/followers")  # Create new vars from the starting node's profile, appending the followers and following URL paths
    startNodeFollowing = (startNodeLink + "/following")
And the crawler is here:
def spider():  # Web crawler
    getStartNode()
    print("\nUsing:", startNodeLink)
    urlFollowers = startNodeFollowers
    sourceCode = requests.get(urlFollowers)
    plainText = sourceCode.text  # Source code of the URL (urlFollowers) in plain text format
    soup = BeautifulSoup(plainText, 'lxml')  # BeautifulSoup object to search through plainText for specific items/classes etc.
    for link in soup.findAll('a', {'class': 'css-4rbku5 css-18t94o4 css-1dbjc4n r-1loqt21 r-1wbh5a2 r-dnmrzs r-1ny4l3l'}):  # 'a' is an anchor (link) in HTML; the class is Twitter's class for a profile link
        href = link.get('href')
        print(href)  # Display everything found (development purposes)
I'm pretty sure the class identifier for a user's link to their Twitter profile on a /followers page is "css-4rbku5 css-18t94o4 css-1dbjc4n r-1loqt21 r-1wbh5a2 r-dnmrzs r-1ny4l3l" from looking at the source code, but printing the results displays nothing.
Any advice to point me in the right direction?
Thanks!
It's pretty difficult to scrape Twitter (trust me, I have tried every way). You can use the Twitter API, but it has limitations (you can't get the names of the followers, only the number). If you want to scrape some information with the Twitter API you can use this code:
from TwitterAPI import TwitterAPI, TwitterPager
import tweepy
from tweepy import Cursor
from datetime import datetime, date, time, timedelta

consumer_key = 'consumer key'
consumer_secret = 'consumer secret'
token = 'token'
token_secret = 'token secret'

auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(token, token_secret)
api = tweepy.API(auth)

account_list = ['POTUS44']

for target in account_list:
    print("Getting data for " + target)
    item = api.get_user(target)
    print("name: " + item.name)
    print("screen_name: " + item.screen_name)
    print("description: " + item.description)
    print("statuses_count: " + str(item.statuses_count))
    print("friends_count: " + str(item.friends_count))
    print("followers_count: " + str(item.followers_count))

    tweets = item.statuses_count
    account_created_date = item.created_at
    delta = datetime.utcnow() - account_created_date
    account_age_days = delta.days
    print("Account age (in days): " + str(account_age_days))
    if account_age_days > 0:
        print("Average tweets per day: " + "%.2f" % (float(tweets) / float(account_age_days)))

    # Collect hashtags and mentions from the last 30 days of tweets
    hashtags = []
    mentions = []
    tweet_count = 0
    end_date = datetime.utcnow() - timedelta(days=30)
    for status in Cursor(api.user_timeline, id=target).items():
        tweet_count += 1
        if hasattr(status, "entities"):
            entities = status.entities
            if "hashtags" in entities:
                for ent in entities["hashtags"]:
                    if ent is not None and "text" in ent:
                        hashtag = ent["text"]
                        if hashtag is not None:
                            hashtags.append(hashtag)
            if "user_mentions" in entities:
                for ent in entities["user_mentions"]:
                    if ent is not None and "screen_name" in ent:
                        name = ent["screen_name"]
                        if name is not None:
                            mentions.append(name)
        if status.created_at < end_date:
            break
Here is how to do it without the API. Some of the difficulty comes from using the right browser in the User-Agent header:
import re, requests

headers = {'User-Agent': 'UCWEB/2.0 (compatible; Googlebot/2.1; +google.com/bot.html)'}

def cleanhtml(raw_html):
    # Strip HTML tags from a fragment of markup
    cleanr = re.compile('<.*?>')
    cleantext = re.sub(cleanr, '', raw_html)
    return cleantext

content = ""
for user in ['billgates']:
    content += "============================\n\n"
    content += user + "\n\n"
    content += "============================\n\n"
    url_twitter = 'https://twitter.com/%s' % user
    resp = requests.get(url_twitter, headers=headers)  # Send request
    res = re.findall(r'<p class="TweetTextSize.*?tweet-text.*?>(.*?)</p>', resp.text)
    for x in res:
        x = cleanhtml(x)
        # Decode the most common HTML entities left in the tweet text
        x = x.replace("&#39;", "'")
        x = x.replace("&quot;", '"')
        x = x.replace("&nbsp;", " ")
        content += x
        content += "\n\n"
    content += "---"
    content += "\n\n"

Max retries exceeded : Geocode Multiple Addresses and Write to excel

I am trying to geocode a list of addresses and append the results to an Excel file using the HERE API, but I am encountering a "max retries exceeded" error.
Here is my current code:
import requests

def geocode(location):
    # API endpoint
    URL = "https://geocoder.ls.hereapi.com/6.2/geocode.json"
    # Parameters to be sent to the API
    PARAMS = {'searchtext': location, 'gen': 9, 'apiKey': "s"}
    # Send the GET request and keep the response object
    r = requests.get(url=URL, params=PARAMS)
    if r.status_code == 200:
        # Extract the data in JSON format
        out = r.json()
        try:
            area = out['Response']['View'][0]['Result'][0]['Location']['Address']['County']
            city = out['Response']['View'][0]['Result'][0]['Location']['Address']['City']
            zip = out['Response']['View'][0]['Result'][0]['Location']['Address']['PostalCode']
        except IndexError:
            area = ""
            city = ""
            zip = ""
    return out, area, city, zip
Here is my loop :
# Loop through the addresses and write the results into the sheet
i = 1
for primary in primary_address:
    i = i + 1
    out, area, city, zip = geocode(primary)
    sheet.cell(row=i, column=first_empty_col).value = area
    sheet.cell(row=i, column=first_empty_col + 1).value = city
    sheet.cell(row=i, column=first_empty_col + 2).value = zip

book.save('file.xlsx')
What is the most efficient way to do this?
For efficiently geocoding a larger number of locations, you should take a look at the HERE Batch Geocoder API.
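Note that "max retries exceeded" is raised by the requests library when connections keep failing, not by the HERE endpoint itself. Independent of switching to the Batch Geocoder API, here is a minimal sketch (reusing the endpoint and parameters from the question, with a placeholder API key) of retrying with backoff over a single shared session:
import requests
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry

# One session reused for all addresses, with automatic retries and backoff
session = requests.Session()
retries = Retry(total=5, backoff_factor=1, status_forcelist=[429, 500, 502, 503, 504])
session.mount("https://", HTTPAdapter(max_retries=retries))

def geocode(location, api_key="YOUR_API_KEY"):
    url = "https://geocoder.ls.hereapi.com/6.2/geocode.json"
    params = {'searchtext': location, 'gen': 9, 'apiKey': api_key}
    r = session.get(url, params=params, timeout=10)
    r.raise_for_status()
    out = r.json()
    try:
        address = out['Response']['View'][0]['Result'][0]['Location']['Address']
        return out, address.get('County', ''), address.get('City', ''), address.get('PostalCode', '')
    except IndexError:
        return out, '', '', ''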

Python TypeError on executing weather service code

I am using a weather API to design a Slack bot service in Python.
My source code is:
import requests
import re
import json
from bs4 import BeautifulSoup

def weather(cityname):
    cityid = extractid(cityname)
    url = "http://api.openweathermap.org/data/2.5/forecast?id=" + str(cityid) + "&APPID=c72f730d08a4ea1d121c8e25da7e4411"
    while True:
        r = requests.get(url, timeout=5)
        while r.status_code is not requests.codes.ok:
            r = requests.get(url, timeout=5)
        soup = BeautifulSoup(r.text)
        data = ("City: " + soup.city["name"] + ", Country: " + soup.country.text + "\nTemperature: " + soup.temperature["value"] +
                " Celsius\nWind: " + soup.speed["name"] + ", Direction: " + soup.direction["name"] + "\n\n" + soup.weather["value"])
        # print data
        return data

def extractid(cname):
    with open('/home/sourav/Git-Github/fabulous/fabulous/services/city.list.json') as data_file:
        data = json.load(data_file)
    for item in data:
        if item["name"] == cname:
            return item["id"]

def on_message(msg, server):
    text = msg.get("text", "")
    match = re.findall(r"~weather (.*)", text)
    if not match:
        return
    searchterm = match[0]
    return weather(searchterm.encode("utf8"))

on_bot_message = on_message
But executing the code gives the following error-
File "/usr/local/lib/python2.7/dist-packages/fabulous-0.0.1-py2.7.egg/fabulous/services/weather.py", line 19, in weather
" Celsius\nWind: " + soup.speed["name"] + ", Direction: " + soup.direction["name"] + "\n\n" + soup.weather["value"])
TypeError: 'NoneType' object has no attribute '__getitem__'
I can't figure out what the error is. Please help!
__getitem__ is called when you ask for a dictionary key, e.g. a['abc'] translates to a.__getitem__('abc'),
so in this case one of the soup attributes is None (speed, direction, or weather).
Ensure that your r.text contains the data you want; simply print it:
print(r.text)
To list the structure of the parsed data:
for child in soup.findChildren():
    print(child)
Always assume your input data might be wrong. Instead of doing soup.city, do soup.find('city'); it might be empty, so:
city = soup.find('city')
if len(city):
    city_name = city[0]['name']
else:
    city_name = 'Error'  # or empty, or something else
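Putting that advice together, here is a minimal defensive sketch (the safe_attr helper is hypothetical, and it assumes the response body is XML that BeautifulSoup can parse, e.g. when the OpenWeatherMap request asks for mode=xml):
from bs4 import BeautifulSoup

def safe_attr(soup, tag_name, attr, default="N/A"):
    # Look the tag up with find() and fall back to a default instead of
    # raising when the tag is missing or lacks the attribute
    tag = soup.find(tag_name)
    if tag is not None and tag.has_attr(attr):
        return tag[attr]
    return default

soup = BeautifulSoup(r.text, "xml")   # r is the response from the question's code
temperature = safe_attr(soup, "temperature", "value")
wind = safe_attr(soup, "speed", "name")
condition = safe_attr(soup, "weather", "value")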

Search image on Google images with the new Custom Search API?

So, I am testing this piece of code:
import requests
import json
searchTerm = 'parrot'
startIndex = '0'
searchUrl = "http://ajax.googleapis.com/ajax/services/search/images?v=1.0&q=" + \
searchTerm + "&start=" + startIndex
r = requests.get(searchUrl)
response = r.content.decode('utf-8')
result = json.loads(response)
print(r)
print(result)
And the response is :
<Response [200]>
{'responseData': None, 'responseStatus': 403, 'responseDetails': 'This API is no longer available.'}
It seems that I am trying to use the old API, which is deprecated now. When I check the Google Custom Search API I don't see any way to search Google Images directly. Is this even possible with the new API?
It is possible; here is the new API reference:
https://developers.google.com/custom-search/json-api/v1/reference/cse/list
import requests
import json
searchTerm = 'parrot'
startIndex = '1'
key = ' Your API key here. '
cx = ' Your CSE ID:USER here. '
searchUrl = "https://www.googleapis.com/customsearch/v1?q=" + \
searchTerm + "&start=" + startIndex + "&key=" + key + "&cx=" + cx + \
"&searchType=image"
r = requests.get(searchUrl)
response = r.content.decode('utf-8')
result = json.loads(response)
print(searchUrl)
print(r)
print(result)
That works fine, I just tried.
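As a small follow-up (assuming the standard response layout of the Custom Search JSON API), with searchType=image each entry in result['items'] should carry the image URL in its link field:
# Print the title and image URL of each result returned above
for item in result.get('items', []):
    print(item.get('title'), '->', item.get('link'))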
