Save Scraped Data as CSV file? - python

I am trying to scrape data from a link that contains JSON data and this is the code:
import requests
import json

# NOTE(review): 'a:3' and 'a:4' appear twice in this list, so those two
# requests are issued twice.
parameters = ['a:1','a:2','a:3','a:4','a:3','a:4','a:5','a:6','a:7','a:8','a:9','a:10',]

for item in parameters:
    # Split once instead of calling str.split twice on the same string.
    key, value = item.split(':')
    url = "https://xxxx.000webhostapp.com/getNamesEnc02Motasel2.php?keyword=%s&type=2&limit=%s" % (key, value)
    r = requests.get(url)
    # Server is expected to return a JSON list of {'name': ..., 'phone': ...} dicts.
    cont = json.loads(r.content)
    print(cont)
And the output be like
[{'name': 'Absz', 'phone': '66343212'}, {'name': 'ddd ', 'phone': '545432211'}, {'name': 'ezd', 'phone': '54856886'}]
I want to store all the data in a CSV file.
How can I do this?
Also, as you can see, I am using the parameters list to make multiple requests, but I think there is a way to loop the limit parameter from 1 to 200 without typing every single keyword and number in parameters.
Thanks in advance.

Try the below code it will create csv row wise:
import csv
import json
import requests  # BUG FIX: requests.get is used below but was never imported

header = ["name", "phone"]

# BUG FIX: the original reopened the file in append mode and wrote the
# header on every request, repeating it ~200 times. Open once in write
# mode, emit the header a single time, then stream rows as they arrive.
with open('people.csv', 'w', newline='') as write_file:
    writer = csv.writer(write_file)
    writer.writerow(header)
    for value in range(1, 200):  # Generating value from range 1 --> 200
        key = 'a'
        url = "https://xxxx.000webhostapp.com/getNamesEnc02Motasel2.php?keyword=%s&type=2&limit=%s" % (key, value)
        r = requests.get(url)
        cont = json.loads(r.content)
        print(cont)
        for a_row in cont:
            writer.writerow([a_row["name"], a_row["phone"]])  # To write name and phone
Hope this answers your question!!

import requests
import json
import pandas as pd

parameters = ['a:1','a:2','a:3','a:4','a:3','a:4','a:5','a:6','a:7','a:8','a:9','a:10']

# Collect one DataFrame per request and concatenate once at the end:
# DataFrame.append was deprecated in pandas 1.4 and removed in 2.0, and
# appending inside a loop copies the frame every iteration (quadratic).
frames = []
for item in parameters:
    key, value = item.split(':')
    url = "https://xxxx.000webhostapp.com/getNamesEnc02Motasel2.php?keyword=%s&type=2&limit=%s" % (key, value)
    r = requests.get(url)
    cont = json.loads(r.content)
    frames.append(pd.DataFrame(cont))

results = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()
results.to_csv('path/to/filename.csv', index=False)

Related

for loop print only last value from dict in live market

I am trying to print LTP data for more than one crypto in the live market, but it prints the data for only one crypto.
import pandas as pd
import requests
import json

ltp_data = []
crypto = {"BTCUSDT", "LTCUSDT", "DOGEUSDT"}

def live_ltp():
    """Fetch ticker prices from Binance for the symbols in `crypto`."""
    base_url = "https://api.binance.com/api/v3/ticker/price?symbol="
    for symbol in crypto:
        response = requests.get(base_url + symbol)
        Ltp = response.json()
        ltp_data.append(Ltp)
        # NOTE(review): returning inside the loop exits after the very
        # first symbol, so only one ticker is ever fetched per call.
        return Ltp

while True:
    print(str(live_ltp()))
return will exit your loop as soon as it is hit. If you bring your return statement outside of the loop, and have it return ltp_data (instead of the "LTP" json object) you should be able to get the items in the list you appear to be populating.
ltp_data = []
crypto = {"BTCUSDT", "LTCUSDT", "DOGEUSDT"}

def live_ltp():
    """Fetch the latest ticker price for every symbol in `crypto`.

    Each JSON response is appended to the module-level `ltp_data`
    list, which is returned once the loop has visited all symbols.
    """
    endpoint = "https://api.binance.com/api/v3/ticker/price?symbol="
    for symbol in crypto:
        resp = requests.get(endpoint + symbol)
        ltp_data.append(resp.json())
    return ltp_data

crypto_ltps = live_ltp()
print(crypto_ltps)
You have added the return statement inside the loop, because of which it executes only once and returns only one piece of data.
Instead,
import pandas as pd
import requests
import json

ltp_data = []
crypto = {"BTCUSDT", "LTCUSDT", "DOGEUSDT"}

def live_ltp():
    """Return a fresh list with one price response per symbol.

    Every response is also appended to the global `ltp_data`.
    NOTE(review): `ltp_data` grows on each call, so the infinite loop
    below makes it grow without bound.
    """
    responses = []
    base = "https://api.binance.com/api/v3/ticker/price?symbol="
    for symbol in crypto:
        payload = requests.get(base + symbol).json()
        ltp_data.append(payload)
        responses.append(payload)
    return responses

while True:
    print(str(live_ltp()))
This will solve the problem.
Hope this helps you!!!
Please free to comment if you get any error in this and mark the answer as correct if it worked.
You have a return Ltp in the for loop so you will always just get a single response for the first item in the set of crypto id's. You could instead do return ltp_data after the loop ends. But that creates a new problem - since you are updating a global list, it will just keep growing and growing.
Instead, write your function to take input parameters and return a locally-generated list.
import pandas as pd
import requests
import json

def live_ltp(crypto_ids):
    """Return a locally-built list of Binance price responses, one per id."""
    base = "https://api.binance.com/api/v3/ticker/price?symbol="
    return [requests.get(base + coin).json() for coin in crypto_ids]

crypto = {"BTCUSDT", "LTCUSDT", "DOGEUSDT"}
while True:
    print(str(live_ltp(crypto)))
solution with dataframe in place.
you will need to pass empty dataframe to function: live_ltp(df_frame).
I would also use .json_normalize to set table in place properly.
import pandas as pd
import requests
import json

# Empty dataframe (not a list) that gets passed into the function below.
ltp_data = pd.DataFrame()
crypto = {"BTCUSDT", "LTCUSDT", "DOGEUSDT"}

def live_ltp(df_frame):
    """Append one normalized, timestamped row per symbol to df_frame and return it."""
    endpoint = "https://api.binance.com/api/v3/ticker/price?symbol="
    for symbol in crypto:
        payload = requests.get(endpoint + symbol).json()
        row = pd.json_normalize(payload)
        row['time'] = pd.Timestamp.now()
        df_frame = pd.concat([df_frame, row], axis=0)
    return df_frame

while True:
    # NOTE(review): the same empty frame is passed on every iteration, so
    # test.xlsx is rewritten each time with only the latest batch of rows.
    final_df = live_ltp(ltp_data)  # passing empty dataframe to function
    final_df.to_excel('test.xlsx', index=False)
    print(final_df)

Writing to a CSV file with an API call

I'm a python beginner and I'm pulling data from this URL: https://api.openweathermap.org/data/2.5
I'm trying to write the data I get into a csv file but the fields are all over the place (see link to image below).
This is my code:
import requests
import csv
import json

API_KEY = 'redacted'
BASE_URL = 'https://api.openweathermap.org/data/2.5/weather'

city = input('Enter a city name: ')
request_url = f"{BASE_URL}?appid={API_KEY}&q={city}"
csvheaders = ['City', 'Description', 'Temp.']

response = requests.get(request_url)
if response.status_code == 200:
    data = response.json()
    city = data['name']
    weather = data['weather'][0]['description']
    # API returns Kelvin; convert to Celsius.
    temperature = round(data['main']['temp'] - 273.15, 2)
    with open('weather_api.csv', 'w', encoding='UTF8', newline='') as f:
        writer = csv.writer(f)
        writer.writerow(csvheaders)
        # BUG FIX: writerows() treats each list element as a whole row and
        # iterates strings one character per cell (the scattered output the
        # asker saw). writerow() writes the three values as a single row.
        writer.writerow([city, weather, temperature])
    print('done')
else:
    # BUG FIX: on a non-200 response the original fell through to the CSV
    # code and raised NameError on undefined variables; now it only reports.
    print('Error')
And the resultant csv output looks like this
Could someone tell me what I'm doing wrong and how I can get accurately pull data into the correct columns? That would be much appreciated.
If there is a much simpler way of doing this I'm all ears!

Plotly Takes to long to Render an output

I need a help please :)
Hi! I have a dataframe CSV file in which I have longitude column, latitude column, and sales.
I would love to visualize my data geographically.
First I tried to make a scatter plot directly from the CSV and wrote the following.
# NOTE(review): fragment — assumes `px` (plotly.express) and `df` (a
# DataFrame with 'latitude', 'longitude' and 'sales' columns) are defined
# earlier in the full script; confirm against the original source.
px.set_mapbox_access_token("my token on mapbox")
fig = px.scatter_mapbox(df, lat="latitude", lon="longitude",color="sales",
color_continuous_scale=px.colors.cyclical.IceFire, size_max=20,zoom=12)
fig.show()
# write_html embeds every data point in the file; with a large dataframe
# the resulting HTML can be very slow to open.
fig.write_html("example_map.html")
It has never opened.
Then I have tried to try without mapbox and converted CSV into Json file.
import csv
import json
from collections import OrderedDict

features = []
with open("Path to my file") as csvfile:
    reader = csv.DictReader(csvfile)
    for row in reader:
        feature = OrderedDict()
        # BUG FIX: GeoJSON requires type "Feature" (not "zipCode") and
        # point coordinates in [longitude, latitude] order (RFC 7946);
        # the original order plotted points in the wrong hemisphere.
        feature['type'] = 'Feature'
        feature['geometry'] = {
            'type': 'Point',
            'coordinates': [float(row['longitude']), float(row['latitude'])]
        }
        features.append(feature)

collection = OrderedDict()
collection['type'] = 'FeatureCollection'
collection['features'] = features
with open('output.json', 'w') as f:
    json.dump(collection, f, indent=2)
and next I have tried to plot it:
import csv
import json
from collections import OrderedDict

li = []
with open("C:\\Users\\Dell\\Desktop\\Intern 2021\\McGill\\full_dataset_csv.csv", newline='') as csvfile:
    reader = csv.DictReader(csvfile)
    for row in reader:
        d = OrderedDict()
        # BUG FIX: valid GeoJSON uses type "Feature" and orders point
        # coordinates [longitude, latitude] (RFC 7946); the original
        # values would not render correctly in GeoJSON-aware tools.
        d['type'] = 'Feature'
        d['geometry'] = {
            'type': 'Point',
            'coordinates': [float(row['longitude']), float(row['latitude'])]
        }
        li.append(d)

d = OrderedDict()
d['type'] = 'FeatureCollection'
d['features'] = li
with open('output.json', 'w') as f:
    json.dump(d, f, indent=2)
Again it took forever to show an output. I think I might have done something wrong in the coding? Or should I think about an alternative to Plotly?

Input CSV file of lat and long coordinates into API to extract the weather data?

Here is my code below, where I used long and lat coordinates in the locations variable and attached them to the URL via coordinates_str. Since I have a CSV file with latitude and longitude coordinates for many locations, I want to feed that CSV file as input to this API (which needs authentication).
How do I input CSV file into this code instead of locations variable?
import requests
import pprint

locations = [(13.84, -12.57), (12.21, -14.69)]
# Format each pair as "x.xxx y.yyy" and join the pairs with commas.
coordinates_str = ','.join(' '.join(f'{coord:.3f}' for coord in pair) for pair in locations)

# Replace "poi-settings" with the endpoint you would like to call.
URL = f'https://ubiconnect-eu.ubimet.com:8090/pinpoint-data?coordinates={coordinates_str}'
TOKEN = 'TOKEN KEY'

# Create session object that can be used for all requests.
session = requests.Session()
session.headers['Authorization'] = 'Token {token}'.format(token=TOKEN)

# Send GET request to UBIconnect.
res = session.get(URL)
res.raise_for_status()

# Decode JSON response.
poi_info = res.json()
pprint.pprint(poi_info, indent=2, compact=True)
Then I tried this way: in place of coordinates_str I did this
import requests
import pprint
import pandas as pd

df = pd.read_csv(r'E:\route_points.csv')
print(df)
# Replace "poi-settings" with the endpoint you would like to call.
URL = f'https://ubiconnect-eu.ubimet.com:8090/pinpoint-data?'
TOKEN = 'API TOKEN'
# NOTE(review): this passes a whole DataFrame as the "coordinates" query
# parameter; requests will not serialize it into the "lon lat,lon lat"
# string the API expects, which is why this attempt fails. The coordinate
# columns must be formatted into that string before building the request.
params= {'coordinates':(df)}
# Create session object that can be used for all requests.
session = requests.Session()
session.headers['Authorization'] = 'Token {token}'.format(token=TOKEN)
# Send GET request to UBIconnect.
res = session.get(URL, params= params)
res.raise_for_status()
# Decode JSON response.
poi_info = res.json()
pprint.pprint(poi_info, indent=2, compact=True)
Still not working.
Format needed to call the API from Documentation is:
# Replace "poi-settings" with the endpoint you would like to call.
URL = 'https://ubiconnect-eu.ubimet.com:8090/poi-settings'
# Placeholder — substitute a real API token before running.
TOKEN = '<YOUR TOKEN GOES HERE>'
so I replaced the poi-settings by pinpoint-data
URL = 'https://ubiconnect-eu.ubimet.com:8090/pinpoint-data?coordinates=longitude<space>latitude'
For Example: I put one coordinate set into API URL
URL = 'https://ubiconnect-eu.ubimet.com:8090/pinpoint-data?coordinates=132.85 12.84'
then with above URL I get the weather data for that location.
If you just want to submit a block of coordinates at a time from your CSV file then something like the following should suffice:
from itertools import islice
import requests
import pprint
import csv
def grouper(n, iterable):
    """Yield successive tuples of up to n items from iterable.

    The final tuple is shorter when the iterable's length is not a
    multiple of n; an empty iterable yields nothing.
    """
    it = iter(iterable)
    while True:
        chunk = tuple(islice(it, n))
        if not chunk:
            return
        yield chunk
block_size = 10  # how many coordinate pairs to submit per request

TOKEN = 'TOKEN KEY'

# One session object reused for every request.
session = requests.Session()
session.headers['Authorization'] = 'Token {token}'.format(token=TOKEN)

with open('coordinates.csv', newline='') as f_input:
    csv_input = csv.reader(f_input)
    next(csv_input)  # skip the header row
    for coords in grouper(block_size, csv_input):
        coordinates = ','.join(f'{float(lon):.3f} {float(lat):.3f}' for lon, lat in coords)
        print(coordinates)
        URL = f'https://ubiconnect-eu.ubimet.com:8090/pinpoint-data?coordinates={coordinates}'
        # Send GET request to UBIconnect and fail fast on HTTP errors.
        res = session.get(URL)
        res.raise_for_status()
        # Decode and pretty-print the JSON response.
        poi_info = res.json()
        pprint.pprint(poi_info, indent=2, compact=True)
(obviously this was not tested - no token). Make sure there are no blank lines in your CSV file.
To output to a file add an output file:
with open('coordinates.csv', newline='') as f_input, open('output.json', 'w', encoding='utf-8') as f_output:
and use this in the pprint() call:
pprint.pprint(poi_info, f_output, indent=2, compact=True)
f_output.write('\n') # add blank line if needed
Hope this is what you are looking for
import csv

locations = []
with open("foo.csv") as csvf:
    # Expect a header with "lat" and "long" columns; collect float pairs.
    for row in csv.DictReader(csvf):
        locations.append((float(row["lat"]), float(row["long"])))

# now add your code
coordinates_str = ','.join(' '.join(f'{value:.3f}' for value in pair) for pair in locations)

get all values for key in json list file python

I can't iterate over a JSON file to get all values for a key. I've tried multiple ways of writing this with many errors.
# Import package
# Import package
from urllib.request import urlretrieve
# Import pandas
import pandas as pd
# Loading JSONs in Python
import json

# Assign url of file: url
url = 'https://data.sfgov.org/resource/wwmu-gmzc.json'
# Save file locally
urlretrieve(url, 'wwmu-gmzc.json')

with open('wwmu-gmzc.json', 'r') as json_file:
    # NOTE(review): indexing with [0] keeps only the FIRST record of the
    # downloaded list — that is why only one film's data ever prints.
    json_data = json.load(json_file)[0]  # turn into type dict

print(type(json_data))

# 'title' is a string here, so this loop prints it one character at a time.
for character in json_data['title']:
    print(character)

for key, value in json_data.items():
    print(key + ':', value)
#print(json_data.keys('title') + ':' , jason_data['title'])
The current version of this code only gives the first line of the file:
<class 'dict'> 1 8 0 release_year: 2011 actor_2: Nithya Menon writer: Umarji Anuradha, Jayendra, Aarthi Sriram, & Suba locations: Epic Roasthouse (399 Embarcadero) director: Jayendra title: 180 production_company: SPI Cinemas actor_1: Siddarth actor_3: Priya Anand
Corrected code below and accounts for missing keys:
# Loading JSONs in Python
import json

with open('wwmu-gmzc.json', 'r') as json_file:
    # json.load parses straight from the file object — no need to read
    # the whole text first and pass it to json.loads.
    json_data = json.load(json_file)

print(type(json_data))
for record in json_data:
    try:
        print(record['locations'])
    except KeyError:
        # Narrowed from a bare `except:` — only a missing key is expected
        # here; any other error should surface instead of being swallowed.
        print('***** NO KEY FOUND *****')
You are loading only the first record in the dataset.
with open('wwmu-gmzc.json', 'r') as json_file:
    json_data = json.load(json_file)  # Input is a list of dicts — load everything

for entry in json_data:
    # .get avoids KeyError when a record lacks the key.
    print(entry.get('your_key', 'default_value'))
Your code does not work because the data you are fetching is actually a list. To read each item in the list (each item is a key-value pair) you can do.
# Import package
from urllib.request import urlretrieve
import json

# Assign url of file: url
url = 'https://data.sfgov.org/resource/wwmu-gmzc.json'
# Save file locally
urlretrieve(url, 'wwmu-gmzc.json')

# Loading JSONs in Python
with open('wwmu-gmzc.json', 'r') as json_file:
    json_data = json.loads(json_file.read())

# Each item in the list is one record (a dict of key-value pairs).
for record in json_data:
    print('======')
    for field, value in record.items():
        print(field + ':', value)

Categories