Trying to create Python code that inputs json and then displays it - python

import json
import requests
def download_file(url):
    """Download ``url`` and save it in the current working directory.

    The local file name is the last ``/``-separated segment of ``url``.

    Raises:
        requests.HTTPError: if the server answers with a 4xx/5xx status, so
            an error page is never written to disk as if it were the data.
    """
    r = requests.get(url)
    # Fail loudly instead of saving the body of an error response.
    r.raise_for_status()
    filename = url.split('/')[-1]
    with open(filename, 'wb') as f:
        f.write(r.content)
# Fetch the download index, then save every drug-event partition file,
# reporting progress after each one.
api_url = 'https://api.fda.gov/download.json'
r = requests.get(api_url)
index = json.loads(r.text)
partitions = index['results']['drug']['event']['partitions']
files = [partition['file'] for partition in partitions]
for count, file in enumerate(files, start=1):
    download_file(file)
    print(f"{count}/{len(files)} downloaded!")
This is the other code
import urllib.request, json
# Fetch the drug-label endpoint, parse the body as JSON and print it.
with urllib.request.urlopen("https://api.fda.gov/drug/label.json") as response:
    raw_body = response.read()
    data = json.loads(raw_body.decode())
print(data)
The first snippet just downloads the files. I'm wondering if there's a way to display the data without having to download any of the 1000+ files, so the code can be used locally. The second snippet prints the JSON in the terminal.

requests.get() and urllib.request.urlopen() both "download" the full response of the URL they are given.
If you do not want to "save" the file to disk, then remove the code that calls f.write()
More specifically,
import json
import requests
# List every drug-event partition file and print each one's raw bytes
# instead of writing it to disk.
api_url = 'https://api.fda.gov/download.json'
r = requests.get(api_url)
partitions = r.json()['results']['drug']['event']['partitions']
files = [partition['file'] for partition in partitions]
total_files = len(files)
for position, file in enumerate(files, start=1):
    print(requests.get(file).content)
    print(f"{position}/{total_files} downloaded!")

Related

Download images from URL Python

I have a problem with my script when I try to download images from a web URL. It works for another site (offex.pl), but the images from my shop do not work.
All the files are saved, but I can't open them.
my code:
import os
import time
import requests
from termcolor import colored
def get_folder(url):
    """Return the name of the directory that directly contains the file in ``url``.

    Any ``?query`` or ``#fragment`` suffix is dropped first, so a ``/`` inside
    it cannot be mistaken for a path separator.
    """
    bare_url = url.partition("#")[0].partition("?")[0]
    all_folders = os.path.dirname(bare_url)
    folder = os.path.basename(all_folders)
    return folder
def filename(url):
    """Return the last path segment of ``url``, i.e. the file name.

    A trailing ``?query`` or ``#fragment`` is removed first so it neither
    ends up in the file name nor hides the real last segment.
    """
    bare_url = url.partition("#")[0].partition("?")[0]
    # rfind returns -1 when there is no "/", which makes the slice start at 0.
    return bare_url[bare_url.rfind("/") + 1:]
def download(link):
    """Download the image at ``link`` into ``download/<parent folder>/``.

    Prints the target path and a coloured status line, writes any failure to
    ``log.txt`` (overwriting the previous log) and finally prints the
    success and failure counts for this call.
    """
    error = []
    ok = 0
    fail = 0
    target_dir = os.path.join("download", get_folder(link))
    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs(target_dir, exist_ok=True)
    result = requests.get(link, stream=True)
    completeName = os.path.join(target_dir, filename(link))
    print(completeName)
    if result.status_code == 200:
        # A server may send the body gzip/deflate-encoded. The raw stream
        # only undoes that encoding when asked; without this the saved file
        # holds compressed bytes and cannot be opened as an image.
        result.raw.decode_content = True
        # `with` closes (and flushes) the file; a bare open().write() did not.
        with open(completeName, "wb") as image_file:
            image_file.write(result.raw.read())
        ok += 1
        print("{} {} {}".format(ok, colored("Pobrano:", "green"), link))
        time.sleep(1)
    else:
        print("{} {}".format(colored("Brak pliku!:", "red"), link))
        fail += 1
        # The previous entry had three placeholders but only two arguments
        # (one of them the builtin `id`), so formatting raised IndexError.
        error.append("STATUS:{} link: {}".format(result.status_code, link))
    with open("log.txt", "w") as filehandle:
        for listitem in error:
            filehandle.write('%s\n' % listitem)
    print(colored("Pobrano plików: ", "green"), ok)
    print(colored("Błędy pobierania: ", "red"), fail)
# Example run: fetch the shop image that the question reports as failing.
img_url="https://sw19048.smartweb-static.com/upload_dir/shop/misutonida_ec-med-384-ix.jpg"
download(img_url)
What am I doing wrong?
for example (https://offex.pl/images/detailed/11/94102_jeep_sbhn-8h.jpg) download OK
but for my shop url https://sw19048.smartweb-static.com/upload_dir/shop/misutonida_ec-med-384-ix.jpg is not working.
If you want to use requests module,you can use this:
import requests
# Fetch the image with requests and write the response body to a local file.
image_url = "https://sw19048.smartweb-static.com/upload_dir/shop/misutonida_ec-med-384-ix.jpg"
response = requests.get(image_url)
with open('./Image.jpg','wb') as f:
    f.write(response.content)
The difference lies in the URL you are downloading from. It is not a fault in the URL itself, just a difference from the other URL you mentioned.
Let me explain
The URL https://offex.pl/images/detailed/11/94102_jeep_sbhn-8h.jpg returns the image as the response without any compression.
On the other hand, the shop URL https://sw19048.smartweb-static.com/upload_dir/shop/misutonida_ec-med-384-ix.jpg returns the image with gzip compression enabled in the headers.
So the raw response you get is compressed with gzip compression. You can decompress the response with gzip, if you know the compression is always gzip like below
import gzip
import io
# `result` is the streamed requests response from the question's code.
image = result.raw.read()
buffer = io.BytesIO(image)
# The raw bytes are still gzip-compressed, so read them through GzipFile.
# Both handles are context-managed so the output file is closed and flushed.
with gzip.GzipFile(fileobj=buffer) as inflated, open("D:/sample.jpg", "wb") as out_file:
    out_file.write(inflated.read())
Alternatively, you can use another library such as urllib2, which takes care of decompression. I was trying to explain why it failed for your URL but not for the other one. I hope this makes sense.
try :
import urllib2
def download_web_image(url):
    """Fetch ``url`` with urllib2 and save the returned bytes as ``test.jpg``."""
    request = urllib2.Request(url)
    image_bytes = urllib2.urlopen(request).read()
    with open('test.jpg', 'wb') as out_file:
        out_file.write(image_bytes)


download_web_image("https://sw19048.smartweb-static.com/upload_dir/shop/misutonida_ec-med-384-ix.jpg")
It is working for your URL. I think the issue is with the request response of the used library.
from io import BytesIO
import requests
from PIL import Image
# Download the image, open it from memory with PIL and save it as newFile.jpg.
image_url = "https://sw19048.smartweb-static.com/upload_dir/shop/misutonida_ec-med-384-ix.jpg"
fileRequest = requests.get(image_url)
image_buffer = BytesIO(fileRequest.content)
doc = Image.open(image_buffer)
doc.save("newFile.jpg")

Flask: api call downloads excel sheet, need to store in folder

The following url downloads an excel spreadsheet
http://www.bocsar.nsw.gov.au/Documents/RCS-Annual/bluemountainslga.xlsx
via flask code I want to call that url, and save the spreadsheet to a folder
So far I have
r = requests.get('http://www.bocsar.nsw.gov.au/Documents/RCS-Annual/bluemountainslga.xlsx')
But I need help saving the spreadsheet to a downloads folder inside the project. The folder structure is:
App
+static
+templates
main.py
+downloads
|__ move file here
This is a minimal example of something I just got to work;
import requests
import shutil
def download(url):
    """Stream the file at ``url`` into ``downloads/<last URL segment>``.

    Only a 200 response is written to disk. For any other status
    ``raise_for_status()`` is called, which raises ``requests.HTTPError`` on
    4xx/5xx responses; remaining statuses return without writing a file.
    """
    filename = url.split("/")[-1]
    path = "downloads/" + filename
    # The context manager closes the streamed response on every path, so the
    # connection is released even when the body is never read.
    with requests.get(url, stream=True) as r:
        if r.status_code != 200:
            r.raise_for_status()
            return
        # Ask the raw stream to undo gzip/deflate transfer encoding.
        r.raw.decode_content = True
        with open(path, 'wb') as f:
            shutil.copyfileobj(r.raw, f)


download('http://www.bocsar.nsw.gov.au/Documents/RCS-Annual/bluemountainslga.xlsx')
It reads the raw data from the request object and writes to a file where you want it to.

Print JSON data from csv list of multiple urls

Very new to Python and haven't found specific answer on SO but apologies in advance if this appears very naive or elsewhere already.
I am trying to print 'IncorporationDate' JSON data from multiple urls of public data set. I have the urls saved as a csv file, snippet below. I am only getting as far as printing ALL the JSON data from one url, and I am uncertain how to run that over all of the csv urls, and write to csv just the IncorporationDate values.
Any basic guidance or edits are really welcomed!
try:
# For Python 3.0 and later
from urllib.request import urlopen
except ImportError:
# Fall back to Python 2's urllib2
from urllib2 import urlopen
import json
def get_jsonparsed_data(url):
    """Fetch ``url`` and return its body parsed as JSON.

    The body is decoded as UTF-8 before parsing. The response is always
    closed, including when reading or decoding fails.
    """
    # closing() works with both the Python 3 and the Python 2 urlopen result;
    # the latter is not a context manager on its own.
    from contextlib import closing

    with closing(urlopen(url)) as response:
        data = response.read().decode("utf-8")
    return json.loads(data)
# Try the helper on a single company record and print the parsed JSON.
url = "http://data.companieshouse.gov.uk/doc/company/01046514.json"
print(get_jsonparsed_data(url))
import csv
# Split every line of test.csv on whitespace, then print one empty line
# per row that was read.
with open('test.csv') as f:
    lis = []
    for line in f:
        lis.append(line.split())
    for _ in lis:
        print()
import StringIO
# Copy the lines of a freshly created in-memory buffer into example.csv.
s = StringIO.StringIO()
with open('example.csv', 'w') as f:
    f.writelines(s)
Snippet of csv:
http://business.data.gov.uk/id/company/01046514.json
http://business.data.gov.uk/id/company/01751318.json
http://business.data.gov.uk/id/company/03164710.json
http://business.data.gov.uk/id/company/04403406.json
http://business.data.gov.uk/id/company/04405987.json
Welcome to the Python world.
For making HTTP requests, we commonly use the requests library because of its dead-simple API.
The code snippet below does what I believe you want:
It grabs the data from each of the urls you posted
It creates a new CSV file with each of the IncorporationDate keys.
```
import csv
import requests
COMPANY_URLS = [
    'http://business.data.gov.uk/id/company/01046514.json',
    'http://business.data.gov.uk/id/company/01751318.json',
    'http://business.data.gov.uk/id/company/03164710.json',
    'http://business.data.gov.uk/id/company/04403406.json',
    'http://business.data.gov.uk/id/company/04405987.json',
]


def get_company_data(urls=None):
    """Yield the parsed JSON body of every URL that answers with status 200.

    Args:
        urls: iterable of URLs to fetch. Defaults to ``COMPANY_URLS`` so
            existing zero-argument calls behave as before.

    URLs that answer with any other status are skipped.
    """
    if urls is None:
        urls = COMPANY_URLS
    for url in urls:
        res = requests.get(url)
        if res.status_code == 200:
            yield res.json()
if __name__ == '__main__':
    # Open the output once instead of once per row. newline='' is what the
    # csv module asks for, so the writer alone controls line endings.
    with open('out.csv', 'a', newline='') as csvfile:
        writer = csv.writer(csvfile)
        for data in get_company_data():
            try:
                incorporation_date = data['primaryTopic']['IncorporationDate']
            except KeyError:
                # This record carries no incorporation date; skip it.
                continue
            writer.writerow([incorporation_date])
```
First step, you have to read all the URLs in your CSV
import csv
# csv.reader needs an iterable of lines (such as an open file), not a file
# name: given the bare string it iterates over the characters of the name.
with open('text.csv', newline='') as csv_file:
    csvReader = csv.reader(csv_file)
    # next(csvReader) uncomment if you have a header in the .CSV file
    # Each row is a list of columns; the URL is taken from the first column.
    all_urls = [row[0] for row in csvReader if row]
Second step, fetch the data from the URL
from urllib.request import urlopen
def get_jsonparsed_data(url):
    """Fetch ``url`` and return its body parsed as JSON.

    The body is decoded as UTF-8 before parsing. The response is closed as
    soon as it has been read, including when reading or decoding fails.
    """
    with urlopen(url) as response:
        data = response.read().decode("utf-8")
    return json.loads(data)
# Placeholder call: substitute a real URL for the string before running.
url_data = get_jsonparsed_data("give_your_url_here")
Third step:
Go through all URLs that you got from CSV file
Get JSON data
Fetch the field what you need, in your case "IncorporationDate"
Write into an output CSV file, I'm naming it as IncorporationDates.csv
Code below:
# Open the output once, before the loop: re-opening it with 'w' for every URL
# truncates it each time and leaves only the last date in the file.
with open('IncorporationDates.csv', 'w') as abc:
    for each_url in all_urls:
        url_data = get_jsonparsed_data(each_url)
        # One date per line, so the values do not run together.
        abc.write(url_data['primaryTopic']['IncorporationDate'] + '\n')

How to batch read and then write a list of weblink .JSON files to specified locations on C drive in Python v2.7

I have a long list of .json files that I need to download to my computer. I want to download them as .json files (so no parsing or anything like that at this point).
I have some code that works for small files, but it is pretty buggy. Also it doesn't handle multiple links well.
Appreciate any advice to fix up this code:
import os
# Download one JSON document and store it unchanged under `path`.
filename = 'test.json'
path = "C:/Users//Master"
fullpath = os.path.join(path, filename)
import urllib2
url = 'https://www.premierlife.com/secure/index.json'
response = urllib2.urlopen(url)
try:
    webContent = response.read()
finally:
    response.close()
# `with` guarantees the file is closed and therefore flushed; a bare `f.close`
# without parentheses only names the method and never calls it. Binary mode
# stores the bytes exactly as they were received.
with open(fullpath, 'wb') as f:
    f.write(webContent)
It's creating a blank file because the f.close at the end should be f.close().
I took your code and made a little function and then called it on a little loop to go through a .txt file with the list of urls called "list_of_urls.txt" having 1 url per line (you can change the delimiter in the split function if you want to format it differently).
def save_json(url):
    """Download ``url`` and store the body as a file under ``C:/Users/Master``.

    The file name is ``url`` with every ``/`` and ``:`` removed, because
    those characters cannot be used in a file name.
    """
    import os
    import urllib2
    from contextlib import closing

    filename = url.replace('/', '').replace(':', '')
    path = "C:/Users/Master"
    fullpath = os.path.join(path, filename)
    # closing() releases the connection even if the read fails.
    with closing(urllib2.urlopen(url)) as response:
        webContent = response.read()
    # Binary mode keeps the downloaded bytes unchanged; `with` closes the
    # file even if the write fails.
    with open(fullpath, 'wb') as f:
        f.write(webContent)
And then the loop:
# Read the whole list, closing the file as soon as it has been read.
with open('list_of_urls.txt') as f:
    p = f.read()
url_list = p.split('\n') #here's where \n is the line break delimiter that can be changed
for url in url_list:
    # A trailing newline or an empty line yields an empty entry, which
    # cannot be fetched; skip those instead of crashing.
    url = url.strip()
    if not url:
        continue
    save_json(url)

Downloading Many Files using Python Web-Scraping

If I have a link to a CSV on Yahoo Finance: http://ichart.finance.yahoo.com/table.csv?s=LOW&d=4&e=29&f=2014&g=d&a=8&b=22&c=1981&ignore=.csv
how would I write a web scraper to download multiple files based on a list of symbols: [LOW, SPY, AAPL]
from StringIO import StringIO
from urllib2 import urlopen
# `symbols` must be defined beforehand (e.g. the list of ticker symbols).
for symbol in symbols:
    f = urlopen('http://www.myurl.com' + symbol + 'therestoftheurl')
    try:
        p = f.read()
    finally:
        # Actually call close(); a bare `f.close` only names the method.
        f.close()
    # Wrap the downloaded text in a file-like object held in memory.
    d = StringIO(p)
Do I need to write the contents of the url to file, or will it download automatically into a directory?
You can use a method like this to download files:
import urllib2
# Copy the response to disk in fixed-size chunks. `url` must be defined
# before this snippet runs.
file_name = "myfile.xyz"
u = urllib2.urlopen(url)
block_sz = 4096
try:
    # `with` closes the output file even if a read or write fails.
    with open(file_name, 'wb') as f:
        while True:
            chunk = u.read(block_sz)
            if not chunk:
                # An empty read marks the end of the response.
                break
            f.write(chunk)
finally:
    # Release the connection whether or not the copy succeeded.
    u.close()

Categories