Scraping the BBB site and converting JSON to a DataFrame - python

I would like to put this information into a dataframe and then export it to Excel. So far the Python tutorials I have followed produce table errors, and I have had no luck converting the JSON data to a dataframe.
Any tips would be very helpful.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
from urllib.request import urlopen
import bs4
import requests, re, json
headers = {'User-Agent':'Mozilla/5.0'}
r = requests.get('https://www.bbb.org/search?find_country=USA&find_entity=10126-000&find_id=357_10126-000_alias&find_text=roofing&find_type=Category&page=1&touched=1', headers = headers)
p = re.compile(r'PRELOADED_STATE__ = (.*?);')
data = json.loads(p.findall(r.text)[0])
results = [(item['businessName'], ' '.join([item['address'],item['city'], item['state'], item['postalcode']]), item['phone']) for item in data['searchResult']['results']]
print(results)

import re
import json
import requests
import pandas as pd
from bs4 import BeautifulSoup
headers = {'User-Agent':'Mozilla/5.0'}
r = requests.get('https://www.bbb.org/search?find_country=USA&find_entity=10126-000&find_id=357_10126-000_alias&find_text=roofing&find_type=Category&page=1&touched=1', headers = headers)
p = re.compile(r'PRELOADED_STATE__ = (.*?);')
data = json.loads(p.findall(r.text)[0])
results = [(item['businessName'], ' '.join([item['address'],item['city'], item['state'], item['postalcode']]), item['phone']) for item in data['searchResult']['results']]
df = pd.DataFrame(results, columns=['Business Name', 'Address', 'Phone'])
print(df)
df.to_csv('data.csv')
Prints:
Business Name Address Phone
0 Trinity Roofing, LLC Stilwell KS 66085-8238 [(913) 432-4425, (303) 699-7999]
1 Trinity Roofing, LLC 14241 E 4th Ave Ste 5-300 Aurora CO 80011-8733 [(913) 432-4425, (303) 699-7999]
2 CMR Construction & Roofing of Texas, LLC 12500 E US Highway 40, Ste. B1 Independence MO... [(855) 376-6326, (855) 766-3267]
3 All-Star Home Repairs LLC 1806 Grove Ave Richmond VA 23220-4506 [(804) 405-9337]
4 MadSky Roofing & Restoration, LLC Bank of America Center, 16th Floor 1111 E. Mai... [(855) 623-7597]
5 Robert Owens Roofing Bealeton VA 22712-9706 [(540) 878-3544]
6 Proof Seal of Athens PO Box 80732 Canton OH 447080732 [(330) 685-6363]
7 Proof Seal of Athens Athens OH 45701-1847 [(330) 685-6363]
8 Tenecela General Services Corp 57 Anderson St Lowell MA 01852-5357 None
9 Water Tight Roofing & Siding 57 Whitehall Way Hyannis MA 02601-2149 [(508) 364-8323]
10 Tenecela General Services Corp 745 Broadway St Fl 2 Lowell MA 01854-3137 None
11 Just In Time Roofing & Contracting, LLC ----- Ft Worth TX 76102 [(888) 666-3122, (254) 296-8016, (888) 370-3331]
12 Paramount Construction of Southerntier NY Inc. 323 Fluvanna Ave. Jamestown NY 14701 [(716) 487-0093]
13 Paramount Construction of Southerntier NY Inc. P O Box 488 Falconer NY 14733 [(716) 487-0093]
14 Paramount Construction of Southerntier NY Inc. 1879 Lyndon Boulevard Falconer NY 14733 [(716) 487-0093]
And saves data.csv (screenshot from LibreOffice omitted).
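Since the stated goal was Excel rather than CSV, the same DataFrame can also be written out with to_excel. A minimal sketch, assuming the openpyxl engine is installed and data.xlsx is the (hypothetical) target file name:

# write the scraped results to an Excel workbook instead of a CSV
df.to_excel('data.xlsx', index=False)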

Related

How to apply a pandas geocode function to a PySpark column

The table is like this:

id   ADDRESS
0    6101 SUMMITVIEW AVE STE 200 YAKIMA
1    527 CEDAR WAY SUITE 105 OAKMONT
2    1700 N ROSE AVE SUITE 460 OXNARD
3    1275 YORK AVE NEW YORK
4    2300 MANCHESTER EXPY A SUITE 101 A COLUMBUS
5    401 N MICHIGAN AVE CHICAGO
6    111 GROSSMAN DR INTERNAL MEDICINE BRAINTREE
7    1850 N CENTRAL AVE STE 1600 PHOENIX
8    47 NEW SCOTLAND AVENUE ALBANY MEDICAL CENTER A...
9    201 N VINE ST EL DORADO
10   4420 LAKE BOONE TRL RALEIGH
11   2727 W HOLCOMBE BLVD HOUSTON
12   850 PETER BRYCE BLVD TUSCALOOSA
13   1803 WEHRLI RD NAPERVILLE
14   4321 N MACDILL AVE STE 203 TAMPA
15   111 CONTINENTAL DR SUITE 412 NEWARK
16   1834 E INNOVATION PARK DR ORO VALLEY
17   880 KEMPSVILLE RD SUITE 2200 NORFOLK
18   701 PRINCETON AVE SW BIRMINGHAM
19   4729 COUNTY ROAD 101 MINNETONKA
import pandas as pd
import geopandas as gpd
import geopy
from geopy.geocoders import Nominatim
from geopy.extra.rate_limiter import RateLimiter
import matplotlib.pyplot as plt
import folium
from folium.plugins import FastMarkerCluster
locator = Nominatim(user_agent="myGeocoder")
geocode = RateLimiter(locator.geocode, min_delay_seconds=0.0, error_wait_seconds=1.0, swallow_exceptions=True, return_value_on_exception=None)
apprix_1_na['location'] = apprix_1_na['ADDRESS'].apply(geocode)
apprix_1_na['point'] = apprix_1_na['location'].apply(lambda loc: tuple(loc.point) if loc else None)
I want this code to work in PySpark to get the longitude and latitude.
I'll show a "complex" example with the GoogleV3 API. It is easily adaptable to your case:
from geopy.geocoders import GoogleV3
from pyspark.sql.functions import col, udf
from pyspark.sql.types import FloatType, ArrayType
df = spark.createDataFrame([("123 Fake St, Springfield, 12345, USA",),("1000 N West Street, Suite 1200 Wilmington, DE 19801, USA",)], ["address"])
df.display()
address
123 Fake St, Springfield, 12345, USA
1000 N West Street, Suite 1200 Wilmington, DE 19801, USA
@udf(returnType=ArrayType(FloatType()))
def geoloc(address):
    api = 'your_api_key_here'
    geolocator = GoogleV3(api)
    # get the (lat, long) pair from the geocoding result
    return geolocator.geocode(address)[1]

# find coordinates
df = df.withColumn('geocode', geoloc(col('address')))

# split the coordinate pair into two columns
df = df.withColumn("latitude", col('geocode').getItem(0))\
       .withColumn("longitude", col('geocode').getItem(1))
df.display()
address                                                     geocode                    latitude    longitude
123 Fake St, Springfield, 12345, USA                        [44.046238, -123.022026]   44.046238   -123.022026
1000 N West Street, Suite 1200 Wilmington, DE 19801, USA    [39.74717, -75.54999]      39.74717    -75.54999
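One caveat worth adding: geopy's geocode returns None for addresses it cannot match, and indexing that result with [1] raises a TypeError. A minimal defensive variant of the UDF above (a sketch; geoloc_safe is a hypothetical name and the API key is a placeholder, as in the answer):

from geopy.geocoders import GoogleV3
from pyspark.sql.functions import udf
from pyspark.sql.types import FloatType, ArrayType

@udf(returnType=ArrayType(FloatType()))
def geoloc_safe(address):
    # 'your_api_key_here' is a placeholder, as in the answer above
    geolocator = GoogleV3('your_api_key_here')
    location = geolocator.geocode(address)
    # geopy returns None for unmatched addresses; propagate that instead of raising
    return [location.latitude, location.longitude] if location else None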

Beautifulsoup Python loops

I have this code that returns None for each row, can someone help me?
from bs4 import BeautifulSoup
import requests
import pandas as pd
website = 'https://www.bloodyelbow.com/22198483/comprehensive-list-of-ufc-fighters-who-have-tested-positive-for-covid-19'
response = requests.get(website)
soup = BeautifulSoup(response.content, 'html.parser')
results = soup.find('table',{'class':'p-data-table'}).find('tbody').find_all('tr')
name=[]
reported_date=[]
card=[]
card_date=[]
opponent=[]
resolution=[]
for result in results:
    print(name.append(result.find_all('td')[0].get_text()))
Each row prints None because list.append mutates the list in place and returns None. Rather than building the lists cell by cell, you can use pandas directly and get all the columns up to the last two:
import pandas
website = "https://www.bloodyelbow.com/22198483/comprehensive-list-of-ufc-fighters-who-have-tested-positive-for-covid-19"
df = pandas.read_html(website)[0].iloc[:, :-2]
print(df.to_string())
Output (truncated):
Fighter Reported Card Card Date Opponent Resolution
0 Rani Yahya 7/31/2021 UFC Vegas 33 7/31/2021 Kyung Ho Kang Fight scratched
1 Amanda Nunes 7/29/2021 UFC 265 8/7/2021 Julianna Pena Fight scratched
2 Amanda Ribas 5/23/2021 UFC Vegas 28 6/5/2021 Angela Hill Fight scratched
3 Jack Hermansson 5/19/2021 UFC 262 5/17/2021 Edmen Shahbazyan Rescheduled for UFC Vegas 27 - May 22
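If you would rather keep the explicit BeautifulSoup loop from the question, the fix is to append the cell text and not print the return value of append. A sketch, assuming each row holds six td cells in the column order shown above, using the lists the question already defines:

import pandas as pd

for result in results:
    cells = result.find_all('td')
    name.append(cells[0].get_text(strip=True))
    reported_date.append(cells[1].get_text(strip=True))
    card.append(cells[2].get_text(strip=True))
    card_date.append(cells[3].get_text(strip=True))
    opponent.append(cells[4].get_text(strip=True))
    resolution.append(cells[5].get_text(strip=True))

df = pd.DataFrame({'Fighter': name, 'Reported': reported_date, 'Card': card,
                   'Card Date': card_date, 'Opponent': opponent, 'Resolution': resolution})
print(df)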

scrape address and phone numbers from this website

How do I scrape the data from the tag and the contact-info class on this site, and export it to a CSV file using the bs4 and pandas libraries? I need help on how to scrape data from the tag and the contact-info class.
import pandas as pd
import bs4
import requests
import re
full_dict={'Title':[],'Description':[],'Address':[]}
res=requests.get("https://cupcakemaps.com/cupcakes/cupcakes-near-me/p:2")
listings=soup.findAll(class_='media')
for listing in listings:
    listing_title = listing.find(True, {'title': True}).attrs['title']
    listing_Description = listing.find('p', {'class': 'summary-desc'})
    listing_address = listing.find('p', {'class': 'contact-info'}).text = re.compile(r'[0-9]{0,4}')
.strip() - a built-in Python string method; removes all leading and trailing whitespace from a string.
.to_csv() - writes the object to a comma-separated values (csv) file.
Ex.
import pandas as pd
from bs4 import BeautifulSoup, Tag
import requests

res = requests.get("https://cupcakemaps.com/cupcakes/cupcakes-near-me/p:2")
soup = BeautifulSoup(res.text, 'lxml')
listings = soup.findAll(class_='media')
data = []
for listing in listings:
    listing_title = listing.find(True, {'title': True}).attrs['title']
    listing_Description = listing.find('p', {'class': 'summary-desc'})
    if isinstance(listing_Description, Tag):
        listing_Description = listing_Description.text.strip()
    listing_address = listing.find('p', {'class': 'contact-info'})
    if isinstance(listing_address, Tag):
        # keep only the digits of the contact info (the phone number)
        number_text = listing_address.text.strip()
        listing_address = ''.join(filter(str.isdigit, number_text))
    full_dict = {'Title': listing_title, 'Description': listing_Description, 'Address': listing_address}
    data.append(full_dict)
df = pd.DataFrame(data)
# save the data into a csv file
df.to_csv("contact.csv")
print(df)
Output:
Title Description Address
0 Explore Category 'Anaheim CA Birthday Cupcakes... Delectable Anaheim, CA - Delectable check out ... 7147156086
1 Explore Category 'Costa Mesa CA Birthday Cupca... Lisa's Gourmet Snacks Costa Mesa CA check out... 7144275814
2 Explore Category 'Shorewood IL Birthday Cupcak... Acapulco Bakery Inc Shorewood, IL - Acapulco B... 8157291737
3 Explore Category 'San Francisco CA Birthday Cu... Hilda's Mart & Bake Shop San Francisco CA che... 4153333122
4 Explore Category 'Los Angeles CA Birthday Cupc... Lenny's Deli Los Angeles, CA - Lenny's Deli ch... 3104755771
5 Explore Category 'San Francisco CA Birthday Cu... Sweet Inspirations San Francisco CA check out... None
6 Explore Category 'Costa Mesa CA Birthday Cupca... The Cupcake Costa Mesa CA check out The Cupc... 9496420571
7 Explore Category 'Los Angeles CA Birthday Cupc... United Bread & Pastry Inc Los Angeles CA chec... 3236610037
8 Explore Category 'Garden Grove CA Birthday Cup... Pescadores Garden Grove CA check out Pescado... 7145395585
9 Explore Category 'Bakersfield CA Birthday Cupc... Bimbo Bakeries Usa Bakersfield CA check out ... 6613219352
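Since the question imports re anyway, the digit filtering in the answer could equivalently use a regex. A one-line alternative on the same number_text variable as above:

import re

# drop every non-digit character, leaving just the phone number
listing_address = re.sub(r'\D', '', number_text)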

Scrape website to only show populated categories

I am in the process of scraping a website. It pulls the contents of the page, but some categories are technically empty, and their headers still show. I would like to only see categories with events in them. Ideally I could even have the components of each transaction so I can choose which elements I want displayed.
import requests
from bs4 import BeautifulSoup
headers = {'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.94 Safari/537.36'}
print('Scraping NH Dept of Banking...')
print()
NHurl = 'https://www.nh.gov/banking/corporate-activities/index.htm'
NHr = requests.get(NHurl, headers = headers)
NHsoup = BeautifulSoup(NHr.text, 'html.parser')
NHlist = []
for events in NHsoup.findAll('tr')[2:]:
    print(events.text)
    NHlist.append(events.text)
print(' '.join(NHlist))
Like I said, this works to get all of the information, but there are a lot of headers and empty space that don't need to be pulled. For example, at the time I'm writing this, 'acquisitions', 'conversions', and 'change in control' are empty, but the headers still come in, and there's a relatively large blank space after them. I feel like I need some sort of loop to go through each header ('td') and then get its contents ('tr'), but I'm just not quite sure how to do it.
You can use itertools.groupby to group the rows and then filter out the empty ones. The grouping key below keeps a counter in a mutable default argument and increments it whenever a row contains a th header cell, so each header starts a new group:
import requests
from itertools import groupby
from bs4 import BeautifulSoup
headers = {'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.94 Safari/537.36'}
print('Scraping NH Dept of Banking...')
print()
NHurl = 'https://www.nh.gov/banking/corporate-activities/index.htm'
NHr = requests.get(NHurl, headers = headers)
NHsoup = BeautifulSoup(NHr.text, 'html.parser')
NHlist = []
for _, g in groupby(NHsoup.select('tr'), lambda k, d={'g':0}: (d.update(g=d['g']+1), d['g']) if k.select('th') else (None, d['g'])):
    s = [tag.get_text(strip=True, separator=' ') for tag in g]
    if any(i == '' for i in s):
        continue
    NHlist.append(s)

# This is just pretty printing, all the data are already in NHlist:
l = max(map(len, (j for i in NHlist for j in i))) + 5
for item in NHlist:
    print('{: <4} {}'.format(' ', item[0]))
    print('-' * l)
    for i, ev in enumerate(item[1:], 1):
        print('{: <4} {}'.format(i, ev))
    print()
Prints:
Scraping NH Dept of Banking...
New Bank
------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
1 12/11/18 The Millyard Bank
Interstate Bank Combination
------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
1 01/16/19 Optima Bank & Trust Company with and into Cambridge Trust Company Portsmouth, NH 03/29/19
Amendment to Articles of Agreement or Incorporation; Business or Capital Plan
------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
1 11/26/18 John Hancock Trust Company Boston, MA 01/14/19
2 12/04/18 Franklin Savings Bank Franklin, NH 01/28/19
3 12/12/18 MFS Heritage Trust Company Boston, MA 01/28/19
4 02/25/19 Ankura Trust Company, LLC Fairfield, CT 03/22/19
5 4/25/19 Woodsville Guaranty Savings Bank Woodsville, NH 06/04/19
6 5/10/19 AB Trust Company New York, NY 06/04/19
Reduction in Capital
------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
1 03/07/19 Primary Bank Bedford, NH 04/10/19
Amendment to Bylaws
------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
1 12/10/18 Northeast Credit Union Porstmouth, NH 02/25/19
2 2/25/19 Members First Credit Union Manchester, NH 04/05/19
3 4/24/19 St. Mary's Bank Manchester, NH 05/30/19
4 6/28/19 Bellwether Community Credit Union
Interstate Branch Office
------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
1 01/23/19 Newburyport Five Cents Savings Bank 141 Portsmouth Ave Exeter, NH 02/01/19
2 03/08/19 One Credit Union Newport, NH 03/29/19
3 03/01/19 JPMorgan Chase Bank, NA Nashua, NH 04/04/19
4 03/26/19 Mascoma Bank Lebanon, NH 04/09/19
5 04/24/19 Newburyport Five Cents Savings Bank 321 Lafayette Rd Hampton NH 05/08/19
6 07/10/19 Mascoma Bank 242-244 North Winooski Avenue Burlington VT 07/18/19
7 07/10/19 Mascoma Bank 431 Pine Street Burlington VT 07/18/19
Interstate Branch Office Closure
------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
1 02/15/19 The Provident Bank 321 Lafayette Rd Hampton, NH 02/25/19
New Branch Office
------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
1 12/07/18 Bank of New Hampshire 16-18 South Main Street Concord NH 01/02/19
2 3/4/19 Triangle Credit Union 360 Daniel Webster Highway, Merrimack, NH 03/11/19
3 04/03/19 Bellwether Community Credit Union 425-453 Commercial Street Manchester, NH 04/17/19
4 06/11/19 Primary Bank 23 Crystal Avenue Derry NH 06/11/19
Branch Office Closure
------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
1 5/15/19 Northeast Credit Union Merrimack, NH 05/21/19
New Loan Production Office
------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
1 04/08/19 Community National Bank 367 Route 120, Unit B-5 Lebanon, NH
03766-1430 04/15/19
Loan Production Office Closure
------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
1 07/22/19 The Provident Bank 20 Trafalgar Square, Suite 447 Nashua NH 03063 07/31/19
Trade Name Requests
------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
1 04/16/19 John Hancock Trust Company To use trade name "Manulife Investment Management Trust Company" 04/24/19
New Trust Company
------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
1 02/19/19 Janney Trust Co., LLC
2 02/25/19 Darwin Trust Company of New Hampshire, LLC
3 07/15/`9 Harbor Trust Company
Dissolution of Trust Company
------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
1 09/19/17 Cambridge Associates Fiduciary Trust, LLC Boston, MA 02/05/19
Trust Office Closure
------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
1 5/10/19 Charter Trust Company Rochester, NH 05/20/19
New Trust Office
------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
1 02/25/19 Ankura Trust Company, LLC 140 Sherman Street, 4th Floor Fairfield, CT 06824 03/22/19
Relocation of Trust Office
------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
1 01/23/19 Geode Capital Management Trust Company, LLC Relocate from: One Post Office Square, 20th Floor, Boston MA To: 100 Summer Street, 12th Flr, Boston, MA 02/01/19
2 03/15/19 Drivetrain Trust Company LLC Relocate from: 630 3rd Avenue, 21st Flr New York, NY 10017 To: 410 Park Avenue, Suite 900 New York, NY 10022 03/29/19
3 04/14/19 Boston Partners Trust Company Relocate from: 909 Third Avenue New York, NY 10022 To: One Grand Central Place 60 East 42nd Street, Ste 1550 New York, NY 10165 04/23/19
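The grouping key above may be easier to see in isolation: the mutable default dict survives across calls, so the counter bumps once per header row and consecutive data rows share the header's group key. A tiny standalone demo of the same pattern (hypothetical values):

from itertools import groupby

rows = ['HEADER A', 'a1', 'a2', 'HEADER B', 'b1']
# counter in a mutable default: bump on headers, reuse on data rows
key = lambda k, d={'g': 0}: (d.update(g=d['g'] + 1), d['g'])[1] if k.startswith('HEADER') else d['g']
for g, items in groupby(rows, key):
    print(g, list(items))
# prints: 1 ['HEADER A', 'a1', 'a2'] then 2 ['HEADER B', 'b1']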
You could test which rows consist entirely of '\xa0' (and so appear blank) and exclude them. I append to a list and convert to a pandas dataframe, but you could just print the rows directly.
import requests
from bs4 import BeautifulSoup as bs
import pandas as pd
r = requests.get('https://www.nh.gov/banking/corporate-activities/index.htm')
soup = bs(r.content, 'lxml')
results = []
for tr in soup.select('tr'):
    row = [i.text for i in tr.select('th,td')]
    # keep the row unless every cell is a non-breaking space (i.e. visually empty)
    if row.count('\xa0') != len(row):
        results.append(row)
pd.set_option('display.width', 100)
df = pd.DataFrame(results)
df.style.set_properties(**{'text-align': 'left'})
df.columns = df.iloc[0]
df = df[1:]
df.fillna(value='', inplace=True)
print(df.head(20))
Not sure if this is how you want it, and there is probably a more elegant way, but what I basically did was:
Use pandas to get the table
Pandas automatically assigns columns, so move the column headers into the first row
Find where rows are all nulls
Drop the all-null rows and the row before each one (its subheader)
import pandas as pd
print('Scraping NH Dept of Banking...')
print()
NHurl = 'https://www.nh.gov/banking/corporate-activities/index.htm'
df = pd.read_html(NHurl)[0]
top_row = pd.DataFrame([df.columns], index=[-1])
df.columns = top_row.columns
df = df.append(top_row, sort=True).sort_index().reset_index(drop=True)
null_rows = df[df.isnull().values.all(axis=1)].index.tolist()
drop_hdr_rows = [x - 1 for x in null_rows ]
drop_rows = drop_hdr_rows + null_rows
new_df = df[~df.index.isin(drop_rows)]
Output:
print (new_df.to_string())
0 1 2 3
2 New Bank New Bank New Bank New Bank
3 12/11/18 The Millyard Bank NaN NaN
4 Interstate Bank Combination Interstate Bank Combination Interstate Bank Combination Interstate Bank Combination
5 01/16/19 Optima Bank & Trust Company with and into Camb... Portsmouth, NH 03/29/19
12 Amendment to Articles of Agreement or Incorpor... Amendment to Articles of Agreement or Incorpor... Amendment to Articles of Agreement or Incorpor... Amendment to Articles of Agreement or Incorpor...
13 11/26/18 John Hancock Trust Company Boston, MA 01/14/19
14 12/04/18 Franklin Savings Bank Franklin, NH 01/28/19
15 12/12/18 MFS Heritage Trust Company Boston, MA 01/28/19
16 02/25/19 Ankura Trust Company, LLC Fairfield, CT 03/22/19
17 4/25/19 Woodsville Guaranty Savings Bank Woodsville, NH 06/04/19
18 5/10/19 AB Trust Company New York, NY 06/04/19
19 Reduction in Capital Reduction in Capital Reduction in Capital Reduction in Capital
20 03/07/19 Primary Bank Bedford, NH 04/10/19
21 Amendment to Bylaws Amendment to Bylaws Amendment to Bylaws Amendment to Bylaws
22 12/10/18 Northeast Credit Union Porstmouth, NH 02/25/19
23 2/25/19 Members First Credit Union Manchester, NH 04/05/19
24 4/24/19 St. Mary's Bank Manchester, NH 05/30/19
25 6/28/19 Bellwether Community Credit Union NaN NaN
26 Interstate Branch Office Interstate Branch Office Interstate Branch Office Interstate Branch Office
27 01/23/19 Newburyport Five Cents Savings Bank 141 Portsmouth Ave Exeter, NH 02/01/19
28 03/08/19 One Credit Union Newport, NH 03/29/19
29 03/01/19 JPMorgan Chase Bank, NA Nashua, NH 04/04/19
30 03/26/19 Mascoma Bank Lebanon, NH 04/09/19
31 04/24/19 Newburyport Five Cents Savings Bank 321 Lafayette Rd Hampton NH 05/08/19
32 07/10/19 Mascoma Bank 242-244 North Winooski Avenue Burlington VT 07/18/19
33 07/10/19 Mascoma Bank 431 Pine Street Burlington VT 07/18/19
34 Interstate Branch Office Closure Interstate Branch Office Closure Interstate Branch Office Closure Interstate Branch Office Closure
35 02/15/19 The Provident Bank 321 Lafayette Rd Hampton, NH 02/25/19
36 New Branch Office New Branch Office New Branch Office New Branch Office
37 12/07/18 Bank of New Hampshire 16-18 South Main Street Concord NH 01/02/19
38 3/4/19 Triangle Credit Union 360 Daniel Webster Highway, Merrimack, NH 03/11/19
39 04/03/19 Bellwether Community Credit Union 425-453 Commercial Street Manchester, NH 04/17/19
40 06/11/19 Primary Bank 23 Crystal Avenue Derry NH 06/11/19
41 Branch Office Closure Branch Office Closure Branch Office Closure Branch Office Closure
42 5/15/19 Northeast Credit Union Merrimack, NH 05/21/19
43 New Loan Production Office New Loan Production Office New Loan Production Office New Loan Production Office
44 04/08/19 Community National Bank 367 Route 120, Unit B-5 Lebanon, NH 03766-1430 04/15/19
45 Loan Production Office Closure Loan Production Office Closure Loan Production Office Closure Loan Production Office Closure
46 07/22/19 The Provident Bank 20 Trafalgar Square, Suite 447 Nashua NH 03063 07/31/19
51 Trade Name Requests Trade Name Requests Trade Name Requests Trade Name Requests
52 04/16/19 John Hancock Trust Company To use trade name "Manulife Investment Managem... 04/24/19
53 New Trust Company New Trust Company New Trust Company New Trust Company
54 02/19/19 Janney Trust Co., LLC NaN NaN
55 02/25/19 Darwin Trust Company of New Hampshire, LLC NaN NaN
56 07/15/`9 Harbor Trust Company NaN NaN
57 Dissolution of Trust Company Dissolution of Trust Company Dissolution of Trust Company Dissolution of Trust Company
58 09/19/17 Cambridge Associates Fiduciary Trust, LLC Boston, MA 02/05/19
59 Trust Office Closure Trust Office Closure Trust Office Closure Trust Office Closure
60 5/10/19 Charter Trust Company Rochester, NH 05/20/19
61 New Trust Office New Trust Office New Trust Office New Trust Office
62 02/25/19 Ankura Trust Company, LLC 140 Sherman Street, 4th Floor Fairfield, CT 0... 03/22/19
63 Relocation of Trust Office Relocation of Trust Office Relocation of Trust Office Relocation of Trust Office
64 01/23/19 Geode Capital Management Trust Company, LLC Relocate from: One Post Office Square, 20th Fl... 02/01/19
65 03/15/19 Drivetrain Trust Company LLC Relocate from: 630 3rd Avenue, 21st Flr New Y... 03/29/19
66 04/14/19 Boston Partners Trust Company Relocate from: 909 Third Avenue New York, NY ... 04/23/19
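As a side note, the DataFrame.append used in this answer was deprecated and removed in pandas 2.0; on recent pandas, the same header-to-first-row step can be expressed with pd.concat. A sketch over the same df and top_row as above:

import pandas as pd

# equivalent of the deprecated df.append(top_row, sort=True) call
df = pd.concat([df, top_row], sort=True).sort_index().reset_index(drop=True)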

Extracting Lat/Long from returned JSON via Google API (Geocoding)

I have a definition which looks like the block below, and I think I'm experiencing the same problem: the API has updated, so the extraction of the lat/long coordinates may be in a slightly different position. I have made requests successfully by inputting an example address as a parameter, but I can't get that to work in my def (http://docs.python-requests.org/en/master/user/quickstart/#make-a-request). I want my definition to return the lat/longs from the address using a for loop. I'm unfamiliar with parsing JSON :/ Any help appreciated!
Also, would geocode_result need to be json_results from my request results code block?
def geocode_address(loc):
    gmaps = googlemaps.Client(key=creds.GOOGLE_MAPS['api_key'])
    geocode_result = gmaps.geocode(loc)
    lat = json_results[0]["geometry"]["location"]["lat"]
    lon = json_results[0]["geometry"]["location"]["lng"]
    print(lat, lon)
I don't see a difference between what this does and what your code does. Hope it's of some use to you.
>>> import requests
>>> base_url = 'https://maps.googleapis.com/maps/api/geocode/json'
>>> payload = {'address': '1845 E. Broadway Road Ste. 102, Tempe, AZ, 85282', 'key': 'YOUR_API_KEY'}  # placeholder key; the real key was elided
>>> r = requests.get(base_url, params=payload)
>>> r
<Response [200]>
>>> coords = r.json()['results'][0]['geometry']['location']
>>> coords['lat']
33.406601
>>> coords['lng']
-111.9075196
EDIT:
Start with a dataframe of two columns, one with the names of the veterinary hospitals, one with their addresses.
>>> import pandas as pd
>>> df
0 \
0 The Animal Clinic
1 Sherbourne Animal Hospital
2 Spadina Animal Hospital
3 Wellesley Animal Hospital
4 Cabbagetown Pet Clinic
1
0 106 Mutual St Toronto Ontario M5B 2R7
1 320 Richmond Street East Unit 8 Toronto Onta...
2 125 Spadina Avenue Toronto Ontario M5V 2K8
3 8 Wellesley St W Toronto Ontario M4Y 1E7
4 239 Gerrard St E Toronto Ontario M5A 2G1
Use .tolist() to obtain the addresses as a list so that they can be passed one at a time to Google for their latitudes and longitudes, which are stored in the eponymous lists. Display the results.
>>> import requests
>>> base_url = 'https://maps.googleapis.com/maps/api/geocode/json'
>>> latitudes = []
>>> longitudes = []
>>> for address in df[1].tolist():
... payload = {'address': address}
... r = requests.get(base_url, params=payload)
... coords = r.json()['results'][0]['geometry']['location']
... latitudes.append(coords['lat'])
... longitudes.append(coords['lng'])
...
>>> latitudes
[43.6572571, 43.6535161, 43.6472168, 43.6650199, 43.6617416]
>>> longitudes
[-79.37609119999999, -79.3688681, -79.39527749999999, -79.3851912, -79.369494]
Now put the results into the dataframe and display the complete result.
>>> df['latitudes'] = latitudes
>>> df['longitudes'] = longitudes
>>> df
0 \
0 The Animal Clinic
1 Sherbourne Animal Hospital
2 Spadina Animal Hospital
3 Wellesley Animal Hospital
4 Cabbagetown Pet Clinic
1 lat latitudes \
0 106 Mutual St Toronto Ontario M5B 2R7 -31 43.657257
1 320 Richmond Street East Unit 8 Toronto Onta... -42 43.653516
2 125 Spadina Avenue Toronto Ontario M5V 2K8 -20 43.647217
3 8 Wellesley St W Toronto Ontario M4Y 1E7 19 43.665020
4 239 Gerrard St E Toronto Ontario M5A 2G1 50 43.661742
longitudes
0 -79.376091
1 -79.368868
2 -79.395277
3 -79.385191
4 -79.369494
