Geocoder in python takes too much time to run - python

I'm trying to associate the city/country/state name to the latitude and longitude of my dataset. This is how I made it:
import pandas as pd
import io
from geopy.geocoders import Nominatim
geolocator = Nominatim(user_agent="geoapiExercises")
def city_state_country(row):
    """Reverse-geocode one row and attach its city, state and country.

    Reads ``row['latitude']`` and ``row['longitude']``, queries the
    module-level ``geolocator`` and stores the results in ``row['city']``,
    ``row['state']`` and ``row['country']``. A field is set to an empty
    string when the geocoder has no match for the coordinates or the
    returned address does not contain it.

    Returns the same ``row`` so the function can be used with
    ``DataFrame.apply(..., axis=1)``.
    """
    coord = f"{row['latitude']}, {row['longitude']}"
    location = geolocator.reverse(coord, exactly_one=True)
    # reverse() yields None when nothing is found at the coordinates; treat
    # that as an empty address instead of failing on ``None.raw``.
    address = location.raw.get('address', {}) if location is not None else {}
    for field in ('city', 'state', 'country'):
        row[field] = address.get(field, '')
    return row
ddf_slice= ddf_slice.apply(city_state_country, axis=1)
But I have a very large number of rows, and it takes forever to run. How can I speed this up?

Related

Within a dataframe, how can we write one row at a time and one column at a time?

I'm looping through records in a dataframe column and trying to pull geocode data for each. Here's the code that I'm testing.
from geopy.geocoders import Nominatim
geolocator = Nominatim(user_agent="http")
# Geocode every value in df_fin['market_address'] and try to record the
# result on df_fin.
# NOTE(review): the snippet's indentation was lost; the try/except below is
# presumably the body of the for loop.
for item in df_fin['market_address']:
try:
location = geolocator.geocode(item)
# NOTE(review): .loc with a single label addresses a ROW, so each of the four
# assignments below writes to the row labelled 'address' / 'latitude' /
# 'longitude' / 'raw' on every iteration instead of filling a column for the
# current item.
df_fin.loc['address'] = location.address
df_fin.loc['latitude'] = location.latitude
df_fin.loc['longitude'] = location.longitude
df_fin.loc['raw'] = location.raw
print(location.raw)
# Bare except: any failure in the try block (including attribute access on a
# missing geocoder result) ends up here and is reported as "no info".
except:
df_fin.loc['raw'] = 'no info for: ' + item
print('no info for: ' + item)
I must be missing something simple, but I'm just not seeing what the issue is here.
UPDATE:
from geopy.geocoders import Nominatim
geolocator = Nominatim(user_agent="http")
# Second attempt: iterate with iterrows() and write the geocoding result
# into the yielded row.
# NOTE(review): iterrows() is a DataFrame method; df_fin.market_address is
# presumably a single column (a Series), which has no iterrows() — confirm.
for index, row in df_fin.market_address.iterrows():
try:
location = geolocator.geocode(row)
# NOTE(review): these assignments target the object yielded by the iterator,
# not df_fin itself — verify that they are visible in df_fin afterwards.
row['address'] = location.address
row['latitude'] = location.latitude
row['longitude'] = location.longitude
row['raw'] = location.raw
print(location.raw)
# Bare except: every error raised inside the try block is handled here.
except:
# NOTE(review): 'no info for: ' + row concatenates a str with the row
# object, not with the address text.
row['raw'] = 'no info for: ' + row
print('no info for: ' + row)
# Show the last 10 rows of df_fin to inspect the outcome.
df_fin.tail(10)
You can reference below code :
from geopy.geocoders import Nominatim
geolocator = Nominatim(user_agent="http")
# Geocode df_fin['market_address'] row by row and store the result in the
# 'address', 'latitude', 'longitude' and 'raw' columns of df_fin.

# Create the result columns up front (object dtype) so the cell-wise writes
# below, including the dict stored in 'raw', land in existing columns.
for column in ('address', 'latitude', 'longitude', 'raw'):
    if column not in df_fin.columns:
        df_fin[column] = None

for index, row in df_fin.iterrows():
    # The address to look up lives in this row's 'market_address' column
    # (the earlier version referenced an undefined name ``item``).
    item = row['market_address']
    try:
        location = geolocator.geocode(item)
    except Exception as exc:
        # Network/service errors should not abort the whole run.
        print('lookup failed for: {} ({})'.format(item, exc))
        location = None

    if location is None:
        # geocode() found nothing (or failed): record a marker and move on.
        message = 'no info for: {}'.format(item)
        df_fin.at[index, 'raw'] = message
        print(message)
        continue

    # iterrows() yields copies of the rows, so assigning to ``row`` would be
    # lost; write through df_fin.at[...] to update the frame itself.
    # NOTE: .at addresses cells by label, so df_fin's index should be unique.
    df_fin.at[index, 'address'] = location.address
    df_fin.at[index, 'latitude'] = location.latitude
    df_fin.at[index, 'longitude'] = location.longitude
    df_fin.at[index, 'raw'] = location.raw
    print(location.raw)
And if you are more familiar with Pandas, you can use @DYZ's answer.
You should define a function that converts market_address into the address, lat, and long, and .apply that function to the DataFrame.
def locate(market_address):
    """Geocode ``market_address`` and return the result as a ``pd.Series``.

    The Series has the entries 'address', 'latitude', 'longitude' and 'raw'.
    When the module-level ``geolocator`` returns a falsy result, every entry
    is ``np.nan``.
    """
    match = geolocator.geocode(market_address)
    if match:
        fields = {
            'address': match.address,
            'latitude': match.latitude,
            'longitude': match.longitude,
            'raw': match.raw,
        }
    else:
        # No usable result: keep the same keys so the output shape is stable.
        fields = dict.fromkeys(('address', 'latitude', 'longitude', 'raw'), np.nan)
    return pd.Series(fields)
df_fin.join(df_fin['market_address'].apply(locate))
Note that loc.raw is a dictionary. When you store a dictionary in a DataFrame, you are looking for trouble in the future.

How to store the result of a function in a DataFrame alongside the related column

I return the data from a function as a dictionary and want to store it in a DataFrame.
When I run the function in a for loop, I get an error.
import pyowm
from pyowm.utils import config
from pyowm.utils import timestamps
owm = pyowm.OWM(" your free api key from OpenWeatherMap")
mgr = owm.weather_manager()
data =[]
# Create function to get weather details
def get_weather(city):
    """Look up the current weather for ``city`` and return it as a flat dict.

    Uses the module-level weather manager ``mgr``. The returned dict has the
    keys "City", "Wind_Speed", "Temp", "Max_temp", "Min_temp", "Humidity"
    and "Pressure"; temperatures are requested in Celsius.
    """
    current = mgr.weather_at_place(city).weather
    wind_speed = current.wind()['speed']
    # Read the Celsius temperature dict once and pick the three values from it.
    celsius = current.temperature('celsius')
    # Heat_index = current.heat_index  (left disabled, as in the original)
    return {
        "City": city,
        "Wind_Speed": wind_speed,
        "Temp": celsius['temp'],
        "Max_temp": celsius['temp_max'],
        "Min_temp": celsius['temp_min'],
        "Humidity": current.humidity,
        "Pressure": current.pressure['press'],
    }
# Fetch the weather once per distinct city, keep the dicts in ``data`` and
# attach them to df_location as new columns.
import pandas as pd

# Column order of the dicts returned by get_weather(); passing it explicitly
# keeps the 'City' key available for the merge even when no city was fetched.
weather_columns = ["City", "Wind_Speed", "Temp", "Max_temp", "Min_temp",
                   "Humidity", "Pressure"]

# The return value of get_weather() must be kept: previously it was discarded,
# so ``data`` stayed empty. Querying each distinct city once also avoids
# repeated API calls and duplicate rows after the merge.
weather_rows = [get_weather(city)
                for city in df_location['City'].dropna().drop_duplicates()]
data.extend(weather_rows)

# DataFrame.append no longer exists in pandas 2; build a frame from the dicts
# and left-merge it so every row of df_location gets its city's weather.
df = df_location.merge(pd.DataFrame(weather_rows, columns=weather_columns),
                       on='City', how='left')
I want to store those weather details in the same DataFrame, next to the corresponding city.
Current df_location is like:

Get time from city name using Python

As you can see in the title, I want to find the time of given city in Python. How can I achieve this? I've tried geopy and timezonefinder modules but they are giving me different results too. (like 'What time is it in Spotify?', 'It's 12:04')
What I'm trying to achieve is:
What time is it in California?
It's 16:15
THE CODE
import nltk
import datetime
import calendar
import pytz
from geopy.geocoders import Nominatim
from timezonefinder import TimezoneFinder
# Ask for a free-text input, extract a place name from it, geocode the
# place, resolve its timezone and print the current date there.
# NOTE(review): ``self`` is used at this level, so this is presumably the
# body of a method whose header is not shown.
self.inp = input("City name: ")
# Find city name using NLP
# Title-case every word of the input before tagging.
findCityName = str(self.inp.title())
# NLP: tokenize, part-of-speech tag, then named-entity chunk the words.
word = nltk.word_tokenize(findCityName)
pos_tag = nltk.pos_tag(word)
chunk = nltk.ne_chunk(pos_tag)
# Keep only the chunks that are nltk.Tree nodes and join each chunk's words;
# self.inp is a list of strings after this line ...
self.inp = [ " ".join(w for w, t in ele) for ele in chunk if isinstance(ele, nltk.Tree)]
# ... and a single space-joined string after this one (empty if no chunk
# was a Tree).
self.inp = ' '.join(self.inp)
# Get lat, long from city name
geolocator = Nominatim(user_agent='xxx')
# str.capitalize() upper-cases the first character and lower-cases the rest,
# so a multi-word name such as "New York" is sent as "New york".
location = geolocator.geocode(self.inp.capitalize())
# Get timezone from coordinates
tf = TimezoneFinder()
# NOTE(review): if geocode() found nothing, ``location`` is presumably None
# and this attribute access fails — confirm and guard if needed.
latitude, longitude = location.latitude, location.longitude
# Timezone name for the coordinates.
datez = tf.timezone_at(lng=longitude, lat=latitude)
# str() turns a missing result (None) into the text 'None', which
# pytz.timezone() would then be asked to resolve.
datez = str(datez)
globalDate = datetime.datetime.now(pytz.timezone(datez))
# The format '%A, %m/%d/%y' prints weekday and date only, not the time of day.
print("The date in " + str(self.inp) + " is: " + globalDate.strftime('%A, %m/%d/%y'))

Finding nearby cities using Google API

I want to get nearby cities from passed latitude and longitude. I have used the geonames and geobytes APIs but want to use Google API for finding nearby cities.
This is my code:
def getNearbyCities(self):
    """Return ``(data[1], data[7])`` pairs for entries near ``self.city``.

    Queries the geobytes GetNearbyCities endpoint with ``self.city`` as the
    location code (radius 100), parses the wrapped response and returns a
    list of tuples, one per distinct ``data[1]`` value, in response order.
    The tuples are presumably (city name, distance) — verify against the
    geobytes response format.

    The list is also pretty-printed before it is returned.
    """
    import pprint

    # Alternative lookup by coordinates, kept for reference:
    # lat, lon = self.getLatLon()
    # res_url = urlopen('http://gd.geobytes.com/GetNearbyCities?callback=?&radius=100&limit=100&Latitude=' + str(lat) + '&Longitude=' + str(lon))
    url = ('http://getnearbycities.geobytes.com/GetNearbyCities'
           '?callback=?&radius=100&locationcode=' + str(self.city))
    # Close the HTTP response deterministically and decode the bytes instead
    # of parsing their repr (which mangles non-ASCII city names).
    with urlopen(url) as res_url:
        resp = res_url.read().decode('utf-8')
    print(resp)

    # The payload arrives wrapped as "?( ... );" — strip the wrapper.
    validate_res = resp.split("?(", 1)[-1].rsplit(");", 1)[0]
    validated_res = ast.literal_eval(validate_res)

    cities_nd_distence = []
    seen = set()
    for data in validated_res:
        # Skip rows too short to carry both fields (e.g. an empty result row).
        if len(data) < 8:
            continue
        name = data[1]
        # Track names separately: the list holds tuples, so testing a bare
        # name against it never matched and duplicates slipped through.
        if name in seen:
            continue
        seen.add(name)
        cities_nd_distence.append((name, data[7]))

    pprint.pprint(cities_nd_distence)
    return cities_nd_distence
If you only want to get cities based on latitude and longitude, you can have a look at https://github.com/coderholic/django-cities
from cities.models import City
from django.contrib.gis.geos import Point
from django.contrib.gis.db.models.functions import Distance
p = Point(-118, 34, srid=4326)
City.objects.annotate(distance=Distance('location', p)).order_by("distance").first()
<City: Hacienda Heights>

Get latitude & longitude from address geopandas

I have a CSV of about 100 million log records. One of the columns is an address, and I am trying to get the latitude and longitude of each address. I wanted to try something like the approach mentioned in the Solution, but that solution uses ArcGIS, which is a commercial tool. I also tried the Google API, but it has a limit of 2000 entries.
What is next best alternative to get address's Lat & Long into the large dataset.
Input: The column Site is the address from the City Paris
start_time,stop_time,duration,input_octets,output_octets,os,browser,device,langue,site
2016-08-27T16:15:00+05:30,2016-08-27T16:28:00+05:30,721.0,69979.0,48638.0,iOS,CFNetwork,iOS-Device,zh_CN,NULL
2016-08-27T16:16:00+05:30,2016-08-27T16:30:00+05:30,835.0,2528858.0,247541.0,iOS,Mobile Safari UIWebView,iPhone,en_GB,Berges de Seine Rive Gauche - Gros Caillou
2016-08-27T16:16:00+05:30,2016-08-27T16:47:00+05:30,1805.0,133303549.0,4304680.0,Android,Android,Samsung GT-N7100,fr_FR,Centre d'Accueil Kellermann
2016-08-27T16:17:00+05:30,,2702.0,32499482.0,7396904.0,Other,Apache-HttpClient,Other,NULL,Bibliothèque Saint Fargeau
2016-08-27T16:17:00+05:30,2016-08-27T17:07:00+05:30,2966.0,39208187.0,1856761.0,iOS,Mobile Safari UIWebView,iPad,fr_FR,NULL
2016-08-27T16:18:00+05:30,,2400.0,1505716.0,342726.0,NULL,NULL,NULL,NULL,NULL
2016-08-27T16:18:00+05:30,,302.0,3424123.0,208827.0,Android,Chrome Mobile,Samsung SGH-I337M,fr_CA,Square Jean Xxiii
2016-08-27T16:19:00+05:30,,1500.0,35035181.0,1913667.0,iOS,Mobile Safari UIWebView,iPhone,fr_FR,Parc Monceau 1 (Entrée)
2016-08-27T16:19:00+05:30,,6301.0,9227174.0,5681273.0,Mac OS X,AppleMail,Other,fr_FR,Bibliothèque Parmentier
The address with NULL can be neglected and also can be removed from the output.
The output should have following columns
start_time,stop_time,duration,input_octets,output_octets,os,browser,device,langue,site, latitude, longitude
Appreciate all the help, Thank you in advance!!
import csv
from geopy.geocoders import Nominatim
# Read c:/temp/input.csv, geocode the 'site' column and write
# Site / Address_found / Latitude / Longitude to c:/temp/output.csv
# (semicolon-delimited). Rows whose site is "NULL" are written with 'N/A'.
#
# If your sites are all located in France, pass country_codes="fr" to
# geocode() to restrict the search (this replaces the older country_bias
# constructor argument).
# Nominatim expects an application-specific user agent.
geolocator = Nominatim(user_agent="site-geocoder")

# Successful lookups per site name: the same site appears on many log lines,
# so each distinct site only needs one request.
geocoded = {}
not_found = ('Not found', 'N/A', 'N/A')

# Python 3's csv module works on text files opened with newline=''.
# assumes both files are UTF-8 encoded — TODO confirm for the real input
with open('c:/temp/input.csv', newline='', encoding='utf-8') as csvinput:
    with open('c:/temp/output.csv', 'w', newline='', encoding='utf-8') as csvoutput:
        output_fieldnames = ['Site', 'Address_found', 'Latitude', 'Longitude']
        writer = csv.DictWriter(csvoutput, delimiter=';', fieldnames=output_fieldnames)
        writer.writeheader()
        reader = csv.DictReader(csvinput)
        for row in reader:
            site = row['site']
            if site == "NULL":
                address, latitude, longitude = 'N/A', 'N/A', 'N/A'
            elif site in geocoded:
                address, latitude, longitude = geocoded[site]
            else:
                try:
                    location = geolocator.geocode(site, country_codes="fr")
                except Exception:
                    # Service/network failure: report it for this row but do
                    # not cache it, so a later row can retry the same site.
                    address, latitude, longitude = not_found
                else:
                    if location is None:
                        result = not_found
                    else:
                        result = (location.address, location.latitude,
                                  location.longitude)
                    geocoded[site] = result
                    address, latitude, longitude = result
            # Here is the writing section. The file is opened as UTF-8 text,
            # so the address is written as str (no manual .encode()).
            writer.writerow({
                'Site': site,
                'Address_found': address,
                'Latitude': latitude,
                'Longitude': longitude,
            })

Categories