Get geographical coordinates in a given square - python

My task is as follows: knowing the center (starting point), for example [{'lat': -7.7940023, 'lng': 110.3656535}], and knowing the radius of 5 km, I need to get all the points included in this square in 1 km increments. How do I achieve this?
P.S. Using the Haversine formula I can check whether a point is in a given square according to the radius.
[image: illustration of the square grid around the center point]

If you consider a spherical Earth with radius R, the angle associated with an arc of length L (5 km in your case) is:
import numpy as np
R = 6378.0 # km
L = 5.0 # km
angle = np.degrees(L/R)
so now, you can easily check if a point is inside your square:
center = {'lat': -7.7940023, 'lng': 110.3656535}
point = {'lat': 'point latitude', 'lng': 'point longitude'}  # insert your values here
if abs(point['lat'] - center['lat']) < angle and abs(point['lng'] - center['lng']) < angle:
    print('Point is inside')
else:
    print('Point is outside')
EDIT: to generate the grid of points in 1 km steps, see below.
import numpy as np
R = 6378.0  # km
L = 5  # side of the square
center = {'lat': -7.7940023, 'lng': 110.3656535}
square_side = np.linspace(-L/2, L/2, L+1)
angle = np.degrees(square_side/R)
latitude, longitude = np.meshgrid(angle + center['lat'], angle + center['lng'])
points = []
for lat, lng in zip(latitude.flatten(), longitude.flatten()):
    points.append({'lat': lat, 'lng': lng})
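As a follow-up, here is a minimal sketch (assuming the points list and center built above) that keeps only the grid points whose great-circle distance from the center is within the 5 km radius mentioned in the question, using the haversine formula:

from math import radians, cos, sin, asin, sqrt

def haversine_km(lat1, lng1, lat2, lng2):
    # great-circle distance in kilometers between two (lat, lng) pairs in decimal degrees
    lat1, lng1, lat2, lng2 = map(radians, [lat1, lng1, lat2, lng2])
    a = sin((lat2 - lat1) / 2)**2 + cos(lat1) * cos(lat2) * sin((lng2 - lng1) / 2)**2
    return 2 * 6371.0 * asin(sqrt(a))

radius_km = 5.0
points_in_radius = [p for p in points
                    if haversine_km(center['lat'], center['lng'], p['lat'], p['lng']) <= radius_km]
print(len(points_in_radius), 'of', len(points), 'grid points are within', radius_km, 'km')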

This example should illustrate the point:
import pandas as pd
import numpy as np
#for_map = pd.read_csv('campaign_contributions_for_map.tsv', sep='\t')
df_airports = pd.read_csv('C:\\airports.csv')
print(df_airports.head(3))
df_cities = pd.read_csv('C:\\worldcities.csv')
print(df_cities.head(3))
# join the two dataframes - must be the same length
df = pd.concat([df_cities, df_airports], axis=1)
# cast latitudes and longitudes to numeric
cols = ["lat", "lng", "latitude_deg", "longitude_deg"]
df[cols] = df[cols].apply(pd.to_numeric, errors='coerce', axis=1)
# create a mask where our conditions are met (absolute difference between city lat and airport lat < 0.5, and between city lng and airport lng < 0.5)
mask = ((abs(df["lat"] - df["latitude_deg"]) < .5) & (abs(df["lng"] - df["longitude_deg"]) < .5))
# fill the type column
df.loc[mask, 'Type'] = "Airport"
df.shape
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
df.head()
# The haversine formula determines the great-circle distance between two points on a sphere given
# their longitudes and latitudes. Important in navigation, it is a special case of a more general
# formula in spherical trigonometry, the law of haversines, that relates the sides and angles of
# spherical triangles.
lat1 = df['lat']
lon1 = df['lng']
lat2 = df['latitude_deg']
lon2 = df['longitude_deg']
from math import radians, cos, sin, asin, sqrt
def haversine(lat1, lon1, lat2, lon2):
    """
    Calculate the great circle distance between two points
    on the earth (specified in decimal degrees)
    """
    # convert decimal degrees to radians
    lon1, lat1, lon2, lat2 = map(radians, [lon1, lat1, lon2, lat2])
    # haversine formula
    dlon = lon2 - lon1
    dlat = lat2 - lat1
    a = sin(dlat/2)**2 + cos(lat1) * cos(lat2) * sin(dlon/2)**2
    c = 2 * asin(sqrt(a))
    # Radius of earth in kilometers is 6371
    km = 6371 * c
    return km
# Create a new column by passing the lat/long information to the haversine function
df['distance'] = [haversine(df.lat[i],df.lng[i],df.latitude_deg[i],df.longitude_deg[i]) for i in range(len(df))]
df['distance'] = df['distance'].round(decimals=3)
# Printing the data table
df.sort_values(by=['distance'], inplace=True)
df.head()
# Flag airports whose lat/lon coordinates are within 0.1 degrees of a city's
# lat/lon coordinates, then sort by the newly filled Type column
# Create a mask where our conditions are met (difference between lat and latitude_deg < 0.1 and
# difference between lng and longitude_deg < 0.1)
mask = ((abs(df["lat"] - df["latitude_deg"]) < 0.1) & (abs(df["lng"] - df["longitude_deg"]) < 0.1))
# Fill the type column
df.loc[mask, 'Type'] = "Airport"
df.sort_values(by=['Type'], inplace=True)
df.head()
More details here: https://github.com/ASH-WICUS/Notebooks/blob/master/Haversine%20Distance%20-%20Airport%20or%20Not.ipynb
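As a side note, the pd.concat above pairs cities and airports purely by row position. If the two files are not row-aligned, one alternative (a sketch, not part of the original notebook; it assumes the column names used above plus an airport name column) is a true nearest-neighbour lookup with scikit-learn's BallTree and its haversine metric:

import numpy as np
from sklearn.neighbors import BallTree

# BallTree's haversine metric expects [lat, lon] in radians
airport_coords = np.radians(df_airports[['latitude_deg', 'longitude_deg']].to_numpy(dtype=float))
city_coords = np.radians(df_cities[['lat', 'lng']].to_numpy(dtype=float))

tree = BallTree(airport_coords, metric='haversine')
dist_rad, idx = tree.query(city_coords, k=1)  # nearest airport for every city

df_cities['nearest_airport'] = df_airports['name'].to_numpy()[idx[:, 0]]
df_cities['distance_km'] = dist_rad[:, 0] * 6371.0  # convert the angular distance to km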

Related

Calculate the speed from two longitude and latitude GPS coordinates

I'm fiddling around with a GPS I bought a while back, a "GlobalSat GPS Receiver" model BU-S353S4. And it seems to be working well!
I have been able to read the GPS signal from the device with the help of "Cody Wilson's" excellent explanation, and convert the GPGGA output to longitude and latitude coordinates with the help of the excellent Python package "Pynmea2".
But how can I calculate my current speed from two positions? I have found this thread, which refers to this thread, on how to calculate the distance with the Haversine formula.
My code looks like this:
from math import radians, cos, sin, asin, sqrt
import serial
import pynmea2

ser = serial.Serial('/dev/ttyUSB0', 4800, timeout=5)

def haversine(lon1, lat1, lon2, lat2):
    """
    Calculate the great circle distance in kilometers between two points
    on the earth (specified in decimal degrees)
    """
    # convert decimal degrees to radians
    lon1, lat1, lon2, lat2 = map(radians, [lon1, lat1, lon2, lat2])
    # haversine formula
    dlon = lon2 - lon1
    dlat = lat2 - lat1
    a = sin(dlat/2)**2 + cos(lat1) * cos(lat2) * sin(dlon/2)**2
    c = 2 * asin(sqrt(a))
    r = 6371  # Radius of earth in kilometers. Use 3956 for miles. Determines return value units.
    return c * r

counter = 0
while 1:
    line = ser.readline().decode('UTF-8')
    splitline = line.split(',')
    if splitline[0] == '$GPGGA':
        counter += 1
        msg = line
        data = pynmea2.parse(msg)
        lat1 = data.latitude
        lon1 = data.longitude
        if counter % 2:
            distance = haversine(lon1, lat1, data.longitude, data.latitude)
            print(distance)
This outputs the distance I have traveled roughly every second. But here's the problem: it always returns 0.0. Maybe I haven't traveled far enough?
And how should I proceed to calculate the speed?
I know the formula speed = distance/time. Pynmea2 has a time property (msg.timestamp). But frankly, I don't know how to do this.
Final result that seems to be working
from math import radians, cos, sin, asin, sqrt, atan2, degrees
import serial
import pynmea2

ser = serial.Serial('/dev/ttyUSB0', 4800, timeout=5)

def haversine(lon1, lat1, lon2, lat2):
    """
    Calculate the great circle distance in kilometers between two points
    on the earth (specified in decimal degrees)
    """
    # convert decimal degrees to radians
    lon1, lat1, lon2, lat2 = map(radians, [lon1, lat1, lon2, lat2])
    # haversine formula
    dlon = lon2 - lon1
    dlat = lat2 - lat1
    a = sin(dlat/2)**2 + cos(lat1) * cos(lat2) * sin(dlon/2)**2
    c = 2 * asin(sqrt(a))
    r = 6371  # Radius of earth in kilometers. Use 3956 for miles. Determines return value units.
    return c * r

prev_data = None
while 1:
    line = ser.readline().decode('UTF-8')
    splitline = line.split(',')
    if splitline[0] == '$GPGGA':
        msg = line
        data = pynmea2.parse(msg)
        if prev_data is not None:
            distance = haversine(data.longitude, data.latitude, prev_data.longitude, prev_data.latitude)
            print('distance', distance)
            print('speed', round(distance*3600, 2))
        prev_data = data
The device updates once every second, hence the speed formula: multiplying the per-second distance in km by 3600 gives km/h.
Your formula for haversine is correct.
You have a bug in your program: you are setting lat1 and lon1 to data.longitude and data.latitude, and then immediately using these to test the distance from data.longitude and data.latitude. Of course you're going to get a distance of zero. You need to separate the "current" longitude and latitude from the "previous" longitude and latitude.
(A minor nit: you're not calculating the haversine, you're calculating the distance using the haversine, so you probably just want to name your function distance. The haversine itself is hav(x) = sin(x/2)**2.)
prev_data = None
while True:
    ....
    if prev_data is not None:
        distance = haversine(data.longitude, data.latitude,
                             prev_data.longitude, prev_data.latitude)
    prev_data = data
    ....
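To answer the speed part of the question using the GGA timestamps instead of assuming one fix per second, here is a minimal sketch (it assumes pynmea2's timestamp field, which is a datetime.time, and the haversine function defined above):

import datetime

def speed_kmh(prev_msg, cur_msg):
    # distance between two consecutive fixes, in km
    dist_km = haversine(prev_msg.longitude, prev_msg.latitude,
                        cur_msg.longitude, cur_msg.latitude)
    # GGA timestamps are datetime.time objects; attach a date so they can be subtracted
    today = datetime.date.today()
    dt = (datetime.datetime.combine(today, cur_msg.timestamp)
          - datetime.datetime.combine(today, prev_msg.timestamp)).total_seconds()
    if dt <= 0:  # guard against duplicate or midnight-wrapping timestamps
        return 0.0
    return dist_km / dt * 3600.0  # km per second -> km/h

In the loop above, print(speed_kmh(prev_data, data)) would then replace the fixed distance*3600 factor.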

Find nearest location based on coordinates in pyspark

I have one dataframe which contains names of stations and their coordinates.
For every station, I want the nearest station based on its coordinates.
What I have is 2 functions:
import math

def dist2(lat1, long1, lat2, long2):
    """
    Calculate the great circle distance between two points
    on the earth (specified in decimal degrees)
    """
    # convert decimal degrees to radians
    lat1, long1, lat2, long2 = map(lambda x: x * math.pi / 180.0, [lat1, long1, lat2, long2])
    # haversine formula
    dlon = long2 - long1
    dlat = lat2 - lat1
    a = math.sin(dlat/2)**2 + math.cos(lat1) * math.cos(lat2) * math.sin(dlon/2)**2
    c = 2 * math.asin(math.sqrt(a))
    # Radius of earth in kilometers is 6371
    km = 6371 * c
    return km

def find_nearest2(lat, lng):
    min_dist = 100000
    min_index = None
    distances = df_onlystations_clean.apply(
        lambda row: dist2(lat, lng, row['lat'], row['lng']),
        axis=1)
    if d < min_dist:
        min_dist = d
    return df_onlystations_clean.loc[min_dist.idxmin(), 'name']

df_onlystations_clean.apply(
    lambda row: find_nearest2(row['lat'], row['lng']),
    axis=1)
I always get the same error: 'DataFrame' object has no attribute 'apply'.
How can I loop through the df? What am I doing wrong?
That's because a Spark DataFrame is not a pandas DataFrame; you cannot loop over the rows or call the apply method.
You'd have to use the Spark UDF API to apply a Python user-defined function to the data.
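A minimal sketch of that approach (assuming df_onlystations_clean is a Spark DataFrame with the name, lat and lng columns from the question): register the haversine as a UDF, cross-join the stations with themselves, and keep the closest other station per name.

from math import radians, cos, sin, asin, sqrt
from pyspark.sql import functions as F
from pyspark.sql.types import DoubleType
from pyspark.sql.window import Window

def haversine_km(lat1, lng1, lat2, lng2):
    # great-circle distance in km between two points given in decimal degrees
    lat1, lng1, lat2, lng2 = map(radians, [lat1, lng1, lat2, lng2])
    a = sin((lat2 - lat1) / 2)**2 + cos(lat1) * cos(lat2) * sin((lng2 - lng1) / 2)**2
    return 2 * 6371.0 * asin(sqrt(a))

haversine_udf = F.udf(haversine_km, DoubleType())

stations = df_onlystations_clean.select('name', 'lat', 'lng')
others = stations.select(F.col('name').alias('other_name'),
                         F.col('lat').alias('other_lat'),
                         F.col('lng').alias('other_lng'))

pairs = (stations.crossJoin(others)
         .where(F.col('name') != F.col('other_name'))
         .withColumn('dist_km', haversine_udf('lat', 'lng', 'other_lat', 'other_lng')))

# keep, for every station, the row with the smallest distance
w = Window.partitionBy('name').orderBy('dist_km')
nearest = (pairs.withColumn('rn', F.row_number().over(w))
           .where(F.col('rn') == 1)
           .select('name', 'other_name', 'dist_km'))
nearest.show()

The cross join is quadratic in the number of stations, which is fine for a moderately sized list; for very large data a spatial join or broadcasting a small station table would be preferable.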

downsampling gps data using haversine formula using python

I have high-frequency GPS data which I want to downsample to every 50 meters, i.e. keep the GPS latitude and longitude every 50 meters and discard the in-between points. I found Python code on the internet which basically calculates the distance between two points. But I am not sure how to read the lat and long values from a CSV, feed them into the function, and calculate the distance. If the distance reaches 50 meters I simply save those GPS coordinates. So far, I have the following Python code:
from math import radians, cos, sin, asin, sqrt

def haversine(lon1, lat1, lon2, lat2):
    lon1, lat1, lon2, lat2 = map(radians, [lon1, lat1, lon2, lat2])
    # haversine formula
    dlon = lon2 - lon1
    dlat = lat2 - lat1
    a = sin(dlat/2)**2 + cos(lat1) * cos(lat2) * sin(dlon/2)**2
    c = 2 * asin(sqrt(a))
    r = 6371  # Radius of earth in kilometers. Use 3956 for miles
    return c * r

x1 = 52.19421607
x2 = 52.20000327
y1 = -1.484984011
y2 = -1.48533465
result = haversine(x1, y1, x2, y2)  # need to give input from a csv
# if result is greater than 50 m, save the coordinates
print(result)
How can I solve the problem? Any direction would be appreciated.
Here is an outline and a working code example, where I made some assumptions about which points to keep/drop. I assume the dataframe is sorted.
First calculate the distance to the next point; indeed, use haversine for lat/long pairs. This part is not fast in my implementation - you can find faster versions.
Use cumsum() of the distances to create distance groups, where group 1 is all distances below 50, group 2 between 50 and 100, etc.
Within each group, keep for instance only the first() point.
Note that this keeps roughly one point per 50 units based on the groups, so be aware this is different from taking a point, jumping to the next point that is closest to 50 units away, and repeating. But for data reduction purposes it should be fine.
Generate some random data around London.
import numpy as np
import sklearn
import pandas as pd
LONDON = (51.509865, -0.118092)
random_gps = np.random.random( (10000,2) ) / 25
random_gps[:,0] += np.arange(random_gps.shape[0]) / 25
random_gps[:,0] += LONDON[0]
random_gps[:,1] += LONDON[1]
gps_data = pd.DataFrame( random_gps, columns=["lat","long"] )
Shift the data to get the lat/long of the next point
gps_data['next_lat'] = gps_data.lat.shift(-1)    # lat of the next point
gps_data['next_long'] = gps_data.long.shift(-1)  # long of the next point
gps_data.head()
Define the distance metric. This part can be improved in terms of speed by using vector expressions with numpy, so if speed is important change this part.
from sklearn.neighbors import DistanceMetric

dist = DistanceMetric.get_metric('haversine')
EARTH_RADIUS = 6371.009

def haversine_distance(row):
    point_a = np.array([[row.lat, row.long]])
    point_b = np.array([[row.next_lat, row.next_long]])
    return EARTH_RADIUS * dist.pairwise(np.radians(point_a), np.radians(point_b))[0][0]
and apply our distance function (slow part, which can be improved)
gps_data["distance_to_next"] = gps_data.apply( haversine_distance, axis=1)
gps_data["distance_cumsum"] = gps_data.distance_to_next.cumsum()
Finally, create groups and drop. (!) Note that the haversine above returns the distance in km, so this example wrongly groups every 50 km instead of 50 meters; a fix is sketched after the code.
gps_data["distance_group"] = gps_data.distance_cumsum // 50
filtered = gps_data.groupby(['distance_group']).first()
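To group every 50 meters as the question asks, one option (a small sketch) is to convert the cumulative distance from km to meters before forming the groups:

gps_data["distance_group"] = (gps_data.distance_cumsum * 1000) // 50  # cumsum is in km; 50 m bins
filtered = gps_data.groupby(['distance_group']).first()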

Nested dict from for loop adding same values to all nested keys

I have address data and shapefiles with polygons, and am trying to determine the closest distance (in miles) of each address from each polygon, then create a nested dict containing all the info, with this format:
nested_dict = {poly_1: {address1: distance, address2: distance},
               poly_2: {address1: distance, address2: distance}, etc.}
The full, applicable code I'm using is:
import pandas as pd
from shapely.geometry import mapping, Polygon, LinearRing, Point
import geopandas as gpd
from math import radians, cos, sin, asin, sqrt

address_dict = {k: [] for k in addresses_geo.input_string}
sludge_dtc = {k: [] for k in sf_geo.unique_name}

def haversine(lon1, lat1, lon2, lat2):
    """
    Calculate the great circle distance between two points
    on the earth (specified in decimal degrees)
    """
    # convert decimal degrees to radians
    lon1, lat1, lon2, lat2 = map(radians, [lon1, lat1, lon2, lat2])
    # haversine formula
    dlon = lon2 - lon1
    dlat = lat2 - lat1
    a = sin(dlat/2)**2 + cos(lat1) * cos(lat2) * sin(dlon/2)**2
    c = 2 * asin(sqrt(a))
    r = 3956  # Radius of earth in miles. Use 6371 for kilometers
    return c * r

# Here's the key loop that isn't working correctly
for unique_name, i in zip(sf_geo.unique_name, sf_geo.index):
    for address, pt in zip(addresses_geo.input_string, addresses_geo.index):
        pol_ext = LinearRing(sf_geo.iloc[i].geometry.exterior.coords)
        d = pol_ext.project(addresses_geo.iloc[pt].geometry)
        p = pol_ext.interpolate(d)
        closest_point_coords = list(p.coords)[0]
        # print(closest_point_coords)
        dist = haversine(addresses_geo.iloc[pt].geometry.x,
                         addresses_geo.iloc[pt].geometry.y,
                         closest_point_coords[0], closest_point_coords[1])
        address_dict[address] = dist
    sludge_dtc[unique_name] = address_dict

# Test results on a single address
addresses_with_sludge_distance = pd.DataFrame(sludge_dtc)
print(addresses_with_sludge_distance.iloc[[1]].T)
If I break this code out and try and calculate the distances for a single polygon, it seems to work fine. However, when I create the DataFrame and check an address, it lists the same distance for every single polygon.
So, inner-dict-key '123 Main Street' will have 5.25 miles for each of the polygon keys in the outer dict, and '456 South Street' will have 6.13 miles for each of the polygon keys in the outer dict. (Made up examples.)
I realize I must be doing something dumb in the way I have the for loops set up, but I can't figure it out. I've reversed the order of the for statements, messed with indents-- all the same result.
To make it clear, what I want to happen is:
Take a single polygon, then
For each address in the address data, find the distance from that polygon and add to the address_dict dictionary with the address as the key and the distance as the value
When all addresses have been calculated, add the entire address dict as the value for the polygon key in sludge_dtc
Move on to the next polygon and continue
Any ideas what I'm missing?
The problem is very simple: you are always using the same address_dict instance.
You just need to recreate it inside the outer loop, once per polygon key.
import pandas as pd
from shapely.geometry import mapping, Polygon, LinearRing, Point
import geopandas as gpd
from math import radians, cos, sin, asin, sqrt

def haversine(lon1, lat1, lon2, lat2):
    """
    Calculate the great circle distance between two points
    on the earth (specified in decimal degrees)
    """
    # convert decimal degrees to radians
    lon1, lat1, lon2, lat2 = map(radians, [lon1, lat1, lon2, lat2])
    # haversine formula
    dlon = lon2 - lon1
    dlat = lat2 - lat1
    a = sin(dlat/2)**2 + cos(lat1) * cos(lat2) * sin(dlon/2)**2
    c = 2 * asin(sqrt(a))
    r = 3956  # Radius of earth in miles. Use 6371 for kilometers
    return c * r

sludge_dtc = {k: [] for k in sf_geo.unique_name}

# Here's the key loop that isn't working correctly
for unique_name, i in zip(sf_geo.unique_name, sf_geo.index):
    address_dict = {k: [] for k in addresses_geo.input_string}
    for address, pt in zip(addresses_geo.input_string, addresses_geo.index):
        pol_ext = LinearRing(sf_geo.iloc[i].geometry.exterior.coords)
        d = pol_ext.project(addresses_geo.iloc[pt].geometry)
        p = pol_ext.interpolate(d)
        closest_point_coords = list(p.coords)[0]
        # print(closest_point_coords)
        dist = haversine(addresses_geo.iloc[pt].geometry.x,
                         addresses_geo.iloc[pt].geometry.y,
                         closest_point_coords[0], closest_point_coords[1])
        address_dict[address] = dist
    sludge_dtc[unique_name] = address_dict

# Test results on a single address
addresses_with_sludge_distance = pd.DataFrame(sludge_dtc)
print(addresses_with_sludge_distance.iloc[[1]].T)
Another consideration:
You are creating dictionaries with empty lists as values, but afterwards you set values directly (the empty lists are replaced). If you need to collect a list of values, you should append to the existing list, e.g.:
address_dict[address].append(dist)
and
sludge_dtc[unique_name].append(address_dict)

IDW interpolation of point data using python and gdal

I have a CSV file with Lat, Long and Rainfall information. I would like to interpolate those points and create a TIFF file. Can anyone suggest the easiest way to do that?
I am trying to use gdal_grid. I am very new to using GDAL in Python.
This is actually several questions. Assuming you have some scattered data for lats and longs, you'll need to build all the locations where you want to make estimations (all the lats and longs for the pixels of your TIFF image).
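A minimal sketch of that step (the bounding box, pixel size and the grid_lon/grid_lat variable names are assumptions for illustration; the same ulx/uly/xres/yres/xsize/ysize values are reused by the geotransform at the end of this answer):

import numpy as np

# assumed raster extent and resolution for the output TIFF
ulx, uly = 76.0, 29.0        # upper-left corner (lon, lat), assumed values
xres, yres = 0.01, -0.01     # pixel size in degrees (yres is negative: rows go north to south)
xsize, ysize = 200, 150      # raster width and height in pixels

lons = ulx + xres * np.arange(xsize)
lats = uly + yres * np.arange(ysize)
grid_lon, grid_lat = np.meshgrid(lons, lats)  # one (lon, lat) pair per output pixel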
Once you have that, you can use any of the solutions around to do IDW over your data (using a recent example in another question):
import numpy as np

class Estimation():
    # IDW. Check: https://stackoverflow.com/questions/36031338/interpolate-z-values-in-a-3d-surface-starting-from-an-irregular-set-of-points/36037288#36037288
    def __init__(self, lon, lat, values):
        self.x = lat
        self.y = lon
        self.v = values

    def estimate(self, x, y, using='ISD'):
        """
        Estimate point at coordinate x,y based on the input data for this
        class.
        """
        if using == 'ISD':
            return self._isd(x, y)

    def _isd(self, x, y):
        # d = np.sqrt((x-self.x)**2+(y-self.y)**2)
        d = np.zeros(len(self.x))  # great-circle distance from (x, y) to every known sample
        for i in range(d.shape[0]):
            d[i] = haversine(self.y[i], self.x[i], y, x)  # haversine expects (lon1, lat1, lon2, lat2)
        if d.min() > 0:
            v = np.sum(self.v * (1/d**2) / np.sum(1/d**2))
            return v
        else:
            return self.v[d.argmin()]
The code above is adapted to calculate the distance with the Haversine formula (which gives great-circle distances between two points on a sphere from their longitudes and latitudes). Notice again that you can find all sorts of solutions for the haversine distance, like this one:
from math import radians, cos, sin, asin, sqrt

def haversine(lon1, lat1, lon2, lat2):
    """
    Check: https://stackoverflow.com/questions/15736995/how-can-i-quickly-estimate-the-distance-between-two-latitude-longitude-points
    Calculate the great circle distance between two points
    on the earth (specified in decimal degrees)
    """
    # convert decimal degrees to radians
    lon1, lat1, lon2, lat2 = map(radians, [lon1, lat1, lon2, lat2])
    # haversine formula
    dlon = lon2 - lon1
    dlat = lat2 - lat1
    a = sin(dlat/2)**2 + cos(lat1) * cos(lat2) * sin(dlon/2)**2
    c = 2 * asin(sqrt(a))
    km = 6367 * c
    return km
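Putting the pieces together, a sketch of the interpolation step (the CSV file name, the Lat/Long/Rainfall column names, and the grid arrays from the earlier sketch are assumptions):

import pandas as pd

df = pd.read_csv('rainfall.csv')  # assumed file with Lat, Long, Rainfall columns
est = Estimation(df['Long'].values, df['Lat'].values, df['Rainfall'].values)

mag_grid = np.zeros(grid_lat.shape)
for i in range(grid_lat.shape[0]):
    for j in range(grid_lat.shape[1]):
        # estimate() takes (lat, lon), matching the class's internal convention
        mag_grid[i, j] = est.estimate(grid_lat[i, j], grid_lon[i, j])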
Finally, once you have your array ready, you should just build the TIFF using GDAL. For this, check the following question, from which I quote part of its solution:
from osgeo import gdal, osr

driver = gdal.GetDriverByName('GTiff')
ds = driver.Create('output.tif', xsize, ysize, 1, gdal.GDT_Float32)
# this assumes the projection is Geographic lat/lon WGS 84
srs = osr.SpatialReference()
srs.ImportFromEPSG(4326)
ds.SetProjection(srs.ExportToWkt())
gt = [ulx, xres, 0, uly, 0, yres]
ds.SetGeoTransform(gt)
outband = ds.GetRasterBand(1)
outband.SetStatistics(np.min(mag_grid), np.max(mag_grid), np.average(mag_grid), np.std(mag_grid))
outband.WriteArray(mag_grid)
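One small follow-up not in the quoted answer: flush and close the dataset so the GeoTIFF is actually written to disk.

outband.FlushCache()
ds = None  # releasing the dataset handle finalizes the file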
