Python GDAL: Georeference array using other file for projection - python

I have an array of data, for each point I know the latitude and longitude of that point, and I'd like to write the data to a GTiff with projection taken from another file. How do I properly georeference the new file?
This is what I'm attempting just now:
import numpy as np
import gdal
from gdalconst import *
from osgeo import osr
def GetGeoInfo(FileName):
    """Read the georeferencing of an existing raster.

    Parameters
    ----------
    FileName : str
        Path of a raster that GDAL can open.

    Returns
    -------
    tuple
        ``(GeoT, Projection)``: the geotransform returned by
        ``GetGeoTransform()`` and an ``osr.SpatialReference`` built from the
        raster's projection WKT.

    Raises
    ------
    RuntimeError
        If GDAL cannot open ``FileName``.
    """
    SourceDS = gdal.Open(FileName, GA_ReadOnly)
    # gdal.Open returns None instead of raising unless exceptions are enabled
    if SourceDS is None:
        raise RuntimeError('Unable to open %s' % FileName)
    GeoT = SourceDS.GetGeoTransform()
    Projection = osr.SpatialReference()
    Projection.ImportFromWkt(SourceDS.GetProjectionRef())
    return GeoT, Projection
def CreateGeoTiff(Name, Array, driver,
                  xsize, ysize, GeoT, Projection):
    """Write ``Array`` to a single-band Float32 raster called ``Name + '.tif'``.

    Parameters
    ----------
    Name : str
        Output path without the ``.tif`` extension.
    Array : array-like
        2-D data to write to band 1 (rows first, i.e. ``ysize`` x ``xsize``).
    driver : gdal.Driver
        Driver used to create the file.
    xsize, ysize : int
        Number of columns and rows of the new raster.
    GeoT : sequence
        Geotransform to assign to the new raster.
    Projection : osr.SpatialReference
        Spatial reference to assign to the new raster.

    Returns
    -------
    str
        The name of the file that was written.

    Raises
    ------
    RuntimeError
        If the driver cannot create the output file.
    """
    DataType = gdal.GDT_Float32
    NewFileName = Name+'.tif'
    # Set up the dataset; the '1' is the number of bands.
    DataSet = driver.Create(NewFileName, xsize, ysize, 1, DataType)
    if DataSet is None:
        raise RuntimeError('Unable to create %s' % NewFileName)
    DataSet.SetGeoTransform(GeoT)
    DataSet.SetProjection(Projection.ExportToWkt())
    # Write the array
    DataSet.GetRasterBand(1).WriteArray(Array)
    # Flush and release the dataset explicitly so the file is complete on disk
    # before the name is handed back to the caller.
    DataSet.FlushCache()
    DataSet = None
    return NewFileName
def ReprojectCoords(x, y, src_srs, tgt_srs):
    """Transform a single point from ``src_srs`` to ``tgt_srs``.

    Parameters
    ----------
    x, y : float
        Coordinates of the point in the source spatial reference.
    src_srs, tgt_srs : osr.SpatialReference
        Source and target spatial references.

    Returns
    -------
    tuple
        ``(x, y)`` of the point in the target spatial reference; the z value
        returned by the transformation is discarded.
    """
    # NOTE(review): with GDAL >= 3 a geographic SRS may expect (lat, lon)
    # ordering unless the traditional GIS axis order is requested - confirm
    # against the GDAL version in use.
    transform = osr.CoordinateTransformation(src_srs, tgt_srs)
    x, y, z = transform.TransformPoint(x, y)
    return x, y
# Some Data
Data = np.random.rand(5,6)
Lats = np.array([-5.5, -5.0, -4.5, -4.0, -3.5])
Lons = np.array([135.0, 135.5, 136.0, 136.5, 137.0, 137.5])
# A raster file that exists in the same approximate region.
RASTER_FN = 'some_raster.tif'
# Open the raster file and get the projection, that's the
# projection I'd like my new raster to have, it's 'projected',
# i.e. x, y values are in the projection's linear units rather than degrees.
# GetGeoInfo returns exactly two values (geotransform, projection).
GeoT, TargetProjection = GetGeoInfo(RASTER_FN)
# Meanwhile my raster is currently in geographic coordinates.
SourceProjection = TargetProjection.CloneGeogCS()
# Get the corner coordinates of my array
LatSize, LonSize = len(Lats), len(Lons)
LatLow, LatHigh = Lats[0], Lats[-1]
LonLow, LonHigh = Lons[0], Lons[-1]
# Reproject the corner coordinates from geographic
# to projected...
TopLeft = ReprojectCoords(LonLow, LatHigh, SourceProjection, TargetProjection)
BottomLeft = ReprojectCoords(LonLow, LatLow, SourceProjection, TargetProjection)
TopRight = ReprojectCoords(LonHigh, LatHigh, SourceProjection, TargetProjection)
# Pixel sizes: x grows from left to right (positive width), y shrinks from
# top to bottom (negative height for a north-up raster).
PixelWidth = (TopRight[0]-TopLeft[0])/(LonSize-1)
PixelHeight = (BottomLeft[1]-TopLeft[1])/(LatSize-1)
# And define my Geotransform. The Lats/Lons are treated as pixel centres,
# while the geotransform origin is the outer corner of the top-left pixel,
# hence the half-pixel shift.
GeoTNew = [TopLeft[0]-PixelWidth/2., PixelWidth, 0,
           TopLeft[1]-PixelHeight/2., 0, PixelHeight]
# I want a GTiff
driver = gdal.GetDriverByName('GTiff')
# Create the new file... xsize is the number of columns (longitudes) and
# ysize the number of rows (latitudes). Lats ascend, so Data[0] is the
# southernmost row (assuming Data[i, j] belongs to Lats[i], Lons[j]); flip the
# rows so that row 0 is the northernmost one, matching the top-left origin.
NewFileName = CreateGeoTiff('Output', Data[::-1], driver, LonSize, LatSize, GeoTNew, TargetProjection)

If all you want to do is save the data to a raster for use in QGIS, you can simply construct a new Geotiff (or any other GDAL format) from your data. There is no need for a 'target raster' unless you want to do some form of reprojection or interpolation.
Here is an example:
import gdal
import osr
import numpy as np
data = np.random.rand(5,6)
lats = np.array([-5.5, -5.0, -4.5, -4.0, -3.5])
lons = np.array([135.0, 135.5, 136.0, 136.5, 137.0, 137.5])
xres = lons[1] - lons[0]
yres = lats[1] - lats[0]
ysize = len(lats)
xsize = len(lons)
# lats/lons are pixel centres, whereas the geotransform origin is the outer
# corner of the upper-left pixel: half a pixel west of the first longitude
# and half a pixel north of the last (northernmost) latitude.
ulx = lons[0] - (xres / 2.)
uly = lats[-1] + (yres / 2.)
driver = gdal.GetDriverByName('GTiff')
ds = driver.Create('D:\\test.tif', xsize, ysize, 1, gdal.GDT_Float32)
# this assumes the projection is Geographic lat/lon WGS 84
srs = osr.SpatialReference()
srs.ImportFromEPSG(4326)
ds.SetProjection(srs.ExportToWkt())
# North-up raster: y decreases from row to row, so the pixel height is -yres.
gt = [ulx, xres, 0, uly, 0, -yres]
ds.SetGeoTransform(gt)
outband = ds.GetRasterBand(1)
# lats ascend (south to north), so data[0] is the southernmost row
# (assuming data[i, j] belongs to lats[i], lons[j]); flip the rows so that
# row 0 of the raster is the northernmost one.
outband.WriteArray(data[::-1])
# Dereferencing the dataset flushes it to disk and closes the file.
ds = None
In this example I assumed that your lat/lon values refer to the center of each pixel. Since GDAL's geotransform refers to the outer edge (corner) of the raster, the origin has to be shifted outwards by half a pixel size.

Related

Determine coordinates at highest point of raster

I have a raster from which I want to derive the coordinates of the highest point (elevation) in the raster.
Getting the highest elevation is easy, but I don't know how to get its coordinates.
What I have so far:
# required modules
from osgeo import gdal
from osgeo import osr
import numpy as np
import rasterio
# allow GDAL to use python exceptions
gdal.UseExceptions()
# save paths to the files needed
input_raster = 'data/dem.tif'
# open input raster file with error catching
# NOTE(review): exit(keep_kernel=True) looks like the IPython/Jupyter form of
# exit - confirm this script is only run from a notebook.
try:
    ds = gdal.Open(input_raster)
except RuntimeError as err:
    print (err)
    exit(keep_kernel=True)
if ds is None:
    print ('Unable to open %s' % input_raster)
    exit(keep_kernel=True)
# access size of file
cols = ds.RasterXSize
rows = ds.RasterYSize
# access band and data as numpy arrays
band = ds.GetRasterBand(1)
data1 = band.ReadAsArray(0, 0, cols, rows).astype(float)
# mask the nodata values so they are ignored by max()
nodata_val = band.GetNoDataValue()
print(nodata_val)
if nodata_val is None:
    # The band declares no nodata value; comparing against None would not
    # mask anything meaningful, so only mask NaN/inf cells.
    data_masked = np.ma.masked_invalid(data1)
else:
    data_masked = np.ma.masked_equal(data1, nodata_val)
# determine highest elevation value and the geotransform terms needed to turn
# its array position into coordinates
highest_val = data_masked.max()
geotransform = ds.GetGeoTransform()
originX = geotransform[0]
originY = geotransform[3]
pixelWidth = geotransform[1]
pixelHeight = geotransform[5]
I'm stuck and thankful for any advice.
Get the indices of the max value(s):
# With only a condition, np.where returns one index array per axis; for a 2-D
# array that is (row_indices, col_indices).
indices = np.where(data_masked == data_masked.max())
Also decide what to do when there are multiple cells with the maximum value.
Compute the coordinates with the transforms:
# indices[0] holds row numbers and indices[1] column numbers; x depends on
# the column and y on the row. Only the first maximum is used here.
row, col = indices[0][0], indices[1][0]
# +0.5 moves from the pixel's upper-left corner to its centre.
x = (col + 0.5) * pixelWidth + originX
y = (row + 0.5) * pixelHeight + originY

How to insert TXT data into netcdf in python

I'm new to python, so I'm sorry if I make any beginner mistakes. I'm trying to insert my text file into a netcdf.
I'm using the netcdf4 package and follow the example in this website: https://pyhogs.github.io/intro_netcdf4.html and I managed to reproduce the example (the example uses random data):
Problem: my text file contains Lon, Lat and SST columns. When I try to insert these values, the netCDF file is created; however, its contents are not correct:
In my code I'm trying to apply a Barnes interpolation (var) or a griddata interpolation (interp).
I think this is what has to enter in my variable netcdf (maybe I'm wrong).
Here my code so far:
import os
import numpy as np
from scipy.interpolate import griddata
import matplotlib.pyplot as plt
import numpy.ma as ma
import netCDF4 as nc4
from numpy.random import uniform, seed
from metpy.interpolate import (interpolate_to_grid, remove_nan_observations, inverse_distance_to_grid, remove_repeat_coordinates)
# Script from the question: loads scattered (lon, lat, sst) samples from a
# text file, interpolates them and writes the result to a netCDF4 group.
# Open file
arq_sst = np.loadtxt(fname = "C:\\Users\\Rodrigo\\XYZ.txt", skiprows=0, delimiter=",")
# Getting the Arrays
lonf = arq_sst[:, 0]
latf = arq_sst[:, 1]
sstf = arq_sst[:, 2]
# Atmosphere level
z = [1]
#shapping grid
# NOTE(review): the grid is built from the raw sample coordinates (one entry
# per observation), not from an evenly spaced lon/lat axis.
x_1, y_1 = np.meshgrid(lonf, latf)
#Barnes Interpolation
var = inverse_distance_to_grid(lonf, latf, sstf, x_1, y_1, r=100000, gamma=0.25, kappa=5.052, min_neighbors=3, kind='barnes')
#Or
#Another interpolation
interp = griddata((lonf, latf), sstf, (lonf[None,:], latf[:,None]), method='nearest')
#Open netcdf to write
f = nc4.Dataset('file_created.nc','w', format='NETCDF4')
#Creating group in netcdf file
tempgrp = f.createGroup('SAT_DATA')
#Specifying dimensions
# lon/lat dimensions are sized by the number of samples; 'time' is unlimited.
tempgrp.createDimension('lon', len(lonf))
tempgrp.createDimension('lat', len(latf))
tempgrp.createDimension('z', len(z))
tempgrp.createDimension('time', None)
#Building variables
longitude = tempgrp.createVariable('Longitude', 'f4', 'lon')
latitude = tempgrp.createVariable('Latitude', 'f4', 'lat')
levels = tempgrp.createVariable('Levels', 'i4', 'z')
sst = tempgrp.createVariable('sst', 'f4', ('time', 'lon', 'lat', 'z'))
time = tempgrp.createVariable('Time', 'i4', 'time')
#Passing data into variables
longitude[:] = lonf
latitude[:] = latf
levels[:] = z
# Only the Barnes result (var) is stored; interp is computed but not written.
sst[0,:,:,:] = var
#get time in days since Jan 01,01
from datetime import datetime
today = datetime.today()
time_num = today.toordinal()
time[0] = time_num
#Add global attributes
f.description = "XYZ dataset containing one group"
f.history = "Created " + today.strftime("%d/%m/%y")
#Add local attributes to variable instances
longitude.units = 'degrees east'
latitude.units = 'degrees north'
time.units = 'days since Jan 01, 0001'
sst.units = 'degrees'
levels.units = 'meters'
sst.warning = 'This data is not real!'
#Closing the dataset
f.close()
Here is my text data(Header: Longitude,Latitude,SST). I decreased the number of lines to fit here:
-42.1870,-22.9940,22.4844
-37.4000,-29.9700,20.2000
-37.4200,-29.9600,20.1000
-39.1800,-30.0000,20.5000
-39.2100,-30.0000,20.4000
-39.2300,-30.0000,20.4000
-39.2200,-29.9800,20.4000
-39.2300,-29.9900,20.4000
-39.2000,-29.9800,20.4000
-39.1900,-30.0000,20.5000
-39.2800,-29.9900,20.5000
-39.2700,-29.9900,20.4000
-39.3400,-29.9700,20.5000
-39.3300,-29.9600,20.4000
-39.3100,-29.9600,20.4000
-39.3600,-29.9700,20.6000
-39.3500,-29.9900,20.4000
-39.3900,-29.9900,20.4000
-38.4600,-30.0000,20.3000
-38.4900,-29.9800,20.7000
-37.4800,-29.8800,20.4000
-37.5000,-29.8600,20.3000
-37.4600,-29.8900,20.3000
-41.3800,-29.9900,20.0000
-41.4000,-29.9900,20.1000
-41.0400,-29.9300,20.1000
-41.0200,-29.9200,20.2000
-41.0600,-29.9300,20.1000
-41.1000,-29.9400,19.9000
-41.0900,-29.9600,19.9000
-41.1100,-29.9800,19.9000
-41.1100,-29.9600,20.0000
-41.1200,-29.9400,20.0000
-41.1400,-29.9400,20.0000
-41.1600,-29.9500,20.1000
-41.1700,-29.9500,20.1000
-41.1900,-29.9700,20.0000
-41.1900,-29.9500,20.1000
-40.6800,-29.9900,20.1000
-40.7400,-29.9600,20.1000
-40.7700,-29.9700,20.1000
-40.7800,-29.9700,20.1000
-40.7100,-29.9000,20.1000
-40.7600,-29.9100,20.1000
-40.7400,-29.9000,20.1000
-40.7200,-29.9000,20.2000
-40.7600,-29.9200,20.1000
-40.7500,-29.9400,20.1000
-40.7800,-29.9100,20.2000
-40.8000,-29.9100,20.2000
-40.8100,-29.9300,20.1000
-40.8200,-29.9200,20.2000
-40.7900,-29.9300,20.2000
-40.7900,-29.9500,20.1000
-40.7700,-29.9300,20.1000
-40.8400,-29.9600,20.2000
-40.8600,-29.9600,20.3000
-40.9000,-29.9100,20.1000
-40.9100,-29.9100,20.0000
-40.3900,-29.9400,20.0000
-40.3900,-29.9200,20.0000
-40.4100,-29.9200,20.0000
-40.4100,-29.9400,20.0000
-40.3800,-29.9000,20.0000
-40.3800,-29.9200,20.0000
-40.4000,-29.9000,20.1000
-40.3700,-29.9600,20.0000
-40.3600,-29.9700,20.0000
-40.3800,-29.9800,20.0000
-40.4200,-29.9000,20.0000
-40.4300,-29.9300,20.1000
-40.4500,-29.9300,20.1000
-40.4700,-29.9300,20.0000
-40.4400,-29.9100,20.0000
-40.4500,-29.9100,20.0000
-40.4700,-29.9100,20.0000
-40.5000,-29.9400,19.9000
-40.5300,-29.9200,20.1000
-40.5100,-29.9200,20.1000
-40.4900,-29.9400,19.9000
-40.4900,-29.9200,20.0000
-40.6200,-30.0000,20.2000
-40.6000,-30.0000,20.1000
-40.6800,-29.9900,20.1000
-40.4000,-29.8400,20.1000
-40.4800,-29.8700,20.1000
-40.4500,-29.8300,20.3000
-40.4600,-29.8900,20.1000
-40.4600,-29.8700,20.0000
-40.5000,-29.8800,20.3000
-40.4900,-29.9000,20.1000
-40.5100,-29.9000,20.3000
-40.5300,-29.9000,20.2000
-40.5600,-29.8500,20.3000
-40.5800,-29.8500,20.3000
-40.6300,-29.9000,19.9000
-40.7100,-29.9000,20.1000
-40.0500,-29.9600,20.3000
-40.1100,-29.9800,20.2000
-40.1100,-30.0000,20.2000
Can anybody help me?
So there are a couple of things. First of all, you are not providing the correct equally spaced dimensions for the interpolation and the resulting netCDF file. This is how I created the space for the meshgrid, (I chose a linear space of 100 but depending on what resolution you want your data you may want to change this to whatever suits your purpose):
# Evenly spaced 100-point axes spanning the observed longitude/latitude
# range, expanded into a 2-D target grid for the interpolation.
spacing_x = np.linspace(lonf.min(), lonf.max(), num=100)
spacing_y = np.linspace(latf.min(), latf.max(), num=100)
x_1, y_1 = np.meshgrid(spacing_x, spacing_y)
Then doing the interpolation as follows:
#Barnes Interpolation
# Interpolates the scattered samples onto the regular grid (x_1, y_1).
# NOTE(review): r is presumably in the same units as lonf/latf (degrees
# here), so r=100000 effectively includes every sample - confirm.
var = inverse_distance_to_grid(lonf, latf, sstf, x_1, y_1, r=100000, gamma=0.25, kappa=5.052, min_neighbors=3, kind='barnes')
#Or
#Another interpolation
# Nearest-neighbour alternative evaluated on the same grid.
interp = griddata((lonf, latf), sstf, (x_1, y_1), method='nearest')
Finally you will want to add the linear spaces as the latitude and longitude dimensions since the interpolated data is being broadcasted to them:
#Passing data into variables
# The first row of x_1 and the first column of y_1 are the 1-D longitude and
# latitude axes the meshgrid was built from.
longitude[:] = x_1[0]
latitude[:] = y_1[:,0]
Another note is that for Panoply or other software to show your data in a Geo2D format, you will want to name your lat lon dimensions the same as your variables. The full code is below:
import os
import numpy as np
from scipy.interpolate import griddata
import matplotlib.pyplot as plt
import numpy.ma as ma
import netCDF4 as nc4
from numpy.random import uniform, seed
from metpy.interpolate import (interpolate_to_grid, remove_nan_observations, inverse_distance_to_grid, remove_repeat_coordinates)
# Loads scattered (lon, lat, sst) samples, interpolates them onto a regular
# 100 x 100 grid and stores the result in a netCDF4 group.
# Open file
arq_sst = np.loadtxt(fname = r"C:\Users\Rodrigo\XYZ.txt", skiprows=0, delimiter=",")
# Getting the Arrays
lonf = arq_sst[:, 0]
latf = arq_sst[:, 1]
sstf = arq_sst[:, 2]
# Atmosphere level
z = [1]
# shaping grid: evenly spaced axes spanning the observed extent
spacing_x = np.linspace(np.min(lonf),np.max(lonf),100)
spacing_y = np.linspace(np.min(latf),np.max(latf),100)
# With the default 'xy' indexing the grids have shape
# (len(spacing_y), len(spacing_x)), i.e. (latitude, longitude).
x_1, y_1 = np.meshgrid(spacing_x, spacing_y)
#Barnes Interpolation
var = inverse_distance_to_grid(lonf, latf, sstf, x_1, y_1, r=100000, gamma=0.25, kappa=5.052, min_neighbors=3, kind='barnes')
#Or
#Another interpolation
interp = griddata((lonf, latf), sstf, (x_1, y_1), method='nearest')
#Open netcdf to write
f = nc4.Dataset('file_created.nc','w', format='NETCDF4')
#Creating group in netcdf file
tempgrp = f.createGroup('SAT_DATA')
#Specifying dimensions
tempgrp.createDimension('longitude', len(spacing_x))
tempgrp.createDimension('latitude', len(spacing_y))
tempgrp.createDimension('z', len(z))
tempgrp.createDimension('time', None)
#Building variables
longitude = tempgrp.createVariable('longitude', 'f8', 'longitude', fill_value=np.nan)
latitude = tempgrp.createVariable('latitude', 'f8', 'latitude', fill_value=np.nan)
levels = tempgrp.createVariable('z', 'i4', 'z')
# The dimension order must match the layout of the interpolated array, which
# is (latitude, longitude); declaring (longitude, latitude) would store the
# field transposed.
sst = tempgrp.createVariable('sst', 'f8', ('time','latitude','longitude','z'), fill_value=np.nan)
time = tempgrp.createVariable('time', 'f8', 'time', fill_value=np.nan)
#Passing data into variables
longitude[:] = x_1[0]
latitude[:] = y_1[:,0]
levels[:] = z
# Add the trailing length-1 'z' axis explicitly instead of relying on an
# implicit reshape.
sst[0,:,:,:] = var[:, :, np.newaxis]
#get time in days since Jan 01,01
from datetime import datetime
today = datetime.today()
time_num = today.toordinal()
time[0] = time_num
#Add global attributes
f.description = "XYZ dataset containing one group"
f.history = "Created " + today.strftime("%d/%m/%y")
#Add local attributes to variable instances
longitude.units = 'degrees_east'
longitude.point_spacing = "even"
longitude._CoordinateAxisType = "Lon"
latitude.units = 'degrees_north'
latitude.point_spacing = "even"
latitude._CoordinateAxisType = "Lat"
time.units = "days since Jan 01, 0001"
time._ChunkSizes = [1]
sst.long_name = "SEA SURFACE TEMPERATURE"
sst.history = "From coads_climatology"
sst.units = "Deg C"
sst.missing_value = -1.0
sst._ChunkSizes = [1, 100, 100]
levels.units = 'meters'
sst.warning = 'This data is not real!'
#Closing the dataset
f.close()
Let me know if you have any questions.

get pixel coordinates of all pixels in a raster image

I have a raster image of shape 9000x10000 that has RGB bands. I use the below code to get the XY coordinates of all pixels in the image. But it is very slow. Is there a faster way to do it?
filename='file.dat'
inDs = gdal.Open(filename)
outDs = gdal.Translate('{}.xyz'.format(filename), inDs, format='XYZ', creationOptions=["ADD_HEADER_LINE=YES"])
# Dereference both datasets so GDAL flushes and closes the XYZ file before
# anything tries to read it back.
outDs = None
inDs = None
I want to save the XY coordinates and the pixel values in a dataframe.
If your raster file has a GeoTransform attribute, you can try this:
import gdal
import pandas as pd
def ix2xy(r,c,gt):
    '''Return the (x, y) coordinate of a pixel's upper-left corner.

    Despite the parameter names, ``r`` is the index multiplied by the
    x-resolution (i.e. the COLUMN index) and ``c`` is the index multiplied by
    the y-resolution (i.e. the ROW index). The names are kept so existing
    calls keep working; call it as ``ix2xy(column, row, gt)``.

    ``gt`` is a GDAL-style geotransform
    ``(xorigin, xres, xrot, yorigin, yrot, yres)``. The rotation terms
    ``gt[2]`` and ``gt[4]`` are included; for an unrotated raster they are 0
    and the result is simply ``origin + index * resolution``.

    Scalars or numpy arrays may be passed for ``r`` and ``c``.
    '''
    x = gt[0] + r * gt[1] + c * gt[2]
    y = gt[3] + r * gt[4] + c * gt[5]
    return(x,y)
This little function gets the X/Y coordinates from the GeoTransform attribute which is a tuple with (xorigin, xres, 0, yorigin, 0, yres).
import numpy as np

ds = gdal.Open('file.dat')
gt = ds.GetGeoTransform()
n_rows, n_cols = ds.RasterYSize, ds.RasterXSize
ds = None
# Build every (row, column) pair in row-major order with numpy instead of
# itertools.product plus a per-row DataFrame.apply, which is far too slow for
# a raster with tens of millions of pixels.
rows, cols = np.indices((n_rows, n_cols))
rows = rows.ravel()
cols = cols.ravel()
# ix2xy takes the column index first and works on whole arrays at once.
x, y = ix2xy(cols, rows, gt)
df = pd.DataFrame({'Row': rows, 'Column': cols, 'X': x, 'Y': y},
                  columns=['Row', 'Column', 'X', 'Y'])
This should give you a tidy dataframe with the columns Row, Column, X and Y.

How to convert a coordinates to indice array (vice versa) in a NetworkX graph?

I want to use a raster for a A* and bidirectional Dijkstra path analysis in NetworkX. I am using Python for this project.
Raster example (it's a png file converted when uploaded, but the real problem is TIFF):
First I read in the raster with GDAL
# Open the cost raster with GDAL.
input_raster = "raster.tif"
raster = gdal.Open(input_raster)
Next I read the raster as an array
# Read band 1 into an array.
bandraster = raster.GetRasterBand(1)
arr = bandraster.ReadAsArray()
So, I'll transform coords using a function:
def coord2pixelOffset(rasterfn, x, y):
    """Convert a coordinate to the (column, row) offset of its pixel.

    Parameters
    ----------
    rasterfn : str
        Path of the raster whose geotransform defines the pixel grid.
    x, y : float
        Coordinate in the raster's coordinate system.

    Returns
    -------
    tuple
        ``(xOffset, yOffset)``: column and row offsets from the raster origin.
        The rotation terms of the geotransform are not used. Offsets are not
        range-checked, so a point outside the raster yields a negative or
        too-large offset.
    """
    import math

    raster = gdal.Open(rasterfn)
    geotransform = raster.GetGeoTransform()
    originX = geotransform[0]
    originY = geotransform[3]
    pixelWidth = geotransform[1]
    pixelHeight = geotransform[5]
    # floor() instead of int(): int() truncates toward zero, which would map
    # a point just outside the origin (offset between -1 and 0) onto pixel 0.
    xOffset = int(math.floor((x - originX)/pixelWidth))
    yOffset = int(math.floor((y - originY)/pixelHeight))
    return xOffset, yOffset
CostSurfacefn = 'raster.tif'
# (x, y) coordinate pairs - presumably longitude/latitude in degrees; verify
# against the raster's coordinate system.
source_coord = (-41.1823753163, -13.83393276)
target_coord = (-40.3726182077, -14.2361991946)
# coordinates to array index
# Each result is an (xOffset, yOffset) tuple, i.e. (column, row).
source = coord2pixelOffset(CostSurfacefn, source_coord[0], source_coord[1])
target = coord2pixelOffset(CostSurfacefn, target_coord[0], target_coord[1])
The array is like this (example):
# Grid with 2x2. The float numbers are the pixel values
[[ 1.83781120e+08 1.90789248e+08]
[ 1.83781120e+08 1.90789248e+08]]
# array[0][0] is 1.83781120e+08
# array[0][1] is 1.90789248e+08
# array[1][0] is 1.83781120e+08
# array[1][1] is 1.90789248e+08
Next, the graph is loaded and bi-dijkstra function is called (but I want for example from array[0][0] to array[1][1] ):
# NOTE(review): from_numpy_matrix presumably treats arr as an adjacency
# matrix, so graph nodes would be integer indices rather than the
# (column, row) tuples held in source/target - confirm against the NetworkX docs.
G = nx.from_numpy_matrix(np.array(arr))
length, path = nx.bidirectional_dijkstra(G, source, target)
How to get the node id of source and target by array?

Extract Point From Raster in GDAL

I have a raster file and a WGS84 lat/lon point.
I would like to know what value in the raster corresponds with the point.
My feeling is that I should use GetSpatialRef() on the raster object or one of its bands and then apply a ogr.osr.CoordinateTransformation() to the point to map it to the raster's space.
My hope would then be that I could simply ask the rasters' bands what is at that point.
However, the raster object doesn't seem to have a GetSpatialRef() or a way to access a geo-located point, so I'm somewhat at a loss for how to do this.
Any thoughts?
Say I have a GeoTIFF file test.tif. The following code should look up the value of the pixel at a given point. I am not fully confident about the part that locates the cell, and will fix it if there is an error. This page should help: "GDAL Data Model"
Also, you may go to gis.stackexchange.com to find experts, if you haven't.
import gdal, osr
class looker(object):
    """Look up raster values at WGS84 longitude/latitude positions."""

    def __init__(self, tifname='test.tif'):
        """Open ``tifname`` and prepare the lon/lat -> pixel conversion.

        Raises IOError if GDAL cannot open the file.
        """
        # open the raster and its spatial reference
        self.ds = gdal.Open(tifname)
        if self.ds is None:
            raise IOError('Unable to open %s' % tifname)
        srRaster = osr.SpatialReference(self.ds.GetProjection())

        # get the WGS84 spatial reference
        srPoint = osr.SpatialReference()
        srPoint.ImportFromEPSG(4326)  # WGS84

        # GDAL >= 3 follows the authority axis order (lat, lon for
        # EPSG:4326). lookup() passes (lon, lat) and the geotransform works
        # in (x, y), so request the traditional GIS order where available.
        if hasattr(osr, 'OAMS_TRADITIONAL_GIS_ORDER'):
            srPoint.SetAxisMappingStrategy(osr.OAMS_TRADITIONAL_GIS_ORDER)
            srRaster.SetAxisMappingStrategy(osr.OAMS_TRADITIONAL_GIS_ORDER)

        # coordinate transformation
        self.ct = osr.CoordinateTransformation(srPoint, srRaster)

        # geotransformation and its inverse. gtinv[0] and gtinv[3] keep the
        # forward origin; the other four terms are the inverse of the 2x2
        # scale/rotation matrix, and lookup() subtracts the origin first.
        gt = self.ds.GetGeoTransform()
        dev = (gt[1]*gt[5] - gt[2]*gt[4])
        gtinv = (gt[0], gt[5]/dev, -gt[2]/dev,
                 gt[3], -gt[4]/dev, gt[1]/dev)
        self.gt = gt
        self.gtinv = gtinv

        # band as array
        b = self.ds.GetRasterBand(1)
        self.arr = b.ReadAsArray()

    def lookup(self, lon, lat):
        """Return the band-1 value of the pixel containing (lon, lat).

        Raises IndexError if the point falls outside the raster.
        """
        import math

        # get coordinate of the raster
        xgeo, ygeo, zgeo = self.ct.TransformPoint(lon, lat, 0)

        # convert it to pixel/line on band
        u = xgeo - self.gtinv[0]
        v = ygeo - self.gtinv[3]
        # The geotransform origin is the outer corner of pixel (0, 0), so the
        # containing pixel is the floor of the fractional offset. int() alone
        # would truncate toward zero and map offsets in (-1, 0) onto pixel 0.
        xpix = int(math.floor(self.gtinv[1] * u + self.gtinv[2] * v))
        ylin = int(math.floor(self.gtinv[4] * u + self.gtinv[5] * v))

        # Reject out-of-raster points explicitly: a negative index would
        # otherwise wrap around and return a value from the opposite edge.
        nrows, ncols = self.arr.shape
        if not (0 <= xpix < ncols and 0 <= ylin < nrows):
            raise IndexError('point (%s, %s) is outside the raster' % (lon, lat))

        # look the value up
        return self.arr[ylin, xpix]
# test
l = looker('test.tif')
lon,lat = -100,30
# print() with a single argument works on both Python 2 and Python 3
print(l.lookup(lon,lat))
lat,lon =28.816944, -96.993333
print(l.lookup(lon,lat))
Yes, the API isn't consistent. The raster (the data source) has a GetProjection() method instead (which returns WKT).
Here is a function that does what you want (drawn from here):
def extract_point_from_raster(point, data_source, band_number=1):
    """Return floating-point value that corresponds to given point.

    Parameters
    ----------
    point : OGR point geometry
        Must carry a spatial reference; it is not modified.
    data_source : GDAL raster dataset
        Must have a projection defined.
    band_number : int
        1-based number of the band to read.

    Returns
    -------
    float
        Value of the pixel containing ``point``, or NaN when that value
        equals the band's nodata value.
    """
    import math

    # Work on a copy so the caller's geometry is not reprojected in place
    point = point.Clone()

    # Convert point co-ordinates so that they are in same projection as raster
    point_sr = point.GetSpatialReference()
    raster_sr = osr.SpatialReference()
    raster_sr.ImportFromWkt(data_source.GetProjection())
    transform = osr.CoordinateTransformation(point_sr, raster_sr)
    point.Transform(transform)

    # Convert geographic co-ordinates to pixel co-ordinates
    x, y = point.GetX(), point.GetY()
    forward_transform = Affine.from_gdal(*data_source.GetGeoTransform())
    reverse_transform = ~forward_transform
    px, py = reverse_transform * (x, y)
    # Pixel i covers the fractional range [i, i + 1), so the containing pixel
    # is the floor; adding 0.5 before truncating would pick the neighbouring
    # pixel for any point in the second half of a cell.
    px, py = int(math.floor(px)), int(math.floor(py))

    # Extract pixel value
    band = data_source.GetRasterBand(band_number)
    structval = band.ReadRaster(px, py, 1, 1, buf_type=gdal.GDT_Float32)
    result = struct.unpack('f', structval)[0]
    if result == band.GetNoDataValue():
        result = float('nan')
    return result
Its documentation is as follows (drawn from here):
spatial.extract_point_from_raster(point, data_source, band_number=1)
data_source is a GDAL raster, and point is an OGR point object. The
function returns the value of the pixel of the specified band of
data_source that is nearest to point.
point and data_source need not be in the same reference system, but
they must both have an appropriate spatial reference defined.
If the point does not fall in the raster, RuntimeError is raised.
# Build a spatial reference from the raster's projection WKT.
project = self.ds.GetProjection()
srPoint = osr.SpatialReference(wkt=project)
done... with that, the vector file has adopted the projection from input raster file

Categories