I'm new to python, so I'm sorry if I make any beginner mistakes. I'm trying to insert my text file into a netcdf.
I'm using the netcdf4 package and follow the example in this website: https://pyhogs.github.io/intro_netcdf4.html and I managed to reproduce the example (the example uses random data):
Problem: My text file contains: Lon, Lat , SST and when I try to insert this values, the netcdf file is created, however, it's not correct:
In my code I'm trying to apply a Barnes interpolation (var) or a griddata interpolation (interp).
I think this is what has to enter in my variable netcdf (maybe I'm wrong).
Here my code so far:
import os
import numpy as np
from scipy.interpolate import griddata
import matplotlib.pyplot as plt
import numpy.ma as ma
import netCDF4 as nc4
from numpy.random import uniform, seed
from metpy.interpolate import (interpolate_to_grid, remove_nan_observations, inverse_distance_to_grid, remove_repeat_coordinates)
# Open file
arq_sst = np.loadtxt(fname = "C:\\Users\\Rodrigo\\XYZ.txt", skiprows=0, delimiter=",")
# Getting the Arrays
lonf = arq_sst[:, 0]
latf = arq_sst[:, 1]
sstf = arq_sst[:, 2]
# Atmosphere level
z = [1]
#shapping grid
x_1, y_1 = np.meshgrid(lonf, latf)
#Barnes Interpolation
var = inverse_distance_to_grid(lonf, latf, sstf, x_1, y_1, r=100000, gamma=0.25, kappa=5.052, min_neighbors=3, kind='barnes')
#Or
#Another interpolation
interp = griddata((lonf, latf), sstf, (lonf[None,:], latf[:,None]), method='nearest')
#Open netcdf to write
f = nc4.Dataset('file_created.nc','w', format='NETCDF4')
#Creating group in netcdf file
tempgrp = f.createGroup('SAT_DATA')
#Specifying dimensions
tempgrp.createDimension('lon', len(lonf))
tempgrp.createDimension('lat', len(latf))
tempgrp.createDimension('z', len(z))
tempgrp.createDimension('time', None)
#Building variables
longitude = tempgrp.createVariable('Longitude', 'f4', 'lon')
latitude = tempgrp.createVariable('Latitude', 'f4', 'lat')
levels = tempgrp.createVariable('Levels', 'i4', 'z')
sst = tempgrp.createVariable('sst', 'f4', ('time', 'lon', 'lat', 'z'))
time = tempgrp.createVariable('Time', 'i4', 'time')
#Passing data into variables
longitude[:] = lonf
latitude[:] = latf
levels[:] = z
sst[0,:,:,:] = var
#get time in days since Jan 01,01
from datetime import datetime
today = datetime.today()
time_num = today.toordinal()
time[0] = time_num
#Add global attributes
f.description = "XYZ dataset containing one group"
f.history = "Created " + today.strftime("%d/%m/%y")
#Add local attributes to variable instances
longitude.units = 'degrees east'
latitude.units = 'degrees north'
time.units = 'days since Jan 01, 0001'
sst.units = 'degrees'
levels.units = 'meters'
sst.warning = 'This data is not real!'
#Closing the dataset
f.close()
Here is my text data(Header: Longitude,Latitude,SST). I decreased the number of lines to fit here:
-42.1870,-22.9940,22.4844
-37.4000,-29.9700,20.2000
-37.4200,-29.9600,20.1000
-39.1800,-30.0000,20.5000
-39.2100,-30.0000,20.4000
-39.2300,-30.0000,20.4000
-39.2200,-29.9800,20.4000
-39.2300,-29.9900,20.4000
-39.2000,-29.9800,20.4000
-39.1900,-30.0000,20.5000
-39.2800,-29.9900,20.5000
-39.2700,-29.9900,20.4000
-39.3400,-29.9700,20.5000
-39.3300,-29.9600,20.4000
-39.3100,-29.9600,20.4000
-39.3600,-29.9700,20.6000
-39.3500,-29.9900,20.4000
-39.3900,-29.9900,20.4000
-38.4600,-30.0000,20.3000
-38.4900,-29.9800,20.7000
-37.4800,-29.8800,20.4000
-37.5000,-29.8600,20.3000
-37.4600,-29.8900,20.3000
-41.3800,-29.9900,20.0000
-41.4000,-29.9900,20.1000
-41.0400,-29.9300,20.1000
-41.0200,-29.9200,20.2000
-41.0600,-29.9300,20.1000
-41.1000,-29.9400,19.9000
-41.0900,-29.9600,19.9000
-41.1100,-29.9800,19.9000
-41.1100,-29.9600,20.0000
-41.1200,-29.9400,20.0000
-41.1400,-29.9400,20.0000
-41.1600,-29.9500,20.1000
-41.1700,-29.9500,20.1000
-41.1900,-29.9700,20.0000
-41.1900,-29.9500,20.1000
-40.6800,-29.9900,20.1000
-40.7400,-29.9600,20.1000
-40.7700,-29.9700,20.1000
-40.7800,-29.9700,20.1000
-40.7100,-29.9000,20.1000
-40.7600,-29.9100,20.1000
-40.7400,-29.9000,20.1000
-40.7200,-29.9000,20.2000
-40.7600,-29.9200,20.1000
-40.7500,-29.9400,20.1000
-40.7800,-29.9100,20.2000
-40.8000,-29.9100,20.2000
-40.8100,-29.9300,20.1000
-40.8200,-29.9200,20.2000
-40.7900,-29.9300,20.2000
-40.7900,-29.9500,20.1000
-40.7700,-29.9300,20.1000
-40.8400,-29.9600,20.2000
-40.8600,-29.9600,20.3000
-40.9000,-29.9100,20.1000
-40.9100,-29.9100,20.0000
-40.3900,-29.9400,20.0000
-40.3900,-29.9200,20.0000
-40.4100,-29.9200,20.0000
-40.4100,-29.9400,20.0000
-40.3800,-29.9000,20.0000
-40.3800,-29.9200,20.0000
-40.4000,-29.9000,20.1000
-40.3700,-29.9600,20.0000
-40.3600,-29.9700,20.0000
-40.3800,-29.9800,20.0000
-40.4200,-29.9000,20.0000
-40.4300,-29.9300,20.1000
-40.4500,-29.9300,20.1000
-40.4700,-29.9300,20.0000
-40.4400,-29.9100,20.0000
-40.4500,-29.9100,20.0000
-40.4700,-29.9100,20.0000
-40.5000,-29.9400,19.9000
-40.5300,-29.9200,20.1000
-40.5100,-29.9200,20.1000
-40.4900,-29.9400,19.9000
-40.4900,-29.9200,20.0000
-40.6200,-30.0000,20.2000
-40.6000,-30.0000,20.1000
-40.6800,-29.9900,20.1000
-40.4000,-29.8400,20.1000
-40.4800,-29.8700,20.1000
-40.4500,-29.8300,20.3000
-40.4600,-29.8900,20.1000
-40.4600,-29.8700,20.0000
-40.5000,-29.8800,20.3000
-40.4900,-29.9000,20.1000
-40.5100,-29.9000,20.3000
-40.5300,-29.9000,20.2000
-40.5600,-29.8500,20.3000
-40.5800,-29.8500,20.3000
-40.6300,-29.9000,19.9000
-40.7100,-29.9000,20.1000
-40.0500,-29.9600,20.3000
-40.1100,-29.9800,20.2000
-40.1100,-30.0000,20.2000
Can anybody help me?
So there are a couple of things. First of all, you are not providing the correct equally spaced dimensions for the interpolation and the resulting netCDF file. This is how I created the space for the meshgrid, (I chose a linear space of 100 but depending on what resolution you want your data you may want to change this to whatever suits your purpose):
spacing_x = np.linspace(np.min(lonf),np.max(lonf),100)
spacing_y = np.linspace(np.min(latf),np.max(latf),100)
x_1, y_1 = np.meshgrid(spacing_x, spacing_y)
Then doing the interpolation as follows:
#Barnes Interpolation
var = inverse_distance_to_grid(lonf, latf, sstf, x_1, y_1, r=100000, gamma=0.25, kappa=5.052, min_neighbors=3, kind='barnes')
#Or
#Another interpolation
interp = griddata((lonf, latf), sstf, (x_1, y_1), method='nearest')
Finally you will want to add the linear spaces as the latitude and longitude dimensions since the interpolated data is being broadcasted to them:
#Passing data into variables
longitude[:] = x_1[0]
latitude[:] = y_1[:,0]
Another note is that for Panoply or other software to show your data in a Geo2D format, you will want to name your lat lon dimensions the same as your variables. The full code is below:
import os
import numpy as np
from scipy.interpolate import griddata
import matplotlib.pyplot as plt
import numpy.ma as ma
import netCDF4 as nc4
from numpy.random import uniform, seed
from metpy.interpolate import (interpolate_to_grid, remove_nan_observations, inverse_distance_to_grid, remove_repeat_coordinates)
# Open file
arq_sst = np.loadtxt(fname = r"C:\Users\Rodrigo\XYZ.txt", skiprows=0, delimiter=",")
# Getting the Arrays
lonf = arq_sst[:, 0]
latf = arq_sst[:, 1]
sstf = arq_sst[:, 2]
# Atmosphere level
z = [1]
#shapping grid
spacing_x = np.linspace(np.min(lonf),np.max(lonf),100)
spacing_y = np.linspace(np.min(latf),np.max(latf),100)
x_1, y_1 = np.meshgrid(spacing_x, spacing_y)
#Barnes Interpolation
var = inverse_distance_to_grid(lonf, latf, sstf, x_1, y_1, r=100000, gamma=0.25, kappa=5.052, min_neighbors=3, kind='barnes')
#Or
#Another interpolation
interp = griddata((lonf, latf), sstf, (x_1, y_1), method='nearest')
#Open netcdf to write
f = nc4.Dataset('file_created.nc','w', format='NETCDF4')
#Creating group in netcdf file
tempgrp = f.createGroup('SAT_DATA')
#Specifying dimensions
tempgrp.createDimension('longitude', len(spacing_x))
tempgrp.createDimension('latitude', len(spacing_y))
tempgrp.createDimension('z', len(z))
tempgrp.createDimension('time', None)
#Building variables
longitude = tempgrp.createVariable('longitude', 'f8', 'longitude', fill_value=np.nan)
latitude = tempgrp.createVariable('latitude', 'f8', 'latitude', fill_value=np.nan)
levels = tempgrp.createVariable('z', 'i4', 'z')
sst = tempgrp.createVariable('sst', 'f8', ('time','longitude','latitude','z'), fill_value=np.nan)
time = tempgrp.createVariable('time', 'f8', 'time', fill_value=np.nan)
#Passing data into variables
longitude[:] = x_1[0]
latitude[:] = y_1[:,0]
levels[:] = z
sst[0,:,:,:] = var
#get time in days since Jan 01,01
from datetime import datetime
today = datetime.today()
time_num = today.toordinal()
time[0] = time_num
#Add global attributes
f.description = "XYZ dataset containing one group"
f.history = "Created " + today.strftime("%d/%m/%y")
#Add local attributes to variable instances
longitude.units = 'degrees_east'
longitude.point_spacing = "even";
longitude._CoordinateAxisType = "Lon";
latitude.units = 'degrees_north'
latitude.point_spacing = "even";
latitude._CoordinateAxisType = "Lat";
time.units = "days since Jan 01, 0001";
time._ChunkSizes = [1]
sst.long_name = "SEA SURFACE TEMPERATURE"
sst.history = "From coads_climatology"
sst.units = "Deg C";
sst.missing_value = -1.0
sst._ChunkSizes = [1, 100, 100]
levels.units = 'meters'
sst.warning = 'This data is not real!'
#Closing the dataset
f.close()
Let me know if you have any questions.
I have a raster file and a WGS84 lat/lon point.
I would like to know what value in the raster corresponds with the point.
My feeling is that I should use GetSpatialRef() on the raster object or one of its bands and then apply a ogr.osr.CoordinateTransformation() to the point to map it to the raster's space.
My hope would then be that I could simply ask the rasters' bands what is at that point.
However, the raster object doesn't seem to have a GetSpatialRef() or a way to access a geo-located point, so I'm somewhat at a loss for how to do this.
Any thoughts?
Say i have a geotiff file test.tif. Then followin code should look up value somewhere near the pixel. I am not that confident for the part looking up cell, and will fix there is error. This page should help, "GDAL Data Model"
Also, you may go to gis.stackexchange.com to find experts, if you haven't.
import gdal, osr
class looker(object):
"""let you look up pixel value"""
def __init__(self, tifname='test.tif'):
"""Give name of tif file (or other raster data?)"""
# open the raster and its spatial reference
self.ds = gdal.Open(tifname)
srRaster = osr.SpatialReference(self.ds.GetProjection())
# get the WGS84 spatial reference
srPoint = osr.SpatialReference()
srPoint.ImportFromEPSG(4326) # WGS84
# coordinate transformation
self.ct = osr.CoordinateTransformation(srPoint, srRaster)
# geotranformation and its inverse
gt = self.ds.GetGeoTransform()
dev = (gt[1]*gt[5] - gt[2]*gt[4])
gtinv = ( gt[0] , gt[5]/dev, -gt[2]/dev,
gt[3], -gt[4]/dev, gt[1]/dev)
self.gt = gt
self.gtinv = gtinv
# band as array
b = self.ds.GetRasterBand(1)
self.arr = b.ReadAsArray()
def lookup(self, lon, lat):
"""look up value at lon, lat"""
# get coordinate of the raster
xgeo,ygeo,zgeo = self.ct.TransformPoint(lon, lat, 0)
# convert it to pixel/line on band
u = xgeo - self.gtinv[0]
v = ygeo - self.gtinv[3]
# FIXME this int() is probably bad idea, there should be
# half cell size thing needed
xpix = int(self.gtinv[1] * u + self.gtinv[2] * v)
ylin = int(self.gtinv[4] * u + self.gtinv[5] * v)
# look the value up
return self.arr[ylin,xpix]
# test
l = looker('test.tif')
lon,lat = -100,30
print l.lookup(lon,lat)
lat,lon =28.816944, -96.993333
print l.lookup(lon,lat)
Yes, the API isn't consistent. The raster (the data source) has a GetProjection() method instead (which returns WKT).
Here is a function that does what you want (drawn from here):
def extract_point_from_raster(point, data_source, band_number=1):
"""Return floating-point value that corresponds to given point."""
# Convert point co-ordinates so that they are in same projection as raster
point_sr = point.GetSpatialReference()
raster_sr = osr.SpatialReference()
raster_sr.ImportFromWkt(data_source.GetProjection())
transform = osr.CoordinateTransformation(point_sr, raster_sr)
point.Transform(transform)
# Convert geographic co-ordinates to pixel co-ordinates
x, y = point.GetX(), point.GetY()
forward_transform = Affine.from_gdal(*data_source.GetGeoTransform())
reverse_transform = ~forward_transform
px, py = reverse_transform * (x, y)
px, py = int(px + 0.5), int(py + 0.5)
# Extract pixel value
band = data_source.GetRasterBand(band_number)
structval = band.ReadRaster(px, py, 1, 1, buf_type=gdal.GDT_Float32)
result = struct.unpack('f', structval)[0]
if result == band.GetNoDataValue():
result = float('nan')
return result
Its documentation is as follows (drawn from here):
spatial.extract_point_from_raster(point, data_source, band_number=1)
data_source is a GDAL raster, and point is an OGR point object. The
function returns the value of the pixel of the specified band of
data_source that is nearest to point.
point and data_source need not be in the same reference system, but
they must both have an appropriate spatial reference defined.
If the point does not fall in the raster, RuntimeError is raised.
project = self.ds.GetProjection()
srPoint = osr.SpatialReference(wkt=project)
done... with that, the vector file has adopted the projection from input raster file