Turning a raggedly filled array into heatmap in python - python

I am working with Python and trying to implement a heatmap. My data is stored in a 2D array. The array is roughly array[1000][1000], but sometimes the x values are greater than 1000.
I am looking to make a heatmap of this array, and I cannot figure out how to do it. I have no idea how to get my data to conform since it is a ragged array not a standard size.
Here is a rough copy of my code I have been attempting
# Read the comma-separated file 'data' into a list of rows and fold it into
# the running total ``sum_data``.
sum_data = None
# NOTE(review): the commented-out loop header below suggests this was once the
# body of a per-file loop; as written only one file is read, so ``sum_data``
# is always None when tested and the summing branch is never taken.
#count = 0
#for in_file in file_values:
print("summing file: data")  # parenthesised form runs on Python 2 and 3
with open('data', 'r') as in_data:
    # One list of string fields per line of the file.
    data = [line.rstrip('\n').split(",") for line in in_data]
if sum_data is None:  # identity test is the idiomatic check for None
    # First file: keep its rows unchanged (the fields are still strings).
    sum_data = data
else:
    # Element-wise integer sum with the running total, walking the shape of
    # the file just read.
    sum_data = [[int(sum_data[y][x]) + int(data[y][x])
                 for x in range(len(data[y]))]
                for y in range(len(data))]
and then I was trying to get it to conform to this heatmap example
# Bin the (x, y) samples into 50 bins per axis and draw the counts as an image.
heatmap, xedges, yedges = np.histogram2d(x, y, bins=50)
extent = [xedges[0], xedges[-1], yedges[0], yedges[-1]]
plt.clf()
# histogram2d bins x along the first array axis, whereas imshow draws the
# first axis vertically starting at the top. Transposing and using
# origin='lower' makes the picture agree with the extent given above.
plt.imshow(heatmap.T, extent=extent, origin='lower')
# Save before show(): after the window is closed the figure may be gone and
# savefig would then write an empty image.
plt.savefig('output.png')
plt.show()

Related

Matplotlib is making positives into negatives

I'm just trying to graph some simple data and whether I try to do it with plot or subplot it comes out the same. All values in my lists are positive but the y axis is acting like a number line with only positives.
# Read three comma-separated series from a text file and plot the third one.
import matplotlib.pyplot as plt
xVal = []
yVal1 = []
yVal2 = []
yVal3 = []
data = []
# load data
with open(r"path", 'r') as f:
    data = f.readlines()
# Lines 0, 1 and 2 of the file each hold one comma-separated series.
yVal1 = data[0].split(",")
yVal2 = data[1].split(",")
yVal3 = data[2].split(",")
# Drop the last field of every series.
del yVal1[-1]
del yVal2[-1]
del yVal3[-1]
print(yVal1)
print(yVal2)
print(yVal3)
# graph dem bois
# x positions are simply 0 .. len(yVal1)-1.
xVal = [*range(0, len(yVal1))]
'''fig, ax = plt.subplots(3)
ax[0].plot(xVal, yVal1)
ax[0].set_title("pm5")
ax[1].plot(xVal, yVal2)
ax[1].set_title("pm7.5")
ax[2].plot(xVal, yVal3)
ax[2].set_title("pm10")
fig.suptitle("Particulate Levels over time")'''
# NOTE(review): yVal3 still holds the str fields produced by split(); nothing
# in this block converts them to numbers before they are plotted.
plt.plot(xVal, yVal3)
plt.show()
As per the comment by Jody Klymak I converted the string lists into float lists and it worked.
fyVal1 = [float(x) for x in yVal1]

Using Folium to Add Markers to Leaflet Map from Data Stored in Array

I'm attempting to use Folium to create a web map using Leaflet with data stored in an array. However, the markers are not being added to the map.
I'm using the code below.
# Map centre used by Map() below.
centerX = 34.2104
centerY = -77.8868
def Map():
    """Create a Folium map centred on (centerX, centerY) and add one green
    marker per data row of the CSV file named by ``userInFile``.

    NOTE(review): indentation was lost in this listing. The whole body is
    assumed to belong to Map() because ``m`` is used when the markers are
    added -- TODO confirm.
    """
    coord = [centerX, centerY]
    m = folium.Map(coord, zoom_start=13)
    # NOTE(review): the map is written to index.html here, before any marker
    # has been added to ``m`` below; nothing in this block saves it again.
    m.save("index.html")
    # Parse CSV
    marker = []
    with open(userInFile, 'r') as ds:
        i=0
        for line in ds:
            i+=1
            strippedLine = line.strip()
            lineList = strippedLine.split(',')
            # Skip the first line; take fields 16 and 17 of every later line.
            if i > 1:
                x = lineList[16]
                y = lineList[17]
                # Stored flat: [x0, y0, x1, y1, ...].
                marker.append(x)
                marker.append(y)
    # Add markers to map
    print(marker)
    i = 0
    # Walk the flat list two entries at a time, one (x, y) pair per marker.
    while i < len(marker)-1:
        folium.Marker([float(marker[i]), float(marker[i+1])], popup="new location", icon=folium.Icon(color="green")).add_to(m)
        print(marker[i], marker[i+1])
        i+=2
I wanted to make sure the data was being added to array correctly, below is the output of the print statements.
But none of the markers are being added to the map, only the basemap shows up. Could anyone help me find where my error is?

Only last graph is getting pasted in pdf file in python

I am reading the parameters from different CSV files and creating graphs after comparing the parameters across the CSVs. The problem is that only the last graph, for the last parameter, ends up in the PDF.
# Collect one result per matching CSV row, draw a bar chart of the collected
# results and save it as a page of example.pdf.
# NOTE(review): indentation was lost in this listing. The plotting section is
# shown at the level of the ``with`` body (after the file loop), which is only
# presumed from the question text -- TODO confirm against the original post.
with PdfPages('example.pdf') as pdf:
    for arg in sys.argv[1:]:
        # NOTE(review): file_reader is never closed in this block.
        file_reader= open(arg, "rt", encoding='ascii')
        read = csv.reader(file_reader)
        for row in read:
            # Keep column 10 (result) and column 0 (build) of matching rows.
            if operation_OnDut in row:
                column_Result = row[10]
                resultOfOperations_OnDut_List.append(column_Result)
                buildNumber = row[0]
                buildName_List.append(buildNumber)
    N = len(resultOfOperations_OnDut_List)
    ind = np.arange(N)
    #Draw graph for operations performed in that TEST CASE
    y = resultOfOperations_OnDut_List
    width = .1
    fig, ax = plt.subplots()
    plt.bar(ind, y, width, label = column_Parameters, color="blue")
    plt.xticks(ind, buildName_List)
    plt.title("Performance and Scale")
    plt.ylabel('Result of Operations')
    plt.xlabel('Execution Builds')
    plt.legend()
    plt.tight_layout()
    # Append the current figure as a new page, then release it.
    pdf.savefig()
    plt.close()
    # Reset the accumulators.
    resultOfOperations_OnDut_List = []
    buildName_List = []
You probably got the indentation wrong...
Try
with PdfPages('example.pdf') as pdf:
    for arg in sys.argv[1:]:
        file_reader= open(arg, "rt", encoding='ascii')
        read = csv.reader(file_reader)
        for row in read:
            if operation_OnDut in row:
                column_Result = row[10]
                ....
        # one level deeper: this section sits inside the ``for arg`` loop
        N = len(resultOfOperations_OnDut_List)
        ind = np.arange(N)
        #Draw graph for operations performed in that TEST CASE
        ...
Note that the section starting with N = len(resultOfOperations_OnDut_List) has been shifted four spaces to the right (one level deeper) so that it is within the first for loop. If you want it to be within the second for loop, add four more spaces.

IndexError: too many indices for array for an array that is definitely as big

I'm trying to make a movie by taking png images of an updating plot and stitching them together. There are three variables: degrees, ksB, and mp. Only mp changes each frame; the other two are constant. The data for mp for all times is stored in X. This is the relevant part of the code:
def plot(fname, haveMLPY=False):
    """Plot the data stored in the .npz file *fname*, then render one 3-D
    surface-fit frame per time step and stitch the frames into a movie.

    Args:
        fname: path handed to ``np.load``; the archive is read for the keys
            "X", "T", "vipWeights" and "ksB".
        haveMLPY: when true, also build a ``circadian.analysis.Signal`` from
            the second half of ``X[:, 0, 0]`` and show its spectrum.

    NOTE(review): indentation was lost in this listing; the nesting below is
    reconstructed from statement order -- TODO confirm.
    """
    # Load data from .npz file.
    data = np.load(fname)
    X = data["X"]
    T = data["T"]
    N = X.shape[1]
    A = data["vipWeights"]
    degrees = A.sum(1)
    ksB = data["ksB"]
    # Initialize a figure.
    figure = plt.figure()
    # Generate a plottable axis as the first subplot in 1 rows and 1 columns.
    axis = figure.add_subplot(1,1,1)
    # MP is the first (0th) variable. Plot one trajectory for each cell over time.
    axis.plot(T, X[:,:,0], color="black")
    # Decorate the plot.
    axis.set_xlabel("time [hours]")
    axis.set_ylabel("MP [nM]")
    axis.set_title("PER mRNA concentration across all %d cells" % N)
    # Index of the midpoint of the time axis.
    firstInd = int(T.size / 2)
    if haveMLPY:
        import circadian.analysis
        # Generate and plot a Signal object, which encapsulates wavelet analysis.
        signal = circadian.analysis.Signal(X[firstInd:, 0, 0], T[firstInd:])
        signal.showSpectrum(show=False)
    files=[]
    # filename for the name of the resulting movie
    filename = 'animation'
    # Variable 0 at time index 10**4-1; X is still the array loaded above here.
    mp = X[10**4-1,:,0]
    from mpl_toolkits.mplot3d import Axes3D
    for i in range(10**4):
        print i
        mp = X[i,:,0]
        # One row per entry: columns are (degrees, ksB, mp).
        data2 = np.c_[degrees, ksB, mp]
        # Find best fit surface for data2
        # regular grid covering the domain of the data
        mn = np.min(data2, axis=0)
        mx = np.max(data2, axis=0)
        # NOTE(review): this rebinds X to the 20x20 meshgrid, replacing the
        # data array loaded from the file; on the next iteration
        # ``X[i,:,0]`` above indexes this grid instead.
        X,Y = np.meshgrid(np.linspace(mn[0], mx[0], 20), np.linspace(mn[1], mx[1], 20))
        XX = X.flatten()
        YY = Y.flatten()
        order = 2 # 1: linear, 2: quadratic
        if order == 1:
            # best-fit linear plane
            A = np.c_[data2[:,0], data2[:,1], np.ones(data2.shape[0])]
            C,_,_,_ = scipy.linalg.lstsq(A, data2[:,2]) # coefficients
            # evaluate it on grid
            Z = C[0]*X + C[1]*Y + C[2]
            # or expressed using matrix/vector product
            #Z = np.dot(np.c_[XX, YY, np.ones(XX.shape)], C).reshape(X.shape)
        elif order == 2:
            # best-fit quadratic curve
            A = np.c_[np.ones(data2.shape[0]), data2[:,:2], np.prod(data2[:,:2], axis=1), data2[:,:2]**2]
            C,_,_,_ = scipy.linalg.lstsq(A, data2[:,2])
            # evaluate it on a grid
            Z = np.dot(np.c_[np.ones(XX.shape), XX, YY, XX*YY, XX**2, YY**2], C).reshape(X.shape)
        # NOTE(review): a new figure is created on every iteration and is not
        # closed anywhere in this block.
        fig = plt.figure()
        ax = fig.add_subplot(111, projection='3d')
        ax.plot_surface(X, Y, Z, rstride=1, cstride=1, alpha=0.2)
        ax.scatter(degrees, ksB, mp)
        ax.set_xlabel('degrees')
        ax.set_ylabel('ksB')
        ax.set_zlabel('mp')
        # form a filename
        # NOTE(review): %03d pads to three digits while i runs up to 10**4-1.
        fname2 = '_tmp%03d.png'%i
        # save the frame
        # NOTE(review): savefig is used as a bare name; presumably imported
        # elsewhere in the file -- verify.
        savefig(fname2)
        # append the filename to the list
        files.append(fname2)
    # call mencoder
    os.system("mencoder 'mf://_tmp*.png' -mf type=png:fps=10 -ovc lavc -lavcopts vcodec=wmv2 -oac copy -o " + filename + ".mpg")
    # cleanup
    for fname2 in files: os.remove(fname2)
Basically, all the data is stored in X. The format X[i, i, i] means X[time, neuron, data type]. Each time through the loop, I want to update the time, but still plot mp (the 0th variable) for all the neurons.
When I run this code, I get "IndexError: too many indices for array". I asked it to print i to see when the code was going wrong. I get an error when i = 1, meaning that the code loops through once but then has the error the second time.
However, I have data for 10^4 time steps. You can see in the first line of the provided code, I access X[10**4-1, :, 0] successfully. That's why it's confusing to me why X[1,:,0] would be out of range. If anybody could explain why/help me get around this, that would be great.
The traceback error is
Traceback (most recent call last):
File "/Users/angadanand/Documents/LiClipseWorkspace/Circadian/scripts /runMeNets.py", line 196, in <module>
plot(fname)
File "/Users/angadanand/Documents/LiClipseWorkspace/Circadian/scripts /runMeNets.py", line 142, in plot
mp = X[i,:,0]
IndexError: too many indices for array
Thanks!
Your problem is that you overwrite your X inside your loop:
X,Y = np.meshgrid(np.linspace(mn[0], mx[0], 20), np.linspace(mn[1], mx[1], 20))
So afterwards it will have another shape and contain different data. I would suggest renaming this second X to X_grid and checking where you need this "other" X and where you need the original.
for example:
X_grid, Y_grid = np.meshgrid(np.linspace(mn[0], mx[0], 20), np.linspace(mn[1], mx[1], 20))

Interpolate latitude, longitude, data in Python (structured grid inside a circle)

I have a set of latitude, longitude points with a data-variable e.g. drive-time from an address. These points have been created by sampling a structured grid and then cutting out a circle.
As such I don't think I can have a matrix of data because some columns will have more zeros/missing than others (the top and bottom parts of the circle) which may confuse the algorithm?
Ideally, I would like to fill in the circle with more points; e.g. at 5 decimal places such that instead of having 51.5454 and 51.5455 I have 51.54540, 51.54541, .... , 51.54550.
My data looks like this:
And I would like to fill in the gaps:
I have tried using:
from scipy.interpolate import RectSphereBivariateSpline
In the following fashion - (test-case), however I am not sure if this is the correct approach in general?
def geointerp(lats, lons, data, grid_size_deg, mesh=False):
    """Evaluate a RectSphereBivariateSpline built from (lons, lats, data) on a
    finer, hard-coded target grid.

    The target grid spans latitudes 50..51 and longitudes -1..1 (degrees);
    the number of points per axis is derived from *grid_size_deg*.

    Returns:
        A tuple ``(new_lats, new_lons, data_interp)`` with the flattened grid
        coordinates converted back to degrees. When *mesh* is true,
        ``data_interp`` is reshaped to 2-D and transposed first.
    """
    deg2rad = np.pi/180.
    # NOTE(review): under Python 3 true division these counts are floats;
    # np.linspace presumably expects an integer number of samples -- confirm.
    new_lats = np.linspace(50, 51, 180/grid_size_deg)
    new_lons = np.linspace(-1, 1, 360/grid_size_deg)
    new_lats, new_lons = np.meshgrid(new_lats*deg2rad, new_lons*deg2rad)
    #We need to set up the interpolator object
    # NOTE(review): SciPy documents the arguments as (u=colatitude,
    # v=longitude, r=2-D data on that grid). Here longitudes are passed first
    # and the caller supplies 1-D ``data``, which looks like the source of the
    # reported ``r.shape[1]`` IndexError -- verify against the SciPy docs.
    lut = RectSphereBivariateSpline(lons*deg2rad, lats*deg2rad, data)
    # Flatten the grid so it can be evaluated point by point.
    new_lats = new_lats.ravel()
    new_lons = new_lons.ravel()
    data_interp = lut.ev(new_lats,new_lons)
    if mesh == True:
        # NOTE(review): the reshape dimensions are floats as well (see above).
        data_interp = data_interp.reshape((360/grid_size_deg, 180/grid_size_deg)).T
    return new_lats/deg2rad, new_lons/deg2rad, data_interp
# Read in-data: columns 0, 1 and 2 of each CSV row are latitude, longitude
# and the data value.
lats_in = []
lons_in = []
data_in = []
# newline='' is what the csv module asks for when handing it a file object.
with open('interpolation_test.csv', newline='') as f:
    for x in csv.reader(f):
        lats_in.append(float(x[0]))
        lons_in.append(float(x[1]))
        data_in.append(float(x[2]))
# Interpolate:
lats_in = np.asarray(lats_in)
lons_in = np.asarray(lons_in)
data_in = np.asarray(data_in)
output_list = geointerp(lats_in, lons_in, data_in, 0.01)
# Output: one single-cell row per element of the tuple returned by geointerp.
# The context manager closes the file even if a write fails.
with open('interpolation_test_out.csv', 'w', newline='') as f:
    w = csv.writer(f)
    for out in output_list:
        w.writerow([out])
Not to mention errors such as:
"if not v.size == r.shape[1]:
IndexError: tuple index out of range"

Categories