Clustering near Lines using coordinates in Python

Clustering near Lines using coordinates in Python - python

I have a list with x- and y-coordinates of start and Endpoints of some lines.Lines as csv
331,178,486,232
185,215,386,308
172,343,334,419
406,128,570,165
306,106,569,166
159,210,379,299
236,143,526,248
303,83,516,178
409,62,572,106
26,287,372,427
31,288,271,381
193,228,432,330
120,196,432,329
136,200,374,297
111,189,336,289
284,186,560,249
333,202,577,254
229,194,522,219
349,111,553,165
121,322,342,416
78,303,285,391
103,315,340,415
The lines look like this on my example image. Lines plotted
I want to group lines which are close to each other into clusters and create one line for each cluster. For this example i would like to have 5 clusters. After that i want to calculate the distance from each clusterline to the next.
import csv, math
file = open("lines.csv")
csvreader = csv.reader(file)
lines = []
for data in csvreader:
lines.append({'x1':int(data[0]), 'y1':int(data[1]), 'x2':int(data[2]), 'y2':int(data[3])})
def point_delta(p1, p2):
return abs(p1 - p2)
for line in lines[:2]:
for line_rev in lines:
#x_start_delta = abs(line['x1'] - line_rev['x1'])
x_start_delta = point_delta(line['x1'], line_rev['x1'])
y_start_delta = abs(line['y1'] - line_rev['y1'])
start_distance = math.sqrt(x_start_delta**2 + y_start_delta**2)
x_end_delta = abs(line['x2'] - line_rev['x2'])
y_end_delta = abs(line['y2'] - line_rev['y2'])
end_distance = math.sqrt(x_end_delta**2 + y_end_delta**2)
avg_distance = (start_distance + end_distance)/2
cluster = 0
if avg_distance < 100:
print(f"distance: {avg_distance}")
print("############## next line ##############")
I have written some code to calculate the distance between each line but cant find a way to save the lines which are near to each other in different lists.
Does somebody know how to do this or is there another way to create clusters? Im also thinking about using the middlepoint instead of the start-/endpoint

You could throw a clustering on it, but it has trouble with the lonely line at the end
data = [[331,178,486,232],
[185,215,386,308],
[172,343,334,419],
[406,128,570,165],
[306,106,569,166],
[159,210,379,299],
[236,143,526,248],
[303,83,516,178],
[409,62,572,106],
[26,287,372,427],
[31,288,271,381],
[193,228,432,330],
[120,196,432,329],
[136,200,374,297],
[111,189,336,289],
[284,186,560,249],
[333,202,577,254],
[229,194,522,219],
[349,111,553,165],
[121,322,342,416],
[78,303,285,391],
[103,315,340,415]]
import pandas as pd
import sklearn
from sklearn.cluster import MiniBatchKMeans
import numpy as np
lines = pd.DataFrame(data)
CLUSTERS = 5
X = lines.values
kmeans = MiniBatchKMeans(n_clusters=CLUSTERS,max_no_improvement=100).fit(X)
import numpy as np
import pylab as pl
from matplotlib import collections as mc
lines_segments = [ [ (l[0],l[1]),([l[2],l[3]]) ] for l in lines.values]
center_segments = [ [ (l[0],l[1]),([l[2],l[3]]) ] for l in kmeans.cluster_centers_]
line_collection = mc.LineCollection(lines_segments, linewidths=2)
centers = mc.LineCollection(center_segments, colors='red', linewidths=4, alpha=1)
fig, ax = pl.subplots()
ax.add_collection(line_collection)
ax.add_collection(centers)
ax.autoscale()
ax.margins(0.1)
You can see the centers with
kmeans.cluster_centers_

Related

Reverse Array in a dataframe

Hi I am trying to extract data from a netCDF file, but the data is upside down. How can I reverse the database:
The data I want to extract is the height data from the (netcdf) at the points I have in the CSV file. my Data:
import numpy as np
from netCDF4 import Dataset
import matplotlib.pyplot as plt
import pandas as pd
from mpl_toolkits.basemap import Basemap
from matplotlib.patches import Path, PathPatch
csv_data = np.loadtxt('CSV with target coordinates',skiprows=1,delimiter=',')
num_el = csv_data[:,0]
lat = csv_data[:,1]
lon = csv_data[:,2]
value = csv_data[:,3]
data = Dataset("elevation Data",'r')
lon_range = data.variables['x_range'][:]
lat_range = data.variables['y_range'][:]
topo_range = data.variables['z_range'][:]
spacing = data.variables['spacing'][:]
dimension = data.variables['dimension'][:]
z = data.variables['z'][:]
lon_num = dimension[0]
lat_num = dimension[1]
etopo_lon = np.linspace(lon_range[0],lon_range[1],dimension[0])
etopo_lat = np.linspace(lat_range[0],lat_range[1],dimension[1])
topo = np.reshape(z, (lat_num, lon_num))
height = np.empty_like(num_el)
desired_lat_idx = np.empty_like(num_el)
desired_lon_idx = np.empty_like(num_el)
for i in range(len(num_el)):
tmp_lat = np.abs(etopo_lat - lat[i]).argmin()
tmp_lon = np.abs(etopo_lon - lon[i]).argmin()
desired_lat_idx[i] = tmp_lat
desired_lon_idx[i] = tmp_lon
height[i] = topo[tmp_lat,tmp_lon]
height[height<-10]=0
print(len(desired_lat_idx))
print(len(desired_lon_idx))
print(len(height))
dfl= pd.DataFrame({
'Latitude' : lat.reshape(-1),
'Longitude': lon.reshape(-1),
'Altitude': height.reshape(-1)
});
print(dfl)
# but the Lat should not be changed here (the dfl must be correct)
df =dfl
lat=np.array(df['Latitude'])
lon=np.array(df['Longitude'])
val=np.array(df['Altitude'])
m = basemap.Basemap(projection='robin', lon_0=0, lat_0=0, resolution='l',area_thresh=1000)
m.drawcoastlines(color = 'black')
x,y = m(lon,lat)
colormesh= m.contourf(x,y,val,100, tri=True, cmap = 'terrain')
plt.colorbar(location='bottom',pad=0.04,fraction=0.06)
plt.show()
I have already tried:
lat = csv_data[:,1]
lat= lat*(-1)
But this didn´t work

It's a plotting artifact().
Just do:
colormesh= m.contourf(x,y[::-1],val,100, tri=True, cmap = 'terrain')
y[::-1] will reverse the order of the y latitude elements (as opposed to the land-mass outlines; and while keeping the x longitude coordinates the same) and hence flip them.
I've often had this problem with plotting numpy image data in the past.
Your raw CSV data are unlikely to be flipped themselves (why would they be?). You should try sanity-checking them [I am not a domain expert I'm afraid]! Overlaying an actual coordinate grid can help with this.
Another way to do it is given here: Reverse Y-Axis in PyPlot
You could also therefore just do
ax = plt.gca()
ax.invert_yaxis()

DBSCAN Clustering loop

Using DB scan I am iterating through a csv with x, y, z, and id data. I would like to generate a new csv for a every combination of eps and minimum samples that occur within a set range.
The output csv should also include the number of points in the cluster, eps value, and minimum samples used as columns along with the original x, y, z, and id data. The code below will do this, but will only create a csv for the last cluster calculated.
%matplotlib notebook
from interp import *
import pandas as pd
import pandas as pdfrom sklearn.cluster import DBSCAN
from sklearn.neighbors import NearestNeighbors
from sklearn.metrics import silhouette_score
import itertools
points_file = "examples/example-1/fcr.csv"
eps_values = np.arange(0.1,0.5,0.1)
min_samples = np.arange(5,20)
dbscan_params = list(itertools.product(eps_values, min_samples))
cluster_n = []
epsvalues = []
min_samp = []
for p in dbscan_params:
dbscan_cluster = DBSCAN(eps=p[0],
min_samples=p[1]).fit(points)
epsvalues.append(p[0])
min_samp.append(p[1]), cluster_n.append(
len(np.unique(dbscan_cluster.labels_)))
df = pd.read_csv(points_file, header=None, names=["x", "y", "z", "id"])
df["cluster"] = clustering.labels_
df["eps"] = eps
df["min_sample"] = min_sam
csv_name = f'fcr_{eps}e{min_sam}m.csv'
df.to_csv(csv_name, index=False)

Finding multiple lines in a point cloud using ransac

I have a point cloud stored in a text file in the form of x and y as shown in the image. I need to find multiple lines representing each side for the shown point cloud using ransac so that I can split the points representing inliers for each line. How can I do that using ransac in python so that the result is something like the second image but with better accuracy?
import matplotlib.pyplot as plt
import numpy as np
from sklearn import linear_model, datasets
from skimage.measure import LineModelND, ransac
import pandas as pd
import math
def plot_ransac(segment_data_x, segment_data_y):
data = np.column_stack([segment_data_x, segment_data_y])
model_robust, inliers = ransac(data, min_samples=5,residual_threshold=5, max_trials=500)
def distance(x1,y1,x2,y2):
return np.sqrt((x1-x2)**2 + (y1-y2)**2)
x,y = np.loadtxt('D:/Point cloud data/Data mabna 4/Data_without_RGB/Downsampled/Walls_only_downsampled_5cm.txt', unpack=True)
x_data = np.array(x)
y_data = np.array(y)
x_segments = []
y_segments = []
distances = []
start = 0
for i in range(len(x_data)-1):
distance_to_point = distance(x_data[i], y_data[i], x_data[i+1], y_data[i+1])
distances.append(distance_to_point)
if distance_to_point > 5:
if i-start>10:
x_segments.append(x_data[start:i])
y_segments.append(y_data[start:i])
start = i+1
if i == len(x_data)-2:
if i-start>10:
x_segments.append(x_data[start:i])
y_segments.append(y_data[start:i])
# plt.plot(x_data, y_data, '.', color = 'grey')
for x_seg, y_seg in zip(x_segments, y_segments):
plt.plot(x_seg, y_seg,'.', markersize = 5)
# plot_ransac(x_seg, y_seg)
plt.axis('equal')
plt.show()

Visualizing SOM and adding labels to the map

I have been trying to apply SOM on my dataframe, my dataframe has 25 columns where each column represents a house, each house has a values for power consumption for two years, and I want to cluster the data with number of clusters = 3.
I have done the following:
import sys
sys.path.insert(0, '../')
%load_ext autoreload
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from pylab import plot,axis,show,pcolor,colorbar,bone
from matplotlib.patches import Patch
%matplotlib inline
from minisom import MiniSom
from sklearn.preprocessing import minmax_scale, scale
%autoreload 2
data1 = pd.read_excel(r"C:\Users\user\Desktop\Thesis\Tarek\Consumption.xlsx")
data1['h1'] = data1['h1'].str.split(';').str[2].astype('float')
data1['h2'] = data1['h2'].str.split(';').str[2].astype('float')
data1['h3'] = data1['h3'].str.split(';').str[2].astype('float')
data1['h4'] = data1['h4'].str.split(';').str[2].astype('float')
data1['h5'] = data1['h5'].str.split(';').str[2].astype('float')
data1['h6'] = data1['h6'].str.split(';').str[2].astype('float')
data1['h7'] = data1['h7'].str.split(';').str[2].astype('float')
data1['h8'] = data1['h8'].str.split(';').str[2].astype('float')
data1['h9'] = data1['h9'].str.split(';').str[2].astype('float')
data1['h10'] = data1['h10'].str.split(';').str[2].astype('float')
data1['h11'] = data1['h11'].str.split(';').str[2].astype('float')
data1['h12'] = data1['h12'].str.split(';').str[2].astype('float')
data1['h13'] = data1['h13'].str.split(';').str[2].astype('float')
data1['h14'] = data1['h14'].str.split(';').str[2].astype('float')
data1['h15'] = data1['h15'].str.split(';').str[2].astype('float')
data1['h16'] = data1['h16'].str.split(';').str[2].astype('float')
data1['h17'] = data1['h17'].str.split(';').str[2].astype('float')
data1['h18'] = data1['h18'].str.split(';').str[2].astype('float')
data1['h19'] = data1['h19'].str.split(';').str[2].astype('float')
data1['h20'] = data1['h20'].str.split(';').str[2].astype('float')
data1['h21'] = data1['h21'].str.split(';').str[2].astype('float')
data1['h22'] = data1['h22'].str.split(';').str[2].astype('float')
data1['h23'] = data1['h23'].str.split(';').str[2].astype('float')
data1['h24'] = data1['h24'].str.split(';').str[2].astype('float')
data1['h25'] = data1['h25'].str.split(';').str[2].astype('float')
data1.fillna(0,inplace=True)
data1=data1.round(decimals=2)
X=data1.values
som =MiniSom(x=3,y=3,input_len=25,sigma=1.0, learning_rate=0.5)
som.random_weights_init(X)
som.train_batch(data=X ,num_iteration=1000,verbose=True)
bone()
pcolor(som.distance_map().T)
colorbar()
markers = ['o' , 's','v']
colors = ['r', 'g','y']
for i, x in enumerate(X):
w = som.winner(x)
plot(w[0] + 0.5,
w[1] + 0.5,
markers[i],
markeredgecolor = colors[i],
markerfacecolor = 'None',
markersize = 10,
markeredgewidth = 2)
show()
when I am running the code, I am getting this error:
IndexError: list index out of range
please any tips to add the markers and colors in the right way without having any problems, and I would be glad if any one can help, I am a bit new to Python and tried to find a solution but I couldn`t find any.

The problem seems to be that the length of your X=data1.values is around 25 but the length of your markers and colors is only 3. So in the following for loop, when i is 3, you are trying to access markers[3] and colors[3] which throws an IndexError because both markers and colors goes up to index 2 (indexing starts from 0 in python)
for i, x in enumerate(X):
One solution is to define custom list of 25 markers and 25 colors. While you might want to define your own markers, you can leave the colors out and let the code choose automatic colors for the markeredgecolor

Python : shapely, cascaded intersections within one polygon

I'd like to split a polygon into a list of polygons corresponding to all intersections with other polygons (and intersections between themselves).
from shapely.geometry import Point
circleA = Point((0, 0)).buffer(1)
circleB = Point((1, 0)).buffer(1)
circleC = Point((1, 1)).buffer(1)
def cascaded_intersections(poly1, lst_poly):
# ???
return result
result = cascaded_intersections(circleA, (circleB, circleC))
The result should be a list of 4 Polygons, corresponding to the 4 complementary parts of A (above: [AC!B, ABC, AB!C, rest of A]).
The problem is the same than spitting a polygon into its smallest parts from a list of covering LineStrings.
How to write cascaded_intersections ?

A colleague of mine, Pascal L., found a solution :
#!/usr/bin/env python
# -*- coding:utf-8 -*-
from shapely.geometry import MultiPolygon, Polygon, Point, GeometryCollection
from shapely.ops import cascaded_union
EMPTY = GeometryCollection()
def partition(poly_a, poly_b):
"""
Splits polygons A and B into their differences and intersection.
"""
if not poly_a.intersects(poly_b):
return poly_a, poly_b, EMPTY
only_a = poly_a.difference(poly_b)
only_b = poly_b.difference(poly_a)
inter = poly_a.intersection(poly_b)
return only_a, only_b, inter
def eliminate_small_areas(poly, small_area):
"""
Eliminates tiny parts of a MultiPolygon (or Polygon)
"""
if poly.area < small_area:
return EMPTY
if isinstance(poly, Polygon):
return poly
assert isinstance(poly, MultiPolygon)
l = [p for p in poly if p.area > small_area]
if len(l) == 0:
return EMPTY
if len(l) == 1:
return l[0]
return MultiPolygon(l)
def cascaded_intersections(poly1, lst_poly):
"""
Splits Polygon poly1 into intersections of/with list of other polygons.
"""
result = [(lst_poly[0], (0,))]
for i, poly in enumerate(lst_poly[1:], start=1):
current = []
while result:
result_geometry, result_indexes = result.pop(0)
only_result, only_poly, inter = partition(result_geometry, poly)
for geometry, indexes in ((only_result, result_indexes), (inter, result_indexes + (i,))):
if not geometry.is_empty:
current.append((geometry, indexes))
current_union = cascaded_union([elt[0] for elt in current])
only_poly = poly.difference(current_union)
if not only_poly.is_empty:
current.append((only_poly, (i,)))
result = current
for r in range(len(result)-1, -1, -1):
geometry, indexes = result[r]
if poly1.intersects(geometry):
inter = poly1.intersection(geometry)
result[r] = (inter, indexes)
else:
del result[r]
only_poly1 = poly1.difference(cascaded_union([elt[0] for elt in result]))
only_poly1 = eliminate_small_areas(only_poly1, 1e-16*poly1.area)
if not only_poly1.is_empty:
result.append((only_poly1, None))
return [r[0] for r in result]
a=Point(0,0).buffer(1)
b1=Point(0,1).buffer(1)
b2=Point(1,0).buffer(1)
b3=Point(1,1).buffer(1)
result = cascaded_intersections(a, (b1,b2,b3))

Hi hi again, here's a better solution than my own, using part of gene's answer # stackexchange.com it uses shapely.ops functions cascaded_union, unary_union and polygonize.
import matplotlib.pyplot as plt
import numpy as np
import shapely.geometry as sg
from shapely.ops import cascaded_union, unary_union, polygonize
import shapely.affinity
import descartes
from itertools import combinations
circleA = sg.Point((0, 0)).buffer(1)
circleB = sg.Point((1, 0)).buffer(1)
circleC = sg.Point((1, 1)).buffer(1)
circles = [circleA,circleB,circleC]
listpoly = [a.intersection(b) for a, b in combinations(circles, 2)] #list of intersections
rings = [sg.LineString(list(pol.exterior.coords)) for pol in listpoly] #list of rings
union = unary_union(rings)
result = [geom for geom in polygonize(union)] #list all intersection geometries
multi = cascaded_union(result) #Create a single geometry out of all intersections
fin = [c.difference(multi) for c in circles] #Cut multi from circles and leave only outside geometries.
result = result + fin #add the outside geometries to the intersections geometries
#Plot settings:
plt.figure(figsize=(5,5))
ax = plt.gca()
name = 1
for e in result:
ax.add_patch(descartes.PolygonPatch(e,
fc=np.random.rand(3),
ec=None,
alpha=0.5))
ax.text(e.centroid.x,e.centroid.y,
'%s'%name,fontsize=9,
bbox=dict(facecolor='orange', alpha=0.5),
color='blue',
horizontalalignment='center')
name += 1
plt.xlim(-1.5,2.5)
plt.ylim(-1.5,2.5)
plt.show()

Como va. Hi Eric, I tried using the split function from shapely.ops. Here is the result. This is not the most time efficient or elegant solution but it works:
import matplotlib.pyplot as plt
import numpy as np #use np.random to give random RGB color to each polygon
import shapely.geometry as sg
from shapely.ops import split
import descartes
from itertools import combinations
def cascade_split(to_split,splitters): #Helper function for split recursion
'''
Return a list of all intersections between multiple polygons.
to_split: list, polygons or sub-polygons to split
splitters: list, polygons used as splitters
Returns a list of all the polygons formed by the multiple intersections.
'''
if len(splitters) == 0: # Each splitting geometry will be removed
return to_split # at the end of the function, reaching len == 0 at some point,
# only the it will return all the final splits.
new_to_split = [] # make a list that will run again though the function
for ts in to_split:
s = split(ts,splitters[0].boundary) # split geometry using the boundaries of another
for i in list(s):
new_to_split.append(i) #save the splits
splitters.remove(splitters[0]) #remove the splitting geometry to
#allow the split with the next polygon in line.
return cascade_split(new_to_split,splitters) #Use recursion to exhaust all splitting possibilities
#Create polygons, in this case circles.
circleA = sg.Point((0, 0)).buffer(1)
circleB = sg.Point((1, 0)).buffer(1)
circleC = sg.Point((1, 1)).buffer(1)
#Put all circles in list
circles = [circleA,circleB,circleC]
#The combinations tool takes the last polygon
#from list to split with the remaning polygons in list,
#creating a backwards copy of the circles list will help keep track of shapes.
back_circles = circles[::-1] #backwards copy of circles list
index_count = 0 #Keep track of which circle will get splitted
polys = [] #Final list of splitted polygons
for i in combinations(circles,len(circles)-1):
c_split = cascade_split([back_circles[index_count]],list(i)) #Use helper function here
for p in c_split:
#There will be duplicate polygon splits, the following condition will filter those:
if not any(poly.equals(p) for poly in polys):
polys.append(p)
index_count += 1
#plotting settings
plt.figure(figsize=(5,5))
ax = plt.gca()
for e in range(len(polys)):
ax.add_patch(descartes.PolygonPatch(polys[e],
fc=np.random.rand(3), #give random color to each split
ec=None,
alpha=0.5))
ax.text(polys[e].centroid.x,polys[e].centroid.y,
'%s' %(e+1),fontsize=9,
bbox=dict(facecolor='orange', alpha=0.5),
color='blue',
horizontalalignment='center')
plt.xlim(-1.5,2.5)
plt.ylim(-1.5,2.5)
plt.show()
polys #Output the polys list to see all the splits

We Keep Coding

Python is a programming language that lets you work quickly and integrate systems more effectively.

Clustering near Lines using coordinates in Python - python

Related

Reverse Array in a dataframe

DBSCAN Clustering loop

Finding multiple lines in a point cloud using ransac

Visualizing SOM and adding labels to the map

Python : shapely, cascaded intersections within one polygon

Categories

Resources