unsupervised module¶
Unsupervised machine learning module.
Cluster
¶
__init__(self, data)
special
¶
Instantiate the Cluster class object.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
data |
array |
Numpy array containing geospatial and attribute data. |
required |
Source code in mygeopackage/unsupervised.py
def __init__(self, data):
    """Create a Cluster that wraps ``data`` and has no clustering results yet.

    Args:
        data (array): Numpy array containing geospatial and attribute data.
    """
    # Input to be clustered; stored by reference, not copied.
    self.data = data
    # Column index used as the record identifier; 0 until a clustering run sets it.
    self.identifier = 0
    # Result slots, empty until a clustering function fills them in.
    self.labels = []
    self.cluster_centers = []
show(self, map=None, to_display=True)
¶
Draw the object on the map with Folium.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
map |
Folium map object |
If set to None, the function will create a new map object. If given the map object, the layer will be drawn on the map. Defaults to None. |
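None |
to_display |
bool |
If True, the map is rendered with display() after all markers have been added. Defaults to True. |
True |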
Source code in mygeopackage/unsupervised.py
def show(self, map=None, to_display=True):
    """Draw every record on a Folium map, using one random colour per cluster.

    Args:
        map (Folium map object, optional): Map to draw on. If None, a new map
            is created whose location is taken from the first record
            (column 1, column 0). Defaults to None.
        to_display (bool, optional): If true, the map is rendered with
            ``display`` once all markers have been added. Defaults to True.

    Returns:
        None. The map is not returned, so pass your own ``map`` when calling
        with ``to_display=False`` if you need the result afterwards.
    """
    data = self.data
    # Converted once so the per-cluster mask below is an element-wise
    # comparison regardless of how the labels were stored.
    labels = np.asarray(self.labels)

    # Identity test: `== None` would dispatch to the map object's __eq__.
    if map is None:
        m = folium.Map(location=[data[0][1], data[0][0]], zoom_start=10)
    else:
        m = map

    for label in np.unique(labels):
        # One colour per cluster, shared by all of its markers.
        color = rgb_to_hex([randint(100, 255), randint(100, 255), randint(100, 255)])
        for member in data[np.where(labels == label)]:
            folium.CircleMarker(
                [member[1], member[0]],
                radius=6,
                popup='Cluster: ' + str(label),
                fill=True,
                color=color,
                fill_color=color,
                fill_opacity=1,
            ).add_to(m)

    if to_display:
        display(m)
toGeoJson(self)
¶
Convert the cluster results to GeoJSON.
Returns:
Type | Description |
---|---|
GeoJSON (str) |
Serialized JSON text. |
Source code in mygeopackage/unsupervised.py
def toGeoJson(self):
    """Serialize the cluster results as a GeoJSON FeatureCollection of points.

    Each record in ``self.data`` becomes one Point feature whose coordinates
    are columns 0 and 1 of the record, with two properties: ``ID`` (the value
    in column ``self.identifier``) and ``Class`` (the record's cluster label).

    Returns:
        GeoJSON (str): Serialized JSON text.

    Raises:
        IndexError: If ``self.labels`` has fewer entries than ``self.data``.
        ValueError: If a coordinate value cannot be converted to a float.
    """
    features = []
    for i, row in enumerate(self.data):
        record_id = row[self.identifier]
        # NumPy scalars such as int64 are rejected by json.dumps; unwrap them
        # to the equivalent built-in Python value first.
        if isinstance(record_id, np.generic):
            record_id = record_id.item()
        features.append({
            'type': 'Feature',
            'properties': {
                'ID': record_id,
                'Class': int(self.labels[i]),
            },
            'geometry': {
                'type': 'Point',
                # GeoJSON positions must be numbers: float() covers integer
                # NumPy scalars and numeric strings from text-typed arrays.
                'coordinates': [float(row[0]), float(row[1])],
            },
        })

    geojson = {
        'type': 'FeatureCollection',
        # Name kept as-is for output compatibility, whichever algorithm
        # produced the labels.
        'name': 'K-Means Results',
        'features': features,
    }
    return json.dumps(geojson)
dbscan(eps, min_samples, field, cluster, identifier)
¶
DBSCAN unsupervised learning for geospatial or attribute data.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
eps |
float |
The maximum distance between two samples for one to be considered as in the neighborhood of the other. |
required |
min_samples |
int |
The number of samples (or total weight) in a neighborhood for a point to be considered as a core point. This includes the point itself. |
required |
field |
int |
Column index, or sequence of column indices, selecting the field(s) of the data array to cluster on. |
required |
cluster |
Cluster |
Cluster class object to store the results. |
required |
identifier |
int |
The index for the field as the identifier of the dataset. |
required |
Source code in mygeopackage/unsupervised.py
def dbscan(eps, min_samples, field, cluster: Cluster, identifier):
    """Run DBSCAN on the selected columns and store the outcome on ``cluster``.

    The ``cluster`` object is updated in place and nothing is returned.

    Args:
        eps (float): The maximum distance between two samples for one to be considered as in the neighborhood of the other.
        min_samples (int): The number of samples (or total weight) in a neighborhood for a point to be considered as a core point. This includes the point itself.
        field (int): The index for the fields to be clustered.
        cluster (Cluster): Cluster class object to store the results.
        identifier (int): The index for the field as the identifier of the dataset.
    """
    # Select the requested columns and cast them to float64 before fitting.
    samples = cluster.data[:, field].astype(np.float64)
    fitted = DBSCAN(eps=eps, min_samples=min_samples).fit(samples)

    # No centres are stored for a DBSCAN run; only the labels are kept.
    cluster.cluster_centers = None
    cluster.labels = fitted.labels_
    cluster.identifier = identifier
k_means(n, field, cluster, identifier)
¶
K-Means unsupervised learning for geospatial or attribute data.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
n |
int |
Desired number of clusters for K-Means analysis. |
required |
field |
int |
Column index, or sequence of column indices, selecting the field(s) of the data array to cluster on. |
required |
cluster |
Cluster |
Cluster class object to store the results. |
required |
identifier |
int |
The index for the field as the identifier of the dataset. |
required |
Source code in mygeopackage/unsupervised.py
def k_means(n, field, cluster: Cluster, identifier):
    """Run K-Means on the selected columns and store the outcome on ``cluster``.

    The ``cluster`` object is updated in place and nothing is returned.

    Args:
        n (int): Desired number of clusters for K-Means analysis.
        field (int): The index for the fields to be clustered.
        cluster (Cluster): Cluster class object to store the results.
        identifier (int): The index for the field as the identifier of the dataset.
    """
    # Only the requested columns are passed to the estimator.
    samples = cluster.data[:, field]
    fitted = KMeans(n_clusters=n).fit(samples)

    # Record the centres and per-record labels on the caller's object.
    cluster.cluster_centers = fitted.cluster_centers_
    cluster.labels = fitted.labels_
    cluster.identifier = identifier