Skip to content

unsupervised module

Unsupervised machine learning module.

Cluster

__init__(self, data) special

Instantiate the Cluster class object.

Parameters:

Name Type Description Default
data array

Numpy array containing geospatial and attribute data.

required
Source code in mygeopackage/unsupervised.py
def __init__(self, data):
    """Create a Cluster wrapping a dataset that has not been clustered yet.

    Args:
        data (array): Numpy array containing geospatial and attribute data.
    """
    # The dataset is stored by reference; no copy is made.
    self.data = data
    # Index of the field used as the record identifier (first field by default).
    self.identifier = 0
    # Empty placeholders for the clustering results.
    self.labels = []
    self.cluster_centers = []

show(self, map=None, to_display=True)

Draw the object on the map with Folium.

Parameters:

Name Type Description Default
map Folium map object

If set to None, the function will create a new map object. If given the map object, the layer will be drawn on the map. Defaults to None.

None
Source code in mygeopackage/unsupervised.py
def show(self,map = None, to_display=True):
    """Draw every record on a Folium map, using one random colour per cluster.

    Args:
        map (Folium map object, optional): If set to None, the function will
            create a new map object centred on the first record. If given the
            map object, the layer will be drawn on the map. Defaults to None.
        to_display (bool, optional): If true, the map is rendered with
            ``display`` once all markers have been added. Defaults to True.
    """
    points = self.data
    # Coerce to an array: comparing a plain list with a scalar yields a single
    # False instead of an element-wise mask, which would silently select no
    # members for any cluster.
    labels = np.asarray(self.labels)

    if map is None:
        # Columns 1 and 0 of the first record are passed as the map centre,
        # in that order (same ordering as the markers below).
        m = folium.Map(location=[points[0][1], points[0][0]], zoom_start=10)
    else:
        m = map

    for label in np.unique(labels):
        # One random, fairly bright colour (channels in 100..255) per cluster.
        color = rgb_to_hex([randint(100, 255), randint(100, 255), randint(100, 255)])
        for member in points[np.where(labels == label)]:
            folium.CircleMarker(
                [member[1], member[0]],
                radius=6,
                popup='Cluster: ' + str(label),
                fill=True,
                color=color,
                fill_color=color,
                fill_opacity=1,
            ).add_to(m)

    if to_display:
        display(m)

toGeoJson(self)

Convert cluster results to GeoJSON.

Returns:

Type Description
GeoJSON (str)

Serialized JSON text.

Source code in mygeopackage/unsupervised.py
def toGeoJson(self):
    """Convert cluster results to GeoJSON.

    Every record becomes a ``Point`` feature whose coordinates are the first
    two fields of the record, and whose properties hold the identifier field
    (``ID``) and the assigned cluster label (``Class``).

    Returns:
        GeoJSON (str): Serialized JSON text.

    Raises:
        IndexError: If there are fewer labels than records.
    """
    def _native(value):
        # numpy scalars expose .item(), which returns the equivalent built-in
        # Python value; json cannot encode e.g. numpy integer scalars directly.
        to_python = getattr(value, 'item', None)
        return to_python() if callable(to_python) else value

    features = []
    for i, record in enumerate(self.data):
        features.append({
            'type': 'Feature',
            'properties': {
                'ID': _native(record[self.identifier]),
                'Class': int(self.labels[i]),
            },
            'geometry': {
                'type': 'Point',
                # GeoJSON positions must be numbers; float() also covers
                # integer- or string-typed arrays.
                'coordinates': [float(record[0]), float(record[1])],
            },
        })

    # NOTE: the collection name is fixed, whichever algorithm produced the labels.
    return json.dumps({
        'type': 'FeatureCollection',
        'name': 'K-Means Results',
        'features': features,
    })

dbscan(eps, min_samples, field, cluster, identifier)

DBSCAN unsupervised learning for geospatial or attribute data.

Parameters:

Name Type Description Default
eps float

The maximum distance between two samples for one to be considered as in the neighborhood of the other.

required
min_samples int

The number of samples (or total weight) in a neighborhood for a point to be considered as a core point. This includes the point itself.

required
field int

The column index of the field(s) to be clustered.

required
cluster Cluster

Cluster class object to store the results.

required
identifier int

The index of the field used as the identifier of the dataset.

required
Source code in mygeopackage/unsupervised.py
def dbscan(eps,min_samples,field,cluster:Cluster,identifier):
    """DBSCAN unsupervised learning for geospatial or attribute data.

    The results are written onto ``cluster`` in place: ``labels`` receives the
    fitted labels, ``cluster_centers`` is set to None and ``identifier`` is
    recorded. Nothing is returned.

    Args:
        eps (float): The maximum distance between two samples for one to be considered as in the neighborhood of the other.
        min_samples (int): The number of samples (or total weight) in a neighborhood for a point to be considered as a core point. This includes the point itself.
        field (int or sequence of int): Column index, or indices, of the field(s) to be clustered.
        cluster (Cluster): Cluster class object to store the results.
        identifier (int): The index for the field as the identifier of the dataset.
    """
    features = cluster.data[:, field].astype(np.float64)
    # A single integer index selects a 1-D column, but the estimator's fit()
    # expects a 2-D (n_samples, n_features) array; treat it as one feature.
    if features.ndim == 1:
        features = features.reshape(-1, 1)

    _dbscan = DBSCAN(eps=eps, min_samples=min_samples).fit(features)

    cluster.cluster_centers = None
    cluster.labels = _dbscan.labels_
    cluster.identifier = identifier

k_means(n, field, cluster, identifier)

K-Means unsupervised learning for geospatial or attribute data.

Parameters:

Name Type Description Default
n int

Desired number of clusters for K-Means analysis.

required
field int

The column index of the field(s) to be clustered.

required
cluster Cluster

Cluster class object to store the results.

required
identifier int

The index of the field used as the identifier of the dataset.

required
Source code in mygeopackage/unsupervised.py
def k_means(n,field,cluster:Cluster,identifier):
    """K-Means unsupervised learning for geospatial or attribute data.

    The results are written onto ``cluster`` in place: ``cluster_centers`` and
    ``labels`` receive the fitted values and ``identifier`` is recorded.
    Nothing is returned.

    Args:
        n (int): Desired number of clusters for K-Means analysis.
        field (int or sequence of int): Column index, or indices, of the field(s) to be clustered.
        cluster (Cluster): Cluster class object to store the results.
        identifier (int): The index for the field as the identifier of the dataset.
    """
    # Convert to floating point first (as dbscan does) so that columns taken
    # from a string- or object-typed array are accepted by the estimator.
    features = cluster.data[:, field].astype(float)
    # A single integer index selects a 1-D column, but the estimator's fit()
    # expects a 2-D (n_samples, n_features) array; treat it as one feature.
    if features.ndim == 1:
        features = features.reshape(-1, 1)

    kmeans = KMeans(n_clusters=n).fit(features)

    cluster.cluster_centers = kmeans.cluster_centers_
    cluster.labels = kmeans.labels_
    cluster.identifier = identifier

Last update: 2021-05-03