# -*- coding: utf-8 -*-
from __future__ import absolute_import, division, print_function, unicode_literals
import functools
import numpy as np
import scipy.cluster.hierarchy
from scipy.spatial import distance
[docs]def haversine(latlon1, latlon2):
r"""
Calculate the great circle distance between two points
on the earth (specified in decimal degrees)
Args:
latlon1 (tuple): (lat, lon)
latlon2 (tuple): (lat, lon)
Returns:
float : distance in kilometers
References:
en.wikipedia.org/wiki/Haversine_formula
gis.stackexchange.com/questions/81551/matching-gps-tracks
stackoverflow.com/questions/4913349/haversine-distance-gps-points
Example:
>>> from occurrence_blackbox import * # NOQA
>>> import scipy.spatial.distance as spdist
>>> import functools
>>> latlon1 = [-80.21895315, -158.81099213]
>>> latlon2 = [ 9.77816711, -17.27471498]
>>> kilometers = haversine(latlon1, latlon2)
>>> result = ('kilometers = %s' % (kilometers,))
>>> print(result)
kilometers = 11930.9093642
"""
# convert decimal degrees to radians
lat1, lon1 = np.radians(latlon1)
lat2, lon2 = np.radians(latlon2)
# haversine formula
dlon = lon2 - lon1
dlat = lat2 - lat1
a = (np.sin(dlat / 2) ** 2) + np.cos(lat1) * np.cos(lat2) * (np.sin(dlon / 2) ** 2)
c = 2 * np.arcsin(np.sqrt(a))
EARTH_RADIUS_KM = 6367
kilometers = EARTH_RADIUS_KM * c
return kilometers
[docs]def timespace_distance(pt1, pt2, km_per_sec=.02):
"""
Computes distance between two points in space and time.
Time is converted into spatial units using km_per_sec
Args:
pt1 (tuple) : (seconds, lat, lon)
pt2 (tuple) : (seconds, lat, lon)
km_per_sec (float): reasonable animal walking speed
Returns:
float: distance in kilometers
Example:
>>> from occurrence_blackbox import * # NOQA
>>> import scipy.spatial.distance as spdist
>>> import functools
>>> km_per_sec = .02
>>> latlon1 = [-80.21895315, -158.81099213]
>>> latlon2 = [ 9.77816711, -17.27471498]
>>> pt1 = [360.0] + latlon1
>>> pt2 = [0.0] + latlon2
>>> kilometers = timespace_distance(pt1, pt2)
>>> result = ('kilometers = %s' % (kilometers,))
>>> print(result)
kilometers = 2058.6323187
"""
(sec1, lat1, lon1) = pt1
(sec2, lat2, lon2) = pt2
# Get pure gps distance
km_dist = haversine((lat1, lon1), (lat2, lon2))
# Get distance in seconds and convert to km
sec_dist = np.abs(sec1 - sec2) * km_per_sec
# Add distances
timespace_dist = km_dist + sec_dist
return timespace_dist
[docs]def cluster_timespace(X_data, thresh, km_per_sec=.02):
"""
Agglometerative clustering of time/space data
Args:
X_data (ndarray) : Nx3 array where columns are (seconds, lat, lon)
thresh (float) : threshold in kilometers
References:
http://docs.scipy.org/doc/scipy-0.14.0/reference/generated/
scipy.cluster.hierarchy.linkage.html
scipy.cluster.hierarchy.fcluster.html
Notes:
# Visualize spots
http://www.darrinward.com/lat-long/?id=2009879
Example:
>>> # DISABLE_DOCTEST
>>> from occurrence_blackbox import * # NOQA
>>> # Nx1 matrix denoting groundtruth locations (for testing)
>>> X_name = np.array([0, 1, 1, 1, 1, 1, 2, 2, 2])
>>> # Nx3 matrix where each columns are (time, lat, lon)
>>> X_data = np.array([
>>> (0, 42.727985, -73.683994), # MRC
>>> (0, 42.657414, -73.774448), # Park1
>>> (0, 42.658333, -73.770993), # Park2
>>> (0, 42.654384, -73.768919), # Park3
>>> (0, 42.655039, -73.769048), # Park4
>>> (0, 42.657872, -73.764148), # Park5
>>> (0, 42.876974, -73.819311), # CP1
>>> (0, 42.862946, -73.804977), # CP2
>>> (0, 42.849809, -73.758486), # CP3
>>> ])
>>> thresh = 5.0 # kilometers
>>> X_labels = cluster_timespace(X_data, thresh)
>>> result = ('X_labels = %r' % (X_labels,))
>>> print(result)
X_labels = array([3, 2, 2, 2, 2, 2, 1, 1, 1], dtype=int32)
"""
# Compute pairwise distances between all inputs
dist_func = functools.partial(timespace_distance, km_per_sec=km_per_sec)
condenced_dist_mat = distance.pdist(X_data, dist_func)
# Compute heirarchical linkages
linkage_mat = scipy.cluster.hierarchy.linkage(condenced_dist_mat,
method='single')
# Cluster linkages
X_labels = scipy.cluster.hierarchy.fcluster(linkage_mat, thresh,
criterion='distance')
return X_labels
if __name__ == '__main__':
r"""
CommandLine:
python occurrence_blackbox.py --lat 42.727985 42.657414 42.658333 42.654384 --lon -73.683994 -73.774448 -73.770993 -73.768919 --sec 0 0 0 0
# Should return
X_labels = [2, 1, 1, 1]
"""
import argparse
parser = argparse.ArgumentParser(description='Compute agglomerative cluster')
parser.add_argument('--lat', type=float, nargs='*', help='list of latitude coords')
parser.add_argument('--lon', type=float, nargs='*', help='list of longitude coords')
parser.add_argument('--sec', type=float, nargs='*', help='list of POSIX_TIMEs in seconds')
parser.add_argument('--thresh', type=float, nargs=1, default=1., help='threshold in kilometers')
parser.add_argument('--km_per_sec', type=float, nargs=1, default=.02, help='reasonable animal speed in km/s')
args = parser.parse_args()
sec = [0] * len(args.lat) if args.sec is None else args.sec
X_data = np.vstack([sec, args.lat, args.lon]).T
X_labels = cluster_timespace(X_data, args.thresh, km_per_sec=args.km_per_sec)
print('X_labels = %r' % (X_labels.tolist(),))