これは直接的な解決策ではないかもしれませんが、より参考になります。以前に hasersine 式を使用してみましたが、それを使用して最も近い n 個の近傍のセットを計算すると、数千のポイントに使用すると耐えられないほど長くなりました。
そこで、高速検索を可能にするためにバイナリ ツリーに入れることができるハッシュ クラス (またはマッピング?) を作成しました。距離ではなく、角度 (緯度、経度) では機能しません。
find 関数は、ツリー内の最も近いポイントを見つけてから、n 個のノードが見つかるまでツリーを上っていきます。
geocode.py:
units = [(31-q, 180.0/(2 ** q)) for q in range(32)]
def bit_sum(bits):
return sum([2**bit for bit in bits])
class gpshash(object):
def __init__(self, longitude = None, latitude = None, **kwargs):
if(kwargs):
if(kwargs.has_key("longitude ") and kwargs.has_key("latitude ")):
self.longitude = geohash(degrees=kwargs["degrees"])
self.latitude = geohash(degrees=kwargs["hash"])
else:
if(longitude == None or latitude == None):
self.longitude = geohash(degrees=0)
self.latitude = geohash(degrees=0)
else:
self.longitude = geohash(degrees=longitude)
self.latitude = geohash(degrees=latitude)
long_hash = self.longitude.bin_hash
lat_hash = self.latitude.bin_hash
hash_str = ""
if(len(long_hash) == len(lat_hash)):
for i in range(len(long_hash)):
hash_str += (long_hash[i]+lat_hash[i])
self.bin_hash = hash_str
def __str__(self):
return "%s, %s" % (str(self.longitude.hash), str(self.latitude.hash))
def __repr__(self):
return str("<gpshash long: %f lat: %f>" % (self.longitude.degrees, self.latitude.degrees))
def __eq__(self, other):
if(isinstance(self, gpshash) and isinstance(other, gpshash)):
return (((self.longitude._hash ^ other.longitude._hash) == 0) and ((self.latitude._hash ^ other.latitude._hash) == 0))
else:
return False
class geohash(object):
def __init__(self, degrees = 0, **kwargs):
if(kwargs):
if(kwargs.has_key("degrees")):
self.degrees = kwargs["degrees"] % 360
self._hash = self.encode()
elif(kwargs.has_key("hash")):
self._hash = kwargs["hash"] % ((2 << 31) - 1)
self.degrees = self.decode()
else:
self.degrees = degrees % 360
self._hash = self.encode()
def __str__(self):
return str(self.hash)
def __repr__(self):
return str("<geohash degrees: %f hash: %s>" % (self.degrees, self.hash))
def __add__(self, other):
return geohash(hash=(self._hash + other._hash))
def __sub__(self, other):
return geohash(hash=(self._hash - other._hash))
def __xor__(self, other):
return geohash(hash=(self._hash ^ other._hash))
def __eq__(self, other):
if(isinstance(self, geohash) and isinstance(other, geohash)):
return ((self._hash ^ other._hash) == 0)
else:
return False
def encode(self):
lesser = filter(lambda (bit, angle): self.degrees >= angle, units)
combined = reduce(lambda (bits, angles), (bit, angle): (bits+[bit], angles + angle) if((angles + angle) <= self.degrees) else (bits, angles), lesser, ([], 0))
return bit_sum(combined[0])
def decode(self):
lesser = filter(lambda (bit, angle): self._hash>= (2 ** bit), units)
combined = reduce(lambda (bits, angles), (bit, angle): (bits+[bit], angles + angle) if((bit_sum(bits+[bit])) <= self._hash) else (bits, angles), lesser, ([], 0))
return combined[1]
@property
def hash(self):
self._hash = self.encode()
return "%08x" % self._hash
@property
def inv_hash(self):
self._inv_hash = self.decode()
return self._inv_hash
@property
def bin_hash(self):
self._bin_hash = bin(self._hash)[2:].zfill(32)
return self._bin_hash
class gdict(object):
'''
Base Geo Dictionary
Critical Components taken from Python26\Lib\UserDict.py
'''
__slots__ = ["parent", "left", "right", "hash_type"]
hash_type = None
def __init__(self, ihash=None, iparent=None):
def set_helper(iparent, cur_hex, hex_list):
ret_bin_tree = self.__class__(None, iparent)
if(hex_list):
ret_bin_tree.set_child(cur_hex, set_helper(ret_bin_tree, hex_list[0], hex_list[1:]))
return ret_bin_tree
elif(cur_hex):
ret_bin_tree.set_child(cur_hex, ihash)
return ret_bin_tree
self.parent = self
self.left = None
self.right = None
if(iparent != None):
self.parent = iparent
if(isinstance(ihash, self.hash_type)):
ilist = list(ihash.bin_hash)
if(len(ilist) > 1):
ret_bin_tree = set_helper(self, ilist[1], ilist[2:])
if(ret_bin_tree):
self.set_child(ilist[0], ret_bin_tree)
def set_child(self, istr, ichild):
if(istr == "0"):
self.left = ichild
elif(istr == "1"):
self.right = ichild
def get_child(self, istr):
if(istr == "0"):
return self.left
elif(istr == "1"):
return self.right
else:
return ""
def __repr__(self):
def repr_print_helper(ibin_tree, fmt_str = "", depth = 1):
if(not isinstance(ibin_tree, self.__class__)):
fmt_str += "\n"
fmt_str += ("%%%ds" % (depth)) % ""
fmt_str += ibin_tree.__repr__()
else:
if((ibin_tree.left != None and ibin_tree.right == None) or (ibin_tree.left == None and ibin_tree.right != None)):
if(ibin_tree.left != None):
fmt_str += "0"
fmt_str = repr_print_helper(ibin_tree.left, fmt_str, depth + 1)
elif(ibin_tree.right != None):
fmt_str += "1"
fmt_str = repr_print_helper(ibin_tree.right, fmt_str, depth + 1)
else:
if(ibin_tree.left != None):
fmt_str += "\n"
fmt_str += ("%%%ds" % (depth - 1)) % ""
fmt_str += "0"
fmt_str = repr_print_helper(ibin_tree.left, fmt_str, depth + 1)
if(ibin_tree.right != None):
fmt_str += "\n"
fmt_str += ("%%%ds" % (depth - 1)) % ""
fmt_str += "1"
fmt_str = repr_print_helper(ibin_tree.right, fmt_str, depth + 1)
return fmt_str
return repr_print_helper(self)
def find(self, ihash, itarget = 1):
class flat(list):
pass
def find_helper_base(iparent, ibin_tree, ihash):
ret_find = None
if(isinstance(ibin_tree, self.hash_type)):
ret_find = iparent
elif(len(ihash) > 0):
if(ibin_tree.get_child(ihash[0])):
ret_find = find_helper_base(ibin_tree, ibin_tree.get_child(ihash[0]), ihash[1:])
else:
ret_find = ibin_tree
return ret_find
def up_find(iflat, iparent, ibin_tree, ibias = "0"):
if((ibin_tree != iparent) and (len(iflat) < itarget)):
if(iparent.left):
if(len(iflat) >= itarget):
return
if(iparent.left != ibin_tree):
down_find(iflat, iparent.left, ibias)
if(iparent.right):
if(len(iflat) >= itarget):
return
if(iparent.right != ibin_tree):
down_find(iflat, iparent.right, ibias)
up_find(iflat, ibin_tree.parent.parent, ibin_tree.parent, ibias)
def down_find(iflat, ibin_tree, ibias = "0"):
if(len(iflat) >= itarget):
return
elif(isinstance(ibin_tree, self.hash_type)):
iflat += [ibin_tree]
else:
if(ibias == "1"):
if(ibin_tree.left):
down_find(iflat, ibin_tree.left, ibias)
if(ibin_tree.right):
down_find(iflat, ibin_tree.right, ibias)
else:
if(ibin_tree.right):
down_find(iflat, ibin_tree.right, ibias)
if(ibin_tree.left):
down_find(iflat, ibin_tree.left, ibias)
if(isinstance(ihash, self.hash_type)):
flatter = flat()
hasher = ihash.bin_hash
base = find_helper_base(self, self.get_child(hasher[0]), hasher[1:])
if(base):
down_find(flatter, base)
bias = flatter[0].bin_hash[0]
up_find(flatter, base.parent, base, bias)
return list(flatter)
def merge(self, from_bin_tree):
def merge_helper(to_bin_tree, from_bin_tree):
if(isinstance(from_bin_tree, self.__class__)):
if(from_bin_tree.left != None):
if(to_bin_tree.left != None):
merge_helper(to_bin_tree.left, from_bin_tree.left)
else:
from_bin_tree.left.parent = to_bin_tree
to_bin_tree.left = from_bin_tree.left
elif(from_bin_tree.right != None):
if(to_bin_tree.right != None):
merge_helper(to_bin_tree.right, from_bin_tree.right)
else:
from_bin_tree.right.parent = to_bin_tree
to_bin_tree.right = from_bin_tree.right
merge_helper(self, from_bin_tree)
class geodict(gdict):
'''
Geo Dictionary
'''
hash_type = geohash
def __init__(self, ihash=None, iparent=None):
gdict.__init__(self, ihash, iparent)
class gpsdict(gdict):
'''
GPS Dictionary
'''
hash_type = gpshash
def __init__(self, ihash=None, iparent=None):
gdict.__init__(self, ihash, iparent)
if(__name__ == "__main__"):
gpses = \
[
gpshash(90, 90),
gpshash(68, 24),
gpshash(144, 60),
gpshash(48, 91),
gpshash(32, 105),
gpshash(32, 150),
gpshash(167, 20),
gpshash(49, 116),
gpshash(81, 82),
gpshash(63, 79),
gpshash(129, 76)
]
base_dict = gpsdict()
for cur_hash in gpses:
base_dict.merge(gpsdict(cur_hash ))
print "All locations added:"
print base_dict
print ""
print "Trying to find 3 nearest points to:"
to_find = gpshash(60, 20)
print to_find.__repr__()
found = base_dict.find(to_find, 3)
print ""
print "Found the following:"
for x in found:
print x.__repr__()