# -*- coding: utf-8 -*-
"""
TODO:
    Remove Bloat

module which handles the building and caching of individual flann indexes


CommandLine:
    # Runs the incremental query test
    # {0:testdb1, 1:PZ_MTEST, 2:GZ_ALL, 3:PZ_Master0}
    python -m ibeis.model.hots.qt_inc_automatch --test-test_inc_query:0
    python -m ibeis.model.hots.qt_inc_automatch --test-test_inc_query:1
    python -m ibeis.model.hots.qt_inc_automatch --test-test_inc_query:2
    python -m ibeis.model.hots.qt_inc_automatch --test-test_inc_query:3

    utprof.py -m ibeis.model.hots.qt_inc_automatch --test-test_inc_query:0
    utprof.py -m ibeis.model.hots.qt_inc_automatch --test-test_inc_query:1
    utprof.py -m ibeis.model.hots.qt_inc_automatch --test-test_inc_query:3

"""
from __future__ import absolute_import, division, print_function
import six
import numpy as np
import utool as ut
import pyflann
#import lockfile
from os.path import join
from os.path import basename
from six.moves import range, zip, map  # NOQA
import vtool as vt
from ibeis.model.hots import hstypes
from ibeis.model.hots import _pipeline_helpers as plh  # NOQA
(print, rrr, profile) = ut.inject2(__name__, '[neighbor_index]', DEBUG=False)

USE_HOTSPOTTER_CACHE = not ut.get_argflag('--nocache-hs')
NOCACHE_FLANN = ut.get_argflag('--nocache-flann') and USE_HOTSPOTTER_CACHE
NOSAVE_FLANN = ut.get_argflag('--nosave-flann')
NOCACHE_UUIDS = ut.get_argflag('--nocache-uuids') and USE_HOTSPOTTER_CACHE

# LRU cache for nn_indexers. Ensures that only a few are ever in memory
#MAX_NEIGHBOR_CACHE_SIZE = ut.get_argval('--max-neighbor-cachesize', type_=int, default=2)
MAX_NEIGHBOR_CACHE_SIZE = ut.get_argval('--max-neighbor-cachesize', type_=int, default=1)
NEIGHBOR_CACHE = ut.get_lru_cache(MAX_NEIGHBOR_CACHE_SIZE)
# Background process for building indexes
CURRENT_THREAD = None
# Global map to keep track of UUID lists with prebuilt indexers.
UUID_MAP = ut.ddict(dict)
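

# A minimal sketch (illustration only; the real cache comes from
# ut.get_lru_cache) of the LRU semantics NEIGHBOR_CACHE relies on: once more
# than MAX_NEIGHBOR_CACHE_SIZE indexers have been inserted, the
# least-recently-used one is evicted so its FLANN structure can be freed.
def _demo_lru_semantics(maxsize=1):
    """
    Example:
        >>> keys = _demo_lru_semantics(maxsize=1)
        >>> print(keys)
        ['cfgstr2']
    """
    from collections import OrderedDict
    cache = OrderedDict()
    for cfgstr in ['cfgstr1', 'cfgstr2']:
        cache[cfgstr] = object()  # stand-in for an nnindexer
        while len(cache) > maxsize:
            cache.popitem(last=False)  # evict the least recently inserted key
    return list(cache.keys())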


class UUIDMapHyrbridCache(object):
    """
    Class that lets multiple ways of writing to the uuid_map
    be swapped in and out interchangeably

    TODO: the global read / write should periodically sync itself to disk
    and it should be loaded from disk initially
    """
    def __init__(self):
        self.uuid_maps = ut.ddict(dict)
        #self.uuid_map_fpath = uuid_map_fpath
        #self.init(uuid_map_fpath, min_reindex_thresh)
    def init(self, *args, **kwargs):
        self.args = args
        self.kwargs = kwargs
        #self.read_func = self.read_uuid_map_cpkl
        #self.write_func = self.write_uuid_map_cpkl
        self.read_func = self.read_uuid_map_dict
        self.write_func = self.write_uuid_map_dict

    #def __call__(self):
    #    return self.read_func(*self.args, **self.kwargs)
    def dump(self, cachedir):
        # TODO: DUMP AND LOAD THIS HYBRID CACHE TO DISK
        #write_uuid_map_cpkl
        fname = 'uuid_maps_hybrid_cache.cPkl'
        cpkl_fpath = join(cachedir, fname)
        ut.lock_and_save_cPkl(cpkl_fpath, self.uuid_maps)
    def load(self, cachedir):
        """ Returns a cached UUIDMap """
        fname = 'uuid_maps_hybrid_cache.cPkl'
        cpkl_fpath = join(cachedir, fname)
        self.uuid_maps = ut.lock_and_load_cPkl(cpkl_fpath)

    #def __setitem__(self, daids_hashid, visual_uuid_list):
    #    uuid_map_fpath = self.uuid_map_fpath
    #    self.write_func(uuid_map_fpath, visual_uuid_list, daids_hashid)

    #@profile
    #def read_uuid_map_shelf(self, uuid_map_fpath, min_reindex_thresh):
    #    #with ut.EmbedOnException():
    #    with lockfile.LockFile(uuid_map_fpath + '.lock'):
    #        with ut.shelf_open(uuid_map_fpath) as uuid_map:
    #            candidate_uuids = {
    #                key: val for key, val in six.iteritems(uuid_map)
    #                if len(val) >= min_reindex_thresh
    #            }
    #    return candidate_uuids

    #@profile
    #def write_uuid_map_shelf(self, uuid_map_fpath, visual_uuid_list, daids_hashid):
    #    print('Writing %d visual uuids to uuid map' % (len(visual_uuid_list)))
    #    with lockfile.LockFile(uuid_map_fpath + '.lock'):
    #        with ut.shelf_open(uuid_map_fpath) as uuid_map:
    #            uuid_map[daids_hashid] = visual_uuid_list

    #@profile
    #def read_uuid_map_cpkl(self, uuid_map_fpath, min_reindex_thresh):
    #    with lockfile.LockFile(uuid_map_fpath + '.lock'):
    #        #with ut.shelf_open(uuid_map_fpath) as uuid_map:
    #        try:
    #            uuid_map = ut.load_cPkl(uuid_map_fpath)
    #            candidate_uuids = {
    #                key: val for key, val in six.iteritems(uuid_map)
    #                if len(val) >= min_reindex_thresh
    #            }
    #        except IOError:
    #            return {}
    #    return candidate_uuids

    #@profile
    #def write_uuid_map_cpkl(self, uuid_map_fpath, visual_uuid_list, daids_hashid):
    #    """
    #    Lets the multi-indexer know about any big caches we've made. Also lets
    #    nnindexer know about other prebuilt indexers so it can attempt to just
    #    add points to them so as to avoid a rebuild.
    #    """
    #    print('Writing %d visual uuids to uuid map' % (len(visual_uuid_list)))
    #    with lockfile.LockFile(uuid_map_fpath + '.lock'):
    #        try:
    #            uuid_map = ut.load_cPkl(uuid_map_fpath)
    #        except IOError:
    #            uuid_map = {}
    #        uuid_map[daids_hashid] = visual_uuid_list
    #        ut.save_cPkl(uuid_map_fpath, uuid_map)
    @profile
    def read_uuid_map_dict(self, uuid_map_fpath, min_reindex_thresh):
        """ uses in memory dictionary instead of disk """
        uuid_map = self.uuid_maps[uuid_map_fpath]
        candidate_uuids = {
            key: val for key, val in six.iteritems(uuid_map)
            if len(val) >= min_reindex_thresh
        }
        return candidate_uuids
    @profile
    def write_uuid_map_dict(self, uuid_map_fpath, visual_uuid_list, daids_hashid):
        """
        uses in memory dictionary instead of disk

        Lets the multi-indexer know about any big caches we've made. Also lets
        nnindexer know about other prebuilt indexers so it can attempt to just
        add points to them so as to avoid a rebuild.
        """
        if NOCACHE_UUIDS:
            print('uuid cache is off')
            return
        #with ut.EmbedOnException():
        uuid_map = self.uuid_maps[uuid_map_fpath]
        uuid_map[daids_hashid] = visual_uuid_list

UUID_MAP_CACHE = UUIDMapHyrbridCache()
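

# Minimal usage sketch of the hybrid cache above (illustration only): writes
# record which sets of visual uuids have a prebuilt indexer under a hashid
# key, and reads return only the candidates large enough (at least
# min_reindex_thresh uuids) to be worth augmenting.
def _demo_uuid_map_usage():
    """
    Example:
        >>> cache = UUIDMapHyrbridCache()
        >>> fpath = 'dummy_uuid_map.cPkl'  # used only as a dict key here
        >>> cache.write_uuid_map_dict(fpath, ['vuuid1', 'vuuid2'], 'HASHID_AB')
        >>> cache.write_uuid_map_dict(fpath, ['vuuid3'], 'HASHID_C')
        >>> candidates = cache.read_uuid_map_dict(fpath, min_reindex_thresh=2)
        >>> print(sorted(candidates.keys()))
        ['HASHID_AB']
    """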

#@profile
def get_nnindexer_uuid_map_fpath(qreq_):
    """
    Example:
        >>> # ENABLE_DOCTEST
        >>> from ibeis.model.hots.neighbor_index import *  # NOQA
        >>> # build test data
        >>> ibs, qreq_ = plh.get_pipeline_testdata(defaultdb='testdb1', preload=False)
        >>> uuid_map_fpath = get_nnindexer_uuid_map_fpath(qreq_)
        >>> result = str(ut.path_ndir_split(uuid_map_fpath, 3))
        >>> print(result)
        .../_ibeis_cache/flann/uuid_map_FLANN(8_kdtrees)_FEAT(hesaff+sift_)_CHIP(sz450).cPkl
    """
    flann_cachedir = qreq_.ibs.get_flann_cachedir()
    # Have uuid shelf conditioned on the baseline flann and feature parameters
    flann_cfgstr = qreq_.qparams.flann_cfgstr
    feat_cfgstr = qreq_.qparams.feat_cfgstr
    uuid_map_cfgstr = ''.join((flann_cfgstr, feat_cfgstr))
    #uuid_map_ext = '.shelf'
    uuid_map_ext = '.cPkl'
    uuid_map_prefix = 'uuid_map'
    uuid_map_fname = ut.consensed_cfgstr(uuid_map_prefix, uuid_map_cfgstr) + uuid_map_ext
    uuid_map_fpath = join(flann_cachedir, uuid_map_fname)
    return uuid_map_fpath
def clear_memcache():
    global NEIGHBOR_CACHE
    NEIGHBOR_CACHE.clear()


#@profile
def clear_uuid_cache(qreq_):
    """
    CommandLine:
        python -m ibeis.model.hots.neighbor_index --test-clear_uuid_cache

    Example:
        >>> # DISABLE_DOCTEST
        >>> from ibeis.model.hots.neighbor_index import *  # NOQA
        >>> import ibeis
        >>> # build test data
        >>> cfgdict = dict(fg_on=False)
        >>> ibs, qreq_ = plh.get_pipeline_testdata(defaultdb='testdb1', preload=False, cfgdict=cfgdict)
        >>> # execute function
        >>> result = clear_uuid_cache(qreq_)
        >>> # verify results
        >>> print(result)
    """
    print('[nnindex] clearing uuid cache')
    uuid_map_fpath = get_nnindexer_uuid_map_fpath(qreq_)
    ut.delete(uuid_map_fpath)
    ut.delete(uuid_map_fpath + '.lock')
    print('[nnindex] finished uuid cache clear')
def request_ibeis_nnindexer(qreq_, verbose=True, use_memcache=True,
                            force_rebuild=False):
    """
    CALLED BY QUERYREQUEST::LOAD_INDEXER
    FIXME: should use params from qparams instead of ibs.cfg

    IBEIS interface into neighbor_index

    Args:
        qreq_ (QueryRequest): hyper-parameters

    Returns:
        NeighborIndex: nnindexer

    CommandLine:
        python -m ibeis.model.hots.neighbor_index --test-request_ibeis_nnindexer

    Example:
        >>> # ENABLE_DOCTEST
        >>> from ibeis.model.hots.neighbor_index import *  # NOQA
        >>> nnindexer, qreq_, ibs = test_nnindexer(None)
        >>> nnindexer = request_ibeis_nnindexer(qreq_)
    """
    daid_list = qreq_.get_internal_daids()
    if not hasattr(qreq_.qparams, 'use_augmented_indexer'):
        qreq_.qparams.use_augmented_indexer = True
    if qreq_.qparams.use_augmented_indexer:
        nnindexer = request_augmented_ibeis_nnindexer(
            qreq_, daid_list, verbose=verbose, use_memcache=use_memcache,
            force_rebuild=force_rebuild)
    else:
        nnindexer = request_memcached_ibeis_nnindexer(
            qreq_, daid_list, verbose=verbose, use_memcache=use_memcache,
            force_rebuild=force_rebuild)
    return nnindexer


#@profile
def request_augmented_ibeis_nnindexer(qreq_, daid_list, verbose=True,
                                      use_memcache=True, force_rebuild=False,
                                      memtrack=None):
    """
    DO NOT USE. THIS FUNCTION CAN CURRENTLY CAUSE A SEGFAULT

    Tries to give you an indexer for the requested daids using the least
    amount of computation possible: by loading and adding to a partially built
    nnindex if possible and, if that fails, falling back to request_memcache.

    Args:
        qreq_ (QueryRequest): query request object with hyper-parameters
        daid_list (list):

    Returns:
        NeighborIndex: nnindexer

    CommandLine:
        python -m ibeis.model.hots.neighbor_index --test-request_augmented_ibeis_nnindexer

    Example:
        >>> # ENABLE_DOCTEST
        >>> from ibeis.model.hots.neighbor_index import *  # NOQA
        >>> import ibeis
        >>> # build test data
        >>> ZEB_PLAIN = ibeis.const.Species.ZEB_PLAIN
        >>> ibs = ibeis.opendb('testdb1')
        >>> use_memcache, max_covers, verbose = True, None, True
        >>> daid_list = ibs.get_valid_aids(species=ZEB_PLAIN)[0:6]
        >>> qreq_ = ibs.new_query_request(daid_list, daid_list)
        >>> qreq_.qparams.min_reindex_thresh = 1
        >>> min_reindex_thresh = qreq_.qparams.min_reindex_thresh
        >>> # CLEAR CACHE for clean test
        >>> clear_uuid_cache(qreq_)
        >>> # LOAD 3 AIDS INTO CACHE
        >>> aid_list = ibs.get_valid_aids(species=ZEB_PLAIN)[0:3]
        >>> # Should fallback
        >>> nnindexer = request_augmented_ibeis_nnindexer(qreq_, aid_list)
        >>> # assert the fallback
        >>> uncovered_aids, covered_aids_list = group_daids_by_cached_nnindexer(
        ...     qreq_, daid_list, min_reindex_thresh, max_covers)
        >>> result2 = uncovered_aids, covered_aids_list
        >>> ut.assert_eq(result2, ([4, 5, 6], [[1, 2, 3]]), 'pre augment')
        >>> # Should augment
        >>> nnindexer = request_augmented_ibeis_nnindexer(qreq_, daid_list)
        >>> uncovered_aids, covered_aids_list = group_daids_by_cached_nnindexer(
        ...     qreq_, daid_list, min_reindex_thresh, max_covers)
        >>> result3 = uncovered_aids, covered_aids_list
        >>> ut.assert_eq(result3, ([], [[1, 2, 3, 4, 5, 6]]), 'post augment')
        >>> # Should fallback
        >>> nnindexer2 = request_augmented_ibeis_nnindexer(qreq_, daid_list)
        >>> assert nnindexer is nnindexer2
    """
    global NEIGHBOR_CACHE
    min_reindex_thresh = qreq_.qparams.min_reindex_thresh
    if not force_rebuild:
        new_daid_list, covered_aids_list = group_daids_by_cached_nnindexer(
            qreq_, daid_list, min_reindex_thresh, max_covers=1)
        can_augment = (
            len(covered_aids_list) > 0 and
            not ut.list_set_equal(covered_aids_list[0], daid_list))
    else:
        can_augment = False
    if verbose:
        print('[aug] Requesting augmented nnindexer')
    if can_augment:
        covered_aids = covered_aids_list[0]
        #with ut.PrintStartEndContext('AUGMENTING NNINDEX', verbose=verbose):
        #    with ut.Indenter('|  '):
        if verbose:
            print('[aug] Augmenting index of %d old daids with %d new daids' %
                  (len(covered_aids), len(new_daid_list)))
        # Load the base covered indexer
        # THIS SHOULD LOAD NOT REBUILD IF THE UUIDS ARE COVERED
        base_nnindexer = request_memcached_ibeis_nnindexer(
            qreq_, covered_aids, verbose=verbose, use_memcache=use_memcache)
        # Remove this indexer from the memcache because we are going to change it
        if NEIGHBOR_CACHE.has_key(base_nnindexer.cfgstr):  # NOQA
            print('Removing key from memcache')
            NEIGHBOR_CACHE[base_nnindexer.cfgstr] = None
            del NEIGHBOR_CACHE[base_nnindexer.cfgstr]
        new_vecs_list, new_fgws_list = get_support_data(qreq_, new_daid_list)
        base_nnindexer.add_support(new_daid_list, new_vecs_list, new_fgws_list,
                                   verbose=True)
        # FIXME: pointer issues
        nnindexer = base_nnindexer
        # Change to the new cfgstr
        nnindex_cfgstr = build_nnindex_cfgstr(qreq_, daid_list)
        nnindexer.cfgstr = nnindex_cfgstr
        cachedir = qreq_.ibs.get_flann_cachedir()
        nnindexer.save(cachedir)
        # Write to inverse uuid
        if len(daid_list) > min_reindex_thresh:
            uuid_map_fpath = get_nnindexer_uuid_map_fpath(qreq_)
            daids_hashid = get_data_cfgstr(qreq_.ibs, daid_list)
            visual_uuid_list = qreq_.ibs.get_annot_visual_uuids(daid_list)
            UUID_MAP_CACHE.write_uuid_map_dict(uuid_map_fpath,
                                               visual_uuid_list, daids_hashid)
        # Write to memcache
        if ut.VERBOSE:
            print('[aug] Wrote to memcache=%r' % (nnindex_cfgstr,))
        NEIGHBOR_CACHE[nnindex_cfgstr] = nnindexer
        return nnindexer
    else:
        #if ut.VERBOSE:
        if verbose:
            print('[aug] Nothing to augment, fallback to memcache')
        # Fallback
        nnindexer = request_memcached_ibeis_nnindexer(
            qreq_, daid_list, verbose=verbose, use_memcache=use_memcache,
            force_rebuild=force_rebuild, memtrack=memtrack)
        return nnindexer


#@profile
def request_memcached_ibeis_nnindexer(qreq_, daid_list, use_memcache=True,
                                      verbose=ut.NOT_QUIET, veryverbose=False,
                                      force_rebuild=False,
                                      allow_memfallback=True, memtrack=None):
    """
    FOR INTERNAL USE ONLY
    takes a custom daid list. might not be the same as what is in qreq_

    CommandLine:
        python -m ibeis.model.hots.neighbor_index --test-request_memcached_ibeis_nnindexer

    Example:
        >>> # DISABLE_DOCTEST
        >>> from ibeis.model.hots.neighbor_index import *  # NOQA
        >>> import ibeis
        >>> # build test data
        >>> ibs = ibeis.opendb('testdb1')
        >>> ZEB_PLAIN = ibeis.const.Species.ZEB_PLAIN
        >>> daid_list = ibs.get_valid_aids(species=ZEB_PLAIN)[0:3]
        >>> qreq_ = ibs.new_query_request(daid_list, daid_list)
        >>> qreq_.qparams.min_reindex_thresh = 3
        >>> verbose = True
        >>> use_memcache = True
        >>> # execute function
        >>> nnindexer = request_memcached_ibeis_nnindexer(qreq_, daid_list, use_memcache)
        >>> # verify results
        >>> result = str(nnindexer)
        >>> print(result)
    """
    global NEIGHBOR_CACHE
    #try:
    if veryverbose:
        print('[nnindex.MEMCACHE] len(NEIGHBOR_CACHE) = %r' %
              (len(NEIGHBOR_CACHE),))
        # the lru cache won't be recognized by get_object_size_str; cast to
        # pure python objects
        print('[nnindex.MEMCACHE] size(NEIGHBOR_CACHE) = %s' %
              (ut.get_object_size_str(NEIGHBOR_CACHE.items()),))
    #if memtrack is not None:
    #    memtrack.report('IN REQUEST MEMCACHE')
    nnindex_cfgstr = build_nnindex_cfgstr(qreq_, daid_list)
    # neighbor memory cache
    if (not force_rebuild and use_memcache and
            NEIGHBOR_CACHE.has_key(nnindex_cfgstr)):  # NOQA (has_key is for a lru cache)
        if veryverbose or ut.VERYVERBOSE:
            print('... nnindex memcache hit: cfgstr=%s' % (nnindex_cfgstr,))
        nnindexer = NEIGHBOR_CACHE[nnindex_cfgstr]
    else:
        if veryverbose or ut.VERYVERBOSE:
            print('... nnindex memcache miss: cfgstr=%s' % (nnindex_cfgstr,))
        # Write to inverse uuid
        nnindexer = request_diskcached_ibeis_nnindexer(
            qreq_, daid_list, nnindex_cfgstr, verbose,
            force_rebuild=force_rebuild, memtrack=memtrack)
        NEIGHBOR_CACHE_WRITE = True
        if NEIGHBOR_CACHE_WRITE:
            # Write to memcache
            if ut.VERBOSE or ut.VERYVERBOSE:
                print('[disk] Write to memcache=%r' % (nnindex_cfgstr,))
            NEIGHBOR_CACHE[nnindex_cfgstr] = nnindexer
        else:
            if ut.VERBOSE or ut.VERYVERBOSE:
                print('[disk] Did not write to memcache=%r' % (nnindex_cfgstr,))
    return nnindexer


#@profile
def request_diskcached_ibeis_nnindexer(qreq_, daid_list, nnindex_cfgstr=None,
                                       verbose=True, force_rebuild=False,
                                       memtrack=None):
    """
    builds a new NeighborIndex which will try to use a disk cached flann if
    available

    Args:
        qreq_ (QueryRequest): query request object with hyper-parameters
        daid_list (list):
        nnindex_cfgstr (str):
        verbose (bool):

    Returns:
        NeighborIndex: nnindexer

    CommandLine:
        python -m ibeis.model.hots.neighbor_index --test-request_diskcached_ibeis_nnindexer

    Example:
        >>> # DISABLE_DOCTEST
        >>> from ibeis.model.hots.neighbor_index import *  # NOQA
        >>> import ibeis
        >>> # build test data
        >>> ibs = ibeis.opendb('testdb1')
        >>> daid_list = ibs.get_valid_aids(species=ibeis.const.Species.ZEB_PLAIN)
        >>> qreq_ = ibs.new_query_request(daid_list, daid_list)
        >>> nnindex_cfgstr = build_nnindex_cfgstr(qreq_, daid_list)
        >>> verbose = True
        >>> # execute function
        >>> nnindexer = request_diskcached_ibeis_nnindexer(qreq_, daid_list, nnindex_cfgstr, verbose)
        >>> # verify results
        >>> result = str(nnindexer)
        >>> print(result)
    """
    if nnindex_cfgstr is None:
        nnindex_cfgstr = build_nnindex_cfgstr(qreq_, daid_list)
    cfgstr = nnindex_cfgstr
    cachedir = qreq_.ibs.get_flann_cachedir()
    flann_params = qreq_.qparams.flann_params
    flann_params['checks'] = qreq_.qparams.checks
    #if memtrack is not None:
    #    memtrack.report('[PRE SUPPORT]')
    # Get annot descriptors to index
    print('[nnindex] Loading support data to build diskcached indexer')
    vecs_list, fgws_list = get_support_data(qreq_, daid_list)
    if memtrack is not None:
        memtrack.report('[AFTER GET SUPPORT DATA]')
    try:
        nnindexer = new_neighbor_index(
            daid_list, vecs_list, fgws_list, flann_params, cachedir,
            cfgstr=cfgstr, verbose=verbose, force_rebuild=force_rebuild,
            memtrack=memtrack)
    except Exception as ex:
        ut.printex(ex, True, msg_='cannot build inverted index',
                   key_list=['ibs.get_infostr()'])
        raise
    # Record these uuids in the disk based uuid map so they can be augmented
    # if needed
    min_reindex_thresh = qreq_.qparams.min_reindex_thresh
    if len(daid_list) > min_reindex_thresh:
        uuid_map_fpath = get_nnindexer_uuid_map_fpath(qreq_)
        daids_hashid = get_data_cfgstr(qreq_.ibs, daid_list)
        visual_uuid_list = qreq_.ibs.get_annot_visual_uuids(daid_list)
        UUID_MAP_CACHE.write_uuid_map_dict(uuid_map_fpath, visual_uuid_list,
                                           daids_hashid)
    if memtrack is not None:
        memtrack.report('[AFTER WRITE_UUID_MAP]')
    return nnindexer


#@profile
def group_daids_by_cached_nnindexer(qreq_, daid_list, min_reindex_thresh,
                                    max_covers=None):
    r"""
    FIXME: This function is slow due to ibs.get_annot_aids_from_visual_uuid
    (282.253 seconds for 600 queries)

    CommandLine:
        python -m ibeis.model.hots.neighbor_index --test-group_daids_by_cached_nnindexer

    Example:
        >>> # ENABLE_DOCTEST
        >>> from ibeis.model.hots.neighbor_index import *  # NOQA
        >>> import ibeis
        >>> ibs = ibeis.opendb('testdb1')
        >>> ZEB_PLAIN = ibeis.const.Species.ZEB_PLAIN
        >>> daid_list = ibs.get_valid_aids(species=ZEB_PLAIN)
        >>> qreq_ = ibs.new_query_request(daid_list, daid_list)
        >>> # Set the params a bit lower
        >>> max_covers = None
        >>> qreq_.qparams.min_reindex_thresh = 1
        >>> min_reindex_thresh = qreq_.qparams.min_reindex_thresh
        >>> # STEP 0: CLEAR THE CACHE
        >>> clear_uuid_cache(qreq_)
        >>> # STEP 1: ASSERT EMPTY INDEX
        >>> daid_list = ibs.get_valid_aids(species=ZEB_PLAIN)[0:3]
        >>> uncovered_aids, covered_aids_list = group_daids_by_cached_nnindexer(
        ...     qreq_, daid_list, min_reindex_thresh, max_covers)
        >>> result1 = uncovered_aids, covered_aids_list
        >>> ut.assert_eq(result1, ([1, 2, 3], []), 'pre request')
        >>> # TEST 2: SHOULD MAKE 123 COVERED
        >>> nnindexer = request_memcached_ibeis_nnindexer(qreq_, daid_list)
        >>> uncovered_aids, covered_aids_list = group_daids_by_cached_nnindexer(
        ...     qreq_, daid_list, min_reindex_thresh, max_covers)
        >>> result2 = uncovered_aids, covered_aids_list
        >>> ut.assert_eq(result2, ([], [[1, 2, 3]]), 'post request')
    """
    ibs = qreq_.ibs
    # read which annotations have prebuilt caches
    uuid_map_fpath = get_nnindexer_uuid_map_fpath(qreq_)
    candidate_uuids = UUID_MAP_CACHE.read_uuid_map_dict(uuid_map_fpath,
                                                        min_reindex_thresh)
    # find a maximum independent set cover of the requested annotations
    annot_vuuid_list = ibs.get_annot_visual_uuids(daid_list)  # 3.2 %
    covertup = ut.greedy_max_inden_setcover(
        candidate_uuids, annot_vuuid_list, max_covers)  # 0.2 %
    uncovered_vuuids, covered_vuuids_list, accepted_keys = covertup
    # return the grouped covered items (so they can be loaded) and
    # the remaining uuids which need to have an index computed.
    uncovered_aids_ = ibs.get_annot_aids_from_visual_uuid(uncovered_vuuids)  # 28.0%
    covered_aids_list_ = ibs.unflat_map(
        ibs.get_annot_aids_from_visual_uuid, covered_vuuids_list)  # 68%
    # FIXME:
    uncovered_aids = sorted(uncovered_aids_)
    #covered_aids_list = list(map(sorted, covered_aids_list_))
    covered_aids_list = covered_aids_list_
    return uncovered_aids, covered_aids_list
def get_data_cfgstr(ibs, daid_list):
    """ part 2 data hash id """
    daids_hashid = ibs.get_annot_hashid_visual_uuid(daid_list)
    return daids_hashid


#@profile
def build_nnindex_cfgstr(qreq_, daid_list):
    """
    builds a string that uniquely identifies an indexer built with parameters
    from the input query request and indexed descriptors from the input
    annotation ids

    Args:
        qreq_ (QueryRequest): query request object with hyper-parameters
        daid_list (list):

    Returns:
        str: nnindex_cfgstr

    CommandLine:
        python -c 'import utool; print(utool.auto_docstr("ibeis.model.hots.neighbor_index", "build_nnindex_cfgstr"))'
        python -m ibeis.model.hots.neighbor_index --test-build_nnindex_cfgstr

    Example:
        >>> # ENABLE_DOCTEST
        >>> from ibeis.model.hots.neighbor_index import *  # NOQA
        >>> # build test data
        >>> import ibeis
        >>> ibs = ibeis.opendb(db='testdb1')
        >>> daid_list = ibs.get_valid_aids(species=ibeis.const.Species.ZEB_PLAIN)
        >>> qreq_ = ibs.new_query_request(daid_list, daid_list, cfgdict=dict(fg_on=False))
        >>> # execute function
        >>> nnindex_cfgstr = build_nnindex_cfgstr(qreq_, daid_list)
        >>> # verify results
        >>> result = str(nnindex_cfgstr)
        >>> print(result)
        _VUUIDS((6)fvpdb9cud49@ll@+)_FLANN(8_kdtrees)_FEATWEIGHT(OFF)_FEAT(hesaff+sift_)_CHIP(sz450)
    """
    flann_cfgstr = qreq_.qparams.flann_cfgstr
    featweight_cfgstr = qreq_.qparams.featweight_cfgstr
    data_hashid = get_data_cfgstr(qreq_.ibs, daid_list)
    nnindex_cfgstr = ''.join((data_hashid, flann_cfgstr, featweight_cfgstr))
    return nnindex_cfgstr


#@profile
def get_fgweights_hack(qreq_, daid_list):
    """
    hack to get feature weights. returns None if feature weights are turned
    off in config settings
    """
    # <HACK:featweight>
    if qreq_.qparams.fg_on:
        fgws_list = qreq_.ibs.get_annot_fgweights(
            daid_list, config2_=qreq_.get_internal_data_config2(), ensure=True)
    else:
        fgws_list = None
    return fgws_list
    # </HACK:featweight>
def get_support_data(qreq_, daid_list):
    # TODO: look into ut.cached_func
    vecs_list = qreq_.ibs.get_annot_vecs(
        daid_list, config2_=qreq_.get_internal_data_config2())
    fgws_list = get_fgweights_hack(qreq_, daid_list)
    return vecs_list, fgws_list


#@profile
def new_neighbor_index(daid_list, vecs_list, fgws_list, flann_params, cachedir,
                       cfgstr, force_rebuild=False, verbose=True,
                       memtrack=None):
    """
    constructs a neighbor index independent of ibeis

    Args:
        daid_list (list):
        vecs_list (list):
        fgws_list (list):
        flann_params (dict):
        cachedir (str):
        cfgstr (str):

    Returns:
        NeighborIndex: nnindexer

    CommandLine:
        python -m ibeis.model.hots.neighbor_index --test-new_neighbor_index

    Example:
        >>> # SLOW_DOCTEST
        >>> from ibeis.model.hots.neighbor_index import *  # NOQA
        >>> import ibeis
        >>> # build test data
        >>> ibs = ibeis.opendb('testdb1')
        >>> daid_list = ibs.get_valid_aids(species=ibeis.const.Species.ZEB_PLAIN)
        >>> qreq_ = ibs.new_query_request(daid_list, daid_list)
        >>> nnindex_cfgstr = build_nnindex_cfgstr(qreq_, daid_list)
        >>> verbose = True
        >>> cfgstr = nnindex_cfgstr
        >>> cachedir = qreq_.ibs.get_flann_cachedir()
        >>> flann_params = qreq_.qparams.flann_params
        >>> # Get annot descriptors to index
        >>> vecs_list, fgws_list = get_support_data(qreq_, daid_list)
        >>> # execute function
        >>> nnindexer = new_neighbor_index(daid_list, vecs_list, fgws_list, flann_params, cachedir, cfgstr, verbose=True)
        >>> result = ('nnindexer.ax2_aid = %s' % (str(nnindexer.ax2_aid),))
        >>> print(result)
        nnindexer.ax2_aid = [1 2 3 4 5 6]
    """
    nnindexer = NeighborIndex(flann_params, cfgstr)
    #if memtrack is not None:
    #    memtrack.report('CREATED NEIGHBOR INDEX')
    # Initialize neighbor with unindexed data
    nnindexer.init_support(daid_list, vecs_list, fgws_list, verbose=verbose)
    if memtrack is not None:
        memtrack.report('AFTER INIT SUPPORT')
    # Load or build the indexing structure
    nnindexer.ensure_indexer(cachedir, verbose=verbose,
                             force_rebuild=force_rebuild, memtrack=memtrack)
    if memtrack is not None:
        memtrack.report('AFTER LOAD OR BUILD')
    return nnindexer


#@profile
def prepare_index_data(aid_list, vecs_list, fgws_list, verbose=True):
    """
    flattens vecs_list and builds a reverse index from the flattened indices
    (idx) to the original aids and fxs
    """
    print('[nnindex] Preparing data for indexing / loading index')
    # Check input
    assert len(aid_list) == len(vecs_list), 'invalid input. bad len'
    assert len(aid_list) > 0, ('len(aid_list) == 0. '
                               'Cannot invert index without features!')
    # Create indexes into the input aids
    ax_list = np.arange(len(aid_list))
    idx2_vec, idx2_ax, idx2_fx = invert_index(vecs_list, ax_list,
                                              verbose=verbose)
    # <HACK:fgweights>
    if fgws_list is not None:
        idx2_fgw = np.hstack(fgws_list)
        try:
            assert len(idx2_fgw) == len(idx2_vec), \
                'error. weights and vecs do not correspond'
        except Exception as ex:
            ut.printex(ex, keys=[(len, 'idx2_fgw'), (len, 'idx2_vec')])
            raise
    else:
        idx2_fgw = None
    # </HACK:fgweights>
    ax2_aid = np.array(aid_list)
    _preptup = (ax2_aid, idx2_vec, idx2_fgw, idx2_ax, idx2_fx)
    return _preptup
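
# Sketch (toy arrays only) of the inverted-index layout prepare_index_data
# returns: descriptor rows from all annots are stacked into one matrix, and a
# flat row index idx maps back to its annotation via idx2_ax[idx] and to its
# feature number within that annotation via idx2_fx[idx].
def _demo_inverted_index_layout():
    """
    Example:
        >>> vecs_list = [np.zeros((2, 4)), np.ones((3, 4))]  # 2 annots: 2 + 3 feats
        >>> idx2_vec = np.vstack(vecs_list)            # (5 x 4) stacked descriptors
        >>> idx2_ax = np.array([0, 0, 1, 1, 1])        # flat row -> annot index
        >>> idx2_fx = np.array([0, 1, 0, 1, 2])        # flat row -> feature index
        >>> print((int(idx2_ax[3]), int(idx2_fx[3])))  # 4th row is annot 1, feat 1
        (1, 1)
    """
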
@six.add_metaclass(ut.ReloadingMetaclass)
class NeighborIndex(object):
    """
    wrapper class around flann
    stores flann index and data it needs to index into

    Example:
        >>> # ENABLE_DOCTEST
        >>> from ibeis.model.hots.neighbor_index import *  # NOQA
        >>> nnindexer, qreq_, ibs = test_nnindexer()
    """
    ext = '.flann'
    prefix1 = 'flann'

    def __init__(nnindexer, flann_params, cfgstr):
        """ initialize an empty neighbor indexer """
        nnindexer.flann = None     # Approximate search structure
        nnindexer.ax2_aid = None   # (A x 1) Mapping to original annot ids
        nnindexer.idx2_vec = None  # (M x D) Descriptors to index
        nnindexer.idx2_fgw = None  # (M x 1) Descriptor foreground weight
        nnindexer.idx2_ax = None   # (M x 1) Index into the aid_list
        nnindexer.idx2_fx = None   # (M x 1) Index into the annot's features
        nnindexer.max_distance_sqrd = None  # max possible distance^2 for normalization
        nnindexer.cfgstr = cfgstr  # configuration id
        if 'random_seed' not in flann_params:
            # Make flann deterministic for the same data
            flann_params['random_seed'] = 42
        nnindexer.flann_params = flann_params
        nnindexer.cores = flann_params.get('cores', 0)
        nnindexer.checks = flann_params.get('checks', 1028)
        nnindexer.num_indexed = None
        nnindexer.flann_fpath = None

    #def __del__(nnindexer):
    #    print('+------------')
    #    print('!!! DELETING NNINDEXER: ' + nnindexer.cfgstr)
    #    print('L___________')
    #    if nnindexer.flann is not None:
    #        nnindexer.flann.delete_index()

    #@profile
    def init_support(nnindexer, aid_list, vecs_list, fgws_list, verbose=True):
        """ prepares inverted indices and FLANN data structure """
        assert nnindexer.flann is None, 'already initialized'
        _preptup = prepare_index_data(aid_list, vecs_list, fgws_list,
                                      verbose=verbose)
        (ax2_aid, idx2_vec, idx2_fgw, idx2_ax, idx2_fx) = _preptup
        nnindexer.flann = pyflann.FLANN()  # Approximate search structure
        nnindexer.ax2_aid = ax2_aid    # (A x 1) Mapping to original annot ids
        nnindexer.idx2_vec = idx2_vec  # (M x D) Descriptors to index
        nnindexer.idx2_fgw = idx2_fgw  # (M x 1) Descriptor foreground weight
        nnindexer.idx2_ax = idx2_ax    # (M x 1) Index into the aid_list
        nnindexer.idx2_fx = idx2_fx    # (M x 1) Index into the annot's features
        nnindexer.num_indexed = nnindexer.idx2_vec.shape[0]
        # Note: dtype comparison works directly, e.g.:
        #   arr = np.array([1, 2, 3], dtype=np.uint8)
        #   arr.dtype == np.uint8
        if nnindexer.idx2_vec.dtype == hstypes.VEC_TYPE:
            # these are sift descriptors
            nnindexer.max_distance_sqrd = hstypes.VEC_PSEUDO_MAX_DISTANCE_SQRD
        else:
            # FIXME: hacky way to support siam128 descriptors.
            #raise AssertionError(
            #    'NNindexer should get uint8s right now unless the algorithm has changed')
            nnindexer.max_distance_sqrd = None
    @ut.tracefunc_xml
    def add_ibeis_support(nnindexer, qreq_, new_daid_list,
                          verbose=ut.NOT_QUIET):
        # TODO: ensure that the memcache changes appropriately
        clear_memcache()
        if verbose:
            print('[nnindex] request add %d annots to single-indexer' %
                  (len(new_daid_list)))
        duplicate_aids = set(new_daid_list).intersection(
            nnindexer.get_indexed_aids())
        if len(duplicate_aids) > 0:
            if verbose:
                print('[nnindex] request has %d annots that are already '
                      'indexed. ignore those' % (len(duplicate_aids),))
            new_daid_list_ = np.array(sorted(list(set(new_daid_list) -
                                                  duplicate_aids)))
        else:
            new_daid_list_ = new_daid_list
        if len(new_daid_list_) == 0:
            if verbose:
                print('[nnindex] Nothing to do')
        else:
            new_vecs_list, new_fgws_list = get_support_data(qreq_,
                                                            new_daid_list_)
            nnindexer.add_support(new_daid_list_, new_vecs_list,
                                  new_fgws_list, verbose=verbose)
    @ut.tracefunc_xml
    def remove_ibeis_support(nnindexer, qreq_, remove_daid_list,
                             verbose=ut.NOT_QUIET):
        # TODO: ensure that the memcache changes appropriately
        if verbose:
            print('[nnindex] request remove %d annots from single-indexer' %
                  (len(remove_daid_list)))
        clear_memcache()
        nnindexer.remove_support(remove_daid_list, verbose=verbose)
    def remove_support(nnindexer, remove_daid_list, verbose=ut.NOT_QUIET):
        """
        CommandLine:
            python -m ibeis.model.hots.neighbor_index --test-remove_support

        SeeAlso:
            ~/code/flann/src/python/pyflann/index.py

        Example:
            >>> # SLOW_DOCTEST
            >>> # (IMPORTANT)
            >>> from ibeis.model.hots.neighbor_index import *  # NOQA
            >>> nnindexer, qreq_, ibs = test_nnindexer(use_memcache=False)
            >>> remove_daid_list = [8, 9, 10, 11]
            >>> K = 2
            >>> qfx2_vec = ibs.get_annot_vecs(1, config2_=qreq_.get_internal_query_config2())
            >>> # get before data
            >>> (qfx2_idx1, qfx2_dist1) = nnindexer.knn(qfx2_vec, K)
            >>> # execute test function
            >>> nnindexer.remove_support(remove_daid_list)
            >>> # test before data vs after data
            >>> (qfx2_idx2, qfx2_dist2) = nnindexer.knn(qfx2_vec, K)
            >>> ax2_nvecs = ut.dict_take(ut.dict_hist(nnindexer.idx2_ax), range(len(nnindexer.ax2_aid)))
            >>> assert qfx2_idx2.max() < ax2_nvecs[0], 'should only get points from aid 7'
            >>> assert qfx2_idx1.max() > ax2_nvecs[0], 'should get points from everyone'
        """
        if ut.DEBUG2:
            print('REMOVING POINTS')
        # TODO: ensure no duplicates
        ax2_remove_flag = np.in1d(nnindexer.ax2_aid, remove_daid_list)
        remove_ax_list = np.nonzero(ax2_remove_flag)[0]
        idx2_remove_flag = np.in1d(nnindexer.idx2_ax, remove_ax_list)
        remove_idx_list = np.nonzero(idx2_remove_flag)[0]
        if verbose:
            print('[nnindex] Found %d / %d annots that need removing' %
                  (len(remove_ax_list), len(remove_daid_list)))
            print('[nnindex] Removing %d indexed features' %
                  (len(remove_idx_list),))
        # FIXME: indices may need adjustment after removing points.
        # Currently this is not being done and the data is just being left
        # alone. This should be ok temporarily because removed ids should not
        # be returned by the flann object
        nnindexer.flann.remove_points(remove_idx_list)
        # FIXME:
        #nnindexer.ax2_aid
        if True:
            nnindexer.ax2_aid[remove_ax_list] = -1
            nnindexer.idx2_fx[remove_idx_list] = -1
            nnindexer.idx2_vec[remove_idx_list] = 0
            if nnindexer.idx2_fgw is not None:
                nnindexer.idx2_fgw[remove_idx_list] = np.nan
        # FIXME: This will definitely bug out if you remove points and then
        # try to add the same points back again.
        if ut.DEBUG2:
            print('DONE REMOVE POINTS')

    #@profile
    def add_support(nnindexer, new_daid_list, new_vecs_list, new_fgws_list,
                    verbose=ut.NOT_QUIET):
        """
        adds support data (aka data to be indexed)

        Args:
            new_daid_list (list): list of annotation ids that are being added
            new_vecs_list (list): list of descriptor vectors for each annotation
            new_fgws_list (list): list of weights per vector for each annotation
            verbose (bool): verbosity flag (default = True)

        CommandLine:
            python -m ibeis.model.hots.neighbor_index --test-add_support

        Example:
            >>> # ENABLE_DOCTEST
            >>> from ibeis.model.hots.neighbor_index import *  # NOQA
            >>> nnindexer, qreq_, ibs = test_nnindexer(use_memcache=False)
            >>> new_daid_list = [2, 3, 4]
            >>> K = 2
            >>> qfx2_vec = ibs.get_annot_vecs(1, config2_=qreq_.get_internal_query_config2())
            >>> # get before data
            >>> (qfx2_idx1, qfx2_dist1) = nnindexer.knn(qfx2_vec, K)
            >>> new_vecs_list, new_fgws_list = get_support_data(qreq_, new_daid_list)
            >>> # execute test function
            >>> nnindexer.add_support(new_daid_list, new_vecs_list, new_fgws_list)
            >>> # test before data vs after data
            >>> (qfx2_idx2, qfx2_dist2) = nnindexer.knn(qfx2_vec, K)
            >>> assert qfx2_idx2.max() > qfx2_idx1.max()
        """
        # TODO: ensure no duplicates
        nAnnots = nnindexer.num_indexed_annots()
        nVecs = nnindexer.num_indexed_vecs()
        nNewAnnots = len(new_daid_list)
        new_ax_list = np.arange(nAnnots, nAnnots + nNewAnnots)
        new_idx2_vec, new_idx2_ax, new_idx2_fx = \
            invert_index(new_vecs_list, new_ax_list, verbose=verbose)
        nNewVecs = len(new_idx2_vec)
        if verbose or ut.VERYVERBOSE:
            print(('[nnindex] Adding %d vecs from %d annots to nnindex '
                   'with %d vecs and %d annots') %
                  (nNewVecs, nNewAnnots, nVecs, nAnnots))
        if ut.DEBUG2:
            print('STACKING')
        # Stack inverted information
        old_idx2_vec = nnindexer.idx2_vec
        if nnindexer.idx2_fgw is not None:
            new_idx2_fgw = np.hstack(new_fgws_list)
            #nnindexer.old_vecs.append(new_idx2_fgw)
        ##---
        _ax2_aid = np.hstack((nnindexer.ax2_aid, new_daid_list))
        _idx2_ax = np.hstack((nnindexer.idx2_ax, new_idx2_ax))
        _idx2_fx = np.hstack((nnindexer.idx2_fx, new_idx2_fx))
        _idx2_vec = np.vstack((old_idx2_vec, new_idx2_vec))
        if nnindexer.idx2_fgw is not None:
            _idx2_fgw = np.hstack((nnindexer.idx2_fgw, new_idx2_fgw))
        if ut.DEBUG2:
            print('REPLACING')
        nnindexer.ax2_aid = _ax2_aid
        nnindexer.idx2_ax = _idx2_ax
        nnindexer.idx2_vec = _idx2_vec
        nnindexer.idx2_fx = _idx2_fx
        if nnindexer.idx2_fgw is not None:
            nnindexer.idx2_fgw = _idx2_fgw
        #nnindexer.idx2_kpts = None
        #nnindexer.idx2_oris = None
        # Add new points to flann structure
        if ut.DEBUG2:
            print('ADD POINTS (FIXME: SOMETIMES SEGFAULT OCCURS)')
            print('new_idx2_vec.dtype = %r' % new_idx2_vec.dtype)
            print('new_idx2_vec.shape = %r' % (new_idx2_vec.shape,))
        nnindexer.flann.add_points(new_idx2_vec)
        if ut.DEBUG2:
            print('DONE ADD POINTS')
    def ensure_indexer(nnindexer, cachedir, verbose=True, force_rebuild=False,
                       memtrack=None):
        """
        Ensures that you get a neighbor indexer. It either loads a cached
        indexer or rebuilds a new one.
        """
        #with ut.PrintStartEndContext(msg='CACHED NNINDEX', verbose=verbose):
        if NOCACHE_FLANN or force_rebuild:
            print('...nnindex flann cache is forced off')
            load_success = False
        else:
            load_success = nnindexer.load(cachedir, verbose=verbose)
        if load_success:
            if not ut.QUIET:
                nVecs = nnindexer.num_indexed_vecs()
                nAnnots = nnindexer.num_indexed_annots()
                print('...nnindex flann cache hit: %d vectors, %d annots' %
                      (nVecs, nAnnots))
        else:
            if not ut.QUIET:
                nVecs = nnindexer.num_indexed_vecs()
                nAnnots = nnindexer.num_indexed_annots()
                print('...nnindex flann cache miss: %d vectors, %d annots' %
                      (nVecs, nAnnots))
            nnindexer.build_and_save(cachedir, verbose=verbose,
                                     memtrack=memtrack)
    def build_and_save(nnindexer, cachedir, verbose=True, memtrack=None):
        nnindexer.reindex(memtrack=memtrack)
        nnindexer.save(cachedir, verbose=verbose)
    def reindex(nnindexer, verbose=True, memtrack=None):
        """ indexes all vectors with FLANN. """
        num_vecs = nnindexer.num_indexed
        notify_num = 1E6
        verbose_ = ut.VERYVERBOSE or verbose or (
            not ut.QUIET and num_vecs > notify_num)
        if verbose_:
            print('[nnindex] ...building kdtree over %d points '
                  '(this may take a sec).' % num_vecs)
            tt = ut.tic(msg='Building index')
        idx2_vec = nnindexer.idx2_vec
        flann_params = nnindexer.flann_params
        if num_vecs == 0:
            print('WARNING: CANNOT BUILD FLANN INDEX OVER 0 POINTS. '
                  'THIS MAY BE A SIGN OF A DEEPER ISSUE')
        else:
            if memtrack is not None:
                memtrack.report('BEFORE BUILD FLANN INDEX')
            nnindexer.flann.build_index(idx2_vec, **flann_params)
            if memtrack is not None:
                memtrack.report('AFTER BUILD FLANN INDEX')
        if verbose_:
            ut.toc(tt)

    # ---- <cachable_interface> ---
    def save(nnindexer, cachedir, verbose=True):
        """ Caches a neighbor indexer to disk """
        if NOSAVE_FLANN:
            if ut.VERYVERBOSE or verbose:
                print('[nnindex] flann save is deactivated')
            return False
        flann_fpath = nnindexer.get_fpath(cachedir)
        nnindexer.flann_fpath = flann_fpath
        if ut.VERYVERBOSE or verbose:
            print('[nnindex] flann.save_index(%r)' %
                  ut.path_ndir_split(flann_fpath, n=5))
        nnindexer.flann.save_index(flann_fpath)
    def load(nnindexer, cachedir, verbose=True):
        """ Loads a cached neighbor indexer from disk """
        load_success = False
        flann_fpath = nnindexer.get_fpath(cachedir)
        nnindexer.flann_fpath = flann_fpath
        if ut.checkpath(flann_fpath, verbose=verbose):
            idx2_vec = nnindexer.idx2_vec
            # Warning: Loading a FLANN index with old headers may silently fail.
            try:
                nnindexer.flann.load_index(flann_fpath, idx2_vec)
            except (IOError, pyflann.FLANNException) as ex:
                ut.printex(ex, '... cannot load nnindex flann', iswarning=True)
            else:
                load_success = True
        return load_success
    def get_prefix(nnindexer):
        return nnindexer.prefix1

    #@profile
    def get_cfgstr(nnindexer, noquery=False):
        r"""
        returns a string which uniquely identifies the configuration and
        support data

        Args:
            noquery (bool): if True cfgstr is only relevant to building the
                index. No search params are returned (default = False)

        Returns:
            str: flann_cfgstr

        CommandLine:
            python -m ibeis.model.hots.neighbor_index --test-get_cfgstr

        Example:
            >>> # DISABLE_DOCTEST
            >>> from ibeis.model.hots.neighbor_index import *  # NOQA
            >>> cfgdict = dict(fg_on=False)
            >>> ibs, qreq_ = plh.get_pipeline_testdata(defaultdb='testdb1', preload=True, cfgdict=cfgdict)
            >>> nnindexer = qreq_.indexer
            >>> noquery = True
            >>> flann_cfgstr = nnindexer.get_cfgstr(noquery)
            >>> result = ('flann_cfgstr = %s' % (str(flann_cfgstr),))
            >>> print(result)
            flann_cfgstr = _FLANN((algo=kdtree,seed=42,t=8,))_VECS((5232,128)4mu3cl+!se1x13je)
        """
        flann_cfgstr_list = []
        use_params_hash = True
        if use_params_hash:
            flann_defaults = vt.get_flann_params(
                nnindexer.flann_params['algorithm'])
            flann_params_clean = flann_defaults.copy()
            ut.updateif_haskey(flann_params_clean, nnindexer.flann_params)
            if noquery:
                ut.delete_dict_keys(flann_params_clean, ['checks'])
            shortnames = dict(algorithm='algo', checks='chks',
                              random_seed='seed', trees='t')
            short_params = dict([
                (shortnames.get(key, key), str(val)[0:7])
                for key, val in six.iteritems(flann_params_clean)])
                # if key == 'algorithm'])
                # or val != flann_defaults.get(key, None)])
            flann_valsig_ = ut.dict_str(
                short_params, nl=False, explicit=True, strvals=True)
            flann_valsig_ = flann_valsig_.lstrip('dict').replace(' ', '')
            #flann_valsig_ = str(list(flann_params.values()))
            #flann_valsig = ut.remove_chars(flann_valsig_, ', \'[]')
            flann_cfgstr_list.append('_FLANN(' + flann_valsig_ + ')')
        use_data_hash = True
        if use_data_hash:
            idx2_vec = nnindexer.idx2_vec
            vecs_hashstr = ut.hashstr_arr(idx2_vec, '_VECS')
            flann_cfgstr_list.append(vecs_hashstr)
        flann_cfgstr = ''.join(flann_cfgstr_list)
        return flann_cfgstr
    def get_fname(nnindexer):
        return basename(nnindexer.get_fpath(''))
    def get_fpath(nnindexer, cachedir, cfgstr=None):
        _args2_fpath = ut.util_cache._args2_fpath
        dpath = cachedir
        prefix = nnindexer.get_prefix()
        cfgstr = nnindexer.get_cfgstr(noquery=True)
        ext = nnindexer.ext
        fpath = _args2_fpath(dpath, prefix, cfgstr, ext, write_hashtbl=False)
        print('flann fpath = %r' % (fpath,))
        return fpath

    # ---- </cachable_interface> ---
    def get_dtype(nnindexer):
        return nnindexer.idx2_vec.dtype

    #@profile
    def knn(nnindexer, qfx2_vec, K):
        r"""
        Returns the indices and squared distance to the nearest K neighbors.
        The distance is normalized between zero and one using
        VEC_PSEUDO_MAX_DISTANCE = (np.sqrt(2) * VEC_PSEUDO_MAX)

        Args:
            qfx2_vec : (N x D) an array of N, D-dimensional query vectors

            K: number of approximate nearest neighbors to find

        Returns:
            tuple of (qfx2_idx, qfx2_dist)
                ndarray : qfx2_idx[n][k] (N x K) is the index of the kth
                    approximate nearest data vector w.r.t qfx2_vec[n]

                ndarray : qfx2_dist[n][k] (N x K) is the distance to the kth
                    approximate nearest data vector w.r.t. qfx2_vec[n]
                    distance is normalized squared euclidean distance.

        CommandLine:
            python -m ibeis.model.hots.neighbor_index --test-knn:0
            python -m ibeis.model.hots.neighbor_index --test-knn:1

        Example:
            >>> # ENABLE_DOCTEST
            >>> from ibeis.model.hots.neighbor_index import *  # NOQA
            >>> nnindexer, qreq_, ibs = test_nnindexer()
            >>> qfx2_vec = ibs.get_annot_vecs(1, config2_=qreq_.get_internal_query_config2())
            >>> K = 2
            >>> nnindexer.debug_nnindexer()
            >>> assert vt.check_sift_validity(qfx2_vec), 'bad SIFT properties'
            >>> (qfx2_idx, qfx2_dist) = nnindexer.knn(qfx2_vec, K)
            >>> result = str(qfx2_idx.shape) + ' ' + str(qfx2_dist.shape)
            >>> print('qfx2_vec.dtype = %r' % (qfx2_vec.dtype,))
            >>> print('nnindexer.max_distance_sqrd = %r' % (nnindexer.max_distance_sqrd,))
            >>> assert np.all(qfx2_dist < 1.0), (
            ...     'distance should be less than 1. got %r' % (qfx2_dist,))
            >>> # Ensure distance calculations are correct
            >>> qfx2_dvec = nnindexer.idx2_vec[qfx2_idx.T]
            >>> targetdist = vt.L2_sift(qfx2_vec, qfx2_dvec).T ** 2
            >>> rawdist = vt.L2_sqrd(qfx2_vec, qfx2_dvec).T
            >>> assert np.all(qfx2_dist * nnindexer.max_distance_sqrd == rawdist), (
            ...     'inconsistent distance calculations')
            >>> assert np.allclose(targetdist, qfx2_dist), (
            ...     'inconsistent distance calculations')

        Example2:
            >>> # ENABLE_DOCTEST
            >>> from ibeis.model.hots.neighbor_index import *  # NOQA
            >>> nnindexer, qreq_, ibs = test_nnindexer()
            >>> qfx2_vec = np.empty((0, 128), dtype=nnindexer.get_dtype())
            >>> K = 2
            >>> (qfx2_idx, qfx2_dist) = nnindexer.knn(qfx2_vec, K)
            >>> result = str(qfx2_idx.shape) + ' ' + str(qfx2_dist.shape)
            >>> print(result)
            (0, 2) (0, 2)
        """
        if K == 0:
            (qfx2_idx, qfx2_dist) = nnindexer.empty_neighbors(len(qfx2_vec), 0)
        if K > nnindexer.num_indexed or K == 0:
            # If we want more points than there are in the database
            # FLANN will raise an exception. This corner case
            # will hopefully only be hit if using the multi-indexer
            # so try this workaround which should seamlessly integrate
            # when the multi-indexer stacks the subindexer results.
            # There is a very strong possibility that this will cause errors
            # if this corner case is used in non-multi-indexer code
            K = nnindexer.num_indexed
            (qfx2_idx, qfx2_dist) = nnindexer.empty_neighbors(len(qfx2_vec), 0)
        elif len(qfx2_vec) == 0:
            (qfx2_idx, qfx2_dist) = nnindexer.empty_neighbors(0, K)
        else:
            try:
                # perform nearest neighbors
                (qfx2_idx, qfx2_raw_dist) = nnindexer.flann.nn_index(
                    qfx2_vec, K, checks=nnindexer.checks,
                    cores=nnindexer.cores)
            except pyflann.FLANNException as ex:
                ut.printex(ex, 'probably misread the cached flann_fpath=%r' %
                           (nnindexer.flann_fpath,))
                #ut.embed()
                # Uncomment and use if the flann index needs to be deleted
                #ibs = ut.search_stack_for_localvar('ibs')
                #cachedir = ibs.get_flann_cachedir()
                #flann_fpath = nnindexer.get_fpath(cachedir)
                raise
            # Ensure that distances returned are between 0 and 1
            if nnindexer.max_distance_sqrd is not None:
                qfx2_dist = np.divide(qfx2_raw_dist,
                                      nnindexer.max_distance_sqrd)
            else:
                qfx2_dist = qfx2_raw_dist
            if ut.DEBUG2:
                # Ensure distance calculations are correct
                qfx2_dvec = nnindexer.idx2_vec[qfx2_idx.T]
                targetdist = vt.L2_sift(qfx2_vec, qfx2_dvec).T ** 2
                rawdist = vt.L2_sqrd(qfx2_vec, qfx2_dvec).T
                assert np.all(qfx2_raw_dist == rawdist), \
                    'inconsistent distance calculations'
                assert np.allclose(targetdist, qfx2_dist), \
                    'inconsistent distance calculations'
        #qfx2_dist = np.sqrt(qfx2_dist) / nnindexer.max_distance_sqrd
        return (qfx2_idx, qfx2_dist)
    def debug_nnindexer(nnindexer):
        """ Makes sure the indexer has valid SIFT descriptors """
        # FIXME: they might not agree if data has been added / removed
        init_data, extra_data = nnindexer.flann.get_indexed_data()
        with ut.Indenter('[NNINDEX_DEBUG]'):
            print('extra_data = %r' % (extra_data,))
            print('init_data = %r' % (init_data,))
            print('nnindexer.max_distance_sqrd = %r' %
                  (nnindexer.max_distance_sqrd,))
            data_agrees = nnindexer.idx2_vec is nnindexer.flann.get_indexed_data()[0]
            if data_agrees:
                print('indexed_data agrees')
            assert vt.check_sift_validity(init_data), 'bad SIFT properties'
            assert data_agrees, 'indexed data does not agree'
    def empty_neighbors(nnindexer, nQfx, K):
        # nQfx rows (one per query vector) with no valid neighbor entries
        qfx2_idx = np.empty((nQfx, K), dtype=np.int32)
        qfx2_dist = np.empty((nQfx, K), dtype=np.float64)
        return (qfx2_idx, qfx2_dist)
    def num_indexed_vecs(nnindexer):
        #invalid_idxs = (nnindexer.ax2_aid[nnindexer.idx2_ax] == -1)
        return nnindexer.idx2_vec.shape[0]
        #return len(nnindexer.idx2_vec)

    def num_indexed_annots(nnindexer):
        #invalid_idxs = (nnindexer.ax2_aid[nnindexer.idx2_ax] == -1)
        return (nnindexer.ax2_aid != -1).sum()
        #nnindexer.ax2_aid.shape[0]
        #return len(nnindexer.ax2_aid)

    def get_indexed_aids(nnindexer):
        return nnindexer.ax2_aid[nnindexer.ax2_aid != -1]

    def get_indexed_vecs(nnindexer):
        valid_idxs = (nnindexer.ax2_aid[nnindexer.idx2_ax] != -1)
        valid_idx2_vec = nnindexer.idx2_vec.compress(valid_idxs, axis=0)
        return valid_idx2_vec
    def get_removed_idxs(nnindexer):
        """
        __removed_ids = nnindexer.flann._FLANN__removed_ids
        invalid_idxs = nnindexer.get_removed_idxs()
        assert len(np.intersect1d(invalid_idxs, __removed_ids)) == len(__removed_ids)
        """
        invalid_idxs = np.nonzero(nnindexer.ax2_aid[nnindexer.idx2_ax] == -1)[0]
        return invalid_idxs
    def get_nn_vecs(nnindexer, qfx2_nnidx):
        """ gets matching vectors """
        return nnindexer.idx2_vec.take(qfx2_nnidx, axis=0)

    def get_nn_axs(nnindexer, qfx2_nnidx):
        """ gets matching internal annotation indices """
        return nnindexer.idx2_ax.take(qfx2_nnidx)

    #@profile
    def get_nn_aids(nnindexer, qfx2_nnidx):
        """
        Args:
            qfx2_nnidx : (N x K) qfx2_idx[n][k] is the index of the kth
                approximate nearest data vector

        Returns:
            qfx2_aid : (N x K) qfx2_aid[n][k] is the annotation id of the kth
                approximate nearest data vector

        CommandLine:
            python -m ibeis.model.hots.neighbor_index --exec-get_nn_aids

        Example:
            >>> # ENABLE_DOCTEST
            >>> from ibeis.model.hots.neighbor_index import *  # NOQA
            >>> cfgdict = dict(fg_on=False)
            >>> ibs, qreq_ = plh.get_pipeline_testdata(defaultdb='testdb1', cfgdict=cfgdict, preload=True)
            >>> nnindexer = qreq_.indexer
            >>> qfx2_vec = qreq_.ibs.get_annot_vecs(qreq_.get_internal_qaids()[0], config2_=qreq_.get_internal_query_config2())
            >>> num_neighbors = 4
            >>> (qfx2_nnidx, qfx2_dist) = nnindexer.knn(qfx2_vec, num_neighbors)
            >>> qfx2_aid = nnindexer.get_nn_aids(qfx2_nnidx)
            >>> assert qfx2_aid.shape[1] == num_neighbors
            >>> result = ('qfx2_aid.shape = %r' % (qfx2_aid.shape,))
            >>> print(result)
            qfx2_aid.shape = (1257, 4)
        """
        #qfx2_ax = nnindexer.idx2_ax[qfx2_nnidx]
        #qfx2_aid = nnindexer.ax2_aid[qfx2_ax]
        qfx2_ax = nnindexer.idx2_ax.take(qfx2_nnidx)
        qfx2_aid = nnindexer.ax2_aid.take(qfx2_ax)
        return qfx2_aid
    def get_nn_featxs(nnindexer, qfx2_nnidx):
        """
        Args:
            qfx2_nnidx : (N x K) qfx2_idx[n][k] is the index of the kth
                approximate nearest data vector

        Returns:
            qfx2_fx : (N x K) qfx2_fx[n][k] is the feature index (w.r.t the
                source annotation) of the kth approximate nearest data vector
        """
        #return nnindexer.idx2_fx[qfx2_nnidx]
        qfx2_fx = nnindexer.idx2_fx.take(qfx2_nnidx)
        return qfx2_fx
    def get_nn_fgws(nnindexer, qfx2_nnidx):
        r"""
        Gets foreground weights of neighbors

        CommandLine:
            python -m ibeis.model.hots.neighbor_index --exec-NeighborIndex.get_nn_fgws

        Args:
            qfx2_nnidx : (N x K) qfx2_idx[n][k] is the index of the kth
                approximate nearest data vector

        Returns:
            qfx2_fgw : (N x K) qfx2_fgw[n][k] is the foreground weight of the
                kth approximate nearest data vector

        Example:
            >>> # ENABLE_DOCTEST
            >>> from ibeis.model.hots.neighbor_index import *  # NOQA
            >>> nnindexer, qreq_, ibs = test_nnindexer(dbname='testdb1')
            >>> qfx2_nnidx = np.array([[0, 1, 2], [3, 4, 5]])
            >>> qfx2_fgw = nnindexer.get_nn_fgws(qfx2_nnidx)
        """
        #qfx2_ax = nnindexer.idx2_ax[qfx2_nnidx]
        #qfx2_aid = nnindexer.ax2_aid[qfx2_ax]
        if nnindexer.idx2_fgw is None:
            qfx2_fgw = np.ones(qfx2_nnidx.shape)
        else:
            qfx2_fgw = nnindexer.idx2_fgw.take(qfx2_nnidx)
        return qfx2_fgw
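
# Worked check of the distance normalization used by init_support/knn above.
# SIFT descriptors have non-negative components and are stored scaled to a
# pseudo norm of VEC_PSEUDO_MAX, so for two such vectors a and b:
#   ||a - b||^2 = ||a||^2 + ||b||^2 - 2*(a . b) <= 2 * VEC_PSEUDO_MAX ** 2
# (since a . b >= 0). Dividing raw squared distances by this bound maps them
# into [0, 1]. This sketch assumes VEC_PSEUDO_MAX = 512; the authoritative
# constant lives in hstypes.VEC_PSEUDO_MAX_DISTANCE_SQRD.
def _demo_max_distance_bound():
    """
    Example:
        >>> VEC_PSEUDO_MAX = 512  # assumed value; see hstypes
        >>> max_distance_sqrd = 2 * (VEC_PSEUDO_MAX ** 2)
        >>> print(max_distance_sqrd)
        524288
        >>> # the normalized distance of two maximally distant vectors is 1.0
        >>> print(float(max_distance_sqrd) / max_distance_sqrd)
        1.0
    """
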

#@profile
def invert_index(vecs_list, ax_list, verbose=ut.NOT_QUIET):
    r"""
    Aggregates descriptors of input annotations and returns inverted
    information

    Args:
        vecs_list (list):
        ax_list (list):
        verbose (bool): verbosity flag (default = True)

    Returns:
        tuple: (idx2_vec, idx2_ax, idx2_fx)

    CommandLine:
        python -m ibeis.model.hots.neighbor_index --test-invert_index

    Example:
        >>> # SLOW_DOCTEST
        >>> from ibeis.model.hots.neighbor_index import *  # NOQA
        >>> import vtool as vt
        >>> num = 100
        >>> rng = np.random.RandomState(0)
        >>> ax_list = np.arange(num)
        >>> vecs_list = [vt.tests.dummy.get_dummy_dpts(rng.randint(100)) for ax in ax_list]
        >>> verbose = True
        >>> (idx2_vec, idx2_ax, idx2_fx) = invert_index(vecs_list, ax_list, verbose)
    """
    if ut.VERYVERBOSE:
        print('[nnindex] stacking descriptors from %d annotations' %
              len(ax_list))
    try:
        idx2_vec, idx2_ax, idx2_fx = vt.invertible_stack(vecs_list, ax_list)
        assert idx2_vec.shape[0] == idx2_ax.shape[0]
        assert idx2_vec.shape[0] == idx2_fx.shape[0]
    except MemoryError as ex:
        ut.printex(ex, 'cannot build inverted index', '[!memerror]')
        raise
    if ut.VERYVERBOSE or verbose:
        print('[nnindex] stacked nVecs={nVecs} from nAnnots={nAnnots}'.format(
            nVecs=len(idx2_vec), nAnnots=len(ax_list)))
        print('[nnindex] idx2_vecs.dtype = {}'.format(idx2_vec.dtype))
        print('[nnindex] memory(idx2_vecs) = {}'.format(
            ut.byte_str2(idx2_vec.size * idx2_vec.dtype.itemsize)))
    return idx2_vec, idx2_ax, idx2_fx
def test_nnindexer(dbname='testdb1', with_indexer=True, use_memcache=True):
    """
    Example:
        >>> # ENABLE_DOCTEST
        >>> from ibeis.model.hots.neighbor_index import *  # NOQA
        >>> nnindexer, qreq_, ibs = test_nnindexer()
    """
    import ibeis
    daid_list = [7, 8, 9, 10, 11]
    ibs = ibeis.opendb(db=dbname)
    # use_memcache isn't used here because we aren't lazy loading the indexer
    cfgdict = dict(fg_on=False)
    qreq_ = ibs.new_query_request(daid_list, daid_list,
                                  use_memcache=use_memcache, cfgdict=cfgdict)
    if with_indexer:
        # we do an explicit creation of an indexer for these tests
        nnindexer = request_ibeis_nnindexer(qreq_, use_memcache=use_memcache)
    else:
        nnindexer = None
    return nnindexer, qreq_, ibs


# ------------
# NEW


#@profile
def check_background_process():
    """
    checks to see if the process has finished and then writes the uuid map to
    disk
    """
    global CURRENT_THREAD
    if CURRENT_THREAD is None or CURRENT_THREAD.is_alive():
        print('[FG] background thread is not ready yet')
        return False
    # Get info set in background process
    finishtup = CURRENT_THREAD.finishtup
    (uuid_map_fpath, daids_hashid, visual_uuid_list, min_reindex_thresh) = finishtup
    # Clean up background process
    CURRENT_THREAD.join()
    CURRENT_THREAD = None
    # Write data to current uuidcache
    if len(visual_uuid_list) > min_reindex_thresh:
        UUID_MAP_CACHE.write_uuid_map_dict(uuid_map_fpath, visual_uuid_list,
                                           daids_hashid)
    return True
def can_request_background_nnindexer():
    return CURRENT_THREAD is None or not CURRENT_THREAD.is_alive()
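
# check_background_process / can_request_background_nnindexer above and
# request_background_nnindexer below implement a single-slot background-build
# protocol: at most one worker is spawned, and the poller joins it and commits
# its uuid-map payload once it stops being alive. A minimal sketch of the same
# poll-then-join pattern using only the stdlib (illustration only; the real
# code uses ut.spawn_background_process):
def _demo_poll_then_join():
    import multiprocessing
    import time
    proc = multiprocessing.Process(target=time.sleep, args=(0.01,))
    proc.start()
    while proc.is_alive():  # poll, as check_background_process does
        time.sleep(0.005)
    proc.join()             # then join; the real code also commits finishtup here
    return proc.exitcode == 0
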

#@profile
def request_background_nnindexer(qreq_, daid_list):
    """
    FIXME: Duplicate code

    Args:
        qreq_ (QueryRequest): query request object with hyper-parameters
        daid_list (list):

    Returns:
        bool:

    CommandLine:
        python -m ibeis.model.hots.neighbor_index --test-request_background_nnindexer

    Example:
        >>> # DISABLE_DOCTEST
        >>> from ibeis.model.hots.neighbor_index import *  # NOQA
        >>> from ibeis.model.hots import neighbor_index  # NOQA
        >>> import ibeis
        >>> # build test data
        >>> ibs = ibeis.opendb('testdb1')
        >>> daid_list = ibs.get_valid_aids(species=ibeis.const.Species.ZEB_PLAIN)
        >>> qreq_ = ibs.new_query_request(daid_list, daid_list)
        >>> # execute function
        >>> neighbor_index.request_background_nnindexer(qreq_, daid_list)
        >>> # verify results
        >>> result = str(False)
        >>> print(result)
    """
    global CURRENT_THREAD
    print('Requesting background reindex')
    if not can_request_background_nnindexer():
        # Make sure this function doesn't run if it is already running
        print('REQUEST DENIED')
        return False
    print('REQUEST ACCEPTED')
    daids_hashid = qreq_.ibs.get_annot_hashid_visual_uuid(daid_list)
    cfgstr = build_nnindex_cfgstr(qreq_, daid_list)
    cachedir = qreq_.ibs.get_flann_cachedir()
    # Save inverted cache uuid mappings
    min_reindex_thresh = qreq_.qparams.min_reindex_thresh
    # Grab the keypoints names and image ids before query time?
    flann_params = qreq_.qparams.flann_params
    # Get annot descriptors to index
    vecs_list, fgws_list = get_support_data(qreq_, daid_list)
    # Don't hash rowids when given enough info in nnindex_cfgstr
    flann_params['cores'] = 2  # Only use a few cores in the background
    # Build/Load the flann index
    uuid_map_fpath = get_nnindexer_uuid_map_fpath(qreq_)
    visual_uuid_list = qreq_.ibs.get_annot_visual_uuids(daid_list)
    # set temporary attribute for when the thread finishes
    finishtup = (uuid_map_fpath, daids_hashid, visual_uuid_list,
                 min_reindex_thresh)
    CURRENT_THREAD = ut.spawn_background_process(
        background_flann_func, cachedir, daid_list, vecs_list, fgws_list,
        flann_params, cfgstr, uuid_map_fpath, daids_hashid, visual_uuid_list,
        min_reindex_thresh)
    CURRENT_THREAD.finishtup = finishtup
def background_flann_func(cachedir, daid_list, vecs_list, fgws_list,
                          flann_params, cfgstr, uuid_map_fpath, daids_hashid,
                          visual_uuid_list, min_reindex_thresh):
    """ FIXME: Duplicate code """
    print('[BG] Starting Background FLANN')
    # FIXME. dont use flann cache
    nnindexer = NeighborIndex(flann_params, cfgstr)
    # Initialize neighbor with unindexed data
    nnindexer.init_support(daid_list, vecs_list, fgws_list, verbose=True)
    # Load or build the indexing structure
    nnindexer.ensure_indexer(cachedir, verbose=True)
    if len(visual_uuid_list) > min_reindex_thresh:
        UUID_MAP_CACHE.write_uuid_map_dict(uuid_map_fpath, visual_uuid_list,
                                           daids_hashid)
    print('[BG] Finished Background FLANN')

if __name__ == '__main__':
    """
    CommandLine:
        python -m ibeis.model.hots.neighbor_index
        python -m ibeis.model.hots.neighbor_index --allexamples
        python -m ibeis.model.hots.neighbor_index --allexamples --noface --nosrc

        utprof.sh ibeis/model/hots/neighbor_index.py --allexamples
    """
    import multiprocessing
    multiprocessing.freeze_support()  # for win32
    ut.doctest_funcs()