# -*- coding: utf-8 -*-
"""
TODO:
    Remove Bloat

module which handles the building and caching of individual flann indexes


CommandLine:
    # Runs the incremental query test
    # {0:testdb1, 1:PZ_MTEST, 2:GZ_ALL, 3:PZ_Master0}
    python -m ibeis.model.hots.qt_inc_automatch --test-test_inc_query:0
    python -m ibeis.model.hots.qt_inc_automatch --test-test_inc_query:1
    python -m ibeis.model.hots.qt_inc_automatch --test-test_inc_query:2
    python -m ibeis.model.hots.qt_inc_automatch --test-test_inc_query:3

    utprof.py -m ibeis.model.hots.qt_inc_automatch --test-test_inc_query:0
    utprof.py -m ibeis.model.hots.qt_inc_automatch --test-test_inc_query:1
    utprof.py -m ibeis.model.hots.qt_inc_automatch --test-test_inc_query:3

"""
from __future__ import absolute_import, division, print_function
import six
import numpy as np
import utool as ut
import pyflann
#import lockfile
from os.path import join
from os.path import basename
from six.moves import range, zip, map  # NOQA
import vtool as vt
from ibeis.model.hots import hstypes
from ibeis.model.hots import _pipeline_helpers as plh  # NOQA
(print, rrr, profile) = ut.inject2(__name__, '[neighbor_index]', DEBUG=False)

USE_HOTSPOTTER_CACHE = not ut.get_argflag('--nocache-hs')
NOCACHE_FLANN = ut.get_argflag('--nocache-flann') and USE_HOTSPOTTER_CACHE
NOSAVE_FLANN = ut.get_argflag('--nosave-flann')
NOCACHE_UUIDS = ut.get_argflag('--nocache-uuids') and USE_HOTSPOTTER_CACHE

# LRU cache for nn_indexers. Ensures that only a few are ever in memory
#MAX_NEIGHBOR_CACHE_SIZE = ut.get_argval('--max-neighbor-cachesize', type_=int, default=2)
MAX_NEIGHBOR_CACHE_SIZE = ut.get_argval('--max-neighbor-cachesize', type_=int, default=1)
NEIGHBOR_CACHE = ut.get_lru_cache(MAX_NEIGHBOR_CACHE_SIZE)
# Background process for building indexes
CURRENT_THREAD = None
# Global map to keep track of UUID lists with prebuilt indexers.
UUID_MAP = ut.ddict(dict)
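

# A minimal sketch (illustration only; the real cache comes from
# ut.get_lru_cache) of the LRU semantics NEIGHBOR_CACHE relies on: once more
# than MAX_NEIGHBOR_CACHE_SIZE indexers have been inserted, the
# least-recently-used one is evicted so its FLANN structure can be freed.
def _demo_lru_semantics(maxsize=1):
    """
    Example:
        >>> keys = _demo_lru_semantics(maxsize=1)
        >>> print(keys)
        ['cfgstr2']
    """
    from collections import OrderedDict
    cache = OrderedDict()
    for cfgstr in ['cfgstr1', 'cfgstr2']:
        cache[cfgstr] = object()  # stand-in for an nnindexer
        while len(cache) > maxsize:
            cache.popitem(last=False)  # evict the least recently inserted key
    return list(cache.keys())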


class UUIDMapHyrbridCache(object):
    """
    Class that lets multiple ways of writing to the uuid_map
    be swapped in and out interchangeably

    TODO: the global read / write should periodically sync itself to disk
    and it should be loaded from disk initially
    """
    def __init__(self):
        self.uuid_maps = ut.ddict(dict)
        #self.uuid_map_fpath = uuid_map_fpath
        #self.init(uuid_map_fpath, min_reindex_thresh)
    def init(self, *args, **kwargs):
        self.args = args
        self.kwargs = kwargs
        #self.read_func = self.read_uuid_map_cpkl
        #self.write_func = self.write_uuid_map_cpkl
        self.read_func = self.read_uuid_map_dict
        self.write_func = self.write_uuid_map_dict

    #def __call__(self):
    #    return self.read_func(*self.args, **self.kwargs)
    def dump(self, cachedir):
        # TODO: DUMP AND LOAD THIS HYBRID CACHE TO DISK
        #write_uuid_map_cpkl
        fname = 'uuid_maps_hybrid_cache.cPkl'
        cpkl_fpath = join(cachedir, fname)
        ut.lock_and_save_cPkl(cpkl_fpath, self.uuid_maps)
    def load(self, cachedir):
        """ Returns a cached UUIDMap """
        fname = 'uuid_maps_hybrid_cache.cPkl'
        cpkl_fpath = join(cachedir, fname)
        self.uuid_maps = ut.lock_and_load_cPkl(cpkl_fpath)

    #def __setitem__(self, daids_hashid, visual_uuid_list):
    #    uuid_map_fpath = self.uuid_map_fpath
    #    self.write_func(uuid_map_fpath, visual_uuid_list, daids_hashid)

    #@profile
    #def read_uuid_map_shelf(self, uuid_map_fpath, min_reindex_thresh):
    #    #with ut.EmbedOnException():
    #    with lockfile.LockFile(uuid_map_fpath + '.lock'):
    #        with ut.shelf_open(uuid_map_fpath) as uuid_map:
    #            candidate_uuids = {
    #                key: val for key, val in six.iteritems(uuid_map)
    #                if len(val) >= min_reindex_thresh
    #            }
    #    return candidate_uuids

    #@profile
    #def write_uuid_map_shelf(self, uuid_map_fpath, visual_uuid_list, daids_hashid):
    #    print('Writing %d visual uuids to uuid map' % (len(visual_uuid_list)))
    #    with lockfile.LockFile(uuid_map_fpath + '.lock'):
    #        with ut.shelf_open(uuid_map_fpath) as uuid_map:
    #            uuid_map[daids_hashid] = visual_uuid_list

    #@profile
    #def read_uuid_map_cpkl(self, uuid_map_fpath, min_reindex_thresh):
    #    with lockfile.LockFile(uuid_map_fpath + '.lock'):
    #        #with ut.shelf_open(uuid_map_fpath) as uuid_map:
    #        try:
    #            uuid_map = ut.load_cPkl(uuid_map_fpath)
    #            candidate_uuids = {
    #                key: val for key, val in six.iteritems(uuid_map)
    #                if len(val) >= min_reindex_thresh
    #            }
    #        except IOError:
    #            return {}
    #    return candidate_uuids

    #@profile
    #def write_uuid_map_cpkl(self, uuid_map_fpath, visual_uuid_list, daids_hashid):
    #    """
    #    Lets the multi-indexer know about any big caches we've made. Also lets
    #    nnindexer know about other prebuilt indexers so it can attempt to just
    #    add points to them so as to avoid a rebuild.
    #    """
    #    print('Writing %d visual uuids to uuid map' % (len(visual_uuid_list)))
    #    with lockfile.LockFile(uuid_map_fpath + '.lock'):
    #        try:
    #            uuid_map = ut.load_cPkl(uuid_map_fpath)
    #        except IOError:
    #            uuid_map = {}
    #        uuid_map[daids_hashid] = visual_uuid_list
    #        ut.save_cPkl(uuid_map_fpath, uuid_map)
    @profile
    def read_uuid_map_dict(self, uuid_map_fpath, min_reindex_thresh):
        """ uses in memory dictionary instead of disk """
        uuid_map = self.uuid_maps[uuid_map_fpath]
        candidate_uuids = {
            key: val for key, val in six.iteritems(uuid_map)
            if len(val) >= min_reindex_thresh
        }
        return candidate_uuids
    @profile
    def write_uuid_map_dict(self, uuid_map_fpath, visual_uuid_list, daids_hashid):
        """
        uses in memory dictionary instead of disk

        Lets the multi-indexer know about any big caches we've made. Also lets
        nnindexer know about other prebuilt indexers so it can attempt to just
        add points to them so as to avoid a rebuild.
        """
        if NOCACHE_UUIDS:
            print('uuid cache is off')
            return
        #with ut.EmbedOnException():
        uuid_map = self.uuid_maps[uuid_map_fpath]
        uuid_map[daids_hashid] = visual_uuid_list

UUID_MAP_CACHE = UUIDMapHyrbridCache()
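

# Minimal usage sketch of the hybrid cache above (illustration only): writes
# record which sets of visual uuids have a prebuilt indexer under a hashid
# key, and reads return only the candidates large enough (at least
# min_reindex_thresh uuids) to be worth augmenting.
def _demo_uuid_map_usage():
    """
    Example:
        >>> cache = UUIDMapHyrbridCache()
        >>> fpath = 'dummy_uuid_map.cPkl'  # used only as a dict key here
        >>> cache.write_uuid_map_dict(fpath, ['vuuid1', 'vuuid2'], 'HASHID_AB')
        >>> cache.write_uuid_map_dict(fpath, ['vuuid3'], 'HASHID_C')
        >>> candidates = cache.read_uuid_map_dict(fpath, min_reindex_thresh=2)
        >>> print(sorted(candidates.keys()))
        ['HASHID_AB']
    """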

#@profile
def get_nnindexer_uuid_map_fpath(qreq_):
    """
    Example:
        >>> # ENABLE_DOCTEST
        >>> from ibeis.model.hots.neighbor_index import *  # NOQA
        >>> # build test data
        >>> ibs, qreq_ = plh.get_pipeline_testdata(defaultdb='testdb1', preload=False)
        >>> uuid_map_fpath = get_nnindexer_uuid_map_fpath(qreq_)
        >>> result = str(ut.path_ndir_split(uuid_map_fpath, 3))
        >>> print(result)
        .../_ibeis_cache/flann/uuid_map_FLANN(8_kdtrees)_FEAT(hesaff+sift_)_CHIP(sz450).cPkl
    """
    flann_cachedir = qreq_.ibs.get_flann_cachedir()
    # Have uuid shelf conditioned on the baseline flann and feature parameters
    flann_cfgstr = qreq_.qparams.flann_cfgstr
    feat_cfgstr = qreq_.qparams.feat_cfgstr
    uuid_map_cfgstr = ''.join((flann_cfgstr, feat_cfgstr))
    #uuid_map_ext = '.shelf'
    uuid_map_ext = '.cPkl'
    uuid_map_prefix = 'uuid_map'
    uuid_map_fname = ut.consensed_cfgstr(uuid_map_prefix, uuid_map_cfgstr) + uuid_map_ext
    uuid_map_fpath = join(flann_cachedir, uuid_map_fname)
    return uuid_map_fpath
def clear_memcache():
    global NEIGHBOR_CACHE
    NEIGHBOR_CACHE.clear()


#@profile
def clear_uuid_cache(qreq_):
    """
    CommandLine:
        python -m ibeis.model.hots.neighbor_index --test-clear_uuid_cache

    Example:
        >>> # DISABLE_DOCTEST
        >>> from ibeis.model.hots.neighbor_index import *  # NOQA
        >>> import ibeis
        >>> # build test data
        >>> cfgdict = dict(fg_on=False)
        >>> ibs, qreq_ = plh.get_pipeline_testdata(defaultdb='testdb1', preload=False, cfgdict=cfgdict)
        >>> # execute function
        >>> result = clear_uuid_cache(qreq_)
        >>> # verify results
        >>> print(result)
    """
    print('[nnindex] clearing uuid cache')
    uuid_map_fpath = get_nnindexer_uuid_map_fpath(qreq_)
    ut.delete(uuid_map_fpath)
    ut.delete(uuid_map_fpath + '.lock')
    print('[nnindex] finished uuid cache clear')
def request_ibeis_nnindexer(qreq_, verbose=True, use_memcache=True,
                            force_rebuild=False):
    """
    CALLED BY QUERYREQUEST::LOAD_INDEXER
    FIXME: should use params from qparams instead of ibs.cfg

    IBEIS interface into neighbor_index

    Args:
        qreq_ (QueryRequest): hyper-parameters

    Returns:
        NeighborIndex: nnindexer

    CommandLine:
        python -m ibeis.model.hots.neighbor_index --test-request_ibeis_nnindexer

    Example:
        >>> # ENABLE_DOCTEST
        >>> from ibeis.model.hots.neighbor_index import *  # NOQA
        >>> nnindexer, qreq_, ibs = test_nnindexer(None)
        >>> nnindexer = request_ibeis_nnindexer(qreq_)
    """
    daid_list = qreq_.get_internal_daids()
    if not hasattr(qreq_.qparams, 'use_augmented_indexer'):
        qreq_.qparams.use_augmented_indexer = True
    if qreq_.qparams.use_augmented_indexer:
        nnindexer = request_augmented_ibeis_nnindexer(
            qreq_, daid_list, verbose=verbose, use_memcache=use_memcache,
            force_rebuild=force_rebuild)
    else:
        nnindexer = request_memcached_ibeis_nnindexer(
            qreq_, daid_list, verbose=verbose, use_memcache=use_memcache,
            force_rebuild=force_rebuild)
    return nnindexer


#@profile
def request_augmented_ibeis_nnindexer(qreq_, daid_list, verbose=True,
                                      use_memcache=True, force_rebuild=False,
                                      memtrack=None):
    """
    DO NOT USE. THIS FUNCTION CAN CURRENTLY CAUSE A SEGFAULT

    Tries to give you an indexer for the requested daids using the least
    amount of computation possible: by loading and adding to a partially built
    nnindex if possible and, if that fails, falling back to request_memcache.

    Args:
        qreq_ (QueryRequest): query request object with hyper-parameters
        daid_list (list):

    Returns:
        NeighborIndex: nnindexer

    CommandLine:
        python -m ibeis.model.hots.neighbor_index --test-request_augmented_ibeis_nnindexer

    Example:
        >>> # ENABLE_DOCTEST
        >>> from ibeis.model.hots.neighbor_index import *  # NOQA
        >>> import ibeis
        >>> # build test data
        >>> ZEB_PLAIN = ibeis.const.Species.ZEB_PLAIN
        >>> ibs = ibeis.opendb('testdb1')
        >>> use_memcache, max_covers, verbose = True, None, True
        >>> daid_list = ibs.get_valid_aids(species=ZEB_PLAIN)[0:6]
        >>> qreq_ = ibs.new_query_request(daid_list, daid_list)
        >>> qreq_.qparams.min_reindex_thresh = 1
        >>> min_reindex_thresh = qreq_.qparams.min_reindex_thresh
        >>> # CLEAR CACHE for clean test
        >>> clear_uuid_cache(qreq_)
        >>> # LOAD 3 AIDS INTO CACHE
        >>> aid_list = ibs.get_valid_aids(species=ZEB_PLAIN)[0:3]
        >>> # Should fallback
        >>> nnindexer = request_augmented_ibeis_nnindexer(qreq_, aid_list)
        >>> # assert the fallback
        >>> uncovered_aids, covered_aids_list = group_daids_by_cached_nnindexer(
        ...     qreq_, daid_list, min_reindex_thresh, max_covers)
        >>> result2 = uncovered_aids, covered_aids_list
        >>> ut.assert_eq(result2, ([4, 5, 6], [[1, 2, 3]]), 'pre augment')
        >>> # Should augment
        >>> nnindexer = request_augmented_ibeis_nnindexer(qreq_, daid_list)
        >>> uncovered_aids, covered_aids_list = group_daids_by_cached_nnindexer(
        ...     qreq_, daid_list, min_reindex_thresh, max_covers)
        >>> result3 = uncovered_aids, covered_aids_list
        >>> ut.assert_eq(result3, ([], [[1, 2, 3, 4, 5, 6]]), 'post augment')
        >>> # Should fallback
        >>> nnindexer2 = request_augmented_ibeis_nnindexer(qreq_, daid_list)
        >>> assert nnindexer is nnindexer2
    """
    global NEIGHBOR_CACHE
    min_reindex_thresh = qreq_.qparams.min_reindex_thresh
    if not force_rebuild:
        new_daid_list, covered_aids_list = group_daids_by_cached_nnindexer(
            qreq_, daid_list, min_reindex_thresh, max_covers=1)
        can_augment = (
            len(covered_aids_list) > 0 and
            not ut.list_set_equal(covered_aids_list[0], daid_list))
    else:
        can_augment = False
    if verbose:
        print('[aug] Requesting augmented nnindexer')
    if can_augment:
        covered_aids = covered_aids_list[0]
        #with ut.PrintStartEndContext('AUGMENTING NNINDEX', verbose=verbose):
        #    with ut.Indenter('|  '):
        if verbose:
            print('[aug] Augmenting index of %d old daids with %d new daids' %
                  (len(covered_aids), len(new_daid_list)))
        # Load the base covered indexer
        # THIS SHOULD LOAD NOT REBUILD IF THE UUIDS ARE COVERED
        base_nnindexer = request_memcached_ibeis_nnindexer(
            qreq_, covered_aids, verbose=verbose, use_memcache=use_memcache)
        # Remove this indexer from the memcache because we are going to change it
        if NEIGHBOR_CACHE.has_key(base_nnindexer.cfgstr):  # NOQA
            print('Removing key from memcache')
            NEIGHBOR_CACHE[base_nnindexer.cfgstr] = None
            del NEIGHBOR_CACHE[base_nnindexer.cfgstr]
        new_vecs_list, new_fgws_list = get_support_data(qreq_, new_daid_list)
        base_nnindexer.add_support(new_daid_list, new_vecs_list, new_fgws_list,
                                   verbose=True)
        # FIXME: pointer issues
        nnindexer = base_nnindexer
        # Change to the new cfgstr
        nnindex_cfgstr = build_nnindex_cfgstr(qreq_, daid_list)
        nnindexer.cfgstr = nnindex_cfgstr
        cachedir = qreq_.ibs.get_flann_cachedir()
        nnindexer.save(cachedir)
        # Write to inverse uuid
        if len(daid_list) > min_reindex_thresh:
            uuid_map_fpath = get_nnindexer_uuid_map_fpath(qreq_)
            daids_hashid = get_data_cfgstr(qreq_.ibs, daid_list)
            visual_uuid_list = qreq_.ibs.get_annot_visual_uuids(daid_list)
            UUID_MAP_CACHE.write_uuid_map_dict(uuid_map_fpath,
                                               visual_uuid_list, daids_hashid)
        # Write to memcache
        if ut.VERBOSE:
            print('[aug] Wrote to memcache=%r' % (nnindex_cfgstr,))
        NEIGHBOR_CACHE[nnindex_cfgstr] = nnindexer
        return nnindexer
    else:
        #if ut.VERBOSE:
        if verbose:
            print('[aug] Nothing to augment, fallback to memcache')
        # Fallback
        nnindexer = request_memcached_ibeis_nnindexer(
            qreq_, daid_list, verbose=verbose, use_memcache=use_memcache,
            force_rebuild=force_rebuild, memtrack=memtrack)
        return nnindexer


#@profile
def request_memcached_ibeis_nnindexer(qreq_, daid_list, use_memcache=True,
                                      verbose=ut.NOT_QUIET, veryverbose=False,
                                      force_rebuild=False,
                                      allow_memfallback=True, memtrack=None):
    """
    FOR INTERNAL USE ONLY
    takes a custom daid list. might not be the same as what is in qreq_

    CommandLine:
        python -m ibeis.model.hots.neighbor_index --test-request_memcached_ibeis_nnindexer

    Example:
        >>> # DISABLE_DOCTEST
        >>> from ibeis.model.hots.neighbor_index import *  # NOQA
        >>> import ibeis
        >>> # build test data
        >>> ibs = ibeis.opendb('testdb1')
        >>> ZEB_PLAIN = ibeis.const.Species.ZEB_PLAIN
        >>> daid_list = ibs.get_valid_aids(species=ZEB_PLAIN)[0:3]
        >>> qreq_ = ibs.new_query_request(daid_list, daid_list)
        >>> qreq_.qparams.min_reindex_thresh = 3
        >>> verbose = True
        >>> use_memcache = True
        >>> # execute function
        >>> nnindexer = request_memcached_ibeis_nnindexer(qreq_, daid_list, use_memcache)
        >>> # verify results
        >>> result = str(nnindexer)
        >>> print(result)
    """
    global NEIGHBOR_CACHE
    #try:
    if veryverbose:
        print('[nnindex.MEMCACHE] len(NEIGHBOR_CACHE) = %r' %
              (len(NEIGHBOR_CACHE),))
        # the lru cache won't be recognized by get_object_size_str; cast to
        # pure python objects
        print('[nnindex.MEMCACHE] size(NEIGHBOR_CACHE) = %s' %
              (ut.get_object_size_str(NEIGHBOR_CACHE.items()),))
    #if memtrack is not None:
    #    memtrack.report('IN REQUEST MEMCACHE')
    nnindex_cfgstr = build_nnindex_cfgstr(qreq_, daid_list)
    # neighbor memory cache
    if (not force_rebuild and use_memcache and
            NEIGHBOR_CACHE.has_key(nnindex_cfgstr)):  # NOQA (has_key is for a lru cache)
        if veryverbose or ut.VERYVERBOSE:
            print('... nnindex memcache hit: cfgstr=%s' % (nnindex_cfgstr,))
        nnindexer = NEIGHBOR_CACHE[nnindex_cfgstr]
    else:
        if veryverbose or ut.VERYVERBOSE:
            print('... nnindex memcache miss: cfgstr=%s' % (nnindex_cfgstr,))
        # Write to inverse uuid
        nnindexer = request_diskcached_ibeis_nnindexer(
            qreq_, daid_list, nnindex_cfgstr, verbose,
            force_rebuild=force_rebuild, memtrack=memtrack)
        NEIGHBOR_CACHE_WRITE = True
        if NEIGHBOR_CACHE_WRITE:
            # Write to memcache
            if ut.VERBOSE or ut.VERYVERBOSE:
                print('[disk] Write to memcache=%r' % (nnindex_cfgstr,))
            NEIGHBOR_CACHE[nnindex_cfgstr] = nnindexer
        else:
            if ut.VERBOSE or ut.VERYVERBOSE:
                print('[disk] Did not write to memcache=%r' % (nnindex_cfgstr,))
    return nnindexer


#@profile
def request_diskcached_ibeis_nnindexer(qreq_, daid_list, nnindex_cfgstr=None,
                                       verbose=True, force_rebuild=False,
                                       memtrack=None):
    """
    builds a new NeighborIndex which will try to use a disk cached flann if
    available

    Args:
        qreq_ (QueryRequest): query request object with hyper-parameters
        daid_list (list):
        nnindex_cfgstr (str):
        verbose (bool):

    Returns:
        NeighborIndex: nnindexer

    CommandLine:
        python -m ibeis.model.hots.neighbor_index --test-request_diskcached_ibeis_nnindexer

    Example:
        >>> # DISABLE_DOCTEST
        >>> from ibeis.model.hots.neighbor_index import *  # NOQA
        >>> import ibeis
        >>> # build test data
        >>> ibs = ibeis.opendb('testdb1')
        >>> daid_list = ibs.get_valid_aids(species=ibeis.const.Species.ZEB_PLAIN)
        >>> qreq_ = ibs.new_query_request(daid_list, daid_list)
        >>> nnindex_cfgstr = build_nnindex_cfgstr(qreq_, daid_list)
        >>> verbose = True
        >>> # execute function
        >>> nnindexer = request_diskcached_ibeis_nnindexer(qreq_, daid_list, nnindex_cfgstr, verbose)
        >>> # verify results
        >>> result = str(nnindexer)
        >>> print(result)
    """
    if nnindex_cfgstr is None:
        nnindex_cfgstr = build_nnindex_cfgstr(qreq_, daid_list)
    cfgstr = nnindex_cfgstr
    cachedir = qreq_.ibs.get_flann_cachedir()
    flann_params = qreq_.qparams.flann_params
    flann_params['checks'] = qreq_.qparams.checks
    #if memtrack is not None:
    #    memtrack.report('[PRE SUPPORT]')
    # Get annot descriptors to index
    print('[nnindex] Loading support data to build diskcached indexer')
    vecs_list, fgws_list = get_support_data(qreq_, daid_list)
    if memtrack is not None:
        memtrack.report('[AFTER GET SUPPORT DATA]')
    try:
        nnindexer = new_neighbor_index(
            daid_list, vecs_list, fgws_list, flann_params, cachedir,
            cfgstr=cfgstr, verbose=verbose, force_rebuild=force_rebuild,
            memtrack=memtrack)
    except Exception as ex:
        ut.printex(ex, True, msg_='cannot build inverted index',
                   key_list=['ibs.get_infostr()'])
        raise
    # Record these uuids in the disk based uuid map so they can be augmented
    # if needed
    min_reindex_thresh = qreq_.qparams.min_reindex_thresh
    if len(daid_list) > min_reindex_thresh:
        uuid_map_fpath = get_nnindexer_uuid_map_fpath(qreq_)
        daids_hashid = get_data_cfgstr(qreq_.ibs, daid_list)
        visual_uuid_list = qreq_.ibs.get_annot_visual_uuids(daid_list)
        UUID_MAP_CACHE.write_uuid_map_dict(uuid_map_fpath, visual_uuid_list,
                                           daids_hashid)
    if memtrack is not None:
        memtrack.report('[AFTER WRITE_UUID_MAP]')
    return nnindexer


#@profile
def group_daids_by_cached_nnindexer(qreq_, daid_list, min_reindex_thresh,
                                    max_covers=None):
    r"""
    FIXME: This function is slow due to ibs.get_annot_aids_from_visual_uuid
    (282.253 seconds for 600 queries)

    CommandLine:
        python -m ibeis.model.hots.neighbor_index --test-group_daids_by_cached_nnindexer

    Example:
        >>> # ENABLE_DOCTEST
        >>> from ibeis.model.hots.neighbor_index import *  # NOQA
        >>> import ibeis
        >>> ibs = ibeis.opendb('testdb1')
        >>> ZEB_PLAIN = ibeis.const.Species.ZEB_PLAIN
        >>> daid_list = ibs.get_valid_aids(species=ZEB_PLAIN)
        >>> qreq_ = ibs.new_query_request(daid_list, daid_list)
        >>> # Set the params a bit lower
        >>> max_covers = None
        >>> qreq_.qparams.min_reindex_thresh = 1
        >>> min_reindex_thresh = qreq_.qparams.min_reindex_thresh
        >>> # STEP 0: CLEAR THE CACHE
        >>> clear_uuid_cache(qreq_)
        >>> # STEP 1: ASSERT EMPTY INDEX
        >>> daid_list = ibs.get_valid_aids(species=ZEB_PLAIN)[0:3]
        >>> uncovered_aids, covered_aids_list = group_daids_by_cached_nnindexer(
        ...     qreq_, daid_list, min_reindex_thresh, max_covers)
        >>> result1 = uncovered_aids, covered_aids_list
        >>> ut.assert_eq(result1, ([1, 2, 3], []), 'pre request')
        >>> # TEST 2: SHOULD MAKE 123 COVERED
        >>> nnindexer = request_memcached_ibeis_nnindexer(qreq_, daid_list)
        >>> uncovered_aids, covered_aids_list = group_daids_by_cached_nnindexer(
        ...     qreq_, daid_list, min_reindex_thresh, max_covers)
        >>> result2 = uncovered_aids, covered_aids_list
        >>> ut.assert_eq(result2, ([], [[1, 2, 3]]), 'post request')
    """
    ibs = qreq_.ibs
    # read which annotations have prebuilt caches
    uuid_map_fpath = get_nnindexer_uuid_map_fpath(qreq_)
    candidate_uuids = UUID_MAP_CACHE.read_uuid_map_dict(uuid_map_fpath,
                                                        min_reindex_thresh)
    # find a maximum independent set cover of the requested annotations
    annot_vuuid_list = ibs.get_annot_visual_uuids(daid_list)  # 3.2 %
    covertup = ut.greedy_max_inden_setcover(
        candidate_uuids, annot_vuuid_list, max_covers)  # 0.2 %
    uncovered_vuuids, covered_vuuids_list, accepted_keys = covertup
    # return the grouped covered items (so they can be loaded) and
    # the remaining uuids which need to have an index computed.
    uncovered_aids_ = ibs.get_annot_aids_from_visual_uuid(uncovered_vuuids)  # 28.0%
    covered_aids_list_ = ibs.unflat_map(
        ibs.get_annot_aids_from_visual_uuid, covered_vuuids_list)  # 68%
    # FIXME:
    uncovered_aids = sorted(uncovered_aids_)
    #covered_aids_list = list(map(sorted, covered_aids_list_))
    covered_aids_list = covered_aids_list_
    return uncovered_aids, covered_aids_list
def get_data_cfgstr(ibs, daid_list):
    """ part 2 data hash id """
    daids_hashid = ibs.get_annot_hashid_visual_uuid(daid_list)
    return daids_hashid


#@profile
def build_nnindex_cfgstr(qreq_, daid_list):
    """
    builds a string that uniquely identifies an indexer built with parameters
    from the input query request and indexed descriptors from the input
    annotation ids

    Args:
        qreq_ (QueryRequest): query request object with hyper-parameters
        daid_list (list):

    Returns:
        str: nnindex_cfgstr

    CommandLine:
        python -c 'import utool; print(utool.auto_docstr("ibeis.model.hots.neighbor_index", "build_nnindex_cfgstr"))'
        python -m ibeis.model.hots.neighbor_index --test-build_nnindex_cfgstr

    Example:
        >>> # ENABLE_DOCTEST
        >>> from ibeis.model.hots.neighbor_index import *  # NOQA
        >>> # build test data
        >>> import ibeis
        >>> ibs = ibeis.opendb(db='testdb1')
        >>> daid_list = ibs.get_valid_aids(species=ibeis.const.Species.ZEB_PLAIN)
        >>> qreq_ = ibs.new_query_request(daid_list, daid_list, cfgdict=dict(fg_on=False))
        >>> # execute function
        >>> nnindex_cfgstr = build_nnindex_cfgstr(qreq_, daid_list)
        >>> # verify results
        >>> result = str(nnindex_cfgstr)
        >>> print(result)
        _VUUIDS((6)fvpdb9cud49@ll@+)_FLANN(8_kdtrees)_FEATWEIGHT(OFF)_FEAT(hesaff+sift_)_CHIP(sz450)
    """
    flann_cfgstr = qreq_.qparams.flann_cfgstr
    featweight_cfgstr = qreq_.qparams.featweight_cfgstr
    data_hashid = get_data_cfgstr(qreq_.ibs, daid_list)
    nnindex_cfgstr = ''.join((data_hashid, flann_cfgstr, featweight_cfgstr))
    return nnindex_cfgstr


#@profile
def get_fgweights_hack(qreq_, daid_list):
    """
    hack to get feature weights. returns None if feature weights are turned
    off in config settings
    """
    # <HACK:featweight>
    if qreq_.qparams.fg_on:
        fgws_list = qreq_.ibs.get_annot_fgweights(
            daid_list, config2_=qreq_.get_internal_data_config2(), ensure=True)
    else:
        fgws_list = None
    return fgws_list
    # </HACK:featweight>
def get_support_data(qreq_, daid_list):
    # TODO: look into ut.cached_func
    vecs_list = qreq_.ibs.get_annot_vecs(
        daid_list, config2_=qreq_.get_internal_data_config2())
    fgws_list = get_fgweights_hack(qreq_, daid_list)
    return vecs_list, fgws_list


#@profile
def new_neighbor_index(daid_list, vecs_list, fgws_list, flann_params, cachedir,
                       cfgstr, force_rebuild=False, verbose=True,
                       memtrack=None):
    """
    constructs a neighbor index independent of ibeis

    Args:
        daid_list (list):
        vecs_list (list):
        fgws_list (list):
        flann_params (dict):
        cachedir (str):
        cfgstr (str):

    Returns:
        NeighborIndex: nnindexer

    CommandLine:
        python -m ibeis.model.hots.neighbor_index --test-new_neighbor_index

    Example:
        >>> # SLOW_DOCTEST
        >>> from ibeis.model.hots.neighbor_index import *  # NOQA
        >>> import ibeis
        >>> # build test data
        >>> ibs = ibeis.opendb('testdb1')
        >>> daid_list = ibs.get_valid_aids(species=ibeis.const.Species.ZEB_PLAIN)
        >>> qreq_ = ibs.new_query_request(daid_list, daid_list)
        >>> nnindex_cfgstr = build_nnindex_cfgstr(qreq_, daid_list)
        >>> verbose = True
        >>> cfgstr = nnindex_cfgstr
        >>> cachedir = qreq_.ibs.get_flann_cachedir()
        >>> flann_params = qreq_.qparams.flann_params
        >>> # Get annot descriptors to index
        >>> vecs_list, fgws_list = get_support_data(qreq_, daid_list)
        >>> # execute function
        >>> nnindexer = new_neighbor_index(daid_list, vecs_list, fgws_list, flann_params, cachedir, cfgstr, verbose=True)
        >>> result = ('nnindexer.ax2_aid = %s' % (str(nnindexer.ax2_aid),))
        >>> print(result)
        nnindexer.ax2_aid = [1 2 3 4 5 6]
    """
    nnindexer = NeighborIndex(flann_params, cfgstr)
    #if memtrack is not None:
    #    memtrack.report('CREATED NEIGHBOR INDEX')
    # Initialize neighbor with unindexed data
    nnindexer.init_support(daid_list, vecs_list, fgws_list, verbose=verbose)
    if memtrack is not None:
        memtrack.report('AFTER INIT SUPPORT')
    # Load or build the indexing structure
    nnindexer.ensure_indexer(cachedir, verbose=verbose,
                             force_rebuild=force_rebuild, memtrack=memtrack)
    if memtrack is not None:
        memtrack.report('AFTER LOAD OR BUILD')
    return nnindexer


#@profile
def prepare_index_data(aid_list, vecs_list, fgws_list, verbose=True):
    """
    flattens vecs_list and builds a reverse index from the flattened indices
    (idx) to the original aids and fxs
    """
    print('[nnindex] Preparing data for indexing / loading index')
    # Check input
    assert len(aid_list) == len(vecs_list), 'invalid input. bad len'
    assert len(aid_list) > 0, ('len(aid_list) == 0. '
                               'Cannot invert index without features!')
    # Create indexes into the input aids
    ax_list = np.arange(len(aid_list))
    idx2_vec, idx2_ax, idx2_fx = invert_index(vecs_list, ax_list,
                                              verbose=verbose)
    # <HACK:fgweights>
    if fgws_list is not None:
        idx2_fgw = np.hstack(fgws_list)
        try:
            assert len(idx2_fgw) == len(idx2_vec), \
                'error. weights and vecs do not correspond'
        except Exception as ex:
            ut.printex(ex, keys=[(len, 'idx2_fgw'), (len, 'idx2_vec')])
            raise
    else:
        idx2_fgw = None
    # </HACK:fgweights>
    ax2_aid = np.array(aid_list)
    _preptup = (ax2_aid, idx2_vec, idx2_fgw, idx2_ax, idx2_fx)
    return _preptup
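
# Sketch (toy arrays only) of the inverted-index layout prepare_index_data
# returns: descriptor rows from all annots are stacked into one matrix, and a
# flat row index idx maps back to its annotation via idx2_ax[idx] and to its
# feature number within that annotation via idx2_fx[idx].
def _demo_inverted_index_layout():
    """
    Example:
        >>> vecs_list = [np.zeros((2, 4)), np.ones((3, 4))]  # 2 annots: 2 + 3 feats
        >>> idx2_vec = np.vstack(vecs_list)            # (5 x 4) stacked descriptors
        >>> idx2_ax = np.array([0, 0, 1, 1, 1])        # flat row -> annot index
        >>> idx2_fx = np.array([0, 1, 0, 1, 2])        # flat row -> feature index
        >>> print((int(idx2_ax[3]), int(idx2_fx[3])))  # 4th row is annot 1, feat 1
        (1, 1)
    """
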
@six.add_metaclass(ut.ReloadingMetaclass)
class NeighborIndex(object):
    """
    wrapper class around flann
    stores flann index and data it needs to index into

    Example:
        >>> # ENABLE_DOCTEST
        >>> from ibeis.model.hots.neighbor_index import *  # NOQA
        >>> nnindexer, qreq_, ibs = test_nnindexer()
    """
    ext = '.flann'
    prefix1 = 'flann'

    def __init__(nnindexer, flann_params, cfgstr):
        """ initialize an empty neighbor indexer """
        nnindexer.flann = None     # Approximate search structure
        nnindexer.ax2_aid = None   # (A x 1) Mapping to original annot ids
        nnindexer.idx2_vec = None  # (M x D) Descriptors to index
        nnindexer.idx2_fgw = None  # (M x 1) Descriptor foreground weight
        nnindexer.idx2_ax = None   # (M x 1) Index into the aid_list
        nnindexer.idx2_fx = None   # (M x 1) Index into the annot's features
        nnindexer.max_distance_sqrd = None  # max possible distance^2 for normalization
        nnindexer.cfgstr = cfgstr  # configuration id
        if 'random_seed' not in flann_params:
            # Make flann deterministic for the same data
            flann_params['random_seed'] = 42
        nnindexer.flann_params = flann_params
        nnindexer.cores = flann_params.get('cores', 0)
        nnindexer.checks = flann_params.get('checks', 1028)
        nnindexer.num_indexed = None
        nnindexer.flann_fpath = None

    #def __del__(nnindexer):
    #    print('+------------')
    #    print('!!! DELETING NNINDEXER: ' + nnindexer.cfgstr)
    #    print('L___________')
    #    if nnindexer.flann is not None:
    #        nnindexer.flann.delete_index()

    #@profile
    def init_support(nnindexer, aid_list, vecs_list, fgws_list, verbose=True):
        """ prepares inverted indices and FLANN data structure """
        assert nnindexer.flann is None, 'already initialized'
        _preptup = prepare_index_data(aid_list, vecs_list, fgws_list,
                                      verbose=verbose)
        (ax2_aid, idx2_vec, idx2_fgw, idx2_ax, idx2_fx) = _preptup
        nnindexer.flann = pyflann.FLANN()  # Approximate search structure
        nnindexer.ax2_aid = ax2_aid    # (A x 1) Mapping to original annot ids
        nnindexer.idx2_vec = idx2_vec  # (M x D) Descriptors to index
        nnindexer.idx2_fgw = idx2_fgw  # (M x 1) Descriptor foreground weight
        nnindexer.idx2_ax = idx2_ax    # (M x 1) Index into the aid_list
        nnindexer.idx2_fx = idx2_fx    # (M x 1) Index into the annot's features
        nnindexer.num_indexed = nnindexer.idx2_vec.shape[0]
        # Note: dtype comparison works directly, e.g.:
        #   arr = np.array([1, 2, 3], dtype=np.uint8)
        #   arr.dtype == np.uint8
        if nnindexer.idx2_vec.dtype == hstypes.VEC_TYPE:
            # these are sift descriptors
            nnindexer.max_distance_sqrd = hstypes.VEC_PSEUDO_MAX_DISTANCE_SQRD
        else:
            # FIXME: hacky way to support siam128 descriptors.
            #raise AssertionError(
            #    'NNindexer should get uint8s right now unless the algorithm has changed')
            nnindexer.max_distance_sqrd = None
    @ut.tracefunc_xml
    def add_ibeis_support(nnindexer, qreq_, new_daid_list,
                          verbose=ut.NOT_QUIET):
        # TODO: ensure that the memcache changes appropriately
        clear_memcache()
        if verbose:
            print('[nnindex] request add %d annots to single-indexer' %
                  (len(new_daid_list)))
        duplicate_aids = set(new_daid_list).intersection(
            nnindexer.get_indexed_aids())
        if len(duplicate_aids) > 0:
            if verbose:
                print('[nnindex] request has %d annots that are already '
                      'indexed. ignore those' % (len(duplicate_aids),))
            new_daid_list_ = np.array(sorted(list(set(new_daid_list) -
                                                  duplicate_aids)))
        else:
            new_daid_list_ = new_daid_list
        if len(new_daid_list_) == 0:
            if verbose:
                print('[nnindex] Nothing to do')
        else:
            new_vecs_list, new_fgws_list = get_support_data(qreq_,
                                                            new_daid_list_)
            nnindexer.add_support(new_daid_list_, new_vecs_list,
                                  new_fgws_list, verbose=verbose)
    @ut.tracefunc_xml
    def remove_ibeis_support(nnindexer, qreq_, remove_daid_list,
                             verbose=ut.NOT_QUIET):
        # TODO: ensure that the memcache changes appropriately
        if verbose:
            print('[nnindex] request remove %d annots from single-indexer' %
                  (len(remove_daid_list)))
        clear_memcache()
        nnindexer.remove_support(remove_daid_list, verbose=verbose)
    def remove_support(nnindexer, remove_daid_list, verbose=ut.NOT_QUIET):
        """
        CommandLine:
            python -m ibeis.model.hots.neighbor_index --test-remove_support

        SeeAlso:
            ~/code/flann/src/python/pyflann/index.py

        Example:
            >>> # SLOW_DOCTEST
            >>> # (IMPORTANT)
            >>> from ibeis.model.hots.neighbor_index import *  # NOQA
            >>> nnindexer, qreq_, ibs = test_nnindexer(use_memcache=False)
            >>> remove_daid_list = [8, 9, 10, 11]
            >>> K = 2
            >>> qfx2_vec = ibs.get_annot_vecs(1, config2_=qreq_.get_internal_query_config2())
            >>> # get before data
            >>> (qfx2_idx1, qfx2_dist1) = nnindexer.knn(qfx2_vec, K)
            >>> # execute test function
            >>> nnindexer.remove_support(remove_daid_list)
            >>> # test before data vs after data
            >>> (qfx2_idx2, qfx2_dist2) = nnindexer.knn(qfx2_vec, K)
            >>> ax2_nvecs = ut.dict_take(ut.dict_hist(nnindexer.idx2_ax), range(len(nnindexer.ax2_aid)))
            >>> assert qfx2_idx2.max() < ax2_nvecs[0], 'should only get points from aid 7'
            >>> assert qfx2_idx1.max() > ax2_nvecs[0], 'should get points from everyone'
        """
        if ut.DEBUG2:
            print('REMOVING POINTS')
        # TODO: ensure no duplicates
        ax2_remove_flag = np.in1d(nnindexer.ax2_aid, remove_daid_list)
        remove_ax_list = np.nonzero(ax2_remove_flag)[0]
        idx2_remove_flag = np.in1d(nnindexer.idx2_ax, remove_ax_list)
        remove_idx_list = np.nonzero(idx2_remove_flag)[0]
        if verbose:
            print('[nnindex] Found %d / %d annots that need removing' %
                  (len(remove_ax_list), len(remove_daid_list)))
            print('[nnindex] Removing %d indexed features' %
                  (len(remove_idx_list),))
        # FIXME: indices may need adjustment after removing points.
        # Currently this is not being done and the data is just being left
        # alone. This should be ok temporarily because removed ids should not
        # be returned by the flann object
        nnindexer.flann.remove_points(remove_idx_list)
        # FIXME:
        #nnindexer.ax2_aid
        if True:
            nnindexer.ax2_aid[remove_ax_list] = -1
            nnindexer.idx2_fx[remove_idx_list] = -1
            nnindexer.idx2_vec[remove_idx_list] = 0
            if nnindexer.idx2_fgw is not None:
                nnindexer.idx2_fgw[remove_idx_list] = np.nan
        # FIXME: This will definitely bug out if you remove points and then
        # try to add the same points back again.
        if ut.DEBUG2:
            print('DONE REMOVE POINTS')

    #@profile
    def add_support(nnindexer, new_daid_list, new_vecs_list, new_fgws_list,
                    verbose=ut.NOT_QUIET):
        """
        adds support data (aka data to be indexed)

        Args:
            new_daid_list (list): list of annotation ids that are being added
            new_vecs_list (list): list of descriptor vectors for each annotation
            new_fgws_list (list): list of weights per vector for each annotation
            verbose (bool): verbosity flag (default = True)

        CommandLine:
            python -m ibeis.model.hots.neighbor_index --test-add_support

        Example:
            >>> # ENABLE_DOCTEST
            >>> from ibeis.model.hots.neighbor_index import *  # NOQA
            >>> nnindexer, qreq_, ibs = test_nnindexer(use_memcache=False)
            >>> new_daid_list = [2, 3, 4]
            >>> K = 2
            >>> qfx2_vec = ibs.get_annot_vecs(1, config2_=qreq_.get_internal_query_config2())
            >>> # get before data
            >>> (qfx2_idx1, qfx2_dist1) = nnindexer.knn(qfx2_vec, K)
            >>> new_vecs_list, new_fgws_list = get_support_data(qreq_, new_daid_list)
            >>> # execute test function
            >>> nnindexer.add_support(new_daid_list, new_vecs_list, new_fgws_list)
            >>> # test before data vs after data
            >>> (qfx2_idx2, qfx2_dist2) = nnindexer.knn(qfx2_vec, K)
            >>> assert qfx2_idx2.max() > qfx2_idx1.max()
        """
        # TODO: ensure no duplicates
        nAnnots = nnindexer.num_indexed_annots()
        nVecs = nnindexer.num_indexed_vecs()
        nNewAnnots = len(new_daid_list)
        new_ax_list = np.arange(nAnnots, nAnnots + nNewAnnots)
        new_idx2_vec, new_idx2_ax, new_idx2_fx = \
            invert_index(new_vecs_list, new_ax_list, verbose=verbose)
        nNewVecs = len(new_idx2_vec)
        if verbose or ut.VERYVERBOSE:
            print(('[nnindex] Adding %d vecs from %d annots to nnindex '
                   'with %d vecs and %d annots') %
                  (nNewVecs, nNewAnnots, nVecs, nAnnots))
        if ut.DEBUG2:
            print('STACKING')
        # Stack inverted information
        old_idx2_vec = nnindexer.idx2_vec
        if nnindexer.idx2_fgw is not None:
            new_idx2_fgw = np.hstack(new_fgws_list)
            #nnindexer.old_vecs.append(new_idx2_fgw)
        ##---
        _ax2_aid = np.hstack((nnindexer.ax2_aid, new_daid_list))
        _idx2_ax = np.hstack((nnindexer.idx2_ax, new_idx2_ax))
        _idx2_fx = np.hstack((nnindexer.idx2_fx, new_idx2_fx))
        _idx2_vec = np.vstack((old_idx2_vec, new_idx2_vec))
        if nnindexer.idx2_fgw is not None:
            _idx2_fgw = np.hstack((nnindexer.idx2_fgw, new_idx2_fgw))
        if ut.DEBUG2:
            print('REPLACING')
        nnindexer.ax2_aid = _ax2_aid
        nnindexer.idx2_ax = _idx2_ax
        nnindexer.idx2_vec = _idx2_vec
        nnindexer.idx2_fx = _idx2_fx
        if nnindexer.idx2_fgw is not None:
            nnindexer.idx2_fgw = _idx2_fgw
        #nnindexer.idx2_kpts = None
        #nnindexer.idx2_oris = None
        # Add new points to flann structure
        if ut.DEBUG2:
            print('ADD POINTS (FIXME: SOMETIMES SEGFAULT OCCURS)')
            print('new_idx2_vec.dtype = %r' % new_idx2_vec.dtype)
            print('new_idx2_vec.shape = %r' % (new_idx2_vec.shape,))
        nnindexer.flann.add_points(new_idx2_vec)
        if ut.DEBUG2:
            print('DONE ADD POINTS')
    def ensure_indexer(nnindexer, cachedir, verbose=True, force_rebuild=False,
                       memtrack=None):
        """
        Ensures that you get a neighbor indexer. It either loads a cached
        indexer or rebuilds a new one.
        """
        #with ut.PrintStartEndContext(msg='CACHED NNINDEX', verbose=verbose):
        if NOCACHE_FLANN or force_rebuild:
            print('...nnindex flann cache is forced off')
            load_success = False
        else:
            load_success = nnindexer.load(cachedir, verbose=verbose)
        if load_success:
            if not ut.QUIET:
                nVecs = nnindexer.num_indexed_vecs()
                nAnnots = nnindexer.num_indexed_annots()
                print('...nnindex flann cache hit: %d vectors, %d annots' %
                      (nVecs, nAnnots))
        else:
            if not ut.QUIET:
                nVecs = nnindexer.num_indexed_vecs()
                nAnnots = nnindexer.num_indexed_annots()
                print('...nnindex flann cache miss: %d vectors, %d annots' %
                      (nVecs, nAnnots))
            nnindexer.build_and_save(cachedir, verbose=verbose,
                                     memtrack=memtrack)
    def build_and_save(nnindexer, cachedir, verbose=True, memtrack=None):
        nnindexer.reindex(memtrack=memtrack)
        nnindexer.save(cachedir, verbose=verbose)
    def reindex(nnindexer, verbose=True, memtrack=None):
        """ indexes all vectors with FLANN. """
        num_vecs = nnindexer.num_indexed
        notify_num = 1E6
        verbose_ = ut.VERYVERBOSE or verbose or (
            not ut.QUIET and num_vecs > notify_num)
        if verbose_:
            print('[nnindex] ...building kdtree over %d points '
                  '(this may take a sec).' % num_vecs)
            tt = ut.tic(msg='Building index')
        idx2_vec = nnindexer.idx2_vec
        flann_params = nnindexer.flann_params
        if num_vecs == 0:
            print('WARNING: CANNOT BUILD FLANN INDEX OVER 0 POINTS. '
                  'THIS MAY BE A SIGN OF A DEEPER ISSUE')
        else:
            if memtrack is not None:
                memtrack.report('BEFORE BUILD FLANN INDEX')
            nnindexer.flann.build_index(idx2_vec, **flann_params)
            if memtrack is not None:
                memtrack.report('AFTER BUILD FLANN INDEX')
        if verbose_:
            ut.toc(tt)

    # ---- <cachable_interface> ---
    def save(nnindexer, cachedir, verbose=True):
        """ Caches a neighbor indexer to disk """
        if NOSAVE_FLANN:
            if ut.VERYVERBOSE or verbose:
                print('[nnindex] flann save is deactivated')
            return False
        flann_fpath = nnindexer.get_fpath(cachedir)
        nnindexer.flann_fpath = flann_fpath
        if ut.VERYVERBOSE or verbose:
            print('[nnindex] flann.save_index(%r)' %
                  ut.path_ndir_split(flann_fpath, n=5))
        nnindexer.flann.save_index(flann_fpath)
    def load(nnindexer, cachedir, verbose=True):
        """ Loads a cached neighbor indexer from disk """
        load_success = False
        flann_fpath = nnindexer.get_fpath(cachedir)
        nnindexer.flann_fpath = flann_fpath
        if ut.checkpath(flann_fpath, verbose=verbose):
            idx2_vec = nnindexer.idx2_vec
            # Warning: Loading a FLANN index with old headers may silently fail.
            try:
                nnindexer.flann.load_index(flann_fpath, idx2_vec)
            except (IOError, pyflann.FLANNException) as ex:
                ut.printex(ex, '... cannot load nnindex flann', iswarning=True)
            else:
                load_success = True
        return load_success
    def get_prefix(nnindexer):
        return nnindexer.prefix1

    #@profile
    def get_cfgstr(nnindexer, noquery=False):
        r"""
        returns a string which uniquely identifies the configuration and
        support data

        Args:
            noquery (bool): if True cfgstr is only relevant to building the
                index. No search params are returned (default = False)

        Returns:
            str: flann_cfgstr

        CommandLine:
            python -m ibeis.model.hots.neighbor_index --test-get_cfgstr

        Example:
            >>> # DISABLE_DOCTEST
            >>> from ibeis.model.hots.neighbor_index import *  # NOQA
            >>> cfgdict = dict(fg_on=False)
            >>> ibs, qreq_ = plh.get_pipeline_testdata(defaultdb='testdb1', preload=True, cfgdict=cfgdict)
            >>> nnindexer = qreq_.indexer
            >>> noquery = True
            >>> flann_cfgstr = nnindexer.get_cfgstr(noquery)
            >>> result = ('flann_cfgstr = %s' % (str(flann_cfgstr),))
            >>> print(result)
            flann_cfgstr = _FLANN((algo=kdtree,seed=42,t=8,))_VECS((5232,128)4mu3cl+!se1x13je)
        """
        flann_cfgstr_list = []
        use_params_hash = True
        if use_params_hash:
            flann_defaults = vt.get_flann_params(
                nnindexer.flann_params['algorithm'])
            flann_params_clean = flann_defaults.copy()
            ut.updateif_haskey(flann_params_clean, nnindexer.flann_params)
            if noquery:
                ut.delete_dict_keys(flann_params_clean, ['checks'])
            shortnames = dict(algorithm='algo', checks='chks',
                              random_seed='seed', trees='t')
            short_params = dict([
                (shortnames.get(key, key), str(val)[0:7])
                for key, val in six.iteritems(flann_params_clean)])
                # if key == 'algorithm'])
                # or val != flann_defaults.get(key, None)])
            flann_valsig_ = ut.dict_str(
                short_params, nl=False, explicit=True, strvals=True)
            flann_valsig_ = flann_valsig_.lstrip('dict').replace(' ', '')
            #flann_valsig_ = str(list(flann_params.values()))
            #flann_valsig = ut.remove_chars(flann_valsig_, ', \'[]')
            flann_cfgstr_list.append('_FLANN(' + flann_valsig_ + ')')
        use_data_hash = True
        if use_data_hash:
            idx2_vec = nnindexer.idx2_vec
            vecs_hashstr = ut.hashstr_arr(idx2_vec, '_VECS')
            flann_cfgstr_list.append(vecs_hashstr)
        flann_cfgstr = ''.join(flann_cfgstr_list)
        return flann_cfgstr
    def get_fname(nnindexer):
        return basename(nnindexer.get_fpath(''))
    def get_fpath(nnindexer, cachedir, cfgstr=None):
        _args2_fpath = ut.util_cache._args2_fpath
        dpath = cachedir
        prefix = nnindexer.get_prefix()
        cfgstr = nnindexer.get_cfgstr(noquery=True)
        ext = nnindexer.ext
        fpath = _args2_fpath(dpath, prefix, cfgstr, ext, write_hashtbl=False)
        print('flann fpath = %r' % (fpath,))
        return fpath

    # ---- </cachable_interface> ---
    def get_dtype(nnindexer):
        return nnindexer.idx2_vec.dtype

    #@profile
    def knn(nnindexer, qfx2_vec, K):
        r"""
        Returns the indices and squared distance to the nearest K neighbors.
        The distance is normalized between zero and one using
        VEC_PSEUDO_MAX_DISTANCE = (np.sqrt(2) * VEC_PSEUDO_MAX)

        Args:
            qfx2_vec : (N x D) an array of N, D-dimensional query vectors

            K: number of approximate nearest neighbors to find

        Returns:
            tuple of (qfx2_idx, qfx2_dist)
                ndarray : qfx2_idx[n][k] (N x K) is the index of the kth
                    approximate nearest data vector w.r.t qfx2_vec[n]

                ndarray : qfx2_dist[n][k] (N x K) is the distance to the kth
                    approximate nearest data vector w.r.t. qfx2_vec[n]
                    distance is normalized squared euclidean distance.

        CommandLine:
            python -m ibeis.model.hots.neighbor_index --test-knn:0
            python -m ibeis.model.hots.neighbor_index --test-knn:1

        Example:
            >>> # ENABLE_DOCTEST
            >>> from ibeis.model.hots.neighbor_index import *  # NOQA
            >>> nnindexer, qreq_, ibs = test_nnindexer()
            >>> qfx2_vec = ibs.get_annot_vecs(1, config2_=qreq_.get_internal_query_config2())
            >>> K = 2
            >>> nnindexer.debug_nnindexer()
            >>> assert vt.check_sift_validity(qfx2_vec), 'bad SIFT properties'
            >>> (qfx2_idx, qfx2_dist) = nnindexer.knn(qfx2_vec, K)
            >>> result = str(qfx2_idx.shape) + ' ' + str(qfx2_dist.shape)
            >>> print('qfx2_vec.dtype = %r' % (qfx2_vec.dtype,))
            >>> print('nnindexer.max_distance_sqrd = %r' % (nnindexer.max_distance_sqrd,))
            >>> assert np.all(qfx2_dist < 1.0), (
            ...     'distance should be less than 1. got %r' % (qfx2_dist,))
            >>> # Ensure distance calculations are correct
            >>> qfx2_dvec = nnindexer.idx2_vec[qfx2_idx.T]
            >>> targetdist = vt.L2_sift(qfx2_vec, qfx2_dvec).T ** 2
            >>> rawdist = vt.L2_sqrd(qfx2_vec, qfx2_dvec).T
            >>> assert np.all(qfx2_dist * nnindexer.max_distance_sqrd == rawdist), (
            ...     'inconsistent distance calculations')
            >>> assert np.allclose(targetdist, qfx2_dist), (
            ...     'inconsistent distance calculations')

        Example2:
            >>> # ENABLE_DOCTEST
            >>> from ibeis.model.hots.neighbor_index import *  # NOQA
            >>> nnindexer, qreq_, ibs = test_nnindexer()
            >>> qfx2_vec = np.empty((0, 128), dtype=nnindexer.get_dtype())
            >>> K = 2
            >>> (qfx2_idx, qfx2_dist) = nnindexer.knn(qfx2_vec, K)
            >>> result = str(qfx2_idx.shape) + ' ' + str(qfx2_dist.shape)
            >>> print(result)
            (0, 2) (0, 2)
        """
        if K == 0:
            (qfx2_idx, qfx2_dist) = nnindexer.empty_neighbors(len(qfx2_vec), 0)
        if K > nnindexer.num_indexed or K == 0:
            # If we want more points than there are in the database
            # FLANN will raise an exception. This corner case
            # will hopefully only be hit if using the multi-indexer
            # so try this workaround which should seamlessly integrate
            # when the multi-indexer stacks the subindexer results.
            # There is a very strong possibility that this will cause errors
            # if this corner case is used in non-multi-indexer code
            K = nnindexer.num_indexed
            (qfx2_idx, qfx2_dist) = nnindexer.empty_neighbors(len(qfx2_vec), 0)
        elif len(qfx2_vec) == 0:
            (qfx2_idx, qfx2_dist) = nnindexer.empty_neighbors(0, K)
        else:
            try:
                # perform nearest neighbors
                (qfx2_idx, qfx2_raw_dist) = nnindexer.flann.nn_index(
                    qfx2_vec, K, checks=nnindexer.checks,
                    cores=nnindexer.cores)
            except pyflann.FLANNException as ex:
                ut.printex(ex, 'probably misread the cached flann_fpath=%r' %
                           (nnindexer.flann_fpath,))
                #ut.embed()
                # Uncomment and use if the flann index needs to be deleted
                #ibs = ut.search_stack_for_localvar('ibs')
                #cachedir = ibs.get_flann_cachedir()
                #flann_fpath = nnindexer.get_fpath(cachedir)
                raise
            # Ensure that distances returned are between 0 and 1
            if nnindexer.max_distance_sqrd is not None:
                qfx2_dist = np.divide(qfx2_raw_dist,
                                      nnindexer.max_distance_sqrd)
            else:
                qfx2_dist = qfx2_raw_dist
            if ut.DEBUG2:
                # Ensure distance calculations are correct
                qfx2_dvec = nnindexer.idx2_vec[qfx2_idx.T]
                targetdist = vt.L2_sift(qfx2_vec, qfx2_dvec).T ** 2
                rawdist = vt.L2_sqrd(qfx2_vec, qfx2_dvec).T
                assert np.all(qfx2_raw_dist == rawdist), \
                    'inconsistent distance calculations'
                assert np.allclose(targetdist, qfx2_dist), \
                    'inconsistent distance calculations'
        #qfx2_dist = np.sqrt(qfx2_dist) / nnindexer.max_distance_sqrd
        return (qfx2_idx, qfx2_dist)
    def debug_nnindexer(nnindexer):
        """ Makes sure the indexer has valid SIFT descriptors """
        # FIXME: they might not agree if data has been added / removed
        init_data, extra_data = nnindexer.flann.get_indexed_data()
        with ut.Indenter('[NNINDEX_DEBUG]'):
            print('extra_data = %r' % (extra_data,))
            print('init_data = %r' % (init_data,))
            print('nnindexer.max_distance_sqrd = %r' %
                  (nnindexer.max_distance_sqrd,))
            data_agrees = nnindexer.idx2_vec is nnindexer.flann.get_indexed_data()[0]
            if data_agrees:
                print('indexed_data agrees')
            assert vt.check_sift_validity(init_data), 'bad SIFT properties'
            assert data_agrees, 'indexed data does not agree'
    def empty_neighbors(nnindexer, nQfx, K):
        # nQfx rows (one per query vector) with no valid neighbor entries
        qfx2_idx = np.empty((nQfx, K), dtype=np.int32)
        qfx2_dist = np.empty((nQfx, K), dtype=np.float64)
        return (qfx2_idx, qfx2_dist)
    def num_indexed_vecs(nnindexer):
        #invalid_idxs = (nnindexer.ax2_aid[nnindexer.idx2_ax] == -1)
        return nnindexer.idx2_vec.shape[0]
        #return len(nnindexer.idx2_vec)

    def num_indexed_annots(nnindexer):
        #invalid_idxs = (nnindexer.ax2_aid[nnindexer.idx2_ax] == -1)
        return (nnindexer.ax2_aid != -1).sum()
        #nnindexer.ax2_aid.shape[0]
        #return len(nnindexer.ax2_aid)

    def get_indexed_aids(nnindexer):
        return nnindexer.ax2_aid[nnindexer.ax2_aid != -1]

    def get_indexed_vecs(nnindexer):
        valid_idxs = (nnindexer.ax2_aid[nnindexer.idx2_ax] != -1)
        valid_idx2_vec = nnindexer.idx2_vec.compress(valid_idxs, axis=0)
        return valid_idx2_vec
    def get_removed_idxs(nnindexer):
        """
        __removed_ids = nnindexer.flann._FLANN__removed_ids
        invalid_idxs = nnindexer.get_removed_idxs()
        assert len(np.intersect1d(invalid_idxs, __removed_ids)) == len(__removed_ids)
        """
        invalid_idxs = np.nonzero(nnindexer.ax2_aid[nnindexer.idx2_ax] == -1)[0]
        return invalid_idxs
    def get_nn_vecs(nnindexer, qfx2_nnidx):
        """ gets matching vectors """
        return nnindexer.idx2_vec.take(qfx2_nnidx, axis=0)

    def get_nn_axs(nnindexer, qfx2_nnidx):
        """ gets matching internal annotation indices """
        return nnindexer.idx2_ax.take(qfx2_nnidx)

    #@profile
    def get_nn_aids(nnindexer, qfx2_nnidx):
        """
        Args:
            qfx2_nnidx : (N x K) qfx2_idx[n][k] is the index of the kth
                approximate nearest data vector

        Returns:
            qfx2_aid : (N x K) qfx2_aid[n][k] is the annotation id of the kth
                approximate nearest data vector

        CommandLine:
            python -m ibeis.model.hots.neighbor_index --exec-get_nn_aids

        Example:
            >>> # ENABLE_DOCTEST
            >>> from ibeis.model.hots.neighbor_index import *  # NOQA
            >>> cfgdict = dict(fg_on=False)
            >>> ibs, qreq_ = plh.get_pipeline_testdata(defaultdb='testdb1', cfgdict=cfgdict, preload=True)
            >>> nnindexer = qreq_.indexer
            >>> qfx2_vec = qreq_.ibs.get_annot_vecs(qreq_.get_internal_qaids()[0], config2_=qreq_.get_internal_query_config2())
            >>> num_neighbors = 4
            >>> (qfx2_nnidx, qfx2_dist) = nnindexer.knn(qfx2_vec, num_neighbors)
            >>> qfx2_aid = nnindexer.get_nn_aids(qfx2_nnidx)
            >>> assert qfx2_aid.shape[1] == num_neighbors
            >>> result = ('qfx2_aid.shape = %r' % (qfx2_aid.shape,))
            >>> print(result)
            qfx2_aid.shape = (1257, 4)
        """
        #qfx2_ax = nnindexer.idx2_ax[qfx2_nnidx]
        #qfx2_aid = nnindexer.ax2_aid[qfx2_ax]
        qfx2_ax = nnindexer.idx2_ax.take(qfx2_nnidx)
        qfx2_aid = nnindexer.ax2_aid.take(qfx2_ax)
        return qfx2_aid
    def get_nn_featxs(nnindexer, qfx2_nnidx):
        """
        Args:
            qfx2_nnidx : (N x K) qfx2_idx[n][k] is the index of the kth
                approximate nearest data vector

        Returns:
            qfx2_fx : (N x K) qfx2_fx[n][k] is the feature index (w.r.t the
                source annotation) of the kth approximate nearest data vector
        """
        #return nnindexer.idx2_fx[qfx2_nnidx]
        qfx2_fx = nnindexer.idx2_fx.take(qfx2_nnidx)
        return qfx2_fx
    def get_nn_fgws(nnindexer, qfx2_nnidx):
        r"""
        Gets foreground weights of neighbors

        CommandLine:
            python -m ibeis.model.hots.neighbor_index --exec-NeighborIndex.get_nn_fgws

        Args:
            qfx2_nnidx : (N x K) qfx2_idx[n][k] is the index of the kth
                approximate nearest data vector

        Returns:
            qfx2_fgw : (N x K) qfx2_fgw[n][k] is the foreground weight of the
                kth approximate nearest data vector

        Example:
            >>> # ENABLE_DOCTEST
            >>> from ibeis.model.hots.neighbor_index import *  # NOQA
            >>> nnindexer, qreq_, ibs = test_nnindexer(dbname='testdb1')
            >>> qfx2_nnidx = np.array([[0, 1, 2], [3, 4, 5]])
            >>> qfx2_fgw = nnindexer.get_nn_fgws(qfx2_nnidx)
        """
        #qfx2_ax = nnindexer.idx2_ax[qfx2_nnidx]
        #qfx2_aid = nnindexer.ax2_aid[qfx2_ax]
        if nnindexer.idx2_fgw is None:
            qfx2_fgw = np.ones(qfx2_nnidx.shape)
        else:
            qfx2_fgw = nnindexer.idx2_fgw.take(qfx2_nnidx)
        return qfx2_fgw
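
# Worked check of the distance normalization used by init_support/knn above.
# SIFT descriptors have non-negative components and are stored scaled to a
# pseudo norm of VEC_PSEUDO_MAX, so for two such vectors a and b:
#   ||a - b||^2 = ||a||^2 + ||b||^2 - 2*(a . b) <= 2 * VEC_PSEUDO_MAX ** 2
# (since a . b >= 0). Dividing raw squared distances by this bound maps them
# into [0, 1]. This sketch assumes VEC_PSEUDO_MAX = 512; the authoritative
# constant lives in hstypes.VEC_PSEUDO_MAX_DISTANCE_SQRD.
def _demo_max_distance_bound():
    """
    Example:
        >>> VEC_PSEUDO_MAX = 512  # assumed value; see hstypes
        >>> max_distance_sqrd = 2 * (VEC_PSEUDO_MAX ** 2)
        >>> print(max_distance_sqrd)
        524288
        >>> # the normalized distance of two maximally distant vectors is 1.0
        >>> print(float(max_distance_sqrd) / max_distance_sqrd)
        1.0
    """
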

#@profile
def invert_index(vecs_list, ax_list, verbose=ut.NOT_QUIET):
    r"""
    Aggregates descriptors of input annotations and returns inverted
    information

    Args:
        vecs_list (list):
        ax_list (list):
        verbose (bool): verbosity flag (default = True)

    Returns:
        tuple: (idx2_vec, idx2_ax, idx2_fx)

    CommandLine:
        python -m ibeis.model.hots.neighbor_index --test-invert_index

    Example:
        >>> # SLOW_DOCTEST
        >>> from ibeis.model.hots.neighbor_index import *  # NOQA
        >>> import vtool as vt
        >>> num = 100
        >>> rng = np.random.RandomState(0)
        >>> ax_list = np.arange(num)
        >>> vecs_list = [vt.tests.dummy.get_dummy_dpts(rng.randint(100)) for ax in ax_list]
        >>> verbose = True
        >>> (idx2_vec, idx2_ax, idx2_fx) = invert_index(vecs_list, ax_list, verbose)
    """
    if ut.VERYVERBOSE:
        print('[nnindex] stacking descriptors from %d annotations' %
              len(ax_list))
    try:
        idx2_vec, idx2_ax, idx2_fx = vt.invertible_stack(vecs_list, ax_list)
        assert idx2_vec.shape[0] == idx2_ax.shape[0]
        assert idx2_vec.shape[0] == idx2_fx.shape[0]
    except MemoryError as ex:
        ut.printex(ex, 'cannot build inverted index', '[!memerror]')
        raise
    if ut.VERYVERBOSE or verbose:
        print('[nnindex] stacked nVecs={nVecs} from nAnnots={nAnnots}'.format(
            nVecs=len(idx2_vec), nAnnots=len(ax_list)))
        print('[nnindex] idx2_vecs.dtype = {}'.format(idx2_vec.dtype))
        print('[nnindex] memory(idx2_vecs) = {}'.format(
            ut.byte_str2(idx2_vec.size * idx2_vec.dtype.itemsize)))
    return idx2_vec, idx2_ax, idx2_fx
def test_nnindexer(dbname='testdb1', with_indexer=True, use_memcache=True):
    """
    Example:
        >>> # ENABLE_DOCTEST
        >>> from ibeis.model.hots.neighbor_index import *  # NOQA
        >>> nnindexer, qreq_, ibs = test_nnindexer()
    """
    import ibeis
    daid_list = [7, 8, 9, 10, 11]
    ibs = ibeis.opendb(db=dbname)
    # use_memcache isn't used here because we aren't lazy loading the indexer
    cfgdict = dict(fg_on=False)
    qreq_ = ibs.new_query_request(daid_list, daid_list,
                                  use_memcache=use_memcache, cfgdict=cfgdict)
    if with_indexer:
        # we do an explicit creation of an indexer for these tests
        nnindexer = request_ibeis_nnindexer(qreq_, use_memcache=use_memcache)
    else:
        nnindexer = None
    return nnindexer, qreq_, ibs


# ------------
# NEW


#@profile
def check_background_process():
    """
    checks to see if the process has finished and then writes the uuid map to
    disk
    """
    global CURRENT_THREAD
    if CURRENT_THREAD is None or CURRENT_THREAD.is_alive():
        print('[FG] background thread is not ready yet')
        return False
    # Get info set in background process
    finishtup = CURRENT_THREAD.finishtup
    (uuid_map_fpath, daids_hashid, visual_uuid_list, min_reindex_thresh) = finishtup
    # Clean up background process
    CURRENT_THREAD.join()
    CURRENT_THREAD = None
    # Write data to current uuidcache
    if len(visual_uuid_list) > min_reindex_thresh:
        UUID_MAP_CACHE.write_uuid_map_dict(uuid_map_fpath, visual_uuid_list,
                                           daids_hashid)
    return True
def can_request_background_nnindexer():
    return CURRENT_THREAD is None or not CURRENT_THREAD.is_alive()
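
# check_background_process / can_request_background_nnindexer above and
# request_background_nnindexer below implement a single-slot background-build
# protocol: at most one worker is spawned, and the poller joins it and commits
# its uuid-map payload once it stops being alive. A minimal sketch of the same
# poll-then-join pattern using only the stdlib (illustration only; the real
# code uses ut.spawn_background_process):
def _demo_poll_then_join():
    import multiprocessing
    import time
    proc = multiprocessing.Process(target=time.sleep, args=(0.01,))
    proc.start()
    while proc.is_alive():  # poll, as check_background_process does
        time.sleep(0.005)
    proc.join()             # then join; the real code also commits finishtup here
    return proc.exitcode == 0
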

#@profile
def request_background_nnindexer(qreq_, daid_list):
    """
    FIXME: Duplicate code

    Args:
        qreq_ (QueryRequest): query request object with hyper-parameters
        daid_list (list):

    Returns:
        bool:

    CommandLine:
        python -m ibeis.model.hots.neighbor_index --test-request_background_nnindexer

    Example:
        >>> # DISABLE_DOCTEST
        >>> from ibeis.model.hots.neighbor_index import *  # NOQA
        >>> from ibeis.model.hots import neighbor_index  # NOQA
        >>> import ibeis
        >>> # build test data
        >>> ibs = ibeis.opendb('testdb1')
        >>> daid_list = ibs.get_valid_aids(species=ibeis.const.Species.ZEB_PLAIN)
        >>> qreq_ = ibs.new_query_request(daid_list, daid_list)
        >>> # execute function
        >>> neighbor_index.request_background_nnindexer(qreq_, daid_list)
        >>> # verify results
        >>> result = str(False)
        >>> print(result)
    """
    global CURRENT_THREAD
    print('Requesting background reindex')
    if not can_request_background_nnindexer():
        # Make sure this function doesn't run if it is already running
        print('REQUEST DENIED')
        return False
    print('REQUEST ACCEPTED')
    daids_hashid = qreq_.ibs.get_annot_hashid_visual_uuid(daid_list)
    cfgstr = build_nnindex_cfgstr(qreq_, daid_list)
    cachedir = qreq_.ibs.get_flann_cachedir()
    # Save inverted cache uuid mappings
    min_reindex_thresh = qreq_.qparams.min_reindex_thresh
    # Grab the keypoints names and image ids before query time?
    flann_params = qreq_.qparams.flann_params
    # Get annot descriptors to index
    vecs_list, fgws_list = get_support_data(qreq_, daid_list)
    # Don't hash rowids when given enough info in nnindex_cfgstr
    flann_params['cores'] = 2  # Only use a few cores in the background
    # Build/Load the flann index
    uuid_map_fpath = get_nnindexer_uuid_map_fpath(qreq_)
    visual_uuid_list = qreq_.ibs.get_annot_visual_uuids(daid_list)
    # set temporary attribute for when the thread finishes
    finishtup = (uuid_map_fpath, daids_hashid, visual_uuid_list,
                 min_reindex_thresh)
    CURRENT_THREAD = ut.spawn_background_process(
        background_flann_func, cachedir, daid_list, vecs_list, fgws_list,
        flann_params, cfgstr, uuid_map_fpath, daids_hashid, visual_uuid_list,
        min_reindex_thresh)
    CURRENT_THREAD.finishtup = finishtup
def background_flann_func(cachedir, daid_list, vecs_list, fgws_list,
                          flann_params, cfgstr, uuid_map_fpath, daids_hashid,
                          visual_uuid_list, min_reindex_thresh):
    """ FIXME: Duplicate code """
    print('[BG] Starting Background FLANN')
    # FIXME. dont use flann cache
    nnindexer = NeighborIndex(flann_params, cfgstr)
    # Initialize neighbor with unindexed data
    nnindexer.init_support(daid_list, vecs_list, fgws_list, verbose=True)
    # Load or build the indexing structure
    nnindexer.ensure_indexer(cachedir, verbose=True)
    if len(visual_uuid_list) > min_reindex_thresh:
        UUID_MAP_CACHE.write_uuid_map_dict(uuid_map_fpath, visual_uuid_list,
                                           daids_hashid)
    print('[BG] Finished Background FLANN')

if __name__ == '__main__':
    """
    CommandLine:
        python -m ibeis.model.hots.neighbor_index
        python -m ibeis.model.hots.neighbor_index --allexamples
        python -m ibeis.model.hots.neighbor_index --allexamples --noface --nosrc

        utprof.sh ibeis/model/hots/neighbor_index.py --allexamples
    """
    import multiprocessing
    multiprocessing.freeze_support()  # for win32
    ut.doctest_funcs()