Source code for ibeis.model.hots.smk.smk_repr

# -*- coding: utf-8 -*-
from __future__ import absolute_import, division, print_function
#import six
import utool as ut
#import weakref
import numpy as np
import six
from six.moves import zip, map  # NOQA
from vtool import nearest_neighbors as nntool
from ibeis.model.hots import hstypes
from ibeis.model.hots.smk import smk_scoring
from ibeis.model.hots.smk import smk_index
from collections import namedtuple
# Rebind the module-level helpers returned by utool's inject call. This
# replaces the builtin ``print`` in this module and supplies the ``profile``
# decorator applied to several functions below.
(print, print_, printDBG, rrr, profile) = ut.inject(__name__, '[smk_repr]')


# Truthy when utool's DEBUG2 is set or the '--debug-smk' flag is given
# (presumably read from the command line by ut.get_argflag -- confirm).
# Enables the extra consistency check at the end of new_qindex.
DEBUG_SMK = ut.DEBUG2 or ut.get_argflag('--debug-smk')


# Key passed to ut.make_class_method_decorator when registering functions and
# to ut.inject_instance when InvertedIndex.__init__ runs.
INVERTED_INDEX_INJECT_KEY = ('InvertedIndex', __name__)


@six.add_metaclass(ut.ReloadingMetaclass)
class InvertedIndex(object):
    r"""
    Stores inverted index state information
    (mapping from words to database aids and fxs_list)

    The constructor only stores the raw stacked data. Every ``wx2_*`` table,
    ``idx2_wxs``, ``idx2_fweight`` and ``daid2_sccw`` start as None;
    ``compute_data_internals_`` fills in most of them.

    Attributes:
        idx2_dvec (ndarray[S x DIM]): stacked index -> descriptor vector (currently sift)
        idx2_daid (ndarray[S x 1]): stacked index -> annot id
        idx2_dfx (ndarray[S x 1]): stacked index -> feature index (wrt daid)
        idx2_fweight (ndarray[S x 1]): stacked index -> feature weight
        idx2_wxs (list): stacked index -> word indexes (jagged)
        words (ndarray[C x DIM]): visual word centroids
        wordflann (FLANN): FLANN search structure
        wx2_idxs (dict of lists of ndarrays): word index -> stacked indexes
        wx2_fxs (dict of lists of ndarrays): word index -> aggregate feature indexes
        wx2_aids (dict of ndarrays[N_c x 1]): word index -> aggregate aids
        wx2_drvecs (dict of ndarrays[N_c x DIM]): word index -> residual vectors
        wx2_dflags (dict of ndarrays[N_c x 1]): word index -> residual flags
        wx2_idf (dict of ndarrays[N_c x 1]): word index -> idf (wx normalizer)
        wx2_dmaws (dict of ndarrays[N_c x 1]): word index -> multi-assign
            weights; this is the name compute_data_internals_ assigns to
        wx2_maws (dict of ndarrays[N_c x 1]): legacy name for the multi-assign
            weights; nothing in this module assigns it after construction
        daids (ndarray): indexed annotation ids
        daid2_sccw (dict of floats): daid -> sccw (daid self-consistency weight)
        daid2_label (dict of tuples): daid -> label (name, view)
    """

    def __init__(invindex, words, wordflann, idx2_vec, idx2_aid, idx2_fx,
                 daids, daid2_label):
        # Raw data supplied by the caller
        invindex.words        = words
        invindex.wordflann    = wordflann
        invindex.idx2_dvec    = idx2_vec
        invindex.idx2_daid    = idx2_aid
        invindex.idx2_dfx     = idx2_fx
        invindex.daids        = daids
        invindex.daid2_label  = daid2_label
        # Internals, filled in later
        invindex.wx2_idxs     = None
        invindex.wx2_aids     = None
        invindex.wx2_fxs      = None
        invindex.wx2_maws     = None
        # Declared up front so the attribute exists before the internals are
        # computed (previously it only appeared once compute_data_internals_
        # had run, so earlier reads raised AttributeError).
        invindex.wx2_dmaws    = None
        invindex.wx2_drvecs   = None
        invindex.wx2_dflags   = None
        invindex.wx2_idf      = None
        invindex.daid2_sccw   = None
        invindex.idx2_fweight = None
        invindex.idx2_wxs     = None   # stacked index -> word indexes

        # Inject debug function
        # (imported here rather than at module level, as in the original)
        from ibeis.model.hots.smk import smk_debug
        ut.make_class_method_decorator(INVERTED_INDEX_INJECT_KEY)(smk_debug.invindex_dbgstr)
        ut.inject_instance(invindex, classkey=INVERTED_INDEX_INJECT_KEY)
@ut.make_class_method_decorator(INVERTED_INDEX_INJECT_KEY)
def report_memory(obj, objname='obj'):
    """
    Prints one memory-usage line per instance attribute of ``obj``.

    Registered under INVERTED_INDEX_INJECT_KEY, so it is attached to
    InvertedIndex instances when they are constructed.

    Args:
        obj (object): any object with a ``__dict__`` (e.g. an InvertedIndex)
        objname (str): label used for the object in the printed report

    Returns:
        None

    Ignore:
        obj = invindex
        objname = 'invindex'
    """
    print('Object Memory Usage for %s' % objname)
    attr_dict = obj.__dict__
    # max() raises ValueError on an empty sequence; fall back to a width of 0
    # so an object without instance attributes just prints the header.
    maxlen = max([len(key) for key in six.iterkeys(attr_dict)] or [0])
    fmtstr = 'memusage({0}.{1}){2} = '
    for key, val in six.iteritems(attr_dict):
        # Pad the label so the '=' signs line up across attributes
        lbl = fmtstr.format(objname, key, ' ' * (maxlen - len(key)))
        sizestr = ut.get_object_size_str(val, lbl=lbl, unit='MB')
        print(sizestr)
# Register utool's report_memsize under the InvertedIndex injection key.
report_memsize = ut.make_class_method_decorator(INVERTED_INDEX_INJECT_KEY)(ut.report_memsize)


# Immutable record built by new_qindex; the fields are in the same order as
# the positional arguments new_qindex passes to it.
QueryIndex = namedtuple('QueryIndex', [
    'wx2_qrvecs',
    'wx2_qflags',
    'wx2_maws',
    'wx2_qaids',
    'wx2_qfxs',
    'query_sccw',
])
class LazyGetter(object):
    """
    Adapter that exposes a one-argument getter through both indexing
    (``getter[index]``) and call (``getter(index)``) syntax.

    DEPRECATE
    """

    def __init__(self, getter_func):
        self.getter_func = getter_func

    def __call__(self, index):
        fetch = self.getter_func
        return fetch(index)

    def __getitem__(self, index):
        fetch = self.getter_func
        return fetch(index)
class DataFrameProxy(object):
    """
    DEPRECATE

    pandas is actually really slow. This class emulates it so I don't have
    to change my function calls, but without all the slowness.

    Only the 'kpts', 'vecs' and 'labels' columns are emulated. Each is
    returned as a LazyGetter around the matching controller getter, so
    ``annots_df['vecs'][aids]`` forwards to ``ibs.get_annot_vecs(aids)``.
    """

    def __init__(annots_df, ibs):
        annots_df.ibs = ibs

    def __getitem__(annots_df, key):
        """
        Args:
            key (str): one of 'kpts', 'vecs' or 'labels'

        Returns:
            LazyGetter: wrapper around the controller getter for ``key``

        Raises:
            KeyError: if ``key`` is not an emulated column
        """
        if key == 'kpts':
            return LazyGetter(annots_df.ibs.get_annot_kpts)
        elif key == 'vecs':
            return LazyGetter(annots_df.ibs.get_annot_vecs)
        elif key == 'labels':
            return LazyGetter(annots_df.ibs.get_annot_class_labels)
        # Previously an unknown key fell through and returned None, which only
        # failed later when the caller indexed the result. Fail here instead,
        # as a real DataFrame would.
        raise KeyError(key)
@profile
def make_annot_df(ibs):
    """
    Creates a pandas like DataFrame interface to an IBEISController

    DEPRECATE

    Args:
        ibs (IBEISController): controller the proxy forwards its lookups to

    Returns:
        DataFrameProxy: annots_df, a lightweight wrapper around ``ibs``

    Example:
        >>> from ibeis.model.hots.smk.smk_repr import *  # NOQA
        >>> from ibeis.model.hots.smk import smk_debug
        >>> ibs = smk_debug.testdata_ibeis()
        >>> annots_df = make_annot_df(ibs)
        >>> # The proxy has no ``values`` attribute (the earlier example
        >>> # hashed ``annots_df.values``), so check the wrapped controller.
        >>> assert annots_df.ibs is ibs

    Auto:
        from ibeis.model.hots.smk import smk_repr
        import utool as ut
        argdoc = ut.make_default_docstr(smk_repr.make_annot_df)
        print(argdoc)
    """
    return DataFrameProxy(ibs)
@profile
def new_qindex(annots_df, qaid, invindex, qparams):
    r"""
    Gets query ready for computations

    Assigns the query descriptors to visual words, computes the per-word
    residual vectors, and computes the query self-consistency weight (sccw).

    Args:
        annots_df (DataFrameProxy): pandas-like data interface
        qaid (int): query annotation id
        invindex (InvertedIndex): inverted index object
        qparams (QueryParams): query parameters object

    Returns:
        QueryIndex: qindex, named tuple containing query information

    Raises:
        AssertionError: if the computed query_sccw is not positive (the error
            is printed with ut.printex and re-raised)

    CommandLine:
        python -m ibeis.model.hots.smk.smk_repr --test-new_qindex

    Example:
        >>> # DISABLE_DOCTEST
        >>> from ibeis.model.hots.smk.smk_repr import *  # NOQA
        >>> from ibeis.model.hots.smk import smk_debug
        >>> ibs, annots_df, qaid, invindex, qparams = smk_debug.testdata_query_repr(db='PZ_Mothers', nWords=128000)
        >>> qindex = new_qindex(annots_df, qaid, invindex, qparams)
        >>> assert smk_debug.check_wx2_rvecs(qindex.wx2_qrvecs), 'has nan'
        >>> smk_debug.invindex_dbgstr(invindex)

    Ignore::
        idx2_vec = qfx2_vec
        idx2_aid = qfx2_aid
        idx2_fx  = qfx2_qfx
        wx2_idxs = _wx2_qfxs
        wx2_maws = _wx2_maws
        from ibeis.model.hots.smk import smk_repr
        import utool as ut
        ut.rrrr()
        print(ut.make_default_docstr(smk_repr.new_qindex))
    """
    # TODO: Precompute and lookup residuals and assignments
    if not ut.QUIET:
        print('[smk_repr] Query Repr qaid=%r' % (qaid,))
    #
    nAssign               = qparams.nAssign
    massign_alpha         = qparams.massign_alpha
    massign_sigma         = qparams.massign_sigma
    massign_equal_weights = qparams.massign_equal_weights
    #
    aggregate             = qparams.aggregate
    smk_alpha             = qparams.smk_alpha
    smk_thresh            = qparams.smk_thresh
    #
    wx2_idf   = invindex.wx2_idf
    words     = invindex.words
    wordflann = invindex.wordflann
    #qfx2_vec  = annots_df['vecs'][qaid]
    # TODO: remove all mention of annot_df and ensure that qparams is passed
    # correctly to config2_
    qfx2_vec  = annots_df.ibs.get_annot_vecs(qaid, config2_=qparams)
    #-------------------
    # Assign query to (multiple) words
    #-------------------
    _wx2_qfxs, _wx2_maws, qfx2_wxs = smk_index.assign_to_words_(
        wordflann, words, qfx2_vec, nAssign, massign_alpha,
        massign_sigma, massign_equal_weights)
    # Hack to make implementing asmk easier, very redundant
    qfx2_aid = np.array([qaid] * len(qfx2_wxs), dtype=hstypes.INTEGER_TYPE)
    qfx2_qfx = np.arange(len(qfx2_vec))
    #-------------------
    # Compute query residuals
    #-------------------
    wx2_qrvecs, wx2_qaids, wx2_qfxs, wx2_maws, wx2_qflags = smk_index.compute_residuals_(
        words, _wx2_qfxs, _wx2_maws, qfx2_vec, qfx2_aid, qfx2_qfx, aggregate)
    # each value in wx2_ dicts is a list with len equal to the number of rvecs
    if ut.VERBOSE:
        print('[smk_repr] Query SCCW smk_alpha=%r, smk_thresh=%r' % (smk_alpha, smk_thresh))
    #-------------------
    # Compute query sccw
    #-------------------
    # Materialize the keys first: on Python 3 ``dict.keys()`` is a view, which
    # numpy cannot convert into an index array.
    wx_sublist  = np.array(list(wx2_qrvecs.keys()), dtype=hstypes.INDEX_TYPE)
    idf_list    = [wx2_idf[wx]    for wx in wx_sublist]
    rvecs_list  = [wx2_qrvecs[wx] for wx in wx_sublist]
    maws_list   = [wx2_maws[wx]   for wx in wx_sublist]
    flags_list  = [wx2_qflags[wx] for wx in wx_sublist]
    query_sccw = smk_scoring.sccw_summation(rvecs_list, flags_list, idf_list, maws_list, smk_alpha, smk_thresh)
    try:
        assert query_sccw > 0, 'query_sccw=%r is not positive!' % (query_sccw,)
    except Exception as ex:
        # Report the failure through utool before propagating it
        ut.printex(ex)
        raise
    #-------------------
    # Build query representation class/tuple
    #-------------------
    if DEBUG_SMK:
        from ibeis.model.hots.smk import smk_debug
        qfx2_vec = annots_df['vecs'][qaid]
        assert smk_debug.check_wx2_rvecs2(
            invindex, wx2_qrvecs, wx2_qfxs, qfx2_vec), 'bad qindex'
    qindex = QueryIndex(wx2_qrvecs, wx2_qflags, wx2_maws, wx2_qaids, wx2_qfxs, query_sccw)
    return qindex


#@profile
def index_data_annots(annots_df, daids, words, qparams, with_internals=True,
                      memtrack=None, delete_rawvecs=False):
    """
    Builds the initial inverted index from a dataframe, daids, and words.
    Optionally builds the internals of the inverted structure

    Args:
        annots_df (DataFrameProxy): pandas-like data interface; its 'vecs'
            and 'labels' columns are read for ``daids``
        daids (): database annotation ids to index
        words (): visual word centroids
        qparams (): query parameters, forwarded to compute_data_internals_
        with_internals (bool): if True, also run compute_data_internals_
        memtrack (): memory debugging object, forwarded unchanged
        delete_rawvecs (bool): forwarded to compute_data_internals_

    Returns:
        InvertedIndex: invindex

    Example:
        >>> from ibeis.model.hots.smk.smk_repr import *  # NOQA
        >>> from ibeis.model.hots.smk import smk_debug
        >>> ibs, annots_df, daids, qaids, qreq_, words = smk_debug.testdata_words()
        >>> qparams = qreq_.qparams
        >>> with_internals = False
        >>> invindex = index_data_annots(annots_df, daids, words, qparams, with_internals)

    Ignore:
        #>>> print(ut.hashstr(repr(list(invindex.__dict__.values()))))
        #v8+i5i8+55j0swio

    Auto:
        from ibeis.model.hots.smk import smk_repr
        import utool as ut
        ut.rrrr()
        print(ut.make_default_docstr(smk_repr.index_data_annots))
    """
    if not ut.QUIET:
        print('[smk_repr] index_data_annots')
    # Compute fast lookup index for the words
    wordflann = nntool.flann_cache(words, flann_params={}, appname='smk')
    _vecs_list = annots_df['vecs'][daids]
    _label_list = annots_df['labels'][daids]
    # Stack the per-annotation descriptors into flat, index-aligned arrays
    idx2_dvec, idx2_daid, idx2_dfx = nntool.invertible_stack(_vecs_list, daids)

    # TODO:
    # Need to individually cache residual vectors.
    # rvecs_list = annots_df['rvecs'][daids]
    #
    # Residual vectors depend on
    # * nearest word (word assignment)
    # * original vectors
    # * multiassignment

    daid2_label = dict(zip(daids, _label_list))

    invindex = InvertedIndex(words, wordflann, idx2_dvec, idx2_daid, idx2_dfx,
                             daids, daid2_label)
    # Decrement reference count so memory can be cleared in the next function
    del words, idx2_dvec, idx2_daid, idx2_dfx, daids, daid2_label
    del _vecs_list, _label_list
    if with_internals:
        compute_data_internals_(invindex, qparams, memtrack=memtrack,
                                delete_rawvecs=delete_rawvecs)  # 99%
    return invindex
@profile
def compute_data_internals_(invindex, qparams, memtrack=None,
                            delete_rawvecs=True):
    """
    Builds each of the inverted index internals and stores them on
    ``invindex`` in place.

    Args:
        invindex (InvertedIndex): object for fast vocab lookup
        qparams (QueryParams): hyper-parameters
        memtrack (None): not used here (the tracking calls are commented out)
        delete_rawvecs (bool): if True, ``invindex.idx2_dvec`` is set to None
            once the residual vectors have been computed

    Returns:
        None

    Example:
        >>> from ibeis.model.hots.smk.smk_repr import *  # NOQA
        >>> from ibeis.model.hots.smk import smk_debug
        >>> ibs, annots_df, daids, qaids, invindex, qreq_ = smk_debug.testdata_raw_internals0()
        >>> compute_data_internals_(invindex, qreq_.qparams)

    Ignore:
        idx2_vec = idx2_dvec
        wx2_maws = _wx2_maws  # NOQA
    """
    # Hyper-parameters
    aggregate = qparams.aggregate
    smk_alpha = qparams.smk_alpha
    smk_thresh = qparams.smk_thresh
    massign_alpha = qparams.massign_alpha
    massign_sigma = qparams.massign_sigma
    massign_equal_weights = qparams.massign_equal_weights
    vocab_weighting = qparams.vocab_weighting
    num_assign = 1  # single assignment for database side

    # Raw data already stored on the index
    idx2_vec = invindex.idx2_dvec
    idx2_dfx = invindex.idx2_dfx
    idx2_daid = invindex.idx2_daid
    daids = invindex.daids
    wordflann = invindex.wordflann
    words = invindex.words
    daid2_label = invindex.daid2_label
    all_wxs = np.arange(len(words))

    if not ut.QUIET:
        print('[smk_repr] compute_data_internals_')
    if ut.VERBOSE:
        print('[smk_repr] * len(daids) = %r' % (len(daids),))
        print('[smk_repr] * len(words) = %r' % (len(words),))
        print('[smk_repr] * len(idx2_vec) = %r' % (len(idx2_vec),))
        print('[smk_repr] * aggregate = %r' % (aggregate,))
        print('[smk_repr] * smk_alpha = %r' % (smk_alpha,))
        print('[smk_repr] * smk_thresh = %r' % (smk_thresh,))

    # NOTE: loading these results from a ut.Cacher was tried previously and is
    # currently switched off; everything below is recomputed on each call.

    # Database word assignments (perform single assignment on database side)
    wx2_idxs, _wx2_maws, idx2_wxs = smk_index.assign_to_words_(
        wordflann, words, idx2_vec, num_assign, massign_alpha,
        massign_sigma, massign_equal_weights)

    if ut.DEBUG2:
        assert len(idx2_wxs) == len(idx2_vec)
        assert len(wx2_idxs.keys()) == len(_wx2_maws.keys())
        assert len(wx2_idxs.keys()) <= len(words)
        try:
            # Only warn when some word received no descriptors
            assert len(wx2_idxs.keys()) == len(words)
        except AssertionError as ex:
            ut.printex(ex, iswarning=True)

    # Database word inverse-document-frequency (idf weights)
    wx2_idf = smk_index.compute_word_idf_(
        all_wxs, wx2_idxs, idx2_daid, daids, daid2_label, vocab_weighting,
        verbose=True)

    if ut.DEBUG2:
        assert len(wx2_idf) == len(wx2_idf.keys())

    # Compute (normalized) residual vectors and inverse mappings
    residual_tup = smk_index.compute_residuals_(
        words, wx2_idxs, _wx2_maws, idx2_vec, idx2_daid, idx2_dfx,
        aggregate, verbose=True)
    wx2_drvecs, wx2_aids, wx2_fxs, wx2_dmaws, wx2_dflags = residual_tup
    del residual_tup

    if not ut.QUIET:
        print('[smk_repr] unloading idx2_vec')
    if delete_rawvecs:
        # Try to save some memory
        del _wx2_maws
        invindex.idx2_dvec = None
        del idx2_vec

    # Compute annotation normalization factor
    daid2_sccw = smk_index.compute_data_sccw_(
        idx2_daid, wx2_drvecs, wx2_dflags, wx2_aids, wx2_idf, wx2_dmaws,
        smk_alpha, smk_thresh, verbose=True)

    # Store information
    invindex.idx2_wxs = idx2_wxs      # stacked index -> word indexes (might not be needed)
    invindex.wx2_idxs = wx2_idxs
    invindex.wx2_idf = wx2_idf
    invindex.wx2_drvecs = wx2_drvecs
    invindex.wx2_dflags = wx2_dflags  # flag nan rvecs
    invindex.wx2_aids = wx2_aids      # needed for asmk
    invindex.wx2_fxs = wx2_fxs        # needed for asmk
    invindex.wx2_dmaws = wx2_dmaws    # needed for asmk
    invindex.daid2_sccw = daid2_sccw

    if ut.DEBUG2:
        from ibeis.model.hots.smk import smk_debug
        smk_debug.check_invindex_wx2(invindex)
if __name__ == '__main__':
    # CommandLine:
    #     python -m ibeis.model.hots.smk.smk_repr
    #     python -m ibeis.model.hots.smk.smk_repr --allexamples
    #     python -m ibeis.model.hots.smk.smk_repr --allexamples --noface --nosrc
    import multiprocessing
    import utool as ut  # NOQA
    multiprocessing.freeze_support()  # for win32
    # Run the doctests defined in this module
    ut.doctest_funcs()