# -*- coding: utf-8 -*-
"""
TODO:
    Define easy-to-use classes and functions for the following concepts
    (a toy numpy sketch of the first three appears just after the imports):

    Vocabulary / Dictionary / Codebook -
        centroids that partition descriptor space.  Many methods can be used to
        define a vocabulary.  The simplest technique is K-Means clustering.  Other
        learning algorithms can be implemented.

    Vocabulary Quantizer -
        Quantizes / Codes a raw descriptor by assigning it to one or more visual words with assignment weights.
        Can be implemented using simple approximate nearest neighbor to centroids or via tree partitioning or
        some other method.

    Inverted Index -
        Uses the vocabulary to index quantized descriptors.
        Needs add / remove methods that add and remove images as sets of descriptors.
        Needs to update the vocabulary and recompute any internal image representations.
        Needs to encode individual images or subimages.

    Vocabulary Matching -
        Uses the inverted index to match individual or aggregated features between query and database images


"""
from __future__ import absolute_import, division, print_function
import utool as ut
import numpy as np
import six
import ibeis
from ibeis.model.hots import hstypes
from ibeis.model.hots.smk import smk_index
from ibeis.model.hots.smk import smk_repr
from ibeis.model.hots.smk import smk_match
from ibeis.model.hots.smk import smk_scoring
from ibeis.model.hots import query_request
(print, print_, printDBG, rrr, profile) = ut.inject(__name__, '[smk_debug]')
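

# The module docstring above names four concepts. The sketch below is a
# self-contained toy illustration of the first three (vocabulary, quantizer,
# inverted index) using only numpy. It is hypothetical demo code for
# orientation only, not the ibeis implementation (which lives in smk_index /
# smk_repr and assigns words with FLANN).
def _toy_vocab_sketch(rng=np.random):
    """ minimal vocabulary / quantizer / inverted-index demo (numpy only) """
    descriptors = rng.rand(100, 8)  # raw descriptors (one per row)
    # "vocabulary": here just randomly sampled centroids; real code uses K-Means
    words = descriptors[rng.choice(100, 10, replace=False)]
    # "quantizer": single assignment of each descriptor to its nearest word
    dists = ((descriptors[:, None, :] - words[None, :, :]) ** 2).sum(axis=2)
    idx2_wx = dists.argmin(axis=1)
    # "inverted index": word index -> indexes of descriptors assigned to it
    wx2_idxs = {wx: np.where(idx2_wx == wx)[0] for wx in range(len(words))}
    return words, idx2_wx, wx2_idxs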


# <TESTDATA>


def testdata_ibeis(**kwargs):
    """
    DEPRECATE
    Step 1 builds ibs for testing

    Example:
        >>> from ibeis.model.hots.smk.smk_debug import *  # NOQA
        >>> kwargs = {}
    """
    print(' === Test Data IBEIS ===')
    print('kwargs = ' + ut.dict_str(kwargs))
    print('[smk_debug] testdata_ibeis')
    db = kwargs.get('db', ut.get_argval('--db', str, 'PZ_MTEST'))
    #with ut.Indenter('ENSURE'):
    if db == 'PZ_MTEST':
        ibeis.ensure_pz_mtest()
    ibs = ibeis.opendb(db=db)
    ibs._default_config()
    aggregate = kwargs.get('aggregate', ut.get_argflag(('--agg', '--aggregate')))
    nWords = kwargs.get(
        'nWords', ut.get_argval(('--nWords', '--nCentroids'), int, default=8E3))
    nAssign = kwargs.get(
        'nAssign', ut.get_argval(('--nAssign', '--K'), int, default=10))
    # Configs
    ibs.cfg.query_cfg.pipeline_root = 'smk'
    ibs.cfg.query_cfg.smk_cfg.aggregate = aggregate
    ibs.cfg.query_cfg.smk_cfg.smk_alpha = 3
    ibs.cfg.query_cfg.smk_cfg.smk_thresh = 0
    ibs.cfg.query_cfg.smk_cfg.vocabtrain_cfg.nWords = nWords
    ibs.cfg.query_cfg.smk_cfg.vocabassign_cfg.nAssign = nAssign
    if ut.VERYVERBOSE:
        ibs.cfg.query_cfg.smk_cfg.printme3()
    return ibs


def testdata_ibeis2(cfgdict=None, **kwargs):
    """
    Step 2 selects training and test set

    Example:
        >>> from ibeis.model.hots.smk.smk_debug import *  # NOQA
        >>> kwargs = {}
    """
    from ibeis.model.hots.smk import smk_debug
    print('[smk_debug] testdata_ibeis2')
    ibs = smk_debug.testdata_ibeis(**kwargs)
    valid_aids = ibs.get_valid_aids()
    # Training/Database/Search set
    taids = valid_aids[:]
    daids = valid_aids
    #daids = valid_aids[1:10]
    #daids = valid_aids[0:3]
    #qaids = valid_aids[0::2]
    #qaids = valid_aids[0:2]
    #qaids = [37]  # NOQA new test case for PZ_MTEST
    #qaids = [valid_aids[0], valid_aids[4]]
    qaids = [valid_aids[0]]  # FIXME: can't set to a list right now
    # Merge caller overrides into the default config. (The local dict must not
    # shadow the cfgdict argument, or caller options are silently discarded.)
    cfgdict_ = {
        'vocab_taids': 'all',
    }
    if cfgdict is not None:
        cfgdict_.update(cfgdict)
    qreq_ = query_request.new_ibeis_query_request(ibs, qaids, daids, cfgdict_)
    qreq_.ibs = ibs  # Hack
    return ibs, taids, daids, qaids, qreq_


def testdata_dataframe(cfgdict=None, **kwargs):
    from ibeis.model.hots.smk import smk_debug
    ibs, taids, daids, qaids, qreq_ = smk_debug.testdata_ibeis2(cfgdict=cfgdict, **kwargs)
    print('[smk_debug] testdata_dataframe')
    # Pandas Annotation Dataframe
    annots_df = smk_repr.make_annot_df(ibs)
    nWords = qreq_.qparams.nWords
    return ibs, annots_df, taids, daids, qaids, qreq_, nWords


def testdata_words(**kwargs):
    from ibeis.model.hots.smk import smk_debug
    ibs, annots_df, taids, daids, qaids, qreq_, nWords = smk_debug.testdata_dataframe(**kwargs)
    print('[smk_debug] testdata_words')
    words = smk_index.learn_visual_words(annots_df, qreq_)
    return ibs, annots_df, daids, qaids, qreq_, words


def testdata_raw_internals0(**kwargs):
    from ibeis.model.hots.smk import smk_debug
    ibs, annots_df, daids, qaids, qreq_, words = smk_debug.testdata_words(**kwargs)
    qparams = qreq_.qparams
    print('[smk_debug] testdata_raw_internals0')
    with_internals = False
    invindex = smk_repr.index_data_annots(annots_df, daids, words, qparams, with_internals)
    return ibs, annots_df, daids, qaids, invindex, qreq_


def testdata_raw_internals1(**kwargs):
    from ibeis.model.hots.smk import smk_debug
    ibs, annots_df, daids, qaids, invindex, qreq_ = smk_debug.testdata_raw_internals0(**kwargs)
    qparams = qreq_.qparams
    print('[smk_debug] testdata_raw_internals1')
    words = invindex.words
    wordflann = invindex.wordflann
    idx2_vec = invindex.idx2_dvec
    nAssign = 1  # 1 for database
    massign_sigma = qparams.massign_sigma
    massign_alpha = qparams.massign_alpha
    massign_equal_weights = qparams.massign_equal_weights
    # TODO: Extract args from function via inspect
    _dbargs = (wordflann, words, idx2_vec, nAssign, massign_alpha,
               massign_sigma, massign_equal_weights)
    (wx2_idxs, wx2_dmaws, idx2_wxs) = smk_index.assign_to_words_(*_dbargs)
    invindex.wx2_idxs = wx2_idxs
    invindex.wx2_dmaws = wx2_dmaws
    invindex.idx2_wxs = idx2_wxs
    return ibs, annots_df, daids, qaids, invindex, wx2_idxs, qparams


def testdata_raw_internals1_5(**kwargs):
    """
    contains internal data up to idf weights

    Example:
        >>> from ibeis.model.hots.smk.smk_debug import *  # NOQA
    """
    from ibeis.model.hots.smk import smk_debug
    ibs, annots_df, daids, qaids, invindex, wx2_idxs, qparams = smk_debug.testdata_raw_internals1(**kwargs)
    print('[smk_debug] testdata_raw_internals1_5')
    words = invindex.words
    wx_series = np.arange(len(words))
    idx2_aid = invindex.idx2_daid
    wx2_idf = smk_index.compute_word_idf_(wx_series, wx2_idxs, idx2_aid, daids)
    invindex.wx2_idf = wx2_idf
    return ibs, annots_df, daids, qaids, invindex, wx2_idxs, qparams
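

# For orientation: the textbook inverse-document-frequency that
# compute_word_idf_ is expected to approximate, one weight per word, where a
# "document" is an annotation (aid). This is a hedged reference sketch; the
# actual ibeis implementation may smooth, vectorize, or clip differently.
def _naive_word_idf_sketch(wx2_idxs, idx2_aid, daids):
    nDocs = len(daids)
    wx2_idf = {}
    for wx, idxs in six.iteritems(wx2_idxs):
        # number of annotations with at least one descriptor assigned to this word
        nDocsWithWord = len(set(idx2_aid[idx] for idx in idxs))
        wx2_idf[wx] = np.log(nDocs / nDocsWithWord) if nDocsWithWord > 0 else 0.0
    return wx2_idf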


def testdata_compute_data_sccw(**kwargs):
    """
    Example:
        >>> from ibeis.model.hots.smk.smk_debug import *  # NOQA
    """
    from ibeis.model.hots.smk import smk_debug
    ibs, annots_df, daids, qaids, invindex, wx2_idxs, qparams = smk_debug.testdata_raw_internals1_5(**kwargs)
    print('[smk_debug] testdata_compute_data_sccw')
    aggregate = ibs.cfg.query_cfg.smk_cfg.aggregate
    idx2_vec = invindex.idx2_dvec
    idx2_fx = invindex.idx2_dfx
    wx2_dmaws = invindex.wx2_dmaws
    idx2_aid = invindex.idx2_daid
    words = invindex.words
    wx2_idf = invindex.wx2_idf
    wx2_drvecs, wx2_aids, wx2_fxs, wx2_dmaws, wx2_dflags = smk_index.compute_residuals_(
        words, wx2_idxs, wx2_dmaws, idx2_vec, idx2_aid, idx2_fx, aggregate)
    invindex.wx2_dmaws = wx2_dmaws
    invindex.wx2_drvecs = wx2_drvecs
    invindex.wx2_dflags = wx2_dflags
    return ibs, annots_df, invindex, wx2_idxs, wx2_idf, wx2_drvecs, wx2_aids, qparams
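

# For orientation: under the standard VLAD/SMK definition, a residual vector is
# the L2-normalized difference between a descriptor and its assigned word, and
# with aggregate=True the residuals within a word are summed before
# normalization. A hedged single-word sketch of that formulation (an
# assumption, not the verbatim compute_residuals_ code):
def _naive_residuals_sketch(word, vecs, aggregate=False):
    import numpy.linalg as npl
    rvecs = vecs.astype(np.float64) - word.astype(np.float64)[None, :]
    if aggregate:
        rvecs = rvecs.sum(axis=0)[None, :]
    norms = npl.norm(rvecs, axis=1)
    norms[norms == 0] = 1.0  # descriptors equal to their word give zero residuals
    return rvecs / norms[:, None]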


def testdata_query_repr(**kwargs):
    """
    Example:
        >>> from ibeis.model.hots.smk.smk_debug import *  # NOQA
    """
    from ibeis.model.hots.smk import smk_debug
    ibs, annots_df, daids, qaids, invindex, wx2_idxs, qparams = smk_debug.testdata_raw_internals1_5(**kwargs)
    print('[smk_debug] testdata_query_repr')
    qaid = qaids[0]
    #qreq_ = query_request.new_ibeis_query_request(ibs, qaids, daids)
    return ibs, annots_df, qaid, invindex, qparams


def testdata_sccw_sum(**kwargs):
    from ibeis.model.hots.smk import smk_debug
    from ibeis.model.hots.smk import smk_index
    ibs, annots_df, qaid, invindex, qparams = smk_debug.testdata_query_repr(**kwargs)
    aggregate = qparams.aggregate
    smk_alpha = qparams.smk_alpha
    smk_thresh = qparams.smk_thresh
    nAssign = qparams.nAssign
    massign_sigma = qparams.massign_sigma
    massign_alpha = qparams.massign_alpha
    massign_equal_weights = qparams.massign_equal_weights
    wx2_idf = invindex.wx2_idf
    words = invindex.words
    wordflann = invindex.wordflann
    #qfx2_vec = annots_df['vecs'][qaid].values
    qfx2_vec = annots_df['vecs'][qaid]
    # Assign query to (multiple) words
    _wx2_qfxs, wx2_qmaws, qfx2_wxs = smk_index.assign_to_words_(
        wordflann, words, qfx2_vec, nAssign, massign_alpha,
        massign_sigma, massign_equal_weights)
    # Hack to make implementing asmk easier, very redundant
    qfx2_aid = np.array([qaid] * len(qfx2_wxs), dtype=hstypes.INTEGER_TYPE)
    qfx2_qfx = np.arange(len(qfx2_vec))
    # Compute query residuals
    wx2_qrvecs, wx2_qaids, wx2_qfxs, wx2_qmaws, wx2_flags = smk_index.compute_residuals_(
        words, _wx2_qfxs, wx2_qmaws, qfx2_vec, qfx2_aid, qfx2_qfx, aggregate)
    # Compute query sccw
    if ut.VERBOSE:
        print('[smk_index] Query TF smk_alpha=%r, smk_thresh=%r' % (smk_alpha, smk_thresh))
    wx_sublist = np.array(list(wx2_qrvecs.keys()), dtype=hstypes.INDEX_TYPE)
    idf_list = [wx2_idf[wx] for wx in wx_sublist]
    rvecs_list = [wx2_qrvecs[wx] for wx in wx_sublist]
    qmaws_list = [wx2_qmaws[wx] for wx in wx_sublist]
    qflags_list = [wx2_flags[wx] for wx in wx_sublist]
    return idf_list, rvecs_list, qflags_list, qmaws_list, smk_alpha, smk_thresh
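

# For orientation: the self-consistency weight ("sccw") computed from lists
# like the ones returned above is, in the standard SMK formulation, the
# normalizer gamma(X) = (sum_w idf_w * sum(phi(R_w R_w^T)))**-0.5 with
# selectivity phi(u) = sign(u) * |u|**alpha thresholded at smk_thresh. A
# hedged reference sketch of that formulation (not the verbatim
# smk_scoring.sccw_summation code):
def _naive_sccw_sketch(rvecs_list, idf_list, smk_alpha, smk_thresh):
    total = 0.0
    for rvecs, idf in zip(rvecs_list, idf_list):
        simmat = np.dot(rvecs, rvecs.T).astype(np.float64)
        scores = np.sign(simmat) * np.power(np.abs(simmat), smk_alpha)
        scores[scores <= smk_thresh] = 0
        total += idf * scores.sum()
    return 1.0 / np.sqrt(total) if total > 0 else 1.0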


def testdata_internals_full(delete_rawvecs=True, **kwargs):
    """
    Example:
        >>> from ibeis.model.hots.smk.smk_debug import *  # NOQA
        >>> kwargs = {}
    """
    from ibeis.model.hots.smk import smk_debug
    ibs, annots_df, daids, qaids, qreq_, words = smk_debug.testdata_words(**kwargs)
    print('[smk_debug] testdata_internals_full')
    with_internals = True
    qparams = qreq_.qparams
    invindex = smk_repr.index_data_annots(
        annots_df, daids, words, qparams, with_internals, delete_rawvecs=delete_rawvecs)
    return ibs, annots_df, daids, qaids, invindex, qreq_


def testdata_match_kernel_L2(**kwargs):
    """
    Example:
        >>> from ibeis.model.hots.smk.smk_debug import *  # NOQA
    """
    from ibeis.model.hots.smk import smk_debug
    ibs, annots_df, daids, qaids, invindex, qreq_ = smk_debug.testdata_internals_full(**kwargs)
    print('[smk_debug] testdata_match_kernel_L2')
    qparams = qreq_.qparams
    qaid = qaids[0]
    qindex = smk_repr.new_qindex(annots_df, qaid, invindex, qparams)
    return ibs, invindex, qindex, qparams


def testdata_nonagg_rvec():
    from ibeis.model.hots.smk import smk_debug
    ibs, annots_df, daids, qaids, invindex, wx2_idxs, qparams = smk_debug.testdata_raw_internals1()
    words = invindex.words
    idx2_vec = invindex.idx2_dvec
    wx2_dmaws = invindex.wx2_dmaws
    idx2_daid = invindex.idx2_daid
    # wx_sublist denotes which words are used
    wx_sublist = np.array(list(wx2_idxs.keys()))
    idxs_list = [wx2_idxs[wx].astype(np.int32) for wx in wx_sublist]
    maws_list = [wx2_dmaws[wx] for wx in wx_sublist]
    aids_list = [idx2_daid.take(idxs) for idxs in idxs_list]
    return words, wx_sublist, aids_list, idxs_list, idx2_vec, maws_list


def get_test_float_norm_rvecs(num=1000, dim=None, rng=np.random):
    import numpy.linalg as npl
    from ibeis.model.hots import hstypes
    if dim is None:
        dim = hstypes.VEC_DIM
    rvecs_float = rng.normal(size=(num, dim))
    rvecs_norm_float = rvecs_float / npl.norm(rvecs_float, axis=1)[:, None]
    return rvecs_norm_float


def get_test_rvecs(num=1000, dim=None, nanrows=None, rng=np.random):
    from ibeis.model.hots import hstypes
    max_ = hstypes.RVEC_MAX
    min_ = hstypes.RVEC_MIN
    dtype = hstypes.RVEC_TYPE
    if dim is None:
        dim = hstypes.VEC_DIM
    dtype_range = max_ - min_
    rvecs_float = rng.normal(size=(num, dim))
    rvecs = ((dtype_range * rvecs_float) - hstypes.RVEC_MIN).astype(dtype)
    if nanrows is not None:
        rvecs[nanrows] = np.nan
    """
    dtype = np.int8
    max_ = 128
    min_ = -128
    nanrows = 1
    import numpy.ma as ma
    if dtype not in [np.float16, np.float32, np.float64]:
        rvecs.view(ma.MaskedArray)
    np.ma.array([1,2,3,4,5], dtype=int)
    """
    return rvecs


def get_test_maws(rvecs, rng=np.random):
    from ibeis.model.hots import hstypes
    return (rng.rand(rvecs.shape[0])).astype(hstypes.FLOAT_TYPE)


def testdata_match_kernel_L0():
    from ibeis.model.hots.smk import smk_debug
    from ibeis.model.hots import hstypes
    np.random.seed(0)
    smk_alpha = 3.0
    smk_thresh = 0.0
    num_qrvecs_per_word = [0, 1, 3, 4, 5]
    num_drvecs_per_word = [0, 1, 2, 4, 6]
    qrvecs_list = [smk_debug.get_test_rvecs(n, dim=2) for n in num_qrvecs_per_word]
    drvecs_list = [smk_debug.get_test_rvecs(n, dim=2) for n in num_drvecs_per_word]
    daids_list = [list(range(len(rvecs))) for rvecs in drvecs_list]
    qaids_list = [[42] * len(rvecs) for rvecs in qrvecs_list]
    qmaws_list = [smk_debug.get_test_maws(rvecs) for rvecs in qrvecs_list]
    dmaws_list = [np.ones(rvecs.shape[0], dtype=hstypes.FLOAT_TYPE) for rvecs in drvecs_list]
    idf_list = [1.0 for _ in qrvecs_list]
    daid2_sccw = {daid: 1.0 for daid in range(10)}
    query_sccw = smk_scoring.sccw_summation(qrvecs_list, idf_list, qmaws_list, smk_alpha, smk_thresh)
    qaid2_sccw = {42: query_sccw}
    core1 = smk_alpha, smk_thresh, query_sccw, daids_list, daid2_sccw
    core2 = qrvecs_list, drvecs_list, qmaws_list, dmaws_list, idf_list
    extra = qaid2_sccw, qaids_list
    return core1, core2, extra
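

# For orientation: given core tuples like the ones above, the SMK match score
# between a query X and a database annotation Y has the form
#   score(X, Y) = gamma(X) * gamma(Y) * sum_w idf_w * sum(phi(qrvec . drvec))
# under the standard SMK formulation. A hedged sketch of that kernel (not the
# verbatim ibeis match kernel):
def _naive_smk_score_sketch(qrvecs_list, drvecs_list, idf_list,
                            query_sccw, data_sccw, smk_alpha, smk_thresh):
    total = 0.0
    for qrvecs, drvecs, idf in zip(qrvecs_list, drvecs_list, idf_list):
        if len(qrvecs) == 0 or len(drvecs) == 0:
            continue  # word unused by the query or the database
        simmat = np.dot(qrvecs.astype(np.float64), drvecs.astype(np.float64).T)
        scores = np.sign(simmat) * np.power(np.abs(simmat), smk_alpha)
        scores[scores <= smk_thresh] = 0
        total += idf * scores.sum()
    return query_sccw * data_sccw * total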


def testdata_similarity_function():
    from ibeis.model.hots.smk import smk_debug
    qrvecs_list = [smk_debug.get_test_rvecs(_) for _ in range(10)]
    drvecs_list = [smk_debug.get_test_rvecs(_) for _ in range(10)]
    return qrvecs_list, drvecs_list


def testdata_apply_weights():
    from ibeis.model.hots.smk import smk_debug
    from ibeis.model.hots import hstypes
    qrvecs_list, drvecs_list = smk_debug.testdata_similarity_function()
    simmat_list = smk_scoring.similarity_function(qrvecs_list, drvecs_list)
    qmaws_list = [smk_debug.get_test_maws(rvecs) for rvecs in qrvecs_list]
    dmaws_list = [np.ones(rvecs.shape[0], dtype=hstypes.FLOAT_TYPE) for rvecs in qrvecs_list]
    idf_list = [1 for _ in qrvecs_list]
    return simmat_list, qmaws_list, dmaws_list, idf_list
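

# For orientation: apply_weights is expected to scale each per-word
# query-vs-database similarity matrix by the query maws (rows), the database
# maws (columns), and the word idf. A hedged sketch of that contract (an
# assumption, not the verbatim smk_scoring.apply_weights code):
def _naive_apply_weights_sketch(simmat_list, qmaws_list, dmaws_list, idf_list):
    return [idf * (qmaws[:, None] * simmat * dmaws[None, :])
            for simmat, qmaws, dmaws, idf in
            zip(simmat_list, qmaws_list, dmaws_list, idf_list)]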


def testdata_selectivity_function():
    from ibeis.model.hots.smk import smk_debug
    smk_alpha = 3
    smk_thresh = 0
    simmat_list, qmaws_list, dmaws_list, idf_list = smk_debug.testdata_apply_weights()
    wsim_list = smk_scoring.apply_weights(simmat_list, qmaws_list, dmaws_list, idf_list)
    return wsim_list, smk_alpha, smk_thresh

# </TESTDATA>
#L--------
#+--------
# <ASSERTS>


def test_sccw_cache():
    ibs, annots_df, taids, daids, qaids, qreq_, nWords = testdata_dataframe()
    smk_alpha = ibs.cfg.query_cfg.smk_cfg.smk_alpha
    smk_thresh = ibs.cfg.query_cfg.smk_cfg.smk_thresh
    qparams = qreq_.qparams
    words = smk_index.learn_visual_words(annots_df, taids, nWords)
    with_internals = True
    invindex = smk_repr.index_data_annots(annots_df, daids, words, qparams, with_internals)
    idx2_daid = invindex.idx2_daid
    wx2_drvecs = invindex.wx2_drvecs
    wx2_idf = invindex.wx2_idf
    wx2_aids = invindex.wx2_aids
    wx2_dmaws = invindex.wx2_dmaws
    daids = invindex.daids
    daid2_sccw1 = smk_index.compute_data_sccw_(
        idx2_daid, wx2_drvecs, wx2_aids, wx2_idf, wx2_dmaws,
        smk_alpha, smk_thresh, use_cache=True)
    daid2_sccw2 = smk_index.compute_data_sccw_(
        idx2_daid, wx2_drvecs, wx2_aids, wx2_idf, wx2_dmaws,
        smk_alpha, smk_thresh, use_cache=False)
    daid2_sccw3 = smk_index.compute_data_sccw_(
        idx2_daid, wx2_drvecs, wx2_aids, wx2_idf, wx2_dmaws,
        smk_alpha, smk_thresh, use_cache=True)
    check_daid2_sccw(daid2_sccw1)
    check_daid2_sccw(daid2_sccw2)
    check_daid2_sccw(daid2_sccw3)
    if not np.all(daid2_sccw2 == daid2_sccw3):
        raise AssertionError('caching error in sccw')
    if not np.all(daid2_sccw1 == daid2_sccw2):
        raise AssertionError('cache outdated in sccw')


def check_invindex_wx2(invindex):
    words = invindex.words
    #wx2_idf = invindex.wx2_idf
    wx2_rvecs = invindex.wx2_drvecs
    #wx2_idxs = invindex.wx2_idxs
    wx2_aids = invindex.wx2_aids  # needed for asmk
    wx2_fxs = invindex.wx2_fxs    # needed for asmk
    check_wx2(words, wx2_rvecs, wx2_aids, wx2_fxs)


def check_wx2(words=None, wx2_rvecs=None, wx2_aids=None, wx2_fxs=None):
    """ provides debug info for mappings from word indexes to values """
    if words is None:
        nWords = max(wx2_rvecs.keys()) + 1
    else:
        nWords = len(words)
    print('[smk_debug] checking wx2 for %d words' % (nWords))

    def missing_word(wx2_xxx, wx=None):
        return (wx2_xxx is not None) and (wx not in wx2_xxx)

    def missing_word_or_None(wx2_xxx, wx=None):
        return (wx2_xxx is None) or (wx not in wx2_xxx)

    def same_size_or_None(wx2_xxx1, wx2_xxx2, wx=None):
        if (wx2_xxx1 is None or wx2_xxx2 is None):
            return True
        if missing_word(wx2_xxx1, wx) and missing_word(wx2_xxx2, wx):
            return True
        return len(wx2_xxx1[wx]) == len(wx2_xxx2[wx])

    nMissing = 0
    for wx in range(nWords):
        if (missing_word(wx2_fxs, wx) or missing_word(wx2_aids, wx) or
                missing_word(wx2_rvecs, wx)):
            assert missing_word_or_None(wx2_aids, wx), 'in one but not others'
            assert missing_word_or_None(wx2_rvecs, wx), 'in one but not others'
            assert missing_word_or_None(wx2_fxs, wx), 'in one but not others'
            nMissing += 1
        assert same_size_or_None(wx2_aids, wx2_rvecs, wx=wx)
        assert same_size_or_None(wx2_aids, wx2_fxs, wx=wx)
        assert same_size_or_None(wx2_rvecs, wx2_fxs, wx=wx)
    print('[smk_debug] %d words had 0 members' % nMissing)
    print(' lenstats(wx2_rvecs) = ' + wx_len_stats(wx2_rvecs))
    print(' lenstats(wx2_aids)  = ' + wx_len_stats(wx2_aids))
    print(' lenstats(wx2_fxs)   = ' + wx_len_stats(wx2_fxs))


def check_wx2_rvecs(wx2_rvecs, verbose=True):
    flag = True
    for wx, rvecs in six.iteritems(wx2_rvecs):
        shape = rvecs.shape
        if shape[0] == 0:
            print('word[wx={wx}] has no rvecs'.format(wx=wx))
            flag = False
        if np.any(np.isnan(rvecs)):
            #rvecs[:] = 1 / np.sqrt(128)
            print('word[wx={wx}] has nans'.format(wx=wx))
            flag = False
    if verbose:
        if flag:
            print('check_wx2_rvecs passed')
        else:
            print('check_wx2_rvecs failed')
    return flag


def check_wx2_idxs(wx2_idxs, nWords):
    wx_list = list(wx2_idxs.keys())
    missing_vals, missing_indices, duplicate_items = ut.debug_consec_list(wx_list)
    empty_wxs = [wx for wx, idxs in six.iteritems(wx2_idxs) if len(idxs) == 0]
    print('[smk_debug] num indexes with no support: %r' % len(missing_vals))
    print('[smk_debug] num indexes with empty idxs: %r' % len(empty_wxs))


def check_wx2_rvecs2(invindex, wx2_rvecs=None, wx2_idxs=None, idx2_vec=None, verbose=True):
    words = invindex.words
    if wx2_rvecs is None:
        if verbose:
            print('[smk_debug] check_wx2_rvecs2 inverted index')
        wx2_rvecs = invindex.wx2_drvecs
        wx2_idxs = invindex.wx2_idxs
        idx2_vec = invindex.idx2_dvec
    else:
        if verbose:
            print('[smk_debug] check_wx2_rvecs2 queryrepr index')
    flag = True
    nan_wxs = []
    no_wxs = []
    for wx, rvecs in six.iteritems(wx2_rvecs):
        shape = rvecs.shape
        if shape[0] == 0:
            #print('word[wx={wx}] has no rvecs'.format(wx=wx))
            no_wxs.append(wx)
        for sx in range(shape[0]):
            if np.any(np.isnan(rvecs[sx])):
                #rvecs[:] = 1 / np.sqrt(128)
                nan_wxs.append((wx, sx))
    if verbose:
        print('[smk_debug] %d words had no residuals' % len(no_wxs))
        print('[smk_debug] %d words have nans' % len(nan_wxs))
    if not (wx2_rvecs is None or wx2_idxs is None or idx2_vec is None):
        failed_wx = []
        for count, (wx, sx) in enumerate(nan_wxs):
            rvec = wx2_rvecs[wx][sx]
            idxs = wx2_idxs[wx][sx]
            dvec = idx2_vec[idxs]
            word = words[wx]
            truth = (word == dvec)
            if not np.all(truth):
                failed_wx.append(wx)
                if verbose:
                    print('+=====================')
                    print('Bad RVEC #%d' % count)
                    print('[smk_debug] wx=%r, sx=%r was nan and not equal to its word' % (wx, sx))
                    print('[smk_debug] rvec=%r ' % (rvec,))
                    print('[smk_debug] dvec=%r ' % (dvec,))
                    print('[smk_debug] word=%r ' % (word,))
                    print('[smk_debug] truth=%r ' % (truth,))
                flag = False
        if len(failed_wx) == 0:
            if verbose:
                print('[smk_debug] all nan rvecs were equal to their words')
    return flag


def assert_single_assigned_maws(maws_list):
    try:
        assert all([np.all(np.array(maws) == 1) for maws in maws_list]), 'cannot multiassign database'
    except AssertionError:
        print(maws_list)
        raise


def check_data_smksumm(aididf_list, aidrvecs_list):
    #sccw_list = []
    try:
        for count, (idf_list, rvecs_list) in enumerate(zip(aididf_list, aidrvecs_list)):
            assert len(idf_list) == len(rvecs_list), 'one list for each word'
            #sccw = smk_scoring.sccw_summation(rvecs_list, idf_list, None, smk_alpha, smk_thresh)
    except Exception as ex:
        ut.printex(ex)
        #ut.embed()
        raise


def check_invindex(invindex, verbose=True):
    """
    Example:
        >>> from ibeis.model.hots.smk import smk_index
        >>> from ibeis.model.hots.smk import smk_debug
        >>> ibs, annots_df, taids, daids, qaids, qreq_, nWords = smk_debug.testdata_dataframe()
        >>> words = smk_index.learn_visual_words(annots_df, taids, nWords)
        >>> qparams = qreq_.qparams
        >>> invindex = smk_repr.index_data_annots(annots_df, daids, words, qparams)
    """
    daids = invindex.daids
    daid2_sccw = invindex.daid2_sccw
    check_daid2_sccw(daid2_sccw, verbose=verbose)
    assert daid2_sccw.shape[0] == daids.shape[0]
    if verbose:
        print('each aid has a sccw')


def check_daid2_sccw(daid2_sccw, verbose=True):
    daid2_sccw_values = daid2_sccw
    assert not np.any(np.isnan(daid2_sccw_values)), 'sccws are nan'
    if verbose:
        print('database sccws are not nan')
        print('database sccw stats:')
        print(ut.get_stats_str(daid2_sccw_values, newlines=True))


def check_dtype(annots_df):
    """
    Example:
        >>> from ibeis.model.hots.smk.smk_index import *  # NOQA
        >>> import ibeis
        >>> ibs = ibeis.opendb('PZ_MTEST')
        >>> annots_df = make_annot_df(ibs)
    """
    #ut.printex(Exception('check'), keys=['annots_df.index'])
    vecs = annots_df['vecs']
    kpts = annots_df['kpts']
    locals_ = locals()
    key_list = [
        'annots_df.index.dtype',
        'annots_df.columns.dtype',
        'annots_df.columns',
        'vecs.index.dtype',
        'kpts.index.dtype',
        #'vecs',
        #'kpts',
    ]
    ut.print_keys(key_list)


def check_rvecs_list_eq(rvecs_list, rvecs_list2):
    """
    Example:
        >>> rvecs_list, flag_list = smk_residual.compute_nonagg_rvecs(*_args1)  # 125 ms
        >>> rvecs_list2 = smk_speed.compute_nonagg_residuals_forloop(*_args1)
    """
    assert len(rvecs_list) == len(rvecs_list2)
    for rvecs, rvecs2 in zip(rvecs_list, rvecs_list2):
        try:
            assert len(rvecs) == len(rvecs2)
            assert rvecs.shape == rvecs2.shape
            #assert np.all(rvecs == rvecs2)
            np.testing.assert_equal(rvecs, rvecs2, verbose=True)
        except AssertionError:
            ut.print_keys([rvecs, rvecs2])
            raise


def check_qaid2_chipmatch(qaid2_chipmatch, qaids, verbose=True):
    try:
        assert isinstance(qaid2_chipmatch, dict), 'type(qaid2_chipmatch) = %r' % type(qaid2_chipmatch)
        qaid_list = list(qaid2_chipmatch.keys())
        _qaids = set(qaids)
        assert _qaids == set(qaid_list), 'something is wrong'
        print('has correct key. (len(keys) = %r)' % len(_qaids))
        chipmatch_list = list(qaid2_chipmatch.values())
        for count, daid2_chipmatch in enumerate(chipmatch_list):
            check_daid2_chipmatch(daid2_chipmatch)
    except Exception as ex:
        ut.printex(ex, keys=['qaid2_chipmatch', 'daid2_chipmatch', 'count'])
        raise


def check_daid2_chipmatch(daid2_chipmatch, verbose=True):
    print('[smk_debug] checking %d chipmatches' % len(daid2_chipmatch))
    ## Concatenate into full fmfsfk reprs
    #def concat_chipmatch(cmtup):
    #    fm_list = [_[0] for _ in cmtup]
    #    fs_list = [_[1] for _ in cmtup]
    #    fk_list = [_[2] for _ in cmtup]
    #    assert len(fm_list) == len(fs_list)
    #    assert len(fk_list) == len(fs_list)
    #    cmtup_old = (np.vstack(fm_list), np.hstack(fs_list), np.hstack(fk_list))
    #    assert len(cmtup_old[0]) == len(cmtup_old[1])
    #    assert len(cmtup_old[2]) == len(cmtup_old[1])
    #    return cmtup_old
    ##daid2_chipmatch = {}
    ##for daid, cmtup in six.iteritems(daid2_chipmatch_):
    ##    daid2_chipmatch[daid] = concat_chipmatch(cmtup)
    featmatches = 0
    daid2_fm, daid2_fs, daid2_fk = daid2_chipmatch
    for daid in six.iterkeys(daid2_fm):
        cmtup_old = (daid2_fm[daid], daid2_fs[daid], daid2_fk[daid])
        try:
            assert len(cmtup_old) == 3, ('cmtup_old = %r' % (cmtup_old,))
            (fm, fs, fk) = cmtup_old
            featmatches += len(fm)
            assert len(fm) == len(fs), ('fm.shape = %r, fs.shape=%r' % (fm.shape, fs.shape))
            assert len(fk) == len(fs), ('fk.shape = %r, fs.shape=%r' % (fk.shape, fs.shape))
            assert fm.shape[1] == 2
        except AssertionError as ex:
            ut.printex(ex, keys=['daid', 'cmtup_old'])
            raise
    print('[smk_debug] checked %d featmatches in %d chipmatches' % (featmatches, len(daid2_chipmatch)))

# </ASSERTS>
#L--------
#+--------
# <INFO>


def dbstr_qindex(qindex_=None):
    qindex = ut.get_localvar_from_stack('qindex')
    common_wxs = ut.get_localvar_from_stack('common_wxs')
    wx2_qaids = ut.get_localvar_from_stack('wx2_qaids')
    qindex.query_sccw
    qmaws_list = [qindex.wx2_maws[wx] for wx in common_wxs]
    qaids_list = [qindex.wx2_qaids[wx] for wx in common_wxs]
    qfxs_list = [qindex.wx2_qfxs[wx] for wx in common_wxs]
    qrvecs_list = [qindex.wx2_qrvecs[wx] for wx in common_wxs]
    qaids_list = [wx2_qaids[wx] for wx in common_wxs]
    print('-- max --')
    print('list_depth(qaids_list) = %d' % ut.list_depth(qaids_list, max))
    print('list_depth(qmaws_list) = %d' % ut.list_depth(qmaws_list, max))
    print('list_depth(qfxs_list) = %d' % ut.list_depth(qfxs_list, max))
    print('list_depth(qrvecs_list) = %d' % ut.list_depth(qrvecs_list, max))
    print('-- min --')
    print('list_depth(qaids_list) = %d' % ut.list_depth(qaids_list, min))
    print('list_depth(qmaws_list) = %d' % ut.list_depth(qmaws_list, min))
    print('list_depth(qfxs_list) = %d' % ut.list_depth(qfxs_list, min))
    print('list_depth(qrvecs_list) = %d' % ut.list_depth(qrvecs_list, min))
    print('-- sig --')
    print('depth_profile(qaids_list) = %r' % ut.depth_profile(qaids_list))
    print('depth_profile(qmaws_list) = %r' % ut.depth_profile(qmaws_list))
    print('depth_profile(qfxs_list) = %r' % ut.depth_profile(qfxs_list))
    print('depth_profile(qrvecs_list) = %r' % ut.depth_profile(ut.depth_profile(qrvecs_list)))
    print(qfxs_list[0:3])
    print(qaids_list[0:3])
    print(qmaws_list[0:3])


def wx_len_stats(wx2_xxx):
    """
    Example:
        >>> from ibeis.model.hots.smk.smk_debug import *  # NOQA
        >>> from ibeis.model.hots.smk import smk_debug
        >>> from ibeis.model.hots.smk import smk_repr
        >>> ibs, annots_df, taids, daids, qaids, qreq_, nWords = smk_debug.testdata_dataframe()
        >>> qreq_ = query_request.new_ibeis_query_request(ibs, qaids, daids)
        >>> qparams = qreq_.qparams
        >>> invindex = smk_repr.index_data_annots(annots_df, daids, words)
        >>> qaid = qaids[0]
        >>> wx2_qrvecs, wx2_qaids, wx2_qfxs, query_sccw = smk_repr.new_qindex(annots_df, qaid, invindex, qparams)
        >>> print(wx_len_stats(wx2_qrvecs))
    """
    import utool as ut
    if wx2_xxx is None:
        return 'None'
    if isinstance(wx2_xxx, dict):
        #len_list = [len(xxx) for xxx in ]
        val_list = wx2_xxx.values()
    else:
        val_list = wx2_xxx
    try:
        len_list = [len(xxx) for xxx in val_list]
        statdict = ut.get_stats(len_list)
        return ut.dict_str(statdict, strvals=True, newlines=False)
    except Exception as ex:
        ut.printex(ex)
        for count, xxx in enumerate(val_list):
            try:
                len(xxx)
            except Exception:
                print('failed on count=%r' % (count,))
                print('failed on xxx=%r' % (xxx,))
                pass
        raise


def display_info(ibs, invindex, annots_df):
    from vtool import clustering2 as clustertool
    ################
    from ibeis.other import dbinfo
    print(ibs.get_infostr())
    dbinfo.get_dbinfo(ibs, verbose=True)
    ################
    print('Inverted Index Stats: vectors per word')
    print(ut.get_stats_str(map(len, invindex.wx2_idxs.values())))
    ################
    #qfx2_vec = annots_df['vecs'][1]
    centroids = invindex.words
    num_pca_dims = 2  # 3
    whiten = False
    kwd = dict(num_pca_dims=num_pca_dims, whiten=whiten)
    #clustertool.rrr()

    def makeplot_(fnum, prefix, data, labels='centroids', centroids=centroids):
        return clustertool.plot_centroids(data, centroids, labels=labels,
                                          fnum=fnum, prefix=prefix + '\n', **kwd)
    makeplot_(1, 'centroid vecs', centroids)
    #makeplot_(2, 'database vecs', invindex.idx2_dvec)
    #makeplot_(3, 'query vecs', qfx2_vec)
    #makeplot_(4, 'database vecs', invindex.idx2_dvec)
    #makeplot_(5, 'query vecs', qfx2_vec)
    #################


def vector_normal_stats(vectors):
    import numpy.linalg as npl
    norm_list = npl.norm(vectors, axis=1)
    #norm_list2 = np.sqrt((vectors ** 2).sum(axis=1))
    #assert np.all(norm_list == norm_list2)
    norm_stats = ut.get_stats(norm_list)
    print('normal_stats:' + ut.dict_str(norm_stats, newlines=False))


def vector_stats(vectors, name, verbose=True):
    line_list = []
    line_list.append('+--- Vector Stats --')
    line_list.append(' * vectors = %r' % name)
    key_list = ut.codeblock(
        '''
        vectors.shape
        vectors.dtype
        vectors.max()
        vectors.min()
        '''
    ).split('\n')
    strlist_ = ut.parse_locals_keylist(locals(), key_list)
    line_list.extend(strlist_)
    #line_list.append(vectors)
    line_list.append('L--- Vector Stats --')
    statstr = '\n'.join(line_list)
    if verbose:
        print(statstr)
    return statstr


def sift_stats():
    import ibeis
    ibs = ibeis.opendb('PZ_Mothers')
    aid_list = ibs.get_valid_aids()
    stacked_sift = np.vstack(ibs.get_annot_vecs(aid_list))
    vector_stats(stacked_sift, 'sift')
    # We see that SIFT vectors are actually normalized
    # between 0 and 512 and clamped to uint8
    vector_stats(stacked_sift.astype(np.float32) / 512.0, 'sift')


def invindex_dbgstr(invindex):
    """
    Example:
        >>> from ibeis.model.hots.smk.smk_debug import *  # NOQA
        >>> ibs, annots_df, daids, qaids, invindex = testdata_raw_internals0()
        >>> invindex_dbgstr(invindex)
    """
    print('+--- INVINDEX DBGSTR ---')
    print('called by %r' % (ut.get_caller_name(),))
    locals_ = {'invindex': invindex}
    #print(dictinfo(invindex.wx2_fxs))
    key_list = [
        'invindex.words.shape',
        'invindex.words.dtype',
        'invindex.daids.dtype',
        'invindex.idx2_dvec.shape',
        'invindex.idx2_dvec.dtype',
        'invindex.idx2_daid.shape',
        'invindex.idx2_daid.dtype',
        'invindex.idx2_dfx.shape',
        (dictinfo, 'invindex.daid2_sccw'),
        (dictinfo, 'invindex.wx2_drvecs'),
        (dictinfo, 'invindex.wx2_dmaws'),
        (dictinfo, 'invindex.wx2_dflags'),
        (dictinfo, 'invindex.wx2_idf'),
        (dictinfo, 'invindex.wx2_aids'),
        (dictinfo, 'invindex.wx2_fxs'),
        (dictinfo, 'invindex.wx2_idxs'),
    ]
    keystr_list = ut.parse_locals_keylist(locals_, key_list)
    append = keystr_list.append

    def stats_(arr):
        return wx_len_stats(arr)
    append('lenstats(invindex.wx2_idxs) = ' + stats_(invindex.wx2_idxs))
    #append('lenstats(invindex.wx2_idf) = ' + stats_(invindex.wx2_idf))
    append('lenstats(invindex.wx2_drvecs) = ' + stats_(invindex.wx2_drvecs))
    append('lenstats(invindex.wx2_aids) = ' + stats_(invindex.wx2_aids))

    def mapval(func, dict_):
        if isinstance(dict_, dict):
            return map(func, six.itervalues(dict_))
        else:
            return map(func, dict_)

    def isunique(aids):
        return len(set(aids)) == len(aids)

    if invindex.wx2_aids is not None:
        wx_series = list(invindex.wx2_aids.keys())
        aids_list = list(invindex.wx2_aids.values())
        nAids_list = list(map(len, aids_list))
        append('sum(mapval(len, invindex.wx2_aids)) = ' + str(sum(nAids_list)))
        probably_asmk = all(mapval(isunique, invindex.wx2_aids))
        if probably_asmk:
            append('All wx2_aids are unique. aggregate probably is True')
        else:
            append('Some wx2_aids are duplicates. aggregate probably is False')
        maxkey = wx_series[np.array(nAids_list).argmax()]
        print('wx2_aids[maxkey=%r] = \n' % (maxkey,) + str(invindex.wx2_aids[maxkey]))
    dbgstr = '\n'.join(keystr_list)
    print(dbgstr)
    print('L--- END INVINDEX DBGSTR ---')

# </INFO>
#L--------
#+--------
# <UTIL>


def dictinfo(dict_):
    if not isinstance(dict_, dict):
        return 'expected dict got %r' % type(dict_)
    keys = list(dict_.keys())
    vals = list(dict_.values())
    num_keys = len(keys)
    key_types = list(set(map(type, keys)))
    val_types = list(set(map(type, vals)))
    fmtstr_ = '\n' + ut.unindent('''
    * num_keys = {num_keys}
    * key_types = {key_types}
    * val_types = {val_types}
    '''.strip('\n'))
    if len(val_types) == 1:
        if val_types[0] == np.ndarray:
            # each key holds an ndarray
            val_shape_stats = ut.get_stats(set(map(np.shape, vals)), axis=0)
            val_shape_stats_str = ut.dict_str(val_shape_stats, strvals=True, newlines=False)
            val_dtypes = list(set([val.dtype for val in vals]))
            fmtstr_ += ut.unindent('''
            * val_shape_stats = {val_shape_stats_str}
            * val_dtypes = {val_dtypes}
            '''.strip('\n'))
        elif val_types[0] == list:
            # each key holds a list
            val_len_stats = ut.get_stats(set(map(len, vals)))
            val_len_stats_str = ut.dict_str(val_len_stats, strvals=True, newlines=False)
            depth = ut.list_depth(vals)
            deep_val_types = list(set(ut.list_deep_types(vals)))
            fmtstr_ += ut.unindent('''
            * list_depth = {depth}
            * val_len_stats = {val_len_stats_str}
            * deep_types = {deep_val_types}
            '''.strip('\n'))
            if len(deep_val_types) == 1:
                if deep_val_types[0] == np.ndarray:
                    deep_val_dtypes = list(set([val.dtype for val in vals]))
                    fmtstr_ += ut.unindent('''
                    * deep_val_dtypes = {deep_val_dtypes}
                    ''').strip('\n')
        elif val_types[0] in [np.uint8, np.int8, np.int32, np.int64,
                              np.float16, np.float32, np.float64]:
            # each key holds a scalar
            val_stats = ut.get_stats(vals)
            fmtstr_ += ut.unindent('''
            * val_stats = {val_stats}
            ''').strip('\n')
    fmtstr = fmtstr_.format(**locals())
    return ut.indent(fmtstr)

# </UTIL>
#L--------


def query_smk_test(annots_df, invindex, qreq_):
    """
    ibeis interface

    Example:
        >>> from ibeis.model.hots.smk import smk_match
        >>> from ibeis.model.hots.smk import smk_debug
        >>> ibs, annots_df, daids, qaids, invindex, qreq_ = smk_debug.testdata_internals_full()
        >>> qaid2_qres_ = smk_match.query_smk(annots_df, invindex, qreq_)

    Dev::
        qres = qaid2_qres_[qaids[0]]
        fig = qres.show_top(ibs)
    """
    from ibeis.model.hots import pipeline
    from ibeis.model.hots.smk import smk_match  # NOQA
    qaids = qreq_.get_external_qaids()
    qaid2_chipmatch = {}
    qaid2_scores = {}
    aggregate = qreq_.qparams.aggregate
    smk_alpha = qreq_.qparams.smk_alpha
    smk_thresh = qreq_.qparams.smk_thresh
    lbl = '[smk_match] asmk query: ' if aggregate else '[smk_match] smk query: '
    #mark, end_ = ut.log_progress(lbl, len(qaids), freq=1, with_time=True, backspace=False)
    withinfo = True
    for qaid in ut.ProgressIter(qaids, lbl=lbl, freq=1):
        daid2_score, daid2_chipmatch = smk_match.query_inverted_index(
            annots_df, qaid, invindex, withinfo, aggregate, smk_alpha, smk_thresh)
        qaid2_scores[qaid] = daid2_score
        qaid2_chipmatch[qaid] = daid2_chipmatch
    try:
        #filt2_meta = {}
        cm_list = convert_smkmatch_to_chipmatch(qaid2_chipmatch, qaid2_scores)
        #qaid2_qres_ = pipeline.chipmatch_to_resdict(qaid2_chipmatch, filt2_meta, qreq_)
        qaid2_qres_ = pipeline.chipmatch_to_resdict(qreq_, cm_list)
    except Exception as ex:
        ut.printex(ex)
        ut.qflag()
        raise
    return qaid2_qres_


def convert_smkmatch_to_chipmatch(qaid2_chipmatch, qaid2_scores):
    """ function to fix oldstyle chipmatches into newstyle that is accepted by the pipeline """
    from ibeis.model.hots import chip_match
    qaid_list = list(six.iterkeys(qaid2_chipmatch))
    score_smk_list = ut.dict_take(qaid2_scores, qaid_list)
    chipmatch_smk_list = ut.dict_take(qaid2_chipmatch, qaid_list)
    aid2_H = None

    def aid2_fs_to_fsv(aid2_fs):
        return {aid: fs[:, None] for aid, fs in six.iteritems(aid2_fs)}
    cmtup_old_list = [
        (aid2_fm, aid2_fs_to_fsv(aid2_fs), aid2_fk, aid2_score, aid2_H)
        for (aid2_fm, aid2_fs, aid2_fk), aid2_score in
        zip(chipmatch_smk_list, score_smk_list)
    ]
    cm_list = [
        chip_match.ChipMatch2.from_cmtup_old(cmtup_old, qaid=qaid)
        for qaid, cmtup_old in zip(qaid_list, cmtup_old_list)
    ]
    return cm_list


def main_smk_debug():
    """
    CommandLine:
        python -m ibeis.model.hots.smk.smk_debug --test-main_smk_debug

    Example:
        >>> from ibeis.model.hots.smk.smk_debug import *  # NOQA
        >>> main_smk_debug()
    """
    print('+------------')
    print('SMK_DEBUG MAIN')
    print('+------------')
    from ibeis.model.hots import pipeline
    ibs, annots_df, taids, daids, qaids, qreq_, nWords = testdata_dataframe()
    # Query using SMK
    #qaid = qaids[0]
    nWords = qreq_.qparams.nWords
    aggregate = qreq_.qparams.aggregate
    smk_alpha = qreq_.qparams.smk_alpha
    smk_thresh = qreq_.qparams.smk_thresh
    nAssign = qreq_.qparams.nAssign
    #aggregate = ibs.cfg.query_cfg.smk_cfg.aggregate
    #smk_alpha = ibs.cfg.query_cfg.smk_cfg.smk_alpha
    #smk_thresh = ibs.cfg.query_cfg.smk_cfg.smk_thresh
    print('+------------')
    print('SMK_DEBUG PARAMS')
    print('[smk_debug] aggregate = %r' % (aggregate,))
    print('[smk_debug] smk_alpha = %r' % (smk_alpha,))
    print('[smk_debug] smk_thresh = %r' % (smk_thresh,))
    print('[smk_debug] nWords = %r' % (nWords,))
    print('[smk_debug] nAssign = %r' % (nAssign,))
    print('L------------')
    # Learn vocabulary
    #words = qreq_.words = smk_index.learn_visual_words(annots_df, taids, nWords)
    # Index a database of annotations
    #qreq_.invindex = smk_repr.index_data_annots(annots_df, daids, words, aggregate, smk_alpha, smk_thresh)
    qreq_.ibs = ibs
    # SMK Match
    print('+------------')
    print('SMK_DEBUG MATCH KERNEL')
    print('+------------')
    qaid2_scores, qaid2_chipmatch_SMK = smk_match.execute_smk_L5(qreq_)
    SVER = ut.get_argflag('--sver')
    if SVER:
        print('+------------')
        print('SMK_DEBUG SVER? YES!')
        print('+------------')
        qaid2_chipmatch_SVER_ = pipeline.spatial_verification(qaid2_chipmatch_SMK, qreq_)
        qaid2_chipmatch = qaid2_chipmatch_SVER_
    else:
        print('+------------')
        print('SMK_DEBUG SVER? NO')
        print('+------------')
        qaid2_chipmatch = qaid2_chipmatch_SMK
    print('+------------')
    print('SMK_DEBUG DISPLAY RESULT')
    print('+------------')
    cm_list = convert_smkmatch_to_chipmatch(qaid2_chipmatch, qaid2_scores)
    #filt2_meta = {}
    #qaid2_qres_ = pipeline.chipmatch_to_resdict(qaid2_chipmatch, filt2_meta, qreq_)
    qaid2_qres_ = pipeline.chipmatch_to_resdict(qreq_, cm_list)
    for count, (qaid, qres) in enumerate(six.iteritems(qaid2_qres_)):
        print('+================')
        #qres = qaid2_qres_[qaid]
        qres.show_top(ibs, fnum=count)
        for aid in qres.aid2_score.keys():
            smkscore = qaid2_scores[qaid][aid]
            sumscore = qres.aid2_score[aid]
            if not ut.almost_eq(smkscore, sumscore):
                print('scorediff aid=%r, smkscore=%r, sumscore=%r' % (aid, smkscore, sumscore))
        scores = qaid2_scores[qaid]
        #print(scores)
        print(qres.get_inspect_str(ibs))
        print('L================')
        #ut.embed()
        #print(qres.aid2_fs)
    #daid2_totalscore, cmtup_old = smk_index.query_inverted_index(annots_df, qaid, invindex)
    ## Pack into QueryResult
    #qaid2_chipmatch = {qaid: cmtup_old}
    #qaid2_qres_ = pipeline.chipmatch_to_resdict(qaid2_chipmatch, {}, qreq_)
    ## Show match
    #daid2_totalscore.sort(axis=1, ascending=False)
    #print(daid2_totalscore)
    #daid2_totalscore2, cmtup_old = query_inverted_index(annots_df, daids[0], invindex)
    #print(daid2_totalscore2)
    #display_info(ibs, invindex, annots_df)
    print('finished main')
    return locals()


if __name__ == '__main__':
    """
    CommandLine:
        python -m ibeis.model.hots.smk.smk_debug
        python -m ibeis.model.hots.smk.smk_debug --allexamples
        python -m ibeis.model.hots.smk.smk_debug --allexamples --noface --nosrc
    """
    import multiprocessing
    multiprocessing.freeze_support()  # for win32
    import utool as ut  # NOQA
    ut.doctest_funcs()