# -*- coding: utf-8 -*-
from __future__ import absolute_import, division, print_function
#import six
import utool as ut
#import weakref
import numpy as np
import six
from six.moves import zip, map # NOQA
from vtool import nearest_neighbors as nntool
from ibeis.model.hots import hstypes
from ibeis.model.hots.smk import smk_scoring
from ibeis.model.hots.smk import smk_index
from collections import namedtuple
# Inject utool's module-level print/reload/profile helpers (utool convention)
(print, print_, printDBG, rrr, profile) = ut.inject(__name__, '[smk_repr]')
# Extra SMK sanity checks are enabled via utool DEBUG2 or the --debug-smk flag
DEBUG_SMK = ut.DEBUG2 or ut.get_argflag('--debug-smk')
# Key used by utool to attach injected methods onto InvertedIndex instances
INVERTED_INDEX_INJECT_KEY = ('InvertedIndex', __name__)
@six.add_metaclass(ut.ReloadingMetaclass)
class InvertedIndex(object):
    r"""
    Stores inverted index state information
    (mapping from words to database aids and fxs_list)

    Attributes:
        idx2_dvec (ndarray[S x DIM]): stacked index -> descriptor vector (currently sift)
        idx2_daid (ndarray[S x 1]): stacked index -> annot id
        idx2_dfx (ndarray[S x 1]): stacked index -> feature index (wrt daid)
        idx2_fweight (ndarray[S x 1]): stacked index -> feature weight
        idx2_wxs (list): stacked index -> word indexes (jagged)
        words (ndarray[C x DIM]): visual word centroids
        wordflann (FLANN): FLANN search structure
        wx2_idxs (dict of lists of ndarrays): word index -> stacked indexes
        wx2_fxs (dict of lists of ndarrays): word index -> aggregate feature indexes
        wx2_aids (dict of ndarrays[N_c x 1]): word index -> aggregate aids
        wx2_drvecs (dict of ndarrays[N_c x DIM]): word index -> residual vectors
        wx2_dflags (dict of ndarrays[N_c x 1]): word index -> residual flags
        wx2_idf (dict of ndarrays[N_c x 1]): word index -> idf (wx normalizer)
        wx2_maws (dict of ndarrays[N_c x 1]): word index -> multi-assign weights
        daids (ndarray): indexed annotation ids
        daid2_sccw (dict of floats): daid -> sccw (daid self-consistency weight)
        daid2_label (dict of tuples): daid -> label (name, view)
    """
    def __init__(invindex, words, wordflann, idx2_vec, idx2_aid, idx2_fx,
                 daids, daid2_label):
        # Vocabulary and stacked database-feature arrays supplied by the caller
        invindex.words = words
        invindex.wordflann = wordflann
        invindex.idx2_dvec = idx2_vec
        invindex.idx2_daid = idx2_aid
        invindex.idx2_dfx = idx2_fx
        invindex.daids = daids
        invindex.daid2_label = daid2_label
        # Internals start out unpopulated; compute_data_internals_ fills them in
        uninitialized_attrs = (
            'wx2_idxs',      # word index -> stacked indexes
            'wx2_aids',      # word index -> aggregate aids
            'wx2_fxs',       # word index -> aggregate feature indexes
            'wx2_maws',      # word index -> multi-assign weights
            'wx2_drvecs',    # word index -> residual vectors
            'wx2_dflags',    # word index -> residual flags
            'wx2_idf',       # word index -> idf normalizer
            'daid2_sccw',    # daid -> self-consistency weight
            'idx2_fweight',  # stacked index -> feature weight
            'idx2_wxs',      # stacked index -> word indexes
        )
        for attrname in uninitialized_attrs:
            setattr(invindex, attrname, None)
        # Inject debug function
        from ibeis.model.hots.smk import smk_debug
        ut.make_class_method_decorator(INVERTED_INDEX_INJECT_KEY)(smk_debug.invindex_dbgstr)
        ut.inject_instance(invindex, classkey=INVERTED_INDEX_INJECT_KEY)
@ut.make_class_method_decorator(INVERTED_INDEX_INJECT_KEY)
def report_memory(obj, objname='obj'):
    """
    Prints an estimated memory usage line for every attribute of ``obj``.

    obj = invindex
    objname = 'invindex'
    """
    print('Object Memory Usage for %s' % objname)
    # Pad every label so the '=' columns line up
    maxlen = max(map(len, obj.__dict__.keys()))
    for key, val in obj.__dict__.items():
        padding = ' ' * (maxlen - len(key))
        lbl = 'memusage({0}.{1}){2} = '.format(objname, key, padding)
        print(ut.get_object_size_str(val, lbl=lbl, unit='MB'))
# Also attach utool's generic memory-size reporter as an InvertedIndex method
report_memsize = ut.make_class_method_decorator(INVERTED_INDEX_INJECT_KEY)(ut.report_memsize)
# Immutable bundle holding one query's SMK representation.
# Field order matters: new_qindex constructs instances positionally.
QueryIndex = namedtuple('QueryIndex', [
    'wx2_qrvecs',  # word index -> query residual vectors
    'wx2_qflags',  # word index -> query residual flags
    'wx2_maws',    # word index -> multi-assign weights
    'wx2_qaids',   # word index -> query annotation ids
    'wx2_qfxs',    # word index -> query feature indexes
    'query_sccw',  # query self-consistency weight
])
class LazyGetter(object):
    """
    DEPRICATE

    Adapts a one-argument getter function to both the call and
    subscript protocols, so ``lg(x)`` and ``lg[x]`` are equivalent.
    """
    def __init__(self, getter_func):
        self.getter_func = getter_func

    def __call__(self, index):
        return self.getter_func(index)

    # Subscripting behaves exactly like calling
    __getitem__ = __call__
class DataFrameProxy(object):
    """
    DEPRICATE

    pandas is actually really slow. This class emulates it so
    I don't have to change my function calls, but without all the slowness.

    Supports exactly three pandas-like "columns": 'kpts', 'vecs', 'labels',
    each returned as a LazyGetter over the corresponding IBEIS accessor.
    """
    def __init__(annots_df, ibs):
        # Keep a handle to the controller; accessors are resolved lazily
        annots_df.ibs = ibs

    def __getitem__(annots_df, key):
        # Map supported column names onto lazy IBEIS getters
        if key == 'kpts':
            return LazyGetter(annots_df.ibs.get_annot_kpts)
        elif key == 'vecs':
            return LazyGetter(annots_df.ibs.get_annot_vecs)
        elif key == 'labels':
            return LazyGetter(annots_df.ibs.get_annot_class_labels)
        else:
            # BUGFIX: previously fell off the end and silently returned None;
            # unknown keys now raise KeyError like a real mapping.
            raise KeyError('DataFrameProxy has no column %r' % (key,))
@profile
def make_annot_df(ibs):
    """
    Creates a pandas like DataFrame interface to an IBEISController

    DEPRICATE

    Args:
        ibs (IBEISController): controller to wrap

    Returns:
        DataFrameProxy: annots_df

    Example:
        >>> from ibeis.model.hots.smk.smk_repr import * # NOQA
        >>> from ibeis.model.hots.smk import smk_debug
        >>> ibs = smk_debug.testdata_ibeis()
        >>> annots_df = make_annot_df(ibs)
        >>> print(ut.hashstr(repr(annots_df.values)))
        j12n+x93m4c!4un3
    """
    # The proxy is cheap to build; all data access happens lazily later
    return DataFrameProxy(ibs)
@profile
def new_qindex(annots_df, qaid, invindex, qparams):
    r"""
    Gets query ready for computations

    Args:
        annots_df (DataFrameProxy): pandas-like data interface
        qaid (int): query annotation id
        invindex (InvertedIndex): inverted index object
        qparams (QueryParams): query parameters object

    Returns:
        qindex: named tuple containing query information

    CommandLine:
        python -m ibeis.model.hots.smk.smk_repr --test-new_qindex

    Example:
        >>> # DISABLE_DOCTEST
        >>> from ibeis.model.hots.smk.smk_repr import * # NOQA
        >>> from ibeis.model.hots.smk import smk_debug
        >>> ibs, annots_df, qaid, invindex, qparams = smk_debug.testdata_query_repr(db='PZ_Mothers', nWords=128000)
        >>> qindex = new_qindex(annots_df, qaid, invindex, qparams)
        >>> assert smk_debug.check_wx2_rvecs(qindex.wx2_qrvecs), 'has nan'
        >>> smk_debug.invindex_dbgstr(invindex)
    """
    # TODO: Precompute and lookup residuals and assignments
    if not ut.QUIET:
        print('[smk_repr] Query Repr qaid=%r' % (qaid,))
    # Multi-assignment parameters
    nAssign = qparams.nAssign
    massign_alpha = qparams.massign_alpha
    massign_sigma = qparams.massign_sigma
    massign_equal_weights = qparams.massign_equal_weights
    # SMK scoring parameters
    aggregate = qparams.aggregate
    smk_alpha = qparams.smk_alpha
    smk_thresh = qparams.smk_thresh
    # Inverted index state needed for the query side
    wx2_idf = invindex.wx2_idf
    words = invindex.words
    wordflann = invindex.wordflann
    #qfx2_vec = annots_df['vecs'][qaid]
    # TODO: remove all mention of annot_df and ensure that qparams is passed corectly to config2_
    qfx2_vec = annots_df.ibs.get_annot_vecs(qaid, config2_=qparams)
    #-------------------
    # Assign query to (multiple) words
    #-------------------
    _wx2_qfxs, _wx2_maws, qfx2_wxs = smk_index.assign_to_words_(
        wordflann, words, qfx2_vec, nAssign, massign_alpha,
        massign_sigma, massign_equal_weights)
    # Hack to make implementing asmk easier, very redundant
    qfx2_aid = np.array([qaid] * len(qfx2_wxs), dtype=hstypes.INTEGER_TYPE)
    qfx2_qfx = np.arange(len(qfx2_vec))
    #-------------------
    # Compute query residuals
    #-------------------
    wx2_qrvecs, wx2_qaids, wx2_qfxs, wx2_maws, wx2_qflags = smk_index.compute_residuals_(
        words, _wx2_qfxs, _wx2_maws, qfx2_vec, qfx2_aid, qfx2_qfx, aggregate)
    # each value in wx2_ dicts is a list with len equal to the number of rvecs
    if ut.VERBOSE:
        print('[smk_repr] Query SCCW smk_alpha=%r, smk_thresh=%r' % (smk_alpha, smk_thresh))
    #-------------------
    # Compute query sccw
    #-------------------
    # BUGFIX: dict.keys() is a view object in Python 3, not a sequence;
    # np.array(view) builds a useless 0-d object array. Materialize to a
    # list first so we get a proper integer index array.
    wx_sublist = np.array(list(wx2_qrvecs.keys()), dtype=hstypes.INDEX_TYPE)
    idf_list = [wx2_idf[wx] for wx in wx_sublist]
    rvecs_list = [wx2_qrvecs[wx] for wx in wx_sublist]
    maws_list = [wx2_maws[wx] for wx in wx_sublist]
    flags_list = [wx2_qflags[wx] for wx in wx_sublist]
    query_sccw = smk_scoring.sccw_summation(rvecs_list, flags_list, idf_list, maws_list, smk_alpha, smk_thresh)
    try:
        assert query_sccw > 0, 'query_sccw=%r is not positive!' % (query_sccw,)
    except Exception as ex:
        ut.printex(ex)
        raise
    #-------------------
    # Build query representation class/tuple
    #-------------------
    if DEBUG_SMK:
        from ibeis.model.hots.smk import smk_debug
        qfx2_vec = annots_df['vecs'][qaid]
        assert smk_debug.check_wx2_rvecs2(
            invindex, wx2_qrvecs, wx2_qfxs, qfx2_vec), 'bad qindex'
    qindex = QueryIndex(wx2_qrvecs, wx2_qflags, wx2_maws, wx2_qaids, wx2_qfxs, query_sccw)
    return qindex
#@profile
def index_data_annots(annots_df, daids, words, qparams, with_internals=True,
                      memtrack=None, delete_rawvecs=False):
    """
    Builds the initial inverted index from a dataframe, daids, and words.
    Optionally builds the internals of the inverted structure

    Args:
        annots_df (DataFrameProxy): pandas-like interface to annotation data
        daids (list): database annotation ids to index
        words (ndarray): visual word centroids
        qparams (QueryParams): hyper-parameters
        with_internals (bool): if True also calls compute_data_internals_
        memtrack (None): memory debugging object
        delete_rawvecs (bool): forwarded to compute_data_internals_

    Returns:
        invindex (InvertedIndex): possibly partially-initialized index

    Example:
        >>> from ibeis.model.hots.smk.smk_repr import * # NOQA
        >>> from ibeis.model.hots.smk import smk_debug
        >>> ibs, annots_df, daids, qaids, qreq_, words = smk_debug.testdata_words()
        >>> qparams = qreq_.qparams
        >>> with_internals = False
        >>> invindex = index_data_annots(annots_df, daids, words, qparams, with_internals)

    Ignore:
        #>>> print(ut.hashstr(repr(list(invindex.__dict__.values()))))
        #v8+i5i8+55j0swio

    Auto:
        from ibeis.model.hots.smk import smk_repr
        import utool as ut
        ut.rrrr()
        print(ut.make_default_docstr(smk_repr.index_data_annots))
    """
    if not ut.QUIET:
        print('[smk_repr] index_data_annots')
    flann_params = {}
    # Compute fast lookup index for the words
    wordflann = nntool.flann_cache(words, flann_params=flann_params, appname='smk')
    _vecs_list = annots_df['vecs'][daids]
    _label_list = annots_df['labels'][daids]
    # Stack per-annot descriptors into flat arrays with inverse mappings
    idx2_dvec, idx2_daid, idx2_dfx = nntool.invertible_stack(_vecs_list, daids)
    # TODO:
    # Need to individually cache residual vectors.
    # rvecs_list = annots_df['rvecs'][daids]
    #
    # Residual vectors depend on
    # * nearest word (word assignment)
    # * original vectors
    # * multiassignment
    daid2_label = dict(zip(daids, _label_list))
    invindex = InvertedIndex(words, wordflann, idx2_dvec, idx2_daid, idx2_dfx,
                             daids, daid2_label)
    # Decrement reference count so memory can be cleared in the next function
    del words, idx2_dvec, idx2_daid, idx2_dfx, daids, daid2_label
    del _vecs_list, _label_list
    if with_internals:
        compute_data_internals_(invindex, qparams, memtrack=memtrack,
                                delete_rawvecs=delete_rawvecs)  # 99%
    return invindex
@profile
def compute_data_internals_(invindex, qparams, memtrack=None,
                            delete_rawvecs=True):
    """
    Builds each of the inverted index internals.

    Mutates ``invindex`` in place: populates idx2_wxs, wx2_idxs, wx2_idf,
    wx2_drvecs, wx2_dflags, wx2_aids, wx2_fxs, wx2_dmaws and daid2_sccw.

    Args:
        invindex (InvertedIndex): object for fast vocab lookup
        qparams (QueryParams): hyper-parameters
        memtrack (None): optional memory debugging object
        delete_rawvecs (bool): if True, releases the stacked raw descriptor
            array once residuals have been computed to save memory

    Returns:
        None

    Example:
        >>> from ibeis.model.hots.smk.smk_repr import * # NOQA
        >>> from ibeis.model.hots.smk import smk_debug
        >>> ibs, annots_df, daids, qaids, invindex, qreq_ = smk_debug.testdata_raw_internals0()
        >>> compute_data_internals_(invindex, qreq_.qparams)

    Ignore:
        idx2_vec = idx2_dvec
        wx2_maws = _wx2_maws # NOQA
    """
    # Get information
    #if memtrack is None:
    #    memtrack = ut.MemoryTracker('[DATA INTERNALS ENTRY]')
    #memtrack.report('[DATA INTERNALS1]')
    # Residual / scoring parameters
    aggregate = qparams.aggregate
    smk_alpha = qparams.smk_alpha
    smk_thresh = qparams.smk_thresh
    # Word-assignment parameters
    massign_alpha = qparams.massign_alpha
    massign_sigma = qparams.massign_sigma
    massign_equal_weights = qparams.massign_equal_weights
    #
    vocab_weighting = qparams.vocab_weighting
    #
    nAssign = 1  # single assignment for database side
    idx2_vec = invindex.idx2_dvec
    idx2_dfx = invindex.idx2_dfx
    idx2_daid = invindex.idx2_daid
    daids = invindex.daids
    wordflann = invindex.wordflann
    words = invindex.words
    daid2_label = invindex.daid2_label
    wx_series = np.arange(len(words))
    #memtrack.track_obj(idx2_vec, 'idx2_vec')
    if not ut.QUIET:
        print('[smk_repr] compute_data_internals_')
    if ut.VERBOSE:
        print('[smk_repr] * len(daids) = %r' % (len(daids),))
        print('[smk_repr] * len(words) = %r' % (len(words),))
        print('[smk_repr] * len(idx2_vec) = %r' % (len(idx2_vec),))
        print('[smk_repr] * aggregate = %r' % (aggregate,))
        print('[smk_repr] * smk_alpha = %r' % (smk_alpha,))
        print('[smk_repr] * smk_thresh = %r' % (smk_thresh,))
    # Try to use the cache
    #cfgstr = ut.hashstr_arr(words, 'words') + qparams.feat_cfgstr
    #cachekw = dict(
    #    cfgstr=cfgstr,
    #    appname='smk_test'
    #)
    #invindex_cache = ut.Cacher('inverted_index', **cachekw)
    #try:
    #    raise IOError('cache is off')
    #    #cachetup = invindex_cache.load()
    #    #(idx2_wxs, wx2_idxs, wx2_idf, wx2_drvecs, wx2_aids, wx2_fxs, wx2_maws, daid2_sccw) = cachetup
    #    invindex.idx2_dvec = None
    #except IOError as ex:
    # Database word assignments (perform single assignment on database side)
    wx2_idxs, _wx2_maws, idx2_wxs = smk_index.assign_to_words_(
        wordflann, words, idx2_vec, nAssign, massign_alpha, massign_sigma,
        massign_equal_weights)
    if ut.DEBUG2:
        # Sanity checks: every stacked feature got assigned, and every word
        # with assignments also has multi-assign weights
        assert len(idx2_wxs) == len(idx2_vec)
        assert len(wx2_idxs.keys()) == len(_wx2_maws.keys())
        assert len(wx2_idxs.keys()) <= len(words)
        try:
            assert len(wx2_idxs.keys()) == len(words)
        except AssertionError as ex:
            ut.printex(ex, iswarning=True)
    # Database word inverse-document-frequency (idf weights)
    wx2_idf = smk_index.compute_word_idf_(
        wx_series, wx2_idxs, idx2_daid, daids, daid2_label, vocab_weighting,
        verbose=True)
    if ut.DEBUG2:
        assert len(wx2_idf) == len(wx2_idf.keys())
    # Compute (normalized) residual vectors and inverse mappings
    wx2_drvecs, wx2_aids, wx2_fxs, wx2_dmaws, wx2_dflags = smk_index.compute_residuals_(
        words, wx2_idxs, _wx2_maws, idx2_vec, idx2_daid, idx2_dfx,
        aggregate, verbose=True)
    if not ut.QUIET:
        print('[smk_repr] unloading idx2_vec')
    if delete_rawvecs:
        # Try to save some memory; raw descriptors are no longer needed
        del _wx2_maws
        invindex.idx2_dvec = None
        del idx2_vec
    # Compute annotation normalization factor
    daid2_sccw = smk_index.compute_data_sccw_(
        idx2_daid, wx2_drvecs, wx2_dflags, wx2_aids, wx2_idf, wx2_dmaws, smk_alpha,
        smk_thresh, verbose=True)
    # Cache save
    #cachetup = (idx2_wxs, wx2_idxs, wx2_idf, wx2_drvecs, wx2_aids, wx2_fxs, wx2_dmaws, daid2_sccw)
    #invindex_cache.save(cachetup)
    # Store information
    invindex.idx2_wxs = idx2_wxs  # stacked index -> word indexes (might not be needed)
    invindex.wx2_idxs = wx2_idxs
    invindex.wx2_idf = wx2_idf
    invindex.wx2_drvecs = wx2_drvecs
    invindex.wx2_dflags = wx2_dflags  # flag nan rvecs
    invindex.wx2_aids = wx2_aids  # needed for asmk
    invindex.wx2_fxs = wx2_fxs  # needed for asmk
    invindex.wx2_dmaws = wx2_dmaws  # needed for awx2_mawssmk
    invindex.daid2_sccw = daid2_sccw
    #memtrack.report('[DATA INTERNALS3]')
    if ut.DEBUG2:
        from ibeis.model.hots.smk import smk_debug
        smk_debug.check_invindex_wx2(invindex)
if __name__ == '__main__':
    """
    CommandLine:
        python -m ibeis.model.hots.smk.smk_repr
        python -m ibeis.model.hots.smk.smk_repr --allexamples
        python -m ibeis.model.hots.smk.smk_repr --allexamples --noface --nosrc
    """
    # Run this module's doctests when executed directly
    import multiprocessing
    multiprocessing.freeze_support()  # for win32
    import utool as ut  # NOQA
    ut.doctest_funcs()