# Source code for ibeis.model.hots.smk.smk_plots

#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Algorithm:
    Feature Weighting
    Viewpoints Labels
    Choose Exemplars based on Scores
    Normalizing Scores
    Per Name
    Incremental Version
"""
from __future__ import absolute_import, division, print_function
import utool as ut
import numpy as np
from ibeis.model.hots.smk import smk_debug
from vtool import patch as ptool
from vtool import image as gtool
import six
import scipy.stats.mstats as spms
from os.path import join
from os.path import basename
import scipy.spatial.distance as spdist
from collections import namedtuple
(print, print_, printDBG, rrr, profile) = ut.inject(__name__, '[smk_plots]')


# Bundle returned by compute_word_metrics. Each field is a dict keyed by word
# index (wx): the number of descriptors assigned to the word, the stats of the
# pairwise distances between those descriptors, and the stats of their
# distances to the word itself.
Metrics = namedtuple('Metrics', ('wx2_nMembers', 'wx2_pdist_stats', 'wx2_wdist_stats',))


def vizualize_vocabulary(ibs, invindex):
    """
    Samples words from the vocabulary and writes figures of the patches that
    were assigned to them into the controller's figure directory.

    Words are sampled with ``select_by_metric`` on three per-word metrics: the
    member count, the ``'max'`` pairwise-distance stat and the ``'max'``
    distance-to-word stat. The union of the three samples is visualized.

    Args:
        ibs (IBEISController): supplies the figure directory
        invindex (InvertedIndex): its ``idx2_wxs`` is converted to an ndarray
            in place

    CommandLine:
        python -m ibeis.model.hots.smk.smk_plots --test-vizualize_vocabulary
        python -m ibeis.model.hots.smk.smk_plots --test-vizualize_vocabulary --vf

    Example:
        >>> from ibeis.model.hots.smk.smk_plots import *  # NOQA
        >>> from ibeis.model.hots.smk import smk_debug
        >>> from ibeis.model.hots.smk import smk_repr
        >>> #tup = smk_debug.testdata_raw_internals0(db='GZ_ALL', nWords=64000)
        >>> #tup = smk_debug.testdata_raw_internals0(db='GZ_ALL', nWords=8000)
        >>> tup = smk_debug.testdata_raw_internals0(db='PZ_Master0', nWords=64000)
        >>> #tup = smk_debug.testdata_raw_internals0(db='PZ_Mothers', nWords=8000)
        >>> ibs, annots_df, daids, qaids, invindex, qreq_ = tup
        >>> smk_repr.compute_data_internals_(invindex, qreq_.qparams, delete_rawvecs=False)
        >>> vizualize_vocabulary(ibs, invindex)
    """
    invindex.idx2_wxs = np.array(invindex.idx2_wxs)

    print('[smk_plots] Vizualizing vocabulary')

    # Everything below is written underneath the figure directory
    figdir = ibs.get_fig_dir()
    ut.ensuredir(figdir)
    if ut.get_argflag('--vf'):
        ut.view_directory(figdir)

    # Per-word statistics
    metrics = compute_word_metrics(invindex)

    def _max_stat_by_word(wx2_stats):
        # Words whose stat dict has no 'max' entry are left out
        return {wx: stats['max'] for wx, stats in wx2_stats.items() if 'max' in stats}

    sampling_metrics = (
        metrics.wx2_nMembers,
        _max_stat_by_word(metrics.wx2_pdist_stats),
        _max_stat_by_word(metrics.wx2_wdist_stats),
    )

    # Concatenate the three samples, then report how many words were picked
    # by more than one metric before deduplicating.
    wx_candidates = []
    for wx2_metric in sampling_metrics:
        wx_candidates.extend(select_by_metric(wx2_metric))
    wx_sample = set(wx_candidates)
    overlap123 = len(wx_candidates) - len(wx_sample)
    print('overlap123 = %r' % overlap123)
    print('len(wx_sample) = %r' % len(wx_sample))

    #make_scatterplots(ibs, figdir, invindex, metrics)

    vocabdir = join(figdir, 'vocab_patches2')
    wx2_dpath = get_word_dpaths(vocabdir, wx_sample, metrics)

    make_wordfigures(ibs, metrics, invindex, figdir, wx_sample, wx2_dpath)


def metric_clamped_stat(metrics, wx_list, key):
    """
    Looks up one per-word metric for every word index in ``wx_list``.

    Args:
        metrics (Metrics): namedtuple of per-word dictionaries
        wx_list (iterable): word indexes to look up
        key (str or tuple): if a tuple it is ``(fieldname, statkey)`` and
            selects one stat out of a field that maps wx -> stat dict; words
            that are missing, or whose stat dict lacks ``statkey``, get -1.
            Otherwise it is the name of a field that maps wx -> scalar.

    Returns:
        ndarray: the looked-up values after being passed through
            ``ut.negative_minclamp_inplace``

    Raises:
        Exception: any error raised during lookup is printed with
            ``ut.printex`` and then re-raised
    """
    try:
        if isinstance(key, tuple):
            metrickey, statkey = key
            # getattr rather than metrics.__dict__: namedtuple instances do
            # not expose __dict__ on current Python versions.
            wx2_statdict = getattr(metrics, metrickey)

            def wx2_metric(wx):
                if wx in wx2_statdict and statkey in wx2_statdict[wx]:
                    return wx2_statdict[wx][statkey]
                return -1
            stat_list = np.array([wx2_metric(wx) for wx in wx_list])
        else:
            wx2_metric = getattr(metrics, key)
            stat_list = np.array([wx2_metric[wx] for wx in wx_list])
        stat_list = ut.negative_minclamp_inplace(stat_list)
    except Exception as ex:
        ut.printex(ex, keys=['key'])
        # Previously execution fell through to ``return stat_list``, which
        # could be unbound here and hide the real error.
        raise
    return stat_list


def compute_word_metrics(invindex):
    """
    Computes per-word membership counts and distance statistics.

    For every word the descriptors assigned to it are gathered; their pairwise
    distances and their distances to the word itself are summarized with
    ``ut.get_stats``.

    Args:
        invindex (InvertedIndex): reads ``wx2_idxs``, ``idx2_dvec`` and
            ``words``; ``idx2_wxs`` is converted to an ndarray in place

    Returns:
        Metrics: (wx2_nMembers, wx2_pdist_stats, wx2_wdist_stats)
    """
    invindex.idx2_wxs = np.array(invindex.idx2_wxs)
    wx2_idxs = invindex.wx2_idxs
    idx2_dvec = invindex.idx2_dvec
    words = invindex.words
    num_words = len(wx2_idxs)

    wx2_pdist, wx2_wdist, wx2_nMembers = {}, {}, {}
    member_iter = ut.progiter(six.iteritems(wx2_idxs), lbl='Word Dists: ', num=num_words, freq=200)
    for wx, idxs in member_iter:
        dvecs = idx2_dvec.take(idxs, axis=0)
        word = words[wx:wx + 1]
        wx2_pdist[wx] = spdist.pdist(dvecs)  # pairwise dist between words
        wx2_wdist[wx] = ut.euclidean_dist(dvecs, word)  # dist to word center
        wx2_nMembers[wx] = len(idxs)

    pdist_iter = ut.progiter(six.iteritems(wx2_pdist), lbl='Word pdist Stats: ', num=num_words, freq=2000)
    wx2_pdist_stats = {wx: ut.get_stats(pdist) for wx, pdist in pdist_iter}

    wdist_iter = ut.progiter(six.iteritems(wx2_wdist), lbl='Word wdist Stats: ', num=num_words, freq=2000)
    wx2_wdist_stats = {wx: ut.get_stats(wdist) for wx, wdist in wdist_iter}

    ut.print_stats(wx2_nMembers.values(), 'word members')
    return Metrics(wx2_nMembers, wx2_pdist_stats, wx2_wdist_stats)
    #word_pdist = spdist.pdist(invindex.words)


def draw_scatterplot(figdir, ibs, datax, datay, xlabel, ylabel, color, fnum=None):
    """
    Draws one single-color scatter plot and saves it into ``figdir``.

    Args:
        figdir (str): directory the figure is saved in
        ibs (IBEISController): only used for the database name in the title
        datax (list): x values
        datay (list): y values, same length as ``datax``
        xlabel (str): x axis label
        ylabel (str): y axis label
        color: color given to every point
        fnum (int): figure number
    """
    from plottool import df2
    num_points = len(datax)
    datac = [color] * num_points
    assert num_points == len(datay), '%r %r' % (num_points, len(datay))
    df2.figure(fnum=fnum, doclf=True, docla=True)
    df2.plt.scatter(datax, datay,  c=datac, s=20, marker='o', alpha=.9)
    ax = df2.gca()
    title = '%s vs %s.\nnWords=%r. db=%r' % (xlabel, ylabel, num_points, ibs.get_dbname())
    df2.set_xlabel(xlabel)
    df2.set_ylabel(ylabel)
    # Leave a one unit margin around the data on both axes
    ylim = (min(datay) - 1, max(datay) + 1)
    xlim = (min(datax) - 1, max(datax) + 1)
    ax.set_ylim(*ylim)
    ax.set_xlim(*xlim)
    df2.dark_background()
    df2.set_figtitle(title)
    # The figure title doubles as the file name
    df2.save_figure(fnum, join(figdir, title))


def dump_word_patches(ibs, vocabdir, invindex, wx_sample, metrics):
    """
    Dumps word member patches to disk

    For every database annotation that has at least one feature assigned to a
    sampled word, the annotation's warped patches are extracted and each patch
    belonging to a sampled word is written into that word's directory.

    Args:
        ibs (IBEISController): source of chips, keypoints and name rowids
        vocabdir (str): directory holding one sub-directory per sampled word
        invindex (InvertedIndex): reads ``idx2_daid``, ``daids``,
            ``idx2_dfx`` and ``idx2_wxs``
        wx_sample (iterable): word indexes whose patches are dumped
        metrics (Metrics): used to build the per-word directory names
    """
    wx2_dpath = get_word_dpaths(vocabdir, wx_sample, metrics)
    # Build the membership set once instead of once per annotation; it also
    # makes the per-feature ``in`` test constant time.
    wx_sample_set = set(wx_sample)

    # Write each patch from each annotation to disk
    idx2_daid = invindex.idx2_daid
    daids = invindex.daids
    idx2_dfx = invindex.idx2_dfx

    # Loop over all annotations skipping the ones without any words in the sample
    ax2_idxs = [np.where(idx2_daid == aid_)[0] for aid_ in ut.progiter(daids, 'Building Forward Index: ', freq=100)]
    patchdump_iter = ut.progiter(zip(daids, ax2_idxs), freq=1,
                                 lbl='Dumping Selected Patches: ', num=len(daids))
    for aid, idxs in patchdump_iter:
        wxs_list = invindex.idx2_wxs[idxs]
        if wx_sample_set.isdisjoint(ut.flatten(wxs_list)):
            # skip this annotation
            continue
        fx_list = idx2_dfx[idxs]
        chip = ibs.get_annot_chips(aid)
        chip_kpts = ibs.get_annot_kpts(aid)
        nid = ibs.get_annot_name_rowids(aid)
        patches, _subkpts = ptool.get_warped_patches(chip, chip_kpts)
        for fx, wxs, patch in zip(fx_list, wxs_list, patches):
            assert len(wxs) == 1, 'did you multiassign the database? If so implement it here too'
            for k, wx in enumerate(wxs):
                if wx not in wx_sample_set:
                    continue
                patch_fname = 'patch_nid=%04d_aid=%04d_fx=%04d_k=%d' % (nid, aid, fx, k)
                fpath = join(wx2_dpath[wx], patch_fname)
                #gtool.imwrite(fpath, patch, fallback=True)
                gtool.imwrite_fallback(fpath, patch)


def get_word_dname(wx, metrics):
    """
    Builds the directory name of a word from its index and the stats of its
    distance-to-word distribution.

    Args:
        wx (int): word index
        metrics (Metrics): ``wx2_wdist_stats[wx]`` must provide the keys
            max, min, mean, std, nMax, nMin and shape

    Returns:
        str: name produced by ``ut.long_fname_format`` (called with
            ``max_len=250``)
    """
    stats_ = metrics.wx2_wdist_stats[wx]
    fname_fmt = ('wx=%06d' % wx) + '_{stats1}{stats2}'
    fmt_dict = {
        'stats1': 'max={max},min={min},mean={mean},'.format(**stats_),
        'stats2': 'std={std},nMaxMin=({nMax},{nMin}),shape={shape}'.format(**stats_),
    }
    return ut.long_fname_format(fname_fmt, fmt_dict, ['stats2', 'stats1'], max_len=250, hashlen=4)


def get_word_dpaths(vocabdir, wx_sample, metrics):
    """
    Maps every sampled word to its directory inside ``vocabdir`` and makes
    sure all of those directories exist.

    Args:
        vocabdir (str): parent directory (created if needed)
        wx_sample (iterable): word indexes
        metrics (Metrics): forwarded to ``get_word_dname``

    Returns:
        dict: wx2_dpath mapping word index -> directory path
    """
    ut.ensuredir(vocabdir)
    wx2_dpath = {}
    for wx in wx_sample:
        wx2_dpath[wx] = join(vocabdir, get_word_dname(wx, metrics))
    for dpath in ut.progiter(six.itervalues(wx2_dpath), lbl='Ensuring word_dpath: ', freq=200):
        ut.ensuredir(dpath)
    return wx2_dpath


def select_by_metric(wx2_metric, per_quantile=20):
    """
    Samples a few words around interesting values of a per-word metric.

    The points of interest are the quantiles returned by ``spms.mquantiles``
    (called with its default arguments) followed by the maximum and the
    minimum metric value. For each point the ``per_quantile`` words whose
    metric is closest to it are selected.

    Args:
        wx2_metric (dict): maps word index -> scalar metric value
        per_quantile (int): number of words taken per point of interest

    Returns:
        list: selected word indexes, in selection order. A word may appear
            more than once; a warning with the number of repeats is printed
            in that case. Empty if ``wx2_metric`` is empty.
    """
    if not wx2_metric:
        # Nothing to sample from; max()/min() below would raise on empty input
        return []
    # sample a few words around the quantile points
    metric_list = np.array(list(wx2_metric.values()))
    wx_list = np.array(list(wx2_metric.keys()))
    metric_quantiles = spms.mquantiles(metric_list)
    metric_quantiles = np.array(metric_quantiles.tolist() + [metric_list.max(), metric_list.min()])
    wx_interest = []
    for scalar in metric_quantiles:
        # Squared distance of every word's metric to the point of interest
        dist = (metric_list - scalar) ** 2
        wx_quantile = wx_list[dist.argsort()[0:per_quantile]]
        wx_interest.extend(wx_quantile.tolist())
    overlap = len(wx_interest) - len(set(wx_interest))
    if overlap > 0:
        print('warning: overlap=%r' % overlap)
    return wx_interest


def get_metric(metrics, tupkey, statkey=None):
    """
    Fetches one field of ``metrics``, optionally reduced to a single stat.

    Args:
        metrics (Metrics): namedtuple of per-word dictionaries
        tupkey (str): name of the field to fetch
        statkey (str): if given, the field must map wx -> stat dict and only
            this stat is extracted

    Returns:
        dict or list: the field itself when ``statkey`` is None; otherwise a
            list with the ``statkey`` value of every word that has it (word
            indexes are not retained)

    Raises:
        AttributeError: if ``metrics`` has no field called ``tupkey``
    """
    # getattr rather than metrics.__dict__: namedtuple instances do not
    # expose __dict__ on current Python versions.
    wx2_metric = getattr(metrics, tupkey)
    if statkey is None:
        return wx2_metric
    return [stats_[statkey] for stats_ in wx2_metric.values() if statkey in stats_]

#{wx: pdist_stats['max'] for wx, pdist_stats in six.iteritems(wx2_pdist_stats) if 'max' in pdist_stats}
#wx2_wrad = {wx: wdist_stats['max'] for wx, wdist_stats in six.iteritems(wx2_wdist_stats) if 'max' in wdist_stats}


def make_scatterplots(ibs, figdir, invindex, metrics):
    """
    Saves scatter plots relating idf, word size, mean pairwise distance and
    mean distance-to-word for every word that has an idf value.

    Args:
        ibs (IBEISController): forwarded to ``draw_scatterplot``
        figdir (str): directory the figures are saved in
        invindex (InvertedIndex): reads ``wx2_idf``
        metrics (Metrics): per-word statistics; words missing from a metric
            are plotted with the value -1
    """
    from plottool import draw_func2 as df2
    wx2_pdist_stats = metrics.wx2_pdist_stats
    # Was bound to wx2_pdist_stats, which made every wdist plot show pdist data
    wx2_wdist_stats = metrics.wx2_wdist_stats
    wx2_nMembers = metrics.wx2_nMembers

    def wx2_avepdist(wx):
        return wx2_pdist_stats[wx]['mean'] if wx in wx2_pdist_stats and 'mean' in wx2_pdist_stats[wx] else -1

    def wx2_avewdist(wx):
        return wx2_wdist_stats[wx]['mean'] if wx in wx2_wdist_stats and 'mean' in wx2_wdist_stats[wx] else -1

    wx2_idf = invindex.wx2_idf

    # data
    wx_list = list(wx2_idf.keys())
    idf_list = [wx2_idf[wx] for wx in wx_list]
    nPoints_list = [wx2_nMembers[wx] if wx in wx2_nMembers else -1 for wx in wx_list]
    avepdist_list = [wx2_avepdist(wx) for wx in wx_list]
    avewdist_list = [wx2_avewdist(wx) for wx in wx_list]

    df2.reset()
    draw_scatterplot(figdir, ibs, idf_list, avepdist_list, 'idf', 'mean(pdist)', df2.WHITE, fnum=1)
    draw_scatterplot(figdir, ibs, idf_list, avewdist_list, 'idf', 'mean(wdist)', df2.PINK, fnum=3)
    draw_scatterplot(figdir, ibs, nPoints_list, avepdist_list, 'nPointsInWord', 'mean(pdist)', df2.GREEN, fnum=2)
    draw_scatterplot(figdir, ibs, avepdist_list, avewdist_list, 'mean(pdist)', 'mean(wdist)', df2.YELLOW, fnum=4)
    draw_scatterplot(figdir, ibs, nPoints_list, avewdist_list, 'nPointsInWord', 'mean(wdist)', df2.ORANGE, fnum=5)
    draw_scatterplot(figdir, ibs, idf_list, nPoints_list, 'idf', 'nPointsInWord', df2.LIGHT_BLUE, fnum=6)
    #df2.present()
    #df2.present()


[docs]def make_wordfigures(ibs, metrics, invindex, figdir, wx_sample, wx2_dpath):
    """
    Builds mosaics of patches assigned to words in sample
    outputs them to disk

    The patches are first dumped into per-word directories with
    ``dump_word_patches``. Then, for every word directory in ``wx2_dpath``,
    the dumped patches are read back, stacked into one image and saved as a
    titled figure in ``<figdir>/vocab_patches2_selected``.

    Args:
        ibs (IBEISController): forwarded to ``dump_word_patches``
        metrics (Metrics): per-word stats shown in the figure title
        invindex (InvertedIndex): forwarded to ``dump_word_patches``
        figdir (str): base figure directory
        wx_sample (iterable): word indexes whose patches are dumped
        wx2_dpath (dict): maps word index -> directory holding its patches
    """
    from plottool import draw_func2 as df2
    import vtool as vt
    import parse

    vocabdir = join(figdir, 'vocab_patches2')
    ut.ensuredir(vocabdir)
    dump_word_patches(ibs, vocabdir, invindex, wx_sample, metrics)

    # COLLECTING PART --- collects patches in word folders
    #vocabdir

    # Mosaics are written to a sibling directory of the patch dump
    seldpath = vocabdir + '_selected'
    ut.ensurepath(seldpath)
    # stack for show
    for wx, dpath in ut.progiter(six.iteritems(wx2_dpath), lbl='Dumping Word Images:', num=len(wx2_dpath), freq=1, backspace=False):
        #df2.rrr()
        fpath_list = ut.ls(dpath)
        fname_list = [basename(fpath_) for fpath_ in fpath_list]
        patch_list = [gtool.imread(fpath_) for fpath_ in fpath_list]
        # color each patch by nid
        # The nid is recovered from the file name written by dump_word_patches
        nid_list = [int(parse.parse('{}_nid={nid}_{}', fname)['nid']) for fname in fname_list]
        nid_set = set(nid_list)
        nid_list = np.array(nid_list)
        if len(nid_list) == len(nid_set):
            # no duplicate names
            newpatch_list = patch_list
        else:
            # duplicate names. do coloring
            # Sort by nid so patches of the same name end up next to each other
            sortx = nid_list.argsort()
            patch_list = np.array(patch_list, dtype=object)[sortx]
            fname_list = np.array(fname_list, dtype=object)[sortx]
            nid_list = nid_list[sortx]
            colors = (255 * np.array(df2.distinct_colors(len(nid_set)))).astype(np.int32)
            color_dict = dict(zip(nid_set, colors))
            # NOTE(review): wpad is applied to axis 0 and hpad to axis 1; the
            # two values are equal so the naming does not matter here
            wpad, hpad = 3, 3
            newshape_list = [tuple((np.array(patch.shape) + (wpad * 2, hpad * 2, 0)).tolist()) for patch in patch_list]
            color_list = [color_dict[nid_] for nid_ in nid_list]
            # Each new patch starts as a solid block of its name color ...
            newpatch_list = [np.zeros(shape) + color[None, None] for shape, color in zip(newshape_list, color_list)]
            # ... and the original patch is pasted in the middle, leaving a colored border
            for patch, newpatch in zip(patch_list, newpatch_list):
                newpatch[wpad:-wpad, hpad:-hpad, :] = patch
            #img_list = patch_list
            #bigpatch = vt.stack_image_recurse(patch_list)
        #bigpatch = vt.stack_image_list(patch_list, vert=False)
        bigpatch = vt.stack_square_images(newpatch_list)
        bigpatch_fpath = join(seldpath, basename(dpath) + '_patches.png')

        # Compact one-line rendering of a stats dict for the figure title
        def _dictstr(dict_):
            str_ = ut.dict_str(dict_, newlines=False)
            str_ = str_.replace('\'', '').replace(': ', '=').strip('{},')
            return str_

        figtitle = '\n'.join([
            'wx=%r' % wx,
            'stat(pdist): %s' % _dictstr(metrics.wx2_pdist_stats[wx]),
            'stat(wdist): %s' % _dictstr(metrics.wx2_wdist_stats[wx]),
        ])
        # NOTE(review): the next line only performs a lookup; its result is unused
        metrics.wx2_nMembers[wx]

        df2.figure(fnum=1, doclf=True, docla=True)
        fig, ax = df2.imshow(bigpatch, figtitle=figtitle)
        #fig.show()
        df2.set_figtitle(figtitle)
        df2.adjust_subplots(top=.878, bottom=0)
        df2.save_figure(1, bigpatch_fpath)
        #gtool.imwrite(bigpatch_fpath, bigpatch)


def get_cached_vocabs():
    """
    Lists the ``akmeans*`` files found in the 'smk' application resource
    directory.

    Returns:
        list: file paths ordered with ``ut.sortedby(..., reverse=True)`` on
            the (num_cent, num_dpts) pair parsed from each path
    """
    import parse
    smkdir = ut.get_app_resource_dir('smk')
    fpath_list = [join(smkdir, fname) for fname in ut.glob(smkdir, 'akmeans*')]
    # Parse some of the training data from fname
    parse_str = '{}nC={num_cent},{}_DPTS(({num_dpts},{dim}){}'
    result_list = [parse.parse(parse_str, fpath) for fpath in fpath_list]

    def _int_field(field):
        return [int(res[field]) for res in result_list]

    nCent_list = _int_field('num_cent')
    nDpts_list = _int_field('num_dpts')
    return ut.sortedby(fpath_list, zip(nCent_list, nDpts_list), reverse=True)


def view_vocabs():
    """
    Loads the first vocabulary returned by ``get_cached_vocabs``, prints
    vector stats for its centroids and plots them with
    ``clustertool.plot_centroids`` (2 pca dims, no whitening).

    CommandLine:
        python -m ibeis.model.hots.smk.smk_plots --test-view_vocabs --show

    Example:
        >>> # DISABLE_DOCTEST
        >>> from ibeis.model.hots.smk.smk_plots import *  # NOQA
        >>> # build test data
        >>> # execute function
        >>> view_vocabs()
        >>> ut.quit_if_noshow()
        >>> ut.show_if_requested()
    """
    from vtool import clustering2 as clustertool
    import numpy as np

    fpath_sorted = get_cached_vocabs()

    kwd = {
        'num_pca_dims': 2,  # 3
        'whiten': False,
    }

    def view_vocab(fpath):
        # QUANTIZED AND FLOATING POINT STATS
        centroids = ut.load_cPkl(fpath)
        print('viewing vocat fpath=%r' % (fpath,))
        smk_debug.vector_stats(centroids, 'centroids')
        #centroids_float = centroids.astype(np.float64) / 255.0
        centroids_float = centroids.astype(np.float64) / 512.0
        smk_debug.vector_stats(centroids_float, 'centroids_float')

        fig = clustertool.plot_centroids(centroids, centroids, labels='centroids',
                                         fnum=1, prefix='centroid vecs\n', **kwd)
        fig.show()

    # Only the first vocab in the sorted list is viewed
    for fpath in fpath_sorted:
        view_vocab(fpath)
        break


def plot_chip_metric(ibs, aid, metric=None, fnum=1, lbl='', figtitle='', colortype='score',
                     darken=.5, cmap_='hot', reverse_cmap=False, **kwargs):
    """
    Shows the chip of one annotation with its features colored by a
    per-feature value. ("metric" is used loosely: it is whatever values the
    caller wants mapped to colors.)

    Args:
        ibs (IBEISController):
        aid (int): annotation rowid
        metric (ndarray): per-feature values; if None distinct colors are used
            and no colorbar is drawn
        fnum (int): figure number
        lbl (str): colorbar label
        figtitle (str): figure title
        colortype (str): 'score', 'label' or 'custom'. With 'custom' the
            values -1 and -2 are drawn in dedicated colors.
        darken (float): forwarded to ``viz_chip.show_chip``
        cmap_ (str): colormap name used for 'score' and 'custom'
        reverse_cmap (bool): forwarded to ``df2.scores_to_color``
        **kwargs: forwarded to ``viz_chip.show_chip``

    Raises:
        ValueError: if ``metric`` is given and ``colortype`` is unknown

    Example:
        >>> from ibeis.model.hots.smk.smk_plots import *  # NOQA
        >>> from ibeis.model.hots.smk import smk_debug
        >>> from ibeis.model.hots.smk import smk_plots
        >>> from ibeis.model.hots.smk import smk_repr
        >>> #tup = smk_debug.testdata_raw_internals0(db='GZ_ALL', nWords=64000)
        >>> #tup = smk_debug.testdata_raw_internals0(db='GZ_ALL', nWords=8000)
        >>> #tup = smk_debug.testdata_raw_internals0(db='PZ_Master0', nWords=64000)
        >>> tup = smk_debug.testdata_raw_internals0(db='PZ_Mothers', nWords=8000)
        >>> ibs, annots_df, daids, qaids, invindex, qreq_ = tup
        >>> smk_repr.compute_data_internals_(invindex, qreq_.qparams, delete_rawvecs=False)
        >>> invindex.idx2_wxs = np.array(invindex.idx2_wxs)
        >>> metric = None
        >>> aid = 1
        >>> fnum = 0
        >>> lbl='test'
        >>> colortype='score'
        >>> kwargs = {'annote': False}
        >>> smk_plots.plot_chip_metric(ibs, aid, metric, fnum, lbl, colortype=colortype, **kwargs)
        >>> smk_plots.present()
    """
    import plottool.draw_func2 as df2
    from ibeis.viz import viz_chip
    df2.figure(fnum=fnum, doclf=True, docla=True)
    has_metric = metric is not None
    if not has_metric:
        colors = 'distinct'
    elif colortype == 'score':
        colors = df2.scores_to_color(metric, cmap_=cmap_, reverse_cmap=reverse_cmap)
    elif colortype == 'label':
        colors = df2.label_to_colors(metric)
    elif colortype == 'custom':
        # Give ranks of -1 and -2 special meaning
        val2_customcolor = {
            -1: df2.UNKNOWN_PURP,
            -2: df2.LIGHT_BLUE,
        }
        # Inconsistent but visable colors
        scale_max = .7
        #consistent colors (needs to know highest K)
        #maxval = np.array(metric).max()
        #scale_max = .7 * (float(maxval) / 20.0)
        colors = df2.scores_to_color(metric, cmap_=cmap_,
                                     reverse_cmap=reverse_cmap,
                                     scale_max=scale_max,
                                     val2_customcolor=val2_customcolor)
    else:
        raise ValueError('no known colortype = %r' % (colortype,))
    viz_chip.show_chip(ibs, aid, color=colors, darken=darken,
                       ell_alpha=.8,
                       #ell_linewidth=4,
                       ell_linewidth=2,
                       **kwargs)
    df2.set_figtitle(figtitle)
    if has_metric:
        is_custom = (colortype == 'custom')
        cb = df2.colorbar(metric, colors, custom=is_custom)
        cb.set_label(lbl)


[docs]def get_qres_and_closest_valid_k(ibs, aid, K=4):
    """
    Queries ``aid`` against all valid annotations with the 'vsmany' pipeline
    and returns its query result.

    NOTE: the function currently returns ``(qres, None)`` right after the
    query. The code that would compute, per query feature, the rank of the
    closest correct neighbor sits after that return and is never executed.

    Args:
        ibs (IBEISController):
        aid (int): query annotation rowid
        K (int): value of the 'K' entry of the query config

    Returns:
        tuple: (qres, None)

    Example:
        >>> from ibeis.model.hots.smk.smk_plots import *  # NOQA
        >>> import numpy as np
        >>> from ibeis.model.hots import query_request
        >>> import ibeis
        >>> ibs = ibeis.opendb('testdb1')
        >>> aid = 2
    """
    # FIXME: Put query_cfg into the qreq_ structure by itself.
    # Don't change the IBEIS Structure
    cfgdict = {
        'pipeline_root': 'vsmany',
        'with_metadata': True,
        'K': K,
        #'sv_on': False,
        'sv_on': True,
        #K=4
    }
    #ibs.cfg.query_cfg.pipeline_root = 'vsmany'
    #ibs.cfg.query_cfg.with_metadata = True
    qaid2_qres, qreq_ = ibs.query_chips([aid], ibs.get_valid_aids(), use_cache=False, return_request=True, cfgdict=cfgdict)
    indexer = qreq_.indexer
    qres = qaid2_qres[aid]
    # Early exit: everything below this return is unreachable
    return qres, None
    (qfx2_idx, qfx2_dist) = qres.metadata['nns']
    nid = ibs.get_annot_name_rowids(aid)
    qfx2_aids = indexer.get_nn_aids(qfx2_idx)
    qfx2_nids = ibs.get_annot_name_rowids(qfx2_aids)
    # NOTE(review): despite the name this is True where the neighbor is NOT the query annotation
    qfx2_isself  = qfx2_aids != aid
    qfx2_correct = np.logical_and(qfx2_nids == nid, qfx2_isself)
    # Mark the top ranked groundtruth
    qfx2_valid_ks = [np.flatnonzero(ranks) for ranks in qfx2_correct]
    NO_VALID_RANKS_CODE = -2
    POSSIBLY_VALID_RANKS_CODE = -1
    qfx2_closest_k = [ks[0] if len(ks) > 0 else NO_VALID_RANKS_CODE for ks in qfx2_valid_ks]
    # Mark cases where it is not possible to know the groundtruth
    qfx2_isimpossible = np.logical_and(qfx2_nids < 0, qfx2_isself)
    qfx2_possibly_impossible_ks = [np.flatnonzero(ranks) for ranks in qfx2_isimpossible]
    # Mark as POSSIBLY_VALID_RANKS_CODE if there is no best k
    #
    def is_possible(k, pi_ks):
        # With the flag below set to False, this is True only when the feature
        # has no correct neighbor but has at least one neighbor with nid < 0
        ERR_ON_THE_SIDE_OF_THE_IMPOSSIBLE = False
        if len(pi_ks) == 0:
            return False
        elif k == NO_VALID_RANKS_CODE:
            return True
        elif ERR_ON_THE_SIDE_OF_THE_IMPOSSIBLE and pi_ks[0] < k:
            return True
        else:
            return False

    qfx2_closest_k2 = [POSSIBLY_VALID_RANKS_CODE if is_possible(k, pi_ks) else k
                       for pi_ks, k in zip(qfx2_possibly_impossible_ks, qfx2_closest_k)]
    return qres, qfx2_closest_k2


def viz_annot_with_metrics(ibs, invindex, aid, metrics,
                           metric_keys=['wx2_nMembers',
                                        ('wx2_pdist_stats', 'mean'),
                                        ('wx2_wdist_stats', 'mean')],
                           show_orig=True,
                           show_idf=True,
                           show_words=False,
                           show_analysis=True,
                           show_aveprecision=True,
                           show_featweights=True,
                           qfx2_closest_k_list=None,
                           show_word_correct_assignments=False,
                           qres_list=None):
    """
    Draws a series of figures for one annotation, each coloring the
    annotation's features by a different quantity. Figures are numbered
    consecutively starting at 1 in the order the plots are enabled.

    Args:
        ibs (IBEISController):
        invindex (InvertedIndex): object for fast vocab lookup
        aid (int): annotation rowid
        metrics (namedtuple): output of ``compute_word_metrics``
        metric_keys (list): keys understood by ``metric_clamped_stat``; one
            plot is drawn per key. The list is never modified.
        show_orig (bool): plot the chip without a metric
        show_idf (bool): color features by the idf of their word
        show_words (bool): color features by their word index
        show_analysis (bool): show the analysis of each query result
        show_aveprecision (bool): show the precision recall curve of each
            query result
        show_featweights (bool): color features by their foreground weight
        qfx2_closest_k_list (list): one entry per query result; each entry is
            either None or the per-feature rank of the closest correct match
        show_word_correct_assignments (bool): mark features whose word also
            contains a feature of a groundtruth annotation
        qres_list (list): query results parallel to ``qfx2_closest_k_list``;
            entries may be None

    Example:
        >>> from ibeis.model.hots.smk.smk_plots import *  # NOQA
        >>> from ibeis.model.hots.smk import smk_debug
        >>> from ibeis.model.hots.smk import smk_repr
        >>> #tup = smk_debug.testdata_raw_internals0(db='GZ_ALL', nWords=64000)
        >>> #tup = smk_debug.testdata_raw_internals0(db='GZ_ALL', nWords=8000)
        >>> #tup = smk_debug.testdata_raw_internals0(db='PZ_Master0', nWords=64000)
        >>> tup = smk_debug.testdata_raw_internals0(db='PZ_Mothers', nWords=8000)
        >>> ibs, annots_df, daids, qaids, invindex, qreq_ = tup
        >>> smk_repr.compute_data_internals_(invindex, qreq_.qparams, delete_rawvecs=False)
        >>> invindex.idx2_wxs = np.array(invindex.idx2_wxs)
        >>> metric_keys=['wx2_nMembers', ('wx2_pdist_stats', 'mean'), ('wx2_wdist_stats', 'mean')]
        >>> metrics = compute_word_metrics(invindex)
        >>> aid = 1

    """
    #viz_chip.rrr()
    #df2.rrr()
    kpts = ibs.get_annot_kpts(aid)
    if ut.VERBOSE:
        ut.super_print(kpts)

    # The word assignment is read by the idf, word, correct-assignment and
    # word-metric plots, so it has to be computed whenever any of them runs.
    needs_wxs = (show_word_correct_assignments or show_idf or show_words or
                 len(metric_keys) > 0)
    if needs_wxs:
        # Get only the first assigned word
        # FIXME: need to look at multi-assignment
        _mask = invindex.idx2_daid == aid
        fxs = invindex.idx2_dfx[_mask]
        wxs = invindex.idx2_wxs[_mask].T[0].T

        assert len(fxs) == len(kpts)
        assert len(fxs) == len(wxs)

    fnum = 1

    dbname = ibs.get_dbname()

    def _plot(metric, fnum=1, lbl='', annote=True, darken=.1, colortype='score', **kwargs):
        # Draws one figure and returns the next free figure number
        print('ploting fnum=%r' % fnum)
        #lblaug = ' db=%r, nWords = %r' % (dbname, nWords)
        lblaug = ' db=%r' % (dbname)
        figtitle = lbl + lblaug
        plot_chip_metric(ibs, aid, metric=metric, fnum=fnum, lbl=lbl, figtitle=figtitle,
                         annote=annote, darken=darken, colortype=colortype, **kwargs)
        return fnum + 1

    def _qres_title(qres_):
        # A missing query result contributes nothing to the label
        return '' if qres_ is None else qres_.make_smaller_title()

    # Original Plot
    if show_orig:
        fnum = _plot(None, fnum=fnum, lbl='Orig Chip', annote=False, darken=None)

    # IDF Plot
    if show_idf:
        idf_list = np.array(list(ut.dict_take_gen(invindex.wx2_idf, wxs)))
        fnum = _plot(idf_list, fnum=fnum, lbl='IDF')
        print('stats(idf_list) = ' + ut.get_stats_str(idf_list))

    # Word Plot
    if show_words:
        fnum = _plot(wxs, fnum=fnum, lbl='Words', colortype='label')

    # LNBNN Result Plots
    # The last query result (if any) labels the correct-assignment plot below
    last_qres = None
    if qfx2_closest_k_list is not None:
        if qres_list is None:
            qres_list = [None] * len(qfx2_closest_k_list)
        for qres, qfx2_closest_k in zip(qres_list, qfx2_closest_k_list):
            last_qres = qres
            print('  --- qres item ---')
            if qres is not None:
                from ibeis.model.hots.hots_query_result import QueryResult
                assert isinstance(qres, QueryResult)
                if show_analysis:
                    qres.show_analysis(ibs=ibs, fnum=fnum, figtitle=qres.make_smaller_title())
                    fnum += 1
                if show_aveprecision:
                    qres.show_precision_recall_curve(ibs=ibs, fnum=fnum)
                    fnum += 1

            if qfx2_closest_k is not None:
                # Plot ranked positions
                qfx2_closest_k = np.array(qfx2_closest_k)
                qfx2_closest_k_qeq0 = qfx2_closest_k[qfx2_closest_k >= 0]
                qfx2_closest_k_lt0  = qfx2_closest_k[qfx2_closest_k < 0]
                print('stats(qfx2_closest_k_qeq0) = ' + ut.get_stats_str(qfx2_closest_k_qeq0))
                print('stats(qfx2_closest_k_lt0)  = ' + ut.get_stats_str(qfx2_closest_k_lt0))
                fnum = _plot(qfx2_closest_k, fnum=fnum, lbl='Correct Ranks ' + _qres_title(qres), colortype='custom', reverse_cmap=True)

    # Correct word assignment plots
    if show_word_correct_assignments:
        unique_wxs, unique_inverse = np.unique(wxs, return_inverse=True)
        # Get the aids that belong to each word
        _idxs_list = ut.dict_take(invindex.wx2_idxs, unique_wxs)
        _aids_list = [invindex.idx2_daid.take(idxs) for idxs in _idxs_list]
        # Check if this word will provide a correct assignment -
        # two ground truth chip exist within the same word
        gt_aids = np.array(ibs.get_annot_groundtruth(aid))
        _hastp_list = np.array([len(np.intersect1d(aids, gt_aids)) > 0 for aids in _aids_list])
        # Map back to the space of features
        # mark each feature match as having a correct word mapping or not
        hascorrectmatch = _hastp_list[unique_inverse]
        # True -> 1 and False -> -2, both drawn by the 'custom' colortype
        hascorrectmatch_ = hascorrectmatch.astype(np.int32) * 3 - 2
        lbl = 'Correct Words ' + _qres_title(last_qres) + '\n Yellow means the word contains a correct match in the word\'s invindex. Blue is the opposite.'
        fnum = _plot(hascorrectmatch_, fnum=fnum, lbl=lbl, colortype='custom', reverse_cmap=False)

    # Feature Weight Plots
    if show_featweights:
        from ibeis.model.preproc import preproc_featweight
        featweights = preproc_featweight.compute_fgweights(ibs, [aid])[0]
        # plot rf feature weights
        detect_cfgstr = ibs.cfg.detect_cfg.get_cfgstr()
        fnum = _plot(featweights, fnum=fnum, lbl='Feature Weights ' + detect_cfgstr, colortype='score')

    # Word Metric Plots
    def fixstr(str_):
        return str_.replace('wx2_', '').replace('_stats', '')

    for metrickey in metric_keys:
        if isinstance(metrickey, tuple):
            #lbl = repr(metrickey)
            lbl = '%s(%s)' % (metrickey[1].upper(), fixstr(metrickey[0]))
        else:
            lbl = str(metrickey)
        metric_list = metric_clamped_stat(metrics, wxs, metrickey)
        fnum = _plot(metric_list, fnum=fnum, lbl=lbl)


def smk_plots_main():
    """
    Interactive driver: loads test data, computes the word metrics once and
    then, for each valid annotation, runs the queries and shows the plots
    selected by ``main_options``.

    smk
    python smk_plots.py --db PZ_MTEST --notoolbar

    CommandLine:
        python -m ibeis.model.hots.smk.smk_plots --test-smk_plots_main
        python -m ibeis.model.hots.smk.smk_plots --test-smk_plots_main --db PZ_MTEST --notoolbar

    Example:
        >>> # DISABLE_DOCTEST
        >>> from ibeis.model.hots.smk.smk_plots import *  # NOQA
        >>> smk_plots_main()
    """
    from ibeis.model.hots.smk import smk_plots
    import utool as ut
    #from plottool import draw_func2 as df2
    testdata_kw = {
        #'db': 'GZ_ALL',
        #'db': 'PZ_MTEST',
        'db': ut.get_argval('--db', str, default='testdb1'),
        'nWords': ut.get_argval('--nWords', int, default=8000),
        'delete_rawvecs': False,
    }
    (ibs, annots_df, daids, qaids, invindex, qreq_) = smk_debug.testdata_internals_full(**testdata_kw)
    #try:
    #    testdata = ('metrics',)
    #    metrics = ut.load_testdata(*testdata)
    #except Exception as ex:
    metrics = smk_plots.compute_word_metrics(invindex)
    #ut.save_testdata(*testdata)
    valid_aids = ibs.get_valid_aids()

    # HACK
    is_grevys = ibs.get_dbname().startswith('GZ_')
    ibs.cfg.detect_cfg.species_text = 'zebra_grevys' if is_grevys else 'zebra_plains'

    # Define the plots you want

    default_startx = min(18, len(valid_aids) - 1)
    startx = ut.get_argval(('--startx', '--x'), int, default=default_startx)

    for aid in ut.InteractiveIter(valid_aids, startx=startx):
        #df2.rrr()
        #smk_plots.rrr()
        print('[smk_plot] visualizing annotation aid=%r' % (aid,))
        # Options are re-read through the module every iteration
        plot_opts = smk_plots.main_options()
        K_list = plot_opts.pop('K_list')
        # One query per K
        result_pairs = [smk_plots.get_qres_and_closest_valid_k(ibs, aid, K=K)
                        for K in K_list]
        qres_list = [pair[0] for pair in result_pairs]
        qfx2_closest_k_list = [pair[1] for pair in result_pairs]
        smk_plots.viz_annot_with_metrics(ibs, invindex, aid, metrics,
                                         qfx2_closest_k_list=qfx2_closest_k_list,
                                         qres_list=qres_list, **plot_opts)
        smk_plots.present()
    #return execstr


def present():
    """
    Calls ``df2.present`` with the layout used by this module
    (``max_rows=4, row_first=False``) and returns its result.
    """
    # In its own function for reloadableness
    from plottool import draw_func2 as df2
    layout = dict(max_rows=4, row_first=False)
    return df2.present(**layout)


def main_options():
    """
    Returns a fresh dict with the plot switches used by ``smk_plots_main``.

    Returns:
        dict: the ``show_*`` flags and ``metric_keys`` accepted by
            ``viz_annot_with_metrics``, plus ``K_list`` (the K value of each
            query to run)
    """
    return {
        'show_orig': False,
        'show_idf': False,
        'show_words': False,
        'show_analysis': True,
        'show_aveprecision': False,
        'show_featweights': False,
        'show_word_correct_assignments': True,
        'metric_keys': [
            #'wx2_nMembers',
            #('wx2_pdist_stats', 'mean'),
            #('wx2_wdist_stats', 'mean'),
        ],
        'K_list': [2, 4, 10],
        #'K_list': [10, 20],
        #'K_list': [4, 10],
    }


#if __name__ == '__main__':
#    """
#    >>> aid = 1
#    """
#    execstr = smk_plots_main()
#    #exec(execstr)

# Script entry point: hands control to utool's doctest runner.
if __name__ == '__main__':
    """
    python -m ibeis.model.hots.smk.smk_plots --test-view_vocabs --show
    python -m ibeis.model.hots.smk.smk_debug --test-main_smk_debug
    python -m ibeis.model.hots.smk.smk_plots --test-smk_plots_main --db PZ_MTEST --notoolbar

    CommandLine:
        python -m ibeis.model.hots.smk.smk_plots
        python -m ibeis.model.hots.smk.smk_plots --allexamples
        python -m ibeis.model.hots.smk.smk_plots --allexamples --noface --nosrc
    """
    import multiprocessing
    multiprocessing.freeze_support()  # for win32
    import utool as ut  # NOQA
    # presumably picks the doctests to run from the --test-<funcname> /
    # --allexamples flags shown above -- TODO confirm against utool
    ut.doctest_funcs()