# Source code for ibeis.scripts.name_recitifer (sic — module name is misspelled in the repo)

# -*- coding: utf-8 -*-
from __future__ import absolute_import, division, print_function, unicode_literals
import utool as ut


def rectify_names(ibs, aid_list=None, old_img2_names=None, hack_prefix=''):
    r"""
    Changes the names in the IA-database to correspond to an older naming
    convention. If splits and merges were performed, tries to find the
    maximally consistent renaming scheme.

    Args:
        ibs (ibeis.IBEISController): image analysis api
        aid_list (list): list of annotation rowids
            (default: all valid aids in ``ibs``)
        old_img2_names (dict): maps image names to the desired annotation
            name. If None, a name is derived from each image name by
            dropping the file extension and stripping ``hack_prefix``.
        hack_prefix (str): literal prefix stripped from image names when
            deriving names (only used when ``old_img2_names`` is None)

    CommandLine:
        python -m ibeis.scripts.name_recitifer rectify_names --show

    Example:
        >>> # DISABLE_DOCTEST
        >>> from ibeis.scripts.name_recitifer import *  # NOQA
        >>> import ibeis
        >>> ibs = ibeis.opendb(defaultdb='testdb1')
        >>> result = rectify_names(ibs, aid_list=None, old_img2_names=None)
    """
    if aid_list is None:
        aid_list = ibs.get_valid_aids()
    # Group annotations by their current IA-name
    nid_list = ibs.get_annot_name_rowids(aid_list)
    nid2_aids = ut.group_items(aid_list, nid_list)
    unique_nids = list(nid2_aids.keys())
    grouped_aids = list(nid2_aids.values())
    # Get grouped images
    grouped_imgnames = ibs.unflat_map(ibs.get_annot_image_names, grouped_aids)
    # Assume a mapping from old image names to old names is given.
    # Or just hack it in the Lewa case.
    if old_img2_names is None:
        def hackkey(gname):
            from os.path import splitext
            gname_, ext = splitext(gname)
            # BUGFIX: the original called ``gname_.lstrip(hack_prefix)`` and
            # discarded the result (strings are immutable). Also, lstrip
            # removes a *character set*, not a prefix; strip the literal
            # prefix instead.
            if hack_prefix and gname_.startswith(hack_prefix):
                gname_ = gname_[len(hack_prefix):]
            return gname_
        # Create mapping from image name to the desired "name" for the image.
        old_img2_names = {gname: hackkey(gname)
                          for gname in ut.flatten(grouped_imgnames)}
    # Find which old names correspond to the current IA-name grouping
    grouped_oldnames = [ut.take(old_img2_names, gnames)
                        for gnames in grouped_imgnames]
    # The task is now to map each name in unique_nids to one of these names
    # subject to the constraint that each name can only be used once.
    # This is solved using a maximum bipartite matching. The new names are
    # the left nodes, the old names are the right nodes, and
    # grouped_oldnames defines the adjacency matrix.
    # NOTE: In rare cases it may be impossible to find a correct labeling
    # using only old names. In this case new names will be created.
    new_name_text = find_consistent_labeling(grouped_oldnames)
    # Update the state of the image analysis database
    ibs.set_name_texts(unique_nids, new_name_text)
def find_consistent_labeling(grouped_oldnames):
    """
    Solves a maximum bipartite matching problem to find a consistent
    name assignment: each group of annotations receives one name, each
    name is used at most once, and the overlap with the old names is
    maximized.

    Args:
        grouped_oldnames (list of list of str): for each new name slot,
            the old names its annotations previously had.

    Returns:
        list: one assigned name per input group (aligned with the
        input). When a consistent mapping onto old names is impossible,
        placeholder names of the form ``_extra_name%d`` are used.

    Notes:
        Solved with the Hungarian algorithm. Prefers the ``munkres``
        package (``pip install munkres``) and falls back to
        ``scipy.optimize.linear_sum_assignment`` when it is missing.

    Example:
        >>> # DISABLE_DOCTEST
        >>> from ibeis.scripts.name_recitifer import *  # NOQA
        >>> grouped_oldnames = [['a', 'b'], ['b', 'c'], ['c', 'a', 'a']]
        >>> new_names = find_consistent_labeling(grouped_oldnames)
        >>> print(new_names)
        ['b', 'c', 'a']

    Example:
        >>> # DISABLE_DOCTEST
        >>> from ibeis.scripts.name_recitifer import *  # NOQA
        >>> grouped_oldnames = [['a', 'b'], ['a', 'a', 'b'], ['a']]
        >>> new_names = find_consistent_labeling(grouped_oldnames)
        >>> print(new_names)
        ['b', 'a', '_extra_name0']
    """
    import numpy as np
    from collections import Counter, OrderedDict
    from itertools import chain

    if len(grouped_oldnames) == 0:
        # Nothing to assign; also avoids .max() on an empty profit matrix.
        return []

    # Unique old names in first-seen order
    unique_old_names = list(OrderedDict.fromkeys(
        chain.from_iterable(grouped_oldnames)))
    num_new_names = len(grouped_oldnames)
    num_old_names = len(unique_old_names)

    # Create padded dummy values. This accounts for the case where it is
    # impossible to uniquely map every group onto an old name.
    num_extra = num_new_names - num_old_names
    if num_extra > 0:
        extra_oldnames = ['_extra_name%d' % (count,)
                          for count in range(num_extra)]
    else:
        extra_oldnames = []
    assignable_names = unique_old_names + extra_oldnames
    total = len(assignable_names)

    # Allocate square assignment matrix (extra rows/cols stay zero).
    # NOTE: dtype must be the builtin int; ``np.int`` was removed in
    # NumPy 1.24.
    profit_matrix = np.zeros((total, total), dtype=int)
    # Populate assignment profit matrix
    oldname2_idx = {name: colx for colx, name in enumerate(assignable_names)}
    for rowx, names in enumerate(grouped_oldnames):
        for name, freq in Counter(names).items():
            profit_matrix[rowx, oldname2_idx[name]] += freq
    # Add extra profit for using a previously used name
    profit_matrix[profit_matrix > 0] += 2
    # Add small profit for using an extra name
    extra_colxs = [oldname2_idx[name] for name in extra_oldnames]
    profit_matrix[:, extra_colxs] += 1

    # Convert the maximization into a minimization problem
    big_value = profit_matrix.max()
    cost_matrix = big_value - profit_matrix
    indexes = _solve_assignment(cost_matrix)

    # Map output to be aligned with input
    rx2_cx = dict(indexes)
    assignment = [assignable_names[rx2_cx[rx]]
                  for rx in range(num_new_names)]
    return assignment


def _solve_assignment(cost_matrix):
    """Solve the linear sum assignment (Hungarian) problem.

    Args:
        cost_matrix (ndarray): square cost matrix to minimize over.

    Returns:
        list: (row, col) index pairs of the minimum-cost assignment.

    Prefers the ``munkres`` package (matching the original behavior) and
    falls back to scipy; raises ImportError only if neither is available.
    """
    try:
        import munkres
    except ImportError:
        try:
            from scipy.optimize import linear_sum_assignment
        except ImportError:
            print('Need to install Hungarian algorithm bipartite matching solver.')
            print('Run:')
            print('pip install munkres')
            raise
        row_idx, col_idx = linear_sum_assignment(cost_matrix)
        return list(zip(row_idx, col_idx))
    # munkres operates on nested lists, not ndarrays
    return munkres.Munkres().compute(cost_matrix.tolist())