# -*- coding: utf-8 -*-
from __future__ import absolute_import, division, print_function, unicode_literals
import utool as ut
[docs]def rectify_names(ibs, aid_list=None, old_img2_names=None, hack_prefix=''):
r"""
Changes the names in the IA-database to correspond to an older naming
convention. If splits and merges were preformed tries to find the
maximally consistent renaming scheme.
Args:
ibs (ibeis.IBEISController): image analysis api
aid_list (list): list of annotation rowids
img_list (list):
name_list (list): (default = None)
CommandLine:
python -m ibeis.scripts.name_recitifer rectify_names --show
Example:
>>> # DISABLE_DOCTEST
>>> from ibeis.scripts.name_recitifer import * # NOQA
>>> import ibeis
>>> ibs = ibeis.opendb(defaultdb='testdb1')
>>> aid_list = None
>>> hack_prefix = ''
>>> old_img2_names = None #['img_fred.png', ']
>>> result = rectify_names(ibs, aid_list, img_list, name_list)
"""
if aid_list is None:
aid_list = ibs.get_valid_aids()
# Group annotations by their current IA-name
nid_list = ibs.get_annot_name_rowids(aid_list)
nid2_aids = ut.group_items(aid_list, nid_list)
unique_nids = list(nid2_aids.keys())
grouped_aids = list(nid2_aids.values())
# Get grouped images
grouped_imgnames = ibs.unflat_map(ibs.get_annot_image_names, grouped_aids)
# Assume a mapping from old image names to old names is given.
# Or just hack it in the Lewa case.
if old_img2_names is None:
def hackkey(gname):
from os.path import splitext
gname_, ext = splitext(gname)
gname_.lstrip(hack_prefix)
return gname_
# Create mapping from image name to the desired "name" for the image.
old_img2_names = {gname: hackkey(gname) for gname in ut.flatten(grouped_imgnames)}
# Find which old names correspond to the current IA-name grouping
grouped_oldnames = [ut.take(old_img2_names, gnames) for gnames in grouped_imgnames]
# The task is now to map each name in unique_nids to one of these names
# subject to the contraint that each name can only be used once.
# This is solved using a maximum bipartite matching. The new names are the left nodes,
# the old name are the right nodes, and grouped_oldnames definse the adjacency matrix.
# NOTE: In rare cases it may be impossible to find a correct labeling using
# only old names. In this case new names will be created.
new_name_text = find_consistent_labeling(grouped_oldnames)
dry = False
if not dry:
# Update the state of the image analysis database
ibs.set_name_texts(unique_nids, new_name_text)
[docs]def find_consistent_labeling(grouped_oldnames):
"""
Solves a a maximum bipirtite matching problem to find a consistent
name assignment.
Notes:
# Install module containing the Hungarian algorithm for matching
pip install munkres
Example:
>>> # DISABLE_DOCTEST
>>> from ibeis.scripts.name_recitifer import * # NOQA
>>> grouped_oldnames = [['a', 'b'], ['b', 'c'], ['c', 'a', 'a']]
>>> new_names = find_consistent_labeling(grouped_oldnames)
>>> print(new_names)
[u'b', u'c', u'a']
Example:
>>> # DISABLE_DOCTEST
>>> from ibeis.scripts.name_recitifer import * # NOQA
>>> grouped_oldnames = [['a', 'b', 'c'], ['b', 'c'], ['c', 'e', 'e']]
>>> new_names = find_consistent_labeling(grouped_oldnames)
>>> print(new_names)
[u'a', u'b', u'e']
Example:
>>> # DISABLE_DOCTEST
>>> from ibeis.scripts.name_recitifer import * # NOQA
>>> grouped_oldnames = [['a', 'b'], ['a', 'a', 'b'], ['a']]
>>> new_names = find_consistent_labeling(grouped_oldnames)
>>> print(new_names)
[u'a', u'b', u'e']
"""
import numpy as np
try:
import munkres
except ImportError:
print('Need to install Hungrian algorithm bipartite matching solver.')
print('Run:')
print('pip install munkres')
raise
unique_old_names = ut.unique(ut.flatten(grouped_oldnames))
num_new_names = len(grouped_oldnames)
num_old_names = len(unique_old_names)
extra_oldnames = []
# Create padded dummy values. This accounts for the case where it is
# impossible to uniquely map to the old db
num_extra = num_new_names - num_old_names
if num_extra > 0:
extra_oldnames = ['_extra_name%d' % (count,) for count in
range(num_extra)]
elif num_extra < 0:
pass
else:
extra_oldnames = []
assignable_names = unique_old_names + extra_oldnames
total = len(assignable_names)
# Allocate assignment matrix
profit_matrix = np.zeros((total, total), dtype=np.int)
# Populate assignment profit matrix
oldname2_idx = ut.make_index_lookup(assignable_names)
name_freq_list = [ut.dict_hist(names) for names in grouped_oldnames]
for rowx, name_freq in enumerate(name_freq_list):
for name, freq in name_freq.items():
colx = oldname2_idx[name]
profit_matrix[rowx, colx] += freq
# Add extra profit for using a previously used name
profit_matrix[profit_matrix > 0] += 2
# Add small profit for using an extra name
extra_colxs = ut.take(oldname2_idx, extra_oldnames)
profit_matrix[:, extra_colxs] += 1
# Convert to minimization problem
big_value = (profit_matrix.max())
cost_matrix = big_value - profit_matrix
m = munkres.Munkres()
indexes = m.compute(cost_matrix)
# Map output to be aligned with input
rx2_cx = dict(indexes)
assignment = [assignable_names[rx2_cx[rx]]
for rx in range(num_new_names)]
return assignment