Source code for ibeis.algo.hots._grave_bayes

"""

Arc reversal
http://www.cs.toronto.edu/~cebly/Papers/simulation.pdf

TODO:
    Need to find faster, more mature libraries
    http://dlib.net/bayes.html
    http://www.cs.waikato.ac.nz/ml/weka/
    http://www.cs.waikato.ac.nz/~remco/weka.bn.pdf
    https://code.google.com/p/pebl-project/
    https://github.com/abhik/pebl
    http://www.cs.ubc.ca/~murphyk/Software/bnsoft.html

    Demo a case where the labels of all other annotations are assumed known
    and only one name is unknown; classify it as one of the known names.

References:
    https://en.wikipedia.org/wiki/Bayesian_network
    https://class.coursera.org/pgm-003/lecture/17
    http://www.cs.ubc.ca/~murphyk/Bayes/bnintro.html
    http://www3.cs.stonybrook.edu/~sael/teaching/cse537/Slides/chapter14d_BP.pdf
    http://www.cse.unsw.edu.au/~cs9417ml/Bayes/Pages/PearlPropagation.html
    https://github.com/pgmpy/pgmpy.git
    http://pgmpy.readthedocs.org/en/latest/
    http://nipy.bic.berkeley.edu:5000/download/11
    http://pgmpy.readthedocs.org/en/latest/wiki.html#add-feature-to-accept-and-output-state-names-for-models
    http://www.csse.monash.edu.au/bai/book/BAI_Chapter2.pdf


Clustering with CRF:
    http://srl.informatik.uni-freiburg.de/publicationsdir/tipaldiIROS09.pdf
    http://www.dis.uniroma1.it/~dottoratoii/media/students/documents/thesis_tipaldi.pdf
    An Unsupervised Conditional Random Fields Approach for Clustering Gene Expression Time Series
    http://bioinformatics.oxfordjournals.org/content/24/21/2467.full

CRFs:
    http://homepages.inf.ed.ac.uk/csutton/publications/crftutv2.pdf

AlphaBeta Swap:
    https://github.com/amueller/gco_python
    https://github.com/pmneila/PyMaxflow
    http://www.cs.cornell.edu/rdz/papers/bvz-iccv99.pdf

    http://arxiv.org/pdf/1411.6340.pdf  Iteratively Reweighted Graph Cut for Multi-label MRFs with Non-convex Priors

Fusion Moves:
    http://www.robots.ox.ac.uk/~vilem/fusion.pdf
    http://hci.iwr.uni-heidelberg.de/publications/mip/techrep/beier_15_fusion.pdf

Consensus Clustering

Explaining Away

Course Notes:
    Tie breaking for MAP assignment.
    https://class.coursera.org/pgm-003/lecture/60
    * random perturbation

    Correspondence Problem is discussed in
    https://class.coursera.org/pgm-003/lecture/68

    Sparse Pattern Factors

    Collective Inference:
    Plate Models / Aggregator CPDs are used to define dependencies.

"""
def try_query(model, infr, evidence, interest_ttypes=[], verbose=True):
    r"""
    CommandLine:
        python -m ibeis.algo.hots.bayes --exec-try_query --show

    Example:
        >>> # DISABLE_DOCTEST
        >>> from ibeis.algo.hots.bayes import *  # NOQA
        >>> verbose = True
        >>> other_evidence = {}
        >>> name_evidence = [1, None, 0, None]
        >>> score_evidence = ['high', 'low', 'low']
        >>> query_vars = None
        >>> model = make_name_model(num_annots=4, num_names=4, verbose=True, mode=1)
        >>> model, evidence, soft_evidence = update_model_evidence(model, name_evidence, score_evidence, other_evidence)
        >>> interest_ttypes = ['name']
        >>> infr = pgmpy.inference.BeliefPropagation(model)
        >>> evidence = infr._ensure_internal_evidence(evidence, model)
        >>> query_results = try_query(model, infr, evidence, interest_ttypes, verbose)
        >>> result = ('query_results = %s' % (str(query_results),))
        >>> ut.quit_if_noshow()
        >>> show_model(model, show_prior=True, **query_results)
        >>> ut.show_if_requested()

    Ignore:
        query_vars = ut.setdiff_ordered(model.nodes(), list(evidence.keys()))
        probs = infr.query(query_vars, evidence)
        map_assignment = infr.map_query(query_vars, evidence)
    """
    infr = pgmpy.inference.VariableElimination(model)
    #infr = pgmpy.inference.BeliefPropagation(model)
    if True:
        return bruteforce(model, query_vars=None, evidence=evidence)
    else:
        import vtool as vt
        query_vars = ut.setdiff_ordered(model.nodes(), list(evidence.keys()))
        # hack
        query_vars = ut.setdiff_ordered(
            query_vars, ut.list_getattr(model.ttype2_cpds['score'], 'variable'))
        if verbose:
            evidence_str = ', '.join(model.pretty_evidence(evidence))
            print('P(' + ', '.join(query_vars) + ' | ' + evidence_str + ') = ')
        # Compute MAP joints
        # There is a bug here.
        #map_assign = infr.map_query(query_vars, evidence)
        # (probably an invalid thing to do)
        #joint_factor = pgmpy.factors.factor_product(*factor_list)
        # Brute force MAP
        name_vars = ut.list_getattr(model.ttype2_cpds['name'], 'variable')
        query_name_vars = ut.setdiff_ordered(name_vars, list(evidence.keys()))
        # TODO: incorporate case where Na is assigned to Fred
        #evidence_h = ut.delete_keys(evidence.copy(), ['Na'])
        joint = model.joint_distribution()
        joint.evidence_based_reduction(query_name_vars, evidence, inplace=True)
        # Find static row labels in the evidence
        given_name_vars = [var for var in name_vars if var in evidence]
        given_name_idx = ut.dict_take(evidence, given_name_vars)
        given_name_val = [joint.statename_dict[var][idx]
                          for var, idx in zip(given_name_vars, given_name_idx)]
        new_vals = joint.values.ravel()
        # Add static evidence variables to the relabeled name states
        new_vars = given_name_vars + joint.variables
        new_rows = [tuple(given_name_val) + row for row in joint._row_labels()]
        # Relabel rows based on the knowledge that
        # everything is the same, only the names have changed.
        temp_basis = [i for i in range(model.num_names)]

        def relabel_names(names, temp_basis=temp_basis):
            names = list(map(six.text_type, names))
            mapping = {}
            for n in names:
                if n not in mapping:
                    mapping[n] = len(mapping)
            new_names = tuple([temp_basis[mapping[n]] for n in names])
            return new_names

        relabeled_rows = list(map(relabel_names, new_rows))
        # Combine probability of rows with the same (new) label
        data_ids = np.array(vt.other.compute_unique_data_ids_(relabeled_rows))
        unique_ids, groupxs = vt.group_indices(data_ids)
        reduced_row_lbls = ut.take(relabeled_rows, ut.get_list_column(groupxs, 0))
        reduced_row_lbls = list(map(list, reduced_row_lbls))
        reduced_values = np.array([
            g.sum() for g in vt.apply_grouping(new_vals, groupxs)
        ])
        # Relabel the rows one more time to agree with initial constraints
        used_ = []
        replaced = []
        for colx, (var, val) in enumerate(zip(given_name_vars, given_name_val)):
            # All columns must be the same for this labeling
            alias = reduced_row_lbls[0][colx]
            reduced_row_lbls = ut.list_replace(reduced_row_lbls, alias, val)
            replaced.append(alias)
            used_.append(val)
        basis = model.ttype2_cpds['name'][0]._template_.basis
        find_remain_ = ut.setdiff_ordered(temp_basis, replaced)
        repl_remain_ = ut.setdiff_ordered(basis, used_)
        for find, repl in zip(find_remain_, repl_remain_):
            reduced_row_lbls = ut.list_replace(reduced_row_lbls, find, repl)
        # Now find the most likely state
        sortx = reduced_values.argsort()[::-1]
        sort_reduced_row_lbls = ut.take(reduced_row_lbls, sortx.tolist())
        sort_reduced_values = reduced_values[sortx]
        # Remove evidence based labels
        new_vars_ = new_vars[len(given_name_vars):]
        sort_reduced_row_lbls_ = ut.get_list_column(
            sort_reduced_row_lbls, slice(len(given_name_vars), None))
        # hack into a new joint factor
        var_states = ut.lmap(ut.unique_ordered, zip(*sort_reduced_row_lbls_))
        statename_dict = dict(zip(new_vars, var_states))
        cardinality = ut.lmap(len, var_states)
        val_lookup = dict(zip(ut.lmap(tuple, sort_reduced_row_lbls_),
                              sort_reduced_values))
        values = np.zeros(np.prod(cardinality))
        for idx, state in enumerate(ut.iprod(*var_states)):
            if state in val_lookup:
                values[idx] = val_lookup[state]
        joint2 = pgmpy.factors.Factor(new_vars_, cardinality, values,
                                      statename_dict=statename_dict)
        print(joint2)
        max_marginals = {}
        for i, var in enumerate(query_name_vars):
            one_out = query_name_vars[:i] + query_name_vars[i + 1:]
            max_marginals[var] = joint2.marginalize(one_out, inplace=False)
            # max_marginals[var] = joint2.maximize(one_out, inplace=False)
        print(joint2.marginalize(['Nb', 'Nc'], inplace=False))
        factor_list = max_marginals.values()
        # Better map assignment based on knowledge of labels
        map_assign = dict(zip(new_vars_, sort_reduced_row_lbls_[0]))
        sort_reduced_rowstr_lbls = [
            ut.repr2(dict(zip(new_vars, lbls)), explicit=True, nobraces=True,
                     strvals=True)
            for lbls in sort_reduced_row_lbls_
        ]
        top_assignments = list(zip(sort_reduced_rowstr_lbls[:3],
                                   sort_reduced_values))
        if len(sort_reduced_values) > 3:
            top_assignments += [('other', 1 - sum(sort_reduced_values[:3]))]
        # Compute all marginals
        #probs = infr.query(query_vars, evidence)
        #factor_list = probs.values()
        ## Marginalize over non-query, non-evidence
        #irrelevant_vars = ut.setdiff_ordered(joint.variables, list(evidence.keys()) + query_vars)
        #joint.marginalize(irrelevant_vars)
        #joint.normalize()
        #new_rows = joint._row_labels()
        #new_vals = joint.values.ravel()
        #map_vals = new_rows[new_vals.argmax()]
        #map_assign = dict(zip(joint.variables, map_vals))
        # Compute Marginalized MAP joints
        #marginalized_joints = {}
        #for ttype in interest_ttypes:
        #    other_vars = [v for v in joint_factor.scope()
        #                  if model.var2_cpd[v].ttype != ttype]
        #    marginal = joint_factor.marginalize(other_vars, inplace=False)
        #    marginalized_joints[ttype] = marginal
        query_results = {
            'factor_list': factor_list,
            'top_assignments': top_assignments,
            'map_assign': map_assign,
            'marginalized_joints': None,
        }
        return query_results
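
# Standalone sketch (a hypothetical helper, not used by the module) of the
# relabeling trick in try_query above: name assignments that differ only by a
# permutation of labels are equivalent, so each row is mapped to a canonical
# form and the probabilities of equivalent rows are pooled.
def _collapse_equivalent_labelings(rows, probs):
    pooled = {}
    for row, p in zip(rows, probs):
        mapping = {}
        # first-seen label -> 0, second-seen label -> 1, ...
        canon = tuple(mapping.setdefault(lbl, len(mapping)) for lbl in row)
        pooled[canon] = pooled.get(canon, 0.0) + p
    return pooled

# ('a', 'a', 'b') and ('b', 'b', 'a') both canonicalize to (0, 0, 1):
# _collapse_equivalent_labelings(
#     [('a', 'a', 'b'), ('b', 'b', 'a'), ('a', 'b', 'b')], [0.2, 0.3, 0.5])
# -> {(0, 0, 1): 0.5, (0, 1, 1): 0.5}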
def name_model_mode5(num_annots, num_names=None, verbose=True, mode=1):
    mode = ut.get_argval('--mode', default=mode)
    annots = ut.chr_range(num_annots, base=ut.get_argval('--base', default='a'))
    # The indexes of match CPDs will not change if another annotation is added
    upper_diag_idxs = ut.colwise_diag_idxs(num_annots, 2)
    if num_names is None:
        num_names = num_annots

    # -- Define CPD Templates
    name_cpd_t = pgm_ext.TemplateCPD(
        'name', ('n', num_names), varpref='N',
        special_basis_pool=SPECIAL_BASIS_POOL)
    name_cpds = [name_cpd_t.new_cpd(parents=aid) for aid in annots]

    def match_pmf(match_type, n1, n2):
        return {
            True: {'same': 1.0, 'diff': 0.0},
            False: {'same': 0.0, 'diff': 1.0},
        }[n1 == n2][match_type]

    match_cpd_t = pgm_ext.TemplateCPD(
        'match', ['diff', 'same'], varpref='M',
        evidence_ttypes=[name_cpd_t, name_cpd_t],
        pmf_func=match_pmf)
    namepair_cpds = ut.list_unflat_take(name_cpds, upper_diag_idxs)
    match_cpds = [match_cpd_t.new_cpd(parents=cpds)
                  for cpds in namepair_cpds]

    def trimatch_pmf(match_ab, match_bc, match_ca):
        lookup = {
            'same': {
                'same': {'same': 1, 'diff': 0},
                'diff': {'same': 0, 'diff': 1},
            },
            'diff': {
                'same': {'same': 0, 'diff': 1},
                'diff': {'same': .5, 'diff': .5},
            }
        }
        return lookup[match_ca][match_bc][match_ab]

    trimatch_cpd_t = pgm_ext.TemplateCPD(
        'tri_match', ['diff', 'same'], varpref='T',
        evidence_ttypes=[match_cpd_t, match_cpd_t],
        pmf_func=trimatch_pmf)
    #triple_idxs = ut.colwise_diag_idxs(num_annots, 3)
    tid2_match = {cpd._template_id: cpd for cpd in match_cpds}
    trimatch_cpds = []
    # such hack
    for cpd in match_cpds:
        parents = []
        this_ = list(cpd._template_id)
        for aid in annots:
            if aid in this_:
                continue
            for aid2 in this_:
                key = aid2 + aid
                if key not in tid2_match:
                    key = aid + aid2
                parents += [tid2_match[key]]
        trimatch_cpds += [trimatch_cpd_t.new_cpd(parents=parents)]

    def score_pmf(score_type, match_type):
        score_lookup = {
            'same': {'low': .1, 'high': .9, 'veryhigh': .9},
            'diff': {'low': .9, 'high': .09, 'veryhigh': .01}
        }
        val = score_lookup[match_type][score_type]
        return val

    score_cpd_t = pgm_ext.TemplateCPD(
        'score', ['low', 'high'], varpref='S',
        evidence_ttypes=[match_cpd_t],
        pmf_func=score_pmf)
    score_cpds = [score_cpd_t.new_cpd(parents=cpds)
                  for cpds in zip(match_cpds)]
    #score_cpds = [score_cpd_t.new_cpd(parents=cpds)
    #              for cpds in zip(trimatch_cpds)]

    cpd_list = name_cpds + score_cpds + match_cpds + trimatch_cpds
    print('score_cpds = %r' % (ut.list_getattr(score_cpds, 'variable'),))

    # Make Model
    model = pgm_ext.define_model(cpd_list)
    model.num_names = num_names
    if verbose:
        model.print_templates()
    return model
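
# Illustrative sketch of how a pmf function like match_pmf above expands into
# an explicit CPT, mirroring what pgm_ext.TemplateCPD is assumed to do
# internally for two 2-state name parents (the real template code lives
# elsewhere in the package).
def _expand_match_cpt(num_names=2):
    def match_pmf(match_type, n1, n2):
        return {True:  {'same': 1.0, 'diff': 0.0},
                False: {'same': 0.0, 'diff': 1.0}}[n1 == n2][match_type]
    names = list(range(num_names))
    # rows: match state; columns: (n1, n2) pairs in product order
    return [[match_pmf(m, n1, n2) for n1 in names for n2 in names]
            for m in ['diff', 'same']]

# _expand_match_cpt() == [[0.0, 1.0, 1.0, 0.0],   # P(diff | n1, n2)
#                         [1.0, 0.0, 0.0, 1.0]]   # P(same | n1, n2)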
def name_model_mode1(num_annots, num_names=None, verbose=True):
    r"""
    spaghetti

    CommandLine:
        python -m ibeis.algo.hots.bayes --exec-name_model_mode1 --show
        python -m ibeis.algo.hots.bayes --exec-name_model_mode1
        python -m ibeis.algo.hots.bayes --exec-name_model_mode1 --num-annots=3

    Example:
        >>> # DISABLE_DOCTEST
        >>> from ibeis.algo.hots.bayes import *  # NOQA
        >>> defaults = dict(num_annots=2, num_names=2, verbose=True)
        >>> kw = ut.argparse_funckw(name_model_mode1, defaults)
        >>> model = name_model_mode1(**kw)
        >>> ut.quit_if_noshow()
        >>> show_model(model, show_prior=False, show_title=False)
        >>> ut.show_if_requested()

    Ignore:
        import nx2tikz
        print(nx2tikz.dumps_tikz(model, layout='layered', use_label=True))
    """
    annots = ut.chr_range(num_annots, base=ut.get_argval('--base', default='a'))
    # The indexes of match CPDs will not change if another annotation is added
    upper_diag_idxs = ut.colwise_diag_idxs(num_annots, 2)
    if num_names is None:
        num_names = num_annots

    # +--- Define CPD Templates ---

    # +-- Name Factor ---
    name_cpd_t = pgm_ext.TemplateCPD(
        'name', ('n', num_names), varpref='N',
        special_basis_pool=SPECIAL_BASIS_POOL)
    name_cpds = [name_cpd_t.new_cpd(parents=aid) for aid in annots]

    # +-- Match Factor ---
    def match_pmf(match_type, n1, n2):
        return {
            True: {'same': 1.0, 'diff': 0.0},
            False: {'same': 0.0, 'diff': 1.0},
        }[n1 == n2][match_type]

    match_cpd_t = pgm_ext.TemplateCPD(
        'match', ['diff', 'same'], varpref='M',
        evidence_ttypes=[name_cpd_t, name_cpd_t],
        pmf_func=match_pmf)
    namepair_cpds = ut.list_unflat_take(name_cpds, upper_diag_idxs)
    match_cpds = [match_cpd_t.new_cpd(parents=cpds)
                  for cpds in namepair_cpds]

    # +-- Score Factor ---
    def score_pmf(score_type, match_type):
        score_lookup = {
            'same': {'low': .1, 'high': .9, 'veryhigh': .9},
            'diff': {'low': .9, 'high': .09, 'veryhigh': .01}
        }
        val = score_lookup[match_type][score_type]
        return val

    score_cpd_t = pgm_ext.TemplateCPD(
        'score', ['low', 'high'], varpref='S',
        evidence_ttypes=[match_cpd_t],
        pmf_func=score_pmf)
    score_cpds = [score_cpd_t.new_cpd(parents=cpds)
                  for cpds in zip(match_cpds)]

    # L___ End CPD Definitions ___

    cpd_list = name_cpds + score_cpds + match_cpds
    print('score_cpds = %r' % (ut.list_getattr(score_cpds, 'variable'),))

    # Make Model
    model = pgm_ext.define_model(cpd_list)
    model.num_names = num_names
    if verbose:
        model.print_templates()
    return model
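
# Worked example of what score_pmf above implies (a hedged calculation; it
# assumes the template renormalizes the 'diff' column, whose entries for the
# two used states sum to 0.99): with a uniform prior over {same, diff},
# observing S=high yields
#   P(same | high) = 0.9 / (0.9 + 0.09 / 0.99) ~= 0.91
# so a single high score is strong but not conclusive evidence of a match.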
def make_name_model(num_annots, num_names=None, verbose=True, mode=1):
    """
    Defines the general name model

    CommandLine:
        python -m ibeis.algo.hots.bayes --exec-make_name_model --show

    Example:
        >>> # DISABLE_DOCTEST
        >>> from ibeis.algo.hots.bayes import *  # NOQA
        >>> defaults = dict(num_annots=2, num_names=2, verbose=True, mode=2)
        >>> kw = ut.argparse_funckw(make_name_model, defaults)
        >>> model = make_name_model(**kw)
        >>> ut.quit_if_noshow()
        >>> show_model(model, show_prior=True)
        >>> ut.show_if_requested()
    """
    #annots = ut.chr_range(num_annots, base='a')
    mode = ut.get_argval('--mode', default=mode)
    annots = ut.chr_range(num_annots, base=ut.get_argval('--base', default='a'))
    # The indexes of match CPDs will not change if another annotation is added
    upper_diag_idxs = ut.colwise_diag_idxs(num_annots, 2)
    if num_names is None:
        num_names = num_annots

    # -- Define CPD Templates
    def match_pmf(match_type, n1, n2):
        if n1 == n2:
            val = 1.0 if match_type == 'same' else 0.0
            #val = .999 if match_type == 'same' else 0.001
        elif n1 != n2:
            #val = 0.01 if match_type == 'same' else .99
            val = 0.0 if match_type == 'same' else 1.0
        return val

    def score_pmf(score_type, match_type):
        score_lookup = {
            'same': {'low': .1, 'high': .9, 'veryhigh': .9},
            'diff': {'low': .9, 'high': .09, 'veryhigh': .01}
            #'same': {'low': .1, 'high': .9},
            #'diff': {'low': .9, 'high': .1}
        }
        val = score_lookup[match_type][score_type]
        return val

    def score_pmf3(score_type, match_type, isdup='False'):
        score_lookup = {
            'False': {
                'same': {'low': .1, 'high': .5, 'veryhigh': .4},
                'diff': {'low': .9, 'high': .09, 'veryhigh': .01}
            },
            'True': {
                'same': {'low': .01, 'high': .2, 'veryhigh': .79},
                'diff': {'low': .4, 'high': .4, 'veryhigh': .2}
            }
        }
        val = score_lookup[isdup][match_type][score_type]
        return val

    def score_pmf2(score_type, n1, n2):
        score_lookup = {
            True: {'low': .1, 'high': .4, 'veryhigh': .5},
            False: {'low': .9, 'high': .09, 'veryhigh': .01}
        }
        val = score_lookup[n1 == n2][score_type]
        return val

    def dup_pmf(dupstate, match_type):
        lookup = {
            'same': {'True': 0.5, 'False': 0.5},
            'diff': {'True': 0.0, 'False': 1.0}
        }
        return lookup[match_type][dupstate]

    def check_pmf(n0, n1, match_type):
        pass

    def trimatch_pmf(match_ab, match_bc, match_ca):
        lookup = {
            'same': {
                'same': {'same': 1, 'diff': 0},
                'diff': {'same': 0, 'diff': 1},
            },
            'diff': {
                'same': {'same': 0, 'diff': 1},
                'diff': {'same': .5, 'diff': .5},
            }
        }
        return lookup[match_ca][match_bc][match_ab]

    name_cpd_t = pgm_ext.TemplateCPD(
        'name', ('n', num_names), varpref='N',
        special_basis_pool=SPECIAL_BASIS_POOL)

    if mode == 1 or mode == 5:
        match_cpd_t = pgm_ext.TemplateCPD(
            'match', ['diff', 'same'], varpref='M',
            evidence_ttypes=[name_cpd_t, name_cpd_t],
            pmf_func=match_pmf)
        if mode == 5:
            trimatch_cpd_t = pgm_ext.TemplateCPD(
                'tri_match', ['diff', 'same'], varpref='T',
                #evidence_ttypes=[match_cpd_t, match_cpd_t, match_cpd_t],
                evidence_ttypes=[match_cpd_t, match_cpd_t],
                pmf_func=trimatch_pmf)
        score_cpd_t = pgm_ext.TemplateCPD(
            #'score', ['low', 'high', 'veryhigh'],
            'score', ['low', 'high'],
            varpref='S',
            evidence_ttypes=[match_cpd_t],
            pmf_func=score_pmf)
    elif mode == 2:
        name_cpd_t = pgm_ext.TemplateCPD(
            'name', ('n', num_names), varpref='N',
            special_basis_pool=SPECIAL_BASIS_POOL)
        score_cpd_t = pgm_ext.TemplateCPD(
            #'score', ['low', 'high', 'veryhigh'],
            'score', ['low', 'high'],
            varpref='S',
            evidence_ttypes=[name_cpd_t, name_cpd_t],
            pmf_func=score_pmf2)
    elif mode == 3 or mode == 4:
        match_cpd_t = pgm_ext.TemplateCPD(
            'match', ['diff', 'same'], varpref='M',
            evidence_ttypes=[name_cpd_t, name_cpd_t],
            pmf_func=match_pmf)
        if mode == 3:
            dup_cpd_t = pgm_ext.TemplateCPD(
                'dup', ['False', 'True'], varpref='D',
            )
        else:
            dup_cpd_t = pgm_ext.TemplateCPD(
                'dup', ['False', 'True'], varpref='D',
                evidence_ttypes=[match_cpd_t],
                pmf_func=dup_pmf
            )
        score_cpd_t = pgm_ext.TemplateCPD(
            'score', ['low', 'high', 'veryhigh'], varpref='S',
            evidence_ttypes=[match_cpd_t, dup_cpd_t],
            pmf_func=score_pmf3)

    # Instantiate templates
    if mode == 1 or mode == 5:
        name_cpds = [name_cpd_t.new_cpd(parents=aid) for aid in annots]
        namepair_cpds = ut.list_unflat_take(name_cpds, upper_diag_idxs)
        match_cpds = [match_cpd_t.new_cpd(parents=cpds)
                      for cpds in namepair_cpds]
        score_cpds = [score_cpd_t.new_cpd(parents=cpds)
                      for cpds in zip(match_cpds)]
        if mode == 5:
            #triple_idxs = ut.colwise_diag_idxs(num_annots, 3)
            tid2_match = {cpd._template_id: cpd for cpd in match_cpds}
            trimatch_cpds = []
            # such hack
            for cpd in match_cpds:
                parents = []
                this_ = list(cpd._template_id)
                for aid in annots:
                    if aid in this_:
                        continue
                    for aid2 in this_:
                        key = aid2 + aid
                        if key not in tid2_match:
                            key = aid + aid2
                        parents += [tid2_match[key]]
                trimatch_cpds += [trimatch_cpd_t.new_cpd(parents=parents)]
            #score_cpds = [score_cpd_t.new_cpd(parents=cpds)
            #              for cpds in zip(trimatch_cpds)]
            cpd_list = name_cpds + score_cpds + match_cpds + trimatch_cpds
        else:
            cpd_list = name_cpds + score_cpds + match_cpds
    elif mode == 2:
        name_cpds = [name_cpd_t.new_cpd(parents=aid) for aid in annots]
        namepair_cpds = ut.list_unflat_take(name_cpds, upper_diag_idxs)
        score_cpds = [score_cpd_t.new_cpd(parents=cpds)
                      for cpds in namepair_cpds]
        cpd_list = name_cpds + score_cpds
    elif mode == 3 or mode == 4:
        name_cpds = [name_cpd_t.new_cpd(parents=aid) for aid in annots]
        namepair_cpds = ut.list_unflat_take(name_cpds, upper_diag_idxs)
        match_cpds = [match_cpd_t.new_cpd(parents=cpds)
                      for cpds in namepair_cpds]
        if mode == 3:
            dup_cpds = [dup_cpd_t.new_cpd(parents=''.join(map(str, aids)))
                        for aids in ut.list_unflat_take(annots, upper_diag_idxs)]
        else:
            dup_cpds = [dup_cpd_t.new_cpd(parents=[mcpds])
                        for mcpds in match_cpds]
        score_cpds = [score_cpd_t.new_cpd(parents=([mcpds] + [dcpd]))
                      for mcpds, dcpd in zip(match_cpds, dup_cpds)]
        cpd_list = name_cpds + score_cpds + match_cpds + dup_cpds

    #print('upper_diag_idxs = %r' % (upper_diag_idxs,))
    print('score_cpds = %r' % (ut.list_getattr(score_cpds, 'variable'),))

    # Make Model
    model = pgm_ext.define_model(cpd_list)
    model.num_names = num_names
    if verbose:
        model.print_templates()
    #ut.colorprint('\n --- CPD Templates ---', 'blue')
    #for temp_cpd in templates:
    #    ut.colorprint(temp_cpd._cpdstr('psql'), 'turquoise')
    #print_ascii_graph(model)
    return model
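
# Summary of the modes accepted by make_name_model, as read from the
# branches above:
#   mode 1: name -> match -> score          (pairwise match variables)
#   mode 2: name -> score                   (scores depend on name pairs directly)
#   mode 3: match + parentless dup -> score (3-state score variable)
#   mode 4: match -> dup -> score           (dup depends on the match)
#   mode 5: mode 1 plus tri_match consistency variables over match pairs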
def show_model(model, evidence={}, soft_evidence={}, **kwargs):
    """
    References:
        http://stackoverflow.com/questions/22207802/pygraphviz-networkx-set-node-level-or-layer

    Ignore:
        pkg-config --libs-only-L libcgraph
        sudo apt-get install libgraphviz-dev -y
        sudo apt-get install libgraphviz4 -y

        # sudo apt-get install pkg-config
        sudo apt-get install libgraphviz-dev
        # pip install git+git://github.com/pygraphviz/pygraphviz.git
        pip install pygraphviz
        python -c "import pygraphviz; print(pygraphviz.__file__)"

        sudo pip3 install pygraphviz --install-option="--include-path=/usr/include/graphviz" --install-option="--library-path=/usr/lib/graphviz/"
        python3 -c "import pygraphviz; print(pygraphviz.__file__)"
    """
    if ut.get_argval('--hackmarkov') or ut.get_argval('--hackjunc'):
        draw_tree_model(model, **kwargs)
        return

    import plottool as pt
    import networkx as netx
    import matplotlib as mpl
    fnum = pt.ensure_fnum(None)
    fig = pt.figure(fnum=fnum, pnum=(3, 1, (slice(0, 2), 0)), doclf=True)  # NOQA
    #fig = pt.figure(fnum=fnum, pnum=(3, 2, (1, slice(1, 2))), doclf=True)  # NOQA
    ax = pt.gca()
    var2_post = {f.variables[0]: f for f in kwargs.get('factor_list', [])}

    netx_graph = model
    #netx_graph.graph.setdefault('graph', {})['size'] = '"10,5"'
    #netx_graph.graph.setdefault('graph', {})['rankdir'] = 'LR'

    pos = get_hacked_pos(netx_graph)
    #pos = netx.nx_agraph.pygraphviz_layout(netx_graph)
    #pos = netx.nx_agraph.pydot_layout(netx_graph, prog='dot')
    #pos = netx.nx_agraph.graphviz_layout(netx_graph)

    drawkw = dict(pos=pos, ax=ax, with_labels=True, node_size=1500)
    if evidence is not None:
        node_colors = [
            # (pt.TRUE_BLUE
            (pt.WHITE if node not in soft_evidence else pt.LIGHT_PINK)
            if node not in evidence else pt.FALSE_RED
            for node in netx_graph.nodes()]
        for node in netx_graph.nodes():
            cpd = model.var2_cpd[node]
            if cpd.ttype == 'score':
                pass
        drawkw['node_color'] = node_colors

    netx.draw(netx_graph, **drawkw)

    show_probs = True
    if show_probs:
        textprops = {
            'family': 'monospace',
            'horizontalalignment': 'left',
            #'horizontalalignment': 'center',
            #'size': 12,
            'size': 8,
        }
        textkw = dict(
            xycoords='data', boxcoords='offset points', pad=0.25,
            framewidth=True, arrowprops=dict(arrowstyle='->'),
            #bboxprops=dict(fc=node_attr['fillcolor']),
        )
        netx_nodes = model.nodes(data=True)
        node_key_list = ut.get_list_column(netx_nodes, 0)
        pos_list = ut.dict_take(pos, node_key_list)

        artist_list = []
        offset_box_list = []
        for pos_, node in zip(pos_list, netx_nodes):
            x, y = pos_
            variable = node[0]
            cpd = model.var2_cpd[variable]

            prior_marg = (cpd if cpd.evidence is None else
                          cpd.marginalize(cpd.evidence, inplace=False))

            prior_text = None
            text = None
            if variable in evidence:
                text = cpd.variable_statenames[evidence[variable]]
            elif variable in var2_post:
                post_marg = var2_post[variable]
                text = pgm_ext.make_factor_text(post_marg, 'post')
                prior_text = pgm_ext.make_factor_text(prior_marg, 'prior')
            else:
                if len(evidence) == 0 and len(soft_evidence) == 0:
                    prior_text = pgm_ext.make_factor_text(prior_marg, 'prior')

            show_post = kwargs.get('show_post', False)
            show_prior = kwargs.get('show_prior', False)
            show_prior = True
            show_post = True

            show_ev = (evidence is not None and variable in evidence)
            if (show_post or show_ev) and text is not None:
                offset_box = mpl.offsetbox.TextArea(text, textprops)
                artist = mpl.offsetbox.AnnotationBbox(
                    # offset_box, (x + 5, y), xybox=(20., 5.),
                    offset_box, (x, y + 5), xybox=(4., 20.),
                    #box_alignment=(0, 0),
                    box_alignment=(.5, 0), **textkw)
                offset_box_list.append(offset_box)
                artist_list.append(artist)

            if show_prior and prior_text is not None:
                offset_box2 = mpl.offsetbox.TextArea(prior_text, textprops)
                artist2 = mpl.offsetbox.AnnotationBbox(
                    # offset_box2, (x - 5, y), xybox=(-20., -15.),
                    # offset_box2, (x, y - 5), xybox=(-15., -20.),
                    offset_box2, (x, y - 5), xybox=(-4, -20.),
                    #box_alignment=(1, 1),
                    box_alignment=(.5, 1), **textkw)
                offset_box_list.append(offset_box2)
                artist_list.append(artist2)

        for artist in artist_list:
            ax.add_artist(artist)

        xmin, ymin = np.array(pos_list).min(axis=0)
        xmax, ymax = np.array(pos_list).max(axis=0)
        num_annots = len(model.ttype2_cpds['name'])
        if num_annots > 4:
            ax.set_xlim((xmin - 40, xmax + 40))
            ax.set_ylim((ymin - 50, ymax + 50))
            fig.set_size_inches(30, 7)
        else:
            ax.set_xlim((xmin - 42, xmax + 42))
            ax.set_ylim((ymin - 50, ymax + 50))
            fig.set_size_inches(23, 7)
        fig = pt.gcf()

        title = 'num_names=%r, num_annots=%r' % (model.num_names, num_annots,)
        map_assign = kwargs.get('map_assign', None)
        #max_marginal_list = []
        #for name, marginal in marginalized_joints.items():
        #    states = list(ut.iprod(*marginal.statenames))
        #    vals = marginal.values.ravel()
        #    x = vals.argmax()
        #    max_marginal_list += ['P(' + ', '.join(states[x]) + ') = ' + str(vals[x])]
        #    title += str(marginal)
        top_assignments = kwargs.get('top_assignments', None)
        if top_assignments is not None:
            map_assign, map_prob = top_assignments[0]
            if map_assign is not None:
                # title += '\nMAP=' + ut.repr2(map_assign, strvals=True)
                title += '\nMAP: ' + map_assign + ' @' + '%.2f%%' % (100 * map_prob,)
        if kwargs.get('show_title', True):
            pt.set_figtitle(title, size=14)
        #pt.set_xlabel()

        def hack_fix_centeralign():
            if textprops['horizontalalignment'] == 'center':
                print('Fixing centeralign')
                fig = pt.gcf()
                fig.canvas.draw()
                # Superhack for centered text. Fix bug in
                # /usr/local/lib/python2.7/dist-packages/matplotlib/offsetbox.py
                # /usr/local/lib/python2.7/dist-packages/matplotlib/text.py
                for offset_box in offset_box_list:
                    z = offset_box._text.get_window_extent()
                    T = offset_box._text.get_transform()
                    A = mpl.transforms.Affine2D()
                    A.clear()
                    A.translate((z.x1 - z.x0) / 2, 0)
                    offset_box._text.set_transform(T + A)
        hack_fix_centeralign()

    top_assignments = kwargs.get('top_assignments', None)
    if top_assignments is not None:
        bin_labels = ut.get_list_column(top_assignments, 0)
        bin_vals = ut.get_list_column(top_assignments, 1)
        # bin_labels = ['\n'.join(ut.textwrap.wrap(_lbl, width=30)) for _lbl in bin_labels]
        pt.draw_histogram(bin_labels, bin_vals, fnum=fnum,
                          pnum=(3, 8, (2, slice(4, None))),
                          transpose=True,
                          use_darkbackground=False,
                          #xtick_rotation=-10,
                          ylabel='Prob', xlabel='assignment')
        pt.set_title('Assignment probabilities')
    #fpath = ('name_model_' + suff + '.png')
    #pt.plt.savefig(fpath)
    #return fpath
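
# Typical usage, mirroring the doctests above (a sketch; it requires the
# plotting dependencies imported inside show_model):
#   model = make_name_model(num_annots=3, num_names=3, mode=1)
#   show_model(model, show_prior=True)
#   ut.show_if_requested()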
def flow():
    """
    http://pmneila.github.io/PyMaxflow/maxflow.html#maxflow-fastmin

    pip install PyMaxflow
    pip install pystruct
    pip install hdbscan
    """
    # Toy problem representing attempting to discover names via annotation
    # scores
    import pystruct  # NOQA
    import pystruct.models  # NOQA
    import networkx as netx  # NOQA
    import vtool as vt

    num_annots = 10
    num_names = num_annots
    hidden_nids = np.random.randint(0, num_names, num_annots)
    unique_nids, groupxs = vt.group_indices(hidden_nids)

    toy_params = {
        True: {'mu': 1.0, 'sigma': 2.2},
        False: {'mu': 7.0, 'sigma': .9}
    }

    if True:
        import plottool as pt
        xdata = np.linspace(0, 100, 1000)
        tp_pdf = vt.gauss_func1d(xdata, **toy_params[True])
        fp_pdf = vt.gauss_func1d(xdata, **toy_params[False])
        pt.plot_probabilities([tp_pdf, fp_pdf], ['TP', 'TF'], xdata=xdata)

    def metric(aidx1, aidx2, hidden_nids=hidden_nids, toy_params=toy_params):
        if aidx1 == aidx2:
            return 0
        rng = np.random.RandomState(int(aidx1 + aidx2))
        same = hidden_nids[int(aidx1)] == hidden_nids[int(aidx2)]
        mu, sigma = ut.dict_take(toy_params[same], ['mu', 'sigma'])
        return np.clip(rng.normal(mu, sigma), 0, np.inf)

    pairwise_aidxs = list(ut.iprod(range(num_annots), range(num_annots)))
    pairwise_labels = np.array([hidden_nids[a1] == hidden_nids[a2]
                                for a1, a2 in pairwise_aidxs])
    pairwise_scores = np.array([metric(*zz) for zz in pairwise_aidxs])
    pairwise_scores_mat = pairwise_scores.reshape(num_annots, num_annots)
    if num_annots <= 10:
        print(ut.repr2(pairwise_scores_mat, precision=1))

    #aids = list(range(num_annots))
    #g = netx.DiGraph()
    #g.add_nodes_from(aids)
    #g.add_edges_from([(tup[0], tup[1], {'weight': score})
    #                  for tup, score in zip(pairwise_aidxs, pairwise_scores)
    #                  if tup[0] != tup[1]])
    #netx.draw_graphviz(g)
    #pr = netx.pagerank(g)

    X = pairwise_scores
    Y = pairwise_labels

    encoder = vt.ScoreNormalizer()
    encoder.fit(X, Y)
    encoder.visualize()

    # meanshift clustering
    import sklearn.cluster
    bandwidth = sklearn.cluster.estimate_bandwidth(X[:, None])  # , quantile=quantile, n_samples=500)
    assert bandwidth != 0, '[] bandwidth is 0. Cannot cluster'
    # bandwidth is with respect to the RBF used in clustering
    #ms = sklearn.cluster.MeanShift(bandwidth=bandwidth, bin_seeding=True, cluster_all=True)
    ms = sklearn.cluster.MeanShift(bandwidth=bandwidth, bin_seeding=True, cluster_all=False)
    ms.fit(X[:, None])
    label_arr = ms.labels_
    unique_labels = np.unique(label_arr)
    max_label = max(0, unique_labels.max())
    num_orphans = (label_arr == -1).sum()
    label_arr[label_arr == -1] = np.arange(max_label + 1, max_label + 1 + num_orphans)

    X_data = np.arange(num_annots)[:, None].astype(np.int64)

    #graph = pystruct.models.GraphCRF(
    #    n_states=None,
    #    n_features=None,
    #    inference_method='lp',
    #    class_weight=None,
    #    directed=False,
    #)

    import scipy.cluster.hierarchy
    thresh = 2.0
    labels = scipy.cluster.hierarchy.fclusterdata(X_data, thresh, metric=metric)
    unique_lbls, lblgroupxs = vt.group_indices(labels)
    print(groupxs)
    print(lblgroupxs)
    print('groupdiff = %r' % (ut.compare_groupings(groupxs, lblgroupxs),))
    print('common groups = %r' % (ut.find_grouping_consistencies(groupxs, lblgroupxs),))
    #X_data, seconds_thresh, criterion='distance')

    #help(hdbscan.HDBSCAN)
    import hdbscan
    alg = hdbscan.HDBSCAN(metric=metric, min_cluster_size=1, p=1,
                          gen_min_span_tree=1, min_samples=2)
    labels = alg.fit_predict(X_data)
    labels[labels == -1] = np.arange(np.sum(labels == -1)) + labels.max() + 1
    unique_lbls, lblgroupxs = vt.group_indices(labels)
    print(groupxs)
    print(lblgroupxs)
    print('groupdiff = %r' % (ut.compare_groupings(groupxs, lblgroupxs),))
    print('common groups = %r' % (ut.find_grouping_consistencies(groupxs, lblgroupxs),))

    #import ddbscan
    #help(ddbscan.DDBSCAN)
    #alg = ddbscan.DDBSCAN(2, 2)
    #D = np.zeros((len(aids), len(aids) + 1))
    #D.T[-1] = np.arange(len(aids))

    ## Can alpha-expansion be used when the pairwise potentials are not in a grid?
    #hidden_ut.group_items(aids, hidden_nids)

    if False:
        import maxflow
        #from maxflow import fastmin
        # Create a graph with integer capacities.
        g = maxflow.Graph[int](2, 2)
        # Add two (non-terminal) nodes. Get the index to the first one.
        nodes = g.add_nodes(2)
        # Create two edges (forwards and backwards) with the given capacities.
        # The indices of the nodes are always consecutive.
        g.add_edge(nodes[0], nodes[1], 1, 2)
        # Set the capacities of the terminal edges...
        # ...for the first node.
        g.add_tedge(nodes[0], 2, 5)
        # ...for the second node.
        g.add_tedge(nodes[1], 9, 4)
        g.maxflow()
        g.get_nx_graph()
        g.get_segment(nodes[0])

    #seed = (start * 11)
    #np.random.RandomState(seed)
    #if initial_nids is None:
    #else:
    #    # HACK
    #    nid_is_avail = vt.index_to_boolmask(initial_nids, initial_nids.max() + 1)
    #    next_ = np.where(~nid_is_avail)[0]
    #    if len(next_) == 0:
    #        nid_is_avail = np.append(nid_is_avail, [True])
    #        idx = len(nid_is_avail) - 1
    #    else:
    #        idx = next_[0]
    #    nid_is_avail[idx] = True
    #    avail_nids = np.where(nid_is_avail)[0]
    #    p = np.ones(len(avail_nids))
    #    # chance to see a new annotation
    #    chance = .4
    #    # more or less
    #    p[:] = (1 - chance) / (len(p) - 1)
    #    p[idx] = chance
    #    #print('p = %r' % (p,))
    #    #nids = rng.randint(0, num_names, num_annots)
    #    #print('avail_nids = %r' % (avail_nids,))
    #    nids = rng.choice(avail_nids, num_annots, p=p)
    #    #print('nids = %r' % (nids,))
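
# Reading off a PyMaxflow result (a sketch using only documented calls):
# maxflow() returns the min-cut value, and get_segment(i) reports which side
# of the cut node i landed on (0 = source side, 1 = sink side).
#   import maxflow
#   g = maxflow.Graph[int](2, 2)
#   nodes = g.add_nodes(2)
#   g.add_edge(nodes[0], nodes[1], 1, 2)
#   g.add_tedge(nodes[0], 2, 5)
#   g.add_tedge(nodes[1], 9, 4)
#   print(g.maxflow(), g.get_segment(nodes[0]), g.get_segment(nodes[1]))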