Source code for ibeis.dbio.ingest_database

#!/usr/bin/env python2.7
# -*- coding: utf-8 -*-
"""
This module lists known raw databases and how to ingest them.

Specify arguments and run the following command to ingest a database

python -m ibeis --tf ingest_rawdata --db seaturtles  --imgdir "~/turtles/Turtles from Jill" --ingest-type=named_folders --species=turtles

# --- GET DATA ---
rsync -avhzP <user>@<host>:<remotedir>  <path-to-raw-imgs>
# --- RUN INGEST SCRIPT ---
python -m ibeis --tf ingest_rawdata --db <new-ibeis-db-name> --imgdir <path-to-raw-imgs> --ingest-type=named_folders --species=<optional> --fmtkey=<optional>
"""
from __future__ import absolute_import, division, print_function
from six.moves import zip, map, range
import ibeis
import os
from os.path import relpath, dirname, exists, join, realpath, basename
from ibeis.other import ibsfuncs
from ibeis import constants as const
import utool as ut
import vtool as vt
import parse


class Ingestable(object):
    """
    Temporary structure describing how to ingest a database.

    Args:
        dbname (str): name of the database to ingest into
        img_dir (str): directory holding the raw images. When None it is
            looked up from ``dbname`` in the raw and work directories.
        ingest_type (str): how names are derived from the images;
            ``ingest_rawdata`` understands 'named_folders', 'named_images'
            and 'unknown'
        fmtkey (str): format key / pattern used when parsing names
        adjust_percent (float): forwarded by ``ingest_rawdata`` to
            ``ibs.use_images_as_annotations``
        postingest_func (callable): optional hook called as ``func(ibs)``
            once ingestion has finished
        zipfile (str): optional archive path; ``ingest_rawdata`` joins it
            onto the work directory and unarchives it
        species (str): species text applied to the new annotations
        images_as_annots (bool): if True annotations are created from the
            ingested images
    """
    def __init__(self, dbname, img_dir=None, ingest_type=None, fmtkey=None,
                 adjust_percent=0.0, postingest_func=None, zipfile=None,
                 species=None, images_as_annots=True):
        self.dbname          = dbname
        self.img_dir         = img_dir
        self.ingest_type     = ingest_type
        self.fmtkey          = fmtkey
        self.zipfile         = zipfile
        self.adjust_percent  = adjust_percent
        self.postingest_func = postingest_func
        self.species         = species
        self.images_as_annots = images_as_annots
        # Fail early if the image directory cannot be located
        self.ensure_feasibility()

    def __str__(self):
        return ut.dict_str(self.__dict__)

    def ensure_feasibility(self):
        """
        Resolves ``self.img_dir`` and checks that it exists on disk.

        Raises:
            AssertionError: if no image directory could be found or the
                resolved directory does not exist
        """
        def _missing_msg():
            # Built lazily so it always reports the current img_dir value
            return 'Cannot find img_dir for dbname=%r, img_dir=%r' % (
                self.dbname, self.img_dir)

        rawdir = ibeis.sysres.get_rawdir()
        if self.img_dir is None:
            # Try to find the data in either the raw or the work dir
            self.img_dir = ibeis.sysres.db_to_dbdir(
                self.dbname, extra_workdirs=[rawdir], allow_newdir=True)
        assert self.img_dir is not None, _missing_msg()
        self.img_dir = ut.truepath(self.img_dir)
        assert exists(self.img_dir), _missing_msg()


class Ingestable2(object):
    """
    Describes image sources (paths, directories, zipfiles) to ingest.

    Args:
        dbdir (str): database directory; stored on the instance
        imgpath_list (list): explicit image paths
        imgdir_list (list): directories searched recursively for images
        zipfile_list (list): archives that are unzipped and searched for
            images
        postingest_func (callable): optional hook called as ``func(ibs)``
            at the end of ``execute``
        ingest_config (dict): overrides for the ingest configuration
            (images_as_annots, ingest_type, species, adjust_percent)
        **kwargs: additional configuration values; entries in
            ``ingest_config`` take precedence over these
    """
    def __init__(self, dbdir, imgpath_list=None, imgdir_list=None,
                 zipfile_list=None, postingest_func=None, ingest_config=None,
                 **kwargs):
        self.dbdir = dbdir
        self.zipfile_list = zipfile_list
        self.imgdir_list = imgdir_list
        self.imgpath_list = imgpath_list
        self.postingest_func = postingest_func

        import dtool
        valid_species = ['____']

        class IngestConfig(dtool.Config):
            _param_info_list = [
                ut.ParamInfo(
                    'images_as_annots', True),
                ut.ParamInfo(
                    'ingest_type', 'unknown', valid_values=['unknown', 'named_folders', 'named_images']),
                ut.ParamInfo(
                    'species', '____',
                    hideif=lambda cfg: not cfg['images_as_annots'],
                    valid_values=valid_species,
                ),
                ut.ParamInfo(
                    'adjust_percent', 0.0,
                    hideif=lambda cfg: not cfg['images_as_annots']),
            ]
        updatekw = kwargs.copy()
        # None (rather than a shared mutable ``{}`` default) means no overrides
        if ingest_config is not None:
            updatekw.update(ingest_config)
        self.ingest_config = IngestConfig(**updatekw)

    def execute(self, ibs=None):
        """
        Adds every configured image to ``ibs`` and optionally annotates it.

        Args:
            ibs: controller the images are added to (required)

        Returns:
            list: gid_list - rowids of the images that were added; images
                for which ``ibs.add_images`` returned None are left out

        Raises:
            AssertionError: if ``ibs`` is None
            NotImplementedError: if the configured ingest_type is unknown
        """
        print('[ingest_rawdata] Ingestable' + str(self))
        assert ibs is not None

        unzipped_file_base_dir = join(ibs.get_dbdir(), 'unzipped_files')

        def extract_from_zipfiles(zipfile_list):
            """ unzips every archive; returns (image paths, their roots) """
            ut.ensuredir(unzipped_file_base_dir)
            unzipped_dirs = []
            for zipfile in zipfile_list:
                img_dir = unzipped_file_base_dir
                unziped_file_relpath = dirname(relpath(relpath(realpath(zipfile), realpath(img_dir))))
                unzipped_file_dir = join(unzipped_file_base_dir, unziped_file_relpath)
                ut.ensuredir(unzipped_file_dir)
                ut.unzip_file(zipfile, output_dir=unzipped_file_dir, overwrite=False)
                if unzipped_file_dir not in unzipped_dirs:
                    unzipped_dirs.append(unzipped_file_dir)
            # List every extraction directory (not only the last one) once
            # all archives are unpacked; skip paths that were already seen
            # because extraction directories may be nested.
            zip_gpaths = []
            zip_roots = []
            seen_gpaths = set()
            for unzipped_dir in unzipped_dirs:
                for gpath in ut.list_images(unzipped_dir, fullpath=True, recursive=True):
                    if gpath not in seen_gpaths:
                        seen_gpaths.add(gpath)
                        zip_gpaths.append(gpath)
                        zip_roots.append(unzipped_dir)
            return zip_gpaths, zip_roots

        def list_images(img_dir):
            """ lists images that are not in an internal cache """
            ignore_list = ['_hsdb', '.hs_internals', '_ibeis_cache', '_ibsdb']
            gpath_list = ut.list_images(img_dir, fullpath=True, recursive=True,
                                        ignore_list=ignore_list)
            return gpath_list

        # FIXME ensure python3 works with this
        # root_list[i] is the directory that the folder name of gpath_list[i]
        # is measured from when ingest_type == 'named_folders'.
        gpath_list = []
        root_list = []

        if self.imgpath_list is not None:
            for gpath in ut.ensure_unicode_strlist(self.imgpath_list):
                gpath_list.append(gpath)
                # an explicit path is named after its immediate parent folder
                root_list.append(dirname(dirname(gpath)))

        if self.imgdir_list is not None:
            for img_dir in self.imgdir_list:
                dir_gpaths = ut.ensure_unicode_strlist(list_images(img_dir))
                gpath_list.extend(dir_gpaths)
                root_list.extend([img_dir] * len(dir_gpaths))

        if self.zipfile_list is not None:
            zip_gpaths, zip_roots = extract_from_zipfiles(self.zipfile_list)
            gpath_list.extend(zip_gpaths)
            root_list.extend(zip_roots)

        gpath_list = ut.ensure_unicode_strlist(gpath_list)

        # Parse structure for image names
        ingest_type = self.ingest_config.ingest_type
        if ingest_type == 'named_folders':
            name_list = [
                get_name_texts_from_parent_folder([gpath], root, None)[0]
                for gpath, root in zip(gpath_list, root_list)
            ]
        elif ingest_type == 'named_images':
            # get_name_texts_from_gnames never reads its img_dir argument.
            # NOTE(review): fmtkey=None is forwarded as before - confirm the
            # callee accepts it.
            name_list = get_name_texts_from_gnames(gpath_list, None, None)
        elif ingest_type == 'unknown':
            name_list = [const.UNKNOWN for _ in range(len(gpath_list))]
        else:
            raise NotImplementedError('unknwon ingest_type=%r' % (ingest_type,))

        # Add Images
        gpath_list = [gpath.replace('\\', '/') for gpath in gpath_list]
        gid_list_ = ibs.add_images(gpath_list)

        # <DEBUG>
        print("[ingest] Length gid list: %d" % len(gid_list_))
        print("[ingest] Length unique gid list: %d" % len(set(gid_list_)))
        assert len(gid_list_) == len(gpath_list)
        for gid in gid_list_:
            if gid is None:
                print('[ingest] big fat warning')
        # </DEBUG>

        # Drop failed images together with their names so that both lists
        # stay aligned for conflict resolution.
        valid_pairs = [(gid, name) for gid, name in zip(gid_list_, name_list)
                       if gid is not None]
        gid_list = [gid for gid, _ in valid_pairs]
        valid_names = [name for _, name in valid_pairs]
        unique_gids, unique_names, unique_notes = resolve_name_conflicts(
            gid_list, valid_names)

        # Add ANNOTATIONs with names and notes
        if self.ingest_config.images_as_annots:
            aid_list = ibs.use_images_as_annotations(unique_gids,
                                                     adjust_percent=self.ingest_config.adjust_percent)
            ibs.set_annot_names(aid_list, unique_names)
            ibs.set_annot_notes(aid_list, unique_notes)
            species_text = self.ingest_config.species
            if species_text is not None:
                ibs.set_annot_species(aid_list, [species_text] * len(aid_list))

        if self.postingest_func is not None:
            self.postingest_func(ibs)
        return gid_list


def ingest_rawdata(ibs, ingestable, localize=False):
    """
    Ingests rawdata into an ibeis database.

    Args:
        ibs (ibeis.IBEISController):  ibeis controller object
        ingestable (Ingestable): description of the data to ingest
        localize (bool): if True ``ibs.localize_images()`` is called after
            the images were added (default = False)

    Returns:
        list: aid_list - rowids of the annotations created by this call
            (empty when ``ingestable.images_as_annots`` is False)

    Raises:
        NotImplementedError: if ``ingestable.ingest_type`` is not one of
            'named_folders', 'named_images', 'unknown'

    Notes:
        if ingest_type == 'named_folders':
            Converts folder structure where folders = name, to ibsdb
        if ingest_type == 'named_images':
            Converts imgname structure where imgnames = name_id.ext, to ibsdb

    CommandLine:
        python ibeis/dbio/ingest_database.py --db seals_drop2
        python -m ibeis.dbio.ingest_database --exec-ingest_rawdata
        python -m ibeis.dbio.ingest_database --exec-ingest_rawdata --db snow-leopards --imgdir /raid/raw_rsync/snow-leopards

        python -m ibeis --tf ingest_rawdata --db wd_peter2 --imgdir /raid/raw_rsync/african-dogs --ingest-type=named_folders --species=wild_dog --fmtkey='African Wild Dog: {name}' --force-delete
        python -m ibeis --tf ingest_rawdata --db <newdbname>  --imgdir <path-to-images> --ingest-type=named_folders --species=humpback

    Example:
        >>> # SCRIPT
        >>> # General ingest script
        >>> from ibeis.dbio.ingest_database import *  # NOQA
        >>> import ibeis
        >>> dbname = ut.get_argval('--db', str, None)  # 'snow-leopards')
        >>> force_delete = ut.get_argflag(('--force_delete', '--force-delete'))
        >>> img_dir = ut.get_argval('--imgdir', type_=str, default=None)
        >>> ingest_type = ut.get_argval('--ingest-type', type_=str, default='unknown')
        >>> fmtkey = ut.get_argval('--fmtkey', type_=str, default=None)
        >>> species = ut.get_argval('--species', type_=str, default=None)
        >>> assert img_dir is not None, 'specify img dir'
        >>> assert dbname is not None, 'specify dbname'
        >>> ingestable = Ingestable(
        >>>     dbname, img_dir=img_dir, ingest_type=ingest_type,
        >>>     fmtkey=fmtkey, species=species, images_as_annots=ingest_type != 'unknown',
        >>>     adjust_percent=0.00)
        >>> from ibeis.control import IBEISControl
        >>> dbdir = ibeis.sysres.db_to_dbdir(dbname, allow_newdir=True, use_sync=False)
        >>> ut.ensuredir(dbdir, verbose=True)
        >>> if force_delete:
        >>>     ibsfuncs.delete_ibeis_database(dbdir)
        >>> ibs = IBEISControl.request_IBEISController(dbdir)
        >>> localize = False
        >>> aid_list = ingest_rawdata(ibs, ingestable, localize)
        >>> result = ('aid_list = %s' % (str(aid_list),))
        >>> print(result)
    """
    print('[ingest_rawdata] Ingestable' + str(ingestable))

    if ingestable.zipfile is not None:
        # The whole dataset is shipped as one archive: unpack it and ingest
        # from the unpacked directory instead.
        zipfile_fpath = ut.truepath(join(ibeis.sysres.get_workdir(), ingestable.zipfile))
        ingestable.img_dir = ut.unarchive_file(zipfile_fpath)

    img_dir         = realpath(ingestable.img_dir)
    ingest_type     = ingestable.ingest_type
    fmtkey          = ingestable.fmtkey
    adjust_percent  = ingestable.adjust_percent
    species_text    = ingestable.species
    postingest_func = ingestable.postingest_func
    print('[ingest] ingesting rawdata: img_dir=%r, injest_type=%r' % (img_dir, ingest_type))

    unzipped_file_base_dir = join(ibs.get_dbdir(), 'unzipped_files')

    def extract_zipfile_images(ibs, ingestable):
        """ unzips archives found inside img_dir and lists their images """
        zipfile_list = ut.glob(ingestable.img_dir, '*.zip', recursive=True)
        if len(zipfile_list) == 0:
            return []
        print('Found zipfile_list = %r' % (zipfile_list,))
        ut.ensuredir(unzipped_file_base_dir)
        unzipped_dirs = []
        for zipfile in zipfile_list:
            unziped_file_relpath = dirname(relpath(relpath(realpath(zipfile), realpath(ingestable.img_dir))))
            unzipped_file_dir = join(unzipped_file_base_dir, unziped_file_relpath)
            ut.ensuredir(unzipped_file_dir)
            ut.unzip_file(zipfile, output_dir=unzipped_file_dir, overwrite=False)
            if unzipped_file_dir not in unzipped_dirs:
                unzipped_dirs.append(unzipped_file_dir)
        # List every extraction directory, not only the last one. Paths are
        # deduplicated because extraction directories may be nested.
        unzipped_gpaths = []
        seen_gpaths = set()
        for unzipped_dir in unzipped_dirs:
            for gpath in ut.list_images(unzipped_dir, fullpath=True, recursive=True):
                if gpath not in seen_gpaths:
                    seen_gpaths.add(gpath)
                    unzipped_gpaths.append(gpath)
        return unzipped_gpaths

    def list_images(img_dir):
        """ lists images that are not in an internal cache """
        ignore_list = ['_hsdb', '.hs_internals', '_ibeis_cache', '_ibsdb']
        gpath_list = ut.list_images(img_dir,
                                    fullpath=True,
                                    recursive=True,
                                    ignore_list=ignore_list)
        return gpath_list

    # Get images in the image directory
    # FIXME ensure python3 works with this
    gpath_list1 = ut.ensure_unicode_strlist(list_images(img_dir))
    gpath_list2 = ut.ensure_unicode_strlist(extract_zipfile_images(ibs, ingestable))
    gpath_list = gpath_list1 + gpath_list2

    # Parse structure for image names
    if ingest_type == 'named_folders':
        # Unzipped images are named relative to the unzip directory
        name_list1 = get_name_texts_from_parent_folder(gpath_list1, img_dir,
                                                       fmtkey)
        name_list2 = get_name_texts_from_parent_folder(gpath_list2,
                                                       unzipped_file_base_dir,
                                                       fmtkey)
        name_list = name_list1 + name_list2
    elif ingest_type == 'named_images':
        name_list = get_name_texts_from_gnames(gpath_list, img_dir, fmtkey)
    elif ingest_type == 'unknown':
        name_list = [const.UNKNOWN for _ in range(len(gpath_list))]
    else:
        raise NotImplementedError('unknwon ingest_type=%r' % (ingest_type,))

    # Find names likely to be the same?
    RECTIFY_NAMES_HUERISTIC = True
    if RECTIFY_NAMES_HUERISTIC:
        names = sorted(set(name_list))
        splitchars = [' ', '/']

        def multisplit(str_, splitchars):
            """ splits str_ on every character in splitchars """
            parts = [str_]
            for char in splitchars:
                parts = ut.flatten([part.split(char) for part in parts])
            return parts

        # Names sharing the same leading token are merged into that token
        groupids = [multisplit(name, splitchars)[0] for name in names]
        grouped_names = ut.group_items(names, groupids)
        fixed_names = {
            newkey: key
            for key, val in grouped_names.items()
            if len(val) > 1 for newkey in val
        }
        name_list = [fixed_names.get(name, name) for name in name_list]

    # Add Images
    gpath_list = [gpath.replace('\\', '/') for gpath in gpath_list]
    gid_list_ = ibs.add_images(gpath_list)

    # <DEBUG>
    print("[ingest] Length gid list: %d" % len(gid_list_))
    print("[ingest] Length unique gid list: %d" % len(set(gid_list_)))
    assert len(gid_list_) == len(gpath_list)
    for gid in gid_list_:
        if gid is None:
            print('[ingest] big fat warning')
    # </DEBUG>

    # Drop failed images together with their names so that both lists stay
    # aligned for conflict resolution.
    valid_pairs = [(gid, name) for gid, name in zip(gid_list_, name_list)
                   if gid is not None]
    gid_list = [gid for gid, _ in valid_pairs]
    valid_names = [name for _, name in valid_pairs]
    unique_gids, unique_names, unique_notes = resolve_name_conflicts(
        gid_list, valid_names)

    # Add ANNOTATIONs with names and notes
    aid_list = []
    if ingestable.images_as_annots:
        aid_list = ibs.use_images_as_annotations(unique_gids,
                                                 adjust_percent=adjust_percent)
        ibs.set_annot_names(aid_list, unique_names)
        ibs.set_annot_notes(aid_list, unique_notes)
        if species_text is not None:
            ibs.set_annot_species(aid_list, [species_text] * len(aid_list))
    if localize:
        ibs.localize_images()

    TURTLE_HURISTIC = 'turtles' in img_dir
    if TURTLE_HURISTIC:
        # python -m ibeis --tf ingest_rawdata --db seaturtles  --imgdir "~/turtles/Turtles from Jill" --ingest-type=named_folders --species=turtles
        turtle_aids = ibs.get_valid_aids()
        parent_gids = ibs.get_annot_gids(turtle_aids)
        annot_orig_uris = ibs.get_image_uris_original(parent_gids)

        def parse_turtle_uri(uri):
            """ guesses the view and occurrence from an original image uri """
            from os.path import splitext
            info = {}
            uril = uri.lower()

            def findany(text, possible):
                return any([x in text for x in possible])
            # Later matches overwrite earlier ones
            if findany(uril, ['right']) or splitext(uril)[0].endswith('rs'):
                info['view'] = 'right'
            if findany(uril, ['left']) or splitext(uril)[0].endswith('ls'):
                info['view'] = 'left'
            if findany(uril, ['carapace', 'whole', 'carpace']) or splitext(uril)[0].endswith('wb'):
                info['view'] = 'up'
            # The folder containing the image identifies the occurrence
            occurrence_id = basename(dirname(uri))
            info['occurrence'] = 'occurrence' + occurrence_id
            return info
        turtle_info_list = [parse_turtle_uri(uri) for uri in annot_orig_uris]
        # NOTE(review): 'view' is only set when the uri matched one of the
        # patterns above - confirm ut.take_column copes with a missing key.
        view_text_list = ut.take_column(turtle_info_list, 'view')
        occur_text_list = ut.take_column(turtle_info_list, 'occurrence')
        turtle_tag_list = list(zip(occur_text_list, view_text_list))

        # TODO: mark viewpoints using euler angles / quaternions
        ibs.set_image_imagesettext(parent_gids, occur_text_list)
        ibs.append_annot_case_tags(turtle_aids, ut.lmap(list, turtle_tag_list))

    if postingest_func is not None:
        postingest_func(ibs)
    return aid_list


def normalize_name(name):
    """
    Maps unknown names to the standard unknown-individual key.

    Args:
        name (str): name text parsed from a path

    Returns:
        str: ``const.INDIVIDUAL_KEY`` if ``name`` is one of
            ``const.ACCEPTED_UNKNOWN_NAMES``, otherwise ``name`` unchanged
    """
    if name in const.ACCEPTED_UNKNOWN_NAMES:
        return const.INDIVIDUAL_KEY
    return name


def get_name_texts_from_parent_folder(gpath_list, img_dir, fmtkey=None):
    """
    Derives a name for each image from the folder it is stored in.

    Args:
        gpath_list (list): list of image paths
        img_dir (str): root directory; the name of an image is the
            directory part of its path relative to this root
        fmtkey (str): optional ``parse`` format containing a ``{name}``
            field, e.g. 'African Wild Dog: {name}'. Folder names that do
            not match the format are used unchanged.

    Returns:
        list: name_list - one normalized name per image path
    """
    relgpath_list = [relpath(gpath, img_dir) for gpath in gpath_list]
    _name_list = [dirname(relgpath) for relgpath in relgpath_list]

    if fmtkey is not None:
        # Extract the {name} field; fall back to the raw folder name when
        # the folder does not match the format.
        parse_results = [parse.parse(fmtkey, name) for name in _name_list]
        _name_list = [res['name'] if res is not None else name
                      for name, res in zip(_name_list, parse_results)]

    name_list = [normalize_name(name) for name in _name_list]
    return name_list


class FMT_KEYS(object):
    """
    Keys naming the known image-filename formats.

    Each value is a lookup key into the ``INGEST_FORMATS`` table built by
    ``get_name_texts_from_gnames``; the strings are identifiers and are not
    themselves used as regular expressions.
    """
    # <alpha name><optional digits>.<ext>
    name_fmt = '{name:*}[id:d].{ext}'
    # <alpha name + 2 digits><2 digits>.<ext>
    snails_fmt  = '{name:*dd}{id:dd}.{ext}'
    # G<digits>_<digits>.<ext>
    giraffe1_fmt = '{name:*}_{id:d}.{ext}'
    # Phs<digits><capital letters>.<ext>
    seal2_fmt = '{name:Phsd*}{id:[A-Z]}.{ext}'
    # optional 'e_' prefix, a name, a view and an optional id in either order
    elephant_fmt = '{prefix?}{name}_{view}_{id?}.{ext}'


def get_name_texts_from_gnames(gpath_list, img_dir, fmtkey='{name:*}[aid:d].{ext}'):
    """
    Parses a name out of the file name of each image.

    Args:
        gpath_list (list): list of image paths
        img_dir (str): path to image directory (not used by this function)
        fmtkey (str): one of the ``FMT_KEYS`` values, or a regex with a
            ``name`` group. None and the legacy default
            '{name:*}[aid:d].{ext}' both select ``FMT_KEYS.name_fmt``.

    Returns:
        list: name_list - one normalized name per image path

    Raises:
        ValueError: if any file name cannot be parsed with the format

    CommandLine:
        python -m ibeis.dbio.ingest_database --test-get_name_texts_from_gnames

    Example:
        >>> # DISABLE_DOCTEST
        >>> from ibeis.dbio.ingest_database import *  # NOQA
        >>> gpath_list = ['e_f0273_f.jpg', 'f0001_f.jpg', 'f0259_l_3.jpg', 'f0259_f_1.jpg',  'f0259_f (1).jpg', 'f0058_u16_f.jpg']
        >>> img_dir = ''
        >>> fmtkey = FMT_KEYS.elephant_fmt
        >>> result = get_name_texts_from_gnames(gpath_list, img_dir, fmtkey)
        >>> print(result)
    """
    # The signature default predates the rename of the key ('aid' -> 'id')
    # and callers forward fmtkey=None; neither is a registered key, so map
    # both onto the plain name format instead of using them as a regex.
    if fmtkey is None or fmtkey == '{name:*}[aid:d].{ext}':
        fmtkey = FMT_KEYS.name_fmt

    # These define regexes that attempt to parse the insane and contradicting
    # naming schemes of the image sets that we get.
    INGEST_FORMATS = {
        FMT_KEYS.name_fmt: ut.named_field_regex([
            ('name', r'[a-zA-Z]+'),  # all alpha characters
            ('id',   r'\d*'),        # first numbers (if existant)
            ( None,  r'\.'),
            ('ext',  r'\w+'),
        ]),

        FMT_KEYS.snails_fmt: ut.named_field_regex([
            ('name', r'[a-zA-Z]+\d\d'),  # species and 2 numbers
            ('id',   r'\d\d'),  # 2 more numbers
            ( None,  r'\.'),
            ('ext',  r'\w+'),
        ]),

        FMT_KEYS.giraffe1_fmt: ut.named_field_regex([
            ('name',  r'G\d+'),
            ('under', r'_'),
            ('id',    r'\d+'),
            ( None,   r'\.'),
            ('ext',   r'\w+'),
        ]),

        FMT_KEYS.seal2_fmt: ut.named_field_regex([
            ('name',  r'Phs\d+'),  # Phs and then numbers
            ('id',    r'[A-Z]+'),  # 1 or more letters
            ( None,   r'\.'),
            ('ext',   r'\w+'),
        ]),

        # this one defines multiple possible regex types. yay standards
        FMT_KEYS.elephant_fmt: [
            ut.named_field_regex([
                ('prefix',  r'(e_)?'),
                ('name', r'[a-zA-Z0-9]+'),
                ('view', r'_[rflo]'),
                ('id',    r'([ _][^.]+)?'),
                ( None,   r'\.'),
                ('ext',   r'\w+'),
            ]),
            ut.named_field_regex([
                ('prefix',  r'(e_)?'),
                ('name', r'[a-zA-Z0-9]+'),
                ('id',    r'([ _][^.]+)?'),
                ('view', r'_[rflo]'),
                ( None,   r'\.'),
                ('ext',   r'\w+'),
            ])],
    }
    # An unregistered fmtkey is treated as a regex in its own right
    regex_list = INGEST_FORMATS.get(fmtkey, fmtkey)
    if not isinstance(regex_list, list):
        regex_list = [regex_list]
    gname_list = ut.fpaths_to_fnames(gpath_list)

    def parse_format(regex_list, gname):
        """ returns the result of the first regex that parses gname """
        for regex in regex_list:
            result = ut.regex_parse(regex, gname)
            if result is not None:
                return result
        return None

    parsed_list = [parse_format(regex_list, gname) for gname in gname_list]

    # Report every unparsable path before failing
    anyfailed = False
    for gpath, parsed in zip(gpath_list, parsed_list):
        if parsed is None:
            print('FAILED TO PARSE: %r' % gpath)
            anyfailed = True
    if anyfailed:
        raise ValueError('FAILED REGEX: %r' % regex_list)

    name_list = [normalize_name(parsed['name']) for parsed in parsed_list]
    return name_list


def resolve_name_conflicts(gid_list, name_list):
    """
    Picks a single name for every image that was given several names.

    Args:
        gid_list (list): image rowids, possibly containing repeats
        name_list (list): name for the corresponding entry of ``gid_list``

    Returns:
        tuple: (unique_gids, unique_names, unique_notes) - the distinct
            image rowids in first-seen order, the name chosen for each and
            a note that lists any other names as ``aliases([...])`` (an
            empty string if there are none)
    """
    # Build conflict map (values are lists of members)
    conflict_gid_to_names = ut.build_conflict_dict(gid_list, name_list)

    unique_gids = ut.unique_ordered(gid_list)
    unique_names = []
    unique_notes = []

    for gid in unique_gids:
        names = ut.unique_ordered(conflict_gid_to_names[gid])
        # The unknown name never wins against a real name
        if len(names) > 1 and '____' in names:
            names.remove('____')
        aliases = names[1:]
        if aliases:
            unique_note = 'aliases([' + ', '.join(repr(alias) for alias in aliases) + '])'
        else:
            unique_note = ''
        unique_names.append(names[0])
        unique_notes.append(unique_note)

    return unique_gids, unique_names, unique_notes


#
#
### <STANDARD DATABASES> ###

# Registry mapping a standard database name to its ingest function
STANDARD_INGEST_FUNCS = dict()


def __standard(dbname):
    """
    Decorator factory registering a function as a standard ingestable
    database under the name ``dbname``. The function is returned unchanged.
    """
    def _register(ingest_func):
        STANDARD_INGEST_FUNCS.update({dbname: ingest_func})
        return ingest_func
    return _register


@__standard('testdb1')
def ingest_testdb1(dbname):
    """
    Builds the Ingestable for the testdb1 test database.

    Args:
        dbname (str): name of the database to ingest into

    Returns:
        Ingestable: named_images ingest of the vtool test data with a
            post-ingest hook that adds test times, gps, species, notes,
            tags and annotation matches

    Example:
        >>> # DISABLE_DOCTEST
        >>> from ibeis.dbio.ingest_database import *  # NOQA
        >>> import utool as ut
        >>> from vtool.tests import grabdata
        >>> import ibeis
        >>> grabdata.ensure_testdata()
        >>> # DELETE TESTDB1
        >>> TESTDB1 = ut.unixjoin(ibeis.sysres.get_workdir(), 'testdb1')
        >>> ut.delete(TESTDB1, ignore_errors=False)
        >>> dbname = 'testdb1'
        >>> result = ingest_testdb1(dbname)
    """
    from vtool.tests import grabdata   # TODO: remove and use utool appdir

    def postingest_tesdb1_func(ibs):
        """ adjusts the freshly ingested test data in place """
        import numpy as np
        print('postingest_tesdb1_func')
        # Adjust data as we see fit
        gid_list = np.array(ibs.get_valid_gids())
        # Set image unixtimes
        unixtimes_even = (gid_list[0::2] + 100).tolist()
        unixtimes_odd  = (gid_list[1::2] + 9001).tolist()
        unixtime_list = unixtimes_even + unixtimes_odd
        ibs.set_image_unixtime(gid_list, unixtime_list)
        # Unname first aid in every name
        aid_list = ibs.get_valid_aids()
        nid_list = ibs.get_annot_name_rowids(aid_list)
        nid_list = [ (nid if nid > 0 else None) for nid in nid_list]
        unique_flag = ut.flag_unique_items(nid_list)
        unique_nids = ut.compress(nid_list, unique_flag)
        none_nids = [ nid is not None for nid in nid_list]
        flagged_nids = [nid for nid in unique_nids if nid_list.count(nid) > 1]
        plural_flag = [nid in flagged_nids for nid in nid_list]
        # Flag an annotation when its name occurs more than once, it is
        # marked by flag_unique_items and its name is known
        flag_list = list(map(all, zip(plural_flag, unique_flag, none_nids)))
        flagged_aids = ut.compress(aid_list, flag_list)
        if ut.VERYVERBOSE:
            def print2(*args):
                print('[post_testdb1] ' + ', '.join(args))
            print2('aid_list=%r' % aid_list)
            print2('nid_list=%r' % nid_list)
            print2('unique_flag=%r' % unique_flag)
            print2('plural_flag=%r' % plural_flag)
            print2('unique_nids=%r' % unique_nids)
            print2('none_nids=%r' % none_nids)
            print2('flag_list=%r' % flag_list)
            print2('flagged_nids=%r' % flagged_nids)
            print2('flagged_aids=%r' % flagged_aids)
        # Unname, some annotations for testing
        ibs.delete_annot_nids(flagged_aids)
        # Add all annotations with names as exemplars
        unflagged_aids = ut.get_dirty_items(aid_list, flag_list)
        exemplar_flags = [True] * len(unflagged_aids)
        ibs.set_annot_exemplar_flags(unflagged_aids, exemplar_flags)
        # Set some test species labels
        species_text_list = ibs.get_annot_species_texts(aid_list)
        for ix in range(0, 6):
            species_text_list[ix] = const.TEST_SPECIES.ZEB_PLAIN
        # These are actually plains zebras.
        for ix in range(8, 10):
            species_text_list[ix] = const.TEST_SPECIES.ZEB_GREVY
        for ix in range(10, 12):
            species_text_list[ix] = const.TEST_SPECIES.BEAR_POLAR

        ibs.set_annot_species(aid_list, species_text_list)
        ibs.set_annot_notes(aid_list[8:10], ['this is actually a plains zebra'] * 2)
        ibs.set_annot_notes(aid_list[0:1], ['aid 1 and 2 are correct matches'])
        ibs.set_annot_notes(aid_list[6:7], ['very simple image to debug feature detector'])
        ibs.set_annot_notes(aid_list[7:8], ['standard test image'])

        # Set some randomish gps flags that are within nnp
        unixtime_list = ibs.get_image_unixtime(gid_list)
        valid_lat_min = -1.4446
        valid_lat_max = -1.3271
        valid_lon_min = 36.7619
        valid_lon_max = 36.9622
        valid_lat_range = valid_lat_max - valid_lat_min
        valid_lon_range = valid_lon_max - valid_lon_min
        # Seeded with the image times so the coordinates are reproducible
        randstate = np.random.RandomState(unixtime_list)
        new_gps_list = randstate.rand(len(gid_list), 2)
        new_gps_list[:, 0] = (new_gps_list[:, 0] * valid_lat_range) + valid_lat_min
        new_gps_list[:, 1] = (new_gps_list[:, 1] * valid_lon_range) + valid_lon_min
        new_gps_list[8, :] = [-1, -1]
        ibs.set_image_gps(gid_list, new_gps_list)

        # TODO: add a nan timestamp
        ibs.append_annot_case_tags([2], ['error:bbox'])
        ibs.append_annot_case_tags([4], ['quality:washedout'])
        ibs.append_annot_case_tags([4], ['lighting'])

        # Pair up the first two annotations of every name with >= 2 annots
        aidgroups = ibs.group_annots_by_name(
            ibs.filter_annots_general(min_pername=2, verbose=True))[0]
        aid1_list = ut.take_column(aidgroups, 0)
        aid2_list = ut.take_column(aidgroups, 1)
        annotmatch_rowids = ibs.add_annotmatch(aid1_list, aid2_list)

        ibs.set_annotmatch_truth(annotmatch_rowids, [True] * len(annotmatch_rowids))
        ibs.set_annotmatch_prop('photobomb', annotmatch_rowids, [True] * len(annotmatch_rowids))

        return None
    return Ingestable(dbname, ingest_type='named_images',
                      fmtkey=FMT_KEYS.name_fmt,
                      img_dir=grabdata.get_testdata_dir(),
                      adjust_percent=0.00,
                      postingest_func=postingest_tesdb1_func)


@__standard('humpbacks')
def ingest_humpbacks(dbname):
    """
    Builds the Ingestable for the humpbacks database.

    The original humpbacks data is ROI cropped images in the named folder
    format.
    """
    # NOTE(review): an earlier comment here mentioned a zipfile that only
    # exists on one developer's machine; no zipfile is passed.
    return Ingestable(dbname, ingest_type='named_folders',
                      adjust_percent=0.00,
                      species='whale_humpback',
                      fmtkey='name')


@__standard('polar_bears')
def ingest_polar_bears(dbname):
    """ Builds the named_folders Ingestable for the polar_bears database. """
    return Ingestable(dbname, ingest_type='named_folders',
                      adjust_percent=0.00,
                      fmtkey='name')


@__standard('wd_peter_blinston')
def ingest_wilddog_peter(dbname):
    """
    Builds the Ingestable for the wild dog images, ingested without names.

    CommandLine:
        python -m ibeis.dbio.ingest_database --exec-injest_main --db wd_peter_blinston
    """
    return Ingestable(dbname, ingest_type='unknown',
                      img_dir='/raid/raw_rsync/african-dogs',
                      adjust_percent=0.01,
                      species=const.Species.WILDDOG)


@__standard('lynx')
def ingest_lynx(dbname):
    """
    Builds the named_folders Ingestable for the lynx database.

    CommandLine:
        python -m ibeis.dbio.ingest_database --exec-injest_main --db lynx
    """
    return Ingestable(dbname, ingest_type='named_folders',
                      img_dir='/raid/raw_rsync/iberian-lynx/CARPETAS CATALOGO INDIVIDUOS/',
                      adjust_percent=0.01,
                      species='lynx',
                      fmtkey='name')


@__standard('WS_ALL')
def ingest_whale_sharks(dbname):
    """
    Builds the named_folders Ingestable for the whale shark database.

    CommandLine:
        python -m ibeis.dbio.ingest_database --exec-injest_main --db WS_ALL
    """
    return Ingestable(dbname, ingest_type='named_folders',
                      img_dir='named-left-sharkimages',
                      adjust_percent=0.01,
                      species='whale_shark',
                      fmtkey='name')


@__standard('snails_drop1')
def ingest_snails_drop1(dbname):
    """ Builds the named_images Ingestable for the snails_drop1 database. """
    return Ingestable(dbname,
                      ingest_type='named_images',
                      fmtkey=FMT_KEYS.snails_fmt,
                      species='snail',
                      adjust_percent=.20)


@__standard('seals_drop2')
def ingest_seals_drop2(dbname):
    """
    Builds the named_images Ingestable for the seals_drop2 database, whose
    images are supplied as a zipfile.
    """
    return Ingestable(dbname,
                      zipfile='../raw/hiby_Phs_photos.zip',
                      ingest_type='named_images',
                      fmtkey=FMT_KEYS.seal2_fmt,
                      adjust_percent=.20,
                      species='seal_saimma_ringed'
                      )


@__standard('JAG_Kieryn')
def ingest_JAG_Kieryn(dbname):
    """ Builds the Ingestable for the jaguar images, ingested without names. """
    return Ingestable(dbname,
                      ingest_type='unknown',
                      species='jaguar',
                      adjust_percent=0.00)


@__standard('Giraffes')
def ingest_Giraffes1(dbname):
    """ Builds the named_images Ingestable for the Giraffes database. """
    return Ingestable(dbname,
                      ingest_type='named_images',
                      fmtkey=FMT_KEYS.giraffe1_fmt,
                      species='giraffe_reticulated',
                      adjust_percent=0.00)


@__standard('Elephants_drop1')
def ingest_Elephants_drop1(dbname):
    """
    Build the Ingestable describing the ``Elephants_drop1`` database.

    Args:
        dbname (str): database name forwarded to Ingestable

    Returns:
        Ingestable: spec using the ``named_images`` ingest type, the
            elephant filename format and the raw zip archive
    """
    raw_zip = '../raw_unprocessed/ID photo front_Elephants_4-29-2015-PeterGranli.zip'  # NOQA
    ingest_kw = dict(
        species='elephant_savanna',
        ingest_type='named_images',
        fmtkey=FMT_KEYS.elephant_fmt,
        zipfile=raw_zip,
        adjust_percent=0.0,
    )
    return Ingestable(dbname, **ingest_kw)


def get_standard_ingestable(dbname):
    """
    Look up the ingest function registered for ``dbname`` and call it.

    Args:
        dbname (str): key into STANDARD_INGEST_FUNCS

    Returns:
        whatever the registered ingest function returns for ``dbname``

    Raises:
        AssertionError: if ``dbname`` is not a key of STANDARD_INGEST_FUNCS
    """
    # Guard first so errors raised inside the ingest function itself are
    # never confused with an unknown database name.
    if dbname not in STANDARD_INGEST_FUNCS:
        raise AssertionError('Unknown dbname=%r' % (dbname,))
    ingest_func = STANDARD_INGEST_FUNCS[dbname]
    return ingest_func(dbname)


def ingest_standard_database(dbname, force_delete=False):
    """
    Ingest one of the known standard databases.

    Resolves the Ingestable for ``dbname``, makes sure its database
    directory exists, optionally wipes it, and then runs ``ingest_rawdata``
    on a controller requested for that directory.

    Args:
        dbname (str): database name; must be known to
            ``get_standard_ingestable``
        force_delete (bool): if True, ``delete_ibeis_database`` is called on
            the database directory before the controller is requested

    Returns:
        the controller returned by ``request_IBEISController`` after the raw
        data has been ingested into it

    Raises:
        AssertionError: propagated from ``get_standard_ingestable`` when
            ``dbname`` is unknown

    Example:
        >>> from ibeis.dbio.ingest_database import *  # NOQA
        >>> dbname = 'testdb1'
        >>> force_delete = False
        >>> result = ingest_standard_database(dbname, force_delete)
        >>> print(result)
    """
    from ibeis.control import IBEISControl
    print('[ingest] Ingest Standard Database: dbname=%r' % (dbname,))
    ingestable = get_standard_ingestable(dbname)
    dbdir = ibeis.sysres.db_to_dbdir(ingestable.dbname, allow_newdir=True, use_sync=False)
    ut.ensuredir(dbdir, verbose=True)
    if force_delete:
        ibsfuncs.delete_ibeis_database(dbdir)
    ibs = IBEISControl.request_IBEISController(dbdir)
    ingest_rawdata(ibs, ingestable)
    # Hand the controller back: the docstring example and injest_main both
    # use the return value, which used to be None.
    return ibs

### </STANDARD DATABASES> ###
#
#


def ingest_oxford_style_db(dbdir, dryrun=False):
    """
    Ingest either oxford or paris

    Reads the oxford-style groundtruth text files found under ``dbdir``,
    prints summary statistics and, unless ``dryrun`` is True, adds the
    images together with query, database and distractor annotations to the
    IBEIS database opened at ``dbdir``.

    Args:
        dbdir (str): directory holding the image and groundtruth subdirs
        dryrun (bool): if True, only parse and check the groundtruth; no
            database is opened for writing (default = False)

    Raises:
        ValueError: if a groundtruth filename does not match the
            ``<name>_<num>_<quality>.txt`` pattern
        AssertionError: if an ignored image is still listed, a query image
            has no groundtruth, or a query image appears twice

    CommandLine:
        python -m ibeis.dbio.ingest_database --exec-ingest_oxford_style_db --show

    Example:
        >>> # DISABLE_DOCTEST
        >>> from ibeis.dbio.ingest_database import *  # NOQA
        >>> dbdir = '/raid/work/Oxford'
        >>> dryrun = True
        >>> ingest_oxford_style_db(dbdir, dryrun)
        >>> ut.quit_if_noshow()
        >>> import plottool as pt
        >>> ut.show_if_requested()

    Ignore:
        >>> from ibeis.dbio.ingest_database import *  # NOQA
        >>> import ibeis
        >>> dbdir = '/raid/work/Oxford'
        >>> dbdir = '/raid/work/Paris'
        >>>
        #>>> ibeis.dbio.convert_db.ingest_oxford_style_db(dbdir)
    """
    print('Loading Oxford Style Images from: ' + dbdir)

    def _parse_oxsty_gtfname(gt_fname):
        """ parse gtfname for: (name, num, quality) """
        # num is an id, not a number of annots
        gt_format = '{}_{:d}_{:D}.txt'
        parsed = parse.parse(gt_format, gt_fname)
        if parsed is None:
            # parse.parse returns None on mismatch; fail with the offending
            # filename instead of an opaque unpacking TypeError
            raise ValueError(
                'Cannot parse oxford style groundtruth filename %r' % (gt_fname,))
        name, num, quality = parsed
        return (name, num, quality)

    def _read_oxsty_gtfile(gt_fpath, img_dpath, ignore_set):
        """ read one groundtruth file into a list of (gname, bbox) tuples """
        oxsty_annot_info_list = []
        with open(gt_fpath, 'r') as file_:
            line_list = file_.read().splitlines()
        for line in line_list:
            if line == '':
                continue
            fields = line.split(' ')
            gname = fields[0].replace('oxc1_', '') + '.jpg'
            # >:( Because PARIS just cant keep paths consistent
            if gname.find('paris_') >= 0:
                paris_hack = gname[6:gname.rfind('_')]
                gname = join(paris_hack, gname)
            if gname in ignore_set:
                continue
            if len(fields) > 1:  # if has bbox
                bbox = [int(round(float(x))) for x in fields[1:]]
            else:
                # No bbox given: use the full image extent. Only the first
                # two shape entries are needed, so 2D images work as well.
                gpath = join(img_dpath, gname)
                h, w = vt.imread(gpath, orient='auto').shape[0:2]
                bbox = [0, 0, w, h]
            oxsty_annot_info_list.append((gname, bbox))
        return oxsty_annot_info_list

    gt_dpath = ut.existing_subpath(dbdir,
                                   ['oxford_style_gt',
                                    'gt_files_170407',
                                    'oxford_groundtruth'])

    img_dpath = ut.existing_subpath(dbdir,
                                    ['oxbuild_images',
                                     'images'])

    corrupted_file_fpath = join(gt_dpath, 'corrupted_files.txt')
    ignore_list = []
    # Check for corrupted files (Looking at your Paris Buildings Dataset)
    if ut.checkpath(corrupted_file_fpath):
        ignore_list = ut.read_from(corrupted_file_fpath).splitlines()
    ignore_set = set(ignore_list)

    gname_list = ut.list_images(img_dpath, ignore_list=ignore_list,
                                recursive=True, full=False)
    gname_set = set(gname_list)

    # just in case utool broke
    for ignore in ignore_list:
        assert ignore not in gname_set

    # Read the Oxford Style Groundtruth files
    print('Loading Oxford Style Names and Annots')
    gt_fname_list = os.listdir(gt_dpath)
    num_gt_files = len(gt_fname_list)
    query_annots  = []
    gname2_annots_raw = ut.ddict(list)
    name_set = set([])
    print(' * num_gt_files = %d ' % num_gt_files)
    #
    # Iterate over each groundtruth file
    for gt_fname in ut.ProgIter(gt_fname_list, 'parsed oxsty gtfile: '):
        if gt_fname == 'corrupted_files.txt':
            continue
        # Get name, quality, and num from fname
        (name, num, quality) = _parse_oxsty_gtfname(gt_fname)
        gt_fpath = join(gt_dpath, gt_fname)
        name_set.add(name)
        oxsty_annot_info_sublist = _read_oxsty_gtfile(
            gt_fpath, img_dpath, ignore_set)
        if quality == 'query':
            for (gname, bbox) in oxsty_annot_info_sublist:
                query_annots.append((gname, bbox, name, num))
        else:
            for (gname, bbox) in oxsty_annot_info_sublist:
                gname2_annots_raw[gname].append((name, bbox, quality))
    print(' * num_query images = %d ' % len(query_annots))
    #
    # Remove duplicates img.jpg : (*1.txt, *2.txt, ...) -> (*.txt)
    gname2_annots     = ut.ddict(list)
    multinamed_gname_list = []
    # .items() works on both python2 and python3 (.iteritems() does not)
    for gname, val in gname2_annots_raw.items():
        # Keep the first occurrence of each distinct annotation, in file
        # order, so the result does not depend on set iteration order.
        seen_reprs = set()
        for annot in val:
            annot_repr = repr(annot)
            if annot_repr not in seen_reprs:
                seen_reprs.add(annot_repr)
                gname2_annots[gname].append(annot)
        if len(gname2_annots[gname]) > 1:
            multinamed_gname_list.append(gname)
    # print some statistics
    query_gname_list = [tup[0] for tup in query_annots]
    gname_with_groundtruth_list = gname2_annots.keys()
    gname_with_groundtruth_set = set(gname_with_groundtruth_list)
    query_gname_set = set(query_gname_list)
    gname_without_groundtruth_list = list(gname_set - gname_with_groundtruth_set)
    print(' * num_images = %d ' % len(gname_list))
    print(' * images with groundtruth    = %d ' % len(gname_with_groundtruth_list))
    print(' * images without groundtruth = %d ' % len(gname_without_groundtruth_list))
    print(' * images with multi-groundtruth = %d ' % len(multinamed_gname_list))
    # make sure all queries have ground truth and there are no duplicate queries
    #
    assert len(query_gname_list) == len(query_gname_set.intersection(gname_with_groundtruth_list))
    assert len(query_gname_list) == len(set(query_gname_list))
    #=======================================================
    # Build IBEIS database

    if not dryrun:
        ibs = ibeis.opendb(dbdir, allow_newdir=True)
        ibs.cfg.other_cfg.auto_localize = False
        print('adding to table: ')
        # Add images to ibeis
        gpath_list = [join(img_dpath, gname).replace('\\', '/') for gname in gname_list]
        gid_list = ibs.add_images(gpath_list)

        # One dict lookup per image instead of a linear gname_list.index()
        # scan; setdefault keeps the first gid, matching list.index().
        gname_to_gid = {}
        for gname, gid in zip(gname_list, gid_list):
            gname_to_gid.setdefault(gname, gid)

        # 1) Add Query Annotations
        qgname_list, qbbox_list, qname_list, qid_list = zip(*query_annots)
        # get image ids of queries
        qgid_list = [gname_to_gid[gname] for gname in qgname_list]
        qnote_list = ['query'] * len(qgid_list)
        # 2) Add nonquery database annots
        dgid_list = []
        dname_list = []
        dbbox_list = []
        dnote_list = []
        for gname, annots in gname2_annots.items():
            gid = gname_to_gid[gname]
            for name, bbox, quality in annots:
                dgid_list.append(gid)
                dbbox_list.append(bbox)
                dname_list.append(name)
                dnote_list.append(quality)
        # 3) Add distractors: TODO: 100k
        ugid_list = [gname_to_gid[gname]
                     for gname in gname_without_groundtruth_list]
        ubbox_list = [[0, 0, w, h] for (w, h) in ibs.get_image_sizes(ugid_list)]
        unote_list = ['distractor'] * len(ugid_list)

        # TODO Annotation consistency in terms of duplicate bounding boxes
        qaid_list = ibs.add_annots(qgid_list, bbox_list=qbbox_list,
                                   name_list=qname_list, notes_list=qnote_list)
        daid_list = ibs.add_annots(dgid_list, bbox_list=dbbox_list,
                                   name_list=dname_list, notes_list=dnote_list)
        uaid_list = ibs.add_annots(ugid_list, bbox_list=ubbox_list, notes_list=unote_list)
        print('Added %d query annototations' % len(qaid_list))
        print('Added %d database annototations' % len(daid_list))
        print('Added %d distractor annototations' % len(uaid_list))

    # Disabled post-processing step, kept for reference until it is folded
    # into the normal ingest pipeline.
    update = False
    if update:
        # TODO: integrate this into normal ingest pipeline
        # 'Oxford'
        ibs = ibeis.opendb(dbdir)
        aid_list = ibs.get_valid_aids()
        notes_list = ibs.get_annot_notes(aid_list)
        _dict = {
            'ok': ibs.const.QUAL_OK,
            'good': ibs.const.QUAL_GOOD,
            'junk': ibs.const.QUAL_JUNK,
            #'distractor': ibs.const.QUAL_JUNK
        }
        qual_text_list = [_dict.get(note, ibs.const.QUAL_UNKNOWN) for note in notes_list]
        ibs.set_annot_quality_texts(aid_list, qual_text_list)
        ibs._overwrite_all_annot_species_to('building')

        tags_list = [[note] if note in ['query', 'distractor'] else [] for note in notes_list]
        from ibeis import tag_funcs
        tag_funcs.append_annot_case_tags(ibs, aid_list, tags_list)
        # python -m ibeis --tf filter_annots_general --db Oxford --has_any=[query]


def ingest_serengeti_mamal_cameratrap(species):
    """
    Downloads data from Serengeti dryad server

    Fetches the dryad metadata csv files, picks the capture events labeled
    with the requested species, downloads the images of those events that
    are not already on disk, and adds them to a ``<code>_Serengeti``
    database in the ibeis work directory.

    References:
        http://datadryad.org/resource/doi:10.5061/dryad.5pt92
        Swanson AB, Kosmala M, Lintott CJ, Simpson RJ, Smith A, Packer C (2015)
        Snapshot Serengeti, high-frequency annotated camera trap images of 40
        mammalian species in an African savanna. Scientific Data 2: 150026.
        http://dx.doi.org/10.1038/sdata.2015.26
        Swanson AB, Kosmala M, Lintott CJ, Simpson RJ, Smith A, Packer C (2015)
        Data from: Snapshot Serengeti, high-frequency annotated camera trap
        images of 40 mammalian species in an African savanna. Dryad Digital
        Repository. http://dx.doi.org/10.5061/dryad.5pt92

    Args:
        species (str or None): ``'zebra_plains'``, ``'cheetah'``, or None to
            select the events of every species (database code ``ALL``)

    Returns:
        the controller opened on the new database, with the chosen images
        added

    Raises:
        NotImplementedError: if ``species`` is any other value
        AssertionError: if the species does not occur in the consensus data

    CommandLine:
        python -m ibeis.dbio.ingest_database --test-ingest_serengeti_mamal_cameratrap --species zebra_plains
        python -m ibeis.dbio.ingest_database --test-ingest_serengeti_mamal_cameratrap --species cheetah

    Example:
        >>> # SCRIPT
        >>> from ibeis.dbio.ingest_database import *  # NOQA
        >>> import ibeis
        >>> species = ut.get_argval('--species', type_=str, default=ibeis.const.TEST_SPECIES.ZEB_PLAIN)
        >>> # species = ut.get_argval('--species', type_=str, default='cheetah')
        >>> result = ingest_serengeti_mamal_cameratrap(species)
        >>> print(result)
    """
    if species is None:
        code = 'ALL'
    elif species == 'zebra_plains':
        code = 'PZ'
    elif species == 'cheetah':
        code = 'CHTH'
    else:
        raise NotImplementedError('unsupported species=%r' % (species,))

    # The serengeti metadata labels plains zebras simply as "zebra"
    serengeti_species = 'zebra' if species == 'zebra_plains' else species

    print('species = %r' % (species,))
    print('serengeti_sepcies = %r' % (serengeti_species,))

    dbname = code + '_Serengeti'
    print('dbname = %r' % (dbname,))
    dbdir = ut.ensuredir(join(ibeis.sysres.get_workdir(), dbname))
    print('dbdir = %r' % (dbdir,))
    image_dir = ut.ensuredir(join(dbdir, 'images'))

    base_url = 'http://datadryad.org/bitstream/handle/10255'
    all_images_url         = base_url + '/dryad.86392/all_images.csv'
    consensus_metadata_url = base_url + '/dryad.86348/consensus_data.csv'
    search_effort_url      = base_url + '/dryad.86347/search_effort.csv'
    gold_standard_url      = base_url + '/dryad.76010/gold_standard_data.csv'

    all_images_fpath         = ut.grab_file_url(all_images_url, download_dir=dbdir)
    consensus_metadata_fpath = ut.grab_file_url(consensus_metadata_url, download_dir=dbdir)
    search_effort_fpath      = ut.grab_file_url(search_effort_url, download_dir=dbdir)
    gold_standard_fpath      = ut.grab_file_url(gold_standard_url, download_dir=dbdir)

    print('all_images_fpath         = %r' % (all_images_fpath,))
    print('consensus_metadata_fpath = %r' % (consensus_metadata_fpath,))
    print('search_effort_fpath      = %r' % (search_effort_fpath,))
    print('gold_standard_fpath      = %r' % (gold_standard_fpath,))

    def read_csv(csv_fpath):
        """ naive comma split of a csv file; returns (rows, header) """
        csv_text = ut.read_from(csv_fpath)
        csv_lines = csv_text.split('\n')
        print(ut.list_str(csv_lines[0:2]))
        csv_rows = [[field.strip('"').strip('\r') for field in line.split(',')]
                    for line in csv_lines if len(line) > 0]
        return csv_rows[1:], csv_rows[0]

    def download_image_urls(image_url_info_list):
        """ download images missing from image_dir; returns all local paths """
        # Find ones that we already have
        print('Requested %d downloaded images' % (len(image_url_info_list)))
        full_gpath_list = [join(image_dir, basename(gpath)) for gpath in image_url_info_list]
        exists_list = [ut.checkpath(gpath) for gpath in full_gpath_list]
        image_url_info_list_ = ut.compress(image_url_info_list, ut.not_list(exists_list))
        print('Already have %d/%d downloaded images' % (
            len(image_url_info_list) - len(image_url_info_list_), len(image_url_info_list)))
        print('Need to download %d images' % (len(image_url_info_list_)))
        # Download the rest
        imgurl_prefix = 'https://snapshotserengeti.s3.msi.umn.edu/'
        image_url_list = [imgurl_prefix + suffix for suffix in image_url_info_list_]
        for img_url in ut.ProgressIter(image_url_list, lbl='Downloading image'):
            ut.grab_file_url(img_url, download_dir=image_dir)
        return full_gpath_list

    # Data contains information about which events have which animals
    use_gold_standard = False
    if use_gold_standard:
        species_class_csv_data, _ = read_csv(gold_standard_fpath)
        species_class_eventid_list    = ut.get_list_column(species_class_csv_data, 0)
        species_class_species_list    = ut.get_list_column(species_class_csv_data, 2)
    else:
        species_class_csv_data, _ = read_csv(consensus_metadata_fpath)
        species_class_eventid_list    = ut.get_list_column(species_class_csv_data, 0)
        species_class_species_list    = ut.get_list_column(species_class_csv_data, 7)

    # Find the events of the requested species
    serengeti_species_set = sorted(set(species_class_species_list))
    print('serengeti_sepcies_hist = %s' %
          ut.dict_str(ut.dict_hist(species_class_species_list), key_order_metric='val'))

    if serengeti_species is None:
        # species=None means "ALL": keep every row instead of tripping the
        # membership assertion below, which could never pass for None.
        species_class_chosen_idx_list = list(range(len(species_class_species_list)))
    else:
        assert serengeti_species in serengeti_species_set, 'not a known  seregeti species'
        species_class_chosen_idx_list = ut.list_where(
            [serengeti_species == species_ for species_ in species_class_species_list])
    chosen_eventid_list = ut.take(species_class_eventid_list, species_class_chosen_idx_list)

    print('Number of chosen species:')
    print(' * len(species_class_chosen_idx_list) = %r' % (len(species_class_chosen_idx_list),))
    print(' * len(chosen_eventid_list) = %r' % (len(chosen_eventid_list),))

    # An event id may sit on more than one row (presumably one row per
    # species seen in the capture -- TODO confirm). Keep each event once so
    # the same images are not requested twice.
    seen_eventids = set()
    unique_eventid_list = []
    for eventid in chosen_eventid_list:
        if eventid not in seen_eventids:
            seen_eventids.add(eventid)
            unique_eventid_list.append(eventid)

    # Read info about which events have which images
    images_csv_data, _ = read_csv(all_images_fpath)
    capture_event_id_list = ut.get_list_column(images_csv_data, 0)
    image_url_info_list = ut.get_list_column(images_csv_data, 1)
    # Group photos by eventid
    eventid_to_photos = ut.group_items(image_url_info_list, capture_event_id_list)

    # Filter to only chosens
    unflat_chosen_url_infos = ut.dict_take(eventid_to_photos, unique_eventid_list)
    chosen_url_infos = ut.flatten(unflat_chosen_url_infos)
    chosen_path_list = download_image_urls(chosen_url_infos)

    ibs = ibeis.opendb(dbdir=dbdir, allow_newdir=True)
    gid_list_ = ibs.add_images(chosen_path_list, auto_localize=False)  # NOQA
    return ibs


def injest_main():
    r"""
    Command line entry point: ingest the standard database named by ``--db``.

    Prints a usage banner listing the keys of STANDARD_INGEST_FUNCS, reads
    ``--db`` and ``--force_delete`` / ``--force-delete`` from the command
    line and passes them to ``ingest_standard_database``.

    CommandLine:
        python -m ibeis.dbio.ingest_database --test-injest_main
        python -m ibeis.dbio.ingest_database --test-injest_main --db snow-leopards

    Example:
        >>> # DISABLE_DOCTEST
        >>> from ibeis.dbio.ingest_database import *  # NOQA
        >>> injest_main()
    """
    print('__main__ = ingest_database.py')
    # The usage text names the script's real location (ibeis/dbio).
    print(ut.unindent(
        '''
        usage:
        python ibeis/dbio/ingest_database.py --db [dbname]

        Valid dbnames:''') + ut.indentjoin(STANDARD_INGEST_FUNCS.keys(), '\n  * '))
    dbname = ut.get_argval('--db', str, None)
    force_delete = ut.get_argflag(('--force_delete', '--force-delete'))
    ingest_standard_database(dbname, force_delete)
    print('finished db injest')


if __name__ == '__main__':
    """
    CommandLine:
        python ibeis/dbio/ingest_database.py --db testdb1 --serial --verbose --very-verbose
        python ibeis/dbio/ingest_database.py --db testdb1 --serial --verbose --very-verbose --super-strict --superstrict  # NOQA


        python ibeis/dbio/ingest_database.py --db JAG_Kieryn --force-delete
        python ibeis/dbio/ingest_database.py --db polar_bears --force_delete
        python ibeis/dbio/ingest_database.py --db snails_drop1
        python ibeis/dbio/ingest_database.py --db testdb1
        python -m ibeis.dbio.ingest_database --test-injest_main --db Elephants_drop1

    """
    # freeze_support must run straight after the __main__ guard, before any
    # real work, otherwise it has no effect in frozen win32 executables.
    import multiprocessing
    multiprocessing.freeze_support()  # win32
    ut.inject_colored_exceptions()
    if ut.doctest_was_requested():
        ut.doctest_funcs()
    else:
        injest_main()