Source code for autogluon.vision.detector.detector

"""Object Detection task"""
import copy
import pickle
import logging
import warnings
import os

import pandas as pd
import numpy as np
from autogluon.core.utils import verbosity2loglevel, get_gpu_count
from autogluon.core.utils import set_logger_verbosity
from gluoncv.auto.tasks import ObjectDetection as _ObjectDetection
from ..configs.presets_configs import unpack, _check_gpu_memory_presets
from ..utils import MXNetErrorCatcher

__all__ = ['ObjectDetector']

logger = logging.getLogger()  # return root logger


[docs]class ObjectDetector(object):
    """AutoGluon Predictor for detecting objects in images

    Parameters
    ----------
    path : str, default = None
        The directory for saving logs or intermediate data. If unspecified, will create a sub-directory under
        current working directory.
    verbosity : int, default = 2
        Verbosity levels range from 0 to 4 and control how much information is printed.
        Higher levels correspond to more detailed print statements (you can set verbosity = 0 to suppress warnings).
        If using logging, you can alternatively control amount of information printed via logger.setLevel(L),
        where L ranges from 0 to 50 (Note: higher values of L correspond to fewer print statements, opposite of verbosity levels)
    """
    # Dataset is a subclass of `pd.DataFrame`, with `image` and `bbox` columns.
    Dataset = _ObjectDetection.Dataset

    def __init__(self, path=None, verbosity=2):
        if path is None:
            path = os.getcwd()
        self._log_dir = path
        self._verbosity = verbosity
        self._detector = None
        self._fit_summary = {}
        os.makedirs(self._log_dir, exist_ok=True)

    @property
    def path(self):
        return self._log_dir

[docs]    @unpack('object_detector')
    def fit(self,
            train_data,
            tuning_data=None,
            time_limit='auto',
            presets=None,
            hyperparameters=None,
            **kwargs):
        """Automatic fit process for object detection.
        Tip: if you observe very slow training speed only happening at the first epoch and your overall time budget
        is not large, you may disable `CUDNN_AUTOTUNE` by setting the environment variable
        `export MXNET_CUDNN_AUTOTUNE_DEFAULT=0` before running your python script or
        insert `import os; os.environ['MXNET_CUDNN_AUTOTUNE_DEFAULT'] = '0'` before any code block.
        The tuning is beneficial in terms of training speed in the long run, but may cost your noticeble overhead at
        the begining of each trial.

        Parameters
        ----------
        train_data : pd.DataFrame or str
            Training data, can be a dataframe like image dataset.
            For more details of how to construct a object detection dataset, please checkout:
            `http://preview.d2l.ai/d8/main/object_detection/getting_started.html`.
            If a string is provided, will search for d8 datasets.
        tuning_data : pd.DataFrame or str, default = None
            Holdout tuning data for validation, reserved for model selection and hyperparameter-tuning,
            can be a dataframe like image dataset.
            If a string is provided, will search for k8 datasets.
            If `None`, the validation dataset will be randomly split from `train_data` according to `holdout_frac`.
        time_limit : int, default = 'auto'(defaults to 2 hours if no presets detected)
            Time limit in seconds, if `None`, will run until all tuning and training finished.
            If `time_limit` is hit during `fit`, the
            HPO process will interrupt and return the current best configuration.
        presets : list or str or dict, default = ['medium_quality_faster_train']
            List of preset configurations for various arguments in `fit()`. Can significantly impact predictive accuracy, memory-footprint, and inference latency of trained models,
            and various other properties of the returned `predictor`.
            It is recommended to specify presets and avoid specifying most other `fit()` arguments or model hyperparameters prior to becoming familiar with AutoGluon.
            As an example, to get the most accurate overall predictor (regardless of its efficiency), set `presets='best_quality'`.
            To get good quality with faster inference speed, set `presets='good_quality_faster_inference'`
            Any user-specified arguments in `fit()` will override the values used by presets.
            If specifying a list of presets, later presets will override earlier presets if they alter the same argument.
            For precise definitions of the provided presets, see file: `autogluon/vision/configs/presets_configs.py`.
            Users can specify custom presets by passing in a dictionary of argument values as an element to the list.
            Available Presets: ['best_quality', 'high_quality_fast_inference', 'good_quality_faster_inference', 'medium_quality_faster_train']
            It is recommended to only use one `quality` based preset in a given call to `fit()` as they alter many of the same arguments and are not compatible with each-other.

            Note that depending on your specific hardware limitation(# gpu, size of gpu memory...) your mileage may vary a lot, you may choose lower quality presets if necessary, and
            try to reduce `batch_size` if OOM("RuntimeError: CUDA error: out of memory") happens frequently during the `fit`.

            In-depth Preset Info:
                best_quality={
                    'hyperparameters': {
                        'transfer': Categorical('faster_rcnn_fpn_resnet101_v1d_coco'),
                        'lr': Real(1e-5, 1e-3, log=True),
                        'batch_size': Categorical(4, 8),
                        'epochs': 30,
                        'early_stop_patience': -1
                        },
                    'hyperparameter_tune_kwargs': {
                        'num_trials': 128,
                        'search_strategy': 'bayesopt'},
                    'time_limit': 24*3600,}
                    Best predictive accuracy with little consideration to training/inference time or model size. Achieve even better results by specifying a large time_limit value.
                    Recommended for applications that benefit from the best possible model accuracy and be prepared with the extremly long training time.

                good_quality_fast_inference={
                    'hyperparameters': {
                        'transfer': Categorical('ssd_512_resnet50_v1_coco',
                                                'yolo3_darknet53_coco',
                                                'center_net_resnet50_v1b_coco'),
                        'lr': Real(1e-4, 1e-2, log=True),
                        'batch_size': Categorical(8, 16, 32, 64),
                        'epochs': 50,
                        'early_stop_patience': 20
                        },
                    'hyperparameter_tune_kwargs': {
                        'num_trials': 512,
                        'search_strategy': 'bayesopt'},
                    'time_limit': 12*3600,}
                    Good predictive accuracy with fast inference.
                    Recommended for applications that require reasonable inference speed and/or model size.

                medium_quality_faster_train={
                    'hyperparameters': {
                        'transfer': Categorical('ssd_512_resnet50_v1_coco'),
                        'lr': 0.01,
                        'batch_size': Categorical(8, 16),
                        'epochs': 30,
                        'early_stop_patience': 5
                        },
                    'hyperparameter_tune_kwargs': {
                        'num_trials': 16,
                        'search_strategy': 'random'},
                    'time_limit': 2*3600,}

                    Medium predictive accuracy with very fast inference and very fast training time.
                    This is the default preset in AutoGluon, but should generally only be used for quick prototyping.

                medium_quality_faster_inference={
                    'hyperparameters': {
                        'transfer': Categorical('center_net_resnet18_v1b_coco', 'yolo3_mobilenet1.0_coco'),
                        'lr': Categorical(0.01, 0.005, 0.001),
                        'batch_size': Categorical(32, 64, 128),
                        'epochs': Categorical(30, 50),
                        'early_stop_patience': 10
                        },
                    'hyperparameter_tune_kwargs': {
                        'num_trials': 32,
                        'search_strategy': 'bayesopt'},
                    'time_limit': 4*3600,}

                    Medium predictive accuracy with very fast inference.
                    Comparing with `medium_quality_faster_train` it uses faster model but explores more hyperparameters.
        hyperparameters : dict, default = None
            Extra hyperparameters for specific models.
            Accepted args includes(not limited to):
            epochs : int, default value based on network
                The `epochs` for model training.
            batch_size : int
                Mini batch size
            lr : float
                Trainer learning rate for optimization process.
            early_stop_patience : int, default=10
                Number of epochs with no improvement after which train is early stopped. Use `None` to disable.
            early_stop_min_delta : float, default=1e-4
                The small delta value to ignore when evaluating the metric. A large delta helps stablize the early
                stopping strategy against tiny fluctuation, e.g. 0.5->0.49->0.48->0.499->0.500001 is still considered as
                a good timing for early stopping.
            early_stop_baseline : float, default=None
                The minimum(baseline) value to trigger early stopping. For example, with `early_stop_baseline=0.5`,
                early stopping won't be triggered if the metric is less than 0.5 even if plateau is detected.
                Use `None` to disable.
            early_stop_max_value : float, default=None
                The max value for metric, early stop training instantly once the max value is achieved. Use `None` to disable.
            You can get the list of accepted hyperparameters in `config.yaml` saved by this predictor.
        **kwargs :
            holdout_frac : float, default = 0.1
                The random split ratio for `tuning_data` if `tuning_data==None`.
            random_state : int, default = None
                The random_state(seed) for shuffling data, only used if `tuning_data==None`.
                Note that the `random_state` only affect the splitting process, not model training.
                If not specified(None), will leave the original random sampling intact.
            nthreads_per_trial : int, default = (# cpu cores)
                Number of CPU threads for each trial, if `None`, will detect the # cores on current instance.
            ngpus_per_trial : int, default = (# gpus)
                Number of GPUs to use for each trial, if `None`, will detect the # gpus on current instance.
            hyperparameter_tune_kwargs: dict, default = None
                num_trials : int, default = 1
                    The limit of HPO trials that can be performed within `time_limit`. The HPO process will be terminated
                    when `num_trials` trials have finished or wall clock `time_limit` is reached, whichever comes first.
                search_strategy : str, default = 'random'
                    Searcher strategy for HPO, 'random' by default.
                    Options include: ‘random’ (random search), ‘bayesopt’ (Gaussian process Bayesian optimization),
                    ‘grid’ (grid search).
                max_reward : float, default = None
                    The reward threashold for stopping criteria. If `max_reward` is reached during HPO, the scheduler
                    will terminate earlier to reduce time cost.
                scheduler_options : dict, default = None
                    Extra options for HPO scheduler, please refer to :class:`autogluon.core.Searcher` for details.
        """
        # init/validate kwargs
        kwargs = self._validate_kwargs(kwargs)
        # unpack
        num_trials = kwargs['hyperparameter_tune_kwargs']['num_trials']
        nthreads_per_trial = kwargs['nthreads_per_trial']
        ngpus_per_trial = kwargs['ngpus_per_trial']
        holdout_frac = kwargs['holdout_frac']
        random_state = kwargs['random_state']
        search_strategy = kwargs['hyperparameter_tune_kwargs']['search_strategy']
        max_reward = kwargs['hyperparameter_tune_kwargs']['max_reward']
        scheduler_options = kwargs['hyperparameter_tune_kwargs']['scheduler_options']

        log_level = verbosity2loglevel(self._verbosity)
        set_logger_verbosity(self._verbosity, logger=logger)
        if presets:
            if not isinstance(presets, list):
                presets = [presets]
            logger.log(20, f'Presets specified: {presets}')

        if time_limit == 'auto':
            # no presets, no user specified time_limit
            time_limit = 7200
            logger.log(20, f'`time_limit=auto` set to `time_limit={time_limit}`.')

        # data sanity check
        train_data = self._validate_data(train_data)
        if tuning_data is not None:
            tuning_data = self._validate_data(tuning_data)

        if self._detector is not None:
            self._detector._logger.setLevel(log_level)
            self._detector._logger.propagate = True
            self._fit_summary = self._detector.fit(train_data, tuning_data, 1 - holdout_frac, random_state, resume=False)
            if hasattr(self._classifier, 'fit_history'):
                self._fit_summary['fit_history'] = self._classifier.fit_history()
            return self

        # new HPO task
        if time_limit is not None and num_trials is None:
            num_trials = 99999
        if time_limit is None and num_trials is None:
            raise ValueError("`time_limit` and kwargs['hyperparameter_tune_kwargs']['num_trials'] can not be `None` at the same time, "
                             "otherwise the training will not be terminated gracefully.")
        config={'log_dir': self._log_dir,
                'num_trials': 99999 if num_trials is None else max(1, num_trials),
                'time_limits': 2147483647 if time_limit is None else max(1, time_limit),
                'search_strategy': search_strategy,
                }
        if max_reward is not None:
            config['max_reward'] = max_reward
        if nthreads_per_trial is not None:
            config['nthreads_per_trial'] = nthreads_per_trial
        if ngpus_per_trial is not None:
            config['ngpus_per_trial'] = ngpus_per_trial
        if isinstance(hyperparameters, dict):
            if 'batch_size' in hyperparameters:
                bs = hyperparameters['batch_size']
                _check_gpu_memory_presets(bs, ngpus_per_trial, 4, 1280)  # 1280MB per sample
            # check if hyperparameters overwriting existing config
            for k, v in hyperparameters.items():
                if k in config:
                    raise ValueError(f'Overwriting {k} = {config[k]} to {v} by hyperparameters is ambiguous.')
            config.update(hyperparameters)
        if scheduler_options is not None:
            config.update(scheduler_options)
        if 'early_stop_patience' not in config:
            config['early_stop_patience'] = 10
        if config['early_stop_patience'] == None:
            config['early_stop_patience'] = -1
        # TODO(zhreshold): expose the transform function(or sign function) for converting custom metrics
        if 'early_stop_baseline' not in config or config['early_stop_baseline'] == None:
            config['early_stop_baseline'] = -np.Inf
        if 'early_stop_max_value' not in config or config['early_stop_max_value'] == None:
            config['early_stop_max_value'] = np.Inf
        # verbosity
        if log_level > logging.INFO:
            logging.getLogger('gluoncv.auto.tasks.object_detection').propagate = False
            for logger_name in ('SSDEstimator', 'CenterNetEstimator', 'YOLOv3Estimator', 'FasterRCNNEstimator'):
                logging.getLogger(logger_name).setLevel(log_level)
                logging.getLogger(logger_name).propagate = False
        task = _ObjectDetection(config=config)
        task._logger.setLevel(log_level)
        task._logger.propagate = True
        with warnings.catch_warnings(record=True) as w:
            warnings.simplefilter("always")
            with MXNetErrorCatcher() as err:
                self._detector = task.fit(train_data, tuning_data, 1 - holdout_frac, random_state)
            if err.exc_value is not None:
                raise RuntimeError(err.exc_value)
        self._detector._logger.setLevel(log_level)
        self._detector._logger.propagate = True
        self._fit_summary = task.fit_summary()
        if hasattr(task, 'fit_history'):
            self._fit_summary['fit_history'] = task.fit_history()
        return self

    def _validate_data(self, data):
        """Check whether data is valid, try to convert with best effort if not"""
        if len(data) < 1:
            raise ValueError('Empty dataset.')
        if not (hasattr(data, 'classes') and hasattr(data, 'to_mxnet')):
            if isinstance(data, pd.DataFrame):
                # raw dataframe, try to add metadata automatically
                infer_classes = []
                if 'image' in data.columns:
                    # check image relative/abs path is valid
                    sample = data.iloc[0]['image']
                    if not os.path.isfile(sample):
                        raise OSError(f'Detected invalid image path `{sample}`, please ensure all image paths are absolute or you are using the right working directory.')
                if 'rois' in data.columns and 'image' in data.columns:
                    sample = data.iloc[0]['rois']
                    for sample_key in ('class', 'xmin', 'ymin', 'xmax', 'ymax'):
                        assert sample_key in sample, f'key `{sample_key}` required in `rois`'
                    class_column = data.rois.apply(lambda x: x.get('class', 'unknown'))
                    infer_classes = class_column.unique().tolist()
                    data['rois'] = data['rois'].apply(lambda x: x.update({'difficult': x.get('difficult', 0)} or x))
                    data = _ObjectDetection.Dataset(data.sort_values('image').reset_index(drop=True), classes=infer_classes)
                elif 'image' in data and 'class' in data and 'xmin' in data and 'ymin' in data and 'xmax' in data and 'ymax' in data:
                    infer_classes = data['class'].unique().tolist()
                    if 'difficult' not in data.columns:
                        data['difficult'] = 0
                    data = _ObjectDetection.Dataset(data.sort_values('image').reset_index(drop=True), classes=infer_classes)
                    data = data.pack()
                    data.classes = infer_classes
                else:
                    err_msg = 'Unable to convert raw DataFrame to ObjectDetector Dataset, ' + \
                              '`image` and `rois` columns are required.' + \
                              'You may visit `https://auto.gluon.ai/stable/tutorials/object_detection/dataset.html` ' + \
                              'for details.'
                    raise AttributeError(err_msg)
                logger.log(20, 'Converting raw DataFrame to ObjectDetector.Dataset...')
                logger.log(20, f'Detected {len(infer_classes)} unique classes: {infer_classes}')
                instruction = 'train_data = ObjectDetector.Dataset(train_data, classes=["foo", "bar"])'
                logger.log(20, f'If you feel the `classes` is inaccurate, please construct the dataset explicitly, e.g. {instruction}')
        return data

    def _validate_kwargs(self, kwargs):
        """validate and initialize default kwargs"""
        kwargs['holdout_frac'] = kwargs.get('holdout_frac', 0.1)
        if not (0 < kwargs['holdout_frac'] < 1.0):
            raise ValueError(f'Range error for `holdout_frac`, expected to be within range (0, 1), given {kwargs["holdout_frac"]}')
        kwargs['random_state'] = kwargs.get('random_state', None)
        kwargs['nthreads_per_trial'] = kwargs.get('nthreads_per_trial', None)
        kwargs['ngpus_per_trial'] = kwargs.get('ngpus_per_trial', None)
        if kwargs['ngpus_per_trial'] is not None and kwargs['ngpus_per_trial'] > 0:
            detected_gpu = get_gpu_count()
            if detected_gpu < kwargs['ngpus_per_trial']:
                raise ValueError(f"Insufficient detected # gpus {detected_gpu} vs requested {kwargs['ngpus_per_trial']}")
        # tune kwargs
        hpo_tune_args = kwargs.get('hyperparameter_tune_kwargs', {})
        hpo_tune_args['num_trials'] = hpo_tune_args.get('num_trials', 1)
        hpo_tune_args['search_strategy'] = hpo_tune_args.get('search_strategy', 'random')
        if not hpo_tune_args['search_strategy'] in ('random', 'bayesopt', 'grid'):
            raise ValueError(f"Invalid search strategy: {hpo_tune_args['search_strategy']}, supported: ('random', 'bayesopt', 'grid')")
        hpo_tune_args['max_reward'] = hpo_tune_args.get('max_reward', None)
        if hpo_tune_args['max_reward'] is not None and hpo_tune_args['max_reward'] < 0:
            raise ValueError(f"Expected `max_reward` to be a positive float number between 0 and 1.0, given {hpo_tune_args['max_reward']}")
        hpo_tune_args['scheduler_options'] = hpo_tune_args.get('scheduler_options', None)
        kwargs['hyperparameter_tune_kwargs'] = hpo_tune_args
        return kwargs

[docs]    def predict(self, data, as_pandas=True):
        """Predict objects in image, return the confidences, bounding boxes of each predicted object.

        Parameters
        ----------
        data : str, pd.DataFrame or ndarray
            The input data, can be str(filepath), pd.DataFrame with 'image' column, or raw ndarray input.
        as_pandas : bool, default = True
            Whether to return the output as a pandas object (True) or list of numpy array(s) (False).
            Pandas object is a DataFrame.

        Returns
        -------

        pd.DataFrame
            The returned dataframe will contain (`pred_score`, `pred_bbox`, `pred_id`).
            If more than one image in input, the returned dataframe will contain `images` column,
            and all results are concatenated.
        """
        if self._detector is None:
            raise RuntimeError('Detector is not initialized, try `fit` first.')
        ret = self._detector.predict(data)
        if as_pandas:
            return ret
        else:
            return ret.to_numpy()

[docs]    def evaluate(self, data):
        """Evaluate model performance on validation data.

        Parameters
        ----------
        data : pd.DataFrame or iterator
            The validation data.
        """
        if self._detector is None:
            raise RuntimeError('Detector not initialized, try `fit` first.')
        return self._detector.evaluate(data)

[docs]    def fit_summary(self):
        """Return summary of last `fit` process.

        Returns
        -------
        dict
            The summary of last `fit` process. Major keys are ('train_map', 'val_map', 'total_time',...)

        """
        return copy.copy(self._fit_summary)

[docs]    def save(self, path=None):
        """Dump predictor to disk.

        Parameters
        ----------
        path : str
            The file name of saved copy. If not specified(None), will automatically save to `self.path` directory
            with filename `object_detector.ag`

        """
        if path is None:
            path = os.path.join(self.path, 'object_detector.ag')
        with open(path, 'wb') as fid:
            pickle.dump(self, fid)

[docs]    @classmethod
    def load(cls, path, verbosity=2):
        """Load previously saved predictor.

        Parameters
        ----------
        path : str
            The file name for saved pickle file. If `path` is a directory, will try to load the file `object_detector.ag` in
            this directory.
        verbosity : int, default = 2
            Verbosity levels range from 0 to 4 and control how much information is printed.
            Higher levels correspond to more detailed print statements (you can set verbosity = 0 to suppress warnings).
            If using logging, you can alternatively control amount of information printed via logger.setLevel(L),
            where L ranges from 0 to 50 (Note: higher values of L correspond to fewer print statements, opposite of verbosity levels)

        """
        if os.path.isdir(path):
            path = os.path.join(path, 'object_detector.ag')
        with open(path, 'rb') as fid:
            obj = pickle.load(fid)
        obj._verbosity = verbosity
        return obj