Source code for autogluon.core.searcher.gp_searcher

import ConfigSpace as CS
import multiprocessing as mp

from .bayesopt.autogluon.searcher_factory import gp_fifo_searcher_factory, \
    gp_multifidelity_searcher_factory, constrained_gp_fifo_searcher_factory, gp_fifo_searcher_defaults, \
    gp_multifidelity_searcher_defaults, constrained_gp_fifo_searcher_defaults
from .searcher import BaseSearcher
from ..utils.default_arguments import check_and_merge_defaults

__all__ = ['GPFIFOSearcher',
           'GPMultiFidelitySearcher']


def _to_config_cs(config_space: CS.ConfigurationSpace, config: dict) \
        -> CS.Configuration:
    return CS.Configuration(config_space, values=config)
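
# A minimal sketch of what `_to_config_cs` does, assuming a hypothetical
# config space with a single hyperparameter `lr` (the space and values below
# are illustrative, not part of this module). A plain dict, as used elsewhere
# in AutoGluon, is wrapped into a `CS.Configuration`, which also validates
# the values against the space:
#
# >>> import ConfigSpace.hyperparameters as CSH
# >>> cs = CS.ConfigurationSpace()
# >>> cs.add_hyperparameter(
# ...     CSH.UniformFloatHyperparameter('lr', lower=1e-3, upper=1e-2, log=True))
# >>> config = _to_config_cs(cs, {'lr': 5e-3})  # raises if 'lr' is out of range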


class GPFIFOSearcher(BaseSearcher):
    """Gaussian process Bayesian optimization for FIFO scheduler

    This searcher must be used with `FIFOScheduler`. It provides Bayesian
    optimization, based on a Gaussian process surrogate model. It is created
    along with the scheduler, using `searcher='bayesopt'`.

    Pending configurations (for which evaluation tasks are currently running)
    are dealt with by fantasizing (i.e., target values are drawn from the
    current posterior, and acquisition functions are averaged over this
    sample, see `num_fantasy_samples`).

    The GP surrogate model uses a Matern 5/2 covariance function with
    automatic relevance determination (ARD) of input attributes, and a
    constant mean function. The acquisition function is expected improvement
    (EI). All hyperparameters of the surrogate model are estimated by
    empirical Bayes (maximizing the marginal likelihood). In general, this
    hyperparameter fitting is the most expensive part of a `get_config` call.

    The following happens in `get_config`. For the first `num_init_random`
    calls, a config is drawn at random (the very first call results in the
    default config of the space). Afterwards, Bayesian optimization is used,
    unless there are no finished evaluations yet. First, model hyperparameters
    are refit. This step can be skipped (see `opt_skip*` parameters). Next,
    `num_init_candidates` configs are sampled at random and ranked by a
    scoring function (`initial_scoring`). BFGS local optimization is then run
    starting from the top-scoring config, where EI is minimized.

    Parameters
    ----------
    configspace : ConfigSpace.ConfigurationSpace
        Config space of `train_fn`, equal to `train_fn.cs`
    reward_attribute : str
        Name of reward attribute reported by `train_fn`, equal to
        `reward_attr` of scheduler
    debug_log : bool (default: False)
        If True, both searcher and scheduler output an informative log, from
        which the configs chosen and decisions being made can be traced.
    first_is_default : bool (default: True)
        If True, the first config to be evaluated is the default one of the
        config space. Otherwise, this first config is drawn at random.
    elapsed_time_attribute : str (optional)
        Name of elapsed time attribute in data obtained from reporter. Here,
        elapsed time counts since the start of train_fn, unit is seconds.
    random_seed : int
        Seed for pseudo-random number generator used.
    num_init_random : int
        Number of initial `get_config` calls for which randomly sampled
        configs are returned. Afterwards, Bayesian optimization is used.
    num_init_candidates : int
        Number of initial candidates sampled at random in order to seed the
        search in `get_config`
    num_fantasy_samples : int
        Number of samples drawn for fantasizing (latent target values for
        pending candidates)
    initial_scoring : str
        Scoring function to rank initial candidates (local optimization of EI
        is started from the top scorer). Values are 'thompson_indep'
        (independent Thompson sampling; randomized score, which can increase
        exploration) and 'acq_func' (score is the same (EI) acquisition
        function which is afterwards locally optimized).
    opt_nstarts : int
        Parameter for hyperparameter fitting. Number of random restarts
    opt_maxiter : int
        Parameter for hyperparameter fitting. Maximum number of iterations
        per restart
    opt_warmstart : bool
        Parameter for hyperparameter fitting. If True, each fitting is
        started from the previous optimum. Not recommended in general
    opt_verbose : bool
        Parameter for hyperparameter fitting. If True, lots of output
    opt_skip_init_length : int
        Parameter for hyperparameter fitting, skip predicate. Fitting is
        never skipped as long as the number of observations is below this
        threshold.
    opt_skip_period : int
        Parameter for hyperparameter fitting, skip predicate. If >1, and the
        number of observations is above `opt_skip_init_length`, fitting is
        done only every K-th call, and skipped otherwise.
    map_reward : str or MapReward (default: '1_minus_x')
        AutoGluon is maximizing reward, while internally, Bayesian
        optimization is minimizing the criterion. States how reward is mapped
        to criterion. This must be a strictly decreasing function. Values are
        '1_minus_x' (criterion = 1 - reward) and 'minus_x'
        (criterion = -reward).
        From a technical standpoint, it does not matter what is chosen here,
        because the criterion is only used internally. Also note that
        criterion data is always normalized to mean 0, variance 1 before
        being fitted with a GP.

    Examples
    --------
    >>> import autogluon.core as ag
    >>> @ag.args(
    ...     lr=ag.space.Real(1e-3, 1e-2, log=True))
    >>> def train_fn(args, reporter):
    ...     reporter(accuracy=args.lr ** 2)
    >>> searcher_options = {
    ...     'map_reward': 'minus_x',
    ...     'opt_skip_period': 2}
    >>> scheduler = ag.scheduler.FIFOScheduler(
    ...     train_fn, searcher='bayesopt', searcher_options=searcher_options,
    ...     num_trials=10, reward_attr='accuracy')
    """
    def __init__(self, configspace, **kwargs):
        _gp_searcher = kwargs.get('_gp_searcher')
        if _gp_searcher is None:
            kwargs['configspace'] = configspace
            _kwargs = check_and_merge_defaults(
                kwargs, *gp_fifo_searcher_defaults(),
                dict_name='search_options')
            _gp_searcher = gp_fifo_searcher_factory(**_kwargs)
        super().__init__(
            _gp_searcher.hp_ranges.config_space,
            reward_attribute=kwargs.get('reward_attribute'))
        self.gp_searcher = _gp_searcher
        # This lock protects gp_searcher. We are not using self.LOCK, since
        # that can lead to deadlocks when superclass methods are called
        self._gp_lock = mp.Lock()
        self._elapsed_time_attribute = kwargs.get('elapsed_time_attribute')
    def configure_scheduler(self, scheduler):
        from ..scheduler import FIFOScheduler
        from ..scheduler.seq_scheduler import LocalSequentialScheduler
        assert isinstance(scheduler, (FIFOScheduler, LocalSequentialScheduler)), \
            "This searcher requires FIFOScheduler scheduler"
        super().configure_scheduler(scheduler)
    def get_config(self, **kwargs):
        with self._gp_lock:
            config_cs = self.gp_searcher.get_config()
            return config_cs.get_dictionary()
    def update(self, config, **kwargs):
        super().update(config, **kwargs)
        with self._gp_lock:
            config_cs = self._to_config_cs(config)
            _kwargs = dict()
            attr = self._elapsed_time_attribute
            if attr is not None and attr in kwargs:
                _kwargs['elapsed_time'] = kwargs[attr]
            self.gp_searcher.update(
                config_cs, reward=kwargs[self._reward_attribute], **_kwargs)
    def register_pending(self, config, milestone=None):
        with self._gp_lock:
            config_cs = self._to_config_cs(config)
            self.gp_searcher.register_pending(config_cs)
    def evaluation_failed(self, config, **kwargs):
        with self._gp_lock:
            config_cs = self._to_config_cs(config)
            self.gp_searcher.evaluation_failed(config_cs)
    def dataset_size(self):
        with self._gp_lock:
            return self.gp_searcher.dataset_size()
    def cumulative_profile_record(self):
        with self._gp_lock:
            return self.gp_searcher.cumulative_profile_record()
    def model_parameters(self):
        with self._gp_lock:
            return self.gp_searcher.get_params()
    def get_state(self):
        with self._gp_lock:
            return self.gp_searcher.get_state()
    def clone_from_state(self, state):
        with self._gp_lock:
            _gp_searcher = self.gp_searcher.clone_from_state(state)
            # Use copy constructor
            return GPFIFOSearcher(
                self.configspace, reward_attribute=self._reward_attribute,
                _gp_searcher=_gp_searcher)
    def set_profiler(self, profiler):
        self.gp_searcher.set_profiler(profiler)

    def set_getconfig_callback(self, callback):
        self.gp_searcher.set_getconfig_callback(callback)

    @property
    def debug_log(self):
        with self._gp_lock:
            return self.gp_searcher.debug_log

    def _to_config_cs(self, config):
        return _to_config_cs(
            self.gp_searcher.hp_ranges.config_space, config)
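
# The scheduler normally drives the searcher, but the core loop reduces to
# alternating `get_config` and `update` calls. A rough sketch, assuming the
# searcher is built directly from a `train_fn.cs` config space and that the
# reward is reported under 'accuracy' (illustrative names, not fixed by this
# module):
#
# >>> searcher = GPFIFOSearcher(train_fn.cs, reward_attribute='accuracy')
# >>> config = searcher.get_config()           # random for the first
# ...                                          # `num_init_random` calls
# >>> searcher.register_pending(config)        # evaluation task is running
# >>> searcher.update(config, accuracy=0.92)   # feed back the observed reward
# >>> next_config = searcher.get_config()      # eventually BO-based
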

class ConstrainedGPFIFOSearcher(GPFIFOSearcher):
    def __init__(self, configspace, **kwargs):
        _gp_searcher = kwargs.get('_constrained_gp_searcher')
        if _gp_searcher is None:
            kwargs['configspace'] = configspace
            self.initial_scoring = 'acq_func'
            if 'initial_scoring' in kwargs:
                assert kwargs['initial_scoring'] == 'acq_func', \
                    'Thompson sampling is not supported for Constrained BO.'
            _kwargs = check_and_merge_defaults(
                kwargs, *constrained_gp_fifo_searcher_defaults(),
                dict_name='search_options')
            _gp_searcher = constrained_gp_fifo_searcher_factory(**_kwargs)
        super().__init__(
            _gp_searcher.hp_ranges.config_space,
            reward_attribute=kwargs.get('reward_attribute'))
        self.gp_searcher = _gp_searcher
        # This lock protects gp_searcher. We are not using self.LOCK, since
        # that can lead to deadlocks when superclass methods are called
        self._gp_lock = mp.Lock()

    def update(self, config, **kwargs):
        BaseSearcher.update(self, config, **kwargs)
        with self._gp_lock:
            config_cs = self._to_config_cs(config)
            self.gp_searcher.update(
                config_cs, reward=kwargs[self._reward_attribute],
                constraint=kwargs[self._constraint_attribute])
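
# For the constrained variant, `update` additionally expects the constraint
# metric under the name stored in `self._constraint_attribute`, which is
# configured outside this snippet. A hedged sketch, with 'accuracy' and
# 'memory' as purely illustrative reward and constraint attribute names:
#
# >>> searcher.update(config, accuracy=0.92, memory=0.3)
# ...   # forwarded as reward=0.92, constraint=0.3 to the internal GP searcher
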

class GPMultiFidelitySearcher(BaseSearcher):
    """Gaussian process Bayesian optimization for Hyperband scheduler

    This searcher must be used with `HyperbandScheduler`. It provides a novel
    combination of Bayesian optimization, based on a Gaussian process
    surrogate model, with Hyperband scheduling. In particular, observations
    across resource levels are modelled jointly. It is created along with the
    scheduler, using `searcher='bayesopt'`.

    Most of the `GPFIFOSearcher` comments apply here as well. In
    multi-fidelity HPO, we optimize a function f(x, r), where x is the
    configuration and r the resource (or time) attribute. The latter must be
    a positive integer. In most applications, `time_attr` == 'epoch', and the
    resource is the number of epochs already trained.

    We model the function f(x, r) jointly over all resource levels r at which
    it is observed (but see `searcher_data` in `HyperbandScheduler`). The
    kernel and mean function of our surrogate model are over (x, r). The
    surrogate model is selected by `gp_resource_kernel`. More details about
    the supported kernels are in:

        Tiao, Klein, Lienart, Archambeau, Seeger (2020)
        Model-based Asynchronous Hyperparameter and Neural Architecture Search
        https://arxiv.org/abs/2003.10865

    The acquisition function (EI), which is optimized in `get_config`, is
    obtained by fixing the resource level r to a value which is determined
    depending on the current state. If `resource_acq` == 'bohb', r is the
    largest value <= max_t, where we have seen >= dimension(x) metric values.
    If `resource_acq` == 'first', r is the first milestone which config x
    would reach when started.

    Parameters
    ----------
    configspace : ConfigSpace.ConfigurationSpace
        Config space of `train_fn`, equal to `train_fn.cs`
    reward_attribute : str
        Name of reward attribute reported by `train_fn`, equal to
        `reward_attr` of scheduler
    resource_attribute : str
        Name of resource (or time) attribute reported by `train_fn`, equal to
        `time_attr` of scheduler
    debug_log : bool (default: False)
        If True, both searcher and scheduler output an informative log, from
        which the configs chosen and decisions being made can be traced.
    first_is_default : bool (default: True)
        If True, the first config to be evaluated is the default one of the
        config space. Otherwise, this first config is drawn at random.
    elapsed_time_attribute : str (optional)
        Name of elapsed time attribute in data obtained from reporter. Here,
        elapsed time counts since the start of train_fn, unit is seconds.
    random_seed : int
        Seed for pseudo-random number generator used.
    num_init_random : int
        See `GPFIFOSearcher`
    num_init_candidates : int
        See `GPFIFOSearcher`
    num_fantasy_samples : int
        See `GPFIFOSearcher`
    initial_scoring : str
        See `GPFIFOSearcher`
    opt_nstarts : int
        See `GPFIFOSearcher`
    opt_maxiter : int
        See `GPFIFOSearcher`
    opt_warmstart : bool
        See `GPFIFOSearcher`
    opt_verbose : bool
        See `GPFIFOSearcher`
    opt_skip_init_length : int
        See `GPFIFOSearcher`
    opt_skip_period : int
        See `GPFIFOSearcher`
    map_reward : str or MapReward (default: '1_minus_x')
        See `GPFIFOSearcher`
    gp_resource_kernel : str
        Surrogate model over the criterion function f(x, r), x the config, r
        the resource. Note that x is encoded as a vector with entries in
        [0, 1], and r is linearly mapped to [0, 1], while the criterion data
        is normalized to mean 0, variance 1. The reference above provides
        details on the models supported here. For the exponential decay
        kernel, the base kernel over x is Matern 5/2 ARD.
        Values are 'matern52' (Matern 5/2 ARD kernel over [x, r]),
        'matern52-res-warp' (Matern 5/2 ARD kernel over [x, r], with
        additional warping on r), 'exp-decay-sum' (exponential decay kernel,
        with delta=0; this is the additive kernel from Freeze-Thaw Bayesian
        Optimization), 'exp-decay-delta1' (exponential decay kernel, with
        delta=1), and 'exp-decay-combined' (exponential decay kernel, with
        delta in [0, 1] a hyperparameter).
    resource_acq : str
        Determines how the EI acquisition function is used (see above).
        Values: 'bohb', 'first'
    opt_skip_num_max_resource : bool
        Parameter for hyperparameter fitting, skip predicate. If True, and
        the number of observations is above `opt_skip_init_length`, fitting
        is done only when there is a new datapoint at r = max_t, and skipped
        otherwise.

    Examples
    --------
    >>> import numpy as np
    >>> import autogluon.core as ag
    >>>
    >>> @ag.args(
    ...     lr=ag.space.Real(1e-3, 1e-2, log=True),
    ...     wd=ag.space.Real(1e-3, 1e-2))
    >>> def train_fn(args, reporter):
    ...     print('lr: {}, wd: {}'.format(args.lr, args.wd))
    ...     for e in range(9):
    ...         dummy_accuracy = 1 - np.power(1.8, -np.random.uniform(e, 2*e))
    ...         reporter(epoch=e+1, accuracy=dummy_accuracy, lr=args.lr,
    ...                  wd=args.wd)
    >>> searcher_options = {
    ...     'gp_resource_kernel': 'matern52-res-warp',
    ...     'opt_skip_num_max_resource': True}
    >>> scheduler = ag.scheduler.HyperbandScheduler(
    ...     train_fn, searcher='bayesopt', searcher_options=searcher_options,
    ...     num_trials=10, reward_attr='accuracy', time_attr='epoch',
    ...     max_t=10, grace_period=1, reduction_factor=3)

    See Also
    --------
    GPFIFOSearcher
    """
    def __init__(self, configspace, **kwargs):
        _gp_searcher = kwargs.get('_gp_searcher')
        if _gp_searcher is None:
            kwargs['configspace'] = configspace
            _kwargs = check_and_merge_defaults(
                kwargs, *gp_multifidelity_searcher_defaults(),
                dict_name='search_options')
            _gp_searcher = gp_multifidelity_searcher_factory(**_kwargs)
        super().__init__(
            _gp_searcher.hp_ranges.config_space,
            reward_attribute=kwargs.get('reward_attribute'))
        self.gp_searcher = _gp_searcher
        self._resource_attribute = kwargs.get('resource_attribute')
        # This lock protects gp_searcher. We are not using self.LOCK, since
        # that can lead to deadlocks when superclass methods are called
        self._gp_lock = mp.Lock()
        self._elapsed_time_attribute = kwargs.get('elapsed_time_attribute')
    def configure_scheduler(self, scheduler):
        from ..scheduler import HyperbandScheduler
        assert isinstance(scheduler, HyperbandScheduler), \
            "This searcher requires HyperbandScheduler scheduler"
        super().configure_scheduler(scheduler)
        self._resource_attribute = scheduler._time_attr
    def get_config(self, **kwargs):
        with self._gp_lock:
            config_cs = self.gp_searcher.get_config(**kwargs)
            return config_cs.get_dictionary()
    def update(self, config, **kwargs):
        super().update(config, **kwargs)
        with self._gp_lock:
            config_cs = self._to_config_cs(config)
            _kwargs = dict()
            attr = self._elapsed_time_attribute
            if attr is not None and attr in kwargs:
                _kwargs['elapsed_time'] = kwargs[attr]
            self.gp_searcher.update(
                config_cs, reward=kwargs[self._reward_attribute],
                resource=int(kwargs[self._resource_attribute]), **_kwargs)
            # If the evaluation task has terminated, clean up pending
            # evaluations which may have been overlooked
            if kwargs.get('terminated', False):
                self.gp_searcher.cleanup_pending(config_cs)
    def register_pending(self, config, milestone=None):
        assert milestone is not None, \
            "This searcher works with a multi-fidelity scheduler only"
        with self._gp_lock:
            config_cs = self._to_config_cs(config)
            self.gp_searcher.register_pending(config_cs, milestone)
    def remove_case(self, config, **kwargs):
        with self._gp_lock:
            config_cs = self._to_config_cs(config)
            self.gp_searcher.remove_case(
                config_cs, resource=int(kwargs[self._resource_attribute]))
    def evaluation_failed(self, config, **kwargs):
        with self._gp_lock:
            config_cs = self._to_config_cs(config)
            self.gp_searcher.evaluation_failed(config_cs)
    def dataset_size(self):
        with self._gp_lock:
            return self.gp_searcher.dataset_size()
    def cumulative_profile_record(self):
        with self._gp_lock:
            return self.gp_searcher.cumulative_profile_record()
    def model_parameters(self):
        with self._gp_lock:
            return self.gp_searcher.get_params()
    def get_state(self):
        with self._gp_lock:
            return self.gp_searcher.get_state()
    def clone_from_state(self, state):
        with self._gp_lock:
            _gp_searcher = self.gp_searcher.clone_from_state(state)
            # Use copy constructor
            return GPMultiFidelitySearcher(
                self.configspace, reward_attribute=self._reward_attribute,
                resource_attribute=self._resource_attribute,
                _gp_searcher=_gp_searcher)
    def set_profiler(self, profiler):
        self.gp_searcher.set_profiler(profiler)

    def set_getconfig_callback(self, callback):
        self.gp_searcher.set_getconfig_callback(callback)

    @property
    def debug_log(self):
        with self._gp_lock:
            return self.gp_searcher.debug_log

    def _to_config_cs(self, config):
        return _to_config_cs(
            self.gp_searcher.hp_ranges.config_space, config)
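
# In the multi-fidelity case, every observation carries a resource level, and
# pending evaluations are registered against the milestone they are running
# towards. A rough sketch, assuming rewards are reported as 'accuracy' and the
# resource attribute is 'epoch' (illustrative names, not fixed by this module):
#
# >>> searcher = GPMultiFidelitySearcher(
# ...     train_fn.cs, reward_attribute='accuracy', resource_attribute='epoch')
# >>> config = searcher.get_config()
# >>> searcher.register_pending(config, milestone=3)   # milestone is required
# >>> searcher.update(config, accuracy=0.81, epoch=3)  # observation of f(x, r=3)
# >>> searcher.update(config, accuracy=0.83, epoch=9,
# ...                 terminated=True)                 # also cleans up pending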