from qibo.config import log, raise_error
def optimize(
loss,
initial_parameters,
args=(),
method="Powell",
jac=None,
hess=None,
hessp=None,
bounds=None,
constraints=(),
tol=None,
callback=None,
options=None,
compile=False,
processes=None,
backend=None,
):
"""Main optimization method. Selects one of the following optimizers:
- :meth:`qibo.optimizers.cmaes`
- :meth:`qibo.optimizers.newtonian`
- :meth:`qibo.optimizers.sgd`
Args:
loss (callable): Loss as a function of ``parameters`` and optional extra
arguments. Make sure the loss function returns a tensor for ``method='sgd'``
and a numpy object for all other methods.
initial_parameters (np.ndarray): Initial guess for the variational
parameters that are optimized.
args (tuple): optional arguments for the loss function.
method (str): Name of optimizer to use. Can be ``'cma'``, ``'sgd'``,
``'parallel_L-BFGS-B'`` or one of the Newtonian methods supported by
:meth:`qibo.optimizers.newtonian`. ``'sgd'`` is only available for
backends based on Tensorflow or PyTorch.
jac (dict): Method for computing the gradient vector for scipy optimizers.
hess (dict): Method for computing the hessian matrix for scipy optimizers.
hessp (callable): Hessian of objective function times an arbitrary
vector for scipy optimizers.
bounds (sequence or Bounds): Bounds on variables for scipy optimizers.
constraints (dict): Constraints definition for scipy optimizers.
tol (float): Tolerance of termination for scipy optimizers.
callback (callable): Called after each iteration for scipy optimizers.
options (dict): Dictionary with options. See the specific optimizer
below for a list of the supported options.
compile (bool): If ``True`` the Tensorflow optimization graph is compiled.
This is relevant only for the ``'sgd'`` optimizer.
processes (int): number of processes when using the parallel BFGS method.
Returns:
(float, np.ndarray, custom): Final best loss value; best parameters obtained by
the optimizer; an optimizer-specific extra return object. For scipy methods the
extra object is the ``OptimizeResult``, for ``'cma'`` the ``CMAEvolutionStrategy.result``,
and for ``'sgd'`` the options used during the optimization.
Example:
.. testcode::
import numpy as np
from qibo import gates, models
from qibo.optimizers import optimize
# create custom loss function
# make sure the return type matches the optimizer requirements.
def myloss(parameters, circuit):
circuit.set_parameters(parameters)
return np.square(np.sum(circuit().state())) # returns numpy array
# create circuit ansatz for two qubits
circuit = models.Circuit(2)
circuit.add(gates.RY(0, theta=0))
# optimize using random initial variational parameters
initial_parameters = np.random.uniform(0, 2, 1)
best, params, extra = optimize(myloss, initial_parameters, args=(circuit,))
# set parameters to circuit
circuit.set_parameters(params)
"""
if method == "cma":
if bounds is not None: # pragma: no cover
raise_error(
RuntimeError,
"The keyword 'bounds' cannot be used with the cma optimizer. Please use 'options' instead as defined by the cma documentation: ex. options['bounds'] = [0.0, 1.0].",
)
return cmaes(loss, initial_parameters, args, callback, options)
elif method == "sgd":
from qibo.backends import _check_backend
backend = _check_backend(backend)
return sgd(loss, initial_parameters, args, callback, options, compile, backend)
else:
from qibo.backends import _check_backend
backend = _check_backend(backend)
return newtonian(
loss,
initial_parameters,
args,
method,
jac,
hess,
hessp,
bounds,
constraints,
tol,
callback,
options,
processes,
backend,
)
def cmaes(loss, initial_parameters, args=(), callback=None, options=None):
"""Genetic optimizer based on `pycma <https://github.com/CMA-ES/pycma>`_.
Args:
loss (callable): Loss as a function of variational parameters to be
optimized.
initial_parameters (np.ndarray): Initial guess for the variational
parameters.
args (tuple): optional arguments for the loss function.
callback (list[callable]): List of callables called after each optimization
iteration. Following the cma-es implementation, they take a ``CMAEvolutionStrategy``
instance as argument.
See: https://cma-es.github.io/apidocs-pycma/cma.evolution_strategy.CMAEvolutionStrategy.html.
options (dict): Dictionary with options accepted by the ``cma``
optimizer. The user can use ``import cma; cma.CMAOptions()`` to view the
available options.
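Example:
    A minimal sketch minimizing a toy quadratic loss; the ``maxiter`` and
    ``verbose`` entries are options understood by ``cma``, used here only to
    keep the run short and silent.

    .. testcode::

        import numpy as np
        from qibo.optimizers import cmaes

        # toy loss returning a numpy scalar
        def myloss(parameters):
            return np.sum(parameters ** 2)

        initial_parameters = np.random.uniform(0, 1, 2)
        best, params, extra = cmaes(
            myloss, initial_parameters, options={"maxiter": 10, "verbose": -9}
        )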
"""
import cma
es = cma.CMAEvolutionStrategy(initial_parameters, sigma0=1.7, inopts=options)
if callback is not None:
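# Manual ask/tell loop so that the callback can be invoked on every candidate
# solution of each generation.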
while not es.stop():
solutions = es.ask()
objective_values = [loss(x, *args) for x in solutions]
for solution in solutions:
callback(solution)
es.tell(solutions, objective_values)
es.logger.add()
else:
es.optimize(loss, args=args)
return es.result.fbest, es.result.xbest, es.result
def newtonian(
loss,
initial_parameters,
args=(),
method="Powell",
jac=None,
hess=None,
hessp=None,
bounds=None,
constraints=(),
tol=None,
callback=None,
options=None,
processes=None,
backend=None,
):
"""Newtonian optimization approaches based on ``scipy.optimize.minimize``.
For more details check the `scipy documentation <https://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.minimize.html>`_.
.. note::
When using the method ``parallel_L-BFGS-B`` the ``processes`` option controls the
number of processes used by the parallel L-BFGS-B algorithm through the ``multiprocessing`` library.
By default ``processes=None``, in which case the total number of logical cores is used.
Make sure to select an appropriate number of processes for your computer specification,
taking into consideration memory and physical cores. In order to obtain optimal results
you can control the number of threads used by each process with the ``qibo.set_threads`` method.
For example, for small to medium-sized circuits you may benefit from a single thread per process, thus set
``qibo.set_threads(1)`` before running the optimization.
Args:
loss (callable): Loss as a function of variational parameters to be
optimized.
initial_parameters (np.ndarray): Initial guess for the variational
parameters.
args (tuple): optional arguments for the loss function.
method (str): Name of method supported by ``scipy.optimize.minimize`` and ``'parallel_L-BFGS-B'`` for
a parallel version of L-BFGS-B algorithm.
jac (dict): Method for computing the gradient vector for scipy optimizers.
hess (dict): Method for computing the hessian matrix for scipy optimizers.
hessp (callable): Hessian of objective function times an arbitrary
vector for scipy optimizers.
bounds (sequence or Bounds): Bounds on variables for scipy optimizers.
constraints (dict): Constraints definition for scipy optimizers.
tol (float): Tolerance of termination for scipy optimizers.
callback (callable): Called after each iteration for scipy optimizers.
options (dict): Dictionary with options accepted by
``scipy.optimize.minimize``.
processes (int): number of processes when using the parallel BFGS method.
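Example:
    A minimal sketch minimizing a toy quadratic loss with the default
    ``'Powell'`` method; any method accepted by ``scipy.optimize.minimize``
    can be used instead.

    .. testcode::

        import numpy as np
        from qibo.optimizers import newtonian

        # toy loss returning a numpy scalar
        def myloss(parameters):
            return np.sum(parameters ** 2)

        initial_parameters = np.random.uniform(0, 1, 2)
        best, params, extra = newtonian(
            myloss, initial_parameters, method="Powell", options={"maxiter": 10}
        )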
"""
if method == "parallel_L-BFGS-B": # pragma: no cover
o = ParallelBFGS(
loss,
args=args,
processes=processes,
bounds=bounds,
callback=callback,
options=options,
)
m = o.run(initial_parameters)
else:
from scipy.optimize import minimize
m = minimize(
loss,
initial_parameters,
args=args,
method=method,
jac=jac,
hess=hess,
hessp=hessp,
bounds=bounds,
constraints=constraints,
tol=tol,
callback=callback,
options=options,
)
return m.fun, m.x, m
def sgd(
loss,
initial_parameters,
args=(),
callback=None,
options=None,
compile=False,
backend=None,
):
"""Stochastic Gradient Descent (SGD) optimizer using Tensorflow backpropagation.
See `tf.keras.Optimizers <https://www.tensorflow.org/api_docs/python/tf/keras/optimizers>`_
for a list of the available optimizers for Tensorflow.
See `torch.optim <https://pytorch.org/docs/stable/optim.html>`_ for a list of the available
optimizers for PyTorch.
Args:
loss (callable): Loss as a function of variational parameters to be
optimized.
initial_parameters (np.ndarray): Initial guess for the variational
parameters.
args (tuple): optional arguments for the loss function.
callback (callable): Called after each iteration.
options (dict): Dictionary with options for the SGD optimizer. Supports
the following keys:
- ``'optimizer'`` (str, default: ``'Adagrad'``): Name of optimizer.
- ``'learning_rate'`` (float, default: ``1e-3``): Learning rate.
- ``'nepochs'`` (int, default: ``1e6``): Number of epochs for optimization.
- ``'nmessage'`` (int, default: ``1e3``): Number of epochs between loss
  messages printed during the optimization.
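Example:
    A minimal sketch, assuming the Tensorflow backend is installed and
    selected; the loss must return a backend tensor so that gradients can be
    obtained through backpropagation.

    .. code-block:: python

        import numpy as np
        import tensorflow as tf
        import qibo
        from qibo import gates, models
        from qibo.optimizers import optimize

        qibo.set_backend("tensorflow")

        # loss returning a tf tensor so that it can be differentiated
        def myloss(parameters, circuit):
            circuit.set_parameters(parameters)
            return tf.math.real(tf.reduce_sum(circuit().state()))

        circuit = models.Circuit(2)
        circuit.add(gates.RY(0, theta=0.0))
        initial_parameters = np.random.uniform(0, 2 * np.pi, 1)
        best, params, extra = optimize(
            myloss,
            initial_parameters,
            args=(circuit,),
            method="sgd",
            options={"nepochs": 10, "nmessage": 5},
        )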
"""
sgd_options = {
"nepochs": 1000000,
"nmessage": 1000,
"optimizer": "Adagrad",
"learning_rate": 0.001,
}
if options is not None:
sgd_options.update(options)
if backend.name == "tensorflow":
return _sgd_tf(
loss,
initial_parameters,
args,
sgd_options,
compile,
backend,
callback=callback,
)
if backend.name == "pytorch":
if compile:
log.warning(
"PyTorch does not support compilation of the optimization graph."
)
return _sgd_torch(
loss, initial_parameters, args, sgd_options, backend, callback=callback
)
raise_error(RuntimeError, "SGD optimizer requires Tensorflow or PyTorch backend.")
def _sgd_torch(loss, initial_parameters, args, sgd_options, backend, callback=None):
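# Standard PyTorch training loop: the chosen torch optimizer updates the
# variational parameters in place using gradients from backpropagation.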
vparams = initial_parameters
optimizer = getattr(backend.np.optim, sgd_options["optimizer"])(
params=[vparams], lr=sgd_options["learning_rate"]
)
for e in range(sgd_options["nepochs"]):
optimizer.zero_grad()
l = loss(vparams, *args)
l.backward()
optimizer.step()
if callback is not None:
callback(backend.to_numpy(vparams))
if e % sgd_options["nmessage"] == 1:
log.info("ite %d : loss %f", e, l.item())
return loss(vparams, *args).item(), vparams.detach().numpy(), sgd_options
def _sgd_tf(
loss, initial_parameters, args, sgd_options, compile, backend, callback=None
):
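# Tensorflow training loop: parameters are wrapped in a ``tf.Variable`` and
# updated with gradients from ``GradientTape``; optionally the loss and the
# update step are compiled into a graph.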
vparams = backend.tf.Variable(initial_parameters)
optimizer = getattr(backend.tf.optimizers, sgd_options["optimizer"])(
learning_rate=sgd_options["learning_rate"]
)
def opt_step():
with backend.tf.GradientTape() as tape:
l = loss(vparams, *args)
grads = tape.gradient(l, [vparams])
optimizer.apply_gradients(zip(grads, [vparams]))
return l
if compile:
loss = backend.compile(loss)
opt_step = backend.compile(opt_step)
for e in range(sgd_options["nepochs"]):
l = opt_step()
if callback is not None:
callback(vparams)
if e % sgd_options["nmessage"] == 1:
log.info("ite %d : loss %f", e, l.numpy())
return loss(vparams, *args).numpy(), vparams.numpy(), sgd_options
class ParallelBFGS: # pragma: no cover
"""Computes the L-BFGS-B using parallel evaluation using multiprocessing.
This implementation here is based on https://doi.org/10.32614/RJ-2019-030.
Args:
function (callable): loss function which returns a numpy object.
args (tuple): optional arguments for the loss function.
bounds (list): list of bound values for ``scipy.optimize.minimize`` L-BFGS-B.
callback (callable): callback function passed to ``scipy.optimize.minimize`` L-BFGS-B.
options (dict): follows ``scipy.optimize.minimize`` syntax for L-BFGS-B.
processes (int): number of processes when using the parallel BFGS method.
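Example:
    A minimal sketch on a toy quadratic loss; in practice this class is
    typically used through :meth:`qibo.optimizers.optimize` with
    ``method='parallel_L-BFGS-B'``.

    .. code-block:: python

        import numpy as np
        from qibo.optimizers import ParallelBFGS

        # toy loss returning a numpy scalar
        def myloss(parameters):
            return np.sum(parameters ** 2)

        opt = ParallelBFGS(myloss, processes=2)
        result = opt.run(np.random.uniform(0, 1, 4))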
"""
import numpy as np
def __init__(
self,
function,
args=(),
bounds=None,
callback=None,
options=None,
processes=None,
):
self.function = function
self.args = args
self.xval = None
self.function_value = None
self.jacobian_value = None
self.precision = self.np.finfo("float64").eps
self.bounds = bounds
self.callback = callback
self.options = options
self.processes = processes
def run(self, x0):
"""Executes parallel L-BFGS-B minimization.
Args:
x0 (numpy.array): guess for initial solution.
Returns:
the ``scipy.optimize.minimize`` result object.
"""
from scipy.optimize import minimize
out = minimize(
fun=self.fun,
x0=x0,
jac=self.jac,
method="L-BFGS-B",
bounds=self.bounds,
callback=self.callback,
options=self.options,
)
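# L-BFGS-B returns the inverse Hessian as a ``LinearOperator``; multiplying by
# the identity materializes it as a dense matrix.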
out.hess_inv = out.hess_inv * self.np.identity(len(x0))
return out
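# Evaluate ``fun`` at ``x`` with a single coordinate perturbed by ``eps``:
# ``eps_at == 0`` gives the unperturbed point, values ``1..len(x)`` add ``eps``
# to coordinate ``eps_at - 1`` (forward difference), larger values would
# subtract it instead.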
@staticmethod
def _eval_approx(eps_at, fun, x, eps):
if eps_at == 0:
x_ = x
else:
x_ = x.copy()
if eps_at <= len(x):
x_[eps_at - 1] += eps
else:
x_[eps_at - 1 - len(x)] -= eps
return fun(x_)
def evaluate(self, x, eps=1e-8):
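# Cache the last evaluation point: when ``x`` is numerically unchanged, reuse
# the stored loss and gradient; otherwise evaluate the loss and all
# forward-difference perturbations in parallel with joblib.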
if not (
self.xval is not None and all(abs(self.xval - x) <= self.precision * 2)
):
eps_at = range(len(x) + 1)
self.xval = x.copy()
def operation(epsi):
return self._eval_approx(
epsi, lambda y: self.function(y, *self.args), x, eps
)
from joblib import Parallel, delayed
ret = Parallel(self.processes, prefer="threads")(
delayed(operation)(epsi) for epsi in eps_at
)
self.function_value = ret[0]
self.jacobian_value = (self.np.asarray(ret[1 : (len(x) + 1)]) - self.function_value) / eps
def fun(self, x):
self.evaluate(x)
return self.function_value
def jac(self, x):
self.evaluate(x)
return self.jacobian_value