From 9ea810c264f30d0b09b61a2146da3586fcfe18d4 Mon Sep 17 00:00:00 2001
From: AlexAndorra
Date: Mon, 30 May 2022 20:37:32 +0300
Subject: [PATCH] Remove advanced Aesara usage guide

---
 .../Advanced_usage_of_Aesara_in_PyMC.rst | 223 ------------------
 1 file changed, 223 deletions(-)
 delete mode 100644 docs/source/Advanced_usage_of_Aesara_in_PyMC.rst

diff --git a/docs/source/Advanced_usage_of_Aesara_in_PyMC.rst b/docs/source/Advanced_usage_of_Aesara_in_PyMC.rst
deleted file mode 100644
index ceedb4c262..0000000000
--- a/docs/source/Advanced_usage_of_Aesara_in_PyMC.rst
+++ /dev/null
@@ -1,223 +0,0 @@
:orphan:

(Advanced_usage_of_Aesara_in_PyMC)=

..
    _referenced in docs/source/notebooks/table_of_contents_tutorials.js

=================================
Advanced usage of Aesara in PyMC
=================================

Using shared variables
======================

Shared variables allow us to use values in Aesara functions that are
not considered an input to the function, but can still be changed
later. They are very similar to global variables in many ways::

    import aesara
    import aesara.tensor as at

    a = at.scalar('a')
    # Create a new shared variable with an initial value of 0.1
    b = aesara.shared(0.1)
    func = aesara.function([a], a * b)
    assert func(2.) == 0.2

    # Update the shared value; the compiled function picks it up automatically
    b.set_value(10.)
    assert func(2.) == 20.

Shared variables can also contain arrays, and are allowed to change
their shape as long as the number of dimensions stays the same.
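To make that dimensionality rule concrete, here is a small sketch (not part of
the original guide; the variable names are just illustrative). Aesara accepts a
new value with a different length, but rejects one with a different number of
dimensions::

    # Illustrative sketch, not from the original guide
    import aesara
    import numpy as np

    vec = aesara.shared(np.zeros(3))    # a 1-d shared variable
    vec.set_value(np.ones(10))          # fine: the length changes, ndim stays 1

    try:
        vec.set_value(np.ones((2, 5)))  # a 2-d value no longer matches the variable's type
    except TypeError as err:
        print("rejected:", err)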
We can use shared variables in PyMC to fit the same model to several
datasets without the need to recreate the model each time (which can
be time-consuming if the number of datasets is large)::

    import aesara
    import numpy as np
    import pymc as pm

    # We generate 10 datasets
    true_mu = [np.random.randn() for _ in range(10)]
    observed_data = [mu + np.random.randn(20) for mu in true_mu]

    data = aesara.shared(observed_data[0])
    with pm.Model() as model:
        mu = pm.Normal('mu', 0, 10)
        pm.Normal('y', mu=mu, sigma=1, observed=data)

    # Generate one trace for each dataset
    idatas = []
    for data_vals in observed_data:
        # Switch out the observed dataset
        data.set_value(data_vals)
        with model:
            idatas.append(pm.sample())

We can also sometimes use shared variables to work around limitations
in the current PyMC API. A common task in machine learning is to predict
values for unseen data, and one way to achieve this is to use a shared
variable for our observations::

    x = np.random.randn(100)
    y = x > 0

    x_shared = aesara.shared(x)

    with pm.Model() as model:
        coeff = pm.Normal('x', mu=0, sigma=1)
        logistic = pm.math.sigmoid(coeff * x_shared)
        pm.Bernoulli('obs', p=logistic, observed=y)

        # fit the model
        idata = pm.sample()

        # Switch out the observations and use `sample_posterior_predictive` to predict
        x_shared.set_value([-1, 0, 1.])
        post_pred = pm.sample_posterior_predictive(idata)

However, due to the way we currently handle shapes, it is not possible
to change the shape of a shared variable if that would also change the
shape of one of the model's variables.


Writing custom Aesara Ops
=========================

While Aesara includes a wide range of operations, there are cases where
it makes sense to write your own. But before doing this, it is a good
idea to think hard about whether it is actually necessary. Especially if you
want to use algorithms that need gradient information (this includes NUTS
and all variational methods, and you probably *should* want to use those),
writing a custom `Op` is often quite a bit of work, and getting the gradients
right requires some math and debugging skills.

Good reasons for defining a custom `Op` might be the following:

- You require an operation that is not available in Aesara and can't
  be built out of existing Aesara operations. This could for example
  include models where you need to solve differential equations or
  integrals, or find a root or minimum of a function that depends
  on your parameters.
- You want to connect your PyMC model to some existing external code.
- After carefully considering different parametrizations and a lot
  of profiling, your model is still too slow, but you know of a faster
  way to compute the gradient than what Aesara is doing. This faster
  way might be anything from clever maths to using more hardware.
  There is nothing stopping anyone from using a cluster via MPI in
  a custom `Op`, if a part of the gradient computation is slow enough
  and sufficiently parallelizable to make the cost worth it.
  We would definitely like to hear about any such examples.

Aesara has extensive documentation about how to write new Ops.


Finding the root of a function
------------------------------

We'll use finding the root of a function as a simple example.
Let's say we want to define a model where a parameter is defined
implicitly as the root of a function that depends on another
parameter:

.. math::

   \theta \sim N^+(0, 1)\\
   \text{$\mu \in \mathbb{R}^+$ such that $R(\mu, \theta)
       = \mu + \mu e^{\theta \mu} - 1 = 0$}\\
   y \sim N(\mu, 0.1^2)

First, we observe that this problem is well-defined, because
:math:`R(\cdot, \theta)` is monotone and has the image :math:`(-1, \infty)`
for :math:`\mu, \theta \in \mathbb{R}^+`. To avoid overflows in
:math:`\exp(\mu \theta)` for large values of :math:`\mu\theta`,
we instead find the root of

.. math::

   R'(\mu, \theta)
       = \log(R(\mu, \theta) + 1)
       = \log(\mu) + \log(1 + e^{\theta\mu}).

Also, we have

.. math::

   \frac{\partial}{\partial\mu}R'(\mu, \theta)
       = \theta\, \text{logit}^{-1}(\theta\mu) + \mu^{-1}.

We can now use `scipy.optimize.newton` to find the root::

    from scipy import optimize, special
    import numpy as np

    def func(mu, theta):
        thetamu = theta * mu
        value = np.log(mu) + np.logaddexp(0, thetamu)
        return value

    def jac(mu, theta):
        thetamu = theta * mu
        jac = theta * special.expit(thetamu) + 1 / mu
        return jac

    def mu_from_theta(theta):
        return optimize.newton(func, 1, fprime=jac, args=(theta,))

We could wrap `mu_from_theta` with `aesara.compile.ops.as_op` and use gradient-free
methods like Metropolis, but to get NUTS and ADVI working, we also
need to define the derivative of `mu_from_theta`. We can find this
derivative using the implicit function theorem; equivalently, we
take the derivative with respect to :math:`\theta` on both sides of
:math:`R(\mu(\theta), \theta) = 0` and solve for :math:`\frac{d\mu}{d\theta}`.
This isn't hard to do by hand, but for the fun of it, let's do it using
sympy::

    import sympy

    mu = sympy.Function('mu')
    theta = sympy.Symbol('theta')
    R = mu(theta) + mu(theta) * sympy.exp(theta * mu(theta)) - 1
    solution = sympy.solve(R.diff(theta), mu(theta).diff(theta))[0]

We get

.. math::

   \frac{d}{d\theta}\mu(\theta)
       = - \frac{\mu(\theta)^2}{1 + \theta\mu(\theta) + e^{-\theta\mu(\theta)}}
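Before wiring this derivative into Aesara, it can be reassuring to spot-check
it numerically against a finite difference of `mu_from_theta` (a quick sanity
check that is not part of the original guide; the value of `theta` below is
arbitrary)::

    # Illustrative sanity check, not from the original guide;
    # reuses `mu_from_theta` and numpy from the code above.
    theta0 = 0.5
    eps = 1e-6

    mu0 = mu_from_theta(theta0)
    analytic = -mu0 ** 2 / (1 + theta0 * mu0 + np.exp(-theta0 * mu0))
    numeric = (mu_from_theta(theta0 + eps) - mu_from_theta(theta0 - eps)) / (2 * eps)

    # The closed-form derivative and the central difference should agree closely
    assert np.isclose(analytic, numeric, rtol=1e-4)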
Now we use this to define an Aesara `Op` that also computes the gradient::

    import numpy as np

    import aesara
    import aesara.tensor as at
    from aesara.graph.op import Op

    class MuFromTheta(Op):
        itypes = [at.dscalar]
        otypes = [at.dscalar]

        def perform(self, node, inputs, outputs):
            theta, = inputs
            mu = mu_from_theta(theta)
            outputs[0][0] = np.array(mu)

        def grad(self, inputs, g):
            theta, = inputs
            mu = self(theta)
            thetamu = theta * mu
            return [- g[0] * mu ** 2 / (1 + thetamu + at.exp(-thetamu))]

If you value your sanity, always check that the gradient is ok::

    # verify_grad needs an explicit random number generator
    rng = np.random.RandomState(42)

    aesara.gradient.verify_grad(MuFromTheta(), [np.array(0.2)], rng=rng)
    aesara.gradient.verify_grad(MuFromTheta(), [np.array(1e-5)], rng=rng)
    aesara.gradient.verify_grad(MuFromTheta(), [np.array(1e5)], rng=rng)

We can now define our model using this new `Op`::

    import pymc as pm

    at_mu_from_theta = MuFromTheta()

    with pm.Model() as model:
        theta = pm.HalfNormal('theta', sigma=1)
        mu = pm.Deterministic('mu', at_mu_from_theta(theta))
        pm.Normal('y', mu=mu, sigma=0.1, observed=[0.2, 0.21, 0.3])

        idata = pm.sample()
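As an optional last check (a sketch that is not part of the original guide; the
variable names are illustrative), the new `Op` behaves like any other Aesara
operation outside of PyMC as well. Compiling a small function that returns both
the root and its gradient shows the `grad` method in action::

    # Illustrative sketch, not from the original guide;
    # reuses `aesara`, `at` and `at_mu_from_theta` from above.
    theta_at = at.dscalar('theta_at')
    mu_at = at_mu_from_theta(theta_at)

    # Both the root itself and d(mu)/d(theta) come out of the same graph
    f = aesara.function([theta_at], [mu_at, aesara.grad(mu_at, theta_at)])
    mu_value, dmu_dtheta = f(0.5)
    print(mu_value, dmu_dtheta)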