@@ -214,19 +214,17 @@ class MvNormal(_QuadFormBase):
                                 [0.1, 0.2, 1.0]])
            data = np.random.multivariate_normal(mu, true_cov, 10)

-           sd_dist = pm.HalfCauchy.dist(beta=2.5, shape=3)
-           chol_packed = pm.LKJCholeskyCov('chol_packed',
-               n=3, eta=2, sd_dist=sd_dist)
-           chol = pm.expand_packed_triangular(3, chol_packed)
+           sd_dist = pm.Exponential.dist(1.0, shape=3)
+           chol, corr, stds = pm.LKJCholeskyCov('chol_cov', n=3, eta=2,
+               sd_dist=sd_dist, compute_corr=True)
            vals = pm.MvNormal('vals', mu=mu, chol=chol, observed=data)

        For unobserved values it can be better to use a non-centered
        parametrization::

-           sd_dist = pm.HalfCauchy.dist(beta=2.5, shape=3)
-           chol_packed = pm.LKJCholeskyCov('chol_packed',
-               n=3, eta=2, sd_dist=sd_dist)
-           chol = pm.expand_packed_triangular(3, chol_packed)
+           sd_dist = pm.Exponential.dist(1.0, shape=3)
+           chol, _, _ = pm.LKJCholeskyCov('chol_cov', n=3, eta=2,
+               sd_dist=sd_dist, compute_corr=True)
            vals_raw = pm.Normal('vals_raw', mu=0, sigma=1, shape=(5, 3))
            vals = pm.Deterministic('vals', tt.dot(chol, vals_raw.T).T)
        """
@@ -1011,113 +1009,9 @@ def _lkj_normalizing_constant(eta, n):
    return result


-class LKJCholeskyCov(Continuous):
-    R"""Covariance matrix with LKJ distributed correlations.
-
-    This defines a distribution over cholesky decomposed covariance
-    matrices, such that the underlying correlation matrices follow an
-    LKJ distribution [1] and the standard deviations follow an arbitray
-    distribution specified by the user.
-
-    Parameters
-    ----------
-    eta: float
-        The shape parameter (eta > 0) of the LKJ distribution. eta = 1
-        implies a uniform distribution of the correlation matrices;
-        larger values put more weight on matrices with few correlations.
-    n: int
-        Dimension of the covariance matrix (n > 1).
-    sd_dist: pm.Distribution
-        A distribution for the standard deviations.
-
-    Notes
-    -----
-    Since the cholesky factor is a lower triangular matrix, we use
-    packed storge for the matrix: We store and return the values of
-    the lower triangular matrix in a one-dimensional array, numbered
-    by row::
-
-        [[0 - - -]
-         [1 2 - -]
-         [3 4 5 -]
-         [6 7 8 9]]
-
-    You can use `pm.expand_packed_triangular(packed_cov, lower=True)`
-    to convert this to a regular two-dimensional array.
-
-    Examples
-    --------
-    .. code:: python
-
-        with pm.Model() as model:
-            # Note that we access the distribution for the standard
-            # deviations, and do not create a new random variable.
-            sd_dist = pm.HalfCauchy.dist(beta=2.5)
-            packed_chol = pm.LKJCholeskyCov('chol_cov', eta=2, n=10, sd_dist=sd_dist)
-            chol = pm.expand_packed_triangular(10, packed_chol, lower=True)
-
-            # Define a new MvNormal with the given covariance
-            vals = pm.MvNormal('vals', mu=np.zeros(10), chol=chol, shape=10)
-
-            # Or transform an uncorrelated normal:
-            vals_raw = pm.Normal('vals_raw', mu=0, sigma=1, shape=10)
-            vals = tt.dot(chol, vals_raw)
-
-            # Or compute the covariance matrix
-            cov = tt.dot(chol, chol.T)
-
-            # Extract the standard deviations
-            stds = tt.sqrt(tt.diag(cov))
-
-    **Implementation** In the unconstrained space all values of the cholesky factor
-    are stored untransformed, except for the diagonal entries, where
-    we use a log-transform to restrict them to positive values.
-
-    To correctly compute log-likelihoods for the standard deviations
-    and the correlation matrix seperatly, we need to consider a
-    second transformation: Given a cholesky factorization
-    :math:`LL^T = \Sigma` of a covariance matrix we can recover the
-    standard deviations :math:`\sigma` as the euclidean lengths of
-    the rows of :math:`L`, and the cholesky factor of the
-    correlation matrix as :math:`U = \text{diag}(\sigma)^{-1}L`.
-    Since each row of :math:`U` has length 1, we do not need to
-    store the diagonal. We define a transformation :math:`\phi`
-    such that :math:`\phi(L)` is the lower triangular matrix containing
-    the standard deviations :math:`\sigma` on the diagonal and the
-    correlation matrix :math:`U` below. In this form we can easily
-    compute the different likelihoods seperatly, as the likelihood
-    of the correlation matrix only depends on the values below the
-    diagonal, and the likelihood of the standard deviation depends
-    only on the diagonal values.
-
-    We still need the determinant of the jacobian of :math:`\phi^{-1}`.
-    If we think of :math:`\phi` as an automorphism on
-    :math:`\mathbb{R}^{\tfrac{n(n+1)}{2}}`, where we order
-    the dimensions as described in the notes above, the jacobian
-    is a block-diagonal matrix, where each block corresponds to
-    one row of :math:`U`. Each block has arrowhead shape, and we
-    can compute the determinant of that as described in [2]. Since
-    the determinant of a block-diagonal matrix is the product
-    of the determinants of the blocks, we get
-
-    .. math::
-
-       \text{det}(J_{\phi^{-1}}(U)) =
-       \left[
-           \prod_{i=2}^N u_{ii}^{i - 1} L_{ii}
-       \right]^{-1}
-
-    References
-    ----------
-    .. [1] Lewandowski, D., Kurowicka, D. and Joe, H. (2009).
-       "Generating random correlation matrices based on vines and
-       extended onion method." Journal of multivariate analysis,
-       100(9), pp.1989-2001.
-
-    .. [2] J. M. isn't a mathematician (http://math.stackexchange.com/users/498/
-       j-m-isnt-a-mathematician), Different approaches to evaluate this
-       determinant, URL (version: 2012-04-14):
-       http://math.stackexchange.com/q/130026
+class _LKJCholeskyCov(Continuous):
+    R"""Underlying class for covariance matrix with LKJ distributed correlations.
+    See docs for LKJCholeskyCov function for more details on how to use it in models.
    """

    def __init__(self, eta, n, sd_dist, *args, **kwargs):
        self.n = n
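Both the removed docstring and the new wrapper's docstring (next hunk) describe the
same row-major packed storage of the lower triangle. A plain-NumPy illustration of
that layout, using `np.tril_indices`, which enumerates the lower triangle row by
row exactly as in the diagram (illustrative only, not PyMC3 internals)::

    import numpy as np

    n = 4
    packed = np.arange(n * (n + 1) // 2)  # packed values 0, 1, ..., 9
    L = np.zeros((n, n))
    L[np.tril_indices(n)] = packed        # fill the lower triangle row by row

    print(L)
    # [[0. 0. 0. 0.]
    #  [1. 2. 0. 0.]
    #  [3. 4. 5. 0.]
    #  [6. 7. 8. 9.]]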
@@ -1289,6 +1183,161 @@ def random(self, point=None, size=None):
        return samples


+def LKJCholeskyCov(
+    name, eta, n, sd_dist, compute_corr=False, store_in_trace=True, *args, **kwargs
+):
+    R"""Wrapper function for covariance matrix with LKJ distributed correlations.
+
+    This defines a distribution over Cholesky decomposed covariance
+    matrices, such that the underlying correlation matrices follow an
+    LKJ distribution [1] and the standard deviations follow an arbitrary
+    distribution specified by the user.
+
+    Parameters
+    ----------
+    name: str
+        The name given to the variable in the model.
+    eta: float
+        The shape parameter (eta > 0) of the LKJ distribution. eta = 1
+        implies a uniform distribution of the correlation matrices;
+        larger values put more weight on matrices with few correlations.
+    n: int
+        Dimension of the covariance matrix (n > 1).
+    sd_dist: pm.Distribution
+        A distribution for the standard deviations.
+    compute_corr: bool, default=False
+        If `True`, returns three values: the Cholesky decomposition, the correlations
+        and the standard deviations of the covariance matrix. Otherwise, only returns
+        the packed Cholesky decomposition. Defaults to `False` to ensure backwards
+        compatibility.
+    store_in_trace: bool, default=True
+        Whether to store the correlations and standard deviations of the covariance
+        matrix in the posterior trace. If `True`, they will automatically be named as
+        `{name}_corr` and `{name}_stds` respectively. Effective only when
+        `compute_corr=True`.
+
+    Returns
+    -------
+    packed_chol: TensorVariable
+        If `compute_corr=False` (default). The packed Cholesky covariance decomposition.
+    chol: TensorVariable
+        If `compute_corr=True`. The unpacked Cholesky covariance decomposition.
+    corr: TensorVariable
+        If `compute_corr=True`. The correlations of the covariance matrix.
+    stds: TensorVariable
+        If `compute_corr=True`. The standard deviations of the covariance matrix.
+
+    Notes
+    -----
+    Since the Cholesky factor is a lower triangular matrix, we use packed storage for
+    the matrix: We store the values of the lower triangular matrix in a one-dimensional
+    array, numbered by row::
+
+        [[0 - - -]
+         [1 2 - -]
+         [3 4 5 -]
+         [6 7 8 9]]
+
+    The unpacked Cholesky covariance matrix is automatically computed and returned when
+    you specify `compute_corr=True` in `pm.LKJCholeskyCov` (see example below).
+    Otherwise, you can use `pm.expand_packed_triangular(packed_cov, lower=True)`
+    to convert the packed Cholesky matrix to a regular two-dimensional array.
+
+    Examples
+    --------
+    .. code:: python
+
+        with pm.Model() as model:
+            # Note that we access the distribution for the standard
+            # deviations, and do not create a new random variable.
+            sd_dist = pm.Exponential.dist(1.0)
+            chol, corr, sigmas = pm.LKJCholeskyCov('chol_cov', eta=4, n=10,
+                                                   sd_dist=sd_dist, compute_corr=True)
+
+            # if you only want the packed Cholesky (default behavior):
+            # packed_chol = pm.LKJCholeskyCov('chol_cov', eta=4, n=10, sd_dist=sd_dist)
+            # chol = pm.expand_packed_triangular(10, packed_chol, lower=True)
+
+            # Define a new MvNormal with the given covariance
+            vals = pm.MvNormal('vals', mu=np.zeros(10), chol=chol, shape=10)
+
+            # Or transform an uncorrelated normal:
+            vals_raw = pm.Normal('vals_raw', mu=0, sigma=1, shape=10)
+            vals = tt.dot(chol, vals_raw)
+
+            # Or compute the covariance matrix
+            cov = tt.dot(chol, chol.T)
+
+    **Implementation** In the unconstrained space all values of the Cholesky factor
+    are stored untransformed, except for the diagonal entries, where
+    we use a log-transform to restrict them to positive values.
+
+    To correctly compute log-likelihoods for the standard deviations
+    and the correlation matrix separately, we need to consider a
+    second transformation: Given a Cholesky factorization
+    :math:`LL^T = \Sigma` of a covariance matrix we can recover the
+    standard deviations :math:`\sigma` as the Euclidean lengths of
+    the rows of :math:`L`, and the Cholesky factor of the
+    correlation matrix as :math:`U = \text{diag}(\sigma)^{-1}L`.
+    Since each row of :math:`U` has length 1, we do not need to
+    store the diagonal. We define a transformation :math:`\phi`
+    such that :math:`\phi(L)` is the lower triangular matrix containing
+    the standard deviations :math:`\sigma` on the diagonal and the
+    correlation matrix :math:`U` below. In this form we can easily
+    compute the different likelihoods separately, as the likelihood
+    of the correlation matrix only depends on the values below the
+    diagonal, and the likelihood of the standard deviations depends
+    only on the diagonal values.
+
+    We still need the determinant of the Jacobian of :math:`\phi^{-1}`.
+    If we think of :math:`\phi` as an automorphism on
+    :math:`\mathbb{R}^{\tfrac{n(n+1)}{2}}`, where we order
+    the dimensions as described in the notes above, the Jacobian
+    is a block-diagonal matrix, where each block corresponds to
+    one row of :math:`U`. Each block has arrowhead shape, and we
+    can compute its determinant as described in [2]. Since
+    the determinant of a block-diagonal matrix is the product
+    of the determinants of the blocks, we get
+
+    .. math::
+
+       \text{det}(J_{\phi^{-1}}(U)) =
+       \left[
+           \prod_{i=2}^N u_{ii}^{i - 1} L_{ii}
+       \right]^{-1}
+
+    References
+    ----------
+    .. [1] Lewandowski, D., Kurowicka, D. and Joe, H. (2009).
+       "Generating random correlation matrices based on vines and
+       extended onion method." Journal of Multivariate Analysis,
+       100(9), pp.1989-2001.
+
+    .. [2] J. M. isn't a mathematician (http://math.stackexchange.com/users/498/
+       j-m-isnt-a-mathematician), Different approaches to evaluate this
+       determinant, URL (version: 2012-04-14):
+       http://math.stackexchange.com/q/130026
+    """
+    # compute Cholesky decomposition
+    packed_chol = _LKJCholeskyCov(name, eta=eta, n=n, sd_dist=sd_dist)
+    if not compute_corr:
+        return packed_chol
+
+    else:
+        chol = pm.expand_packed_triangular(n, packed_chol, lower=True)
+        # compute covariance matrix
+        cov = tt.dot(chol, chol.T)
+        # extract standard deviations and rho
+        stds = tt.sqrt(tt.diag(cov))
+        inv_stds = 1 / stds
+        corr = inv_stds[None, :] * cov * inv_stds[:, None]
+        if store_in_trace:
+            stds = pm.Deterministic(f"{name}_stds", stds)
+            corr = pm.Deterministic(f"{name}_corr", corr)
+
+        return chol, corr, stds
+
+
class LKJCorr(Continuous):
    R"""
    The LKJ (Lewandowski, Kurowicka and Joe) log-likelihood.
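As a sanity check on the extraction the new wrapper performs (standard deviations
from the diagonal of `chol @ chol.T`, correlations by rescaling with the inverse
standard deviations), here is the same computation in plain NumPy with a
hand-picked factor (a sketch, not the PyMC3 implementation)::

    import numpy as np

    L = np.array([[2.0, 0.0, 0.0],
                  [0.6, 1.5, 0.0],
                  [0.3, 0.2, 1.0]])  # hand-picked lower-triangular factor
    cov = L @ L.T
    stds = np.sqrt(np.diag(cov))
    inv_stds = 1 / stds
    corr = inv_stds[None, :] * cov * inv_stds[:, None]

    # As the **Implementation** notes state, the stds are the Euclidean
    # lengths of the rows of L, and corr has a unit diagonal.
    assert np.allclose(stds, np.linalg.norm(L, axis=1))
    assert np.allclose(np.diag(corr), 1.0)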