20 | 20 |
21 | 21 | #############################################################################
22 | 22 | # Imports needed
23 | | -# ------------------------------
| 23 | +# --------------
24 | 24 |
25 | 25 | import matplotlib.lines as mlines
26 | 26 | import matplotlib.pyplot as plt
37 | 37 |
38 | 38 | #############################################################################
39 | 39 | # Generate the data
40 | | -# ------------------------------
| 40 | +# -----------------
41 | 41 | # We generate the data using a multivariate normal distribution with a Toeplitz
42 | 42 | # correlation matrix. The target variable is generated using a non-linear function
43 | 43 | # of the features. To make the problem more intuitive, we generate a non-linear
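For reference, a minimal sketch of this kind of data-generating process (the variable names, the 0.6 correlation level, and the specific non-linear link below are assumptions, not the example's exact settings):

import numpy as np
from scipy.linalg import toeplitz

rng = np.random.default_rng(0)
n_samples, n_features = 300, 10
# Toeplitz correlation: corr(X_i, X_j) = rho ** |i - j|
rho = 0.6
cov = toeplitz(rho ** np.arange(n_features))
X = rng.multivariate_normal(np.zeros(n_features), cov, size=n_samples)
# Non-linear target built from a few "important" features plus noise
y = X[:, 0] * X[:, 1] + X[:, 2] ** 2 + rng.normal(scale=0.5, size=n_samples)
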
81 | 81 |
82 | 82 | #############################################################################
83 | 83 | # Visualize the data
84 | | -# ------------------------------
| 84 | +# ------------------
85 | 85 |
86 | 86 | fig, axes = plt.subplots(
87 | 87 |     1,
115 | 115 |
116 | 116 | #############################################################################
117 | 117 | # Variable importance inference
118 | | -# ------------------------------
| 118 | +# -----------------------------
119 | 119 | # We use two different Support Vector Machine models, one with a linear kernel and
120 | 120 | # one with a polynomial kernel of degree 2, well specified to capture the non-linear
121 | 121 | # relationship between the features and the target variable. We then use the CPI and
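A minimal sketch of the two learners described above (the estimator names and any hyperparameters beyond the kernels are assumptions; the importance/CPI fitting step itself is omitted here):

from sklearn.svm import SVR

# Linear kernel: misspecified for a non-linear target
model_linear = SVR(kernel="linear")
# Polynomial kernel of degree 2: well specified for quadratic and interaction terms
model_poly = SVR(kernel="poly", degree=2)
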
208 | 208 |
209 | 209 | #############################################################################
210 | 210 | # Compute the p-values for the variable importance
211 | | -# ------------------------------
| 211 | +# ------------------------------------------------
212 | 212 |
213 | 213 | pval_arr = np.zeros((n_features, 3))
214 | 214 | for j in range(n_features):
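As background (not the example's actual loop, which is truncated in this diff), per-feature p-values of this kind are often derived with a one-sided test of the importance scores against zero; a generic sketch under that assumption, with placeholder importance samples:

import numpy as np
from scipy.stats import norm

rng = np.random.default_rng(0)
importance_draws = rng.normal(loc=0.1, size=(100, 10))  # placeholder importance samples
mean = importance_draws.mean(axis=0)
sem = importance_draws.std(axis=0, ddof=1) / np.sqrt(importance_draws.shape[0])
pvals = norm.sf(mean / sem)  # one-sided p-value per feature
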
218 | 218 |
219 | 219 | #############################################################################
220 | 220 | # Visualize the variable importance
221 | | -# ------------------------------
| 221 | +# ---------------------------------
222 | 222 | # Here we plot the variable importance and highlight the features that are considered
223 | 223 | # important, with a p-value lower than 0.05, using a diamond marker. We also highlight
224 | 224 | # the true important features, used to generate the target variable, with a star marker.
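A minimal sketch of that marker scheme (the array names and placeholder values are assumptions, not the example's exact plotting code):

import numpy as np
import matplotlib.pyplot as plt
import matplotlib.lines as mlines

importance = np.array([0.9, 0.05, 0.4, 0.02])        # placeholder importance values
pvals = np.array([0.001, 0.3, 0.01, 0.6])            # placeholder p-values
true_important = np.array([True, False, True, False])

x = np.arange(len(importance))
plt.bar(x, importance)
sig = pvals < 0.05
plt.scatter(x[sig], importance[sig] + 0.05, marker="D")                        # p-value < 0.05
plt.scatter(x[true_important], importance[true_important] + 0.10, marker="*")  # truly important
plt.legend(handles=[
    mlines.Line2D([], [], marker="D", linestyle="None", label="p-value < 0.05"),
    mlines.Line2D([], [], marker="*", linestyle="None", label="true important feature"),
])
plt.show()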