
DOC: clean notebooks
glouppe committed Apr 22, 2016
1 parent e47c705 commit 323d1ed
Showing 6 changed files with 83 additions and 225 deletions.
19 changes: 7 additions & 12 deletions carl/__init__.py
@@ -10,18 +10,13 @@
[![DOI](https://zenodo.org/badge/doi/10.5281/zenodo.47798.svg)](http://dx.doi.org/10.5281/zenodo.47798)
## Documentation
* Illustrative examples complementing the online reference
can be found under the
[`examples/`](https://github.com/diana-hep/carl/tree/master/examples)
directory.
* Extended details regarding likelihood-free inference with calibrated
classifiers can be found in the companion paper _"Approximating Likelihood
Ratios with Calibrated Discriminative Classifiers", Kyle Cranmer, Juan Pavez,
Gilles Louppe._
[http://arxiv.org/abs/1506.02169](http://arxiv.org/abs/1506.02169)
## Likelihood-free inference with classifiers
Extended details regarding likelihood-free inference with calibrated
classifiers can be found in the companion paper _"Approximating Likelihood
Ratios with Calibrated Discriminative Classifiers", Kyle Cranmer, Juan Pavez,
Gilles Louppe._
[http://arxiv.org/abs/1506.02169](http://arxiv.org/abs/1506.02169)
## Installation
8 changes: 6 additions & 2 deletions carl/learning/calibration.py
@@ -32,7 +32,7 @@ class CalibratedClassifierCV(BaseEstimator, ClassifierMixin):
probabilities for each of the folds are then averaged for prediction.
"""

def __init__(self, base_estimator, method="histogram", cv=1):
def __init__(self, base_estimator, method="histogram", bins="auto", cv=1):
"""Constructor.
Parameters
@@ -47,6 +47,9 @@ def __init__(self, base_estimator, method="histogram", cv=1):
`"histogram"`, `"kde"`, `"isotonic"`, `"interpolated-isotonic"` and
`"sigmoid"`.
* `bins` [int, default="auto"]:
The number of bins, if `method` is `"histogram"`.
* `cv` [integer, cross-validation generator, iterable or `"prefit"`]:
Determines the cross-validation splitting strategy.
Possible inputs for cv are:
@@ -61,6 +64,7 @@ def __init__(self, base_estimator, method="histogram", cv=1):
"""
self.base_estimator = base_estimator
self.method = method
self.bins = bins
self.cv = cv

def fit(self, X, y):
@@ -93,7 +97,7 @@ def fit(self, X, y):

# Calibrator
if self.method == "histogram":
base_calibrator = HistogramCalibrator()
base_calibrator = HistogramCalibrator(bins=self.bins)
elif self.method == "kde":
base_calibrator = KernelDensityCalibrator()
elif self.method == "isotonic":
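The new `bins` argument is simply forwarded to `HistogramCalibrator` when `method="histogram"`. A minimal usage sketch, assuming `CalibratedClassifierCV` is importable from `carl.learning` as in the notebook below, with a synthetic dataset made up for illustration:

```python
import numpy as np
from sklearn.linear_model import LogisticRegression
from carl.learning import CalibratedClassifierCV

# Hypothetical toy data: two overlapping Gaussian classes.
rng = np.random.RandomState(0)
X = np.vstack([rng.normal(0, 1, size=(500, 2)),
               rng.normal(1, 1, size=(500, 2))])
y = np.concatenate([np.zeros(500), np.ones(500)]).astype(int)

# `bins` is passed through to HistogramCalibrator; the default "auto"
# leaves the choice of the number of bins to the calibrator.
clf = CalibratedClassifierCV(base_estimator=LogisticRegression(),
                             method="histogram", bins=50, cv=3)
clf.fit(X, y)
probas = clf.predict_proba(X)  # calibrated class probabilities
```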
4 changes: 2 additions & 2 deletions ci/templates/html.mako
@@ -361,6 +361,8 @@
%>
<div id="sidebar">
<ul id="index">
<li class="set"><h3><a href="${ root_url }">Index</a></h3></li>
% if len(variables) > 0:
<li class="set"><h3><a href="#header-variables">Module variables</a></h3>
${show_column_list(map(lambda v: link(v.refname), variables))}
@@ -412,8 +414,6 @@
</ul>
</li>
% endif
<li class="set"><h3><a href="${ root_url }">Index</a></h3></li>
</ul>
</div>
</%def>
123 changes: 33 additions & 90 deletions examples/Diagnostics for approximate likelihood ratios.ipynb
@@ -4,7 +4,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"## Diagnostics for approximate likelihood ratios\n",
"# Diagnostics for approximate likelihood ratios\n",
"\n",
"Kyle Cranmer, Juan Pavez, Gilles Louppe, March 2016.\n",
"\n",
@@ -23,23 +23,14 @@
},
{
"cell_type": "code",
"execution_count": 1,
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Couldn't import dot_parser, loading of dot files will not be possible.\n"
]
}
],
"outputs": [],
"source": [
"%matplotlib inline\n",
"import matplotlib.pyplot as plt\n",
"#plt.set_cmap(\"viridis\")\n",
"\n",
"import numpy as np\n",
"import theano\n",
@@ -53,7 +44,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"Create model and generate artificial dataset"
"## Create model and generate artificial data"
]
},
{
@@ -71,8 +62,6 @@
"from carl.distributions import LinearTransform\n",
"from sklearn.datasets import make_sparse_spd_matrix\n",
"\n",
"import pdb\n",
"\n",
"# Parameters\n",
"true_A = 1.\n",
"A = theano.shared(true_A, name=\"A\")\n",
@@ -100,6 +89,7 @@
" Exponential(inverse_scale=3.0),\n",
" Exponential(inverse_scale=0.5)]), R))\n",
"p1 = p1s[0]\n",
"\n",
"# Draw data\n",
"X_true = p0.rvs(500, random_state=314) "
]
@@ -108,7 +98,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"# Known likelihood setup"
"## Known likelihood setup"
]
},
{
@@ -129,7 +119,9 @@
"source": [
"# Minimize the exact LR\n",
"from scipy.optimize import minimize\n",
"\n",
"p1 = p1s[2]\n",
"\n",
"def nll_exact(theta, X):\n",
" A.set_value(theta[0])\n",
" return (p0.nll(X) - p1.nll(X)).sum()\n",
@@ -188,7 +180,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"# Likelihood-free setup\n",
"## Likelihood-free setup\n",
"Here we create the data to train a parametrized classifier"
]
},
@@ -205,36 +197,25 @@
"\n",
"bounds = [(-3, 3), (-3, 3)]\n",
"\n",
"clf_parameters = [(1000,100000),(1000000,500),(1000000,100000)]\n",
"clf_parameters = [(1000, 100000), (1000000, 500), (1000000, 100000)]\n",
"X = [0]*3*3\n",
"y = [0]*3*3\n",
"\n",
"\n",
"\n",
"for k,(param,p1) in enumerate(product(clf_parameters,p1s)):\n",
" X[k], y[k] = make_parameterized_classification(\n",
" p0, p1,\n",
" param[0], \n",
" [(A, np.linspace(bounds[0][0],bounds[0][1], num=30))],\n",
" random_state=0)\n"
" random_state=0)"
]
},
{
"cell_type": "code",
"execution_count": 7,
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/Users/juanpavez/Library/Python/2.7/lib/python/site-packages/sklearn/cross_validation.py:43: DeprecationWarning: This module has been deprecated in favor of the model_selection module into which all the refactored classes and functions are moved. Also note that the interface of the new CV iterators are different from that of this module. This module will be removed in 0.20.\n",
" \"This module will be removed in 0.20.\", DeprecationWarning)\n"
]
}
],
"outputs": [],
"source": [
"# Train parameterized classifier\n",
"from carl.learning import as_classifier\n",
@@ -243,9 +224,10 @@
"from sklearn.neural_network import MLPRegressor\n",
"from sklearn.pipeline import make_pipeline\n",
"from sklearn.preprocessing import StandardScaler\n",
"from sklearn.model_selection import RandomizedSearchCV\n",
"\n",
"clfs = []\n",
"for k,_ in enumerate(product(clf_parameters,p1s)):\n",
"\n",
"for k, _ in enumerate(product(clf_parameters,p1s)):\n",
" clfs.append(ParameterizedClassifier(\n",
" make_pipeline(StandardScaler(), \n",
" as_classifier(MLPRegressor(learning_rate=\"adaptive\", \n",
@@ -279,9 +261,9 @@
" return wrapper\n",
"\n",
"def objective(theta, random_state=0, n_samples=100000, clf=clfs[0],p1=p1s[0]): \n",
" \n",
" # Set parameter values \n",
" A.set_value(theta[0])\n",
" \n",
" # Fit ratio\n",
" ratio = ClassifierRatio(CalibratedClassifierCV(\n",
" base_estimator=clf, \n",
@@ -313,40 +295,17 @@
},
{
"cell_type": "code",
"execution_count": 9,
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"*Optimization completed:\n",
" -Maximum number of iterations reached.\n",
"*Optimization completed:\n",
" -Maximum number of iterations reached.\n",
"*Optimization completed:\n",
" -Maximum number of iterations reached.\n",
"*Optimization completed:\n",
" -Maximum number of iterations reached.\n",
"*Optimization completed:\n",
" -Maximum number of iterations reached.\n",
"*Optimization completed:\n",
" -Maximum number of iterations reached.\n",
"*Optimization completed:\n",
" -Maximum number of iterations reached.\n",
"*Optimization completed:\n",
" -Maximum number of iterations reached.\n",
"*Optimization completed:\n",
" -Maximum number of iterations reached.\n"
]
}
],
"outputs": [],
"source": [
"from GPyOpt.methods import BayesianOptimization\n",
"\n",
"solvers = []\n",
"for k,(param,p1) in enumerate(product(clf_parameters,p1s)):\n",
"\n",
"for k, (param, p1) in enumerate(product(clf_parameters,p1s)):\n",
" clf = clfs[k]\n",
" n_samples = param[1]\n",
" bounds = [(-3, 3)]\n",
@@ -379,7 +338,8 @@
],
"source": [
"approx_MLEs = []\n",
"for k,_ in enumerate(product(clf_parameters,p1s)):\n",
"\n",
"for k, _ in enumerate(product(clf_parameters,p1s)):\n",
" solver = solvers[k]\n",
" approx_MLE = solver.x_opt\n",
" approx_MLEs.append(approx_MLE)\n",
@@ -459,7 +419,7 @@
"rs = []\n",
"solver = solvers[0]\n",
"\n",
"for k,_ in enumerate(product(clf_parameters,p1s)):\n",
"for k, _ in enumerate(product(clf_parameters,p1s)):\n",
" def gp_objective(theta):\n",
" theta = theta.reshape(1, -1)\n",
" return solvers[k].model.predict(theta)[0][0]\n",
@@ -496,27 +456,18 @@
" nll_gp, var_gp = solvers[k].model.predict(As.reshape(-1, 1))\n",
" nll_gp = 2. * (nll_gp - rs[k].fun) * len(X_true)\n",
" gp_ratios.append(nll_gp)\n",
" \n",
" # STD\n",
" std_gp = np.sqrt(4*var_gp*len(X_true)*len(X_true))\n",
" std_gp[np.isnan(std_gp)] = 0.\n",
" gp_std.append(std_gp)\n",
" \n",
" # 95% CI\n",
" q1_gp, q2_gp = solvers[k].model.predict_quantiles(As.reshape(-1, 1))\n",
" q1_gp = 2. * (q1_gp - rs[k].fun) * len(X_true)\n",
" q2_gp = 2. * (q2_gp - rs[k].fun) * len(X_true)\n",
" gp_q1.append(q1_gp)\n",
" gp_q2.append(q2_gp)\n",
"\n",
" #nll_approx = np.zeros(n_points)\n",
"\n",
" #approx = [objective([a]) for a in np.linspace(*bounds[0], n_points)]\n",
" #approx = [objective([a],n_samples=n_samples,clf=clf,p1=p1) for a \n",
" # in np.linspace(bounds[0][0],bounds[0][1], n_points)]\n",
"\n",
" #approx = np.array(approx)\n",
" #approx = 2. * (approx - approx.min()) * len(X_true)\n",
" #nll_approx = approx\n",
" #approx_ratios.append(nll_approx)\n"
" gp_q2.append(q2_gp)"
]
},
{
@@ -566,7 +517,8 @@
],
"source": [
"bounds = [(true_A - 0.30, true_A + 0.30)]\n",
"for k,_ in enumerate(clf_parameters):\n",
"\n",
"for k, _ in enumerate(clf_parameters):\n",
" fig = plt.figure()\n",
" ax = fig.add_subplot(1,1,1)\n",
" ax.plot(As, nll, label=\"Exact\")\n",
@@ -624,9 +576,11 @@
"outputs": [],
"source": [
"from sklearn.metrics import roc_curve, auc\n",
"def makeROC(predictions,targetdata):\n",
"\n",
"def makeROC(predictions ,targetdata):\n",
" fpr, tpr, _ = roc_curve(targetdata.ravel(),predictions.ravel())\n",
" roc_auc = auc(fpr, tpr)\n",
" \n",
" return fpr,tpr,roc_auc"
]
},
@@ -669,8 +623,6 @@
}
],
"source": [
"#fig = plt.figure(figsize=(15,15))\n",
"\n",
"# I obtain data from r*p1 by resampling data from p1 using r as weights\n",
"def weight_data(x0,x1,weights):\n",
" x1_len = x1.shape[0]\n",
@@ -767,15 +719,6 @@
"trained, well-calibrated case is almost identical to the exact likelihood\n",
"ratio, confirming the quality of the approximation."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
}
],
"metadata": {

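For reference, the calibrated-ratio objective that this notebook minimizes follows the pattern sketched below. This is a sketch, not the notebook verbatim: the `ClassifierRatio` fit/predict signatures are assumptions based on carl's other examples, and `A`, `p0`, `p1`, `clf`, and `X_true` refer to the notebook's own variables.

```python
import numpy as np
from carl.ratios import ClassifierRatio
from carl.learning import CalibratedClassifierCV

def objective(theta, clf, p0, p1, n_samples=100000):
    # Move the parameterized numerator density to the candidate theta.
    A.set_value(theta[0])

    # Calibrate the already-trained classifier and wrap it
    # as a density-ratio model.
    ratio = ClassifierRatio(CalibratedClassifierCV(
        base_estimator=clf, method="histogram", cv="prefit"))
    ratio.fit(numerator=p0, denominator=p1, n_samples=n_samples)

    # Approximate the negative log-likelihood ratio on the observed data.
    r = ratio.predict(X_true, log=True)
    return -np.mean(r[np.isfinite(r)])
```

Minimizing this objective, in the notebook with GPyOpt's `BayesianOptimization`, yields the approximate MLEs (`solver.x_opt`) that the diagnostics compare against the exact ones.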