Skip to content

Commit

Permalink
Updated ipython scripts
Browse files Browse the repository at this point in the history
  • Loading branch information
dTmC0945 committed Nov 4, 2024
1 parent 0cbf84a commit b105341
Show file tree
Hide file tree
Showing 5 changed files with 5 additions and 0 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"cells":[{"cell_type":"markdown","id":"21bb2df1-b058-4f90-a218-3bc190faf52b","metadata":{},"source":"Code for Decision Trees\n=======================\n\n"},{"cell_type":"markdown","id":"918f1812-d0f7-4913-8b12-d9431beb5f26","metadata":{},"source":["These are the code snippets used in the Decision Trees part.\n\n"]},{"cell_type":"markdown","id":"d35cf299-ee72-4ba1-886b-59a7706f26b8","metadata":{},"source":["### Introduction\n\n"]},{"cell_type":"code","execution_count":1,"id":"3bdb0325-dd4a-4622-bc12-2333914f48db","metadata":{},"outputs":[],"source":["import matplotlib.pyplot as plt\nimport ChalcedonPy as cp\n\n# Initialise ChalcedonPy\ncp.init(save_path=\"Decision-Trees\",\n display_mode=\"slide\")"]},{"cell_type":"code","execution_count":1,"id":"3a27a1b2-1538-47f9-89a0-ed815bb80c8f","metadata":{},"outputs":[],"source":["from sklearn.datasets import load_iris\nfrom sklearn.tree import DecisionTreeClassifier\niris = load_iris(as_frame=True)\nX_iris = iris.data[[\"petal length (cm)\", \"petal width (cm)\"]].values\ny_iris = iris.target\ntree_clf = DecisionTreeClassifier(max_depth=2, random_state=42)\ntree_clf.fit(X_iris, y_iris)"]},{"cell_type":"code","execution_count":1,"id":"87743105-ae6b-43af-89bc-c472b0fbbb63","metadata":{},"outputs":[],"source":["from sklearn.tree import export_graphviz\nexport_graphviz(\ntree_clf,\nout_file=\"iris_tree.dot\",\nfeature_names=[\"petal length (cm)\", \"petal width (cm)\"],\nclass_names=iris.target_names,\nrounded=True,\nfilled=True\n)"]},{"cell_type":"code","execution_count":1,"id":"efb321a0-fdf0-4773-a7ba-f31ad23daa00","metadata":{},"outputs":[],"source":["from graphviz import Source\nSource.from_file(\"iris_tree.dot\")"]},{"cell_type":"markdown","id":"516bca78-fd0b-424b-831f-5638a258b87a","metadata":{},"source":["### Making Predictions\n\n"]},{"cell_type":"code","execution_count":1,"id":"8e674a19-9882-49ad-ae2c-b7eaabfa7957","metadata":{},"outputs":[],"source":["import numpy as np\nimport matplotlib.pyplot as plt\n\n# extra code – just formatting 
details\nfrom matplotlib.colors import ListedColormap\ncustom_cmap = ListedColormap(['#fafab0', '#9898ff', '#a0faa0'])\nplt.figure(figsize=(8, 4))\n\nlengths, widths = np.meshgrid(np.linspace(0, 7.2, 100), np.linspace(0, 3, 100))\nX_iris_all = np.c_[lengths.ravel(), widths.ravel()]\ny_pred = tree_clf.predict(X_iris_all).reshape(lengths.shape)\nplt.contourf(lengths, widths, y_pred, alpha=0.3, cmap=custom_cmap)\nfor idx, (name, style) in enumerate(zip(iris.target_names, (\"yo\", \"bs\", \"g^\"))):\n plt.plot(X_iris[:, 0][y_iris == idx], X_iris[:, 1][y_iris == idx],\n style, label=f\"Iris {name}\")\n\n# extra code – this section beautifies and saves Figure 6–2\ntree_clf_deeper = DecisionTreeClassifier(max_depth=3, random_state=42)\ntree_clf_deeper.fit(X_iris, y_iris)\nth0, th1, th2a, th2b = tree_clf_deeper.tree_.threshold[[0, 2, 3, 6]]\nplt.xlabel(\"Petal length (cm)\")\nplt.ylabel(\"Petal width (cm)\")\nplt.plot([th0, th0], [0, 3], \"k-\", linewidth=2)\nplt.plot([th0, 7.2], [th1, th1], \"k--\", linewidth=2)\nplt.plot([th2a, th2a], [0, th1], \"k:\", linewidth=2)\nplt.plot([th2b, th2b], [th1, 3], \"k:\", linewidth=2)\nplt.text(th0 - 0.05, 1.0, \"Depth=0\", horizontalalignment=\"right\", fontsize=15)\nplt.text(3.2, th1 + 0.02, \"Depth=1\", verticalalignment=\"bottom\", fontsize=13)\nplt.text(th2a + 0.05, 0.5, \"(Depth=2)\", fontsize=11)\nplt.axis([0, 7.2, 0, 3])\nplt.legend()\nplt.show()"]},{"cell_type":"markdown","id":"3f8a59ed-4059-41ee-b12c-2a8059f4e56f","metadata":{},"source":["#### Estimating Class Probabilities\n\n"]},{"cell_type":"code","execution_count":1,"id":"5dbef0a1-4069-4072-bf75-e85457c904a7","metadata":{},"outputs":[],"source":["print(tree_clf.predict_proba([[5, 1.5]]).round(3))\nprint(tree_clf.predict([[5, 1.5]]))"]},{"cell_type":"code","execution_count":1,"id":"c9f2e19a-cd32-4f2d-b035-df223e5aee2c","metadata":{},"outputs":[],"source":["from sklearn.datasets import make_moons\n\nX_moons, y_moons = make_moons(n_samples=150, noise=0.2, 
random_state=42)\n\ntree_clf1 = DecisionTreeClassifier(random_state=42)\ntree_clf2 = DecisionTreeClassifier(min_samples_leaf=5, random_state=42)\ntree_clf1.fit(X_moons, y_moons)\ntree_clf2.fit(X_moons, y_moons)"]},{"cell_type":"code","execution_count":1,"id":"e5fbdc62-2cfc-4634-8237-bbd47b73f900","metadata":{},"outputs":[],"source":["X_moons_test, y_moons_test = make_moons(n_samples=1000, noise=0.2, random_state=43)\nprint(tree_clf1.score(X_moons_test, y_moons_test))\nprint(tree_clf2.score(X_moons_test, y_moons_test))"]},{"cell_type":"code","execution_count":1,"id":"28178fc7-6a45-4208-94ab-665df019ca91","metadata":{},"outputs":[],"source":["def plot_decision_boundary(clf, X, y, axes, cmap):\n x1, x2 = np.meshgrid(np.linspace(axes[0], axes[1], 100),\n np.linspace(axes[2], axes[3], 100))\n X_new = np.c_[x1.ravel(), x2.ravel()]\n y_pred = clf.predict(X_new).reshape(x1.shape)\n \n plt.contourf(x1, x2, y_pred, alpha=0.3, cmap=cmap)\n plt.contour(x1, x2, y_pred, cmap=\"Greys\", alpha=0.8)\n colors = {\"Wistia\": [\"#78785c\", \"#c47b27\"], \"Pastel1\": [\"red\", \"blue\"]}\n markers = (\"o\", \"^\")\n for idx in (0, 1):\n plt.plot(X[:, 0][y == idx], X[:, 1][y == idx],\n color=colors[cmap][idx], marker=markers[idx], linestyle=\"none\")\n plt.axis(axes)\n plt.xlabel(r\"$x_1$\")\n plt.ylabel(r\"$x_2$\", rotation=0)\n\nfig, axes = plt.subplots(ncols=2, figsize=(10, 4), sharey=True)\nplt.sca(axes[0])\nplot_decision_boundary(tree_clf1, X_moons, y_moons,\n axes=[-1.5, 2.4, -1, 1.5], cmap=\"Wistia\")\nplt.title(\"No restrictions\")\nplt.sca(axes[1])\nplot_decision_boundary(tree_clf2, X_moons, y_moons,\n axes=[-1.5, 2.4, -1, 1.5], cmap=\"Wistia\")\nplt.title(f\"min_samples_leaf = {tree_clf2.min_samples_leaf}\")\nplt.ylabel(\"\")\nplt.show()"]},{"cell_type":"markdown","id":"a238159a-aba7-4c33-b910-352269f7f36e","metadata":{},"source":["### 
Regression\n\n"]},{"cell_type":"code","execution_count":1,"id":"97fd7a21-10a9-4133-a273-8e6f027220bf","metadata":{},"outputs":[],"source":["from sklearn.tree import DecisionTreeRegressor\n\nnp.random.seed(42)\nX_quad = np.random.rand(200, 1) - 0.5 # a single random input feature\ny_quad = X_quad ** 2 + 0.025 * np.random.randn(200, 1)\n\ntree_reg = DecisionTreeRegressor(max_depth=2, random_state=42)\ntree_reg.fit(X_quad, y_quad)"]},{"cell_type":"code","execution_count":1,"id":"c21debc2-f9ff-488f-bc04-d74d019dc491","metadata":{},"outputs":[],"source":["export_graphviz(\n tree_reg,\n out_file=str(\"regression_tree.dot\"),\n feature_names=[\"x1\"],\n rounded=True,\n filled=True\n)"]},{"cell_type":"code","execution_count":1,"id":"a820cd06-5ec1-43b3-ae3d-bbe34b1d6363","metadata":{},"outputs":[],"source":["tree_reg2 = DecisionTreeRegressor(max_depth=3, random_state=42)\ntree_reg2.fit(X_quad, y_quad)"]},{"cell_type":"code","execution_count":1,"id":"37857f45-cf7d-465b-a8dd-fbc906493dd2","metadata":{},"outputs":[],"source":["def plot_regression_predictions(tree_reg, X, y, axes=[-0.5, 0.5, -0.05, 0.25]):\n x1 = np.linspace(axes[0], axes[1], 500).reshape(-1, 1)\n y_pred = tree_reg.predict(x1)\n plt.axis(axes)\n plt.xlabel(\"$x_1$\")\n plt.plot(X, y, \"b.\")\n plt.plot(x1, y_pred, \"r.-\", linewidth=2, label=r\"$\\hat{y}$\")\n\nfig, axes = plt.subplots(ncols=2, figsize=(10, 4), sharey=True)\nplt.sca(axes[0])\nplot_regression_predictions(tree_reg, X_quad, y_quad)\n\nth0, th1a, th1b = tree_reg.tree_.threshold[[0, 1, 4]]\nfor split, style in ((th0, \"k-\"), (th1a, \"k--\"), (th1b, \"k--\")):\n plt.plot([split, split], [-0.05, 0.25], style, linewidth=2)\nplt.text(th0, 0.16, \"Depth=0\", fontsize=15)\nplt.text(th1a + 0.01, -0.01, \"Depth=1\", horizontalalignment=\"center\", fontsize=13)\nplt.text(th1b + 0.01, -0.01, \"Depth=1\", fontsize=13)\nplt.ylabel(\"$y$\", rotation=0)\nplt.legend(loc=\"upper center\", fontsize=16)\nplt.title(\"max_depth=2\")\n\nplt.sca(axes[1])\nth2s = 
tree_reg2.tree_.threshold[[2, 5, 9, 12]]\nplot_regression_predictions(tree_reg2, X_quad, y_quad)\nfor split, style in ((th0, \"k-\"), (th1a, \"k--\"), (th1b, \"k--\")):\n plt.plot([split, split], [-0.05, 0.25], style, linewidth=2)\nfor split in th2s:\n plt.plot([split, split], [-0.05, 0.25], \"k:\", linewidth=1)\nplt.text(th2s[2] + 0.01, 0.15, \"Depth=2\", fontsize=13)\nplt.title(\"max_depth=3\")\n\nplt.show()"]},{"cell_type":"code","execution_count":1,"id":"7aaaa602-82e9-4a02-b604-3292c3f981d6","metadata":{},"outputs":[],"source":["tree_reg1 = DecisionTreeRegressor(random_state=42)\ntree_reg2 = DecisionTreeRegressor(random_state=42, min_samples_leaf=10)\ntree_reg1.fit(X_quad, y_quad)\ntree_reg2.fit(X_quad, y_quad)\n\nx1 = np.linspace(-0.5, 0.5, 500).reshape(-1, 1)\ny_pred1 = tree_reg1.predict(x1)\ny_pred2 = tree_reg2.predict(x1)\n\nfig, axes = plt.subplots(ncols=2, figsize=(10, 4), sharey=True)\n\nplt.sca(axes[0])\nplt.plot(X_quad, y_quad, \"b.\")\nplt.plot(x1, y_pred1, \"r.-\", linewidth=2, label=r\"$\\hat{y}$\")\nplt.axis([-0.5, 0.5, -0.05, 0.25])\nplt.xlabel(\"$x_1$\")\nplt.ylabel(\"$y$\", rotation=0)\nplt.legend(loc=\"upper center\")\nplt.title(\"No restrictions\")\n\nplt.sca(axes[1])\nplt.plot(X_quad, y_quad, \"b.\")\nplt.plot(x1, y_pred2, \"r.-\", linewidth=2, label=r\"$\\hat{y}$\")\nplt.axis([-0.5, 0.5, -0.05, 0.25])\nplt.xlabel(\"$x_1$\")\nplt.title(f\"min_samples_leaf={tree_reg2.min_samples_leaf}\")\n\nplt.show()"]},{"cell_type":"code","execution_count":1,"id":"b4bbc372-4bd7-4aa5-92f2-9a3c2e397b42","metadata":{},"outputs":[],"source":["np.random.seed(6)\nX_square = np.random.rand(100, 2) - 0.5\ny_square = (X_square[:, 0] > 0).astype(np.int64)\n\nangle = np.pi / 4 # 45 degrees\nrotation_matrix = np.array([[np.cos(angle), -np.sin(angle)],\n [np.sin(angle), np.cos(angle)]])\nX_rotated_square = X_square.dot(rotation_matrix)\n\ntree_clf_square = DecisionTreeClassifier(random_state=42)\ntree_clf_square.fit(X_square, y_square)\ntree_clf_rotated_square = 
DecisionTreeClassifier(random_state=42)\ntree_clf_rotated_square.fit(X_rotated_square, y_square)\n\nfig, axes = plt.subplots(ncols=2, figsize=(10, 4), sharey=True)\nplt.sca(axes[0])\nplot_decision_boundary(tree_clf_square, X_square, y_square,\n axes=[-0.7, 0.7, -0.7, 0.7], cmap=\"Pastel1\")\nplt.sca(axes[1])\nplot_decision_boundary(tree_clf_rotated_square, X_rotated_square, y_square,\n axes=[-0.7, 0.7, -0.7, 0.7], cmap=\"Pastel1\")\nplt.ylabel(\"\")\n\nplt.show()"]},{"cell_type":"code","execution_count":1,"id":"7fa86b0a-7c20-4fef-bddc-8fa65c54f95b","metadata":{},"outputs":[],"source":["from sklearn.decomposition import PCA\nfrom sklearn.pipeline import make_pipeline\nfrom sklearn.preprocessing import StandardScaler\n\npca_pipeline = make_pipeline(StandardScaler(), PCA())\nX_iris_rotated = pca_pipeline.fit_transform(X_iris)\ntree_clf_pca = DecisionTreeClassifier(max_depth=2, random_state=42)\ntree_clf_pca.fit(X_iris_rotated, y_iris)"]},{"cell_type":"code","execution_count":1,"id":"460dc940-4614-4d83-9f6e-93ae296272ad","metadata":{},"outputs":[],"source":["plt.figure(figsize=(8, 4))\n\naxes = [-2.2, 2.4, -0.6, 0.7]\nz0s, z1s = np.meshgrid(np.linspace(axes[0], axes[1], 100),\n np.linspace(axes[2], axes[3], 100))\nX_iris_pca_all = np.c_[z0s.ravel(), z1s.ravel()]\ny_pred = tree_clf_pca.predict(X_iris_pca_all).reshape(z0s.shape)\n\nplt.contourf(z0s, z1s, y_pred, alpha=0.3, cmap=custom_cmap)\nfor idx, (name, style) in enumerate(zip(iris.target_names, (\"yo\", \"bs\", \"g^\"))):\n plt.plot(X_iris_rotated[:, 0][y_iris == idx],\n X_iris_rotated[:, 1][y_iris == idx],\n style, label=f\"Iris {name}\")\n\nplt.xlabel(\"$z_1$\")\nplt.ylabel(\"$z_2$\", rotation=0)\nth1, th2 = tree_clf_pca.tree_.threshold[[0, 2]]\nplt.plot([th1, th1], axes[2:], \"k-\", linewidth=2)\nplt.plot([th2, th2], axes[2:], \"k--\", linewidth=2)\nplt.text(th1 - 0.01, axes[2] + 0.05, \"Depth=0\",\n horizontalalignment=\"right\", fontsize=15)\nplt.text(th2 - 0.01, axes[2] + 0.05, \"Depth=1\",\n 
horizontalalignment=\"right\", fontsize=13)\nplt.axis(axes)\nplt.legend(loc=(0.32, 0.67))\nplt.show()"]},{"cell_type":"code","execution_count":1,"id":"3c23706a-eba6-431d-a656-b68dd5b6cc4e","metadata":{},"outputs":[],"source":["tree_clf_tweaked = DecisionTreeClassifier(max_depth=2, random_state=40)\ntree_clf_tweaked.fit(X_iris, y_iris)"]},{"cell_type":"code","execution_count":1,"id":"66d01e1e-860a-44bd-9006-b1dfa36b1353","metadata":{},"outputs":[],"source":["plt.figure(figsize=(8, 4))\ny_pred = tree_clf_tweaked.predict(X_iris_all).reshape(lengths.shape)\nplt.contourf(lengths, widths, y_pred, alpha=0.3, cmap=custom_cmap)\n\nfor idx, (name, style) in enumerate(zip(iris.target_names, (\"yo\", \"bs\", \"g^\"))):\n plt.plot(X_iris[:, 0][y_iris == idx], X_iris[:, 1][y_iris == idx],\n style, label=f\"Iris {name}\")\n\nth0, th1 = tree_clf_tweaked.tree_.threshold[[0, 2]]\nplt.plot([0, 7.2], [th0, th0], \"k-\", linewidth=2)\nplt.plot([0, 7.2], [th1, th1], \"k--\", linewidth=2)\nplt.text(1.8, th0 + 0.05, \"Depth=0\", verticalalignment=\"bottom\", fontsize=15)\nplt.text(2.3, th1 + 0.05, \"Depth=1\", verticalalignment=\"bottom\", fontsize=13)\nplt.xlabel(\"Petal length (cm)\")\nplt.ylabel(\"Petal width (cm)\")\nplt.axis([0, 7.2, 0, 3])\nplt.legend()\nplt.show()"]}],"metadata":{"org":null,"kernelspec":{"display_name":"Python 3","language":"python","name":"python3"},"language_info":{"codemirror_mode":{"name":"ipython","version":3},"file_extension":".py","mimetype":"text/x-python","name":"python","nbconvert_exporter":"python","pygments_lexer":"ipython3","version":"3.5.2"}},"nbformat":4,"nbformat_minor":5}
Loading

0 comments on commit b105341

Please sign in to comment.