Skip to content

Commit

Permalink
minor updates.
Browse files Browse the repository at this point in the history
  • Loading branch information
redur committed Apr 9, 2024
1 parent f694d27 commit 3a52dca
Showing 1 changed file with 43 additions and 44 deletions.
87 changes: 43 additions & 44 deletions notebooks/Borehole_Profile_V2_Exploration.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
},
{
"cell_type": "code",
"execution_count": 1,
"execution_count": null,
"id": "eacdbb5e",
"metadata": {},
"outputs": [],
Expand All @@ -21,14 +21,13 @@
},
{
"cell_type": "code",
"execution_count": 2,
"execution_count": null,
"id": "90e4aab6-f21b-422f-9278-2d3a7715bc4f",
"metadata": {},
"outputs": [],
"source": [
"input_directory = DATAPATH / \"data_v2\"\n",
"ground_truth_path = input_directory / \"geoquat_ground_truth.json\"\n",
"predictions_path = input_directory / \"extract\" / \"predictions.json\""
"ground_truth_path = input_directory / \"geoquat_ground_truth.json\""
]
},
{
Expand All @@ -41,7 +40,7 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": null,
"id": "fecb9c65-5fb2-498b-a7c4-439fd2e812b2",
"metadata": {},
"outputs": [],
Expand All @@ -52,7 +51,7 @@
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": null,
"id": "e9be30e3",
"metadata": {},
"outputs": [
Expand All @@ -73,7 +72,7 @@
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": null,
"id": "0882c4a9-e1b3-465e-8da5-42547b61dfeb",
"metadata": {},
"outputs": [
Expand All @@ -91,7 +90,7 @@
},
{
"cell_type": "code",
"execution_count": 6,
"execution_count": null,
"id": "b0fb5110",
"metadata": {},
"outputs": [
Expand Down Expand Up @@ -129,7 +128,7 @@
},
{
"cell_type": "code",
"execution_count": 7,
"execution_count": null,
"id": "697a4493",
"metadata": {},
"outputs": [],
Expand All @@ -143,7 +142,7 @@
},
{
"cell_type": "code",
"execution_count": 8,
"execution_count": null,
"id": "6c06ec7f",
"metadata": {},
"outputs": [],
Expand All @@ -157,7 +156,7 @@
},
{
"cell_type": "code",
"execution_count": 9,
"execution_count": null,
"id": "e85fca47",
"metadata": {},
"outputs": [],
Expand All @@ -175,7 +174,7 @@
},
{
"cell_type": "code",
"execution_count": 10,
"execution_count": null,
"id": "f552de08",
"metadata": {},
"outputs": [
Expand Down Expand Up @@ -220,7 +219,7 @@
},
{
"cell_type": "code",
"execution_count": 34,
"execution_count": null,
"id": "4cba499a",
"metadata": {},
"outputs": [
Expand Down Expand Up @@ -250,7 +249,7 @@
},
{
"cell_type": "code",
"execution_count": 35,
"execution_count": null,
"id": "541445bf",
"metadata": {},
"outputs": [
Expand All @@ -271,7 +270,7 @@
},
{
"cell_type": "code",
"execution_count": 13,
"execution_count": null,
"id": "092bb56a",
"metadata": {},
"outputs": [
Expand All @@ -294,7 +293,7 @@
},
{
"cell_type": "code",
"execution_count": 14,
"execution_count": null,
"id": "c81d5ea1",
"metadata": {},
"outputs": [
Expand Down Expand Up @@ -330,7 +329,7 @@
},
{
"cell_type": "code",
"execution_count": 15,
"execution_count": null,
"id": "6fa25548",
"metadata": {},
"outputs": [
Expand All @@ -354,7 +353,7 @@
},
{
"cell_type": "code",
"execution_count": 16,
"execution_count": null,
"id": "c1f8d4b3",
"metadata": {},
"outputs": [
Expand All @@ -377,7 +376,7 @@
},
{
"cell_type": "code",
"execution_count": 17,
"execution_count": null,
"id": "70a4747f",
"metadata": {},
"outputs": [
Expand Down Expand Up @@ -408,7 +407,7 @@
},
{
"cell_type": "code",
"execution_count": 18,
"execution_count": null,
"id": "10ed5f38",
"metadata": {},
"outputs": [],
Expand All @@ -420,7 +419,7 @@
},
{
"cell_type": "code",
"execution_count": 19,
"execution_count": null,
"id": "6df98c9e",
"metadata": {},
"outputs": [],
Expand All @@ -440,7 +439,7 @@
},
{
"cell_type": "code",
"execution_count": 20,
"execution_count": null,
"id": "4670b639",
"metadata": {},
"outputs": [
Expand Down Expand Up @@ -477,7 +476,6 @@
}
],
"source": [
"number_wrong = 0\n",
"fp = 0\n",
"fn = 0\n",
"tp = 0\n",
Expand All @@ -489,7 +487,7 @@
},
{
"cell_type": "code",
"execution_count": 21,
"execution_count": null,
"id": "72e28be2",
"metadata": {},
"outputs": [
Expand All @@ -516,7 +514,7 @@
},
{
"cell_type": "code",
"execution_count": 22,
"execution_count": null,
"id": "817e7692",
"metadata": {},
"outputs": [
Expand Down Expand Up @@ -548,7 +546,7 @@
},
{
"cell_type": "code",
"execution_count": 23,
"execution_count": null,
"id": "ca0578cc",
"metadata": {},
"outputs": [
Expand Down Expand Up @@ -585,7 +583,7 @@
},
{
"cell_type": "code",
"execution_count": 24,
"execution_count": null,
"id": "cf7656f7",
"metadata": {},
"outputs": [
Expand Down Expand Up @@ -622,7 +620,7 @@
},
{
"cell_type": "code",
"execution_count": 25,
"execution_count": null,
"id": "88826b70",
"metadata": {},
"outputs": [
Expand Down Expand Up @@ -658,7 +656,7 @@
},
{
"cell_type": "code",
"execution_count": 26,
"execution_count": null,
"id": "6a1d3ec5",
"metadata": {},
"outputs": [
Expand Down Expand Up @@ -695,7 +693,7 @@
},
{
"cell_type": "code",
"execution_count": 27,
"execution_count": null,
"id": "543479c2",
"metadata": {},
"outputs": [
Expand Down Expand Up @@ -752,7 +750,7 @@
},
{
"cell_type": "code",
"execution_count": 28,
"execution_count": null,
"id": "9a293a1d",
"metadata": {},
"outputs": [
Expand Down Expand Up @@ -814,7 +812,7 @@
},
{
"cell_type": "code",
"execution_count": 29,
"execution_count": null,
"id": "d686b1b3",
"metadata": {},
"outputs": [
Expand Down Expand Up @@ -861,7 +859,7 @@
},
{
"cell_type": "code",
"execution_count": 30,
"execution_count": null,
"id": "d7a1baac",
"metadata": {},
"outputs": [
Expand Down Expand Up @@ -891,7 +889,7 @@
},
{
"cell_type": "code",
"execution_count": 40,
"execution_count": null,
"id": "d696e5e9",
"metadata": {},
"outputs": [
Expand All @@ -917,7 +915,7 @@
},
{
"cell_type": "code",
"execution_count": 60,
"execution_count": null,
"id": "e5692901",
"metadata": {},
"outputs": [],
Expand All @@ -929,11 +927,12 @@
},
{
"cell_type": "code",
"execution_count": 89,
"execution_count": null,
"id": "4ae5539f",
"metadata": {},
"outputs": [],
"source": [
"# We need to create bins for the number of layers as this is a continuous variable\n",
"X = pd.DataFrame.from_dict(metadata, orient='index')\n",
"bins = np.linspace(0, 50, 6)\n",
"bins = list(bins)\n",
Expand All @@ -943,7 +942,7 @@
},
{
"cell_type": "code",
"execution_count": 90,
"execution_count": null,
"id": "e94817a9",
"metadata": {},
"outputs": [
Expand Down Expand Up @@ -1037,12 +1036,12 @@
}
],
"source": [
"X.groupby(\"number_layers_binned\").count()"
"X.groupby(\"number_layers_binned\").count() # We would like to have at least 6 samples per bin"
]
},
{
"cell_type": "code",
"execution_count": 101,
"execution_count": null,
"id": "4861f963",
"metadata": {},
"outputs": [],
Expand All @@ -1054,18 +1053,18 @@
},
{
"cell_type": "code",
"execution_count": 102,
"execution_count": null,
"id": "99197a07",
"metadata": {},
"outputs": [],
"source": [
"_X, X_test= train_test_split(X, test_size=test_size, random_state=42, stratify=X[[\"language\", \"number_layers_binned\"]]) # it's not straight forward to stratify by number_of_layers as it's continuous.\n",
"_X, X_test= train_test_split(X, test_size=test_size, random_state=42, stratify=X[[\"language\", \"number_layers_binned\"]])\n",
"X_train, X_val = train_test_split(_X, test_size=eval_size/(1-test_size), random_state=42, stratify=_X[[\"language\", \"number_layers_binned\"]])"
]
},
{
"cell_type": "code",
"execution_count": 103,
"execution_count": null,
"id": "9bef204b",
"metadata": {},
"outputs": [
Expand All @@ -1086,7 +1085,7 @@
},
{
"cell_type": "code",
"execution_count": 113,
"execution_count": null,
"id": "aae619eb",
"metadata": {},
"outputs": [
Expand Down Expand Up @@ -1114,7 +1113,7 @@
},
{
"cell_type": "code",
"execution_count": 121,
"execution_count": null,
"id": "efb8962a",
"metadata": {},
"outputs": [
Expand Down

0 comments on commit 3a52dca

Please sign in to comment.