Commit: Release 0.3.5 (#72)
* Release 0.3.5

* Add devcontainer badge to README.md

* Add missing dotfiles

* Delete removed files

* Add missing pre-commit config

* Fix extra argument for pre-commit
agrinh authored Oct 24, 2023
1 parent 2ae92df commit d47af78
Showing 60 changed files with 5,700 additions and 2,313 deletions.
10 changes: 4 additions & 6 deletions .devcontainer/devcontainer.json
@@ -6,10 +6,7 @@
         "target": "dev"
     },
     "runArgs": [
-        // The default shared memory size of Docker is 64MB which can lead to memory
-        // issues when using Pytorch dataloaders with multiple workers.
-        // See https://github.com/aws/sagemaker-python-sdk/issues/937 and
-        // https://github.com/pytorch/pytorch#docker-image.
+        // give headspace for pytorch dataloaders passing tensors across processes
         "--shm-size=1gb"
     ],
     "containerEnv": {
@@ -22,7 +19,8 @@
     "hostRequirements": {
         "gpu": "optional" // Mount GPU(s) if available
     },
-    "postStartCommand": "git config --global core.editor \"code --wait\"",
+    "postCreateCommand": "pre-commit install",
+    "postStartCommand": "git config --global core.editor \"code --wait\" && poetry install",
     "shutdownAction": "none",
     "customizations": {
         "vscode": {
@@ -33,7 +31,7 @@
             "python.testing.pytestEnabled": true,
             "python.testing.pytestArgs": [
                 "--continue-on-collection-errors",
-                "causica/test"
+                "test"
             ],
             "python.testing.unittestEnabled": false,
             "vim.textwidth": 120,
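The shared-memory comment in the hunk above refers to how PyTorch DataLoader workers hand tensors back to the parent process: through POSIX shared memory backed by `/dev/shm`, which is exactly what `--shm-size` enlarges. A minimal stdlib sketch of that mechanism (no PyTorch required; the payload here is plain bytes standing in for tensor storage):

```python
from multiprocessing import shared_memory

# Producer side: allocate a segment in /dev/shm and write into it.
# DataLoader workers allocate segments like this for each tensor they return.
shm = shared_memory.SharedMemory(create=True, size=1024 * 1024)
shm.buf[:5] = b"hello"

# Consumer side (normally a different process): attach by name and read back.
peer = shared_memory.SharedMemory(name=shm.name)
roundtrip = bytes(peer.buf[:5])

peer.close()
shm.close()
shm.unlink()  # free the segment; PyTorch does the equivalent on teardown

print(roundtrip)  # b'hello'
```

With many workers and large batches these segments add up, which is why the default 64MB `/dev/shm` in Docker runs out and the devcontainer bumps it to 1GB.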
32 changes: 13 additions & 19 deletions .github/workflows/ci-build.yml
@@ -14,47 +14,41 @@ jobs:

     steps:
       - uses: actions/checkout@v3
-        with:
-          path: "repo"

       - uses: actions/setup-python@v2
         with:
-          python-version: "3.9"
+          python-version: "3.10"

       - uses: actions/cache@v3
         with:
           path: ~/.cache/pypoetry/virtualenvs
-          key: ${{ hashFiles('repo/poetry.lock') }}
+          key: ${{ hashFiles('poetry.lock') }}
         id: cache

       - name: Install poetry
         run: |
           curl -sSL https://install.python-poetry.org | python3 -
       - name: Install dependencies
         shell: bash -l {0}
         run: |
-          cd repo
-          poetry env use 3.9
+          poetry env use 3.10
           poetry install
         if: steps.cache.outputs.cache-hit != 'true'

+      - name: Verify pre-commit checks
+        uses: pre-commit/[email protected]
+        with:
+          extra_args: --all-files

       - name: Test with pytest
         shell: bash -l {0}
-        run: |
-          cd repo
-          poetry run python -m pytest ./test
+        run: poetry run python -m pytest ./test

       - name: Create build artifact
         shell: bash -l {0}
-        run: |
-          cd repo
-          poetry build
+        run: poetry build

       - name: Upload build artifact
         # if: github.ref == 'refs/heads/main' # only create artifacts on push to main
         uses: actions/upload-artifact@v3
         with:
           name: Build artifacts
-          path: ./repo/dist
+          path: ./dist
           retention-days: 90
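The cache step keys the Poetry virtualenv cache on a hash of `poetry.lock`; with the `repo/` checkout prefix gone, `hashFiles('poetry.lock')` now resolves at the workspace root. Conceptually the key is just a content digest of the lock file, so any dependency change invalidates the cache. A rough stdlib analogue (the real `hashFiles()` expression uses its own SHA-256 scheme; the file contents below are illustrative):

```python
import hashlib
import tempfile
from pathlib import Path

def lockfile_cache_key(path: Path) -> str:
    # Rough analogue of hashFiles(): a digest of the lock file's bytes,
    # so any dependency change produces a new cache key.
    return hashlib.sha256(path.read_bytes()).hexdigest()

# Hypothetical lock file contents, for illustration only.
lock = Path(tempfile.mkdtemp()) / "poetry.lock"
lock.write_text('[metadata]\nlock-version = "2.0"\n')
key_before = lockfile_cache_key(lock)

lock.write_text('[metadata]\nlock-version = "2.0"\n# dependency added\n')
key_after = lockfile_cache_key(lock)

print(key_before != key_after)  # True: changed lock -> cache miss -> fresh poetry install
```

An unchanged lock file hits the cache and the `poetry install` step is skipped via `steps.cache.outputs.cache-hit`.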
39 changes: 39 additions & 0 deletions .pre-commit-config.yaml
@@ -0,0 +1,39 @@
repos:
  - repo: local
    hooks:
      - id: black
        name: black
        entry: poetry run black
        language: system
        types_or: [python, jupyter]
        args: [--config=./pyproject.toml]
  - repo: local
    hooks:
      - id: poetry lock
        name: poetry lock check
        entry: poetry lock --check
        pass_filenames: false
        language: system
  - repo: local
    hooks:
      - id: isort
        name: isort
        entry: poetry run isort
        language: system
        types: [python]
  - repo: local
    hooks:
      - id: mypy
        name: mypy
        entry: poetry run mypy
        pass_filenames: false
        language: system
        types: [python]
        args: ["--config-file=pyproject.toml", "."]
  - repo: local
    hooks:
      - id: pylint
        name: pylint
        entry: poetry run pylint
        language: system
        types: [python]
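The `poetry lock --check` hook above fails the commit when `pyproject.toml` has drifted from `poetry.lock`. The check boils down to comparing a content hash recorded in the lock file against a fresh digest of the project file; a heavily simplified, hypothetical sketch of that idea (Poetry's real scheme hashes only the dependency-relevant sections, not raw bytes):

```python
import hashlib

def is_lock_fresh(pyproject_text: str, recorded_hash: str) -> bool:
    # Hypothetical simplification of `poetry lock --check`: compare the
    # content-hash stored in poetry.lock's [metadata] table against a
    # fresh digest of pyproject.toml.
    return hashlib.sha256(pyproject_text.encode()).hexdigest() == recorded_hash

pyproject = '[tool.poetry.dependencies]\npython = "^3.10"\n'
recorded = hashlib.sha256(pyproject.encode()).hexdigest()  # written at lock time

print(is_lock_fresh(pyproject, recorded))                    # True: lock is up to date
print(is_lock_fresh(pyproject + 'numpy = "*"\n', recorded))  # False: run `poetry lock`
```

Running the hook via `pre-commit` means the lock file can never silently fall out of sync with declared dependencies.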
5 changes: 2 additions & 3 deletions Dockerfile
@@ -31,8 +31,7 @@ RUN curl -sSL https://install.python-poetry.org | python3 -

 WORKDIR /workspaces/causica
 COPY pyproject.toml poetry.lock ./
 RUN --mount=type=cache,target=/root/.cache/pypoetry,sharing=locked \
-    mkdir -p src/causica && touch README.md src/causica/__init__.py && \
-    poetry install --only main
+    poetry install --only main --no-root --no-directory

 FROM base as deploy
 COPY . /workspaces/causica
@@ -74,4 +73,4 @@ RUN <<EOT
 EOT

 RUN --mount=type=cache,target=/root/.cache/pypoetry,sharing=locked \
-    poetry install --with dev
+    poetry install --with dev --no-root --no-directory
18 changes: 10 additions & 8 deletions README.md
@@ -1,15 +1,17 @@
 [![Causica CI Build](https://github.com/microsoft/causica/actions/workflows/ci-build.yml/badge.svg)](https://github.com/microsoft/causica/actions/workflows/ci-build.yml)
+[![Open in Dev Containers](https://img.shields.io/static/v1?label=Dev%20Containers&message=Open&color=blue&logo=visualstudiocode)](https://vscode.dev/redirect?url=vscode://ms-vscode-remote.remote-containers/cloneInVolume?url=https://github.com/microsoft/causica)

 # Causica

 ## Overview

 Causal machine learning enables individuals and organizations to make better data-driven decisions. In particular, causal ML allows us to answer “what if” questions about the effect of potential actions on outcomes.

 Causal ML is a nascent area; we aim to enable a **scalable**, **flexible**, **real-world applicable end-to-end** causal inference framework. In particular, we bridge between causal discovery, causal inference, and deep learning to achieve this goal. We aim to develop technology that can automate causal decision-making using existing observational data alone, outputting both the discovered causal relationships and an estimate of the effect of actions simultaneously.

 Causica is a deep learning library for end-to-end causal inference, including both causal discovery and inference. It implements the deep end-to-end inference framework [2] and different alternatives.

 This project splits off interventional decision making from the observational decision making in the [Azua](https://github.com/microsoft/project-azua) repo.

 This codebase has been heavily refactored; you can find the previous version of the code [here](https://github.com/microsoft/causica/releases/tag/v0.0.0).
@@ -26,7 +28,7 @@ pip install causica

 ## About

 Real-world data-driven decision making requires causal inference to ensure the validity of drawn conclusions. However, it is very uncommon to have a-priori perfect knowledge of the causal relationships underlying relevant variables. DECI allows the end user to perform causal inference without having complete knowledge of the causal graph. This is done by combining the causal discovery and causal inference steps in a single model. DECI takes in observational data and outputs ATE and CATE estimates.

 For more information, please refer to the [paper](https://arxiv.org/abs/2202.02195).
@@ -37,7 +39,7 @@ DECI is a generative model that employs an additive noise structural equation mo

 **Simulation-based Causal Inference**

 DECI estimates causal quantities (ATE) by applying the relevant interventions to its learnt causal graph (i.e. mutilating incoming edges to intervened variables) and then sampling from the generative model. This process involves first sampling a vector of exogenous noise from the learnt noise distribution and then forward simulating the SEM until an observation vector is obtained. ATE can be computed via estimating an expectation over the effect variable of interest using Monte Carlo samples of the intervened distribution of observations.
@@ -60,7 +62,7 @@ The Spline model uses a flexible spline flow that is learnt from the data. This

 To use DECI to learn the functional relationships, remove the variational distribution terms from the loss and replace the sample with the known graph.

 ## Further extensions

 For now, we have removed Rhino and DDECI from the codebase but they will be added back. You can still access the previously released versions [here](https://github.com/microsoft/causica/releases/tag/v0.0.0).
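The simulation-based inference described in the README hunk above (sample exogenous noise, forward-simulate the mutilated SEM, average over Monte Carlo samples) can be sketched on a hand-written SEM. The two-variable linear model and its coefficient here are illustrative stand-ins for DECI's learnt graph and functional relationships, not the library's API:

```python
import random

random.seed(0)

# Hand-written SEM over (X, Y):  X := N_x,  Y := 2*X + N_y.
# Intervening do(X := x) mutilates the incoming edge to X, i.e. N_x is ignored.
def simulate_y(do_x: float, n_y: float) -> float:
    return 2.0 * do_x + n_y

def ate(treat: float, control: float, n_samples: int = 10_000) -> float:
    # Monte Carlo expectation over the intervened distribution of Y.
    # Reusing one noise draw for both arms is a variance-reduction
    # convenience, not a requirement of the method.
    total = 0.0
    for _ in range(n_samples):
        n_y = random.gauss(0.0, 1.0)  # sample exogenous noise
        total += simulate_y(treat, n_y) - simulate_y(control, n_y)
    return total / n_samples

print(round(ate(1.0, 0.0), 2))  # 2.0, the true causal effect of X on Y
```

In DECI the graph itself is uncertain, so this simulation is additionally averaged over samples from the learnt posterior over adjacency matrices.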
12 changes: 6 additions & 6 deletions examples/csuite_example.ipynb
@@ -36,7 +36,7 @@
 " JointNoiseModule,\n",
 " create_noise_modules,\n",
 ")\n",
-"from causica.functional_relationships import ICGNN\n",
+"from causica.functional_relationships import DECIEmbedFunctionalRelationships\n",
 "from causica.graph.dag_constraint import calculate_dagness\n",
 "from causica.sem.sem_distribution import SEMDistributionModule\n",
 "from causica.training.auglag import AugLagLossCalculator, AugLagLR, AugLagLRConfig"
@@ -168,12 +168,12 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"icgnn = ICGNN(\n",
+"functional_relationships = DECIEmbedFunctionalRelationships(\n",
 " shapes=tensordict_shapes(dataset_train),\n",
 " embedding_size=32,\n",
 " out_dim_g=32,\n",
-" norm_layer=torch.nn.LayerNorm,\n",
-" res_connection=True,\n",
+" num_layers_g=2,\n",
+" num_layers_zeta=2,\n",
 ")"
]
},
@@ -213,7 +213,7 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"sem_module: SEMDistributionModule = SEMDistributionModule(adjacency_dist, icgnn, noise_module)\n",
+"sem_module: SEMDistributionModule = SEMDistributionModule(adjacency_dist, functional_relationships, noise_module)\n",
 "\n",
 "sem_module.to(device);"
]
@@ -233,7 +233,7 @@
 "outputs": [],
 "source": [
 "modules = {\n",
-"    \"icgnn\": sem_module.functional_relationships,\n",
+"    \"functional_relationships\": sem_module.functional_relationships,\n",
 "    \"vardist\": sem_module.adjacency_module,\n",
 "    \"noise_dist\": sem_module.noise_module,\n",
 "}\n",
126 changes: 67 additions & 59 deletions examples/multi_investment_sales_attribution.ipynb

Large diffs are not rendered by default.
