Merge branch 'main' into protein_tests

CS342 · Mar 14, 2024 · df0d928 · df0d928
2 parents 445668a + 92790a7
commit df0d928
Show file tree

Hide file tree

Showing 4 changed files with 256 additions and 0 deletions.
diff --git a/.gitignore b/.gitignore
@@ -46,3 +46,20 @@ firebase-debug.*.log*
 
 # Swift Package List
 Stronger/package-list.json
+
+# PyBuilder
+.pybuilder/
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+.virtual_documents
+
+# IPython
+profile_default/
+ipython_config.py
+
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
diff --git a/StrongerDataExport/StrongerDataExport.ipynb b/StrongerDataExport/StrongerDataExport.ipynb
@@ -0,0 +1,104 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "jhj9wvlZY5yg"
+   },
+   "source": [
+    "# Import Stronger Data Export module"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from StrongerDataExport import *"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Connect to Firebase"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "id": "7Eq4wNI1aVZC"
+   },
+   "outputs": [],
+   "source": [
+    "# Define your Firebase project ID\n",
+    "project_id = 'cs342-2024-stronger'\n",
+    "\n",
+    "# Set your service account credentials for a production project\n",
+    "serviceAccountKey_file = 'path_to_service_account_key_file/service_account_key.json'\n",
+    "\n",
+    "db = connect_to_firebase(project_id, serviceAccountKey_file)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Export proterin intake and exercise data"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "id": "1Qow-2oddcoF"
+   },
+   "outputs": [],
+   "source": [
+    "users_df, protein_df, exercise_df = process_data(db)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/",
+     "height": 201
+    },
+    "id": "gurhvOcNi83W",
+    "outputId": "0803d441-8ee4-49fd-cc84-056589477de3"
+   },
+   "outputs": [],
+   "source": [
+    "exercise_df.head()"
+   ]
+  }
+ ],
+ "metadata": {
+  "colab": {
+   "provenance": []
+  },
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.9"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
diff --git a/StrongerDataExport/StrongerDataExport.ipynb.license b/StrongerDataExport/StrongerDataExport.ipynb.license
@@ -0,0 +1,6 @@
+
+This source file is part of the Stanford Spezi open-source project
+
+SPDX-FileCopyrightText: 2024 Stanford University and the project authors (see CONTRIBUTORS.md)
+
+SPDX-License-Identifier: MIT
diff --git a/StrongerDataExport/StrongerDataExport.py b/StrongerDataExport/StrongerDataExport.py
@@ -0,0 +1,129 @@
+#
+# This source file is part of the Stanford Spezi open-source project
+#
+# SPDX-FileCopyrightText: 2024 Stanford University and the project authors (see CONTRIBUTORS.md)
+#
+# SPDX-License-Identifier: MIT
+#
+
+# Standard Library Imports
+import os
+from datetime import datetime
+from typing import List, Dict
+
+# Firebase and Google Cloud Firestore Imports
+import firebase_admin
+from firebase_admin import credentials, firestore
+from google.cloud.firestore_v1.client import Client
+
+# Data Handling and Scientific Computing Libraries
+import pandas as pd
+import numpy as np
+
+
+def connect_to_firebase(project_id: str, serviceAccountKey_file: str = None) -> Client:
+    """Return a Firestore client for the local emulator or a production project.
+
+    Two call patterns are supported:
+
+    * Neither ``project_id`` nor ``serviceAccountKey_file`` is given: point the
+      environment at a Firestore emulator on ``localhost:8080`` and connect
+      with the project ID ``strongerdatapipeline``.
+    * Both are given: initialize the Firebase Admin SDK from the service
+      account key file and return its Firestore client. ``project_id`` only
+      selects this branch; the SDK is initialized from the key file alone.
+
+    The Firebase app is initialized at most once per process, so the function
+    can be called repeatedly (e.g. when a notebook cell is re-run).
+
+    Args:
+        project_id: Firebase project ID, or a falsy value for the emulator.
+        serviceAccountKey_file: Path to the service account key JSON file, or
+            a falsy value for the emulator.
+
+    Returns:
+        A Firestore client.
+
+    Raises:
+        ValueError: If exactly one of the two arguments is provided.
+    """
+    if not serviceAccountKey_file and not project_id:
+        project_id = "strongerdatapipeline"
+        os.environ["FIRESTORE_EMULATOR_HOST"] = "localhost:8080"
+        os.environ["GCLOUD_PROJECT"] = project_id
+        # initialize_app() raises if the default app already exists, so only
+        # initialize it on the first call.
+        if not firebase_admin._apps:
+            firebase_admin.initialize_app(options={'projectId': project_id})
+        return firestore.Client(project=project_id)
+
+    if serviceAccountKey_file and project_id:
+        if not firebase_admin._apps:
+            cred = credentials.Certificate(serviceAccountKey_file)
+            firebase_admin.initialize_app(cred)
+        # Fetch the client outside the guard so a repeated call still returns
+        # a client instead of failing on an unassigned variable.
+        return firestore.client()
+
+    raise ValueError(
+        "Provide both project_id and serviceAccountKey_file for a production "
+        "project, or neither to use the local Firestore emulator."
+    )
+
+
+def fetch_data(db: Client, collection_name: str = 'users') -> Dict[str, List[Dict]]:
+    """Collect every user's ``ProteinIntake`` and ``exerciseLog`` entries.
+
+    Each document streamed from ``collection_name`` is treated as a user. For
+    every entry in that user's two sub-collections, one flat record is built:
+    ``user_id`` first, then the user's own fields, then the entry's fields
+    (an entry field overrides a user field of the same name).
+
+    Args:
+        db: Firestore client to read from.
+        collection_name: Name of the top-level collection holding the users.
+
+    Returns:
+        A dict with the keys ``"ProteinIntake"`` and ``"exerciseLog"``, each
+        mapping to the list of flat records gathered across all users.
+    """
+    collection = db.collection(collection_name)
+    data = {"ProteinIntake": [], "exerciseLog": []}
+
+    for user_doc in collection.stream():
+        user_fields = {'user_id': user_doc.id, **user_doc.to_dict()}
+
+        # The result keys double as the sub-collection names to read.
+        for subcollection, records in data.items():
+            entries = collection.document(user_doc.id).collection(subcollection).stream()
+            for entry in entries:
+                records.append({**user_fields, **entry.to_dict()})
+
+    return data
+
+
+def flatten_data(data: Dict[str, List[Dict]], save_as_csv: bool = True) -> tuple[pd.DataFrame, pd.DataFrame]:
+    """Turn the fetched record lists into DataFrames, optionally saving them.
+
+    Args:
+        data: Dict with the keys ``"ProteinIntake"`` and ``"exerciseLog"``,
+            each holding a list of record dicts.
+        save_as_csv: When true, write both frames to date-stamped CSV files
+            (``protein_intake_<YYYY-MM-DD>.csv`` and
+            ``exercise_log_<YYYY-MM-DD>.csv``) via ``save_dataframe_to_csv``.
+
+    Returns:
+        The pair ``(protein_df, exercise_df)``.
+    """
+    protein_df, exercise_df = (
+        pd.DataFrame(data[key]) for key in ("ProteinIntake", "exerciseLog")
+    )
+
+    if not save_as_csv:
+        return protein_df, exercise_df
+
+    protein_file = f'protein_intake_{datetime.now().strftime("%Y-%m-%d")}.csv'
+    save_dataframe_to_csv(protein_df, protein_file)
+    exercise_file = f'exercise_log_{datetime.now().strftime("%Y-%m-%d")}.csv'
+    save_dataframe_to_csv(exercise_df, exercise_file)
+
+    return protein_df, exercise_df
+
+
+def process_data(db: Client, collection_name: str = 'users', save_as_csv: bool = True) -> tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame]:
+    """Fetch users, protein intake and exercise data as three DataFrames.
+
+    Args:
+        db: Firestore client to read from.
+        collection_name: Name of the top-level collection holding the users.
+        save_as_csv: When true, the protein and exercise frames are also
+            written to CSV by ``flatten_data``. The users frame is not saved
+            here; call ``fetch_users_list(..., save_as_csv=True)`` for that.
+
+    Returns:
+        The triple ``(users_df, protein_df, exercise_df)``.
+    """
+    users_df = fetch_users_list(db, collection_name)
+    data = fetch_data(db, collection_name)
+    # Forward the caller's choice so save_as_csv=False really suppresses the
+    # CSV files instead of falling back to flatten_data's default.
+    protein_df, exercise_df = flatten_data(data, save_as_csv=save_as_csv)
+
+    return users_df, protein_df, exercise_df
+
+
+def save_dataframe_to_csv(df: pd.DataFrame, filename: str) -> None:
+    """Write ``df`` to ``filename`` as CSV, leaving out the index column."""
+    df.to_csv(path_or_buf=filename, index=False)
+
+
+def fetch_users_list(db: Client, collection_name: str = 'users', save_as_csv: bool = False) -> pd.DataFrame:
+    """Return one row per user document, with the document ID as first column.
+
+    Documents whose field dict is empty are skipped. Fields missing from some
+    users are filled in by pandas as missing values, so every row has the same
+    columns.
+
+    Args:
+        db: Firestore client to read from.
+        collection_name: Name of the top-level collection holding the users.
+        save_as_csv: When true, also write the frame to
+            ``users_list_<YYYY-MM-DD>.csv`` via ``save_dataframe_to_csv``.
+
+    Returns:
+        A DataFrame whose first column is ``'User Document ID'``, followed by
+        the user fields. With no users, the frame is empty but still has the
+        ``'User Document ID'`` column.
+    """
+    users_data = []
+    for user in db.collection(collection_name).stream():
+        user_data = user.to_dict()
+        if user_data:
+            users_data.append({**user_data, 'User Document ID': user.id})
+
+    if users_data:
+        # Building a frame from a list of dicts already yields the union of
+        # all keys as columns, so only the ID column needs to be moved.
+        df = pd.DataFrame(users_data)
+        column_order = ['User Document ID'] + [col for col in df.columns if col != 'User Document ID']
+        df = df[column_order]
+    else:
+        # An empty frame has no columns to select from; create the ID column
+        # explicitly so the result keeps a predictable shape.
+        df = pd.DataFrame(columns=['User Document ID'])
+
+    if save_as_csv:
+        filename = f'users_list_{datetime.now().strftime("%Y-%m-%d")}.csv'
+        save_dataframe_to_csv(df, filename)
+
+    return df
+
+
+def convert_to_snake_case(s: str) -> str:
+    """Lower-case ``s`` and replace every space with an underscore."""
+    lowered = s.lower()
+    return "_".join(lowered.split(" "))