Skip to content

Commit

Permalink
Merge branch 'main' into protein_tests
Browse files Browse the repository at this point in the history
  • Loading branch information
terribilis authored Mar 14, 2024
2 parents 445668a + 92790a7 commit df0d928
Show file tree
Hide file tree
Showing 4 changed files with 256 additions and 0 deletions.
17 changes: 17 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -46,3 +46,20 @@ firebase-debug.*.log*

# Swift Package List
Stronger/package-list.json

# PyBuilder
.pybuilder/
target/

# Jupyter Notebook
.ipynb_checkpoints
.virtual_documents

# IPython
profile_default/
ipython_config.py

# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
104 changes: 104 additions & 0 deletions StrongerDataExport/StrongerDataExport.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "jhj9wvlZY5yg"
},
"source": [
"# Import Stronger Data Export module"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from StrongerDataExport import *"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Connect to Firebase"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "7Eq4wNI1aVZC"
},
"outputs": [],
"source": [
"# Define your Firebase project ID\n",
"project_id = 'cs342-2024-stronger'\n",
"\n",
"# Set your service account credentials for a production project\n",
"serviceAccountKey_file = 'path_to_service_account_key_file/service_account_key.json'\n",
"\n",
"db = connect_to_firebase(project_id, serviceAccountKey_file)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Export proterin intake and exercise data"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "1Qow-2oddcoF"
},
"outputs": [],
"source": [
"users_df, protein_df, exercise_df = process_data(db)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 201
},
"id": "gurhvOcNi83W",
"outputId": "0803d441-8ee4-49fd-cc84-056589477de3"
},
"outputs": [],
"source": [
"exercise_df.head()"
]
}
],
"metadata": {
"colab": {
"provenance": []
},
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.9"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
6 changes: 6 additions & 0 deletions StrongerDataExport/StrongerDataExport.ipynb.license
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@

This source file is part of the Stanford Spezi open-source project

SPDX-FileCopyrightText: 2024 Stanford University and the project authors (see CONTRIBUTORS.md)

SPDX-License-Identifier: MIT
129 changes: 129 additions & 0 deletions StrongerDataExport/StrongerDataExport.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,129 @@
#
# This source file is part of the Stanford Spezi open-source project
#
# SPDX-FileCopyrightText: 2024 Stanford University and the project authors (see CONTRIBUTORS.md)
#
# SPDX-License-Identifier: MIT
#

# Standard Library Imports
import os
from datetime import datetime
from typing import List, Dict

# Firebase and Google Cloud Firestore Imports
import firebase_admin
from firebase_admin import credentials, firestore
from google.cloud.firestore_v1.client import Client

# Data Handling and Scientific Computing Libraries
import pandas as pd
import numpy as np


def connect_to_firebase(project_id: str, serviceAccountKey_file: str = None) -> Client:
    """Return a Firestore client, either against the local emulator or production.

    Two supported call patterns:
      * No arguments (both falsy): connect to the local Firestore emulator on
        localhost:8080 under a default project id.
      * Both ``project_id`` and ``serviceAccountKey_file``: authenticate against
        the production project with the given service-account key.

    Raises:
        ValueError: if exactly one of the two arguments is provided — previously
            this fell through both branches and crashed with UnboundLocalError.
    """
    if not serviceAccountKey_file and not project_id:
        # Emulator mode: point the client libraries at the local emulator.
        project_id = "strongerdatapipeline"
        os.environ["FIRESTORE_EMULATOR_HOST"] = "localhost:8080"
        os.environ["GCLOUD_PROJECT"] = project_id
        # Guard against double initialization, matching the production branch.
        if not firebase_admin._apps:
            firebase_admin.initialize_app(options={'projectId': project_id})
        db = firestore.Client(project=project_id)

    elif serviceAccountKey_file and project_id:
        # Production mode: authenticate with the service-account key.
        if not firebase_admin._apps:
            cred = credentials.Certificate(serviceAccountKey_file)
            firebase_admin.initialize_app(cred)
        db = firestore.client()

    else:
        # Bug fix: previously this case left `db` unbound and raised
        # UnboundLocalError at the return statement.
        raise ValueError(
            "Provide both project_id and serviceAccountKey_file for production, "
            "or neither to use the local emulator."
        )

    return db


def fetch_data(db: Client, collection_name: str = 'users') -> Dict[str, List[Dict]]:
    """Collect every ProteinIntake and exerciseLog record across all users.

    Each returned record is the parent user document's fields (plus a
    ``user_id`` key) merged with the subcollection document's fields.

    Returns:
        A dict with keys ``"ProteinIntake"`` and ``"exerciseLog"``, each
        mapping to a list of flattened record dicts.
    """
    users_ref = db.collection(collection_name)
    data: Dict[str, List[Dict]] = {"ProteinIntake": [], "exerciseLog": []}

    for user_doc in users_ref.stream():
        # Base record: user id first, then all fields from the user document.
        base_record = {'user_id': user_doc.id}
        base_record.update(user_doc.to_dict())

        # Walk both subcollections in a fixed order (protein first, then
        # exercise) so output ordering matches per-user grouping.
        for subcollection in ("ProteinIntake", "exerciseLog"):
            sub_ref = users_ref.document(user_doc.id).collection(subcollection)
            for record_doc in sub_ref.stream():
                merged = base_record.copy()
                merged.update(record_doc.to_dict())
                data[subcollection].append(merged)

    return data


def flatten_data(data: Dict[str, List[Dict]], save_as_csv: bool = True) -> tuple[pd.DataFrame, pd.DataFrame]:
    """Convert the fetched record lists into two DataFrames.

    Args:
        data: Mapping with ``"ProteinIntake"`` and ``"exerciseLog"`` record lists.
        save_as_csv: When True, also write each frame to a date-stamped CSV file
            in the current working directory.

    Returns:
        ``(protein_df, exercise_df)`` as pandas DataFrames.
    """
    protein_frame = pd.DataFrame(data["ProteinIntake"])
    exercise_frame = pd.DataFrame(data["exerciseLog"])

    if save_as_csv:
        # Hoist the date stamp so both filenames share the same day string.
        stamp = datetime.now().strftime("%Y-%m-%d")
        save_dataframe_to_csv(protein_frame, f'protein_intake_{stamp}.csv')
        save_dataframe_to_csv(exercise_frame, f'exercise_log_{stamp}.csv')

    return protein_frame, exercise_frame


def process_data(db: Client, collection_name: str = 'users', save_as_csv: bool = True) -> tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame]:
    """Fetch and flatten all user, protein-intake, and exercise data.

    Args:
        db: Connected Firestore client (see ``connect_to_firebase``).
        collection_name: Top-level users collection to read from.
        save_as_csv: When True, ``flatten_data`` also writes date-stamped CSVs.

    Returns:
        ``(users_df, protein_df, exercise_df)`` — three pandas DataFrames.
        (The return annotation previously claimed a 2-tuple; three values
        are returned.)
    """
    users_df = fetch_users_list(db, collection_name)
    data = fetch_data(db, collection_name)
    # Bug fix: save_as_csv was accepted but never forwarded, so CSV files
    # were always written regardless of the caller's choice.
    protein_df, exercise_df = flatten_data(data, save_as_csv=save_as_csv)

    return users_df, protein_df, exercise_df


def save_dataframe_to_csv(df: pd.DataFrame, filename: str) -> None:
    """Write *df* to *filename* as CSV without the index column."""
    df.to_csv(path_or_buf=filename, index=False)


def fetch_users_list(db: Client, collection_name: str = 'users', save_as_csv: bool = False) -> pd.DataFrame:
    """Return a DataFrame of all user documents, one row per user.

    The document id is stored in a 'User Document ID' column, which is moved
    to the front. Users whose document has no fields are skipped.

    Args:
        db: Connected Firestore client.
        collection_name: Top-level users collection to read from.
        save_as_csv: When True, also write the frame to a date-stamped CSV.
    """
    users_data = []
    all_identifiers = set()

    for user in db.collection(collection_name).stream():
        user_data = user.to_dict()
        if user_data:
            user_data['User Document ID'] = user.id
            users_data.append(user_data)
            all_identifiers.update(user_data.keys())

    df = pd.DataFrame(users_data)

    # Bug fix: with zero users the DataFrame has no columns, and the reorder
    # below raised KeyError on 'User Document ID'. Ensure the column exists.
    if 'User Document ID' not in df.columns:
        df['User Document ID'] = None

    # This step is optional and depends on the need for consistency in the DataFrame's structure
    for identifier in all_identifiers:
        if identifier not in df.columns:
            df[identifier] = None

    # Put the document id first; keep the remaining column order as-is.
    column_order = ['User Document ID'] + [col for col in df.columns if col != 'User Document ID']
    df = df[column_order]

    if save_as_csv:
        filename = f'users_list_{datetime.now().strftime("%Y-%m-%d")}.csv'
        save_dataframe_to_csv(df, filename)

    return df


def convert_to_snake_case(s: str) -> str:
    """Lower-case *s* and substitute an underscore for every space."""
    return "_".join(s.split(" ")).lower()

0 comments on commit df0d928

Please sign in to comment.