From 42d305be78ef3eeba16e7ec62782ca4dd7214056 Mon Sep 17 00:00:00 2001 From: $Cuentas-AppServices Date: Thu, 9 Nov 2023 13:18:11 +0000 Subject: [PATCH 01/15] first iteration --- .coveragerc | 2 + README.md | 35 ++++++- cloud_function/main.py | 21 ++++ cloud_function/model.py | 167 ++++++++++++++++++++++++++++++++ cloud_function/query.py | 145 +++++++++++++++++++++++++++ cloud_function/repository.py | 123 +++++++++++++++++++++++ cloud_function/requirements.txt | 3 + cloud_function/services.py | 20 ++++ tests/__init__.py | 21 ++++ tests/conftest.py | 44 +++++++++ tests/data/cashflows.py | 11 +++ tests/test_model.py | 131 +++++++++++++++++++++++++ tests/test_services.py | 32 ++++++ 13 files changed, 754 insertions(+), 1 deletion(-) create mode 100644 .coveragerc create mode 100644 cloud_function/main.py create mode 100644 cloud_function/model.py create mode 100644 cloud_function/query.py create mode 100644 cloud_function/repository.py create mode 100644 cloud_function/requirements.txt create mode 100644 cloud_function/services.py create mode 100644 tests/__init__.py create mode 100644 tests/conftest.py create mode 100644 tests/data/cashflows.py create mode 100644 tests/test_model.py create mode 100644 tests/test_services.py diff --git a/.coveragerc b/.coveragerc new file mode 100644 index 0000000..3dbfbb4 --- /dev/null +++ b/.coveragerc @@ -0,0 +1,2 @@ +[run] +omit = tests/* \ No newline at end of file diff --git a/README.md b/README.md index a07dd40..1db2a20 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,35 @@ # irr_calculator -Python solution that calculates irr for a set of accounts +## Introduction +Python solution that calculates irr for a set of entities. + +## Execution +On this section you will find how to run the code locally. + +### Python environment +To execute the solution with your machine you will need an environment with python 3.10.0 and the libraries listed in +requirements.txt. In case you do not have such environment, you can create it as follows with conda: + +``` +conda create -n [] python=3.10.0 pip +pip install -r cloud_function/requirements.txt +``` + +### Testing + +To execute the Python tests use next command on the CLI: + +```commandline +python -m pytest -vv +``` + +It is needed a _.env_ file with the next settings: + +``` +PROJECT= +SOURCE_TABLE= +DESTINATION_TABLE= +DATASET= +``` + +You will also need to provide a Service Account credentials or to use a user account with the right permissions to +interact with BigQuery. \ No newline at end of file diff --git a/cloud_function/main.py b/cloud_function/main.py new file mode 100644 index 0000000..a79183d --- /dev/null +++ b/cloud_function/main.py @@ -0,0 +1,21 @@ +from cloud_function import repository, services + + +def func_entry_point(event, context): + """ + Entry point for the application. This function initializes a BigQuery repository connector and invokes the + IRR pipeline. + + Args: + event: The dictionary with data specific to this type of event. The `@type` field maps to + `type.googleapis.com/google.pubsub.v1.PubsubMessage`. The `data` field maps to the PubsubMessage data + in a base64-encoded string. The `attributes` field maps to the PubsubMessage attributes + if any is present. + context: Metadata of triggering event including `event_id` which maps to the PubsubMessage + messageId, `timestamp` which maps to the PubsubMessage publishTime, `event_type` which maps to + `google.pubsub.topic.publish`, and `resource` which is a dictionary that describes the service + API endpoint pubsub.googleapis.com, the triggering topic's name, and the triggering event type + `type.googleapis.com/google.pubsub.v1.PubsubMessage`. + """ + bq_repository = repository.BiqQueryRepository() + services.irr_pipeline(bq_repository) diff --git a/cloud_function/model.py b/cloud_function/model.py new file mode 100644 index 0000000..853185a --- /dev/null +++ b/cloud_function/model.py @@ -0,0 +1,167 @@ +from dataclasses import dataclass +import datetime as dt +import numpy_financial as npf + +@dataclass(frozen=True) +class Cashflow: + """ + A data class representing a cashflow entry. This class is used to store information about a cashflow, + including the date, inflow, outflow, value, and entity name. The class is frozen to ensure immutability. + + Attributes: + date (datetime.datetime): The date of the cashflow. + inflow (float): The inflow amount. + outflow (float): The outflow amount. + value (float): The net value. + entity_name (str): The name of the associated entity. + """ + date: dt.datetime + inflow: float + outflow: float + value: float + entity_name: str + + def __gt__(self, other): + if self.date is None: + return False + elif other.date is None: + return True + else: + return self.date > other.date + + +@dataclass(frozen=True) +class Irr: + """ + A data class representing Internal Rate of Return (IRR) data for a specific entity. This class is used to store + information about IRR, including the date, monthly IRR value, and entity name. The class is frozen to ensure + immutability. + + Attributes: + date (datetime.datetime): The date of the IRR calculation. + value (float): The monthly IRR value. + entity_name (str): The name of the associated entity. + Properties: + value_annual (float): Calculate the annualized IRR value based on the monthly value. + Methods: + to_dict(): Convert the IRR data to a dictionary for serialization. + """ + date: dt.datetime + value: float + entity_name: str + + @property + def value_annual(self): + """ + Calculate the annualized IRR value based on the monthly value. + + Returns: + float: The annualized IRR value (rounded to 4 decimal places). + """ + return round(((1 + self.value) ** 12) - 1, 4) + + def to_dict(self) -> dict: + """ + Convert the IRR data to a dictionary for serialization. + + Returns: + dict: A dictionary representation of the IRR data. + """ + return { + 'date': self.date.strftime('%Y-%m-%d'), + 'irr_monthly': self.value, + 'irr_annual': self.value_annual, + 'entity_name': self.entity_name + } + + +class Entity: + """ + A class representing an entity with associated cashflows and calculated Internal Rate of Return (IRR) data. + + Args: + entity_name (str): The name of the entity. + Attributes: + entity_name (str): The name of the entity. + sorted_cashflows (list[Cashflow]): A list of sorted Cashflow objects for the entity. + irrs (list[Irr]): A list of calculated IRR data. + Methods: + add_cashflow(cashflow: Cashflow): Add a Cashflow to the entity's list of cashflows. + calculate_irr(): Calculate IRR data based on the entity's cashflows. + """ + def __init__(self, entity_name: str): + self.entity_name: str = entity_name + self.sorted_cashflows: list[Cashflow] = [] + self.irrs: list[Irr] = [] + + def add_cashflow(self, cashflow: Cashflow): + """ + Add a Cashflow to the entity's list of cashflows and ensure the list remains sorted by date. + + Args: + cashflow (Cashflow): The Cashflow to add. + """ + self.sorted_cashflows.append(cashflow) + self.sorted_cashflows = sorted(self.sorted_cashflows) + + def calculate_irr(self): + """ + Calculate IRR data based on the entity's sorted cashflows. This method calculates IRR based on cashflows and + stores the results in the 'irrs' attribute. + + If there are not enough cashflows for calculation, a message is printed. + """ + self.irrs = [] + if self.sorted_cashflows.__len__() < 2: + print(f"Not enough values for {self.entity_name}") + # raise Exception("Not enough values") + # todo: make this to be logged + else: + periodic_cashflow = [self.sorted_cashflows[0].outflow - self.sorted_cashflows[0].inflow] + for cashflow in self.sorted_cashflows[1:]: + periodic_cashflow.append(cashflow.value + cashflow.outflow - cashflow.inflow) + self.irrs.append(Irr(cashflow.date, round(npf.irr(periodic_cashflow), 4), self.entity_name)) + periodic_cashflow[-1] = cashflow.outflow - cashflow.inflow + + def __eq__(self, other): + if not isinstance(other, Entity): + return False + return self.entity_name == other.entity_name + + def __hash__(self): + return hash(self.entity_name) + + +def allocate_cashflows_to_entities(cashflows: list[Cashflow], entities: dict[str: Entity]): + """ + Allocate cashflows to entities based on the entity names. + + Args: + cashflows (list[Cashflow]): A list of Cashflow objects to be allocated to entities. + entities (dict[str, Entity]): A dictionary of entities where keys are entity names, and values are Entity + objects. + Returns: + dict[str, Entity]: A dictionary of entities with updated cashflow data. + """ + for cashflow in cashflows: + entities[cashflow.entity_name].add_cashflow(cashflow) + # todo: get an except catcher for KeyError!? + + return entities + + +def entities_collection_creation(cashflows: list[Cashflow]) -> dict[str: Entity]: + """ + Create a collection of entities based on the provided list of cashflows. + + Args: + cashflows (list[Cashflow]): A list of Cashflow objects from which entities will be created. + Returns: + dict[str, Entity]: A dictionary of entities with entity names as keys and corresponding Entity objects. + """ + entities = {} + entity_names = tuple([cashflow.entity_name for cashflow in cashflows]) + for entity_name in entity_names: + entities[entity_name] = Entity(entity_name) + + return entities diff --git a/cloud_function/query.py b/cloud_function/query.py new file mode 100644 index 0000000..c81810d --- /dev/null +++ b/cloud_function/query.py @@ -0,0 +1,145 @@ +ACCOUNT_LEVEL_QUERY = """ +WITH + max_date AS ( + SELECT + MAX(month_start) AS max_date + FROM + `poetic-dock-367718.publishing.monthly_periodic_asset_portfolio` ), + monthly_periodic_cashflows AS ( + SELECT + acc.account_name, + DATE_ADD(gla.date, INTERVAL 1 DAY) AS date, + CASE + WHEN gla.end_balance_amount_pounds > 0 THEN CASE + WHEN acc.account_currency = 'Pound' THEN gla.end_balance_amount_pounds + ELSE + gla.end_balance_amount_euros + END + ELSE + 0 + END + AS inflow, + CASE + WHEN gla.end_balance_amount_pounds < 0 THEN CASE + WHEN acc.account_currency = 'Pound' THEN - gla.end_balance_amount_pounds + ELSE + - gla.end_balance_amount_euros + END + ELSE + 0 + END + AS outflow + FROM + `poetic-dock-367718.dw_accounting.accounts` acc + INNER JOIN + `poetic-dock-367718.dw_accounting.gl_monthly_periodic_snapshot` gla + ON + gla.account_id = acc.account_id + WHERE + acc.account_family = 'Stock Market' + AND gla.ledger_book_name = 'Accounting App') +SELECT + ass.account_name AS entity_name, + ass.month_start AS date, + CASE + WHEN ass.account_currency = 'Pound' THEN ass.end_balance_amount_pounds + ELSE + ass.end_balance_amount_euros +END + AS value, + COALESCE(pas.inflow, 0) AS inflow, + COALESCE(pas.outflow,0) AS outflow +FROM + `poetic-dock-367718.publishing.monthly_periodic_asset_portfolio` ass +LEFT JOIN + monthly_periodic_cashflows pas +ON + ass.month_start = pas.date + AND ass.account_name = pas.account_name +CROSS JOIN + max_date md +WHERE + ass.account_family = 'Stock Market' + AND ass.month_start <= md.max_date + AND ass.month_start NOT IN ('2023-06-01', + '2023-07-01', + '2023-08-01', + '2023-09-01', + '2023-10-01') + AND NOT ( ass.account_name = 'DeGiro - IPCO' AND ass.month_start = '2019-04-01' ) +UNION ALL +SELECT + ass.account_name AS entity_name, + pas.date, + CASE + WHEN ass.account_currency = 'Pound' THEN ass.end_balance_amount_pounds + ELSE + ass.end_balance_amount_euros +END + AS value, + COALESCE(pas.inflow, 0) AS inflow, + COALESCE(pas.outflow,0) AS outflow +FROM ( + SELECT + * + FROM + `poetic-dock-367718.publishing.monthly_periodic_asset_portfolio` + WHERE + month_start = '2023-05-01' + AND account_family = 'Stock Market' ) ass +RIGHT JOIN ( + SELECT + acc.account_name, + DATE_ADD(gla.date, INTERVAL 1 DAY) AS date, + CASE + WHEN gla.end_balance_amount_pounds > 0 THEN CASE + WHEN acc.account_currency = 'Pound' THEN gla.end_balance_amount_pounds + ELSE + gla.end_balance_amount_euros + END + ELSE + 0 + END + AS inflow, + CASE + WHEN gla.end_balance_amount_pounds < 0 THEN CASE + WHEN acc.account_currency = 'Pound' THEN - gla.end_balance_amount_pounds + ELSE + - gla.end_balance_amount_euros + END + ELSE + 0 + END + AS outflow + FROM + `poetic-dock-367718.dw_accounting.accounts` acc + INNER JOIN + `poetic-dock-367718.dw_accounting.gl_monthly_periodic_snapshot` gla + ON + gla.account_id = acc.account_id + WHERE + acc.account_family = 'Stock Market' + AND gla.ledger_book_name = 'Accounting App' + AND DATE_ADD(gla.date, INTERVAL 1 DAY) IN ('2023-06-01', + '2023-07-01', + '2023-08-01', + '2023-09-01', + '2023-10-01')) pas +ON + ass.account_name = pas.account_name +INNER JOIN ( + SELECT + account_name, + MIN(month_start) AS min_date, + MAX(month_start) AS max_date + FROM + `poetic-dock-367718.publishing.monthly_periodic_asset_portfolio` + WHERE + account_family = 'Stock Market' + GROUP BY + account_name) md +ON + md.account_name = pas.account_name + AND pas.date <= md.max_date + AND pas.date >= md.min_date; +""" \ No newline at end of file diff --git a/cloud_function/repository.py b/cloud_function/repository.py new file mode 100644 index 0000000..2547fee --- /dev/null +++ b/cloud_function/repository.py @@ -0,0 +1,123 @@ +from abc import ABC, abstractmethod +from google.cloud import bigquery +from cloud_function import model + + +class AbstractRepository(ABC): + """ + An abstract base class for repository interfaces that define methods to interact with a data source. + + Subclasses of AbstractRepository are expected to implement these methods to handle data retrieval and storage. + + Methods: + get_cashflows: Abstract method for retrieving cashflows from the repository. + load_irrs: Abstract method for loading Internal Rate of Return (IRR) data into the repository. + """ + @abstractmethod + def get_cashflows(self) -> list[model.Cashflow]: + """ + Abstract method for retrieving cashflows from the repository. + + Returns: + A list of Cashflow objects representing the cashflows. + + Raises: + NotImplementedError: This method should be implemented by concrete subclasses. + """ + raise NotImplementedError + + @abstractmethod + def load_irrs(self, entities: dict[str: model.Entity]): + """ + Abstract method for loading Internal Rate of Return (IRR) data into the repository. + + Args: + entities (dict): A dictionary of Entity objects for which IRR data will be loaded. + The dictionary maps entity IDs to Entity objects. + + Raises: + NotImplementedError: This method should be implemented by concrete subclasses. + """ + raise NotImplementedError + + +class BiqQueryRepository(AbstractRepository): + """ + A concrete implementation of the AbstractRepository for interacting with Google BigQuery. + This repository class is designed to retrieve cashflows and load Internal Rate of Return (IRR) data into + Google BigQuery. + + Args: + project (str, optional): The Google Cloud project ID. If not specified, the default project is used. + + Attributes: + client (bigquery.Client): The Google BigQuery client instance. + cashflow_source (str): The SQL source query for cashflows. + irr_destination (str): The destination table for IRR data. + + Methods: + get(query: str) -> bigquery.table.RowIterator: Execute a SQL query and return the results. + get_cashflows() -> list[model.Cashflow]: Retrieve cashflow data and convert it to Cashflow objects. + load_table_from_json(data: list[dict], destination: str, job_config: bigquery.job.load.LoadJobConfig): + Load data from a list of dictionaries into a BigQuery table. + load_irrs(entities: dict[str, model.Entity]): Load IRR data from a dictionary of Entity objects. + """ + def __init__(self, project=None): + self.client = bigquery.Client(project=project) + self.cashflow_source = "SELECT * FROM dw_accounting.cashflows" + self.irr_destination = "publishing.entity_irrs" + + def get(self, query: str) -> bigquery.table.RowIterator: + """ + Execute a SQL query and return the results as a RowIterator. + + Args: + query (str): The SQL query to execute. + + Returns: + bigquery.table.RowIterator: An iterator for the query results. + """ + query_job = self.client.query(query) + rows = query_job.result() + + return rows + + def get_cashflows(self) -> list[model.Cashflow]: + """ + Retrieve cashflow data and convert it into a list of Cashflow objects. + + Returns: + list[model.Cashflow]: A list of Cashflow objects representing the cashflows. + """ + cashflows = [] + for row in self.get(self.cashflow_source): + cashflows.append(model.Cashflow(row.date, row.inflow, row.outflow, row.value, row.entity_name)) + + return cashflows + + def load_table_from_json(self, data: list[dict], destination: str, job_config: bigquery.job.load.LoadJobConfig): + """ + Load data from a list of dictionaries into a BigQuery table. + + Args: + data (list[dict]): The data to load as a list of dictionaries. + destination (str): The destination table in BigQuery. + job_config (bigquery.job.load.LoadJobConfig): Job configuration for the load operation. + """ + load_job = self.client.load_table_from_json(data, destination, job_config=job_config) + load_job.result() + + def load_irrs(self, entities: dict[str: model.Entity]): + """ + Load IRR data from a dictionary of Entity objects into BigQuery. This method extracts IRR data from Entity + objects and loads it into the specified BigQuery destination table. + + Args: + entities (dict): A dictionary of Entity objects where the keys are entity IDs. + """ + irrs = [irr.to_dict() for entity in entities.values() for irr in entity.irrs] + job_config = bigquery.LoadJobConfig( + write_disposition=bigquery.WriteDisposition.WRITE_TRUNCATE, + source_format=bigquery.SourceFormat.NEWLINE_DELIMITED_JSON, + ) + self.load_table_from_json(irrs, self.irr_destination, job_config) diff --git a/cloud_function/requirements.txt b/cloud_function/requirements.txt new file mode 100644 index 0000000..ff902cd --- /dev/null +++ b/cloud_function/requirements.txt @@ -0,0 +1,3 @@ +numpy-financial==1.0.0 +google-cloud-bigquery==3.13.0 +python-dotenv==0.14.0 \ No newline at end of file diff --git a/cloud_function/services.py b/cloud_function/services.py new file mode 100644 index 0000000..c668547 --- /dev/null +++ b/cloud_function/services.py @@ -0,0 +1,20 @@ +from cloud_function.repository import AbstractRepository +from cloud_function import model + + +def irr_pipeline(repository: AbstractRepository): + """ + Perform an Internal Rate of Return (IRR) data pipeline. The pipeline retrieves cashflows, creates entities, + allocates cashflows to entities, calculates IRRs, and loads IRR data into the repository. + + Args: + repository (AbstractRepository): The repository for data retrieval and storage. + """ + cashflows = repository.get_cashflows() + entities = model.entities_collection_creation(cashflows) + entities = model.allocate_cashflows_to_entities(cashflows, entities) + + for entity in entities.values(): + entity.calculate_irr() + + repository.load_irrs(entities) \ No newline at end of file diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 0000000..47d406f --- /dev/null +++ b/tests/__init__.py @@ -0,0 +1,21 @@ +import os +from dotenv import load_dotenv + + +def env_var_loader(file_name, file_path=None): + """ Method that allows to load env variables in local from a file. + Args: + file_name: path to file with environment variables + file_path: path to the file, if it is not provided it is assumed that the file is in the root of the project + """ + if file_path: + env_path = os.path.join(file_path, file_name) + else: + wd = os.getcwd() + env_path = os.path.join(wd, file_name) + + if os.path.isfile(env_path): + load_dotenv(dotenv_path=env_path) + + +env_var_loader('tests/.env') diff --git a/tests/conftest.py b/tests/conftest.py new file mode 100644 index 0000000..af7c58d --- /dev/null +++ b/tests/conftest.py @@ -0,0 +1,44 @@ +import pytest +from cloud_function.repository import BiqQueryRepository +import os +from tests.data.cashflows import CASHFLOWS +from google.cloud import bigquery + + +@pytest.fixture(scope='session') +def bq_repository(): + """ + Fixture that returns instance of BiqQueryRepository() + + Returns: + instance of BiqQueryRepository() + """ + bq_repository = BiqQueryRepository(project=os.environ['PROJECT']) + bq_repository.cashflow_source = f"SELECT * FROM {os.environ['DATASET']}.{os.environ['SOURCE_TABLE']}" + bq_repository.irr_destination = os.environ['DATASET'] + '.' + os.environ['DESTINATION_TABLE'] + + return bq_repository + + +@pytest.fixture(scope='function') +def repository_with_cashflows(bq_repository): + """ + Fixture that creates a cashflow table on destination BigQuery project. Also load data into table. + + Args: + bq_repository: instance of BiqQueryRepository() + + Returns: + instance of BiqQueryRepository() where a cashflow table has been created + """ + job_config = bigquery.LoadJobConfig( + write_disposition=bigquery.WriteDisposition.WRITE_TRUNCATE, + source_format=bigquery.SourceFormat.NEWLINE_DELIMITED_JSON, + ) + bq_repository.client.create_dataset(os.environ['DATASET'], exists_ok=True) + bq_repository.load_table_from_json(CASHFLOWS, os.environ['DATASET'] + '.' + os.environ['SOURCE_TABLE'], job_config) + + yield bq_repository + + bq_repository.client.delete_table(os.environ['DATASET'] + '.' + os.environ['SOURCE_TABLE']) + bq_repository.client.delete_table(os.environ['DATASET'] + '.' + os.environ['DESTINATION_TABLE']) diff --git a/tests/data/cashflows.py b/tests/data/cashflows.py new file mode 100644 index 0000000..25b427c --- /dev/null +++ b/tests/data/cashflows.py @@ -0,0 +1,11 @@ +import datetime as dt + +CASHFLOWS = [ + {'date': '2022-01-01', 'inflow': 1000, 'outflow': 0, 'value': 1000, 'entity_name': 'Test Account 1'}, + {'date': '2022-02-01', 'inflow': 0, 'outflow': 100, 'value': 1000, 'entity_name': 'Test Account 1'}, + {'date': '2022-03-01', 'inflow': 0, 'outflow': 100, 'value': 1000, 'entity_name': 'Test Account 1'}, + {'date': '2022-04-01', 'inflow': 0, 'outflow': 100, 'value': 1000, 'entity_name': 'Test Account 1'}, + {'date': '2022-05-01', 'inflow': 0, 'outflow': 100, 'value': 1000, 'entity_name': 'Test Account 1'}, + {'date': '2022-03-01', 'inflow': 1000, 'outflow': 0, 'value': 1000, 'entity_name': 'Test Account 2'}, + {'date': '2022-04-01', 'inflow': 0, 'outflow': 0, 'value': 1100, 'entity_name': 'Test Account 2'}, +] diff --git a/tests/test_model.py b/tests/test_model.py new file mode 100644 index 0000000..a7216a3 --- /dev/null +++ b/tests/test_model.py @@ -0,0 +1,131 @@ +from cloud_function import model +import datetime as dt +import pytest + + +def test_sort_for_cashflow(): + """ + GIVEN a list of elements for value object Cashflow + WHEN this list is sorted [sorted()] + THEN the Cashflow instances has to be sorted by date from older to newer + """ + cashflow1 = model.Cashflow(dt.datetime(2022, 1, 1), 1000, 0, 0, 'test entity') + cashflow2 = model.Cashflow(dt.datetime(2023, 2, 1), 0, 100, 1000, 'test entity') + cashflow3 = model.Cashflow(dt.datetime(1998, 3, 1), 0, 100, 1000, 'test entity') + + assert sorted([cashflow1, cashflow2, cashflow3]) == [cashflow3, cashflow1, cashflow2] + + +def test_sort_for_cashflow_none_cases(): + """ + GIVEN a collection of cashflows which one has None as date + WHEN those cashflows are sorted + THEN cashflow with None as date should be listed first + """ + cashflow1 = model.Cashflow(None, 1000, 0, 0, 'test entity') + cashflow2 = model.Cashflow(dt.datetime(2023, 2, 1), 0, 100, 1000, 'test entity') + + assert sorted([cashflow1, cashflow2]) == [cashflow1, cashflow2] + assert sorted([cashflow2, cashflow1]) == [cashflow1, cashflow2] + + +def test_allocate(): + """ + GIVEN an entity + WHEN cashflows are added to entity (Entity.add_cashflow()) + THEN cashflow collection of entity object (Entity.sorted_cashflows) are updated to include + this cashflow and are sorted + """ + entity_name = 'test entity' + cashflow1 = model.Cashflow(dt.datetime(2022, 1, 1), 100, 100, 100, entity_name) + cashflow2 = model.Cashflow(dt.datetime(2023, 1, 2), 1000, 1000, 1000, entity_name) + entity = model.Entity(entity_name) + entities = {entity_name: entity} + + model.allocate_cashflows_to_entities([cashflow2, cashflow1], entities) + + assert entity.sorted_cashflows == [cashflow1, cashflow2] + + +def test_calculate_irrs(): + """ + GIVEN an entity with a collection of cashflows + WHEN Internal Rate of Return (irr or dcf, Discounted Cash Flow) is calculated (Entity.calculate_irr()) + THEN irrs has to be calculated producing expected results + """ + entity_name = 'test account' + entity = model.Entity(entity_name) + entities = {entity_name: entity} + + model.allocate_cashflows_to_entities( + [ + model.Cashflow(dt.datetime(2022, 1, 1), 1000, 0, 0, entity_name), + model.Cashflow(dt.datetime(2022, 2, 1), 0, 100, 1000, entity_name), + model.Cashflow(dt.datetime(2022, 3, 1), 0, 100, 1000, entity_name) + ], + entities) + + entities[entity_name].calculate_irr() + + assert entities[entity_name].irrs == [ + model.Irr(dt.datetime(2022, 2, 1), 0.1, entity_name), + model.Irr(dt.datetime(2022, 3, 1), 0.1, entity_name) + ] + + +@pytest.mark.parametrize( + "value, expected_result", + [ + (1, 4095), + (-1, -1), + (0.01, 0.1268) + ] +) +def test_cashflow_value_annual(value, expected_result): + """ + GIVEN an Internal Rate of Return with a monthly value + WHEN calling Irr.value_annual + THEN the annualised irr value has to be returned + """ + irr = model.Irr(dt.datetime(2022, 2, 1), value, 'test - account') + + assert irr.value_annual == expected_result + + +def test_irr_to_dict(): + """ + GIVEN an Internal Rate of Return + WHEN it is converted to dict + THEN check that the return is the expected value + """ + irr = model.Irr(dt.datetime(2022, 2, 1), 1, 'test - account') + + assert irr.to_dict() == { + 'date': '2022-02-01', 'irr_monthly': 1, 'irr_annual': 4095, 'entity_name': 'test - account' + } + + +def test_entity_equality(): + """ + GIVEN 2 entities + WHEN they have the same entity name + THEN they are equal + """ + entity_name = 'test entity' + entity1 = model.Entity(entity_name) + entity2 = model.Entity(entity_name) + + assert entity1 == entity2 + + +def test_entity_inequality(): + """ + GIVEN 2 entities + WHEN they have different entity name + THEN they are different + """ + entity1 = model.Entity('test entity') + entity2 = model.Entity('other') + + assert not entity1 == entity2 + assert not entity1 == 1 diff --git a/tests/test_services.py b/tests/test_services.py new file mode 100644 index 0000000..c9e8a93 --- /dev/null +++ b/tests/test_services.py @@ -0,0 +1,32 @@ +import os +from cloud_function import services +from google.cloud.bigquery.table import Row +import datetime as dt + + +def test_irr_pipeline(repository_with_cashflows): + """ + GIVEN some cashflows on bq + WHEN they are processed by irr_pipeline() service + THEN the expected results should be populated into the correct destination table + """ + services.irr_pipeline(repository_with_cashflows) + + results = repository_with_cashflows.get( + f"SELECT * FROM {os.environ['DATASET']}.{os.environ['DESTINATION_TABLE']} ORDER BY entity_name, date" + ) + expected_results = [ + Row(('Test Account 1', 0.1, 2.1384, dt.date(2022, 2, 1)), + {'entity_name': 0, 'irr_monthly': 1, 'irr_annual': 2, 'date': 3}), + Row(('Test Account 1', 0.1, 2.1384, dt.date(2022, 3, 1)), + {'entity_name': 0, 'irr_monthly': 1, 'irr_annual': 2, 'date': 3}), + Row(('Test Account 1', 0.1, 2.1384, dt.date(2022, 4, 1)), + {'entity_name': 0, 'irr_monthly': 1, 'irr_annual': 2, 'date': 3}), + Row(('Test Account 1', 0.1, 2.1384, dt.date(2022, 5, 1)), + {'entity_name': 0, 'irr_monthly': 1, 'irr_annual': 2, 'date': 3}), + Row(('Test Account 2', 0.1, 2.1384, dt.date(2022, 4, 1)), + {'entity_name': 0, 'irr_monthly': 1, 'irr_annual': 2, 'date': 3}) + ] + + for result, expected_result in zip(results, expected_results): + assert result == expected_result \ No newline at end of file From f901416f5616acfb86e4eaa1aee2f6c413b3856c Mon Sep 17 00:00:00 2001 From: $Cuentas-AppServices Date: Thu, 9 Nov 2023 16:06:28 +0000 Subject: [PATCH 02/15] formatted with black --- cloud_function/model.py | 34 +++++--- cloud_function/query.py | 145 ----------------------------------- cloud_function/repository.py | 23 ++++-- cloud_function/services.py | 2 +- tests/__init__.py | 12 +-- tests/conftest.py | 28 ++++--- tests/data/cashflows.py | 56 ++++++++++++-- tests/test_model.py | 51 ++++++------ tests/test_services.py | 32 +++++--- 9 files changed, 167 insertions(+), 216 deletions(-) delete mode 100644 cloud_function/query.py diff --git a/cloud_function/model.py b/cloud_function/model.py index 853185a..6f14527 100644 --- a/cloud_function/model.py +++ b/cloud_function/model.py @@ -2,6 +2,7 @@ import datetime as dt import numpy_financial as npf + @dataclass(frozen=True) class Cashflow: """ @@ -15,6 +16,7 @@ class Cashflow: value (float): The net value. entity_name (str): The name of the associated entity. """ + date: dt.datetime inflow: float outflow: float @@ -46,6 +48,7 @@ class Irr: Methods: to_dict(): Convert the IRR data to a dictionary for serialization. """ + date: dt.datetime value: float entity_name: str @@ -68,10 +71,10 @@ def to_dict(self) -> dict: dict: A dictionary representation of the IRR data. """ return { - 'date': self.date.strftime('%Y-%m-%d'), - 'irr_monthly': self.value, - 'irr_annual': self.value_annual, - 'entity_name': self.entity_name + "date": self.date.strftime("%Y-%m-%d"), + "irr_monthly": self.value, + "irr_annual": self.value_annual, + "entity_name": self.entity_name, } @@ -89,6 +92,7 @@ class Entity: add_cashflow(cashflow: Cashflow): Add a Cashflow to the entity's list of cashflows. calculate_irr(): Calculate IRR data based on the entity's cashflows. """ + def __init__(self, entity_name: str): self.entity_name: str = entity_name self.sorted_cashflows: list[Cashflow] = [] @@ -117,10 +121,20 @@ def calculate_irr(self): # raise Exception("Not enough values") # todo: make this to be logged else: - periodic_cashflow = [self.sorted_cashflows[0].outflow - self.sorted_cashflows[0].inflow] + periodic_cashflow = [ + self.sorted_cashflows[0].outflow - self.sorted_cashflows[0].inflow + ] for cashflow in self.sorted_cashflows[1:]: - periodic_cashflow.append(cashflow.value + cashflow.outflow - cashflow.inflow) - self.irrs.append(Irr(cashflow.date, round(npf.irr(periodic_cashflow), 4), self.entity_name)) + periodic_cashflow.append( + cashflow.value + cashflow.outflow - cashflow.inflow + ) + self.irrs.append( + Irr( + cashflow.date, + round(npf.irr(periodic_cashflow), 4), + self.entity_name, + ) + ) periodic_cashflow[-1] = cashflow.outflow - cashflow.inflow def __eq__(self, other): @@ -132,7 +146,9 @@ def __hash__(self): return hash(self.entity_name) -def allocate_cashflows_to_entities(cashflows: list[Cashflow], entities: dict[str: Entity]): +def allocate_cashflows_to_entities( + cashflows: list[Cashflow], entities: dict[str:Entity] +): """ Allocate cashflows to entities based on the entity names. @@ -150,7 +166,7 @@ def allocate_cashflows_to_entities(cashflows: list[Cashflow], entities: dict[str return entities -def entities_collection_creation(cashflows: list[Cashflow]) -> dict[str: Entity]: +def entities_collection_creation(cashflows: list[Cashflow]) -> dict[str:Entity]: """ Create a collection of entities based on the provided list of cashflows. diff --git a/cloud_function/query.py b/cloud_function/query.py deleted file mode 100644 index c81810d..0000000 --- a/cloud_function/query.py +++ /dev/null @@ -1,145 +0,0 @@ -ACCOUNT_LEVEL_QUERY = """ -WITH - max_date AS ( - SELECT - MAX(month_start) AS max_date - FROM - `poetic-dock-367718.publishing.monthly_periodic_asset_portfolio` ), - monthly_periodic_cashflows AS ( - SELECT - acc.account_name, - DATE_ADD(gla.date, INTERVAL 1 DAY) AS date, - CASE - WHEN gla.end_balance_amount_pounds > 0 THEN CASE - WHEN acc.account_currency = 'Pound' THEN gla.end_balance_amount_pounds - ELSE - gla.end_balance_amount_euros - END - ELSE - 0 - END - AS inflow, - CASE - WHEN gla.end_balance_amount_pounds < 0 THEN CASE - WHEN acc.account_currency = 'Pound' THEN - gla.end_balance_amount_pounds - ELSE - - gla.end_balance_amount_euros - END - ELSE - 0 - END - AS outflow - FROM - `poetic-dock-367718.dw_accounting.accounts` acc - INNER JOIN - `poetic-dock-367718.dw_accounting.gl_monthly_periodic_snapshot` gla - ON - gla.account_id = acc.account_id - WHERE - acc.account_family = 'Stock Market' - AND gla.ledger_book_name = 'Accounting App') -SELECT - ass.account_name AS entity_name, - ass.month_start AS date, - CASE - WHEN ass.account_currency = 'Pound' THEN ass.end_balance_amount_pounds - ELSE - ass.end_balance_amount_euros -END - AS value, - COALESCE(pas.inflow, 0) AS inflow, - COALESCE(pas.outflow,0) AS outflow -FROM - `poetic-dock-367718.publishing.monthly_periodic_asset_portfolio` ass -LEFT JOIN - monthly_periodic_cashflows pas -ON - ass.month_start = pas.date - AND ass.account_name = pas.account_name -CROSS JOIN - max_date md -WHERE - ass.account_family = 'Stock Market' - AND ass.month_start <= md.max_date - AND ass.month_start NOT IN ('2023-06-01', - '2023-07-01', - '2023-08-01', - '2023-09-01', - '2023-10-01') - AND NOT ( ass.account_name = 'DeGiro - IPCO' AND ass.month_start = '2019-04-01' ) -UNION ALL -SELECT - ass.account_name AS entity_name, - pas.date, - CASE - WHEN ass.account_currency = 'Pound' THEN ass.end_balance_amount_pounds - ELSE - ass.end_balance_amount_euros -END - AS value, - COALESCE(pas.inflow, 0) AS inflow, - COALESCE(pas.outflow,0) AS outflow -FROM ( - SELECT - * - FROM - `poetic-dock-367718.publishing.monthly_periodic_asset_portfolio` - WHERE - month_start = '2023-05-01' - AND account_family = 'Stock Market' ) ass -RIGHT JOIN ( - SELECT - acc.account_name, - DATE_ADD(gla.date, INTERVAL 1 DAY) AS date, - CASE - WHEN gla.end_balance_amount_pounds > 0 THEN CASE - WHEN acc.account_currency = 'Pound' THEN gla.end_balance_amount_pounds - ELSE - gla.end_balance_amount_euros - END - ELSE - 0 - END - AS inflow, - CASE - WHEN gla.end_balance_amount_pounds < 0 THEN CASE - WHEN acc.account_currency = 'Pound' THEN - gla.end_balance_amount_pounds - ELSE - - gla.end_balance_amount_euros - END - ELSE - 0 - END - AS outflow - FROM - `poetic-dock-367718.dw_accounting.accounts` acc - INNER JOIN - `poetic-dock-367718.dw_accounting.gl_monthly_periodic_snapshot` gla - ON - gla.account_id = acc.account_id - WHERE - acc.account_family = 'Stock Market' - AND gla.ledger_book_name = 'Accounting App' - AND DATE_ADD(gla.date, INTERVAL 1 DAY) IN ('2023-06-01', - '2023-07-01', - '2023-08-01', - '2023-09-01', - '2023-10-01')) pas -ON - ass.account_name = pas.account_name -INNER JOIN ( - SELECT - account_name, - MIN(month_start) AS min_date, - MAX(month_start) AS max_date - FROM - `poetic-dock-367718.publishing.monthly_periodic_asset_portfolio` - WHERE - account_family = 'Stock Market' - GROUP BY - account_name) md -ON - md.account_name = pas.account_name - AND pas.date <= md.max_date - AND pas.date >= md.min_date; -""" \ No newline at end of file diff --git a/cloud_function/repository.py b/cloud_function/repository.py index 2547fee..b1e75e6 100644 --- a/cloud_function/repository.py +++ b/cloud_function/repository.py @@ -13,6 +13,7 @@ class AbstractRepository(ABC): get_cashflows: Abstract method for retrieving cashflows from the repository. load_irrs: Abstract method for loading Internal Rate of Return (IRR) data into the repository. """ + @abstractmethod def get_cashflows(self) -> list[model.Cashflow]: """ @@ -27,7 +28,7 @@ def get_cashflows(self) -> list[model.Cashflow]: raise NotImplementedError @abstractmethod - def load_irrs(self, entities: dict[str: model.Entity]): + def load_irrs(self, entities: dict[str : model.Entity]): """ Abstract method for loading Internal Rate of Return (IRR) data into the repository. @@ -62,6 +63,7 @@ class BiqQueryRepository(AbstractRepository): Load data from a list of dictionaries into a BigQuery table. load_irrs(entities: dict[str, model.Entity]): Load IRR data from a dictionary of Entity objects. """ + def __init__(self, project=None): self.client = bigquery.Client(project=project) self.cashflow_source = "SELECT * FROM dw_accounting.cashflows" @@ -91,11 +93,20 @@ def get_cashflows(self) -> list[model.Cashflow]: """ cashflows = [] for row in self.get(self.cashflow_source): - cashflows.append(model.Cashflow(row.date, row.inflow, row.outflow, row.value, row.entity_name)) + cashflows.append( + model.Cashflow( + row.date, row.inflow, row.outflow, row.value, row.entity_name + ) + ) return cashflows - def load_table_from_json(self, data: list[dict], destination: str, job_config: bigquery.job.load.LoadJobConfig): + def load_table_from_json( + self, + data: list[dict], + destination: str, + job_config: bigquery.job.load.LoadJobConfig, + ): """ Load data from a list of dictionaries into a BigQuery table. @@ -104,10 +115,12 @@ def load_table_from_json(self, data: list[dict], destination: str, job_config: b destination (str): The destination table in BigQuery. job_config (bigquery.job.load.LoadJobConfig): Job configuration for the load operation. """ - load_job = self.client.load_table_from_json(data, destination, job_config=job_config) + load_job = self.client.load_table_from_json( + data, destination, job_config=job_config + ) load_job.result() - def load_irrs(self, entities: dict[str: model.Entity]): + def load_irrs(self, entities: dict[str : model.Entity]): """ Load IRR data from a dictionary of Entity objects into BigQuery. This method extracts IRR data from Entity objects and loads it into the specified BigQuery destination table. diff --git a/cloud_function/services.py b/cloud_function/services.py index c668547..3f606ef 100644 --- a/cloud_function/services.py +++ b/cloud_function/services.py @@ -17,4 +17,4 @@ def irr_pipeline(repository: AbstractRepository): for entity in entities.values(): entity.calculate_irr() - repository.load_irrs(entities) \ No newline at end of file + repository.load_irrs(entities) diff --git a/tests/__init__.py b/tests/__init__.py index 47d406f..25a0fc3 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -3,11 +3,13 @@ def env_var_loader(file_name, file_path=None): - """ Method that allows to load env variables in local from a file. + """ + Load environment variables from a specified file using python-dotenv. + Args: - file_name: path to file with environment variables - file_path: path to the file, if it is not provided it is assumed that the file is in the root of the project - """ + file_name (str): The name of the environment file. + file_path (str, optional): The path to the directory containing the environment file. + """ if file_path: env_path = os.path.join(file_path, file_name) else: @@ -18,4 +20,4 @@ def env_var_loader(file_name, file_path=None): load_dotenv(dotenv_path=env_path) -env_var_loader('tests/.env') +env_var_loader("tests/.env") diff --git a/tests/conftest.py b/tests/conftest.py index af7c58d..e16eb40 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -5,7 +5,7 @@ from google.cloud import bigquery -@pytest.fixture(scope='session') +@pytest.fixture(scope="session") def bq_repository(): """ Fixture that returns instance of BiqQueryRepository() @@ -13,14 +13,18 @@ def bq_repository(): Returns: instance of BiqQueryRepository() """ - bq_repository = BiqQueryRepository(project=os.environ['PROJECT']) - bq_repository.cashflow_source = f"SELECT * FROM {os.environ['DATASET']}.{os.environ['SOURCE_TABLE']}" - bq_repository.irr_destination = os.environ['DATASET'] + '.' + os.environ['DESTINATION_TABLE'] + bq_repository = BiqQueryRepository(project=os.environ["PROJECT"]) + bq_repository.cashflow_source = ( + f"SELECT * FROM {os.environ['DATASET']}.{os.environ['SOURCE_TABLE']}" + ) + bq_repository.irr_destination = ( + os.environ["DATASET"] + "." + os.environ["DESTINATION_TABLE"] + ) return bq_repository -@pytest.fixture(scope='function') +@pytest.fixture(scope="function") def repository_with_cashflows(bq_repository): """ Fixture that creates a cashflow table on destination BigQuery project. Also load data into table. @@ -35,10 +39,16 @@ def repository_with_cashflows(bq_repository): write_disposition=bigquery.WriteDisposition.WRITE_TRUNCATE, source_format=bigquery.SourceFormat.NEWLINE_DELIMITED_JSON, ) - bq_repository.client.create_dataset(os.environ['DATASET'], exists_ok=True) - bq_repository.load_table_from_json(CASHFLOWS, os.environ['DATASET'] + '.' + os.environ['SOURCE_TABLE'], job_config) + bq_repository.client.create_dataset(os.environ["DATASET"], exists_ok=True) + bq_repository.load_table_from_json( + CASHFLOWS, os.environ["DATASET"] + "." + os.environ["SOURCE_TABLE"], job_config + ) yield bq_repository - bq_repository.client.delete_table(os.environ['DATASET'] + '.' + os.environ['SOURCE_TABLE']) - bq_repository.client.delete_table(os.environ['DATASET'] + '.' + os.environ['DESTINATION_TABLE']) + bq_repository.client.delete_table( + os.environ["DATASET"] + "." + os.environ["SOURCE_TABLE"] + ) + bq_repository.client.delete_table( + os.environ["DATASET"] + "." + os.environ["DESTINATION_TABLE"] + ) diff --git a/tests/data/cashflows.py b/tests/data/cashflows.py index 25b427c..9cd4cd0 100644 --- a/tests/data/cashflows.py +++ b/tests/data/cashflows.py @@ -1,11 +1,53 @@ import datetime as dt CASHFLOWS = [ - {'date': '2022-01-01', 'inflow': 1000, 'outflow': 0, 'value': 1000, 'entity_name': 'Test Account 1'}, - {'date': '2022-02-01', 'inflow': 0, 'outflow': 100, 'value': 1000, 'entity_name': 'Test Account 1'}, - {'date': '2022-03-01', 'inflow': 0, 'outflow': 100, 'value': 1000, 'entity_name': 'Test Account 1'}, - {'date': '2022-04-01', 'inflow': 0, 'outflow': 100, 'value': 1000, 'entity_name': 'Test Account 1'}, - {'date': '2022-05-01', 'inflow': 0, 'outflow': 100, 'value': 1000, 'entity_name': 'Test Account 1'}, - {'date': '2022-03-01', 'inflow': 1000, 'outflow': 0, 'value': 1000, 'entity_name': 'Test Account 2'}, - {'date': '2022-04-01', 'inflow': 0, 'outflow': 0, 'value': 1100, 'entity_name': 'Test Account 2'}, + { + "date": "2022-01-01", + "inflow": 1000, + "outflow": 0, + "value": 1000, + "entity_name": "Test Account 1", + }, + { + "date": "2022-02-01", + "inflow": 0, + "outflow": 100, + "value": 1000, + "entity_name": "Test Account 1", + }, + { + "date": "2022-03-01", + "inflow": 0, + "outflow": 100, + "value": 1000, + "entity_name": "Test Account 1", + }, + { + "date": "2022-04-01", + "inflow": 0, + "outflow": 100, + "value": 1000, + "entity_name": "Test Account 1", + }, + { + "date": "2022-05-01", + "inflow": 0, + "outflow": 100, + "value": 1000, + "entity_name": "Test Account 1", + }, + { + "date": "2022-03-01", + "inflow": 1000, + "outflow": 0, + "value": 1000, + "entity_name": "Test Account 2", + }, + { + "date": "2022-04-01", + "inflow": 0, + "outflow": 0, + "value": 1100, + "entity_name": "Test Account 2", + }, ] diff --git a/tests/test_model.py b/tests/test_model.py index a7216a3..483cd87 100644 --- a/tests/test_model.py +++ b/tests/test_model.py @@ -9,11 +9,15 @@ def test_sort_for_cashflow(): WHEN this list is sorted [sorted()] THEN the Cashflow instances has to be sorted by date from older to newer """ - cashflow1 = model.Cashflow(dt.datetime(2022, 1, 1), 1000, 0, 0, 'test entity') - cashflow2 = model.Cashflow(dt.datetime(2023, 2, 1), 0, 100, 1000, 'test entity') - cashflow3 = model.Cashflow(dt.datetime(1998, 3, 1), 0, 100, 1000, 'test entity') - - assert sorted([cashflow1, cashflow2, cashflow3]) == [cashflow3, cashflow1, cashflow2] + cashflow1 = model.Cashflow(dt.datetime(2022, 1, 1), 1000, 0, 0, "test entity") + cashflow2 = model.Cashflow(dt.datetime(2023, 2, 1), 0, 100, 1000, "test entity") + cashflow3 = model.Cashflow(dt.datetime(1998, 3, 1), 0, 100, 1000, "test entity") + + assert sorted([cashflow1, cashflow2, cashflow3]) == [ + cashflow3, + cashflow1, + cashflow2, + ] def test_sort_for_cashflow_none_cases(): @@ -22,8 +26,8 @@ def test_sort_for_cashflow_none_cases(): WHEN those cashflows are sorted THEN cashflow with None as date should be listed first """ - cashflow1 = model.Cashflow(None, 1000, 0, 0, 'test entity') - cashflow2 = model.Cashflow(dt.datetime(2023, 2, 1), 0, 100, 1000, 'test entity') + cashflow1 = model.Cashflow(None, 1000, 0, 0, "test entity") + cashflow2 = model.Cashflow(dt.datetime(2023, 2, 1), 0, 100, 1000, "test entity") assert sorted([cashflow1, cashflow2]) == [cashflow1, cashflow2] assert sorted([cashflow2, cashflow1]) == [cashflow1, cashflow2] @@ -36,7 +40,7 @@ def test_allocate(): THEN cashflow collection of entity object (Entity.sorted_cashflows) are updated to include this cashflow and are sorted """ - entity_name = 'test entity' + entity_name = "test entity" cashflow1 = model.Cashflow(dt.datetime(2022, 1, 1), 100, 100, 100, entity_name) cashflow2 = model.Cashflow(dt.datetime(2023, 1, 2), 1000, 1000, 1000, entity_name) entity = model.Entity(entity_name) @@ -53,7 +57,7 @@ def test_calculate_irrs(): WHEN Internal Rate of Return (irr or dcf, Discounted Cash Flow) is calculated (Entity.calculate_irr()) THEN irrs has to be calculated producing expected results """ - entity_name = 'test account' + entity_name = "test account" entity = model.Entity(entity_name) entities = {entity_name: entity} @@ -61,25 +65,21 @@ def test_calculate_irrs(): [ model.Cashflow(dt.datetime(2022, 1, 1), 1000, 0, 0, entity_name), model.Cashflow(dt.datetime(2022, 2, 1), 0, 100, 1000, entity_name), - model.Cashflow(dt.datetime(2022, 3, 1), 0, 100, 1000, entity_name) + model.Cashflow(dt.datetime(2022, 3, 1), 0, 100, 1000, entity_name), ], - entities) + entities, + ) entities[entity_name].calculate_irr() assert entities[entity_name].irrs == [ model.Irr(dt.datetime(2022, 2, 1), 0.1, entity_name), - model.Irr(dt.datetime(2022, 3, 1), 0.1, entity_name) + model.Irr(dt.datetime(2022, 3, 1), 0.1, entity_name), ] @pytest.mark.parametrize( - "value, expected_result", - [ - (1, 4095), - (-1, -1), - (0.01, 0.1268) - ] + "value, expected_result", [(1, 4095), (-1, -1), (0.01, 0.1268)] ) def test_cashflow_value_annual(value, expected_result): """ @@ -87,7 +87,7 @@ def test_cashflow_value_annual(value, expected_result): WHEN calling Irr.value_annual THEN the annualised irr value has to be returned """ - irr = model.Irr(dt.datetime(2022, 2, 1), value, 'test - account') + irr = model.Irr(dt.datetime(2022, 2, 1), value, "test - account") assert irr.value_annual == expected_result @@ -98,10 +98,13 @@ def test_irr_to_dict(): WHEN it is converted to dict THEN check that the return is the expected value """ - irr = model.Irr(dt.datetime(2022, 2, 1), 1, 'test - account') + irr = model.Irr(dt.datetime(2022, 2, 1), 1, "test - account") assert irr.to_dict() == { - 'date': '2022-02-01', 'irr_monthly': 1, 'irr_annual': 4095, 'entity_name': 'test - account' + "date": "2022-02-01", + "irr_monthly": 1, + "irr_annual": 4095, + "entity_name": "test - account", } @@ -111,7 +114,7 @@ def test_entity_equality(): WHEN they have the same entity name THEN they are equal """ - entity_name = 'test entity' + entity_name = "test entity" entity1 = model.Entity(entity_name) entity2 = model.Entity(entity_name) @@ -124,8 +127,8 @@ def test_entity_inequality(): WHEN they have different entity name THEN they are different """ - entity1 = model.Entity('test entity') - entity2 = model.Entity('other') + entity1 = model.Entity("test entity") + entity2 = model.Entity("other") assert not entity1 == entity2 assert not entity1 == 1 diff --git a/tests/test_services.py b/tests/test_services.py index c9e8a93..69f6fc9 100644 --- a/tests/test_services.py +++ b/tests/test_services.py @@ -16,17 +16,27 @@ def test_irr_pipeline(repository_with_cashflows): f"SELECT * FROM {os.environ['DATASET']}.{os.environ['DESTINATION_TABLE']} ORDER BY entity_name, date" ) expected_results = [ - Row(('Test Account 1', 0.1, 2.1384, dt.date(2022, 2, 1)), - {'entity_name': 0, 'irr_monthly': 1, 'irr_annual': 2, 'date': 3}), - Row(('Test Account 1', 0.1, 2.1384, dt.date(2022, 3, 1)), - {'entity_name': 0, 'irr_monthly': 1, 'irr_annual': 2, 'date': 3}), - Row(('Test Account 1', 0.1, 2.1384, dt.date(2022, 4, 1)), - {'entity_name': 0, 'irr_monthly': 1, 'irr_annual': 2, 'date': 3}), - Row(('Test Account 1', 0.1, 2.1384, dt.date(2022, 5, 1)), - {'entity_name': 0, 'irr_monthly': 1, 'irr_annual': 2, 'date': 3}), - Row(('Test Account 2', 0.1, 2.1384, dt.date(2022, 4, 1)), - {'entity_name': 0, 'irr_monthly': 1, 'irr_annual': 2, 'date': 3}) + Row( + ("Test Account 1", 0.1, 2.1384, dt.date(2022, 2, 1)), + {"entity_name": 0, "irr_monthly": 1, "irr_annual": 2, "date": 3}, + ), + Row( + ("Test Account 1", 0.1, 2.1384, dt.date(2022, 3, 1)), + {"entity_name": 0, "irr_monthly": 1, "irr_annual": 2, "date": 3}, + ), + Row( + ("Test Account 1", 0.1, 2.1384, dt.date(2022, 4, 1)), + {"entity_name": 0, "irr_monthly": 1, "irr_annual": 2, "date": 3}, + ), + Row( + ("Test Account 1", 0.1, 2.1384, dt.date(2022, 5, 1)), + {"entity_name": 0, "irr_monthly": 1, "irr_annual": 2, "date": 3}, + ), + Row( + ("Test Account 2", 0.1, 2.1384, dt.date(2022, 4, 1)), + {"entity_name": 0, "irr_monthly": 1, "irr_annual": 2, "date": 3}, + ), ] for result, expected_result in zip(results, expected_results): - assert result == expected_result \ No newline at end of file + assert result == expected_result From 56e12af8fc35922252ed1023ba9d767d845027ee Mon Sep 17 00:00:00 2001 From: $Cuentas-AppServices Date: Thu, 9 Nov 2023 18:01:16 +0000 Subject: [PATCH 03/15] added terraform code --- cloud_function/main.py | 3 +- cloud_function/repository.py | 2 +- cloud_function/services.py | 4 +- main.tf | 73 ++++++++++++++++++++++++++++++++++++ tests/__init__.py | 3 ++ variables.tf | 27 +++++++++++++ 6 files changed, 108 insertions(+), 4 deletions(-) create mode 100644 main.tf create mode 100644 variables.tf diff --git a/cloud_function/main.py b/cloud_function/main.py index a79183d..bf941e0 100644 --- a/cloud_function/main.py +++ b/cloud_function/main.py @@ -1,4 +1,5 @@ -from cloud_function import repository, services +import repository +import services def func_entry_point(event, context): diff --git a/cloud_function/repository.py b/cloud_function/repository.py index b1e75e6..751137c 100644 --- a/cloud_function/repository.py +++ b/cloud_function/repository.py @@ -1,6 +1,6 @@ from abc import ABC, abstractmethod from google.cloud import bigquery -from cloud_function import model +import model class AbstractRepository(ABC): diff --git a/cloud_function/services.py b/cloud_function/services.py index 3f606ef..cb905bf 100644 --- a/cloud_function/services.py +++ b/cloud_function/services.py @@ -1,5 +1,5 @@ -from cloud_function.repository import AbstractRepository -from cloud_function import model +from repository import AbstractRepository +import model def irr_pipeline(repository: AbstractRepository): diff --git a/main.tf b/main.tf new file mode 100644 index 0000000..01e4855 --- /dev/null +++ b/main.tf @@ -0,0 +1,73 @@ +provider "google" { + project = var.project_id + region = var.region +} + +resource "google_service_account" "default" { + account_id = var.service_account_name + display_name = "IRR Calculator Cloud Function SA" +} + +resource "google_project_iam_member" "big_query_writer" { + project = var.project_id + role = "roles/bigquery.dataEditor" + member = "serviceAccount:${google_service_account.default.email}" +} + +resource "google_project_iam_member" "big_query_jobUser" { + project = var.project_id + role = "roles/bigquery.jobUser" + member = "serviceAccount:${google_service_account.default.email}" +} + +resource "google_storage_bucket" "source_code" { + name = "irr-calculator-source-code-location" + storage_class = "STANDARD" + location = var.region + uniform_bucket_level_access = true +} + +data "archive_file" "source" { + type = "zip" + source_dir = "${path.root}/cloud_function" + output_path = "${path.root}/zip_to_cloud_function.zip" +} + +resource "google_storage_bucket_object" "zip" { + name = "cloud-function-source-code-for-${var.cloud_function_name}.zip" + bucket = google_storage_bucket.source_code.name + source = data.archive_file.source.output_path +} + +resource "google_pubsub_topic" "default" { + name = "cloud-function-${var.cloud_function_name}" +} + +resource "google_cloud_scheduler_job" "default" { + name = "cloud-function-${var.cloud_function_name}" + description = "Scheduler to trigger the cloud function: ${var.cloud_function_name}" + schedule = "30 0 * * *" + + pubsub_target { + topic_name = google_pubsub_topic.default.id + data = base64encode("Trigger Cloud Function") + } +} + +resource "google_cloudfunctions_function" "irr_calculator" { + name = var.cloud_function_name + + runtime = "python310" + available_memory_mb = 512 + timeout = 539 + source_archive_bucket = google_storage_bucket.source_code.name + source_archive_object = google_storage_bucket_object.zip.name + entry_point = var.function_entry_point + service_account_email = google_service_account.default.email + max_instances = 1 + + event_trigger { + event_type = "google.pubsub.topic.publish" + resource = google_pubsub_topic.default.name + } +} \ No newline at end of file diff --git a/tests/__init__.py b/tests/__init__.py index 25a0fc3..f93b7e1 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -1,5 +1,6 @@ import os from dotenv import load_dotenv +import sys def env_var_loader(file_name, file_path=None): @@ -21,3 +22,5 @@ def env_var_loader(file_name, file_path=None): env_var_loader("tests/.env") + +sys.path.append(os.path.join(os.getcwd(), 'cloud_function')) diff --git a/variables.tf b/variables.tf new file mode 100644 index 0000000..6c5fb5b --- /dev/null +++ b/variables.tf @@ -0,0 +1,27 @@ +variable "project_id" { + type = string + description = "Name of the Google Project" +} + +variable "region" { + type = string + default = "europe-west2" + description = "Location for the resources" +} + +variable "cloud_function_name" { + type = string + description = "Name of the ECB Api Caller Cloud Function" +} + +variable "function_entry_point" { + type = string + default = "func_entry_point" + description = "Name of the function entry point for the Python solution at main.py" +} + + +variable "service_account_name" { + type = string + description = "Name of the Service Account" +} \ No newline at end of file From 3d66d687016e799244e42ac25fb152e4ad8861b1 Mon Sep 17 00:00:00 2001 From: $Cuentas-AppServices Date: Thu, 9 Nov 2023 23:11:13 +0000 Subject: [PATCH 04/15] added github workflow for testing --- .github/workflows/pytest.yaml | 34 ++++++++++++++++++++++++++++++++++ tests/__init__.py | 10 +++++++++- 2 files changed, 43 insertions(+), 1 deletion(-) create mode 100644 .github/workflows/pytest.yaml diff --git a/.github/workflows/pytest.yaml b/.github/workflows/pytest.yaml new file mode 100644 index 0000000..b896426 --- /dev/null +++ b/.github/workflows/pytest.yaml @@ -0,0 +1,34 @@ +name: Pytest + +on: + push: + pull_request: + branches: + - main +jobs: + test: + runs-on: ubuntu-latest + strategy: + matrix: + python-version: ['3.10'] + max-parallel: 1 + env: # Or as an environment variable + PROJECT: ${{ secrets.PROJECT }} + SOURCE_TABLE: ${{ secrets.SOURCE_TABLE }} + DESTINATION_TABLE: ${{ secrets.DESTINATION_TABLE }} + DATASET: ${{ secrets.DATASET }} + SA_JSON: ${{ secrets.SA_JSON }} + steps: + - uses: actions/checkout@v3 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v3 + with: + python-version: ${{ matrix.python-version }} + - name: Install dependencies + run: | + python -m pip install --upgrade pip + if [ -f requirements.txt ]; then pip install -r requirements.txt; fi + python -m pip install pytest==7.4.3 + - name: Test with pytest + run: | + python -m pytest -vv diff --git a/tests/__init__.py b/tests/__init__.py index f93b7e1..86bc523 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -20,7 +20,15 @@ def env_var_loader(file_name, file_path=None): if os.path.isfile(env_path): load_dotenv(dotenv_path=env_path) - +# load env vars env_var_loader("tests/.env") +# load path to get python files sys.path.append(os.path.join(os.getcwd(), 'cloud_function')) + +# load sa if applicable +if os.environ.get('SA_JSON'): + file_name = 'sa.json' + with open(file_name, 'r') as f: + f.write(os.environ['SA_JSON']) + os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = file_name From 6538b6a5bf8085da933b249e6c5fad9ca5829692 Mon Sep 17 00:00:00 2001 From: $Cuentas-AppServices Date: Thu, 9 Nov 2023 23:14:13 +0000 Subject: [PATCH 05/15] hotfix GitHub workflow --- .github/workflows/pytest.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/pytest.yaml b/.github/workflows/pytest.yaml index b896426..d8a1db8 100644 --- a/.github/workflows/pytest.yaml +++ b/.github/workflows/pytest.yaml @@ -29,6 +29,7 @@ jobs: python -m pip install --upgrade pip if [ -f requirements.txt ]; then pip install -r requirements.txt; fi python -m pip install pytest==7.4.3 + python -m pip install python-dotenv==0.14.0 - name: Test with pytest run: | python -m pytest -vv From 8c5786c176451000af0c14bedc07a4af877168b3 Mon Sep 17 00:00:00 2001 From: $Cuentas-AppServices Date: Thu, 9 Nov 2023 23:16:40 +0000 Subject: [PATCH 06/15] hotfix GitHub workflow 2 --- tests/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/__init__.py b/tests/__init__.py index 86bc523..62deea0 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -28,7 +28,7 @@ def env_var_loader(file_name, file_path=None): # load sa if applicable if os.environ.get('SA_JSON'): - file_name = 'sa.json' + file_name = 'temp/sa.json' with open(file_name, 'r') as f: f.write(os.environ['SA_JSON']) os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = file_name From 114475c5f0bef768cc5d087ed4ec4cb264f9c110 Mon Sep 17 00:00:00 2001 From: $Cuentas-AppServices Date: Thu, 9 Nov 2023 23:20:09 +0000 Subject: [PATCH 07/15] hotfix GitHub workflow 3 --- tests/__init__.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/__init__.py b/tests/__init__.py index 62deea0..ddfb54f 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -28,7 +28,7 @@ def env_var_loader(file_name, file_path=None): # load sa if applicable if os.environ.get('SA_JSON'): - file_name = 'temp/sa.json' - with open(file_name, 'r') as f: - f.write(os.environ['SA_JSON']) + file_name = 'sa.json' + with open(file_name, 'w') as f: + f.write(os.environ.get('SA_JSON')) os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = file_name From 8ab691954d015fac67a71cc8024203da249e7fdd Mon Sep 17 00:00:00 2001 From: $Cuentas-AppServices Date: Thu, 9 Nov 2023 23:21:54 +0000 Subject: [PATCH 08/15] hotfix GitHub workflow 4 --- .github/workflows/pytest.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/pytest.yaml b/.github/workflows/pytest.yaml index d8a1db8..bce3989 100644 --- a/.github/workflows/pytest.yaml +++ b/.github/workflows/pytest.yaml @@ -27,7 +27,7 @@ jobs: - name: Install dependencies run: | python -m pip install --upgrade pip - if [ -f requirements.txt ]; then pip install -r requirements.txt; fi + if [ -f cloud_function/requirements.txt ]; then pip install -r cloud_function/requirements.txt; fi python -m pip install pytest==7.4.3 python -m pip install python-dotenv==0.14.0 - name: Test with pytest From 4657c72ac9c9e3f27e9a267e4b5229dc833ec6ff Mon Sep 17 00:00:00 2001 From: $Cuentas-AppServices Date: Thu, 9 Nov 2023 23:25:28 +0000 Subject: [PATCH 09/15] hotfix GitHub workflow 5 --- tests/conftest.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/conftest.py b/tests/conftest.py index e16eb40..181b353 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -39,7 +39,8 @@ def repository_with_cashflows(bq_repository): write_disposition=bigquery.WriteDisposition.WRITE_TRUNCATE, source_format=bigquery.SourceFormat.NEWLINE_DELIMITED_JSON, ) - bq_repository.client.create_dataset(os.environ["DATASET"], exists_ok=True) + if os.environ.get('SA_JSON'): + bq_repository.client.create_dataset(os.environ["DATASET"], exists_ok=True) bq_repository.load_table_from_json( CASHFLOWS, os.environ["DATASET"] + "." + os.environ["SOURCE_TABLE"], job_config ) From 23b382e997b8a37d85abedc7077f29035a76ad43 Mon Sep 17 00:00:00 2001 From: $Cuentas-AppServices Date: Thu, 9 Nov 2023 23:26:43 +0000 Subject: [PATCH 10/15] hotfix GitHub workflow 6 --- tests/conftest.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/conftest.py b/tests/conftest.py index 181b353..845fbb5 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -39,7 +39,7 @@ def repository_with_cashflows(bq_repository): write_disposition=bigquery.WriteDisposition.WRITE_TRUNCATE, source_format=bigquery.SourceFormat.NEWLINE_DELIMITED_JSON, ) - if os.environ.get('SA_JSON'): + if not os.environ.get('SA_JSON'): bq_repository.client.create_dataset(os.environ["DATASET"], exists_ok=True) bq_repository.load_table_from_json( CASHFLOWS, os.environ["DATASET"] + "." + os.environ["SOURCE_TABLE"], job_config From 160f39e3001f2e809369c860387b232b15d6a28d Mon Sep 17 00:00:00 2001 From: $Cuentas-AppServices Date: Fri, 10 Nov 2023 17:11:49 +0000 Subject: [PATCH 11/15] adding README --- README.md | 67 +++++++++++++++++++++++++++++++++++++++++++------------ 1 file changed, 53 insertions(+), 14 deletions(-) diff --git a/README.md b/README.md index 1db2a20..2ea4809 100644 --- a/README.md +++ b/README.md @@ -1,20 +1,37 @@ # irr_calculator -## Introduction -Python solution that calculates irr for a set of entities. +Python solution that calculates Internal Rate of Return (IRR) for a set of entities. -## Execution -On this section you will find how to run the code locally. +## Internal Rate of Return (IRR) -### Python environment -To execute the solution with your machine you will need an environment with python 3.10.0 and the libraries listed in -requirements.txt. In case you do not have such environment, you can create it as follows with conda: - -``` -conda create -n [] python=3.10.0 pip -pip install -r cloud_function/requirements.txt -``` +Internal Rate of Return (IRR) is a financial metric that calculates the profitability of an investment by +determining the Discount Rate at which the net present value (NPV) of the investment becomes zero. +In other words, IRR represents the interest rate at which the present value of cash inflows equals the present +value of cash outflows. The primary purpose of IRR is to assess the attractiveness of an investment opportunity. -### Testing +The IRR is calculated using the following formula: + +$`\[ NPV = \sum_{t=0}^{T} \frac{CF_t}{(1 + r)^t} \]`$ + +Where: +- \( NPV \) is the net present value of cash inflows and outflows. +- \( T \) is the total number of periods. +- \( CF_t \) is the cash flow in period \( t \). +- \( r \) is the discount rate (IRR). + +The IRR is found by solving the NPV equation for \( r \) when \( NPV = 0 \). + +## Terraform code +The provided Terraform code automates the deployment of the python solution to calculate IRR as a +Cloud Function on Google Cloud Platform (GCP). It begins by configuring the necessary GCP provider settings, +such as the project ID and region. The code then creates a service account for the Cloud Function, +assigning it specific roles for interacting with BigQuery. +It also establishes a Cloud Storage bucket to store the Cloud Function's source code, archives the source code, +and uploads it to the designated bucket. Additionally, the configuration sets up a Pub/Sub topic and a +Cloud Scheduler job, allowing the Cloud Function to be triggered periodically. +Finally, it defines the Cloud Function itself. This Terraform setup streamlines the deployment process and +ensures a consistent environment for the IRR calculator on GCP. + +## Testing To execute the Python tests use next command on the CLI: @@ -32,4 +49,26 @@ DATASET= ``` You will also need to provide a Service Account credentials or to use a user account with the right permissions to -interact with BigQuery. \ No newline at end of file +interact with BigQuery. + +### Python environment +To execute these tests within your machine you will need an environment with python 3.10.0 and the libraries listed in +requirements.txt. In case you do not have such environment, you can create it as follows with conda: + +``` +conda create -n [] python=3.10.0 pip +pip install -r cloud_function/requirements.txt +``` + +You also need to install pytest==7.4.3 & python-dotenv==0.14.0 + +## GitHub Workflow +GitHub workflow automates Python testing for the project, triggered on every push or pull requests to the main branch. +Operating on the latest Ubuntu environment, it employs a matrix strategy to test against Python 3.10. +The workflow initializes Python, installs project dependencies, including Pytest and Python-dotenv, +and executes Pytest. +Key environment variables, such as project details and service account JSON, are securely managed using GitHub Secrets. + + + + From 91be33f48ead6dee8049a367ed22fa26bdc8227a Mon Sep 17 00:00:00 2001 From: $Cuentas-AppServices Date: Fri, 10 Nov 2023 17:12:46 +0000 Subject: [PATCH 12/15] fix to readme --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 2ea4809..1fd6b5d 100644 --- a/README.md +++ b/README.md @@ -10,7 +10,7 @@ value of cash outflows. The primary purpose of IRR is to assess the attractivene The IRR is calculated using the following formula: -$`\[ NPV = \sum_{t=0}^{T} \frac{CF_t}{(1 + r)^t} \]`$ +$`NPV = \sum_{t=0}^{T} \frac{CF_t}{(1 + r)^t}`$ Where: - \( NPV \) is the net present value of cash inflows and outflows. From f3903bc4717ce1e7e747b71f796f91b1e8ea329f Mon Sep 17 00:00:00 2001 From: $Cuentas-AppServices Date: Fri, 10 Nov 2023 17:14:33 +0000 Subject: [PATCH 13/15] fix to readme 2 --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 1fd6b5d..ca5cd03 100644 --- a/README.md +++ b/README.md @@ -10,7 +10,7 @@ value of cash outflows. The primary purpose of IRR is to assess the attractivene The IRR is calculated using the following formula: -$`NPV = \sum_{t=0}^{T} \frac{CF_t}{(1 + r)^t}`$ +$`NPV = \sum_{t=0}^T \frac{CF_t}{(1 + r)^t}`$ Where: - \( NPV \) is the net present value of cash inflows and outflows. From 901f93e24527ce2cc97d5591c89f938589be9a3d Mon Sep 17 00:00:00 2001 From: $Cuentas-AppServices Date: Fri, 10 Nov 2023 17:16:40 +0000 Subject: [PATCH 14/15] fix to readme 3 --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index ca5cd03..eada67c 100644 --- a/README.md +++ b/README.md @@ -10,7 +10,7 @@ value of cash outflows. The primary purpose of IRR is to assess the attractivene The IRR is calculated using the following formula: -$`NPV = \sum_{t=0}^T \frac{CF_t}{(1 + r)^t}`$ +$`NPV = ( \sum_{t=0}^T \frac{CF_t}{(1 + r)^t} )`$ Where: - \( NPV \) is the net present value of cash inflows and outflows. From d275dec3af45a230ada1fceac82d5fa3b601c6cd Mon Sep 17 00:00:00 2001 From: $Cuentas-AppServices Date: Fri, 10 Nov 2023 17:20:00 +0000 Subject: [PATCH 15/15] fix to readme 4 --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index eada67c..ca5cd03 100644 --- a/README.md +++ b/README.md @@ -10,7 +10,7 @@ value of cash outflows. The primary purpose of IRR is to assess the attractivene The IRR is calculated using the following formula: -$`NPV = ( \sum_{t=0}^T \frac{CF_t}{(1 + r)^t} )`$ +$`NPV = \sum_{t=0}^T \frac{CF_t}{(1 + r)^t}`$ Where: - \( NPV \) is the net present value of cash inflows and outflows.