diff --git a/tron/config/config_parse.py b/tron/config/config_parse.py index bf609fb1d..95d1e2d8b 100644 --- a/tron/config/config_parse.py +++ b/tron/config/config_parse.py @@ -11,6 +11,7 @@ import itertools import logging import os +from copy import deepcopy from typing import Dict from typing import List from typing import Optional @@ -1068,11 +1069,14 @@ def validate_config_mapping(config_mapping): msg = "A config mapping requires a %s namespace" raise ConfigError(msg % MASTER_NAMESPACE) - master = valid_config(config_mapping.pop(MASTER_NAMESPACE)) + # we mutate this mapping - so let's make sure that we're making a copy + # in case the passed-in mapping is used elsewhere + config_mapping_to_validate = deepcopy(config_mapping) + master = valid_config(config_mapping_to_validate.pop(MASTER_NAMESPACE)) nodes = get_nodes_from_master_namespace(master) yield MASTER_NAMESPACE, master - for name, content in config_mapping.items(): + for name, content in config_mapping_to_validate.items(): context = ConfigContext( name, nodes, diff --git a/tron/config/manager.py b/tron/config/manager.py index 7717f039a..494f06bcd 100644 --- a/tron/config/manager.py +++ b/tron/config/manager.py @@ -1,6 +1,8 @@ import hashlib import logging import os +from copy import deepcopy +from typing import Union from tron import yaml from tron.config import config_parse @@ -42,7 +44,7 @@ def read_raw(path) -> str: return fh.read() -def hash_digest(content): +def hash_digest(content: Union[str, bytes]) -> str: return hashlib.sha1( maybe_encode(content) ).hexdigest() # TODO: TRON-2293 maybe_encode is a relic of Python2->Python3 migration. Remove it. @@ -97,6 +99,7 @@ class ConfigManager: def __init__(self, config_path, manifest=None): self.config_path = config_path self.manifest = manifest or ManifestFile(config_path) + self.name_mapping = None def build_file_path(self, name): name = name.replace(".", "_").replace(os.path.sep, "_") @@ -107,23 +110,32 @@ def read_raw_config(self, name=schema.MASTER_NAMESPACE) -> str: filename = self.manifest.get_file_name(name) return read_raw(filename) - def write_config(self, name, content): + def write_config(self, name: str, content: str) -> None: + loaded_content = from_string(content) self.validate_with_fragment( name, - from_string(content), + content=loaded_content, # TODO: remove this constraint after tron triggers across clusters are supported. should_validate_missing_dependency=False, ) + # validate_with_fragment throws if the updated content is invalid - so if we get here + # we know it's safe to reflect the update in our config store + self.get_config_name_mapping()[name] = loaded_content + + # ...and then let's also persist the update to disk since memory is temporary, but disk is foreverâ„¢ filename = self.get_filename_from_manifest(name) write_raw(filename, content) - def delete_config(self, name): + def delete_config(self, name: str) -> None: filename = self.manifest.get_file_name(name) if not filename: msg = "Namespace %s does not exist in manifest, cannot delete." log.info(msg % name) return + # to avoid needing to reload from disk on every config load - we need to ensure that + # we also persist config deletions into our cache + self.get_config_name_mapping().pop(name, None) self.manifest.delete(name) os.remove(filename) @@ -141,7 +153,11 @@ def validate_with_fragment( content, should_validate_missing_dependency=True, ): - name_mapping = self.get_config_name_mapping() + # NOTE: we deepcopy rather than swap values to keep this a pure function + # get_config_name_mapping() returns a shared dict, so this would otherwise + # actually update the mapping - which would be unwanted/need to be rolled-back + # should validation fail. + name_mapping = deepcopy(self.get_config_name_mapping()) name_mapping[name] = content try: JobGraph( @@ -152,8 +168,11 @@ def validate_with_fragment( raise ConfigError(str(e)) def get_config_name_mapping(self): - seq = self.manifest.get_file_mapping().items() - return {name: read(filename) for name, filename in seq} + if self.name_mapping is None: + log.info("Creating config mapping cache...") + seq = self.manifest.get_file_mapping().items() + self.name_mapping = {name: read(filename) for name, filename in seq} + return self.name_mapping def load(self): """Return the fully constructed configuration.""" @@ -165,6 +184,25 @@ def get_hash(self, name) -> str: """Return a hash of the configuration contents for name.""" if name not in self: return self.DEFAULT_HASH + + if name in self.get_config_name_mapping(): + # unfortunately, we have the parsed dict in memory. + # rather than hit the disk to get the raw string - let's convert + # the in-memory dict to a yaml string and hash that to save a couple + # ms (in testing, ~3ms over loading from disk and ~1ms over dumping to json :p) + # TODO: consider storing the hash alongside the config so that we only calculate + # hashes once? + return hash_digest( + yaml.dump( + self.get_config_name_mapping()[name], + # ensure that the keys are always in a stable order + sort_keys=True, + ), + ) + + # the config for any name should always be in our name mapping + # ...but just in case, let's fallback to reading from disk. + log.warning("%s not found in name mapping - falling back to hashing contents on disk!") return hash_digest(self.read_raw_config(name)) def __contains__(self, name):