diff --git a/quarry/default_config.yaml b/quarry/default_config.yaml index 90cc3b0..e6ba685 100644 --- a/quarry/default_config.yaml +++ b/quarry/default_config.yaml @@ -30,8 +30,13 @@ worker_prefetch_multiplier: 1 # Tasks can run for a long time REPLICA_DOMAIN: '' # Change to `analytics.db.svc.wikimedia.cloud` for live replicas REPLICA_USER: 'repl' # For live replicas, your replica.my.cnf username REPLICA_PASSWORD: 'repl' # For live replicas, your replica.my.cnf password - REPLICA_PORT: 3306 + +TOOLS_DB_HOST: 'tools.db.svc.wikimedia.cloud' +TOOLS_DB_PORT: 3306 +TOOLS_DB_USER: '' +TOOLS_DB_PASSWORD: '' + OUTPUT_PATH_TEMPLATE: '/results/%s/%s/%s.sqlite' REDIS_HOST: 'redis' REDIS_PORT: 6379 diff --git a/quarry/web/replica.py b/quarry/web/replica.py index 9c2450a..34ee67f 100644 --- a/quarry/web/replica.py +++ b/quarry/web/replica.py @@ -10,16 +10,18 @@ class Replica: def __init__(self, config): self.config = config self.dbname = "" + self.is_tools_db = False def _db_name_mangler(self): if self.dbname == "": raise ReplicaConnectionException( "Attempting connection before a database is selected" ) - - if self.dbname == "meta" or self.dbname == "meta_p": + self.is_tools_db = "__" in self.dbname and self.dbname.endswith("_p") + if self.is_tools_db: + self.database_p = self.dbname + elif self.dbname == "meta" or self.dbname == "meta_p": self.database_name = "s7" - self.database_p = "meta_p" elif self.dbname == "centralauth" or self.dbname == "centralauth_p": self.database_name = "s7" @@ -36,6 +38,13 @@ def _db_name_mangler(self): else "{}_p".format(self.dbname) ) + def get_host_name(self): + if self.is_tools_db: + return self.config["TOOLS_DB_HOST"] + if self.config["REPLICA_DOMAIN"]: + return f"{self.database_name}.{self.config['REPLICA_DOMAIN']}" + return self.database_name + @property def connection(self): self._replica.ping(reconnect=True) @@ -52,22 +61,20 @@ def connection(self, db): self.dbname = db self._db_name_mangler() - repl_host = ( - 
f"{self.database_name}.{self.config['REPLICA_DOMAIN']}" - if self.config["REPLICA_DOMAIN"] - else self.database_name - ) + host = self.get_host_name() + conf_prefix = "TOOLS_DB" if self.is_tools_db else "REPLICA" + port = self.config[f"{conf_prefix}_PORT"] connect_opts = { "db": self.database_p, - "user": self.config["REPLICA_USER"], - "passwd": self.config["REPLICA_PASSWORD"], + "user": self.config[f"{conf_prefix}_USER"], + "passwd": self.config[f"{conf_prefix}_PASSWORD"], "charset": "utf8", "client_flag": pymysql.constants.CLIENT.MULTI_STATEMENTS, } if not self.config.get("REPLICA_SOCKS5_PROXY_HOST"): self._replica = pymysql.connect( - host=repl_host, port=self.config["REPLICA_PORT"], **connect_opts + host=host, port=port, **connect_opts ) else: self._replica = pymysql.connect(defer_connect=True, **connect_opts) @@ -78,7 +85,7 @@ def connection(self, db): addr=self.config["REPLICA_SOCKS5_PROXY_HOST"], port=self.config["REPLICA_SOCKS5_PROXY_PORT"], ) - sock.connect((repl_host, self.config["REPLICA_PORT"])) + sock.connect((host, port)) self._replica.connect(sock=sock) @connection.deleter diff --git a/quarry/web/utils/__init__.py b/quarry/web/utils/__init__.py index ea68d94..28f1859 100644 --- a/quarry/web/utils/__init__.py +++ b/quarry/web/utils/__init__.py @@ -2,7 +2,7 @@ VALID_DB_NAMES = re.compile( - r"^(?:(?:(?:centralauth|meta|[0-9a-z_]*wik[a-z]+)(?:_p)?)|quarry)$" + r"^(?:(?:(?:centralauth|meta|[0-9a-z_]*wik[a-z]+)(?:_p)?)|quarry|s\d+__\w+_p)$" )