-
Notifications
You must be signed in to change notification settings - Fork 1k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Username/email migration commits (squashed)
Add directory for migrations data fixing scripts Add username deduplication data fixer Add migration for username column unique constraint Add email deduplication data fixer Add migration for email column unique constraint Update the model w/unique constraints Fix integration test that violated db integrity constraint Randomize test user email to respect unique constraint Fix bug in test_rule_helper Fix test_quota to respect constraint Fix test_galaxy_mapping to respect constraint
- Loading branch information
Showing
12 changed files
with
248 additions
and
22 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
52 changes: 52 additions & 0 deletions
52
...laxy/model/migrations/alembic/versions_gxy/1cf595475b58_email_column_unique_constraint.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,52 @@ | ||
"""Email column unique constraint | ||
Revision ID: 1cf595475b58 | ||
Revises: d619fdfa6168 | ||
Create Date: 2024-07-03 19:53:22.443016 | ||
""" | ||
|
||
from alembic import op | ||
|
||
from galaxy.model.database_object_names import build_index_name | ||
from galaxy.model.migrations.data_fixes.user_table_fixer import EmailDeduplicator | ||
from galaxy.model.migrations.util import ( | ||
create_index, | ||
drop_index, | ||
index_exists, | ||
transaction, | ||
) | ||
|
||
# revision identifiers, used by Alembic. | ||
revision = "1cf595475b58" | ||
down_revision = "d619fdfa6168" | ||
branch_labels = None | ||
depends_on = None | ||
|
||
|
||
table_name = "galaxy_user" | ||
column_name = "email" | ||
index_name = build_index_name(table_name, [column_name]) | ||
|
||
|
||
def upgrade(): | ||
with transaction(): | ||
_fix_duplicate_emails() | ||
# Existing databases may have an existing index we no longer need | ||
# New databases will not have that index, so we must check. | ||
if index_exists(index_name, table_name, False): | ||
drop_index(index_name, table_name) | ||
# Create a UNIQUE index | ||
create_index(index_name, table_name, [column_name], unique=True) | ||
|
||
|
||
def downgrade(): | ||
with transaction(): | ||
drop_index(index_name, table_name) | ||
# Restore a non-unique index | ||
create_index(index_name, table_name, [column_name]) | ||
|
||
|
||
def _fix_duplicate_emails(): | ||
"""Fix records with duplicate usernames""" | ||
connection = op.get_bind() | ||
EmailDeduplicator(connection).run() |
51 changes: 51 additions & 0 deletions
51
...y/model/migrations/alembic/versions_gxy/d619fdfa6168_username_column_unique_constraint.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,51 @@ | ||
"""Username column unique constraint | ||
Revision ID: d619fdfa6168 | ||
Revises: d2d8f51ebb7e | ||
Create Date: 2024-07-02 13:13:10.325586 | ||
""" | ||
|
||
from alembic import op | ||
|
||
from galaxy.model.database_object_names import build_index_name | ||
from galaxy.model.migrations.data_fixes.user_table_fixer import UsernameDeduplicator | ||
from galaxy.model.migrations.util import ( | ||
create_index, | ||
drop_index, | ||
index_exists, | ||
transaction, | ||
) | ||
|
||
# revision identifiers, used by Alembic. | ||
revision = "d619fdfa6168" | ||
down_revision = "d2d8f51ebb7e" | ||
branch_labels = None | ||
depends_on = None | ||
|
||
table_name = "galaxy_user" | ||
column_name = "username" | ||
index_name = build_index_name(table_name, [column_name]) | ||
|
||
|
||
def upgrade(): | ||
with transaction(): | ||
_fix_duplicate_usernames() | ||
# Existing databases may have an existing index we no longer need | ||
# New databases will not have that index, so we must check. | ||
if index_exists(index_name, table_name, False): | ||
drop_index(index_name, table_name) | ||
# Create a UNIQUE index | ||
create_index(index_name, table_name, [column_name], unique=True) | ||
|
||
|
||
def downgrade(): | ||
with transaction(): | ||
drop_index(index_name, table_name) | ||
# Restore a non-unique index | ||
create_index(index_name, table_name, [column_name]) | ||
|
||
|
||
def _fix_duplicate_usernames(): | ||
"""Fix records with duplicate usernames""" | ||
connection = op.get_bind() | ||
UsernameDeduplicator(connection).run() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
""" | ||
Package contains code for fixing inconsistent data in the database that must be | ||
run together with a migration script. | ||
""" |
112 changes: 112 additions & 0 deletions
112
lib/galaxy/model/migrations/data_fixes/user_table_fixer.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,112 @@ | ||
import uuid | ||
|
||
from sqlalchemy import ( | ||
func, | ||
Result, | ||
select, | ||
text, | ||
update, | ||
) | ||
|
||
from galaxy.model import User | ||
|
||
|
||
class UsernameDeduplicator: | ||
|
||
def __init__(self, connection): | ||
self.connection = connection | ||
|
||
def run(self): | ||
""" | ||
Deduplicate usernames by generating a unique value for all duplicates, keeping | ||
the username of the most recently created user unchanged. | ||
Records updated with the generated value are marked as deleted. | ||
""" | ||
duplicates = self._get_duplicate_username_data() | ||
prev_username = None | ||
for id, username, _ in duplicates: | ||
if username == prev_username: | ||
new_username = self._generate_next_available_username(username) | ||
stmt = update(User).where(User.id == id).values(username=new_username, deleted=True) | ||
self.connection.execute(stmt) | ||
else: | ||
prev_username = username | ||
|
||
def _get_duplicate_username_data(self) -> Result: | ||
# Duplicate usernames | ||
duplicates_stmt = select(User.username).group_by(User.username).having(func.count() > 1) | ||
# User data for records with duplicate usernames (ordering: newest to oldest) | ||
stmt = ( | ||
select(User.id, User.username, User.create_time) | ||
.where(User.username.in_(duplicates_stmt)) | ||
.order_by(User.username, User.create_time.desc()) | ||
) | ||
return self.connection.execute(stmt) | ||
|
||
def _generate_next_available_username(self, username): | ||
i = 1 | ||
while self.connection.execute(select(User).where(User.username == f"{username}-{i}")).first(): | ||
i += 1 | ||
return f"{username}-{i}" | ||
|
||
|
||
class EmailDeduplicator: | ||
|
||
def __init__(self, connection): | ||
self.connection = connection | ||
|
||
def run(self): | ||
""" | ||
Deduplicate user emails by generating a unique value for all duplicates, keeping | ||
the email of the most recently created user that has one or more history unchanged. | ||
If such a user does not exist, keep the oldest user. | ||
Records updated with the generated value are marked as deleted (we presume them | ||
to be invalid, and the user should not be able to login). | ||
""" | ||
stmt = select(User.email).group_by(User.email).having(func.count() > 1) | ||
duplicate_emails = self.connection.scalars(stmt) | ||
for email in duplicate_emails: | ||
users = self._get_users_with_same_email(email) | ||
user_with_history = self._find_oldest_user_with_history(users) | ||
duplicates = self._get_users_to_deduplicate(users, user_with_history) | ||
self._deduplicate_users(email, duplicates) | ||
|
||
def _get_users_with_same_email(self, email: str): | ||
sql = text( | ||
""" | ||
SELECT u.id, EXISTS(SELECT h.id FROM history h WHERE h.user_id = u.id) | ||
FROM galaxy_user u | ||
WHERE u.email = :email | ||
ORDER BY u.create_time | ||
""" | ||
) | ||
params = {"email": email} | ||
return self.connection.execute(sql, params).all() | ||
|
||
def _find_oldest_user_with_history(self, users): | ||
for user_id, exists in users: | ||
if exists: | ||
return user_id | ||
return None | ||
|
||
def _get_users_to_deduplicate(self, users, user_with_history): | ||
if user_with_history: | ||
# Preserve the oldest user with a history | ||
return [user_id for user_id, _ in users if user_id != user_with_history] | ||
else: | ||
# Preserve the oldest user | ||
return [user_id for user_id, _ in users[1:]] | ||
|
||
def _deduplicate_users(self, email, to_deduplicate): | ||
for id in to_deduplicate: | ||
new_email = self._generate_replacement_for_duplicate_email(email) | ||
stmt = update(User).where(User.id == id).values(email=new_email, deleted=True) | ||
self.connection.execute(stmt) | ||
|
||
def _generate_replacement_for_duplicate_email(self, email: str) -> str: | ||
""" | ||
Generate a replacement for a duplicate email value. The new value consists of the original | ||
email and a unique suffix. Since the original email is part of the new value, it will be | ||
possible to retrieve the user record based on this value, if needed. | ||
""" | ||
return f"{email}-{uuid.uuid4()}" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
import random | ||
import string | ||
|
||
|
||
def random_str() -> str: | ||
alphabet = string.ascii_lowercase + string.digits | ||
size = random.randint(5, 10) | ||
return "".join(random.choices(alphabet, k=size)) | ||
|
||
|
||
def random_email() -> str: | ||
text = random_str() | ||
return f"{text}@galaxy.testing" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -23,6 +23,7 @@ | |
get_object_session, | ||
) | ||
from galaxy.model.security import GalaxyRBACAgent | ||
from galaxy.model.unittest_utils.utils import random_email | ||
from galaxy.objectstore import QuotaSourceMap | ||
from galaxy.util.unittest import TestCase | ||
|
||
|
@@ -262,7 +263,7 @@ def test_collection_get_interface(self): | |
assert c1[i] == dces[i] | ||
|
||
def test_dataset_instance_order(self) -> None: | ||
u = model.User(email="[email protected]", password="password") | ||
u = model.User(email=random_email(), password="password") | ||
h1 = model.History(name="History 1", user=u) | ||
elements = [] | ||
list_pair = model.DatasetCollection(collection_type="list:paired") | ||
|
@@ -669,7 +670,7 @@ def contents_iter_names(**kwds): | |
assert contents_iter_names(ids=[d1.id, d3.id]) == ["1", "3"] | ||
|
||
def test_history_audit(self): | ||
u = model.User(email="[email protected]", password="password") | ||
u = model.User(email=random_email(), password="password") | ||
h1 = model.History(name="HistoryAuditHistory", user=u) | ||
h2 = model.History(name="HistoryAuditHistory", user=u) | ||
|
||
|
@@ -739,7 +740,7 @@ def test_flush_refreshes(self): | |
# states and flushing in SQL Alchemy is very subtle and it is good to have a executable | ||
# reference for how it behaves in the context of Galaxy objects. | ||
model = self.model | ||
user = model.User(email="[email protected]", password="password") | ||
user = model.User(email=random_email(), password="password") | ||
galaxy_session = model.GalaxySession() | ||
galaxy_session_other = model.GalaxySession() | ||
galaxy_session.user = user | ||
|
@@ -812,7 +813,7 @@ def test_flush_refreshes(self): | |
assert "id" not in inspect(galaxy_model_object_new).unloaded | ||
|
||
def test_workflows(self): | ||
user = model.User(email="[email protected]", password="password") | ||
user = model.User(email=random_email(), password="password") | ||
|
||
child_workflow = _workflow_from_steps(user, []) | ||
self.persist(child_workflow) | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,6 +1,7 @@ | ||
import uuid | ||
|
||
from galaxy import model | ||
from galaxy.model.unittest_utils.utils import random_email | ||
from galaxy.objectstore import ( | ||
QuotaSourceInfo, | ||
QuotaSourceMap, | ||
|
@@ -16,7 +17,7 @@ class TestPurgeUsage(BaseModelTestCase): | |
def setUp(self): | ||
super().setUp() | ||
model = self.model | ||
u = model.User(email="[email protected]", password="password") | ||
u = model.User(email=random_email(), password="password") | ||
u.disk_usage = 25 | ||
self.persist(u) | ||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,5 +1,6 @@ | ||
from galaxy import model | ||
from galaxy.model.base import transaction | ||
from galaxy.model.unittest_utils.utils import random_email | ||
from galaxy.workflow.run_request import ( | ||
_normalize_inputs, | ||
_normalize_step_parameters, | ||
|
@@ -89,7 +90,7 @@ def __new_input(): | |
|
||
|
||
def __workflow_fixure(trans): | ||
user = model.User(email="[email protected]", password="pass") | ||
user = model.User(email=random_email(), password="pass") | ||
stored_workflow = model.StoredWorkflow() | ||
stored_workflow.user = user | ||
workflow = model.Workflow() | ||
|