Remove transaction helper from scripts
jdavcs committed Jan 14, 2025
1 parent 29b76af commit 86d046e
Showing 7 changed files with 54 additions and 82 deletions.
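
Every file below applies the same mechanical change: the old code wrapped each commit in the transaction() context manager from galaxy.model.base; the new code calls commit() on the session directly. A minimal before/after sketch of the pattern (the helper's implementation is not shown in this diff; presumably it opened a transaction only when the session did not already have one in progress):

    # Before: commits were funneled through the transaction() helper.
    session = app.sa_session()
    with transaction(session):
        session.commit()

    # After: SQLAlchemy 1.4+ sessions begin a transaction automatically on
    # first use, so an explicit begin wrapper is redundant and a bare
    # commit() is enough.
    session = app.sa_session()
    session.commit()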
5 changes: 1 addition & 4 deletions scripts/cleanup_datasets/admin_cleanup_datasets.py
@@ -64,7 +64,6 @@
 import galaxy.config
 import galaxy.model.mapping
 import galaxy.util
-from galaxy.model.base import transaction
 from galaxy.util.script import (
     app_properties_from_args,
     populate_config_args,
@@ -261,9 +260,7 @@ def administrative_delete_datasets(
             hda.deleted = True
             app.sa_session.add(hda)
             print(f"Marked HistoryDatasetAssociation id {hda.id} as deleted")
-    session = app.sa_session()
-    with transaction(session):
-        session.commit()
+    app.sa_session().commit()

     emailtemplate = Template(filename=template_file)
     for email, dataset_list in user_notifications.items():
94 changes: 41 additions & 53 deletions scripts/cleanup_datasets/cleanup_datasets.py
@@ -26,7 +26,6 @@
 import galaxy.config
 from galaxy.datatypes.registry import Registry
 from galaxy.exceptions import ObjectNotFound
-from galaxy.model.base import transaction
 from galaxy.model.mapping import init_models_from_config
 from galaxy.objectstore import build_object_store_from_config
 from galaxy.util import unicodify
@@ -222,12 +221,13 @@ def delete_userless_histories(app, cutoff_time, info_only=False, force_retry=Fal
     # Nothing is removed from disk yet.
     history_count = 0
     start = time.time()
+    session = app.sa_session()
     if force_retry:
-        histories = app.sa_session.query(app.model.History).filter(
+        histories = session.query(app.model.History).filter(
             and_(app.model.History.__table__.c.user_id == null(), app.model.History.update_time < cutoff_time)
         )
     else:
-        histories = app.sa_session.query(app.model.History).filter(
+        histories = session.query(app.model.History).filter(
             and_(
                 app.model.History.__table__.c.user_id == null(),
                 app.model.History.__table__.c.deleted == false(),
@@ -238,10 +238,8 @@
         if not info_only:
             log.info("Deleting history id %d", history.id)
             history.deleted = True
-            app.sa_session.add(history)
-            session = app.sa_session()
-            with transaction(session):
-                session.commit()
+            session.add(history)
+            session.commit()
         history_count += 1
     stop = time.time()
     log.info("Deleted %d histories", history_count)
@@ -257,15 +255,16 @@ def purge_histories(app, cutoff_time, remove_from_disk, info_only=False, force_r
     # i.e. all associated datasets are marked as deleted
     history_count = 0
     start = time.time()
+    session = app.sa_session()
     if force_retry:
         histories = (
-            app.sa_session.query(app.model.History)
+            session.query(app.model.History)
             .filter(and_(app.model.History.__table__.c.deleted == true(), app.model.History.update_time < cutoff_time))
             .options(joinedload(app.model.History.datasets))
         )
     else:
         histories = (
-            app.sa_session.query(app.model.History)
+            session.query(app.model.History)
             .filter(
                 and_(
                     app.model.History.__table__.c.deleted == true(),
@@ -289,10 +288,8 @@
                 # dhp.delete()
             log.info("Purging history id %d", history.id)
             history.purged = True
-            app.sa_session.add(history)
-            session = app.sa_session()
-            with transaction(session):
-                session.commit()
+            session.add(history)
+            session.commit()
         else:
             log.info("History id %d will be purged (without 'info_only' mode)", history.id)
         history_count += 1
@@ -310,14 +307,15 @@ def purge_libraries(app, cutoff_time, remove_from_disk, info_only=False, force_r
     # i.e. all associated LibraryDatasets/folders are marked as deleted
     library_count = 0
     start = time.time()
+    session = app.sa_session()
     if force_retry:
-        libraries = app.sa_session.query(app.model.Library).filter(
+        libraries = session.query(app.model.Library).filter(
             and_(
                 app.model.Library.__table__.c.deleted == true(), app.model.Library.__table__.c.update_time < cutoff_time
             )
         )
     else:
-        libraries = app.sa_session.query(app.model.Library).filter(
+        libraries = session.query(app.model.Library).filter(
             and_(
                 app.model.Library.__table__.c.deleted == true(),
                 app.model.Library.__table__.c.purged == false(),
@@ -329,10 +327,8 @@
         if not info_only:
             log.info("Purging library id %d", library.id)
             library.purged = True
-            app.sa_session.add(library)
-            session = app.sa_session()
-            with transaction(session):
-                session.commit()
+            session.add(library)
+            session.commit()
         library_count += 1
     stop = time.time()
     log.info("# Purged %d libraries .", library_count)
@@ -413,6 +409,7 @@ def delete_datasets(app, cutoff_time, remove_from_disk, info_only=False, force_r
     deleted_dataset_count = 0
     deleted_instance_count = 0
     skip = []
+    session = app.sa_session()
     # Handle library datasets. This is a bit tricky, so here's some clarification. We have a list of all
     # LibraryDatasets that were marked deleted before our cutoff_time, but have not yet been marked purged.
     # A LibraryDataset object is marked purged when all of its LibraryDatasetDatasetAssociations have been
@@ -421,37 +418,35 @@
     # and add it to our accrued list of Datasets for later processing. We mark as deleted all of its
     # LibraryDatasetDatasetAssociations. Then we mark the LibraryDataset as purged. We then process our
     # list of Datasets.
-    library_dataset_ids = [row.id for row in app.sa_session.execute(library_dataset_ids_query)]
+    library_dataset_ids = [row.id for row in session.execute(library_dataset_ids_query)]
     dataset_ids = []
     for library_dataset_id in library_dataset_ids:
         log.info("######### Processing LibraryDataset id: %d", library_dataset_id)
         # Get the LibraryDataset and the current LibraryDatasetDatasetAssociation objects
-        ld = app.sa_session.query(app.model.LibraryDataset).get(library_dataset_id)
+        ld = session.query(app.model.LibraryDataset).get(library_dataset_id)
         ldda = ld.library_dataset_dataset_association
         # Append the associated Dataset object's id to our list of dataset_ids
         dataset_ids.append(ldda.dataset_id)
         # Mark all of the LibraryDataset's associated LibraryDatasetDatasetAssociation objects' as deleted
         if not ldda.deleted:
             ldda.deleted = True
-            app.sa_session.add(ldda)
+            session.add(ldda)
             log.info("Marked associated LibraryDatasetDatasetAssociation id %d as deleted", ldda.id)
         for expired_ldda in ld.expired_datasets:
             if not expired_ldda.deleted:
                 expired_ldda.deleted = True
-                app.sa_session.add(expired_ldda)
+                session.add(expired_ldda)
                 log.info("Marked associated expired LibraryDatasetDatasetAssociation id %d as deleted", ldda.id)
         # Mark the LibraryDataset as purged
         ld.purged = True
-        app.sa_session.add(ld)
+        session.add(ld)
         log.info("Marked LibraryDataset id %d as purged", ld.id)
-        session = app.sa_session()
-        with transaction(session):
-            session.commit()
+        session.commit()
     # Add all datasets associated with Histories to our list
-    dataset_ids.extend([row.id for row in app.sa_session.execute(history_dataset_ids_query)])
+    dataset_ids.extend([row.id for row in session.execute(history_dataset_ids_query)])
     # Process each of the Dataset objects
     for dataset_id in dataset_ids:
-        dataset = app.sa_session.query(app.model.Dataset).get(dataset_id)
+        dataset = session.query(app.model.Dataset).get(dataset_id)
         if dataset.id in skip:
             continue
         skip.append(dataset.id)
@@ -528,10 +523,9 @@ def _purge_dataset_instance(dataset_instance, app, remove_from_disk, info_only=F
         )
         dataset_instance.mark_deleted()
         dataset_instance.clear_associated_files()
-        app.sa_session.add(dataset_instance)
         session = app.sa_session()
-        with transaction(session):
-            session.commit()
+        session.add(dataset_instance)
+        session.commit()
         app.sa_session.refresh(dataset_instance.dataset)
     else:
         log.info(
@@ -572,12 +566,12 @@ def _delete_dataset(dataset, app, remove_from_disk, info_only=False, is_deletabl
     metadata_files = []
     # lets create a list of metadata files, then perform actions on them
     for hda in dataset.history_associations:
-        for metadata_file in app.sa_session.query(app.model.MetadataFile).filter(
+        for metadata_file in session.query(app.model.MetadataFile).filter(
             app.model.MetadataFile.__table__.c.hda_id == hda.id
         ):
             metadata_files.append(metadata_file)
     for ldda in dataset.library_associations:
-        for metadata_file in app.sa_session.query(app.model.MetadataFile).filter(
+        for metadata_file in session.query(app.model.MetadataFile).filter(
             app.model.MetadataFile.__table__.c.lda_id == ldda.id
         ):
             metadata_files.append(metadata_file)
@@ -608,20 +602,17 @@ def _delete_dataset(dataset, app, remove_from_disk, info_only=False, is_deletabl
                         metadata_file.get_file_name(),
                     )
                     metadata_file.purged = True
-                    app.sa_session.add(metadata_file)
-                    with transaction(session):
-                        session.commit()
+                    session.add(metadata_file)
+                    session.commit()
                 metadata_file.deleted = True
-                app.sa_session.add(metadata_file)
-                with transaction(session):
-                    session.commit()
+                session.add(metadata_file)
+                session.commit()
             log.info(metadata_file.get_file_name())
     if not info_only:
         log.info("Deleting dataset id %d", dataset.id)
         dataset.deleted = True
-        app.sa_session.add(dataset)
-        with transaction(session):
-            session.commit()
+        session.add(dataset)
+        session.commit()
     else:
         log.info("Dataset %d will be deleted (without 'info_only' mode)", dataset.id)

@@ -650,12 +641,11 @@ def _purge_dataset(app, dataset, remove_from_disk, info_only=False):
                     usage_users.append(hda.history.user)
             for user in usage_users:
                 user.adjust_total_disk_usage(-dataset.get_total_size())
-                app.sa_session.add(user)
+                session.add(user)
             log.info("Purging dataset id %d", dataset.id)
             dataset.purged = True
-            app.sa_session.add(dataset)
-            with transaction(session):
-                session.commit()
+            session.add(dataset)
+            session.commit()
         else:
             log.info("Dataset %d will be purged (without 'info_only' mode)", dataset.id)
     else:
@@ -668,9 +658,8 @@ def _purge_dataset(app, dataset, remove_from_disk, info_only=False):
                 log.error("Error, dataset file has already been removed: %s", unicodify(exc))
                 log.error("Purging dataset id %d", dataset.id)
                 dataset.purged = True
-                app.sa_session.add(dataset)
-                with transaction(session):
-                    session.commit()
+                session.add(dataset)
+                session.commit()
         except ObjectNotFound:
             log.error("Dataset %d cannot be found in the object store", dataset.id)
         except Exception as exc:
@@ -694,10 +683,9 @@ def _purge_folder(folder, app, remove_from_disk, info_only=False):
         # TODO: should the folder permissions be deleted here?
         log.info("Purging folder id %s", folder.id)
         folder.purged = True
-        app.sa_session.add(folder)
         session = app.sa_session()
-        with transaction(session):
-            session.commit()
+        session.add(folder)
+        session.commit()


 class CleanupDatasetsApplication:
11 changes: 4 additions & 7 deletions scripts/cleanup_datasets/populate_uuid.py
@@ -15,7 +15,6 @@
 sys.path.insert(1, os.path.abspath(os.path.join(os.path.dirname(__file__), os.pardir, os.pardir, "lib")))

 import galaxy.config
-from galaxy.model.base import transaction
 from galaxy.model.mapping import init_models_from_config
 from galaxy.util.script import (
     app_properties_from_args,
@@ -40,19 +39,17 @@ def main():
     model = init_models_from_config(config)
     session = model.context()

-    for row in model.context.query(model.Dataset):
+    for row in session.query(model.Dataset):
         if row.uuid is None:
             row.uuid = uuid.uuid4()
             print("Setting dataset:", row.id, " UUID to ", row.uuid)
-    with transaction(session):
-        session.commit()
+    session.commit()

-    for row in model.context.query(model.Workflow):
+    for row in session.query(model.Workflow):
         if row.uuid is None:
             row.uuid = uuid.uuid4()
             print("Setting Workflow:", row.id, " UUID to ", row.uuid)
-    with transaction(session):
-        session.commit()
+    session.commit()
     print("Complete")


7 changes: 2 additions & 5 deletions scripts/pages_identifier_conversion.py
@@ -13,7 +13,6 @@
     PageContentProcessor,
     placeholderRenderForSave,
 )
-from galaxy.model.base import transaction
 from galaxy.model.mapping import init_models_from_config
 from galaxy.objectstore import build_object_store_from_config
 from galaxy.security.idencoding import IdEncodingHelper
@@ -42,7 +41,7 @@ def main(argv):
     )

     model = init_models_from_config(config, object_store=object_store)
-    session = model.context.current
+    session = model.context.current()
     pagerevs = session.query(model.PageRevision).all()
     mock_trans = Bunch(app=Bunch(security=security_helper), model=model, user_is_admin=lambda: True, sa_session=session)
     for p in pagerevs:
@@ -54,9 +53,7 @@
             if not args.dry_run:
                 p.content = unicodify(processor.output(), "utf-8")
                 session.add(p)
-                session = session()
-                with transaction(session):
-                    session.commit()
+                session.commit()
             else:
                 print(f"Modifying revision {p.id}.")
                 print(difflib.unified_diff(p.content, newcontent))
11 changes: 4 additions & 7 deletions scripts/set_dataset_sizes.py
@@ -7,7 +7,6 @@
 sys.path.insert(1, os.path.abspath(os.path.join(os.path.dirname(__file__), os.pardir, "lib")))

 import galaxy.config
-from galaxy.model.base import transaction
 from galaxy.model.mapping import init_models_from_config
 from galaxy.objectstore import build_object_store_from_config
 from galaxy.util.script import (
@@ -36,24 +35,22 @@ def init():
     session = sa_session()

     set = 0
-    dataset_count = sa_session.query(model.Dataset).count()
+    dataset_count = session.query(model.Dataset).count()
     print(f"Processing {dataset_count} datasets...")
     percent = 0
     print(f"Completed {percent}%", end=" ")
     sys.stdout.flush()
-    for i, dataset in enumerate(sa_session.query(model.Dataset).enable_eagerloads(False).yield_per(1000)):
+    for i, dataset in enumerate(session.query(model.Dataset).enable_eagerloads(False).yield_per(1000)):
         if dataset.total_size is None:
             dataset.set_total_size()
             set += 1
             if not set % 1000:
-                with transaction(session):
-                    session.commit()
+                session.commit()
         new_percent = int(float(i) / dataset_count * 100)
         if new_percent != percent:
             percent = new_percent
             print(f"\rCompleted {percent}%", end=" ")
             sys.stdout.flush()
-    with transaction(session):
-        session.commit()
+    session.commit()
     print("\rCompleted 100%")
     object_store.shutdown()
4 changes: 1 addition & 3 deletions scripts/tool_shed/deprecate_repositories_without_metadata.py
@@ -26,7 +26,6 @@

 import tool_shed.webapp.config as tool_shed_config
 import tool_shed.webapp.model.mapping
-from galaxy.model.base import transaction
 from galaxy.util import (
     build_url,
     send_mail as galaxy_send_mail,
@@ -176,8 +175,7 @@ def deprecate_repositories(app, cutoff_time, days=14, info_only=False, verbose=F
             repository.deprecated = True
             app.sa_session.add(repository)
             session = app.sa_session()
-            with transaction(session):
-                session.commit()
+            session.commit()
             owner = repositories_by_owner[repository_owner]["owner"]
             send_mail_to_owner(
                 app, owner.username, owner.email, repositories_by_owner[repository_owner]["repositories"], days