From 0b4326e0590c8493fcb36f5ac5a7a4c93cbc3b24 Mon Sep 17 00:00:00 2001 From: Erik Johnson Date: Sun, 24 Nov 2024 14:50:24 +0100 Subject: [PATCH 1/2] Stop on existing --- bdfr/__main__.py | 1 + bdfr/configuration.py | 1 + bdfr/connector.py | 3 +++ bdfr/downloader.py | 6 ++++++ 4 files changed, 11 insertions(+) diff --git a/bdfr/__main__.py b/bdfr/__main__.py index dadba517..51f1ee86 100644 --- a/bdfr/__main__.py +++ b/bdfr/__main__.py @@ -60,6 +60,7 @@ click.option("--max-score", type=int, default=None), click.option("--min-score-ratio", type=float, default=None), click.option("--max-score-ratio", type=float, default=None), + click.option("--stop-on-exist", is_flag=True, default=None), ] _archiver_options = [ diff --git a/bdfr/configuration.py b/bdfr/configuration.py index 05fc27e8..16438ce3 100644 --- a/bdfr/configuration.py +++ b/bdfr/configuration.py @@ -53,6 +53,7 @@ def __init__(self): self.upvoted: bool = False self.user: list[str] = [] self.verbose: int = 0 + self.stop_on_exist: bool = False # Archiver-specific options self.all_comments = False diff --git a/bdfr/connector.py b/bdfr/connector.py index 77a4a71a..c33c82ff 100644 --- a/bdfr/connector.py +++ b/bdfr/connector.py @@ -61,6 +61,7 @@ def __init__(self, args: Configuration, logging_handlers: Iterable[logging.Handl self._apply_logging_handlers(itertools.chain(logging_handlers, [file_log])) self.run_time = datetime.now().isoformat() self._setup_internal_objects() + self.existcount=0 self.reddit_lists = self.retrieve_reddit_lists() @@ -350,6 +351,8 @@ def get_multireddits(self) -> list[Iterator]: def create_filtered_listing_generator(self, reddit_source) -> Iterator: sort_function = self.determine_sort_function() + if self.args.stop_on_exist and sort_function != praw.models.Subreddit.new: + logger.warning("Stopping downloads when an old duplicate is encountered works best when sorted by new.") if self.sort_filter in (RedditTypes.SortType.TOP, RedditTypes.SortType.CONTROVERSIAL): return sort_function(reddit_source, limit=self.args.limit, time_filter=self.time_filter.value) else: diff --git a/bdfr/downloader.py b/bdfr/downloader.py index 20984e69..0ccc64fa 100644 --- a/bdfr/downloader.py +++ b/bdfr/downloader.py @@ -111,10 +111,16 @@ def _download_submission(self, submission: praw.models.Submission): for destination, res in self.file_name_formatter.format_resource_paths(content, self.download_directory): if destination.exists(): logger.debug(f"File {destination} from submission {submission.id} already exists, continuing") + if self.args.stop_on_exist and not submission.stickied: + self.existcount+=1 + if self.existcount>=5: + logger.warning(f"Prevously-downloaded threshold met, exiting") + exit(0) continue elif not self.download_filter.check_resource(res): logger.debug(f"Download filter removed {submission.id} file with URL {submission.url}") continue + self.existcount=0 try: res.download({"max_wait_time": self.args.max_wait_time}) except errors.BulkDownloaderException as e: From 00ed5085a44465530f35d939c9d8620dc388db1e Mon Sep 17 00:00:00 2001 From: Erik Johnson Date: Thu, 28 Nov 2024 09:40:57 +0100 Subject: [PATCH 2/2] Formatting changes --- bdfr/connector.py | 2 +- bdfr/downloader.py | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/bdfr/connector.py b/bdfr/connector.py index c33c82ff..86ded1e8 100644 --- a/bdfr/connector.py +++ b/bdfr/connector.py @@ -61,7 +61,7 @@ def __init__(self, args: Configuration, logging_handlers: Iterable[logging.Handl self._apply_logging_handlers(itertools.chain(logging_handlers, [file_log])) self.run_time = datetime.now().isoformat() self._setup_internal_objects() - self.existcount=0 + self.existcount = 0 self.reddit_lists = self.retrieve_reddit_lists() diff --git a/bdfr/downloader.py b/bdfr/downloader.py index 0ccc64fa..e62c930a 100644 --- a/bdfr/downloader.py +++ b/bdfr/downloader.py @@ -112,15 +112,15 @@ def _download_submission(self, submission: praw.models.Submission): if destination.exists(): logger.debug(f"File {destination} from submission {submission.id} already exists, continuing") if self.args.stop_on_exist and not submission.stickied: - self.existcount+=1 - if self.existcount>=5: - logger.warning(f"Prevously-downloaded threshold met, exiting") + self.existcount += 1 + if self.existcount >= 5: + logger.warning("Prevously-downloaded threshold met, exiting") exit(0) continue elif not self.download_filter.check_resource(res): logger.debug(f"Download filter removed {submission.id} file with URL {submission.url}") continue - self.existcount=0 + self.existcount = 0 try: res.download({"max_wait_time": self.args.max_wait_time}) except errors.BulkDownloaderException as e: