diff --git a/src/initialize.py b/src/initialize.py index e19530a6..d08e5c41 100644 --- a/src/initialize.py +++ b/src/initialize.py @@ -2,8 +2,6 @@ import yaml import platform import ctranslate2 -import os -import shutil from pathlib import Path def get_compute_device_info(): @@ -62,22 +60,10 @@ def update_config_file(**system_info): with open(full_config_path, 'w') as stream: yaml.safe_dump(config_data, stream) -def move_custom_pdf_loader(): - current_dir = Path.cwd() - user_manual_pdf_path = current_dir / "User_Manual" / "PDF.py" - lib_pdf_path = current_dir / "Lib" / "site-packages" / "langchain" / "document_loaders" / "parsers" / "PDF.py" - - user_manual_pdf_size = user_manual_pdf_path.stat().st_size - lib_pdf_size = lib_pdf_path.stat().st_size - - if user_manual_pdf_size != lib_pdf_size: - shutil.copy(user_manual_pdf_path, lib_pdf_path) - def main(): compute_device_info = get_compute_device_info() platform_info = get_platform_info() update_config_file(Compute_Device=compute_device_info, Platform_Info=platform_info) - move_custom_pdf_loader() if __name__ == "__main__": main() diff --git a/src/replace_pdf.py b/src/replace_pdf.py index de787c4f..5d99f6ae 100644 --- a/src/replace_pdf.py +++ b/src/replace_pdf.py @@ -1,5 +1,6 @@ import shutil from pathlib import Path +import hashlib def find_all_target_directories_with_file(base_path, target_folder, target_file): found_directories = [] @@ -20,6 +21,14 @@ def find_closest_directory(directories, base_directory): depths = [(dir, get_directory_depth(dir, base_directory)) for dir in directories] return min(depths, key=lambda x: x[1])[0] +def hash_file(filepath): + """Compute the SHA-256 hash of a file.""" + hasher = hashlib.sha256() + with open(filepath, 'rb') as f: + buf = f.read() + hasher.update(buf) + return hasher.hexdigest() + def replace_pdf_in_parsers(): script_dir = Path(__file__).parent user_manual_pdf_path = script_dir / "User_Manual" / "PDF.py" @@ -28,7 +37,7 @@ def replace_pdf_in_parsers(): print("No 'pdf.py' file found in 'User_Manual' directory.") return - base_dir = script_dir.parent.parent # Move up two levels from the script's location + base_dir = script_dir.parent # Move up one level from the script's location target_folder = "parsers" target_file = "pdf.py" found_paths = find_all_target_directories_with_file(base_dir, target_folder, target_file) @@ -44,16 +53,16 @@ def replace_pdf_in_parsers(): print(f"Chosen 'parsers' directory based on path depth: {closest_parsers_path}") chosen_pdf_path = closest_parsers_path / target_file - # File size comparison and replacement - user_manual_pdf_size = user_manual_pdf_path.stat().st_size - chosen_pdf_size = chosen_pdf_path.stat().st_size + # Hash comparison and replacement + user_manual_pdf_hash = hash_file(user_manual_pdf_path) + chosen_pdf_hash = hash_file(chosen_pdf_path) - if user_manual_pdf_size != chosen_pdf_size: + if user_manual_pdf_hash != chosen_pdf_hash: print("Replacing the existing pdf.py with the new one...") shutil.copy(user_manual_pdf_path, chosen_pdf_path) print(f"PDF.py replaced at: {chosen_pdf_path}") else: - print("No replacement needed. The files are of the same size.") + print("No replacement needed. The files are identical.") if __name__ == "__main__": replace_pdf_in_parsers()