From aabf00a70926d2991e64f8754f6a2b8f17d2a391 Mon Sep 17 00:00:00 2001 From: Daniel Olson Date: Mon, 9 Dec 2024 16:34:10 -0700 Subject: [PATCH 1/5] Updated --cite to point to the oup article --- src/cli.cpp | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/src/cli.cpp b/src/cli.cpp index ddc241e..916a6e1 100644 --- a/src/cli.cpp +++ b/src/cli.cpp @@ -367,16 +367,19 @@ bool Settings::parse_input(int argc, const char **argv) { if (this->cite) { printf("BibTeX: \n" - "@article {Olson2024ultra,\n" - " author = {Olson, Daniel R. and Wheeler, Travis J.},\n" - " title = {ULTRA-Effective Labeling of Repetitive Genomic Sequence},\n" - " elocation-id = {2024.06.03.597269},\n" - " year = {2024},\n" - " doi = {10.1101/2024.06.03.597269},\n" - " publisher = {Cold Spring Harbor Laboratory},\n" - " URL = {https://www.biorxiv.org/content/early/2024/06/04/2024.06.03.597269},\n" - " eprint = {https://www.biorxiv.org/content/early/2024/06/04/2024.06.03.597269.full.pdf},\n" - " journal = {bioRxiv}\n" + "@article{10.1093/bioadv/vbae149,\n" + " author = {Olson, Daniel R and Wheeler, Travis J},\n" + " title = {ULTRA-effective labeling of tandem repeats in genomic sequence},\n" + " journal = {Bioinformatics Advances},\n" + " volume = {4},\n" + " number = {1},\n" + " pages = {vbae149},\n" + " year = {2024},\n" + " month = {10},\n" + " issn = {2635-0041},\n" + " doi = {10.1093/bioadv/vbae149},\n" + " url = {https://doi.org/10.1093/bioadv/vbae149},\n" + " eprint = {https://academic.oup.com/bioinformaticsadvances/article-pdf/4/1/vbae149/60779841/vbae149.pdf},\n" "}\n"); exit(0); } From 333fd162b8ad2ba7093f90b6b12011ce0c1fdb12 Mon Sep 17 00:00:00 2001 From: Daniel Olson Date: Mon, 9 Dec 2024 16:34:35 -0700 Subject: [PATCH 2/5] Updated version string --- src/cli.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/cli.hpp b/src/cli.hpp index b98949d..21c5a3a 100644 --- a/src/cli.hpp +++ b/src/cli.hpp @@ -5,7 +5,7 @@ #ifndef 
ULTRA_CLI_HPP #define ULTRA_CLI_HPP -#define ULTRA_VERSION_STRING "1.0.2b" +#define ULTRA_VERSION_STRING "1.0.3" #define DEBUG_STRING "" #ifdef DEBUG_PRAGMA #undef DEBUG_STRING From c019eacc245ca280989d894f4e657252c93358a9 Mon Sep 17 00:00:00 2001 From: Daniel Olson Date: Mon, 9 Dec 2024 16:46:57 -0700 Subject: [PATCH 3/5] Added a --show_count (-c) flag that results in copy number, #insertions, #deletions, and #substitutions to be output. Implemented in json --- src/JSONFileWriter.cpp | 13 ++++++++++--- src/cli.cpp | 5 +++++ src/cli.hpp | 2 ++ 3 files changed, 17 insertions(+), 3 deletions(-) diff --git a/src/JSONFileWriter.cpp b/src/JSONFileWriter.cpp index 1608b41..ba5b730 100644 --- a/src/JSONFileWriter.cpp +++ b/src/JSONFileWriter.cpp @@ -91,9 +91,16 @@ void JSONFileWriter::WriteRepeat(RepeatRegion *repeat) { this->OutputJSONKeyValue("PVal", std::to_string(pval)); } - this->OutputJSONKeyValue("Substitutions", std::to_string(repeat->mismatches)); - this->OutputJSONKeyValue("Insertions", std::to_string(repeat->insertions)); - this->OutputJSONKeyValue("Deletions", std::to_string(repeat->deletions)); + + if (owner->settings->show_counts) { + auto copies = (repeat->repeatLength - repeat->insertions + repeat->deletions) / repeat->repeatPeriod; + this->OutputJSONKeyValue("Copies", std::to_string(copies)); + this->OutputJSONKeyValue("Substitutions", + std::to_string(repeat->mismatches)); + this->OutputJSONKeyValue("Insertions", std::to_string(repeat->insertions)); + this->OutputJSONKeyValue("Deletions", std::to_string(repeat->deletions)); + } + this->OutputJSONKeyValue("Consensus", repeat->string_consensus, true); if (owner->settings->show_seq) { diff --git a/src/cli.cpp b/src/cli.cpp index 916a6e1..979f5de 100644 --- a/src/cli.cpp +++ b/src/cli.cpp @@ -38,6 +38,11 @@ void Settings::prepare_settings() { "Disables streaming output; no output will be created until all analysis has been completed") ->group("Output"); + app.add_flag("-c, --show_counts", + 
this->show_counts, + "Output #copies, #substitutions, #insertions, #deletions") + ->group("Output"); + app.add_flag("--pval", this->pval, "Use p-values instead of scores in BED output") ->group("Output"); diff --git a/src/cli.hpp b/src/cli.hpp index 21c5a3a..204a326 100644 --- a/src/cli.hpp +++ b/src/cli.hpp @@ -42,6 +42,8 @@ struct Settings { bool bed_out = false; bool json_out = false; + + bool show_counts = false; bool show_seq = false; bool show_deltas = false; bool show_trace = false; From 7841c44b57b6d790502907e3edd096fe5504c8c5 Mon Sep 17 00:00:00 2001 From: Daniel Olson Date: Mon, 9 Dec 2024 16:50:49 -0700 Subject: [PATCH 4/5] Added a --show_count (-c) flag that results in copy number, #insertions, #deletions, and #substitutions to be output. Implemented in json and tabfilewriter --- src/TabFileWriter.cpp | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/src/TabFileWriter.cpp b/src/TabFileWriter.cpp index 7a76c92..bb71f73 100644 --- a/src/TabFileWriter.cpp +++ b/src/TabFileWriter.cpp @@ -20,6 +20,11 @@ void TabFileWriter::InitializeWriter(Ultra *ultra, FILE *out_f) { fprintf(out, ",PValue"); if (owner->settings->max_consensus_period >= 0) fprintf(out, "\tConsensus"); + + if (owner->settings->show_counts) { + fprintf(out, "\t#copies\t#substitutions\t#insertions\t#deletions"); + } + if (owner->settings->max_split >= 0) { fprintf(out, "\t#Subrepeats"); fprintf(out, "\tSubrepeatStarts"); @@ -31,6 +36,7 @@ void TabFileWriter::InitializeWriter(Ultra *ultra, FILE *out_f) { fprintf(out, "\tSequence"); } + fprintf(out, "\n"); } @@ -75,6 +81,12 @@ void TabFileWriter::WriteRepeat(RepeatRegion *repeat) { fprintf(out, "\t%s", rep_con.c_str()); } + if (owner->settings->show_counts) { + auto copies = (repeat->repeatLength - repeat->insertions + repeat->deletions) / repeat->repeatPeriod; + + fprintf(out, "\t%lu\t%d\t%d\t%d", copies, repeat->mismatches, repeat->insertions, repeat->deletions); + } + if (owner->settings->max_split >= 0) { std::string sizes = 
""; std::string starts = "0"; @@ -138,6 +150,8 @@ void TabFileWriter::WriteRepeat(RepeatRegion *repeat) { if (owner->settings->show_seq) { fprintf(out, "\t%s", repeat->sequence.c_str()); } + + fprintf(out, "\n"); } From f987a6f4c739a4b9933b6abdb57f7d51bbaa8233 Mon Sep 17 00:00:00 2001 From: Daniel Olson Date: Mon, 9 Dec 2024 17:03:00 -0700 Subject: [PATCH 5/5] Updated README citation info, paper info Updated github workflow to match https://github.com/TravisWheelerLab/release-workflow-example --- .github/workflows/build.yml | 37 ------ .github/workflows/release.yml | 118 +++++++++++++++++++ .github/workflows/update_release_table.py | 131 ++++++++++++++++++++++ README.md | 25 +++-- 4 files changed, 263 insertions(+), 48 deletions(-) delete mode 100644 .github/workflows/build.yml create mode 100644 .github/workflows/release.yml create mode 100644 .github/workflows/update_release_table.py diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml deleted file mode 100644 index 87d08de..0000000 --- a/.github/workflows/build.yml +++ /dev/null @@ -1,37 +0,0 @@ -name: Build and Release - -on: - push: - tags: - - 'v*.*.*' # Trigger only on version tags - pull_request: - tags: - - 'v*.*.*' # Trigger only on version tags - -jobs: - build: - runs-on: ubuntu-latest - - steps: - - name: Checkout code - uses: actions/checkout@v2 - - - name: Configure CMake - run: cmake -S . 
-B build - - - name: Build with CMake - run: cmake --build build --config Release - - - name: Create zip archive of binaries - run: | - mkdir build_output - cp build/ultra build_output/ - zip -r binaries_${{ github.ref_name }}.zip build_output/ - shell: bash - - - name: Upload binaries to GitHub Release - uses: softprops/action-gh-release@v1 - with: - files: binaries_${{ github.ref_name }}.zip - env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml new file mode 100644 index 0000000..4888aab --- /dev/null +++ b/.github/workflows/release.yml @@ -0,0 +1,118 @@ +name: Build and Release + +on: + push: + tags: + - 'v*' # Triggers the workflow when a tag starting with 'v' is pushed + +jobs: + build: + runs-on: ${{ matrix.os }} + strategy: + matrix: + os: [ubuntu-latest, macos-latest] # build for ubuntu and mac + arch: [x64, arm64] # x64 and arm + steps: + - name: Checkout code + uses: actions/checkout@v3 + + - name: Set up build environment + run: | + if [ ${{ matrix.os }} == 'ubuntu-latest' ]; then + sudo apt-get update + sudo apt-get install -y autoconf gcc make + elif [ ${{ matrix.os }} == 'macos-latest' ]; then + brew install autoconf gcc make + fi + + - name: Build + run: | + make + mkdir -p installation/usr/local/bin + cd src && make install DESTDIR=../installation + + - name: Extract version from github.ref + id: extract_version + run: echo "::set-output name=version::$(echo ${GITHUB_REF#refs/tags/})" + + - name: Create tar.gz archive + run: | + cd installation/usr/local/bin + tar -czvf ../../../HELLO-${{ steps.extract_version.outputs.version }}.${{ matrix.os }}.${{ matrix.arch }}.tar.gz hello + + - name: Archive build artifacts + uses: actions/upload-artifact@v4 + with: + name: HELLO-${{ matrix.os }}-${{ matrix.arch }} + path: installation/HELLO-${{ steps.extract_version.outputs.version }}.${{ matrix.os }}.${{ matrix.arch }}.tar.gz + if-no-files-found: error + + release: + needs: build + 
runs-on: ${{ matrix.os }} + strategy: + matrix: + os: [ubuntu-latest, macos-latest] + arch: [x64, arm64] + steps: + - uses: actions/checkout@v3 + + - name: Check if release exists + id: check_release + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + tag: ${{ github.ref_name }} + run: | + if gh release view "$tag" --repo="$GITHUB_REPOSITORY" > /dev/null 2>&1; then + echo "release_exists=true" >> $GITHUB_ENV + else + echo "release_exists=false" >> $GITHUB_ENV + fi + + - name: Create release + if: env.release_exists == 'false' + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + tag: ${{ github.ref_name }} + run: | + gh release create "$tag" \ + --repo="$GITHUB_REPOSITORY" \ + --title="${GITHUB_REPOSITORY#*/} ${tag#v}" \ + --generate-notes + + - name: Download build artifacts + uses: actions/download-artifact@v4 + with: + pattern: HELLO-${{ matrix.os }}-${{ matrix.arch }} + path: ./artifacts + + - name: List files in artifacts directory + run: | + echo "Listing files in ./artifacts:" + ls -l ./artifacts + + - name: Upload Release Assets + uses: softprops/action-gh-release@v2 + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + with: + files: ./artifacts/HELLO-${{ matrix.os }}-${{ matrix.arch }}/* + tag_name: ${{ steps.extract_version.outputs.version }} + name: Release ${{ steps.extract_version.outputs.version }} + prerelease: false + draft: false + token: ${{ secrets.GITHUB_TOKEN }} + + - name: Set up Python + uses: actions/setup-python@v4 + with: + python-version: '3.x' # Specify the version of Python to use + + - name: Install dependencies + run: python -m pip install --upgrade pip && pip install requests + + - name: Run Python script + run: | + python .github/workflows/update_release_table.py ${{ github.ref_name }} + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} \ No newline at end of file diff --git a/.github/workflows/update_release_table.py b/.github/workflows/update_release_table.py new file mode 100644 index 0000000..016b488 --- /dev/null +++ 
b/.github/workflows/update_release_table.py @@ -0,0 +1,131 @@ +import requests +import argparse +import os +import re + +# GitHub repository details +# adjust REPO to match your setup +REPO = "TravisWheelerLab/ULTRA" +API_URL = f"https://api.github.com/repos/{REPO}/releases" + +def get_releases_data(): # Fetch this repository's releases (parsed JSON) from the GitHub API; raises on HTTP errors + headers = { + 'Authorization': f'token {os.getenv("GITHUB_TOKEN")}', + 'Accept': 'application/vnd.github.v3+json', + } + response = requests.get(API_URL, headers=headers) + response.raise_for_status() + return response.json() + +def find_release_by_version(releases, version): # Return the release whose tag_name equals version, else None + for release in releases: + if release["tag_name"] == version: + return release + return None + +def extract_os_arch_from_filename(filename): # Map '<...>.<os>-<variant>.<arch>.tar.gz' to display (OS, architecture); (None, None) if no match + pattern = r'[.-]([a-zA-Z]+-[a-zA-Z]+)\.([a-zA-Z0-9_]+)\.tar\.gz' + + match = re.search(pattern, filename) + + if match: + os_name = match.group(1) # not named 'os', which would shadow the imported os module + arch = match.group(2) + + if os_name == "macos-latest": + os_name = "MacOS" + elif os_name == "ubuntu-latest": + os_name = "Ubuntu" + elif os_name == "windows-latest": + os_name = "Windows" + + if arch == "x64": + arch = "Intel/AMD 64-bit" + elif arch == "386": + arch = "Intel/AMD 32-bit" + elif arch == "arm64": + if os_name == "MacOS": + arch = "M1/M2/M3 (ARM 64-bit)" + else: + arch = "ARM 64-bit" + elif arch == "arm": + arch = "ARM 32-bit" + + return os_name, arch + else: + return None, None + +def generate_markdown_table(release): # Build the markdown 'Release Assets' table, skipping .md5 assets + table = "### Release Assets\n" + table += "| OS | Architecture | Link |\n" + table += "|---------|----------|-------------|\n" + + for asset in release["assets"]: + if not asset["name"].endswith(".md5"): + # parse + os_name, arch = extract_os_arch_from_filename(asset["name"]) + download_url = asset["browser_download_url"] + table += f"| {os_name} | {arch} | [Download]({download_url}) |\n" + + # Add note about Mac binary signing restriction + table += ( + "\nTo address the Mac binary signing restriction, use the following command: " + "`sudo xattr -dr com.apple.quarantine /my-binary-amd64`\n" + ) + + return table + +def
update_release_body(release_id, new_body): # PATCH the release's body text; raises on HTTP errors + headers = { + 'Authorization': f'token {os.getenv("GITHUB_TOKEN")}', + 'Accept': 'application/vnd.github.v3+json', + } + update_url = f"https://api.github.com/repos/{REPO}/releases/{release_id}" + data = { + "body": new_body + } + + response = requests.patch(update_url, headers=headers, json=data) + response.raise_for_status() + +""" +def update_readme(table): + with open("README.md", "r") as file: + lines = file.readlines() + + with open("README.md", "w") as file: + inside_table = False + for line in lines: + if line.startswith("| Version"): + inside_table = True + file.write(table) + continue + if inside_table and line.startswith("|"): + continue + file.write(line) +""" + +def main(target_version): # Append the asset table to the matching release unless its body already contains one + releases = get_releases_data() + release_to_update = find_release_by_version(releases, target_version) + + if release_to_update: + release_id = release_to_update["id"] + current_body = release_to_update.get("body") or "" # body may be null (None) in the API response; .get()'s default only covers a missing key + + markdown_table = generate_markdown_table(release_to_update) + print(markdown_table) + + if "### Release Assets\n" not in current_body: + updated_body = current_body + "\n\n" + markdown_table + update_release_body(release_id, updated_body) + print(f"Release {target_version} updated successfully.") + else: + print(f"Release with version {target_version} not found.") + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description="Update a specific release in GitHub.") + parser.add_argument("version", help="The tag name of the release to update") + args = parser.parse_args() + + main(args.version) diff --git a/README.md b/README.md index 00799c6..d06ad29 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,7 @@ **U**LTRA **L**ocates **T**andemly **R**epetitive **A**reas ## About -ULTRA is a tool for finding and annotating tandem repeats within genomic sequence.
Model details and evaluation can be found in our release paper, [ULTRA-Effective Labeling of Tandem Repeats in Genomic Sequence](https://www.biorxiv.org/content/10.1101/2024.06.03.597269v1) +ULTRA is a tool for finding and annotating tandem repeats within genomic sequence. Model details and evaluation can be found in our release paper, [ULTRA-Effective Labeling of Tandem Repeats in Genomic Sequence](https://academic.oup.com/bioinformaticsadvances/article/4/1/vbae149/7816237) ## Building ULTRA requires a compiler supporting C++11 or higher and CMake 3.12 or higher. To download and build ULTRA run the following commands: @@ -162,15 +162,18 @@ Here `examples/tune_file` tests different repeat periods (`-p `), ## Citing ``` -@article {Olson2024ultra, - author = {Olson, Daniel R. and Wheeler, Travis J.}, - title = {ULTRA-Effective Labeling of Repetitive Genomic Sequence}, - elocation-id = {2024.06.03.597269}, - year = {2024}, - doi = {10.1101/2024.06.03.597269}, - publisher = {Cold Spring Harbor Laboratory}, - URL = {https://www.biorxiv.org/content/early/2024/06/04/2024.06.03.597269}, - eprint = {https://www.biorxiv.org/content/early/2024/06/04/2024.06.03.597269.full.pdf}, - journal = {bioRxiv} +@article{10.1093/bioadv/vbae149, + author = {Olson, Daniel R and Wheeler, Travis J}, + title = {ULTRA-effective labeling of tandem repeats in genomic sequence}, + journal = {Bioinformatics Advances}, + volume = {4}, + number = {1}, + pages = {vbae149}, + year = {2024}, + month = {10}, + issn = {2635-0041}, + doi = {10.1093/bioadv/vbae149}, + url = {https://doi.org/10.1093/bioadv/vbae149}, + eprint = {https://academic.oup.com/bioinformaticsadvances/article-pdf/4/1/vbae149/60779841/vbae149.pdf}, } ```