From 537beb3dc5d20ebf795ddc14f3a294ac13a870b3 Mon Sep 17 00:00:00 2001
From: Terry Kong <terryk@nvidia.com>
Date: Fri, 13 Dec 2024 12:42:19 -0800
Subject: [PATCH 1/8] docs: fix typo PRETRAINED_ACTOR_NEMO_FILE ->
 ACTOR_NEMO_FILE (#449)

Signed-off-by: Terry Kong <terryk@nvidia.com>
---
 docs/user-guide/reinforce.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/user-guide/reinforce.rst b/docs/user-guide/reinforce.rst
index cc3005db1..b39fc7204 100644
--- a/docs/user-guide/reinforce.rst
+++ b/docs/user-guide/reinforce.rst
@@ -58,7 +58,7 @@ The REINFORCE Actor training job contains the master controller that makes the H
    TRAIN_DATA_PATH="/path/to/train_prompts.jsonl"
    VALID_DATA_PATH="/path/to/test_prompts.jsonl"
 
-   PRETRAINED_ACTOR_NEMO_FILE="/path/to/sft_checkpoint.nemo"
+   ACTOR_NEMO_FILE="/path/to/sft_checkpoint.nemo"
    RESULTS_DIR="/path/to/actor_results_dir"
 
    USE_FLASK=False

From 4ee496cd7dc8a26810dedff05df3b1006704c359 Mon Sep 17 00:00:00 2001
From: Anna Shors <ashors@nvidia.com>
Date: Mon, 16 Dec 2024 17:53:43 -0800
Subject: [PATCH 2/8] docs: fix minor typo (#452)

Signed-off-by: ashors1 <ashors@nvidia.com>
---
 docs/user-guide/dpo.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/user-guide/dpo.rst b/docs/user-guide/dpo.rst
index fa75941d0..2ce17f34c 100644
--- a/docs/user-guide/dpo.rst
+++ b/docs/user-guide/dpo.rst
@@ -46,7 +46,7 @@ To start, we must first get a pretrained model to align. There are two models we
                   --in-folder ./model_checkpoint \
                   --out-file ./mcore_gpt.nemo
 
-    .. tab-item:: LLaMa3 7B
+    .. tab-item:: LLaMa3 8B
         :sync: key2
 
         #. Download the `Llama 3 8B LLM model and tokenizer <https://huggingface.co/meta-llama/Meta-Llama-3-8B>`__ into the models folder.

From b9844bd46909f32814532d5e85b7bc68388e6ac0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?oliver=20k=C3=B6nig?= <okoenig@nvidia.com>
Date: Wed, 18 Dec 2024 00:20:43 +0100
Subject: [PATCH 3/8] ci: Bump release workflow (#445)

Signed-off-by: Oliver Koenig <okoenig@nvidia.com>
---
 .github/workflows/release-freeze.yml | 17 +++++++++++------
 .github/workflows/release.yaml       |  6 +++---
 2 files changed, 14 insertions(+), 9 deletions(-)

diff --git a/.github/workflows/release-freeze.yml b/.github/workflows/release-freeze.yml
index 2513928c9..10ae3386e 100644
--- a/.github/workflows/release-freeze.yml
+++ b/.github/workflows/release-freeze.yml
@@ -3,20 +3,25 @@ name: "Code freeze"
 on:
   workflow_dispatch:
     inputs:
-      type_of_release:
+      release-type:
         type: choice
         description: Type of release
         options: 
         - major
         - minor
-
+      freeze-commit:
+        type: string
+        description: Commit SHA to use for cut-off
+        required: false
+        default: main
 jobs:
   code-freeze:
-    uses: NVIDIA/NeMo-FW-CI-templates/.github/workflows/_code_freeze.yml@v0.8.0
+    uses: NVIDIA/NeMo-FW-CI-templates/.github/workflows/_code_freeze.yml@v0.17.3
     with:
-      name_of_library: NeMo-Aligner
-      type_of_release: ${{ inputs.type_of_release }}
-      python_package: nemo_aligner
+      library-name: NeMo-Aligner
+      python-package: nemo_aligner
+      release-type: ${{ inputs.release-type }}
+      freeze-commit: ${{ inputs.freeze-commit }}
     secrets:
       SLACK_RELEASE_ENDPOINT: ${{ secrets.SLACK_RELEASE_ENDPOINT }}
       SLACK_WEBHOOK_ADMIN: ${{ secrets.SLACK_WEBHOOK_ADMIN }}
diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml
index 6991a5cfb..d9d1995b1 100644
--- a/.github/workflows/release.yaml
+++ b/.github/workflows/release.yaml
@@ -25,10 +25,10 @@ on:
         required: true
         default: true
         type: boolean
-    
+          
 jobs:
   release:
-    uses: NVIDIA/NeMo-FW-CI-templates/.github/workflows/_release_library.yml@v0.15.0
+    uses: NVIDIA/NeMo-FW-CI-templates/.github/workflows/_release_library.yml@v0.17.3
     with:
       release-ref: ${{ inputs.release-ref }}
       image-name: nemo_aligner_container
@@ -46,5 +46,5 @@ jobs:
       TWINE_USERNAME: ${{ secrets.TWINE_USERNAME }}
       TWINE_PASSWORD: ${{ secrets.TWINE_PASSWORD }}
       SLACK_RELEASE_ENDPOINT: ${{ secrets.SLACK_RELEASE_ENDPOINT }}
-      PAT: ${{ secrets.PAT }}
       SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
+      PAT: ${{ secrets.PAT }}

From af91e522c0f446ce0d52d72c05e3205161ab9c5e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?oliver=20k=C3=B6nig?= <okoenig@nvidia.com>
Date: Wed, 18 Dec 2024 10:23:45 +0100
Subject: [PATCH 4/8] ci: Add final status check (#455)

Signed-off-by: Oliver Koenig <okoenig@nvidia.com>
---
 .github/workflows/cicd-main.yml | 63 ++++++++++++++++++++++++++++++---
 1 file changed, 59 insertions(+), 4 deletions(-)

diff --git a/.github/workflows/cicd-main.yml b/.github/workflows/cicd-main.yml
index 3f11fa876..e228c0971 100644
--- a/.github/workflows/cicd-main.yml
+++ b/.github/workflows/cicd-main.yml
@@ -41,6 +41,7 @@ jobs:
     outputs:
       test_to_run: ${{ steps.test_to_run.outputs.main }}
       all: ${{ steps.all.outputs.main }}
+      run_ci: ${{ steps.evaluate.outputs.run_ci }}
     steps:
       - name: Parse test_to_run
         id: test_to_run
@@ -51,9 +52,42 @@ jobs:
         id: all
         run: |
           echo "main=${{ contains(fromJSON(steps.test_to_run.outputs.main), 'all') }}" | tee -a "$GITHUB_OUTPUT"
-  
+
+      - name: Get changed files
+        id: changed-files
+        uses: tj-actions/changed-files@v44
+        with:
+          files_yaml: |
+            doc:
+              - '**.md'
+              - docs/**
+            src:
+              - '!**.md'
+              - '!docs/**'
+
+      - name: Evaluate conditions
+        id: evaluate
+        env:
+          DOCS_ONLY: ${{ steps.changed-files.outputs.doc_any_changed == 'true' && steps.changed-files.outputs.src_any_changed == 'false' }}
+          CHANGED_DOCS: ${{ steps.changed-files.outputs.doc_all_changed_files }}
+          CHANGED_SRC: ${{ steps.changed-files.outputs.src_all_changed_files }}
+          LABEL: ${{ github.event.label.name == 'Run CICD' }}
+          MAIN_BRANCH: ${{ github.ref == 'refs/heads/main' }}          
+        run: |
+          # Some output that's helpful for debugging
+          echo "Docs changed: $CHANGED_DOCS"
+          echo "Src changed: $CHANGED_SRC"
+          
+          echo "docs_only: $DOCS_ONLY"
+          echo "label: $LABEL"
+          echo "main_branch: $MAIN_BRANCH"
+          
+          # Run CI only (on main or if label is attached) and if it's not only docs
+          echo run_ci=$([[ ("$LABEL" = "true" || "$MAIN_BRANCH" = "true") && "$DOCS_ONLY" = "false" ]] && echo "true" || echo "false") | tee -a "$GITHUB_OUTPUT"
+
   build-container:
-    if: ${{ github.event.label.name == 'Run CICD' || github.ref == 'refs/heads/main' }}
+    if: ${{ needs.pre-flight.outputs.run_ci == 'true' }}
+    needs: [pre-flight]
     uses: NVIDIA/NeMo-FW-CI-templates/.github/workflows/_build_container.yml@v0.1.0
     with:
       image-name: nemo_aligner_container
@@ -67,7 +101,7 @@ jobs:
     name: ${{ matrix.test_case }}
     needs: [build-container, pre-flight]
     uses: ./.github/workflows/_run_test.yml
-    if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'unit') || needs.pre-flight.outputs.all == 'true'
+    if: ${{ needs.pre-flight.outputs.run_ci == 'true' }}
     strategy:
       matrix:
         test_case:
@@ -85,7 +119,7 @@ jobs:
     name: ${{ matrix.test_case }}
     needs: [build-container, pre-flight]
     uses: ./.github/workflows/_run_test.yml
-    if: contains(fromJSON(needs.pre-flight.outputs.test_to_run), 'functional') || needs.pre-flight.outputs.all == 'true'
+    if: ${{ needs.pre-flight.outputs.run_ci == 'true' }}
     strategy:
       matrix:
         test_case:
@@ -102,3 +136,24 @@ jobs:
       TIMEOUT: 8
       SCRIPT: |
         bash /opt/NeMo-Aligner/tests/functional/test_cases/${{ matrix.test_case }}
+
+  CI_QA_Gate:
+    name: CI quality check
+    if: always()
+    runs-on: ubuntu-latest
+    needs: 
+      - Unit_Tests
+      - Functional_Tests
+    steps:
+      - name: main
+        env:
+          JOB_RESULTS: ${{ toJSON(needs) }}
+          ALL_SUCCESS: ${{ !contains(needs.*.result, 'failure') && !contains(needs.*.result, 'cancelled') && !contains(needs.*.result, 'skipped') }}
+          CI_SKIP: ${{ github.event.label.name == 'Skip CICD' }}
+        run: |
+         
+          SUMMARY=$(echo $JOB_RESULTS | jq 'to_entries[] | .key + ": " + .value.result' | tr -d '"')
+          echo '🤖: CICD Result' >> $GITHUB_STEP_SUMMARY
+          echo "$SUMMARY" >> $GITHUB_STEP_SUMMARY
+          
+          test "$ALL_SUCCESS" = "true" || test "$CI_SKIP" = "true"
\ No newline at end of file

From 9be1c3715e73d4c46040e6cc76914bfd1aca9028 Mon Sep 17 00:00:00 2001
From: Anna Shors <ashors@nvidia.com>
Date: Wed, 18 Dec 2024 16:02:43 -0600
Subject: [PATCH 5/8] docs: add llama download command (#460)

Co-authored-by: Terry Kong <terryk@nvidia.com>
---
 docs/user-guide/dpo.rst | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/docs/user-guide/dpo.rst b/docs/user-guide/dpo.rst
index 2ce17f34c..d227df6f5 100644
--- a/docs/user-guide/dpo.rst
+++ b/docs/user-guide/dpo.rst
@@ -49,7 +49,11 @@ To start, we must first get a pretrained model to align. There are two models we
     .. tab-item:: LLaMa3 8B
         :sync: key2
 
-        #. Download the `Llama 3 8B LLM model and tokenizer <https://huggingface.co/meta-llama/Meta-Llama-3-8B>`__ into the models folder.
+        #. Download the `Llama 3 8B LLM model and tokenizer <https://huggingface.co/meta-llama/Meta-Llama-3-8B>`__ into the models folder. You can use the Hugging Face CLI for this:
+            .. code-block:: bash
+
+               huggingface-cli download meta-llama/Meta-Llama-3-8B --local-dir /path/to/llama
+
         #. Convert the LLaMa3 LLM into ``.nemo`` format.
             .. code-block:: bash 
 

From 1c6e5018da4d39c8f42ee4c88ea6ede60a3f634e Mon Sep 17 00:00:00 2001
From: Terry Kong <terryk@nvidia.com>
Date: Wed, 18 Dec 2024 15:37:04 -0800
Subject: [PATCH 6/8] docs: fix reinforce and ppo to specify mpirun requirement
 (#462)

Signed-off-by: Terry Kong <terryk@nvidia.com>
---
 docs/user-guide/reinforce.rst | 16 ++++++++--------
 docs/user-guide/rlhf.rst      | 24 ++++++++++++++++++++++++
 2 files changed, 32 insertions(+), 8 deletions(-)

diff --git a/docs/user-guide/reinforce.rst b/docs/user-guide/reinforce.rst
index b39fc7204..1e1651668 100644
--- a/docs/user-guide/reinforce.rst
+++ b/docs/user-guide/reinforce.rst
@@ -3,14 +3,14 @@
 .. _model-aligner-reinforce:
 
 Model Alignment by REINFORCE
-@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+@@@@@@@@@@@@@@@@@@@@@@@@@@@@
 
 In this tutorial, we will guide you through the process of aligning a NeMo Framework model using REINFORCE. This method can be applied to various models, including LLaMa2 and Mistral, with our scripts functioning consistently across different models.
 
 REINFORCE is usually preceded by a Supervised Fine-Tuning (SFT). We should first follow the :ref:`Prerequisite guide <prerequisite>` and the :ref:`SFT guide <sft>`. After obtaining the SFT model, we will also need to train a reward model as in :ref:`PPO guide <ppo>`. We will use the REINFORCE algorithm on the `Anthropic-HH-RLHF <https://huggingface.co/datasets/Anthropic/hh-rlhf>`__ dataset.
 
 REINFORCE Training
-############
+##################
 
 After you have fine-tuned a GPT model using Supervised Fine-Tuning (SFT), and trained a reward model as explained in the preceding section, you can start aligning the policy using REINFORCE.
 
@@ -48,7 +48,7 @@ To launch the server:
 The above example launches the reward model server on eight GPUs and one node. Make sure to change trainer.devices, trainer.num_nodes depending on your model size and scale. Aligner will work on any scale. Also, make sure to tune the trainer.reinforce.inference_micro_batch_size argument. This argument sets the size of the batch the REINFORCE actor is allowed to send to the reward per DP rank.
 
 Launch the Initial Policy and REINFORCE Actor Training
-%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 
 The REINFORCE Actor training job contains the master controller that makes the HTTP calls to all servers when needed. To launch the REINFORCE Actor and Initial Policy server:
 
@@ -73,7 +73,7 @@ The REINFORCE Actor training job contains the master controller that makes the H
    cd ${GPFS}
    export PYTHONPATH="${GPFS}:${PYTHONPATH}" \
    && export HYDRA_FULL_ERROR=1 \
-   && python -u examples/nlp/gpt/train_gpt_reinforce_actor.py \
+   && mpirun -n 8 --allow-run-as-root python -u examples/nlp/gpt/train_gpt_reinforce_actor.py \
       "model.data.data_prefix={train: [${TRAIN_DATA_PATH}], validation: [${VALID_DATA_PATH}], test: [${VALID_DATA_PATH}]}" \
       pretrained_checkpoint.restore_from_path=\"${ACTOR_NEMO_FILE}\" \
       exp_manager.checkpoint_callback_params.save_top_k=1 \
@@ -114,7 +114,7 @@ The REINFORCE Actor training job contains the master controller that makes the H
 The above command launches the initial and actor server on one node with eight GPUs.
 
 Launching Both Servers for REINFORCE training
-%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 
 You can use slurm to launch the two jobs and get them to coordinate together in a full REINFORCE job through the following:
 
@@ -239,7 +239,7 @@ You can use slurm to launch the two jobs and get them to coordinate together in
       trainer.reinforce.rollout_batch_seq_length=4096
    EOF
 
-   srun --het-group=1 -o $PPO_OUTFILE -e $PPO_ERRFILE --container-image=${CONTAINER} $MOUNTS bash -c "${cmd_reinforce}" &
+   srun --mpi=pmix --het-group=1 -o $PPO_OUTFILE -e $PPO_ERRFILE --container-image=${CONTAINER} $MOUNTS bash -c "${cmd_reinforce}" &
 
    wait
 
@@ -251,6 +251,6 @@ It is important to launch all jobs with ``&`` after the srun command to ensure t
    Make sure to change the reward model arg ``trainer.reinforce.inference_micro_batch_size`` such that ``trainer.reinforce.inference_micro_batch_size * DP size <= model.reinforce.rollout_micro_batch_size``.
 
 REINFORCE Results
-%%%%%%%%%%%%%%%%%%%%%%%%%%
+%%%%%%%%%%%%%%%%%
 
-After you've completed reinforce training, you can serve your model using the `megatron_gpt_eval.py <https://github.com/NVIDIA/NeMo/blob/8cd5f1c8e7d4fed9f4f946028cd02047c5d2296f/examples/nlp/language_modeling/megatron_gpt_eval.py#L4>`__ script from the NeMo codebase to run more rigorous evaluation of your trained model.
\ No newline at end of file
+After you've completed REINFORCE training, you can serve your model using the `megatron_gpt_eval.py <https://github.com/NVIDIA/NeMo/blob/8cd5f1c8e7d4fed9f4f946028cd02047c5d2296f/examples/nlp/language_modeling/megatron_gpt_eval.py#L4>`__ script from the NeMo codebase to run more rigorous evaluation of your trained model.
diff --git a/docs/user-guide/rlhf.rst b/docs/user-guide/rlhf.rst
index 5c68edb60..3e98e7fe2 100644
--- a/docs/user-guide/rlhf.rst
+++ b/docs/user-guide/rlhf.rst
@@ -383,6 +383,30 @@ NeMo-Aligner has support for accelerating RLHF with `TensorRT-LLM <https://githu
 
 For more information please see the NeMo-Aligner `paper <https://arxiv.org/abs/2405.01481>`__.
 
+.. note::
+    If you are running ``train_gpt_ppo_actor.py`` interactively (outside of SLURM) with TensorRT-LLM acceleration,
+    you must prepend ``mpirun -n 8 --allow-run-as-root`` to the python run command:
+
+    .. code-block:: bash
+
+        mpirun -n 8 --allow-run-as-root python -u ${GPFS}/examples/nlp/gpt/train_gpt_ppo_actor.py ...
+
+    If you are using SLURM, you do not need to prepend ``mpirun`` since this will be handled automatically
+    if you run ``srun`` with ``--mpi=pmix``:
+
+    .. code-block:: bash
+
+        read -r -d '' cmd_ppo <<EOF
+        cd ${GPFS} \
+        && export PYTHONPATH="${GPFS}:${PYTHONPATH}" \
+        && export HYDRA_FULL_ERROR=1 \
+        && python -u ${GPFS}/examples/nlp/gpt/train_gpt_ppo_actor.py \
+            ...
+        EOF
+
+        srun --mpi=pmix ... bash -c "${cmd_ppo}"
+
+
 PPO Results with TensorRT-LLM
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 

From 92233b07c31a7bf2afd8ae29de34e77977506398 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?oliver=20k=C3=B6nig?= <okoenig@nvidia.com>
Date: Thu, 19 Dec 2024 22:04:06 +0100
Subject: [PATCH 7/8] ci: Use `github.sha` for build (#463)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: oliver könig <okoenig@nvidia.com>
---
 .github/workflows/cicd-main.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/cicd-main.yml b/.github/workflows/cicd-main.yml
index e228c0971..6f2ee3775 100644
--- a/.github/workflows/cicd-main.yml
+++ b/.github/workflows/cicd-main.yml
@@ -95,7 +95,7 @@ jobs:
       image-label: nemo-aligner
       build-args: |
         MAX_JOBS=32
-        ALIGNER_COMMIT=${{ github.event.pull_request.head.sha || github.sha }}
+        ALIGNER_COMMIT=${{ github.sha }}
   
   Unit_Tests:
     name: ${{ matrix.test_case }}

From 7f33afc32bb0fcaae8d449a85b253238ec041850 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?oliver=20k=C3=B6nig?= <okoenig@nvidia.com>
Date: Fri, 20 Dec 2024 00:16:00 +0100
Subject: [PATCH 8/8] ci: Run on `main` (#464)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: oliver könig <okoenig@nvidia.com>
---
 .github/workflows/cicd-main.yml | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/.github/workflows/cicd-main.yml b/.github/workflows/cicd-main.yml
index 6f2ee3775..781d3b482 100644
--- a/.github/workflows/cicd-main.yml
+++ b/.github/workflows/cicd-main.yml
@@ -55,6 +55,7 @@ jobs:
 
       - name: Get changed files
         id: changed-files
+        if: github.event_name == 'pull_request'
         uses: tj-actions/changed-files@v44
         with:
           files_yaml: |
@@ -71,19 +72,19 @@ jobs:
           DOCS_ONLY: ${{ steps.changed-files.outputs.doc_any_changed == 'true' && steps.changed-files.outputs.src_any_changed == 'false' }}
           CHANGED_DOCS: ${{ steps.changed-files.outputs.doc_all_changed_files }}
           CHANGED_SRC: ${{ steps.changed-files.outputs.src_all_changed_files }}
+          IS_PULLREQUEST: ${{ github.event_name == 'pull_request' }}
           LABEL: ${{ github.event.label.name == 'Run CICD' }}
-          MAIN_BRANCH: ${{ github.ref == 'refs/heads/main' }}          
         run: |
           # Some output that's helpful for debugging
           echo "Docs changed: $CHANGED_DOCS"
           echo "Src changed: $CHANGED_SRC"
           
-          echo "docs_only: $DOCS_ONLY"
-          echo "label: $LABEL"
-          echo "main_branch: $MAIN_BRANCH"
+          echo "DOCS_ONLY: $DOCS_ONLY"
+          echo "LABEL: $LABEL"
+          echo "IS_PULLREQUEST: $IS_PULLREQUEST"
           
           # Run CI only (on main or if label is attached) and if it's not only docs
-          echo run_ci=$([[ ("$LABEL" = "true" || "$MAIN_BRANCH" = "true") && "$DOCS_ONLY" = "false" ]] && echo "true" || echo "false") | tee -a "$GITHUB_OUTPUT"
+          echo run_ci=$([[ ("$LABEL" = "true" || "$IS_PULLREQUEST" = "false") && "$DOCS_ONLY" = "false" ]] && echo "true" || echo "false") | tee -a "$GITHUB_OUTPUT"
 
   build-container:
     if: ${{ needs.pre-flight.outputs.run_ci == 'true' }}