Spaces:
Runtime error
Runtime error
| # Copyright (c) 2025, NVIDIA CORPORATION. | |
| # | |
| # Licensed under the Apache License, Version 2.0 (the "License"); | |
| # you may not use this file except in compliance with the License. | |
| # You may obtain a copy of the License at | |
| # | |
| # http://www.apache.org/licenses/LICENSE-2.0 | |
| # | |
| # Unless required by applicable law or agreed to in writing, software | |
| # distributed under the License is distributed on an "AS IS" BASIS, | |
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
| # See the License for the specific language governing permissions and | |
| # limitations under the License. | |
# Workflow display name and the events that start it.
name: CICD NeMo
on:
  # Cron trigger: minute 0, hour 0, every day.
  schedule:
    - cron: '0 0 * * *'
  # Pull requests targeting the listed branch patterns; only the `labeled`
  # activity type starts a run.
  pull_request:
    types:
      - labeled
    branches:
      - main
      - 'r**'
      - 'weekly-bump*'
  # Manual runs; the caller may narrow the set of tests.
  workflow_dispatch:
    inputs:
      test_to_run:
        description: 'Comma-separated list of tests to run. Use "all" to run the full test suite.'
        type: string
        required: false
        default: all
# Runs sharing the same group key cancel each other. The key is built from the
# workflow name, the PR number (or the ref when there is no PR), the label that
# triggered the event (or the literal 'main' when there is none) and the event
# name.
concurrency:
  cancel-in-progress: true
  # Earlier grouping, kept for reference:
  # group: ${{ github.workflow }}-${{ github.ref == 'refs/heads/main' && github.run_id || github.event.pull_request.number || github.ref }}-${{ github.event_name }}
  group: "${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}-${{ github.event.label.name || 'main' }}-${{ github.event_name }}"
| jobs: | |
# Job (child of the top-level `jobs:` mapping).
# Decides what the rest of the pipeline should run and publishes each decision
# as a job output. Every step writes its answer to the step output named `main`.
pre-flight:
  runs-on: ubuntu-latest
  outputs:
    # JSON array produced by `jq split(",")`; `[]` means "run no tests".
    test_to_run: ${{ steps.test_to_run.outputs.main }}
    # 'true' / 'false' strings.
    is_ci_workload: ${{ steps.is_ci_workload.outputs.main }}
    no_fail_fast: ${{ steps.no_fail_fast.outputs.main }}
    # Contents of test_modules.json.
    components_to_run: ${{ steps.components_to_run.outputs.main }}
  env:
    TESTS_TO_RUN: ${{ inputs.test_to_run }}
    EVENT_NAME: ${{ github.event_name }}
    HAS_LABEL: ${{ github.event.label.name == 'Run CICD' }}
  steps:
    - name: Checkout branch
      uses: actions/checkout@v4
      with:
        fetch-depth: 0

    - name: Select components to run
      id: components_to_run
      run: |
        pip install -U pip
        pip install git-python
        if [[ "$EVENT_NAME" == "pull_request" ]]; then
          # NOTE(review): presumably this script writes test_modules.json,
          # which is read below — confirm against the script.
          python .github/scripts/components_to_run.py --source-sha ${{ github.event.pull_request.head.sha }} --target-sha ${{ github.event.pull_request.base.sha }}
        else
          # Non-PR events use a fixed component list.
          echo '["nemo2", "export-deploy", "speech"]' | tee -a test_modules.json
        fi
        components_to_run=$(cat test_modules.json)
        echo "main=${components_to_run}" | tee -a "$GITHUB_OUTPUT"

    - name: Select tests to run
      id: test_to_run
      run: |
        if [[ "$EVENT_NAME" == "workflow_dispatch" ]]; then
          # Manual dispatch: keep the caller-provided value (defaults to `all`).
          :
        elif [[ "$EVENT_NAME" == "pull_request" && "$HAS_LABEL" == "true" ]]; then
          # PR labeled with `Run CICD`: run everything.
          TESTS_TO_RUN=all
        elif [[ "$EVENT_NAME" == "pull_request" && "$HAS_LABEL" != "true" ]]; then
          # PR labeled with any other label: run no tests.
          TESTS_TO_RUN=""
        elif [[ "$EVENT_NAME" == "push" || "$EVENT_NAME" == "schedule" ]]; then
          # For push events, run all tests. This is so that we can generate
          # coverage on branch `main`.
          TESTS_TO_RUN=all
        else
          echo "Unsupported event_name $EVENT_NAME provided".
          exit 1
        fi
        # Turn the comma-separated string into a compact JSON array; an empty
        # string becomes `[]`, which downstream jobs test for.
        parsed_string=$(echo "$TESTS_TO_RUN" | jq -c --raw-input 'split(",")')
        echo "main=${parsed_string}" | tee -a "$GITHUB_OUTPUT"

    - name: Check if this is a CI workload
      shell: bash
      id: is_ci_workload
      run: |
        # PR head branch when available, otherwise the ref without `refs/heads/`.
        branch_name=${GITHUB_HEAD_REF:-${GITHUB_REF#refs/heads/}}
        if [[ "$branch_name" =~ ^bump-ci-container || "$EVENT_NAME" == "schedule" ]]; then
          is_ci_workload=true
        else
          is_ci_workload=false
        fi
        # Single write of the output (it was previously also written inside
        # the `true` branch, duplicating the entry).
        echo "main=$is_ci_workload" | tee -a "$GITHUB_OUTPUT"

    - name: Check if no-fail-fast is set
      shell: bash
      id: no_fail_fast
      env:
        HAS_FAIL_FAST_LABEL: ${{ contains(github.event.pull_request.labels.*.name, 'no-fail-fast') }}
      run: |
        if [[ "$HAS_FAIL_FAST_LABEL" == "true" || "$EVENT_NAME" == "schedule" ]]; then
          no_fail_fast=true
        else
          no_fail_fast=false
        fi
        echo "main=$no_fail_fast" | tee -a "$GITHUB_OUTPUT"
# Job (child of the top-level `jobs:` mapping).
# Calls the repository's lint workflow unless pre-flight selected no tests.
code-linting:
  needs:
    - pre-flight
  if: ${{ needs.pre-flight.outputs.test_to_run != '[]' }}
  uses: ./.github/workflows/code-linting.yml
# Job (child of the top-level `jobs:` mapping).
# Gate job bound to the `test` environment. It is skipped when pre-flight
# reports a CI workload or when there is nothing to run.
cicd-wait-in-queue:
  runs-on: ubuntu-latest
  environment: test
  needs:
    - pre-flight
    - code-linting
  if: |
    needs.pre-flight.outputs.test_to_run != '[]'
    && needs.pre-flight.outputs.components_to_run != '[]'
    && needs.pre-flight.outputs.is_ci_workload == 'false'
  steps:
    - name: Running CI tests
      run: echo "Running CI tests"
# Job (child of the top-level `jobs:` mapping).
# Builds the CI container through the reusable `_build_container.yml` workflow.
cicd-test-container-build:
  needs:
    - pre-flight
    - code-linting
    - cicd-wait-in-queue
  # Run when something was selected and either every dependency succeeded or
  # the queue gate was skipped for a CI workload; never after cancellation.
  if: |
    needs.pre-flight.outputs.test_to_run != '[]'
    && needs.pre-flight.outputs.components_to_run != '[]'
    && (
      success()
      || (needs.cicd-wait-in-queue.result == 'skipped' && needs.pre-flight.outputs.is_ci_workload == 'true')
    )
    && !cancelled()
  uses: ./.github/workflows/_build_container.yml
  with:
    dockerfile: docker/Dockerfile.ci
    image-name: nemo_container
# Job (child of the top-level `jobs:` mapping).
# Smoke-tests the freshly built container: prints the Lightning version, runs
# the rank check and the ASR/TTS import checks.
cicd-import-tests:
  # `cicd-wait-in-queue` must be a direct dependency: the `needs` context only
  # exposes direct dependencies, so without it the
  # `needs.cicd-wait-in-queue.result == 'skipped'` clause below could never be
  # true. It is already an indirect dependency via the container build, so
  # scheduling order does not change.
  needs: [pre-flight, cicd-wait-in-queue, cicd-test-container-build]
  runs-on: self-hosted-azure-gpus-1
  if: |
    needs.pre-flight.outputs.test_to_run != '[]'
    && needs.pre-flight.outputs.components_to_run != '[]'
    && (
      success()
      || (
        needs.cicd-wait-in-queue.result == 'skipped'
        && needs.pre-flight.outputs.is_ci_workload == 'true'
      )
    )
    && !cancelled()
  steps:
    # A per-job UUID is used in the checkout path below.
    - name: Create UUID
      id: uuid
      run: |
        echo "id=$(uuidgen)" >> "$GITHUB_OUTPUT"

    - name: Checkout NeMo
      uses: actions/checkout@v4
      with:
        path: ${{ github.run_id }}/${{steps.uuid.outputs.id }}/NeMo

    - name: Run some checks
      run: |
        docker run \
          --rm \
          --device=/dev/nvidia0 \
          --gpus all \
          --shm-size=8g \
          --volume $(pwd)/${{ github.run_id }}/${{steps.uuid.outputs.id }}/NeMo:/workspace \
          --env TRANSFORMERS_OFFLINE=0 \
          --env HYDRA_FULL_ERROR=1 --env PYTHONUNBUFFERED=1 nemoci.azurecr.io/nemo_container:${{ github.run_id }} bash -c '\
            # PyTorch Lightning version
            python -c "import lightning.pytorch; print(lightning.pytorch.__version__)"
            # PyTorch Lightning DDP Checks
            CUDA_VISIBLE_DEVICES="0,1" python "tests/core_ptl/check_for_ranks.py"
            # Basic Import Checks
            python tests/core_ptl/check_imports.py --domain asr
            python tests/core_ptl/check_imports.py --domain tts
          '
# Job (child of the top-level `jobs:` mapping).
# Runs the `L0_Setup_Test_Data_And_Models` script through the shared
# test-template action on a self-hosted runner.
L0_Setup_Test_Data_And_Models:
  runs-on: self-hosted-azure
  needs:
    - pre-flight
    - cicd-test-container-build
    - cicd-wait-in-queue
  # Same gate as the container build: something selected, dependencies green
  # (or queue gate skipped for a CI workload), and not cancelled.
  if: |
    needs.pre-flight.outputs.test_to_run != '[]'
    && needs.pre-flight.outputs.components_to_run != '[]'
    && (
      success()
      || (needs.cicd-wait-in-queue.result == 'skipped' && needs.pre-flight.outputs.is_ci_workload == 'true')
    )
    && !cancelled()
  steps:
    - name: Checkout
      uses: actions/checkout@v4
      with:
        path: ${{ github.run_id }}

    - name: main
      uses: NVIDIA/NeMo/.github/actions/test-template@main
      with:
        script: L0_Setup_Test_Data_And_Models
        runner: ${{ runner.name }}
        tests_to_run: '["L0_Setup_Test_Data_And_Models"]'
# Job (child of the top-level `jobs:` mapping).
# Runs the unit-test reusable workflow with the test selection from pre-flight.
cicd-main-unit-tests:
  # `cicd-wait-in-queue` is listed so that `needs.cicd-wait-in-queue.result`
  # in the condition below resolves; the `needs` context only contains direct
  # dependencies. It is already an indirect dependency via the container
  # build, so ordering is unchanged.
  needs: [pre-flight, cicd-wait-in-queue, cicd-test-container-build]
  uses: ./.github/workflows/cicd-main-unit-tests.yml
  if: |
    needs.pre-flight.outputs.test_to_run != '[]'
    && needs.pre-flight.outputs.components_to_run != '[]'
    && (
      success()
      || (
        needs.cicd-wait-in-queue.result == 'skipped'
        && needs.pre-flight.outputs.is_ci_workload == 'true'
      )
    )
    && !cancelled()
  with:
    test_to_run: ${{ needs.pre-flight.outputs.test_to_run }}
# Job (child of the top-level `jobs:` mapping).
# Runs the speech reusable workflow when `speech` is among the selected
# components.
cicd-main-speech:
  # `cicd-wait-in-queue` is listed so that `needs.cicd-wait-in-queue.result`
  # in the condition below resolves; the `needs` context only contains direct
  # dependencies. It is already an indirect dependency via the container
  # build, so ordering is unchanged.
  needs: [pre-flight, cicd-wait-in-queue, cicd-test-container-build, cicd-main-unit-tests]
  uses: ./.github/workflows/cicd-main-speech.yml
  # NOTE(review): unlike `cicd-main-nemo2`, this condition has no
  # `components_to_run == '["all"]'` fallback — confirm whether that is
  # intentional.
  if: |
    (
      needs.pre-flight.outputs.test_to_run != '[]'
      && (
        contains(fromJson(needs.pre-flight.outputs.components_to_run), 'speech')
      )
    )
    && (
      success()
      || (
        needs.cicd-wait-in-queue.result == 'skipped'
        && needs.pre-flight.outputs.is_ci_workload == 'true'
      )
    )
    && !cancelled()
  with:
    test_to_run: ${{ needs.pre-flight.outputs.test_to_run }}
# Job (child of the top-level `jobs:` mapping).
# Runs the nemo2 reusable workflow when `nemo2` is among the selected
# components, or when the component list is exactly `["all"]`.
cicd-main-nemo2:
  # `cicd-wait-in-queue` is listed so that `needs.cicd-wait-in-queue.result`
  # in the condition below resolves; the `needs` context only contains direct
  # dependencies. It is already an indirect dependency via the container
  # build, so ordering is unchanged.
  needs: [pre-flight, cicd-wait-in-queue, cicd-test-container-build, cicd-main-unit-tests]
  uses: ./.github/workflows/cicd-main-nemo2.yml
  if: |
    (
      needs.pre-flight.outputs.test_to_run != '[]'
      && (
        contains(fromJson(needs.pre-flight.outputs.components_to_run), 'nemo2')
        || needs.pre-flight.outputs.components_to_run == '["all"]'
      )
    )
    && (
      success()
      || (
        needs.cicd-wait-in-queue.result == 'skipped'
        && needs.pre-flight.outputs.is_ci_workload == 'true'
      )
    )
    && !cancelled()
  with:
    test_to_run: ${{ needs.pre-flight.outputs.test_to_run }}
# Job (child of the top-level `jobs:` mapping).
# Final status job. It always runs, inspects the conclusions of the jobs in
# this run, maintains the `Run CICD` label, posts notifications, and exits
# non-zero unless the computed result is `success`.
Nemo_CICD_Test:
  needs:
    - pre-flight
    - cicd-test-container-build
    - cicd-import-tests
    - L0_Setup_Test_Data_And_Models
    - cicd-main-unit-tests
    - cicd-main-nemo2
    - cicd-main-speech
  if: always()
  runs-on: ubuntu-latest
  permissions: write-all
  steps:
    # One checkout serves both the `gh` CLI calls and the notify script below.
    - name: Checkout
      uses: actions/checkout@v4

    - name: Get workflow result
      id: result
      env:
        GH_TOKEN: ${{ github.token }}
        RUN_ID: ${{ github.run_id }}
        HAS_LABEL: ${{ github.event.label.name == 'Run CICD' }}
        IS_SCHEDULED: ${{ github.event_name == 'schedule' }}
      run: |
        # Count failed and cancelled jobs of this workflow run.
        NUM_FAILED=$(gh run view "$RUN_ID" --json jobs -q '[.jobs[] | select(.conclusion == "failure") | .name] | length')
        NUM_CANCELLED=$(gh run view "$RUN_ID" --json jobs -q '[.jobs[] | select(.conclusion == "cancelled") | .name] | length')

        # Success requires a clean run that was started by the `Run CICD`
        # label or by the schedule.
        # NOTE(review): a clean `workflow_dispatch` run therefore reports
        # `failure` — confirm this is intended.
        if [[ $NUM_FAILED -eq 0 && $NUM_CANCELLED -eq 0 && ("$HAS_LABEL" == "true" || "$IS_SCHEDULED" == "true") ]]; then
          RESULT="success"
        elif [[ $NUM_CANCELLED -gt 0 ]]; then
          RESULT="cancelled"
        else
          RESULT="failure"
        fi

        # Output the final status
        echo "code=$RESULT" | tee -a "$GITHUB_OUTPUT"

    - name: Remove label if not cancelled
      # Only for PRs from this repository (not forks).
      if: |
        steps.result.outputs.code != 'cancelled'
        && github.event.label.name == 'Run CICD'
        && github.event.pull_request.head.repo.full_name == github.repository
      env:
        GH_TOKEN: ${{ github.token }}
        PR_NUMBER: ${{ github.event.number }}
      run: gh pr edit "$PR_NUMBER" --remove-label "Run CICD"

    - name: Pipeline successful, add PR comment
      # NOTE(review): this step is gated on SLACK_WEBHOOK being set although
      # it posts a PR comment and does not use the webhook — confirm.
      if: |
        steps.result.outputs.code == 'success'
        && github.event_name == 'pull_request'
        && env.SLACK_WEBHOOK != ''
      uses: peter-evans/create-or-update-comment@v4
      env:
        SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
        REPOSITORY: ${{ github.repository }}
        RUN_ID: ${{ github.run_id }}
      with:
        issue-number: ${{ github.event.number }}
        body: |
          [🤖]: Hi @${{ github.event.pull_request.user.login }} 👋,
          We wanted to let you know that a [CICD pipeline](https://github.com/${{ env.REPOSITORY }}/actions/runs/${{ env.RUN_ID }}) for this PR just finished successfully.
          So it might be time to merge this PR or get some approvals.
          //cc @chtruong814 @ko3n1g @pablo-garay @thomasdhc

    - name: "Pipeline not successful and not cancelled: Send Slack alert & create step summary"
      if: |
        steps.result.outputs.code == 'failure'
        && github.event.label.name == 'Run CICD'
        && env.SLACK_WEBHOOK != ''
      env:
        SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
        GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        REPOSITORY: ${{ github.repository }}
        RUN_ID: ${{ github.run_id }}
        PR_NUMBER: ${{ github.event.number }}
        SERVER_URL: ${{ github.server_url }}
      run: |
        set -x
        pip install PyGithub
        export BRANCH_NAME=${GITHUB_HEAD_REF:-${GITHUB_REF#refs/heads/}}
        python .github/scripts/notify.py

    - name: Exit
      if: ${{ always() }}
      env:
        RESULT: ${{ steps.result.outputs.code }}
      run: |
        # Quoted so that an empty RESULT (the result step did not produce an
        # output) takes the failure branch without a test-syntax error.
        if [[ "$RESULT" == "success" ]]; then
          exit 0
        else
          exit 1
        fi
# Job (child of the top-level `jobs:` mapping).
# For each coverage flavour in the matrix: download the matching coverage
# artifacts, combine them, upload the result to Codecov and re-publish the
# combined `.coverage` file as an artifact.
Coverage:
  needs:
    - pre-flight
    - Nemo_CICD_Test
  runs-on: ubuntu-latest
  strategy:
    matrix:
      flag:
        - unit-test
        - e2e
  if: |
    needs.pre-flight.outputs.test_to_run != '[]'
    && needs.pre-flight.outputs.components_to_run != '[]'
    && (success() || needs.Nemo_CICD_Test.result == 'success')
    && !cancelled()
  steps:
    - name: Checkout
      uses: actions/checkout@v4

    - name: Download coverage reports of current branch
      uses: actions/download-artifact@v4
      with:
        pattern: coverage-${{ matrix.flag }}-*

    - name: Get total coverage of current branch
      # Runs regardless of the outcome of the earlier steps.
      if: always()
      shell: bash -x -e -u -o pipefail {0}
      run: |
        pip install coverage
        ls -al .
        ls -al coverage-*/
        coverage combine --keep $(ls coverage-*/.coverage)
        coverage report -i
        rm -rf coverage-*
        ls -al

    - name: Upload coverage reports to Codecov
      uses: codecov/codecov-action@v5
      with:
        flags: ${{ matrix.flag }}
        token: ${{ secrets.CODECOV_TOKEN }}
        verbose: true

    - name: Upload artifacts
      uses: actions/upload-artifact@v4
      with:
        name: coverage-${{ matrix.flag }}-aggregated
        include-hidden-files: true
        path: |
          .coverage