diff --git a/.github/workflows/go.yaml b/.github/workflows/go.yaml index 92119cc..f191ec8 100644 --- a/.github/workflows/go.yaml +++ b/.github/workflows/go.yaml @@ -1,7 +1,7 @@ # Tests only run with --short, since the full tests involve downloading datasets -# and would be too costly for a Github actions. +# and would be too costly for GitHub Actions. # Locally in the development box it works because the datasets are cached. -name: "Tests" # The name of the workflow that will appear on Github +name: "Tests" # The name of the workflow that will appear on GitHub permissions: # read|write|none actions: read @@ -59,12 +59,14 @@ jobs: go test ./xlabuilder/... ./pjrt/... - name: Go Coverage Badge + if: github.ref == 'refs/heads/main' uses: tj-actions/coverage-badge-go@v2 with: green: 80 filename: docs/coverage.out - name: Commit README.md changes + if: github.ref == 'refs/heads/main' run: | if git diff --quiet -- 'README.md' ; then echo "README.md not modified." @@ -76,6 +78,7 @@ jobs: fi - name: Push Changes + if: github.ref == 'refs/heads/main' uses: ad-m/github-push-action@master with: github_token: ${{ github.token }} diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md index 8893fbb..a46b221 100644 --- a/docs/CHANGELOG.md +++ b/docs/CHANGELOG.md @@ -3,6 +3,9 @@ * Added `install_linux_amd64_amazonlinux.sh` and pre-built libraries for amazonlinux (built using old glibc support). * Fixed installation scripts: s/sudo/$_SUDO. Also made them more verbose. * Removed dependency on `xargs` in installation script for Linux. +* Improved documentation on Nvidia GPU card detection, and the error message shown if none is found. +* Updated the GitHub action (`go.yaml`) to only change the README.md with the result of the change, if pushing to the + `main` branch. 
# v0.4.9 - 2024-11-25 diff --git a/pjrt/cuda.go b/pjrt/cuda.go index 48f231b..889a5ce 100644 --- a/pjrt/cuda.go +++ b/pjrt/cuda.go @@ -18,7 +18,9 @@ func isCuda(name string) bool { var hasNvidiaGPUCache *bool -// hasNvidiaGPU tries to guess if there is an Nvidia GPU installed. +// hasNvidiaGPU tries to guess if there is an actual Nvidia GPU installed (as opposed to only the drivers/PJRT +// file installed, but no actual hardware). +// It does that by checking for the presence of the device files in /dev/nvidia*. func hasNvidiaGPU() bool { if hasNvidiaGPUCache != nil { return *hasNvidiaGPUCache @@ -29,6 +31,10 @@ func hasNvidiaGPU() bool { return false } hasGPU := len(matches) > 0 + if !hasGPU { + klog.Infof("No NVidia devices found matching \"/dev/nvidia*\", assuming there are no GPU cards installed in the system. " + + "To force the attempt to use the \"cuda\" PJRT, use its absolute path.") + } hasNvidiaGPUCache = &hasGPU return hasGPU }