diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 025b07b75..c159f8a8a 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -109,7 +109,7 @@ jobs: matrix: # On PRs only build one representative arch (packaging logic is identical # across all sm_*); build the full matrix on schedule/dispatch. - sm: ${{ github.event_name == 'pull_request' && fromJSON('["sm_89"]') || fromJSON('["sm_75", "sm_80", "sm_86", "sm_89", "sm_90", "sm_100", "sm_120"]') }} + sm: ${{ github.event_name == 'pull_request' && fromJSON('["sm_89"]') || fromJSON('["sm_75", "sm_80", "sm_86", "sm_89", "sm_90", "sm_100", "sm_120", "sm_121"]') }} steps: - name: Clone @@ -138,7 +138,7 @@ jobs: wget -q https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb sudo dpkg -i cuda-keyring_1.1-1_all.deb sudo apt-get update - sudo apt-get install -y cuda-toolkit-12-8 cmake ninja-build patchelf + sudo apt-get install -y cuda-toolkit-12-9 cmake ninja-build patchelf - name: Set CUDA environment run: | @@ -210,7 +210,7 @@ jobs: matrix: # On PRs only build one representative arch (packaging logic is identical # across all sm_*); build the full matrix on schedule/dispatch. - sm: ${{ github.event_name == 'pull_request' && fromJSON('["sm_89"]') || fromJSON('["sm_75", "sm_80", "sm_86", "sm_89", "sm_90", "sm_100", "sm_120"]') }} + sm: ${{ github.event_name == 'pull_request' && fromJSON('["sm_89"]') || fromJSON('["sm_75", "sm_80", "sm_86", "sm_89", "sm_90", "sm_100", "sm_120", "sm_121"]') }} steps: - name: Clone @@ -223,9 +223,9 @@ jobs: - name: Install CUDA Toolkit id: cuda-toolkit - uses: Jimver/cuda-toolkit@v0.2.22 + uses: Jimver/cuda-toolkit@v0.2.35 with: - cuda: '12.8.0' + cuda: '12.9.1' method: 'network' sub-packages: '["nvcc", "cudart", "cublas", "cublas_dev", "curand", "nvjitlink", "thrust", "visual_studio_integration"]' @@ -291,6 +291,98 @@ jobs: name: sd-${{ env.BRANCH_NAME }}-${{ steps.commit.outputs.short }}-windows-cuda-${{ matrix.sm }}-x64.zip path: sd-${{ env.BRANCH_NAME }}-${{ steps.commit.outputs.short }}-windows-cuda-${{ matrix.sm }}-x64.zip + ubuntu-arm64-cuda: + # GB10 (sm_121) ARM64 Linux build + runs-on: ubuntu-24.04-arm + + strategy: + fail-fast: false + matrix: + # sm_121 targets the GB10 (Blackwell) ARM64 device + sm: ["sm_121"] + + steps: + - name: Clone + uses: actions/checkout@v6 + with: + submodules: recursive + fetch-depth: 0 + repository: 'leejet/stable-diffusion.cpp' + ref: master + + - name: ccache + uses: ggml-org/ccache-action@v1.2.16 + with: + key: ubuntu-arm64-cuda-${{ matrix.sm }} + evict-old-files: 1d + + - name: Install CUDA Toolkit + run: | + wget -q https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2404/sbsa/cuda-keyring_1.1-1_all.deb + sudo dpkg -i cuda-keyring_1.1-1_all.deb + sudo apt-get update + sudo apt-get install -y cuda-toolkit-12-9 cmake ninja-build patchelf + + - name: Set CUDA environment + run: | + echo "CUDA_PATH=/usr/local/cuda" >> "$GITHUB_ENV" + echo "/usr/local/cuda/bin" >> "$GITHUB_PATH" + echo "LD_LIBRARY_PATH=/usr/local/cuda/lib64:${LD_LIBRARY_PATH:-}" >> "$GITHUB_ENV" + + - name: Build + id: cmake_build + run: | + cmake_arch="${{ matrix.sm }}" + cmake_arch="${cmake_arch#sm_}" + cmake -B build -S . \ + -DSD_CUBLAS=ON \ + -DGGML_CUDA=ON \ + -DCMAKE_CUDA_ARCHITECTURES="${cmake_arch}" \ + -DCMAKE_CUDA_COMPILER=/usr/local/cuda/bin/nvcc \ + -DGGML_NATIVE=OFF \ + -DSD_BUILD_SHARED_LIBS=ON \ + -DCMAKE_BUILD_TYPE=Release + cmake --build build --config Release -j $(nproc) + + - name: Bundle CUDA runtime libraries + run: | + cuda_lib=/usr/local/cuda/lib64 + cp -av ${cuda_lib}/libcudart.so* build/bin/ + cp -av ${cuda_lib}/libcublas.so* build/bin/ + cp -av ${cuda_lib}/libcublasLt.so* build/bin/ + cp -av ${cuda_lib}/libcurand.so* build/bin/ + cp -av ${cuda_lib}/libnvJitLink.so* build/bin/ + find build -name 'libggml*.so*' ! -path 'build/bin/*' -exec cp -av {} build/bin/ \; + + - name: Set RPATH for portable distribution + run: | + for f in build/bin/*; do + [ -f "$f" ] && ! [ -L "$f" ] || continue + if file "$f" | grep -q 'ELF'; then + patchelf --set-rpath '$ORIGIN' "$f" + fi + done + + - name: Get commit hash + id: commit + if: ${{ github.event_name == 'schedule' || github.event.inputs.create_release == 'true' }} + uses: prompt/actions-commit-hash@v2 + + - name: Pack artifacts + id: pack_artifacts + if: ${{ github.event_name == 'schedule' || github.event.inputs.create_release == 'true' }} + run: | + cp ggml/LICENSE ./build/bin/ggml.txt + cp LICENSE ./build/bin/stable-diffusion.cpp.txt + tar -cJf sd-${{ env.BRANCH_NAME }}-${{ steps.commit.outputs.short }}-ubuntu-cuda-${{ matrix.sm }}-arm64.tar.xz -C ./build/bin . + + - name: Upload artifacts + if: ${{ github.event_name == 'schedule' || github.event.inputs.create_release == 'true' }} + uses: actions/upload-artifact@v4 + with: + name: sd-${{ env.BRANCH_NAME }}-${{ steps.commit.outputs.short }}-ubuntu-cuda-${{ matrix.sm }}-arm64.tar.xz + path: sd-${{ env.BRANCH_NAME }}-${{ steps.commit.outputs.short }}-ubuntu-cuda-${{ matrix.sm }}-arm64.tar.xz + windows-latest-cmake: runs-on: windows-2022 @@ -840,6 +932,7 @@ jobs: - ubuntu-latest-rocm - ubuntu-latest-cmake - ubuntu-latest-cuda + - ubuntu-arm64-cuda - windows-latest-cmake-hip - windows-latest-rocm - windows-latest-cmake