feat(ml): rocm (#16613)

* feat(ml): introduce support of onnxruntime-rocm for AMD GPU

* try mutex for algo cache

use OrtMutex

* bump versions, run on mich

use 3.12

use 1.19.2

* acquire lock before any changes can be made

guard algo benchmark results

mark mutex as mutable

re-add /bin/sh (?)

use 3.10

use 6.1.2

* use composite cache key

1.19.2

fix variable name

fix variable reference

aaaaaaaaaaaaaaaaaaaa

* bump deps

* disable algo caching

* fix gha

* try ubuntu runner

* actually fix the gha

* update patch

* skip mimalloc preload for rocm

* increase build threads

* increase timeout for rocm

* Revert "increase timeout for rocm"

This reverts commit 2c4452f5d132198ed381a7b262b4a5cab5114b5f.

* attempt migraphx

* set migraphx_home

* Revert "set migraphx_home"

This reverts commit c121d3e48754b3bce100636f8d666deec58a44b7.

* Revert "attempt migraphx"

This reverts commit 521f9fb72dbe506dc6cb8faeb6494817d87265c6.

* migraphx, take two

* bump rocm

* allow cpu

* try only targeting migraphx

* skip tests

* migraph 

* known issues

* target gfx900 and gfx1102

* mention `HSA_USE_SVM`

* update lock

* set device id for rocm

---------

Co-authored-by: Mehdi GHESH <mehdi.ghesh@hotmail.fr>
This commit is contained in:
Mert
2025-03-17 17:08:19 -04:00
committed by GitHub
parent 6a40aa83b7
commit 2b37caba03
17 changed files with 340 additions and 50 deletions

View File

@@ -49,23 +49,38 @@ jobs:
runs-on: ubuntu-latest
strategy:
matrix:
suffix: ["", "-cuda", "-openvino", "-armnn","-rknn"]
suffix: ['', '-cuda', '-rocm', '-openvino', '-armnn', '-rknn']
steps:
- name: Login to GitHub Container Registry
uses: docker/login-action@74a5d142397b4f367a81961eba4e8cd7edddf772 # v3
with:
registry: ghcr.io
username: ${{ github.repository_owner }}
password: ${{ secrets.GITHUB_TOKEN }}
- name: Re-tag image
run: |
REGISTRY_NAME="ghcr.io"
REPOSITORY=${{ github.repository_owner }}/immich-machine-learning
TAG_OLD=main${{ matrix.suffix }}
TAG_PR=${{ github.event.number == 0 && github.ref_name || format('pr-{0}', github.event.number) }}${{ matrix.suffix }}
TAG_COMMIT=commit-${{ github.event_name != 'pull_request' && github.sha || github.event.pull_request.head.sha }}${{ matrix.suffix }}
docker buildx imagetools create -t $REGISTRY_NAME/$REPOSITORY:$TAG_PR $REGISTRY_NAME/$REPOSITORY:$TAG_OLD
docker buildx imagetools create -t $REGISTRY_NAME/$REPOSITORY:$TAG_COMMIT $REGISTRY_NAME/$REPOSITORY:$TAG_OLD
- name: Login to GitHub Container Registry
uses: docker/login-action@v3
with:
registry: ghcr.io
username: ${{ github.repository_owner }}
password: ${{ secrets.GITHUB_TOKEN }}
- name: Re-tag image
run: |
REGISTRY_NAME="ghcr.io"
REPOSITORY=${{ github.repository_owner }}/immich-machine-learning
TAG_OLD=main${{ matrix.suffix }}
TAG_PR=${{ github.event.number == 0 && github.ref_name || format('pr-{0}', github.event.number) }}${{ matrix.suffix }}
TAG_COMMIT=commit-${{ github.event_name != 'pull_request' && github.sha || github.event.pull_request.head.sha }}${{ matrix.suffix }}
docker buildx imagetools create -t $REGISTRY_NAME/$REPOSITORY:$TAG_PR $REGISTRY_NAME/$REPOSITORY:$TAG_OLD
docker buildx imagetools create -t $REGISTRY_NAME/$REPOSITORY:$TAG_COMMIT $REGISTRY_NAME/$REPOSITORY:$TAG_OLD
- name: Login to GitHub Container Registry
uses: docker/login-action@74a5d142397b4f367a81961eba4e8cd7edddf772 # v3
with:
registry: ghcr.io
username: ${{ github.repository_owner }}
password: ${{ secrets.GITHUB_TOKEN }}
- name: Re-tag image
run: |
REGISTRY_NAME="ghcr.io"
REPOSITORY=${{ github.repository_owner }}/immich-machine-learning
TAG_OLD=main${{ matrix.suffix }}
TAG_PR=${{ github.event.number == 0 && github.ref_name || format('pr-{0}', github.event.number) }}${{ matrix.suffix }}
TAG_COMMIT=commit-${{ github.event_name != 'pull_request' && github.sha || github.event.pull_request.head.sha }}${{ matrix.suffix }}
docker buildx imagetools create -t $REGISTRY_NAME/$REPOSITORY:$TAG_PR $REGISTRY_NAME/$REPOSITORY:$TAG_OLD
docker buildx imagetools create -t $REGISTRY_NAME/$REPOSITORY:$TAG_COMMIT $REGISTRY_NAME/$REPOSITORY:$TAG_OLD
retag_server:
name: Re-Tag Server
@@ -74,7 +89,7 @@ jobs:
runs-on: ubuntu-latest
strategy:
matrix:
suffix: [""]
suffix: ['']
steps:
- name: Login to GitHub Container Registry
uses: docker/login-action@74a5d142397b4f367a81961eba4e8cd7edddf772 # v3
@@ -120,6 +135,11 @@ jobs:
device: cuda
suffix: -cuda
- platform: linux/amd64
runner: mich
device: rocm
suffix: -rocm
- platform: linux/amd64
runner: ubuntu-latest
device: openvino
@@ -129,7 +149,7 @@ jobs:
runner: ubuntu-24.04-arm
device: armnn
suffix: -armnn
- platform: linux/arm64
runner: ubuntu-24.04-arm
device: rknn
@@ -220,6 +240,8 @@ jobs:
- device: cpu
- device: cuda
suffix: -cuda
- device: rocm
suffix: -rocm
- device: openvino
suffix: -openvino
- device: armnn
@@ -257,7 +279,7 @@ jobs:
id: meta
uses: docker/metadata-action@902fa8ec7d6ecbf8d84d538b9b233a880e428804 # v5
env:
DOCKER_METADATA_PR_HEAD_SHA: "true"
DOCKER_METADATA_PR_HEAD_SHA: 'true'
with:
flavor: |
# Disable latest tag
@@ -411,7 +433,7 @@ jobs:
id: meta
uses: docker/metadata-action@902fa8ec7d6ecbf8d84d538b9b233a880e428804 # v5
env:
DOCKER_METADATA_PR_HEAD_SHA: "true"
DOCKER_METADATA_PR_HEAD_SHA: 'true'
with:
flavor: |
# Disable latest tag