diff options
author | Harsha H S <hsharsha@users.noreply.github.com> | 2024-05-24 06:47:53 -0700 |
---|---|---|
committer | TensorFlower Gardener <gardener@tensorflow.org> | 2024-05-24 06:53:14 -0700 |
commit | bc6f10607a5d0cbb19f008f485448f6b68a90b37 (patch) | |
tree | 2231c42adc0a20bf36035f01cce2c76576dfe4fc | |
parent | d526a0c1d14a27306e2efa66bc4ae226ab216d5b (diff) | |
download | tensorflow-bc6f10607a5d0cbb19f008f485448f6b68a90b37.tar.gz |
PR #12948: [ROCm] Provide run_xla script to facilitate running XLA unit tests
Imported from GitHub PR https://github.com/openxla/xla/pull/12948
This is the first step in enabling CI runs on AMD hardware. Planning to use this repository to house ROCm-related scripts.
Copybara import of the project:
--
5465e8b4b83302dabf6ceb64552fd841fb29f2b0 by Harsha HS <harsha.havanurshamsundara@amd.com>:
[ROCm] Provide run_xla script to facilitate running XLA unit tests
--
d5a1217b452539607571ca4c8d76907722fd05bc by Harsha H S <hsharsha@users.noreply.github.com>:
Update run_xla.sh
Merging this change closes #12948
PiperOrigin-RevId: 636901595
-rwxr-xr-x | third_party/xla/build_tools/rocm/run_xla.sh | 98 |
1 files changed, 98 insertions, 0 deletions
#!/usr/bin/env bash
# Copyright 2024 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# ==============================================================================
#
# third_party/xla/build_tools/rocm/run_xla.sh
#
# Runs the XLA unit tests (//xla/...) with Bazel on ROCm.
#
# Usage: run_xla.sh [ROCM_INSTALL_DIR]
#   ROCM_INSTALL_DIR  Optional ROCm install location. Falls back to the
#                     ROCM_PATH environment variable, then /opt/rocm-6.1.0.

# NOTE: `pipefail` is deliberately not enabled. In `yes "" | configure.py`
# below, `yes` is terminated by SIGPIPE once configure.py exits, and pipefail
# would turn that into a script failure.
set -e
set -x

# Informational only: no --jobs flag is passed to bazel below.
N_BUILD_JOBS=$(grep -c ^processor /proc/cpuinfo)

# If rocm-smi exists locally (it should) use it to find out how many GPUs we
# have to test with. The call lives in the `if` condition so that a missing or
# failing rocm-smi does not trip `set -e` before the fallback can apply.
TF_GPU_COUNT=1
if smi_output=$(rocm-smi -i); then
  printf '%s\n' "${smi_output}"
  # `grep -c` exits non-zero when it counts 0 lines; `|| true` keeps that from
  # aborting the script under `set -e`.
  TF_GPU_COUNT=$(grep 'Device ID' <<<"${smi_output}" | grep -c 'GPU' || true)
fi
# Never schedule fewer than one test job, even if no device line was matched.
if ((TF_GPU_COUNT < 1)); then
  TF_GPU_COUNT=1
fi
TF_TESTS_PER_GPU=1
N_TEST_JOBS=$((TF_GPU_COUNT * TF_TESTS_PER_GPU))

echo ""
echo "Bazel will use ${N_BUILD_JOBS} concurrent build job(s) and ${N_TEST_JOBS} concurrent test job(s)."
echo ""

# First positional argument (if any) specifies the ROCM_INSTALL_DIR; otherwise
# use a non-empty ROCM_PATH, and finally the default install location.
ROCM_INSTALL_DIR=${1:-${ROCM_PATH:-/opt/rocm-6.1.0}}

# Assign first, export afterwards, so a missing python3 fails the script
# instead of being masked by the exit status of `export`.
PYTHON_BIN_PATH=$(command -v python3)
export PYTHON_BIN_PATH
PYTHON_VERSION=$("${PYTHON_BIN_PATH}" -c "import sys;print(f'{sys.version_info.major}.{sys.version_info.minor}')")
export TF_PYTHON_VERSION=${PYTHON_VERSION}
export TF_NEED_ROCM=1
export ROCM_PATH=${ROCM_INSTALL_DIR}

# Tag filters for the /usertools bazelrc path: the base filter plus an
# exclusion for every requires-gpu-sm<NN>[-only] tag.
TAGS_FILTER="gpu,requires-gpu-amd,-requires-gpu-nvidia,-no_oss,-oss_excluded,-oss_serial,-no_gpu,-no_rocm"
unsupported_gpu_tags=(-requires-gpu-sm{60,70,80,86,89,90}{,-only})
UNSUPPORTED_GPU_TAGS=$(
  IFS=,
  printf '%s' "${unsupported_gpu_tags[*]}"
)
TAGS_FILTER="${TAGS_FILTER},${UNSUPPORTED_GPU_TAGS}"

# Tag filters for the configure.py path.
DEFAULT_TAGS_FILTER="-no_oss,-oss_excluded,-oss_serial,gpu,requires-gpu,-no_gpu,-no_rocm"

# Both XLA flags travel in a single XLA_FLAGS value. Repeating
# --action_env=XLA_FLAGS=... would make Bazel keep only the last occurrence
# and silently drop the other flag.
XLA_TEST_FLAGS="--xla_gpu_force_compilation_parallelism=16 --xla_gpu_enable_llvm_module_compilation_parallelism=true"

# Flags shared by both bazel invocations below.
common_flags=(
  --keep_going
  --test_output=errors
  "--local_test_jobs=${N_TEST_JOBS}"
  "--test_env=TF_TESTS_PER_GPU=${TF_TESTS_PER_GPU}"
  "--test_env=TF_GPU_COUNT=${TF_GPU_COUNT}"
  --repo_env=HERMETIC_PYTHON_VERSION=3.11
  "--action_env=XLA_FLAGS=${XLA_TEST_FLAGS}"
  --run_under=//tools/ci_build/gpu_build:parallel_gpu_execute
)

if [[ -f /usertools/rocm.bazelrc ]]; then
  # Use the bazelrc files in /usertools if available.
  # The bazelrc files in /usertools expect /tf to exist.
  mkdir -p /tf

  bazel \
    --bazelrc=/usertools/rocm.bazelrc \
    test \
    --config=sigbuild_local_cache \
    --config=rocm \
    --config=xla_cpp \
    "--build_tag_filters=${TAGS_FILTER}" \
    "--test_tag_filters=${TAGS_FILTER}" \
    "${common_flags[@]}" \
    -- //xla/...
else
  # No prebuilt bazelrc: accept every configure.py default, then test.
  yes "" | "${PYTHON_BIN_PATH}" configure.py

  bazel \
    test \
    "--test_tag_filters=${DEFAULT_TAGS_FILTER}" \
    "--build_tag_filters=${DEFAULT_TAGS_FILTER}" \
    --config=rocm \
    "${common_flags[@]}" \
    -- //xla/...
fi