diff --git a/.github/scripts/build.sh b/.github/scripts/build.sh
index d9df68705..4e5ae4aa9 100755
--- a/.github/scripts/build.sh
+++ b/.github/scripts/build.sh
@@ -49,9 +49,31 @@ python third_party/torch-xpu-ops/.github/scripts/apply_torch_pr.py
 git submodule sync && git submodule update --init --recursive
 python -m pip install -r requirements.txt
 python -m pip install mkl-static mkl-include
-# python -m pip install -U cmake==3.31.6
+export USE_STATIC_MKL=1
 export USE_ONEMKL=1
 export USE_XCCL=1
+export PYTORCH_EXTRA_INSTALL_REQUIREMENTS=" \
+    intel-cmplr-lib-rt==2025.1.1 | \
+    intel-cmplr-lib-ur==2025.1.1 | \
+    intel-cmplr-lic-rt==2025.1.1 | \
+    intel-sycl-rt==2025.1.1 | \
+    oneccl-devel==2021.15.1; platform_system == 'Linux' and platform_machine == 'x86_64' | \
+    oneccl==2021.15.1; platform_system == 'Linux' and platform_machine == 'x86_64' | \
+    impi-rt==2021.15.0; platform_system == 'Linux' and platform_machine == 'x86_64' | \
+    onemkl-sycl-blas==2025.1.0 | \
+    onemkl-sycl-dft==2025.1.0 | \
+    onemkl-sycl-lapack==2025.1.0 | \
+    onemkl-sycl-rng==2025.1.0 | \
+    onemkl-sycl-sparse==2025.1.0 | \
+    dpcpp-cpp-rt==2025.1.1 | \
+    intel-opencl-rt==2025.1.1 | \
+    mkl==2025.1.0 | \
+    intel-openmp==2025.1.1 | \
+    tbb==2022.1.0 | \
+    tcmlib==1.3.0 | \
+    umf==0.10.0 | \
+    intel-pti==0.12.0
+"
 
 # Build
 sed -i "s/checkout --quiet \${TORCH_XPU_OPS_COMMIT}/log -n 1/g" caffe2/CMakeLists.txt
diff --git a/.github/scripts/check-ut.py b/.github/scripts/check-ut.py
index 8cd490bc8..138856f44 100644
--- a/.github/scripts/check-ut.py
+++ b/.github/scripts/check-ut.py
@@ -30,9 +30,26 @@ def get_result(case):
 def get_message(case):
     if not case.result:
         return ""
-    return f"{case.result[0].message.splitlines()[0]}"
+    # Guard against a missing (None) or empty message before indexing into it.
+    lines = (case.result[0].message or "").splitlines()
+    if not lines:
+        return ""
+    # Lead with the first line, then add the traceback body up to (but not
+    # including) the first following line that contains "Error: ".
+    message = [lines[0]]
+    collect = False
+    for line in lines:
+        if "Traceback" in line:
+            collect = True
+        if collect and "Error: " in line:
+            break
+        if collect:
+            message.append(line)
 
-def print_md_row(row, print_header):
+    # Space-separate the pieces so they stay readable on one table row.
+    return " ".join(message)
+
+def print_md_row(row, print_header, failure_list=None):
     if print_header:
         header = " | ".join([f"{key}" for key, _ in row.items()])
         print(f"| {header} |")
@@ -41,7 +58,11 @@ def print_md_row(row, print_header):
     row = " | ".join([f"{value}" for _, value in row.items()])
     print(f"| {row} |")
 
-def print_cases(cases):
+    if failure_list is not None:
+        failure_list.write(f"| {row} |\n")
+
+
+def print_cases(cases, failure_list=None):
     print_header = True
     for case in cases:
         classname = get_classname(case)
@@ -54,9 +75,10 @@ def print_cases(cases):
             'Status': result,
             'Message': message,
         }
-        print_md_row(row, print_header)
+        print_md_row(row, print_header, failure_list=failure_list)
         print_header = False
 
+
 def print_suite(suite):
     print_header = True
     for suite in suites:
@@ -75,6 +97,9 @@ def print_suite(suite):
             category = 'op_extended'
         elif 'op_ut' in ut:
             category = 'op_ut'
+        else:
+            category = 'unknown'
+
         row = {
             'Category': category,
             'UT': ut,
@@ -103,11 +128,12 @@ def print_break(needed):
     if needed:
         print("")
 
-if failures:
-    print_break(printed)
-    print("### Failures")
-    print_cases(failures)
-    printed = True
+with open("ut_failure_list.csv", "w", encoding="utf-8") as failure_list:
+    if failures:
+        print_break(printed)
+        print("### Failures")
+        print_cases(failures, failure_list=failure_list)
+        printed = True
 
 print("### Results Summary")
 print_suite(suites)
diff --git a/.github/scripts/env.sh b/.github/scripts/env.sh
index 3fd5b0353..1fb4d8c75 100644
--- a/.github/scripts/env.sh
+++ b/.github/scripts/env.sh
@@ -1,21 +1,7 @@
 #!/bin/bash
 
 source /opt/intel/oneapi/compiler/latest/env/vars.sh
-source /opt/intel/oneapi/umf/latest/env/vars.sh
 source /opt/intel/oneapi/pti/latest/env/vars.sh
+source /opt/intel/oneapi/umf/latest/env/vars.sh
 source /opt/intel/oneapi/ccl/latest/env/vars.sh
 source /opt/intel/oneapi/mpi/latest/env/vars.sh
-source /opt/intel/oneapi/mkl/latest/env/vars.sh
-export PYTORCH_EXTRA_INSTALL_REQUIREMENTS="\
-    intel-cmplr-lib-rt==2025.0.5 |\
-    intel-cmplr-lib-ur==2025.0.5 |\
-    intel-cmplr-lic-rt==2025.0.5 |\
-    intel-sycl-rt==2025.0.5 |\
-    impi-devel==2021.14.2 |\
-    oneccl-devel==2021.14.1 |\
-    mkl-devel==2025.0.1 |\
-    onemkl-sycl-blas==2025.0.1 |\
-    onemkl-sycl-dft==2025.0.1 |\
-    onemkl-sycl-lapack==2025.0.1 |\
-    tcmlib==1.2.0 | umf==0.9.1 | intel-pti==0.10.2 \
-"
diff --git a/.github/workflows/_linux_ut.yml b/.github/workflows/_linux_ut.yml
index 4307fd05e..600da1a0d 100644
--- a/.github/workflows/_linux_ut.yml
+++ b/.github/workflows/_linux_ut.yml
@@ -282,6 +282,9 @@ jobs:
           source activate xpu_op_${ZE_AFFINITY_MASK}
           pip install junitparser
           python .github/scripts/check-ut.py ${{ github.workspace }}/ut_log/*.xml >> $GITHUB_STEP_SUMMARY || true
+          if [ -e "ut_failure_list.csv" ]; then
+              cp ut_failure_list.csv ${{ github.workspace }}/ut_log/ut_failure_list.csv || true
+          fi
       - name: UT Test Results Check
         shell: bash
         run: |
@@ -310,6 +313,12 @@ jobs:
         with:
           name: Inductor-XPU-UT-Data-${{ github.event.pull_request.number || github.sha }}-${{ env.UT_NAME }}
           path: ${{ github.workspace }}/ut_log
+      - name: Upload XPU UT Failure list
+        if: always()
+        uses: actions/upload-artifact@v4
+        with:
+          name: XPU-UT-Failure-List-${{ github.event.pull_request.number || github.sha }}-${{ env.UT_NAME }}
+          path: ${{ github.workspace }}/ut_log/ut_failure_list.csv
 
   distributed_ut_test:
     runs-on: pvc_e2e