Skip to content

Commit 500e1c2

Browse files
authored
Backport infra/ansible to r2.0 (#5104)
1 parent 7c97401 commit 500e1c2

File tree

22 files changed

+674
-0
lines changed

22 files changed

+674
-0
lines changed

infra/ansible/.ansible-lint

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
---
2+
# .ansible-lint
3+
4+
profile: moderate
5+
skip_list:
6+
- schema[tasks]

infra/ansible/Dockerfile

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
ARG python_version=3.8
2+
ARG debian_version=buster
3+
4+
FROM python:${python_version}-${debian_version} AS build
5+
6+
WORKDIR /ansible
7+
RUN pip install ansible
8+
COPY . /ansible
9+
10+
ARG ansible_vars
11+
RUN ansible-playbook -vvv playbook.yaml -e "stage=build" -e "${ansible_vars}"
12+
13+
FROM python:${python_version}-${debian_version} AS release
14+
15+
WORKDIR /ansible
16+
# Install Ansible to drive the playbook in this stage.
# --no-cache-dir stops pip from leaving its download cache in the release image layer.
RUN pip install --no-cache-dir ansible
17+
COPY . /ansible
18+
19+
ARG ansible_vars
20+
RUN ansible-playbook -vvv playbook.yaml -e "stage=release" -e "${ansible_vars}" --tags "install_deps"
21+
22+
WORKDIR /tmp/wheels
23+
COPY --from=build /src/pytorch/dist/*.whl ./
24+
COPY --from=build /src/pytorch/xla/dist/*.whl ./
25+
26+
RUN echo "Installing the following wheels" && ls *.whl
27+
# Install the wheels copied from the build stage.
# --no-cache-dir: anything pip downloads while resolving the wheels' dependencies
# would otherwise stay cached in this layer of the release image.
RUN pip install --no-cache-dir *.whl
28+
29+
WORKDIR /
30+
31+
RUN rm -rf /ansible /tmp/wheels
32+
COPY --from=build /dist/*.whl /dist/

infra/ansible/README.md

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
# Ansible playbook
2+
3+
This ansible playbook will perform the following actions on the localhost:
4+
* install required pip and apt packages, depending on the specified stage,
5+
architecture and accelerator (see [apt.yaml](config/apt.yaml) and
6+
[pip.yaml](config/pip.yaml)).
7+
* fetch bazel (version configured in [vars.yaml](config/vars.yaml)),
8+
* fetch PyTorch and XLA sources at master (or specific revisions,
9+
see role `fetch_srcs` in [playbook.yaml](playbook.yaml)).
10+
* set required environment variables (see [env.yaml](config/env.yaml)),
11+
* build and install PyTorch and XLA wheels,
12+
* apply infrastructure tests (see `*/tests.yaml` files in [roles](roles)).
13+
14+
## Prerequisites
15+
16+
* Python 3.8+
17+
* Ansible. Install with `pip install ansible`.
18+
19+
## Running
20+
21+
The playbook requires explicitly passing 3 variables that configure playbook
22+
behavior (installed pip/apt packages and set environment variables):
23+
* `stage`: build or release. Different packages are installed depending on
24+
the chosen stage.
25+
* `arch`: aarch64 or amd64. Architecture of the built image and wheels.
26+
* `accelerator`: tpu or cuda. Available accelerator.
27+
28+
The variables can be passed through `-e` flag: `-e "<var>=<value>"`.
29+
30+
Example: `ansible-playbook playbook.yaml -e "stage=build arch=amd64 accelerator=tpu"`
31+
32+
## Config structure
33+
34+
The playbook configuration is split into 4 files, one per logical system.
35+
The configuration is simply loaded as playbook variables which are then passed
36+
to specific roles and tasks.
37+
Only variables in [config/env.yaml](config/env.yaml) are passed as env variables.
38+
39+
* [apt.yaml](config/apt.yaml) - specifies apt packages for each stage and
40+
architecture or accelerator.
41+
Packages shared between all architectures and accelerators in a given stage
42+
are specified in `*_common`. They are appended to any architecture specific list.
43+
44+
This config also contains a list of required apt repos and signing keys.
45+
These variables are mainly consumed by the [install_deps](roles/install_deps/tasks/main.yaml) role.
46+
47+
* [pip.yaml](config/pip.yaml) - similarly to apt.yaml, lists pip packages per stage and arch / accelerator.
48+
In both pip and apt config files, stage and arch / accelerator are
49+
concatenated together and specified under one key (e.g. build_amd64, release_tpu).
50+
51+
* [env.yaml](config/env.yaml) - contains Ansible variables that are passed as env variables when
52+
building PyTorch and XLA (`build_env`). Variables in `release_env` are saved in `/etc/environment` (executed for the `release` stage).
53+
54+
* [vars.yaml](config/vars.yaml) - Ansible variables used in other config files and throughout the playbook.
55+
Not associated with any particular system.
56+
57+
Variables from these config files are dynamically loaded (during playbook execution),
58+
see [playbook.yaml](playbook.yaml).

infra/ansible/ansible.cfg

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
# See https://docs.ansible.com/ansible/latest/reference_appendices/config.html
2+
# for various configuration options.
3+
4+
[defaults]
5+
# Displays tasks execution duration.
6+
callbacks_enabled = profile_tasks
7+
# The playbook is only run on the implicit localhost.
8+
# Silence warning about empty hosts inventory.
9+
localhost_warning = False
10+
# Make output human-readable.
11+
stdout_callback = yaml
12+
13+
[inventory]
14+
# Silence warning about no inventory.
15+
# This option is available since Ansible 2.14 (available only with Python 3.9+).
16+
inventory_unparsed_warning = False

infra/ansible/config/apt.yaml

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
# Contains lists of apt packages for each stage (build|release) and arch or accelerator.
2+
apt:
3+
pkgs:
4+
build_common:
5+
- ccache
6+
- curl
7+
- git
8+
- gnupg
9+
- libopenblas-dev
10+
- ninja-build
11+
- procps
12+
- python3-pip
13+
- rename
14+
- vim
15+
- wget
16+
- clang-format-7
17+
- lcov
18+
- less
19+
20+
build_cuda:
21+
- "cuda-libraries-{{ cuda_version | replace('.', '-') }}"
22+
- "cuda-toolkit-{{ cuda_version | replace('.', '-') }}"
23+
- "cuda-minimal-build-{{ cuda_version | replace('.', '-') }}"
24+
- "{{ cuda_deps['libcudnn'][cuda_version] }}"
25+
- "{{ cuda_deps['libcudnn-dev'][cuda_version] }}"
26+
27+
build_amd64:
28+
- "clang-{{ clang_version }}"
29+
30+
build_aarch64:
31+
- scons
32+
- gcc-10
33+
- g++-10
34+
35+
release_common:
36+
- curl
37+
- git
38+
- gnupg
39+
- libgomp1
40+
- libopenblas-base
41+
- patch
42+
43+
release_cuda:
44+
- "cuda-libraries-{{ cuda_version | replace('.', '-') }}"
45+
- "cuda-minimal-build-{{ cuda_version | replace('.', '-') }}"
46+
- "{{ cuda_deps['libcudnn'][cuda_version] }}"
47+
48+
# Specify objects with string fields `url` and `keyring`.
49+
# The keyring path should start with /usr/share/keyrings/ for debian and ubuntu.
50+
signing_keys:
51+
- url: https://apt.llvm.org/llvm-snapshot.gpg.key
52+
keyring: /usr/share/keyrings/llvm.pgp
53+
- url: "https://developer.download.nvidia.com/compute/cuda/repos/{{ cuda_repo }}/x86_64/3bf863cc.pub"
54+
keyring: /usr/share/keyrings/cuda.pgp
55+
56+
repos:
57+
# signed-by path should match the corresponding keyring path above.
58+
- "deb [signed-by=/usr/share/keyrings/llvm.pgp] http://apt.llvm.org/{{ llvm_debian_repo }}/ llvm-toolchain-{{ llvm_debian_repo }}-{{ clang_version }} main"
59+
- "deb-src [signed-by=/usr/share/keyrings/llvm.pgp] http://apt.llvm.org/{{ llvm_debian_repo }}/ llvm-toolchain-{{ llvm_debian_repo }}-{{ clang_version }} main"
60+
- "deb [signed-by=/usr/share/keyrings/cuda.pgp] https://developer.download.nvidia.com/compute/cuda/repos/{{ cuda_repo }}/x86_64/ /"

infra/ansible/config/cuda_deps.yaml

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
# Versions of cuda dependencies for given cuda versions.
2+
# Note: wrap version in quotes to ensure they're treated as strings.
3+
cuda_deps:
4+
# List all libcudnn8 versions with `apt list -a libcudnn8`
5+
libcudnn:
6+
"11.8": libcudnn8=8.8.0.121-1+cuda11.8
7+
"11.7": libcudnn8=8.5.0.96-1+cuda11.7
8+
"11.2": libcudnn8=8.1.1.33-1+cuda11.2
9+
libcudnn-dev:
10+
"11.8": libcudnn8-dev=8.8.0.121-1+cuda11.8
11+
"11.7": libcudnn8-dev=8.5.0.96-1+cuda11.7
12+
"11.2": libcudnn8-dev=8.1.1.33-1+cuda11.2

infra/ansible/config/env.yaml

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
# Variables that will be stored in ~/.bashrc and ~/.zshrc files for the release stage.
2+
# They'll be accessible for all processes on the host, also in the development image.
3+
release_env:
4+
common:
5+
# Force GCC because clang/bazel has issues.
6+
CC: gcc
7+
CXX: g++
8+
# CC: "clang-{{ clang_version }}"
9+
# CXX: "clang++-{{ clang_version }}"
10+
LD_LIBRARY_PATH: "$LD_LIBRARY_PATH:/usr/local/lib"
11+
12+
tpu:
13+
ACCELERATOR: tpu
14+
TPUVM_MODE: 1
15+
16+
cuda:
17+
TF_CUDA_COMPUTE_CAPABILITIES: 7.0,7.5,8.0
18+
XLA_CUDA: 1
19+
20+
# Variables that will be passed to shell environment only for building PyTorch and XLA libs.
21+
build_env:
22+
common:
23+
LD_LIBRARY_PATH: "$LD_LIBRARY_PATH:/usr/local/lib"
24+
# Set explicitly to 0 as setup.py defaults this flag to true if unset.
25+
BUILD_CPP_TESTS: 0
26+
# Force GCC because clang/bazel has issues.
27+
CC: gcc
28+
CXX: g++
29+
PYTORCH_BUILD_NUMBER: 1
30+
TORCH_XLA_VERSION: "{{ package_version }}"
31+
PYTORCH_BUILD_VERSION: "{{ package_version }}"
32+
XLA_SANDBOX_BUILD: 1
33+
BAZEL_REMOTE_CACHE: 1
34+
SILO_NAME: "cache-silo-{{ arch }}-{{ accelerator }}"
35+
36+
amd64:
37+
ARCH: amd64
38+
39+
aarch64:
40+
41+
cuda:
42+
TF_CUDA_COMPUTE_CAPABILITIES: 7.0,7.5,8.0
43+
XLA_CUDA: 1
44+
45+
tpu:
46+
ACCELERATOR: tpu
47+
TPUVM_MODE: 1
48+
BUNDLE_LIBTPU: 1
49+

infra/ansible/config/pip.yaml

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
# Contains lists of pip packages for each stage (build|release) and arch or accelerator.
2+
pip:
3+
pkgs:
4+
# Shared between all architectures and accelerators for the build stage.
5+
build_common:
6+
- astunparse
7+
- cffi
8+
- cloud-tpu-client
9+
- cmake
10+
- coverage
11+
- dataclasses
12+
- expecttest==0.1.3
13+
- future
14+
- git-archive-all
15+
- google-api-python-client
16+
- google-cloud-storage
17+
- hypothesis
18+
- lark-parser
19+
- ninja
20+
- numpy
21+
- oauth2client
22+
- pyyaml
23+
- requests
24+
- setuptools
25+
- six
26+
- tensorboard
27+
- tensorboardX
28+
- tqdm
29+
- typing
30+
- typing_extensions
31+
- sympy
32+
- yapf==0.30.0
33+
34+
build_amd64:
35+
- mkl
36+
- mkl-include
37+
38+
build_aarch64:
39+
40+
# Shared between all architectures and accelerators for the release stage.
41+
release_common:
42+
- numpy
43+
- pyyaml
44+
- mkl
45+
- mkl-include
46+
47+
release_tpu:
48+
49+
# Packages that will be installed with the `--nodeps` flag.
50+
pkgs_nodeps:
51+
release_common:
52+
- torchvision
53+
- pillow

infra/ansible/config/vars.yaml

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
# Used for fetching cuda from the right repo, see apt.yaml.
2+
cuda_repo: ubuntu1804
3+
cuda_version: "11.8"
4+
# Used for fetching clang from the right repo, see apt.yaml.
5+
llvm_debian_repo: buster
6+
clang_version: 10
7+
# PyTorch and PyTorch/XLA wheel versions.
8+
# Quoted so YAML keeps the version a string, like cuda_version above; unquoted it is
# loaded as a float, and a value such as 2.10 would silently become 2.1.
package_version: "2.0"
9+
# If set to true, wheels will be renamed to $WHEEL_NAME-nightly-cp38-cp38-linux_x86_64.whl.
10+
nightly_release: false

infra/ansible/development.Dockerfile

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
# Dockerfile for building a development image.
2+
# The built image contains all required pip and apt packages for building and
3+
# running PyTorch and PyTorch/XLA. The image doesn't contain any source code.
4+
ARG python_version=3.8
5+
ARG debian_version=buster
6+
7+
FROM python:${python_version}-${debian_version}
8+
9+
# Install Ansible to drive the playbook.
# --no-cache-dir stops pip from leaving its download cache in the development image layer.
RUN pip install --no-cache-dir ansible
10+
11+
COPY . /ansible
12+
WORKDIR /ansible
13+
14+
# List Ansible tasks to apply for the dev image.
15+
ENV TAGS="bazel,configure_env,install_deps"
16+
17+
ARG ansible_vars
18+
RUN ansible-playbook playbook.yaml -e "stage=build" -e "${ansible_vars}" --tags "${TAGS}"
19+
RUN ansible-playbook playbook.yaml -e "stage=release" -e "${ansible_vars}" --tags "${TAGS}"

infra/ansible/e2e_tests.Dockerfile

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
ARG python_version=3.8
2+
ARG debian_version=buster
3+
4+
FROM python:${python_version}-${debian_version} AS build
5+
6+
WORKDIR /ansible
7+
RUN pip install ansible
8+
COPY . /ansible
9+
10+
# Build PyTorch and PyTorch/XLA wheels.
11+
ARG ansible_vars
12+
RUN ansible-playbook -vvv playbook.yaml -e "stage=build" -e "${ansible_vars}"
13+
14+
FROM python:${python_version}-${debian_version}
15+
WORKDIR /ansible
16+
# Install Ansible to drive the playbook in this stage.
# --no-cache-dir stops pip from leaving its download cache in the final image layer.
RUN pip install --no-cache-dir ansible
17+
COPY . /ansible
18+
19+
# Install runtime pip and apt dependencies.
20+
ARG ansible_vars
21+
RUN ansible-playbook -vvv playbook.yaml -e "stage=release" -e "${ansible_vars}" --tags "install_deps"
22+
23+
# Copy test sources.
24+
RUN mkdir -p /src/pytorch/xla
25+
COPY --from=build /src/pytorch/xla/test /src/pytorch/xla/test
26+
27+
# Copy and install wheels.
28+
WORKDIR /tmp/wheels
29+
COPY --from=build /src/pytorch/dist/*.whl ./
30+
COPY --from=build /src/pytorch/xla/dist/*.whl ./
31+
32+
RUN echo "Installing the following wheels" && ls *.whl
33+
# Install the wheels copied from the build stage.
# --no-cache-dir: anything pip downloads while resolving the wheels' dependencies
# would otherwise stay cached in this layer of the final image.
RUN pip install --no-cache-dir *.whl
34+
35+
WORKDIR /
36+
37+
# Clean-up unused directories.
38+
RUN rm -rf /ansible /tmp/wheels

0 commit comments

Comments
 (0)