pytorch
diff --git a/‎.github/scripts/pre-build-script-win.sh
Lines changed: 1 addition & 1 deletion b/‎.github/scripts/pre-build-script-win.sh
Lines changed: 1 addition & 1 deletion
diff --git a/‎.github/scripts/td_script.sh
Lines changed: 1 addition & 1 deletion b/‎.github/scripts/td_script.sh
Lines changed: 1 addition & 1 deletion
diff --git a/‎.github/scripts/version_script.bat
Lines changed: 1 addition & 1 deletion b/‎.github/scripts/version_script.bat
Lines changed: 1 addition & 1 deletion
diff --git a/‎.github/unittest/linux/scripts/run_all.sh
Lines changed: 10 additions & 1 deletion b/‎.github/unittest/linux/scripts/run_all.sh
Lines changed: 10 additions & 1 deletion
diff --git a/‎.github/unittest/linux_olddeps/scripts_gym_0_13/run_test.sh
Lines changed: 8 additions & 1 deletion b/‎.github/unittest/linux_olddeps/scripts_gym_0_13/run_test.sh
Lines changed: 8 additions & 1 deletion
diff --git a/‎.github/unittest/linux_optdeps/scripts/run_all.sh
Lines changed: 1 addition & 0 deletions b/‎.github/unittest/linux_optdeps/scripts/run_all.sh
Lines changed: 1 addition & 0 deletions
diff --git a/‎docs/source/reference/envs.rst
Lines changed: 1 addition & 0 deletions b/‎docs/source/reference/envs.rst
Lines changed: 1 addition & 0 deletions
diff --git a/‎docs/source/reference/index.rst
Lines changed: 1 addition & 0 deletions b/‎docs/source/reference/index.rst
Lines changed: 1 addition & 0 deletions
diff --git a/‎docs/source/reference/llms.rst
Lines changed: 149 additions & 0 deletions b/‎docs/source/reference/llms.rst
Lines changed: 149 additions & 0 deletions
diff --git a/‎setup.py
Lines changed: 1 addition & 1 deletion b/‎setup.py
Lines changed: 1 addition & 1 deletion
diff --git a/‎test/llm/libs/test_mlgym.py
Lines changed: 101 additions & 0 deletions b/‎test/llm/libs/test_mlgym.py
Lines changed: 101 additions & 0 deletions
@@ -2,4 +2,4 @@
 
 pip install --upgrade setuptools
 
-export TORCHRL_BUILD_VERSION=0.8.0
+export TORCHRL_BUILD_VERSION=0.9.0
@@ -1,6 +1,6 @@
 #!/bin/bash
 
-export TORCHRL_BUILD_VERSION=0.8.0
+export TORCHRL_BUILD_VERSION=0.9.0
 pip install --upgrade setuptools
 
 # Check if ARCH is set to aarch64
 
@@ -1,5 +1,5 @@
 @echo off
-set TORCHRL_BUILD_VERSION=0.8.0
+set TORCHRL_BUILD_VERSION=0.9.0
 echo TORCHRL_BUILD_VERSION is set to %TORCHRL_BUILD_VERSION%
 
 @echo on
 
@@ -9,7 +9,14 @@ set -v
 
 if [[ $OSTYPE != 'darwin'* ]]; then
   apt-get update && apt-get upgrade -y
-  apt-get install -y vim git wget libsdl2-dev libsdl2-2.0-0 cmake
+  apt-get install -y vim git wget cmake
+
+  # Enable universe repository
+  # apt-get install -y software-properties-common
+  # add-apt-repository universe
+  # apt-get update
+
+  # apt-get install -y libsdl2-dev libsdl2-2.0-0
 
   apt-get install -y libglfw3 libgl1-mesa-glx libosmesa6 libglew-dev
   apt-get install -y libglvnd0 libgl1 libglx0 libegl1 libgles2 xvfb
@@ -208,11 +215,13 @@ pytest test/smoke_test_deps.py -v --durations 200 -k 'test_gym or test_dm_contro
 if [ "${CU_VERSION:-}" != cpu ] ; then
   python .github/unittest/helpers/coverage_run_parallel.py -m pytest test \
     --instafail --durations 200 -vv --capture no --ignore test/test_rlhf.py \
+    --ignore test/llm \
     --timeout=120 --mp_fork_if_no_cuda
 else
   python .github/unittest/helpers/coverage_run_parallel.py -m pytest test \
     --instafail --durations 200 -vv --capture no --ignore test/test_rlhf.py \
     --ignore test/test_distributed.py \
+    --ignore test/llm \
     --timeout=120 --mp_fork_if_no_cuda
 fi
 
 
@@ -28,7 +28,14 @@ python .github/unittest/helpers/coverage_run_parallel.py -m pytest test/smoke_te
 export DISPLAY=:99
 Xvfb :99 -screen 0 1400x900x24 > /dev/null 2>&1 &
 
-CKPT_BACKEND=torch MUJOCO_GL=egl python .github/unittest/helpers/coverage_run_parallel.py -m pytest --instafail -v --durations 200 --ignore test/test_distributed.py --ignore test/test_rlhf.py
+CKPT_BACKEND=torch MUJOCO_GL=egl python .github/unittest/helpers/coverage_run_parallel.py -m pytest \
+    --instafail -v \
+    --durations 200 \
+    --ignore test/test_distributed.py \
+    --ignore test/test_rlhf.py \
+    --ignore test/llm \
+    --mp_fork_if_no_cuda
+
 #pytest --instafail -v --durations 200
 #python test/test_libs.py
 coverage combine
 
@@ -159,6 +159,7 @@ export BATCHED_PIPE_TIMEOUT=60
 python .github/unittest/helpers/coverage_run_parallel.py -m pytest test \
   --instafail --durations 200 -vv --capture no --ignore test/test_rlhf.py \
   --ignore test/test_distributed.py \
+  --ignore test/llm \
   --timeout=120 --mp_fork_if_no_cuda
 
 coverage combine
 
@@ -1112,6 +1112,7 @@ to be able to create this other composition:
     CenterCrop
     ClipTransform
     Compose
+    ConditionalPolicySwitch
     ConditionalSkip
     Crop
     DataLoadingPrimer
 
@@ -7,6 +7,7 @@ API Reference
     collectors
     data
     envs
+    llms
     modules
     objectives
     trainers
 
@@ -0,0 +1,149 @@
+.. currentmodule:: torchrl
+
+LLM interface
+=============
+
+.. _ref_llms:
+
+TorchRL offers a set of tools for LLM post-training, as well as some examples for training or setup.
+
+Collectors
+----------
+
+TorchRL offers a specialized collector class (:class:`~torchrl.collectors.llm.LLMCollector`) that is tailored for LLM
+use cases. We also provide dedicated updaters for some inference engines.
+
+.. currentmodule:: torchrl.collectors.llm
+
+.. autosummary::
+    :toctree: generated/
+    :template: rl_template.rst
+
+    vLLMUpdater
+    LLMCollector
+
+
+Data structures
+---------------
+
+To handle text-based data (such as conversations), we offer a few data structures dedicated to carrying
+data for LLM post-training.
+
+.. currentmodule:: torchrl.data.llm
+
+.. autosummary::
+    :toctree: generated/
+    :template: rl_template.rst
+
+    History
+    LLMData
+
+Environments
+------------
+
+When fine-tuning an LLM using TorchRL, the environment is a crucial component of the inference pipeline, alongside the
+policy and collector. Environments manage operations that are not handled by the LLM itself, such as interacting with
+tools, loading prompts from datasets, computing rewards (when necessary), and formatting data.
+
+The design of environments in TorchRL allows for flexibility and modularity. By framing tasks as environments, users can
+easily extend or modify existing environments using transforms. This approach enables the isolation of individual
+components within specific :class:`~torchrl.envs.EnvBase` or :class:`~torchrl.envs.Transform` subclasses, making it
+simpler to augment or alter the environment logic.
+
+Available Environment Classes and Utilities
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+TorchRL provides various environment classes and utilities for working with LLMs, including:
+
+- Various environment classes (:class:`~torchrl.envs.llm.ChatEnv`, :class:`~torchrl.envs.llm.DatasetChatEnv`,
+  :class:`~torchrl.envs.llm.GSM8KEnv`, etc.)
+- Utility functions (:func:`~torchrl.envs.llm.make_gsm8k_env`, :func:`~torchrl.envs.llm.make_mlgym`, etc.)
+- Transforms and other supporting classes (:class:`~torchrl.envs.KLRewardTransform`,
+  :class:`~torchrl.envs.TemplateTransform`, :class:`~torchrl.envs.Tokenizer`, etc.)
+
+These components can be used to create customized environments tailored to specific use cases and requirements.
+
+.. currentmodule:: torchrl.envs.llm
+
+.. autosummary::
+    :toctree: generated/
+    :template: rl_template.rst
+
+    ChatEnv
+    DatasetChatEnv
+    GSM8KEnv
+    make_gsm8k_env
+    GSM8KPrepareQuestion
+    GSM8KEnv
+    IFEvalEnv
+    IfEvalScorer
+    IFEvalScoreData
+    LLMEnv
+    LLMHashingEnv
+    make_mlgym
+    MLGymWrapper
+    GSM8KRewardParser
+    IfEvalScorer
+    as_nested_tensor
+    as_padded_tensor
+    DataLoadingPrimer
+    KLRewardTransform
+    TemplateTransform
+    Tokenizer
+
+Modules
+-------
+
+The :mod:`torchrl.modules.llm` module provides a set of wrappers and utility functions for popular training and
+inference backends. The main goal of these primitives is to:
+
+- Unify the input / output data format across training and inference pipelines;
+- Unify the input / output data format across backends (to be able to use different backends across losses and
+  collectors, for instance);
+- Give appropriate tooling to construct these objects in typical RL settings (resource allocation, async execution,
+  weight update, etc.).
+
+Wrappers
+~~~~~~~~
+
+.. currentmodule:: torchrl.modules.llm
+
+.. autosummary::
+    :toctree: generated/
+    :template: rl_template.rst
+
+    TransformersWrapper
+    vLLMWrapper
+
+Utils
+~~~~~
+
+.. currentmodule:: torchrl.modules.llm
+
+.. autosummary::
+    :toctree: generated/
+    :template: rl_template.rst
+
+    CategoricalSequential
+    LLMOnDevice
+    make_vllm_worker
+    stateless_init_process_group
+    vLLMWorker
+
+Objectives
+----------
+
+LLM post-training requires adapted versions of the losses implemented in TorchRL.
+
+GRPO
+~~~~
+
+.. currentmodule:: torchrl.objectives.llm
+
+.. autosummary::
+    :toctree: generated/
+    :template: rl_template.rst
+
+    GRPOLoss
+    GRPOLossOutput
+    MCAdvantage
@@ -172,7 +172,7 @@ def _main(argv):
     if is_nightly:
         tensordict_dep = "tensordict-nightly"
     else:
-        tensordict_dep = "tensordict>=0.8.1,<0.9.0"
+        tensordict_dep = "tensordict>=0.9.0,<0.10.0"
 
     if is_nightly:
         version = get_nightly_version()
 
@@ -0,0 +1,101 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+#
+# This source code is licensed under the MIT license found in the
+# LICENSE file in the root directory of this source tree.
+from __future__ import annotations
+
+import argparse
+
+from functools import partial
+
+import pytest
+
+from torchrl import logger as torchrl_logger
+from torchrl.envs import SerialEnv
+
+from torchrl.envs.llm import make_mlgym
+from torchrl.modules.llm import TransformersWrapper
+
+
+class TestMLGYM:
+    def test_mlgym_specs(self):
+        from transformers import AutoModelForCausalLM, AutoTokenizer
+
+        model_name = "Qwen/Qwen2.5-7B-Instruct"
+        tokenizer = AutoTokenizer.from_pretrained(model_name)
+        tokenizer.eos_token = "<|im_end|>"
+        policy = TransformersWrapper(
+            AutoModelForCausalLM.from_pretrained(model_name).cuda(),
+            tokenizer=tokenizer,
+            from_text=True,
+            generate=True,
+            device="cuda:0",
+            generate_kwargs={
+                # "temperature": 0.8,
+                # "repetition_penalty": 1.5,
+                "max_new_tokens": 1024
+            },
+        )
+
+        env = SerialEnv(
+            1,
+            [
+                partial(
+                    make_mlgym,
+                    task="prisonersDilemma",
+                    tokenizer=tokenizer,
+                    device="cuda:0",
+                )
+            ],
+        )
+        rollout = env.rollout(3, policy)
+        torchrl_logger.info(f"{rollout=}")
+        env.check_env_specs(break_when_any_done="both")
+
+    def test_mlgym_task_reset(self):
+        from transformers import AutoModelForCausalLM, AutoTokenizer
+
+        model_name = "Qwen/Qwen2.5-7B-Instruct"
+        tokenizer = AutoTokenizer.from_pretrained(model_name)
+        tokenizer.eos_token = "<|im_end|>"
+        policy = TransformersWrapper(
+            AutoModelForCausalLM.from_pretrained(model_name).cuda(),
+            tokenizer=tokenizer,
+            from_text=True,
+            generate=True,
+            device="cuda:0",
+            generate_kwargs={
+                # "temperature": 0.8,
+                # "repetition_penalty": 1.5,
+                "max_new_tokens": 1024
+            },
+        )
+
+        env = SerialEnv(
+            1,
+            [
+                partial(
+                    make_mlgym,
+                    tasks=[
+                        "prisonersDilemma",
+                        "regressionKaggleHousePrice",
+                        "battleOfSexes",
+                    ],
+                    tokenizer=tokenizer,
+                    device="cuda:0",
+                )
+            ],
+        )
+        # We should get at least two tasks
+        rollout = env.rollout(100, policy, break_when_any_done=False)
+        torchrl_logger.info(f"{rollout=}")
+        torchrl_logger.info(rollout["task"])
+
+    def test_mlgym_wrong_format(self):
+        # A vanilla policy will not output anything useful, yet the env should run without error
+        ...
+
+
+if __name__ == "__main__":
+    args, unknown = argparse.ArgumentParser().parse_known_args()
+    pytest.main([__file__, "--capture", "no", "--exitfirst"] + unknown)
Original file line number	Diff line number	Diff line change
`@@ -2,4 +2,4 @@`
`2`	`2`
`3`	`3`	`pip install --upgrade setuptools`
`4`	`4`
`5`		`-export TORCHRL_BUILD_VERSION=0.8.0`
	`5`	`+export TORCHRL_BUILD_VERSION=0.9.0`