From 2d8ce523c009ba058fef0a4edb5da8e717fe23d4 Mon Sep 17 00:00:00 2001 From: Jesse Cai Date: Wed, 13 Nov 2024 19:17:16 -0500 Subject: [PATCH 01/23] Revert "pin nightlies to deal with std::badalloc (#1256)" This reverts commit 0e854ecbf75350d781fa60140ab1d3180a0404db. --- .github/workflows/regression_test.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/regression_test.yml b/.github/workflows/regression_test.yml index 975e0470f5..e34031f6c4 100644 --- a/.github/workflows/regression_test.yml +++ b/.github/workflows/regression_test.yml @@ -40,7 +40,7 @@ jobs: gpu-arch-version: "12.1" - name: CUDA Nightly runs-on: linux.g5.12xlarge.nvidia.gpu - torch-spec: '--pre torch==2.6.0.dev20241101 --index-url https://download.pytorch.org/whl/nightly/cu121' + torch-spec: '--pre torch --index-url https://download.pytorch.org/whl/nightly/cu121' gpu-arch-type: "cuda" gpu-arch-version: "12.1" @@ -61,7 +61,7 @@ jobs: gpu-arch-version: "" - name: CPU Nightly runs-on: linux.4xlarge - torch-spec: '--pre torch==2.6.0.dev20241101 --index-url https://download.pytorch.org/whl/nightly/cpu' + torch-spec: '--pre torch --index-url https://download.pytorch.org/whl/nightly/cpu' gpu-arch-type: "cpu" gpu-arch-version: "" From 47ce2090f6ea792262af9cd45d34bf30045760bc Mon Sep 17 00:00:00 2001 From: Jesse Cai Date: Thu, 14 Nov 2024 16:19:49 -0500 Subject: [PATCH 02/23] Update regression_test.yml --- .github/workflows/regression_test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/regression_test.yml b/.github/workflows/regression_test.yml index e34031f6c4..e766d04950 100644 --- a/.github/workflows/regression_test.yml +++ b/.github/workflows/regression_test.yml @@ -65,7 +65,7 @@ jobs: gpu-arch-type: "cpu" gpu-arch-version: "" - uses: pytorch/test-infra/.github/workflows/linux_job.yml@main + uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main with: timeout: 120 runner: ${{ matrix.runs-on }} From a088402ca165a0035f03bcab9f8760679c18a0e4 Mon Sep 17 00:00:00 2001 From: Jesse Cai Date: Thu, 14 Nov 2024 16:39:51 -0500 Subject: [PATCH 03/23] Update regression_test.yml --- .github/workflows/regression_test.yml | 3 --- 1 file changed, 3 deletions(-) diff --git a/.github/workflows/regression_test.yml b/.github/workflows/regression_test.yml index e766d04950..46ec9e2ed4 100644 --- a/.github/workflows/regression_test.yml +++ b/.github/workflows/regression_test.yml @@ -74,9 +74,6 @@ jobs: script: | conda create -n venv python=3.9 -y conda activate venv - echo "::group::Install newer objcopy that supports --set-section-alignment" - yum install -y devtoolset-10-binutils - export PATH=/opt/rh/devtoolset-10/root/usr/bin/:$PATH python -m pip install --upgrade pip pip install ${{ matrix.torch-spec }} pip install -r dev-requirements.txt From 40a69c692cab9d9d77d6fa56bf82005a823f0b92 Mon Sep 17 00:00:00 2001 From: Jesse Cai Date: Mon, 18 Nov 2024 07:32:52 -0800 Subject: [PATCH 04/23] skip tests --- test/integration/test_integration.py | 6 ++++++ test/prototype/test_sparse_api.py | 1 + test/sparsity/test_fast_sparse_training.py | 2 ++ 3 files changed, 9 insertions(+) diff --git a/test/integration/test_integration.py b/test/integration/test_integration.py index 92d2dcd5c2..d4c0dce169 100644 --- a/test/integration/test_integration.py +++ b/test/integration/test_integration.py @@ -803,6 +803,8 @@ def test_aq_float8_dynamic_quant_tensorwise_scaling_subclass(self, device, dtype @unittest.skipIf(not TORCH_VERSION_AT_LEAST_2_3, "int4 requires torch nightly.") # @unittest.skipIf(TORCH_VERSION_AT_LEAST_2_5, "int4 skipping 2.5+ for now") def test_int4_weight_only_quant_subclass(self, device, dtype): + if device == "cpu": + self.skipTest(f"Temporarily skipping for {device}") if dtype != torch.bfloat16: self.skipTest(f"Fails for {dtype}") for test_shape in ([(16, 1024, 16)] + ([(1, 1024, 8)] if device=='cuda' else [])): @@ -896,6 +898,8 @@ def test_int8_weight_only_quant_with_freeze(self, device, dtype): @unittest.skipIf(not TORCH_VERSION_AT_LEAST_2_3, "int4 requires torch nightly.") # @unittest.skipIf(TORCH_VERSION_AT_LEAST_2_5, "int4 skipping 2.5+ for now") def test_int4_weight_only_quant_subclass_api(self, device, dtype): + if device == "cpu": + self.skipTest(f"Temporarily skipping for {device}") if dtype != torch.bfloat16: self.skipTest(f"Fails for {dtype}") for test_shape in ([(16, 1024, 16)] + ([(1, 1024, 256)] if device=='cuda' else [])): @@ -911,6 +915,8 @@ def test_int4_weight_only_quant_subclass_api(self, device, dtype): @unittest.skipIf(not TORCH_VERSION_AT_LEAST_2_3, "int4 requires torch nightly.") # @unittest.skipIf(TORCH_VERSION_AT_LEAST_2_5, "int4 skipping 2.5+ for now") def test_int4_weight_only_quant_subclass_api_grouped(self, device, dtype): + if device == "cpu": + self.skipTest(f"Temporarily skipping for {device}") if dtype != torch.bfloat16: self.skipTest(f"Fails for {dtype}") for test_shape in ([(256, 256, 16)] + ([(256, 256, 8)] if device=='cuda' else [])): diff --git a/test/prototype/test_sparse_api.py b/test/prototype/test_sparse_api.py index baf224e169..866793f7d0 100644 --- a/test/prototype/test_sparse_api.py +++ b/test/prototype/test_sparse_api.py @@ -31,6 +31,7 @@ class TestSemiStructuredSparse(common_utils.TestCase): @unittest.skipIf(not TORCH_VERSION_AT_LEAST_2_3, "pytorch 2.3+ feature") @unittest.skipIf(not torch.cuda.is_available(), "Need CUDA available") + @unittest.skipTest("Temporarily skipping to unpin nightlies") def test_sparse(self): input = torch.rand((128, 128)).half().cuda() model = ( diff --git a/test/sparsity/test_fast_sparse_training.py b/test/sparsity/test_fast_sparse_training.py index 2779d37293..5cdedc57ac 100644 --- a/test/sparsity/test_fast_sparse_training.py +++ b/test/sparsity/test_fast_sparse_training.py @@ -31,6 +31,7 @@ class TestRuntimeSemiStructuredSparsity(TestCase): @unittest.skipIf(not TORCH_VERSION_AT_LEAST_2_4, "pytorch 2.4+ feature") @unittest.skipIf(not torch.cuda.is_available(), "Need CUDA available") @unittest.skipIf(is_fbcode(), "broken in fbcode") + @unittest.skipTest("Temporarily skipping to unpin nightlies") def test_runtime_weight_sparsification(self): # need this import inside to not break 2.2 tests from torch.sparse import SparseSemiStructuredTensorCUSPARSELT @@ -72,6 +73,7 @@ def test_runtime_weight_sparsification(self): @unittest.skipIf(not TORCH_VERSION_AT_LEAST_2_4, "pytorch 2.4+ feature") @unittest.skipIf(not torch.cuda.is_available(), "Need CUDA available") @unittest.skipIf(is_fbcode(), "broken in fbcode") + @unittest.skipTest("Temporarily skipping to unpin nightlies") def test_runtime_weight_sparsification_compile(self): # need this import inside to not break 2.2 tests from torch.sparse import SparseSemiStructuredTensorCUSPARSELT From 66e11fcfba906ce7680ced77fdef825325324497 Mon Sep 17 00:00:00 2001 From: Jesse Cai Date: Mon, 18 Nov 2024 08:09:57 -0800 Subject: [PATCH 05/23] update --- test/dtypes/test_affine_quantized.py | 2 ++ test/integration/test_integration.py | 4 ++++ test/prototype/test_sparse_api.py | 2 +- test/sparsity/test_fast_sparse_training.py | 4 ++-- 4 files changed, 9 insertions(+), 3 deletions(-) diff --git a/test/dtypes/test_affine_quantized.py b/test/dtypes/test_affine_quantized.py index dd7e679e56..3b0e4ef3e2 100644 --- a/test/dtypes/test_affine_quantized.py +++ b/test/dtypes/test_affine_quantized.py @@ -143,6 +143,8 @@ class TestAffineQuantizedBasic(TestCase): @common_utils.parametrize("device", COMMON_DEVICES) @common_utils.parametrize("dtype", COMMON_DTYPES) def test_flatten_unflatten(self, apply_quant, device, dtype): + if device == "cpu": + self.skipTest(f"Temporarily skipping for {device}") l = torch.nn.Linear(128, 256, dtype=dtype, device=device) ql = apply_quant(l) lp_tensor = ql.weight diff --git a/test/integration/test_integration.py b/test/integration/test_integration.py index d4c0dce169..9595442a8e 100644 --- a/test/integration/test_integration.py +++ b/test/integration/test_integration.py @@ -662,6 +662,8 @@ def test_dequantize_int8_weight_only_quant_subclass(self, device, dtype): @unittest.skipIf(not TORCH_VERSION_AT_LEAST_2_3, "int4 requires torch nightly.") # @unittest.skipIf(TORCH_VERSION_AT_LEAST_2_5, "int4 skipping 2.5+ for now") def test_dequantize_int4_weight_only_quant_subclass(self, device, dtype): + if device == "cpu": + self.skipTest(f"Temporarily skipping for {device}") if dtype != torch.bfloat16: self.skipTest("Currently only supports bfloat16.") for test_shape in ([(16, 1024, 16)] + ([(1, 1024, 8)] if device=='cuda' else [])): @@ -673,6 +675,8 @@ def test_dequantize_int4_weight_only_quant_subclass(self, device, dtype): @unittest.skipIf(not TORCH_VERSION_AT_LEAST_2_3, "int4 requires torch nightly.") # @unittest.skipIf(TORCH_VERSION_AT_LEAST_2_5, "int4 skipping 2.5+ for now") def test_dequantize_int4_weight_only_quant_subclass_grouped(self, device, dtype): + if device == "cpu": + self.skipTest(f"Temporarily skipping for {device}") if dtype != torch.bfloat16: self.skipTest("Currently only supports bfloat16.") m_shapes = [16, 256] + ([1] if device=="cuda" else []) diff --git a/test/prototype/test_sparse_api.py b/test/prototype/test_sparse_api.py index 866793f7d0..1bbd1823db 100644 --- a/test/prototype/test_sparse_api.py +++ b/test/prototype/test_sparse_api.py @@ -31,7 +31,7 @@ class TestSemiStructuredSparse(common_utils.TestCase): @unittest.skipIf(not TORCH_VERSION_AT_LEAST_2_3, "pytorch 2.3+ feature") @unittest.skipIf(not torch.cuda.is_available(), "Need CUDA available") - @unittest.skipTest("Temporarily skipping to unpin nightlies") + @unittest.skip("Temporarily skipping to unpin nightlies") def test_sparse(self): input = torch.rand((128, 128)).half().cuda() model = ( diff --git a/test/sparsity/test_fast_sparse_training.py b/test/sparsity/test_fast_sparse_training.py index 5cdedc57ac..f2d5686fd3 100644 --- a/test/sparsity/test_fast_sparse_training.py +++ b/test/sparsity/test_fast_sparse_training.py @@ -31,7 +31,7 @@ class TestRuntimeSemiStructuredSparsity(TestCase): @unittest.skipIf(not TORCH_VERSION_AT_LEAST_2_4, "pytorch 2.4+ feature") @unittest.skipIf(not torch.cuda.is_available(), "Need CUDA available") @unittest.skipIf(is_fbcode(), "broken in fbcode") - @unittest.skipTest("Temporarily skipping to unpin nightlies") + @unittest.skip("Temporarily skipping to unpin nightlies") def test_runtime_weight_sparsification(self): # need this import inside to not break 2.2 tests from torch.sparse import SparseSemiStructuredTensorCUSPARSELT @@ -73,7 +73,7 @@ def test_runtime_weight_sparsification(self): @unittest.skipIf(not TORCH_VERSION_AT_LEAST_2_4, "pytorch 2.4+ feature") @unittest.skipIf(not torch.cuda.is_available(), "Need CUDA available") @unittest.skipIf(is_fbcode(), "broken in fbcode") - @unittest.skipTest("Temporarily skipping to unpin nightlies") + @unittest.skip("Temporarily skipping to unpin nightlies") def test_runtime_weight_sparsification_compile(self): # need this import inside to not break 2.2 tests from torch.sparse import SparseSemiStructuredTensorCUSPARSELT From c42e780c74dbfa73856e974e0388635e1db28fe9 Mon Sep 17 00:00:00 2001 From: Jesse Cai Date: Mon, 18 Nov 2024 09:35:54 -0800 Subject: [PATCH 06/23] work --- test/prototype/test_awq.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/test/prototype/test_awq.py b/test/prototype/test_awq.py index eccf8db8f6..547a1afc83 100644 --- a/test/prototype/test_awq.py +++ b/test/prototype/test_awq.py @@ -30,6 +30,8 @@ def forward(self, x): qdtypes = (torch.uint4, torch.uint7) else: qdtypes = () + +torch.serialization.add_safe_globals([ToyLinearModel]) @pytest.fixture(autouse=True) def run_before_and_after_tests(): @@ -126,4 +128,4 @@ def test_save_weights_only(): assert awq_out is not None assert awq_save_load_out is not None - assert torch.allclose(awq_out, awq_save_load_out, atol = 1e-2) \ No newline at end of file + assert torch.allclose(awq_out, awq_save_load_out, atol = 1e-2) From eaff4249a4e0a3c7b7490c2b597fce3b5a35fb19 Mon Sep 17 00:00:00 2001 From: Jesse Cai Date: Mon, 18 Nov 2024 09:40:52 -0800 Subject: [PATCH 07/23] fix --- test/prototype/test_awq.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/test/prototype/test_awq.py b/test/prototype/test_awq.py index 547a1afc83..cc5f2401f9 100644 --- a/test/prototype/test_awq.py +++ b/test/prototype/test_awq.py @@ -4,7 +4,7 @@ import torch from torchao.quantization import quantize_ -from torchao.utils import TORCH_VERSION_AT_LEAST_2_3, TORCH_VERSION_AT_LEAST_2_5 +from torchao.utils import TORCH_VERSION_AT_LEAST_2_3, TORCH_VERSION_AT_LEAST_2_5, TORCH_VERSION_AT_LEAST_2_6, if TORCH_VERSION_AT_LEAST_2_3: from torchao.prototype.awq import insert_awq_observer_, awq_uintx, AWQObservedLinear @@ -31,7 +31,9 @@ def forward(self, x): else: qdtypes = () -torch.serialization.add_safe_globals([ToyLinearModel]) +# load with weights_only=True +if TORCH_VERSION_AT_LEAST_2_6: + torch.serialization.add_safe_globals([ToyLinearModel]) @pytest.fixture(autouse=True) def run_before_and_after_tests(): From 4af3cf84169d889218ebdcb6c74e43e69d50f76c Mon Sep 17 00:00:00 2001 From: Jesse Cai Date: Mon, 18 Nov 2024 09:44:23 -0800 Subject: [PATCH 08/23] fix lint --- test/prototype/test_awq.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/prototype/test_awq.py b/test/prototype/test_awq.py index cc5f2401f9..d827059b23 100644 --- a/test/prototype/test_awq.py +++ b/test/prototype/test_awq.py @@ -4,7 +4,7 @@ import torch from torchao.quantization import quantize_ -from torchao.utils import TORCH_VERSION_AT_LEAST_2_3, TORCH_VERSION_AT_LEAST_2_5, TORCH_VERSION_AT_LEAST_2_6, +from torchao.utils import TORCH_VERSION_AT_LEAST_2_3, TORCH_VERSION_AT_LEAST_2_5, TORCH_VERSION_AT_LEAST_2_6 if TORCH_VERSION_AT_LEAST_2_3: from torchao.prototype.awq import insert_awq_observer_, awq_uintx, AWQObservedLinear From a0806a35cc0dbdc165bcd408c69a83d950341397 Mon Sep 17 00:00:00 2001 From: Jesse Cai Date: Mon, 18 Nov 2024 14:02:54 -0500 Subject: [PATCH 09/23] Update test_awq.py --- test/prototype/test_awq.py | 1 + 1 file changed, 1 insertion(+) diff --git a/test/prototype/test_awq.py b/test/prototype/test_awq.py index d827059b23..aaeb80af7f 100644 --- a/test/prototype/test_awq.py +++ b/test/prototype/test_awq.py @@ -34,6 +34,7 @@ def forward(self, x): # load with weights_only=True if TORCH_VERSION_AT_LEAST_2_6: torch.serialization.add_safe_globals([ToyLinearModel]) + torch.serialization.add_safe_globals([Linear]) @pytest.fixture(autouse=True) def run_before_and_after_tests(): From 680130582b68c2a65540d45f72b5fda2c9e16a2f Mon Sep 17 00:00:00 2001 From: Jesse Cai Date: Mon, 18 Nov 2024 16:33:24 -0500 Subject: [PATCH 10/23] Update test_awq.py --- test/prototype/test_awq.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/prototype/test_awq.py b/test/prototype/test_awq.py index aaeb80af7f..206dd33713 100644 --- a/test/prototype/test_awq.py +++ b/test/prototype/test_awq.py @@ -34,7 +34,7 @@ def forward(self, x): # load with weights_only=True if TORCH_VERSION_AT_LEAST_2_6: torch.serialization.add_safe_globals([ToyLinearModel]) - torch.serialization.add_safe_globals([Linear]) + torch.serialization.add_safe_globals([torch.nn.Linear]) @pytest.fixture(autouse=True) def run_before_and_after_tests(): From 43adaec3ce0e38563cfc048192ecbcb3e635012b Mon Sep 17 00:00:00 2001 From: Jesse Cai Date: Mon, 18 Nov 2024 16:55:20 -0500 Subject: [PATCH 11/23] Update regression_test.yml --- .github/workflows/regression_test.yml | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/.github/workflows/regression_test.yml b/.github/workflows/regression_test.yml index a1087db641..ee6ae1cfb6 100644 --- a/.github/workflows/regression_test.yml +++ b/.github/workflows/regression_test.yml @@ -70,11 +70,6 @@ jobs: torch-spec: 'torch==2.5.1 --index-url https://download.pytorch.org/whl/cu121' gpu-arch-type: "cuda" gpu-arch-version: "12.1" - - name: CUDA Nightly - runs-on: linux.g5.12xlarge.nvidia.gpu - torch-spec: '--pre torch --index-url https://download.pytorch.org/whl/nightly/cu121' - gpu-arch-type: "cuda" - gpu-arch-version: "12.1" - name: CPU 2.3 runs-on: linux.4xlarge torch-spec: 'torch==2.3.0 --index-url https://download.pytorch.org/whl/cpu' @@ -90,11 +85,6 @@ jobs: torch-spec: 'torch==2.5.1 --index-url https://download.pytorch.org/whl/cpu' gpu-arch-type: "cpu" gpu-arch-version: "" - - name: CPU Nightly - runs-on: linux.4xlarge - torch-spec: '--pre torch --index-url https://download.pytorch.org/whl/nightly/cpu' - gpu-arch-type: "cpu" - gpu-arch-version: "" uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main From 71e8088edaa86e0fda460bb41bf2983276941960 Mon Sep 17 00:00:00 2001 From: Jesse Cai Date: Mon, 18 Nov 2024 16:56:46 -0500 Subject: [PATCH 12/23] Update regression_test.yml --- .github/workflows/regression_test.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/regression_test.yml b/.github/workflows/regression_test.yml index ee6ae1cfb6..0aa738d1fc 100644 --- a/.github/workflows/regression_test.yml +++ b/.github/workflows/regression_test.yml @@ -86,8 +86,8 @@ jobs: gpu-arch-type: "cpu" gpu-arch-version: "" - - uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main + uses: pytorch/test-infra/.github/workflows/linux_job.yml@main + with: timeout: 120 runner: ${{ matrix.runs-on }} From 5d398a30efbd3b4913f1d62ef64de8ccdf756f33 Mon Sep 17 00:00:00 2001 From: Jesse Cai Date: Mon, 18 Nov 2024 16:57:38 -0500 Subject: [PATCH 13/23] Update regression_test.yml --- .github/workflows/regression_test.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/workflows/regression_test.yml b/.github/workflows/regression_test.yml index 0aa738d1fc..9f69edc140 100644 --- a/.github/workflows/regression_test.yml +++ b/.github/workflows/regression_test.yml @@ -96,6 +96,9 @@ jobs: script: | conda create -n venv python=3.9 -y conda activate venv + echo "::group::Install newer objcopy that supports --set-section-alignment" + yum install -y devtoolset-10-binutils + export PATH=/opt/rh/devtoolset-10/root/usr/bin/:$PATH python -m pip install --upgrade pip pip install ${{ matrix.torch-spec }} pip install -r dev-requirements.txt From ab43cd70a5124ab37760c09db3562683a4d74718 Mon Sep 17 00:00:00 2001 From: Jesse Cai Date: Mon, 18 Nov 2024 16:57:54 -0500 Subject: [PATCH 14/23] Update regression_test.yml --- .github/workflows/regression_test.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/regression_test.yml b/.github/workflows/regression_test.yml index 9f69edc140..cf58e476c2 100644 --- a/.github/workflows/regression_test.yml +++ b/.github/workflows/regression_test.yml @@ -87,7 +87,6 @@ jobs: gpu-arch-version: "" uses: pytorch/test-infra/.github/workflows/linux_job.yml@main - with: timeout: 120 runner: ${{ matrix.runs-on }} From b2d7cbb26a99a0d3b8497d9349076a82b5584734 Mon Sep 17 00:00:00 2001 From: Jesse Cai Date: Mon, 18 Nov 2024 16:59:20 -0500 Subject: [PATCH 15/23] Update regression_test.yml --- .github/workflows/regression_test.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/regression_test.yml b/.github/workflows/regression_test.yml index cf58e476c2..f1104bf66a 100644 --- a/.github/workflows/regression_test.yml +++ b/.github/workflows/regression_test.yml @@ -25,12 +25,12 @@ jobs: include: - name: CUDA Nightly runs-on: linux.g5.12xlarge.nvidia.gpu - torch-spec: '--pre torch==2.6.0.dev20241101 --index-url https://download.pytorch.org/whl/nightly/cu121' + torch-spec: '--pre torch --index-url https://download.pytorch.org/whl/nightly/cu121' gpu-arch-type: "cuda" gpu-arch-version: "12.1" - name: CPU Nightly runs-on: linux.4xlarge - torch-spec: '--pre torch==2.6.0.dev20241101 --index-url https://download.pytorch.org/whl/nightly/cpu' + torch-spec: '--pre torch --index-url https://download.pytorch.org/whl/nightly/cpu' gpu-arch-type: "cpu" gpu-arch-version: "" From 8c168a1919e54617a90a3c1d5f9e1bc5af9b1915 Mon Sep 17 00:00:00 2001 From: Jesse Cai Date: Mon, 18 Nov 2024 18:56:16 -0500 Subject: [PATCH 16/23] Update test_awq.py --- test/prototype/test_awq.py | 1 + 1 file changed, 1 insertion(+) diff --git a/test/prototype/test_awq.py b/test/prototype/test_awq.py index 206dd33713..897dfc437c 100644 --- a/test/prototype/test_awq.py +++ b/test/prototype/test_awq.py @@ -35,6 +35,7 @@ def forward(self, x): if TORCH_VERSION_AT_LEAST_2_6: torch.serialization.add_safe_globals([ToyLinearModel]) torch.serialization.add_safe_globals([torch.nn.Linear]) + torch.serialization.add_safe_globals([torchao.dtypes.uintx.uintx_layout.UintxTensor]) @pytest.fixture(autouse=True) def run_before_and_after_tests(): From e7939268fe0fea314f893472a86efa5893dd3e90 Mon Sep 17 00:00:00 2001 From: Jesse Cai Date: Mon, 18 Nov 2024 22:21:01 -0500 Subject: [PATCH 17/23] Update test_awq.py --- test/prototype/test_awq.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/test/prototype/test_awq.py b/test/prototype/test_awq.py index 897dfc437c..5c77cc7c01 100644 --- a/test/prototype/test_awq.py +++ b/test/prototype/test_awq.py @@ -35,7 +35,8 @@ def forward(self, x): if TORCH_VERSION_AT_LEAST_2_6: torch.serialization.add_safe_globals([ToyLinearModel]) torch.serialization.add_safe_globals([torch.nn.Linear]) - torch.serialization.add_safe_globals([torchao.dtypes.uintx.uintx_layout.UintxTensor]) + from torchao.dtypes.uintx.uintx_layout import UintxTensor + torch.serialization.add_safe_globals([UintxTensor]) @pytest.fixture(autouse=True) def run_before_and_after_tests(): From 88cb4ac5563397b8857cb28c935571f1be78ea61 Mon Sep 17 00:00:00 2001 From: Jesse Cai Date: Tue, 19 Nov 2024 15:02:40 -0500 Subject: [PATCH 18/23] Update test_awq.py --- test/prototype/test_awq.py | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/test/prototype/test_awq.py b/test/prototype/test_awq.py index 5c77cc7c01..3756094eac 100644 --- a/test/prototype/test_awq.py +++ b/test/prototype/test_awq.py @@ -31,12 +31,6 @@ def forward(self, x): else: qdtypes = () -# load with weights_only=True -if TORCH_VERSION_AT_LEAST_2_6: - torch.serialization.add_safe_globals([ToyLinearModel]) - torch.serialization.add_safe_globals([torch.nn.Linear]) - from torchao.dtypes.uintx.uintx_layout import UintxTensor - torch.serialization.add_safe_globals([UintxTensor]) @pytest.fixture(autouse=True) def run_before_and_after_tests(): @@ -77,7 +71,7 @@ def test_awq_loading(device, qdtype): model_save_path = "awq_model.pth" torch.save(m, model_save_path) - loaded_model = torch.load(model_save_path) + loaded_model = torch.load(model_save_path. assign=True) os.remove(model_save_path) if torch.cuda.is_available(): From 38f46fcc8c11fbed4ce59a66825679b014ce956d Mon Sep 17 00:00:00 2001 From: Jesse Cai Date: Tue, 19 Nov 2024 15:03:03 -0500 Subject: [PATCH 19/23] Update test_awq.py --- test/prototype/test_awq.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/prototype/test_awq.py b/test/prototype/test_awq.py index 3756094eac..cddb83988b 100644 --- a/test/prototype/test_awq.py +++ b/test/prototype/test_awq.py @@ -71,7 +71,7 @@ def test_awq_loading(device, qdtype): model_save_path = "awq_model.pth" torch.save(m, model_save_path) - loaded_model = torch.load(model_save_path. assign=True) + loaded_model = torch.load(model_save_path, assign=True) os.remove(model_save_path) if torch.cuda.is_available(): From d015491d3542c5871a016399d110556d5c53e1f4 Mon Sep 17 00:00:00 2001 From: Jesse Cai Date: Tue, 19 Nov 2024 17:40:54 -0500 Subject: [PATCH 20/23] Update test_awq.py --- test/prototype/test_awq.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/prototype/test_awq.py b/test/prototype/test_awq.py index cddb83988b..ab4ebeeb6d 100644 --- a/test/prototype/test_awq.py +++ b/test/prototype/test_awq.py @@ -113,7 +113,7 @@ def test_save_weights_only(): # quantize is_observed_linear = lambda m, fqn: isinstance(m, AWQObservedLinear) quantize_(m, awq_uintx(quant_dtype = quant_dtype, group_size = group_size), is_observed_linear) - + model_save_path = "awq_model.pth" torch.save(m.state_dict(), model_save_path) m2.load_state_dict(torch.load(model_save_path), assign=True) # load weights only.torch.load(model_save_path) From 59357b94577152b33b954020768955e03137035b Mon Sep 17 00:00:00 2001 From: Jesse Cai Date: Tue, 19 Nov 2024 17:51:07 -0500 Subject: [PATCH 21/23] Update test_awq.py --- test/prototype/test_awq.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/test/prototype/test_awq.py b/test/prototype/test_awq.py index ab4ebeeb6d..8b196cbe4f 100644 --- a/test/prototype/test_awq.py +++ b/test/prototype/test_awq.py @@ -87,6 +87,7 @@ def test_awq_loading(device, qdtype): @pytest.mark.skipif(not TORCH_VERSION_AT_LEAST_2_5,reason="requires nightly pytorch") @pytest.mark.skipif(not torch.cuda.is_available(), reason="CUDA not available") +@pytest.mark.skip("Temporarily skipping to unpin nightlies") def test_save_weights_only(): dataset_size = 100 l1,l2,l3 = 512,256,128 @@ -113,10 +114,10 @@ def test_save_weights_only(): # quantize is_observed_linear = lambda m, fqn: isinstance(m, AWQObservedLinear) quantize_(m, awq_uintx(quant_dtype = quant_dtype, group_size = group_size), is_observed_linear) - + model_save_path = "awq_model.pth" torch.save(m.state_dict(), model_save_path) - m2.load_state_dict(torch.load(model_save_path), assign=True) # load weights only.torch.load(model_save_path) + m2.load_state_dict(torch.load(model_save_path, weights_only=False), assign=True) # load weights only.torch.load(model_save_path) os.remove(model_save_path) m = torch.compile(m, fullgraph=True) From 08b813f106ae7f1ea7f9a0809bc516b32ba489f7 Mon Sep 17 00:00:00 2001 From: Jesse Cai Date: Tue, 19 Nov 2024 18:05:37 -0500 Subject: [PATCH 22/23] Update test_awq.py --- test/prototype/test_awq.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/test/prototype/test_awq.py b/test/prototype/test_awq.py index 8b196cbe4f..40845b8336 100644 --- a/test/prototype/test_awq.py +++ b/test/prototype/test_awq.py @@ -4,7 +4,7 @@ import torch from torchao.quantization import quantize_ -from torchao.utils import TORCH_VERSION_AT_LEAST_2_3, TORCH_VERSION_AT_LEAST_2_5, TORCH_VERSION_AT_LEAST_2_6 +from torchao.utils import TORCH_VERSION_AT_LEAST_2_3, TORCH_VERSION_AT_LEAST_2_5 if TORCH_VERSION_AT_LEAST_2_3: from torchao.prototype.awq import insert_awq_observer_, awq_uintx, AWQObservedLinear @@ -30,7 +30,6 @@ def forward(self, x): qdtypes = (torch.uint4, torch.uint7) else: qdtypes = () - @pytest.fixture(autouse=True) def run_before_and_after_tests(): @@ -41,6 +40,7 @@ def run_before_and_after_tests(): @pytest.mark.parametrize("qdtype", qdtypes) @pytest.mark.skipif(not torch.cuda.is_available(), reason="CUDA not available") @pytest.mark.skipif(not TORCH_VERSION_AT_LEAST_2_5,reason="requires nightly pytorch") +@pytest.mark.skip("Temporarily skipping to unpin nightiles") def test_awq_loading(device, qdtype): if qdtype == torch.uint4 and device == "cpu": pytest.skip("uint4 not supported on cpu") @@ -71,7 +71,7 @@ def test_awq_loading(device, qdtype): model_save_path = "awq_model.pth" torch.save(m, model_save_path) - loaded_model = torch.load(model_save_path, assign=True) + loaded_model = torch.load(model_save_path) os.remove(model_save_path) if torch.cuda.is_available(): @@ -87,7 +87,6 @@ def test_awq_loading(device, qdtype): @pytest.mark.skipif(not TORCH_VERSION_AT_LEAST_2_5,reason="requires nightly pytorch") @pytest.mark.skipif(not torch.cuda.is_available(), reason="CUDA not available") -@pytest.mark.skip("Temporarily skipping to unpin nightlies") def test_save_weights_only(): dataset_size = 100 l1,l2,l3 = 512,256,128 From d722cf22060dfbc98bcabb3a7cb84b2f03487bcc Mon Sep 17 00:00:00 2001 From: Jesse Cai Date: Tue, 19 Nov 2024 18:06:05 -0500 Subject: [PATCH 23/23] Update test_awq.py --- test/prototype/test_awq.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/prototype/test_awq.py b/test/prototype/test_awq.py index 40845b8336..3663e027c7 100644 --- a/test/prototype/test_awq.py +++ b/test/prototype/test_awq.py @@ -116,7 +116,7 @@ def test_save_weights_only(): model_save_path = "awq_model.pth" torch.save(m.state_dict(), model_save_path) - m2.load_state_dict(torch.load(model_save_path, weights_only=False), assign=True) # load weights only.torch.load(model_save_path) + m2.load_state_dict(torch.load(model_save_path), assign=True) # load weights only.torch.load(model_save_path) os.remove(model_save_path) m = torch.compile(m, fullgraph=True)