From 956bdd1796485701f614404a93958999c2e103df Mon Sep 17 00:00:00 2001 From: jainapurva Date: Mon, 28 Oct 2024 16:41:23 -0700 Subject: [PATCH 1/2] Move files from quantization/prototype -> prototype/quantization --- .../test_mixed_precision.py | 2 +- torchao/prototype/quantization/__init__.py | 0 .../quantization}/mixed_precision/README.md | 0 .../quantization/mixed_precision/__init__.py | 0 .../scripts/BO_acc_modelsize.py | 0 .../scripts/BO_acc_throughput.py | 0 .../scripts/Llama3-8B_initial_samples.json | 0 .../scripts/Llama3-8B_parameters.json | 0 .../scripts/Mistral-7B_initial_samples.json | 0 .../scripts/Mistral-7B_parameters.json | 0 .../mixed_precision/scripts/__init__.py | 1 + .../mixed_precision/scripts/fit.py | 0 .../mixed_precision/scripts/hessian_grad.py | 0 .../mixed_precision/scripts/hessian_vhp.py | 0 .../mixed_precision/scripts/mp_quant_eval.py | 0 .../mixed_precision/scripts/naive_intNwo.py | 62 ++++++++++++++++++ .../mixed_precision/scripts/utils.py | 0 .../mixed_precision/scripts/naive_intNwo.py | 63 +------------------ 18 files changed, 65 insertions(+), 63 deletions(-) rename test/{quantization => prototype}/test_mixed_precision.py (95%) create mode 100644 torchao/prototype/quantization/__init__.py rename torchao/{quantization/prototype => prototype/quantization}/mixed_precision/README.md (100%) create mode 100644 torchao/prototype/quantization/mixed_precision/__init__.py rename torchao/{quantization/prototype => prototype/quantization}/mixed_precision/scripts/BO_acc_modelsize.py (100%) rename torchao/{quantization/prototype => prototype/quantization}/mixed_precision/scripts/BO_acc_throughput.py (100%) rename torchao/{quantization/prototype => prototype/quantization}/mixed_precision/scripts/Llama3-8B_initial_samples.json (100%) rename torchao/{quantization/prototype => prototype/quantization}/mixed_precision/scripts/Llama3-8B_parameters.json (100%) rename torchao/{quantization/prototype => prototype/quantization}/mixed_precision/scripts/Mistral-7B_initial_samples.json (100%) rename torchao/{quantization/prototype => prototype/quantization}/mixed_precision/scripts/Mistral-7B_parameters.json (100%) create mode 100644 torchao/prototype/quantization/mixed_precision/scripts/__init__.py rename torchao/{quantization/prototype => prototype/quantization}/mixed_precision/scripts/fit.py (100%) rename torchao/{quantization/prototype => prototype/quantization}/mixed_precision/scripts/hessian_grad.py (100%) rename torchao/{quantization/prototype => prototype/quantization}/mixed_precision/scripts/hessian_vhp.py (100%) rename torchao/{quantization/prototype => prototype/quantization}/mixed_precision/scripts/mp_quant_eval.py (100%) create mode 100644 torchao/prototype/quantization/mixed_precision/scripts/naive_intNwo.py rename torchao/{quantization/prototype => prototype/quantization}/mixed_precision/scripts/utils.py (100%) diff --git a/test/quantization/test_mixed_precision.py b/test/prototype/test_mixed_precision.py similarity index 95% rename from test/quantization/test_mixed_precision.py rename to test/prototype/test_mixed_precision.py index 8afd022d3c..bfcd7bed2b 100644 --- a/test/quantization/test_mixed_precision.py +++ b/test/prototype/test_mixed_precision.py @@ -4,7 +4,7 @@ import torch.nn as nn from torchao.quantization import quantize_, int8_weight_only, int4_weight_only from torchao.quantization.utils import compute_error -from torchao.quantization.prototype.mixed_precision.scripts.naive_intNwo import intN_weight_only +from torchao.prototype.quantization.mixed_precision.scripts.naive_intNwo import intN_weight_only _CUDA_IS_AVAILABLE = torch.cuda.is_available() diff --git a/torchao/prototype/quantization/__init__.py b/torchao/prototype/quantization/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/torchao/quantization/prototype/mixed_precision/README.md b/torchao/prototype/quantization/mixed_precision/README.md similarity index 100% rename from torchao/quantization/prototype/mixed_precision/README.md rename to torchao/prototype/quantization/mixed_precision/README.md diff --git a/torchao/prototype/quantization/mixed_precision/__init__.py b/torchao/prototype/quantization/mixed_precision/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/torchao/quantization/prototype/mixed_precision/scripts/BO_acc_modelsize.py b/torchao/prototype/quantization/mixed_precision/scripts/BO_acc_modelsize.py similarity index 100% rename from torchao/quantization/prototype/mixed_precision/scripts/BO_acc_modelsize.py rename to torchao/prototype/quantization/mixed_precision/scripts/BO_acc_modelsize.py diff --git a/torchao/quantization/prototype/mixed_precision/scripts/BO_acc_throughput.py b/torchao/prototype/quantization/mixed_precision/scripts/BO_acc_throughput.py similarity index 100% rename from torchao/quantization/prototype/mixed_precision/scripts/BO_acc_throughput.py rename to torchao/prototype/quantization/mixed_precision/scripts/BO_acc_throughput.py diff --git a/torchao/quantization/prototype/mixed_precision/scripts/Llama3-8B_initial_samples.json b/torchao/prototype/quantization/mixed_precision/scripts/Llama3-8B_initial_samples.json similarity index 100% rename from torchao/quantization/prototype/mixed_precision/scripts/Llama3-8B_initial_samples.json rename to torchao/prototype/quantization/mixed_precision/scripts/Llama3-8B_initial_samples.json diff --git a/torchao/quantization/prototype/mixed_precision/scripts/Llama3-8B_parameters.json b/torchao/prototype/quantization/mixed_precision/scripts/Llama3-8B_parameters.json similarity index 100% rename from torchao/quantization/prototype/mixed_precision/scripts/Llama3-8B_parameters.json rename to torchao/prototype/quantization/mixed_precision/scripts/Llama3-8B_parameters.json diff --git a/torchao/quantization/prototype/mixed_precision/scripts/Mistral-7B_initial_samples.json b/torchao/prototype/quantization/mixed_precision/scripts/Mistral-7B_initial_samples.json similarity index 100% rename from torchao/quantization/prototype/mixed_precision/scripts/Mistral-7B_initial_samples.json rename to torchao/prototype/quantization/mixed_precision/scripts/Mistral-7B_initial_samples.json diff --git a/torchao/quantization/prototype/mixed_precision/scripts/Mistral-7B_parameters.json b/torchao/prototype/quantization/mixed_precision/scripts/Mistral-7B_parameters.json similarity index 100% rename from torchao/quantization/prototype/mixed_precision/scripts/Mistral-7B_parameters.json rename to torchao/prototype/quantization/mixed_precision/scripts/Mistral-7B_parameters.json diff --git a/torchao/prototype/quantization/mixed_precision/scripts/__init__.py b/torchao/prototype/quantization/mixed_precision/scripts/__init__.py new file mode 100644 index 0000000000..1b0cae6ab3 --- /dev/null +++ b/torchao/prototype/quantization/mixed_precision/scripts/__init__.py @@ -0,0 +1 @@ +from .naive_intNwo import intN_weight_only diff --git a/torchao/quantization/prototype/mixed_precision/scripts/fit.py b/torchao/prototype/quantization/mixed_precision/scripts/fit.py similarity index 100% rename from torchao/quantization/prototype/mixed_precision/scripts/fit.py rename to torchao/prototype/quantization/mixed_precision/scripts/fit.py diff --git a/torchao/quantization/prototype/mixed_precision/scripts/hessian_grad.py b/torchao/prototype/quantization/mixed_precision/scripts/hessian_grad.py similarity index 100% rename from torchao/quantization/prototype/mixed_precision/scripts/hessian_grad.py rename to torchao/prototype/quantization/mixed_precision/scripts/hessian_grad.py diff --git a/torchao/quantization/prototype/mixed_precision/scripts/hessian_vhp.py b/torchao/prototype/quantization/mixed_precision/scripts/hessian_vhp.py similarity index 100% rename from torchao/quantization/prototype/mixed_precision/scripts/hessian_vhp.py rename to torchao/prototype/quantization/mixed_precision/scripts/hessian_vhp.py diff --git a/torchao/quantization/prototype/mixed_precision/scripts/mp_quant_eval.py b/torchao/prototype/quantization/mixed_precision/scripts/mp_quant_eval.py similarity index 100% rename from torchao/quantization/prototype/mixed_precision/scripts/mp_quant_eval.py rename to torchao/prototype/quantization/mixed_precision/scripts/mp_quant_eval.py diff --git a/torchao/prototype/quantization/mixed_precision/scripts/naive_intNwo.py b/torchao/prototype/quantization/mixed_precision/scripts/naive_intNwo.py new file mode 100644 index 0000000000..363bcb7b9c --- /dev/null +++ b/torchao/prototype/quantization/mixed_precision/scripts/naive_intNwo.py @@ -0,0 +1,62 @@ +import torch + +from torchao.quantization.quant_primitives import ( + MappingType, + ZeroPointDomain, +) + +from torchao.quantization import int8_weight_only, int4_weight_only +from torchao.quantization.quant_api import _get_linear_subclass_inserter + +def intN_weight_only(group_size=32, n=8, symmetric=False): + ''' + Apply int N-bit weight only quantization to a linear layer. + Args: + `group_size`: parameter for quantization, controls the granularity of quantization, smaller size is more fine grained, choices are [512, 256, 128, 64, 32] + `n`: number of bits to quantize to, choices are [8, 6, 5, 4, 3, 2] + Usage: + from torchao.quantization import quantize_ + quantize_(model, intN_weight_only(n=your_bit_choice, group_size=group_size), optional_filter_func_for_desired_layers_to_quantize) + ''' + # for asymmetric quantization + def apply_intN_weight_only_quant_asym(weight): + # avoid circular dependency + from torchao.dtypes import to_affine_quantized_intx + mapping_type = MappingType.ASYMMETRIC + block_size = (1, group_size) + target_dtype = torch.uint8 + quant_min = 0 + quant_max = 2**n-1 + eps = 1e-6 + preserve_zero = True + zero_point_dtype = torch.int64 + zero_point_domain = ZeroPointDomain.INT + return to_affine_quantized_intx(weight, mapping_type, block_size, target_dtype, quant_min, quant_max, eps, zero_point_dtype=zero_point_dtype)#, preserve_zero=preserve_zero,zero_point_domain=zero_point_domain) + + # for symmetric quantization + def apply_intN_weight_only_quant_sym(weight): + # avoid circular dependency + from torchao.dtypes import to_affine_quantized_intx + mapping_type = MappingType.SYMMETRIC + block_size = (1, group_size) + target_dtype = torch.int8 + quant_min = -2**(n-1) + quant_max = 2**(n-1)-1 + eps = 1e-6 + zero_point_dtype = torch.int64 + return to_affine_quantized_intx(weight, mapping_type, block_size, target_dtype, quant_min, quant_max, eps=eps, zero_point_dtype=zero_point_dtype) + + try: + assert n in [8, 6, 5, 4, 3, 2], "n must be one of [8, 6, 5, 4, 3, 2]" + if n == 8: + return int8_weight_only() + elif n == 4: + return int4_weight_only(group_size=group_size) + else: + if symmetric: + return _get_linear_subclass_inserter(apply_intN_weight_only_quant_sym) + else: + return _get_linear_subclass_inserter(apply_intN_weight_only_quant_asym) + except Exception as e: + raise + diff --git a/torchao/quantization/prototype/mixed_precision/scripts/utils.py b/torchao/prototype/quantization/mixed_precision/scripts/utils.py similarity index 100% rename from torchao/quantization/prototype/mixed_precision/scripts/utils.py rename to torchao/prototype/quantization/mixed_precision/scripts/utils.py diff --git a/torchao/quantization/prototype/mixed_precision/scripts/naive_intNwo.py b/torchao/quantization/prototype/mixed_precision/scripts/naive_intNwo.py index 363bcb7b9c..628ddfe27b 100644 --- a/torchao/quantization/prototype/mixed_precision/scripts/naive_intNwo.py +++ b/torchao/quantization/prototype/mixed_precision/scripts/naive_intNwo.py @@ -1,62 +1 @@ -import torch - -from torchao.quantization.quant_primitives import ( - MappingType, - ZeroPointDomain, -) - -from torchao.quantization import int8_weight_only, int4_weight_only -from torchao.quantization.quant_api import _get_linear_subclass_inserter - -def intN_weight_only(group_size=32, n=8, symmetric=False): - ''' - Apply int N-bit weight only quantization to a linear layer. - Args: - `group_size`: parameter for quantization, controls the granularity of quantization, smaller size is more fine grained, choices are [512, 256, 128, 64, 32] - `n`: number of bits to quantize to, choices are [8, 6, 5, 4, 3, 2] - Usage: - from torchao.quantization import quantize_ - quantize_(model, intN_weight_only(n=your_bit_choice, group_size=group_size), optional_filter_func_for_desired_layers_to_quantize) - ''' - # for asymmetric quantization - def apply_intN_weight_only_quant_asym(weight): - # avoid circular dependency - from torchao.dtypes import to_affine_quantized_intx - mapping_type = MappingType.ASYMMETRIC - block_size = (1, group_size) - target_dtype = torch.uint8 - quant_min = 0 - quant_max = 2**n-1 - eps = 1e-6 - preserve_zero = True - zero_point_dtype = torch.int64 - zero_point_domain = ZeroPointDomain.INT - return to_affine_quantized_intx(weight, mapping_type, block_size, target_dtype, quant_min, quant_max, eps, zero_point_dtype=zero_point_dtype)#, preserve_zero=preserve_zero,zero_point_domain=zero_point_domain) - - # for symmetric quantization - def apply_intN_weight_only_quant_sym(weight): - # avoid circular dependency - from torchao.dtypes import to_affine_quantized_intx - mapping_type = MappingType.SYMMETRIC - block_size = (1, group_size) - target_dtype = torch.int8 - quant_min = -2**(n-1) - quant_max = 2**(n-1)-1 - eps = 1e-6 - zero_point_dtype = torch.int64 - return to_affine_quantized_intx(weight, mapping_type, block_size, target_dtype, quant_min, quant_max, eps=eps, zero_point_dtype=zero_point_dtype) - - try: - assert n in [8, 6, 5, 4, 3, 2], "n must be one of [8, 6, 5, 4, 3, 2]" - if n == 8: - return int8_weight_only() - elif n == 4: - return int4_weight_only(group_size=group_size) - else: - if symmetric: - return _get_linear_subclass_inserter(apply_intN_weight_only_quant_sym) - else: - return _get_linear_subclass_inserter(apply_intN_weight_only_quant_asym) - except Exception as e: - raise - +from torchao.prototype.quantization.mixed_precision.scripts.naive_intNwo import intN_weight_only From a39f9f2af63e89b7e4b8294a3b0505ae6d5c9e67 Mon Sep 17 00:00:00 2001 From: jainapurva Date: Tue, 29 Oct 2024 11:12:26 -0700 Subject: [PATCH 2/2] Remove bc --- test/prototype/test_mixed_precision.py | 2 +- torchao/quantization/prototype/mixed_precision/__init__.py | 0 .../quantization/prototype/mixed_precision/scripts/__init__.py | 1 - .../prototype/mixed_precision/scripts/naive_intNwo.py | 1 - 4 files changed, 1 insertion(+), 3 deletions(-) delete mode 100644 torchao/quantization/prototype/mixed_precision/__init__.py delete mode 100644 torchao/quantization/prototype/mixed_precision/scripts/__init__.py delete mode 100644 torchao/quantization/prototype/mixed_precision/scripts/naive_intNwo.py diff --git a/test/prototype/test_mixed_precision.py b/test/prototype/test_mixed_precision.py index bfcd7bed2b..ce575722ee 100644 --- a/test/prototype/test_mixed_precision.py +++ b/test/prototype/test_mixed_precision.py @@ -4,7 +4,7 @@ import torch.nn as nn from torchao.quantization import quantize_, int8_weight_only, int4_weight_only from torchao.quantization.utils import compute_error -from torchao.prototype.quantization.mixed_precision.scripts.naive_intNwo import intN_weight_only +from torchao.prototype.quantization.mixed_precision.scripts import intN_weight_only _CUDA_IS_AVAILABLE = torch.cuda.is_available() diff --git a/torchao/quantization/prototype/mixed_precision/__init__.py b/torchao/quantization/prototype/mixed_precision/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/torchao/quantization/prototype/mixed_precision/scripts/__init__.py b/torchao/quantization/prototype/mixed_precision/scripts/__init__.py deleted file mode 100644 index 1b0cae6ab3..0000000000 --- a/torchao/quantization/prototype/mixed_precision/scripts/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .naive_intNwo import intN_weight_only diff --git a/torchao/quantization/prototype/mixed_precision/scripts/naive_intNwo.py b/torchao/quantization/prototype/mixed_precision/scripts/naive_intNwo.py deleted file mode 100644 index 628ddfe27b..0000000000 --- a/torchao/quantization/prototype/mixed_precision/scripts/naive_intNwo.py +++ /dev/null @@ -1 +0,0 @@ -from torchao.prototype.quantization.mixed_precision.scripts.naive_intNwo import intN_weight_only