@@ -146,12 +146,12 @@ def preprocess_function(examples, tokenizer):
 class OVWeightCompressionTest(unittest.TestCase):
     # TODO : add models
     SUPPORTED_ARCHITECTURES_WITH_EXPECTED_COMPRESSED_MATMULS = (
-        (OVModelForSequenceClassification, "hf-internal-testing/tiny-random-bert", 39),
-        (OVModelForCausalLM, "hf-internal-testing/tiny-random-gpt2", 5),
+        (OVModelForSequenceClassification, "hf-internal-testing/tiny-random-bert", 39, 35),
+        (OVModelForCausalLM, "hf-internal-testing/tiny-random-gpt2", 5, 23),
     )

     @parameterized.expand(SUPPORTED_ARCHITECTURES_WITH_EXPECTED_COMPRESSED_MATMULS)
-    def test_automodel_weight_compression(self, model_cls, model_name, expected_int8):
+    def test_automodel_weight_compression(self, model_cls, model_name, expected_pt_int8, expected_ov_int8):
         task = model_cls.export_feature

         with tempfile.TemporaryDirectory() as tmp_dir:
@@ -166,7 +166,7 @@ def test_automodel_weight_compression(self, model_cls, model_name, expected_int8

             # TODO: uncomment once move to a newer version of NNCF which has some fixes
             _, num_int8 = get_num_quantized_nodes(model)
-            self.assertEqual(expected_int8, num_int8)
+            self.assertEqual(expected_pt_int8, num_int8)

             tokens = tokenizer("This is a sample input", return_tensors="pt")
             outputs = model(**tokens)
@@ -177,6 +177,27 @@ def test_automodel_weight_compression(self, model_cls, model_name, expected_int8
             loaded_config = OVConfig.from_pretrained(tmp_dir)
             self.assertEqual(expected_config.to_dict()["compression"], loaded_config.to_dict()["compression"])

+    @parameterized.expand(SUPPORTED_ARCHITECTURES_WITH_EXPECTED_COMPRESSED_MATMULS)
+    def test_ovmodel_weight_compression(self, model_cls, model_name, expected_pt_int8, expected_ov_int8):
+        task = model_cls.export_feature
+
+        with tempfile.TemporaryDirectory() as tmp_dir:
+            transformers_model = model_cls.from_pretrained(model_name, export=True)
+            tokenizer = AutoTokenizer.from_pretrained(model_name)
+            if tokenizer.pad_token is None:
+                tokenizer.pad_token = tokenizer.eos_token
+
+            quantizer = OVQuantizer.from_pretrained(transformers_model, task=task)
+            quantizer.quantize(save_directory=tmp_dir, weights_only=True)
+            model = model_cls.from_pretrained(tmp_dir)
+
+            _, num_int8 = get_num_quantized_nodes(model)
+            self.assertEqual(expected_ov_int8, num_int8)
+
+            tokens = tokenizer("This is a sample input", return_tensors="pt")
+            outputs = model(**tokens)
+            self.assertTrue("logits" in outputs)
+

 class OVQuantizerQATest(unittest.TestCase):
     SUPPORTED_ARCHITECTURES = (("hf-internal-testing/tiny-random-BertForQuestionAnswering",),)
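Note: the parameter tuples now carry two expected counts because the two compression paths produce different numbers of INT8 ops — `expected_pt_int8` for the existing PyTorch-side (NNCF) path and `expected_ov_int8` for the new OpenVINO-model path checked by `test_ovmodel_weight_compression`. A minimal standalone sketch of the flow that new test exercises, using the tiny GPT-2 checkpoint from the parameter list; the API calls mirror the diff above, while the import paths and the trailing assertion are illustrative assumptions rather than part of the commit:

    import tempfile

    from transformers import AutoTokenizer
    from optimum.intel import OVModelForCausalLM, OVQuantizer

    model_id = "hf-internal-testing/tiny-random-gpt2"

    # Export the PyTorch checkpoint to OpenVINO IR on load, as the test does.
    model = OVModelForCausalLM.from_pretrained(model_id, export=True)
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token

    with tempfile.TemporaryDirectory() as tmp_dir:
        # weights_only=True compresses weights to INT8; as in the test above,
        # no calibration dataset is passed to quantize().
        quantizer = OVQuantizer.from_pretrained(model, task=OVModelForCausalLM.export_feature)
        quantizer.quantize(save_directory=tmp_dir, weights_only=True)

        # Reload the compressed model and run a smoke-test forward pass.
        compressed = OVModelForCausalLM.from_pretrained(tmp_dir)
        outputs = compressed(**tokenizer("This is a sample input", return_tensors="pt"))
        assert "logits" in outputs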