@@ -146,12 +146,12 @@ def preprocess_function(examples, tokenizer):
146146class OVWeightCompressionTest (unittest .TestCase ):
147147 # TODO : add models
148148 SUPPORTED_ARCHITECTURES_WITH_EXPECTED_COMPRESSED_MATMULS = (
149- (OVModelForSequenceClassification , "hf-internal-testing/tiny-random-bert" , 70 ),
150- (OVModelForCausalLM , "hf-internal-testing/tiny-random-gpt2" , 45 ),
149+ (OVModelForSequenceClassification , "hf-internal-testing/tiny-random-bert" , 70 , 35 ),
150+ (OVModelForCausalLM , "hf-internal-testing/tiny-random-gpt2" , 45 , 23 ),
151151 )
152152
153153 @parameterized .expand (SUPPORTED_ARCHITECTURES_WITH_EXPECTED_COMPRESSED_MATMULS )
154- def test_automodel_weight_compression (self , model_cls , model_name , expected_int8 ):
154+ def test_automodel_weight_compression (self , model_cls , model_name , expected_pt_int8 , expected_ov_int8 ):
155155 task = model_cls .export_feature
156156
157157 with tempfile .TemporaryDirectory () as tmp_dir :
@@ -166,7 +166,7 @@ def test_automodel_weight_compression(self, model_cls, model_name, expected_int8
166166
167167 # TODO: uncomment once we move to a newer version of NNCF that includes the fixes
168168 _ , num_int8 = get_num_quantized_nodes (model )
169- self .assertEqual (expected_int8 , num_int8 )
169+ self .assertEqual (expected_pt_int8 , num_int8 )
170170
171171 tokens = tokenizer ("This is a sample input" , return_tensors = "pt" )
172172 outputs = model (** tokens )
@@ -176,6 +176,27 @@ def test_automodel_weight_compression(self, model_cls, model_name, expected_int8
176176 loaded_config = OVConfig .from_pretrained (tmp_dir )
177177 self .assertIsNotNone (loaded_config )
178178
@parameterized.expand(SUPPORTED_ARCHITECTURES_WITH_EXPECTED_COMPRESSED_MATMULS)
def test_ovmodel_weight_compression(self, model_cls, model_name, expected_pt_int8, expected_ov_int8):
    """Check weights-only quantization of a model exported through ``model_cls``.

    The model is loaded with ``export=True``, quantized with
    ``weights_only=True`` into a temporary directory, reloaded from that
    directory, and then checked for the expected int8 count and for a
    ``logits`` entry in its output.

    Args:
        model_cls: Model class providing ``from_pretrained`` and
            ``export_feature``.
        model_name: Model identifier passed to ``from_pretrained``.
        expected_pt_int8: Not used by this test; it is accepted because the
            shared parameter tuples also carry the value asserted by
            ``test_automodel_weight_compression``.
        expected_ov_int8: Value that the second result of
            ``get_num_quantized_nodes`` must equal for the reloaded model.
    """
    task = model_cls.export_feature

    with tempfile.TemporaryDirectory() as tmp_dir:
        exported_model = model_cls.from_pretrained(model_name, export=True)
        tokenizer = AutoTokenizer.from_pretrained(model_name)
        # Fall back to the EOS token when the tokenizer defines no pad token.
        if tokenizer.pad_token is None:
            tokenizer.pad_token = tokenizer.eos_token

        quantizer = OVQuantizer.from_pretrained(exported_model, task=task)
        quantizer.quantize(save_directory=tmp_dir, weights_only=True)
        # Reload from disk so the assertions run against the saved artifact.
        model = model_cls.from_pretrained(tmp_dir)

        # Only the second element of the returned pair is checked here.
        _, num_int8 = get_num_quantized_nodes(model)
        self.assertEqual(expected_ov_int8, num_int8)

        tokens = tokenizer("This is a sample input", return_tensors="pt")
        outputs = model(**tokens)
        # assertIn reports the container contents on failure, unlike assertTrue.
        self.assertIn("logits", outputs)
199+
179200
180201class OVQuantizerQATest (unittest .TestCase ):
181202 SUPPORTED_ARCHITECTURES = (("hf-internal-testing/tiny-random-BertForQuestionAnswering" ,),)
0 commit comments