Steps:
1. conda create -n optimum-intel python=3.12 -y
2. conda activate optimum-intel
3. uv pip install "optimum-intel[openvino]@git+https://github.com/Mohamed-Ashraf273/optimum-intel.git@support_gigachat3"
4. optimum-cli export openvino --model "/mnt/data2/models/ai-sage/GigaChat3-10B-A1.8B-bf16" --task text-generation-with-past --weight-format int4 "/mnt/data2/models/ov/GigaChat3-10B-A1.8B-ov-int4"
-----------------------
LOG-1
(optimum-intel) arc@xpu:~$ uv pip install "optimum-intel[openvino]@git+https://github.com/Mohamed-Ashraf273/optimum-intel.git@support_gigachat3"
Using Python 3.12.12 environment at: miniconda3/envs/optimum-intel
Updated https://github.com/Mohamed-Ashraf273/optimum-intel.git (f231bca4d53adc66
Updated https://github.com/huggingface/optimum-onnx.git (9ca6f688046d1824e3e0b66
Updated https://github.com/huggingface/optimum (114647f51404f9747410afc287a8079f
Resolved 73 packages in 11.50s
Built optimum @ git+https://github.com/huggingface/optimum@114647f51404f974741
Built optimum-onnx @ git+https://github.com/huggingface/optimum-onnx.git@9ca6f
Built optimum-intel @ git+https://github.com/Mohamed-Ashraf273/optimum-intel.g
Prepared 71 packages in 1m 24s
Installing wheels...
warning: The module `nvidia` is provided by more than one package, which causes an install race condition and can result in a broken module. Consider removing your dependency on one of the two packages. (uv emits this warning once per pair; the pairs reported were:)
- nvidia-cusolver-cu12 (v11.7.3.90) or nvidia-cuda-nvrtc-cu12 (v12.8.93)
- nvidia-cuda-runtime-cu12 (v12.8.90) or nvidia-cuda-nvrtc-cu12 (v12.8.93)
- nvidia-cufft-cu12 (v11.3.3.83) or nvidia-cuda-runtime-cu12 (v12.8.90)
- nvidia-curand-cu12 (v10.3.9.90) or nvidia-cufft-cu12 (v11.3.3.83)
- nvidia-nvjitlink-cu12 (v12.8.93) or nvidia-curand-cu12 (v10.3.9.90)
- nvidia-nvtx-cu12 (v12.8.90) or nvidia-nvjitlink-cu12 (v12.8.93)
- nvidia-nvtx-cu12 (v12.8.90) or nvidia-cublas-cu12 (v12.8.4.1)
- nvidia-cusparse-cu12 (v12.5.8.93) or nvidia-cublas-cu12 (v12.8.4.1)
- nvidia-cusparse-cu12 (v12.5.8.93) or nvidia-cuda-cupti-cu12 (v12.8.90)
- nvidia-cufile-cu12 (v1.13.1.3) or nvidia-cuda-cupti-cu12 (v12.8.90)
Installed 71 packages in 1.07s
+ certifi==2026.2.25 + charset-normalizer==3.4.4 + cuda-bindings==12.9.4 + cuda-pathfinder==1.4.0 + filelock==3.25.0 + fsspec==2026.2.0 + hf-xet==1.3.2 + huggingface-hub==0.36.2 + idna==3.11 + jinja2==3.1.6 + joblib==1.5.3 + markdown-it-py==4.0.0 + markupsafe==3.0.3 + mdurl==0.1.2 + ml-dtypes==0.5.4 + mpmath==1.3.0 + networkx==3.4.2 + ninja==1.13.0 + nncf==3.0.0 + numpy==2.2.6
+ nvidia-cublas-cu12==12.8.4.1 + nvidia-cuda-cupti-cu12==12.8.90 + nvidia-cuda-nvrtc-cu12==12.8.93 + nvidia-cuda-runtime-cu12==12.8.90 + nvidia-cudnn-cu12==9.10.2.21 + nvidia-cufft-cu12==11.3.3.83 + nvidia-cufile-cu12==1.13.1.3 + nvidia-curand-cu12==10.3.9.90 + nvidia-cusolver-cu12==11.7.3.90 + nvidia-cusparse-cu12==12.5.8.93 + nvidia-cusparselt-cu12==0.7.1 + nvidia-nccl-cu12==2.27.5 + nvidia-nvjitlink-cu12==12.8.93 + nvidia-nvshmem-cu12==3.4.5 + nvidia-nvtx-cu12==12.8.90
+ onnx==1.20.1 + onnx-ir==0.2.0 + onnxscript==0.6.2 + openvino==2026.0.0 + openvino-telemetry==2025.2.0 + openvino-tokenizers==2026.0.0.0
+ optimum==2.1.0.dev0 (from git+https://github.com/huggingface/optimum@114647f51404f9747410afc287a8079f1f16389b)
+ optimum-intel==1.27.0.dev0+f231bca (from git+https://github.com/Mohamed-Ashraf273/optimum-intel.git@f231bca4d53adc663611064dcceda19be8d2fbed)
+ optimum-onnx==0.1.0.dev0 (from git+https://github.com/huggingface/optimum-onnx.git@9ca6f688046d1824e3e0b660b1044fa0b39717f2)
+ pandas==2.3.3 + protobuf==7.34.0 + psutil==7.2.2 + pydot==3.0.4 + pygments==2.19.2 + pyparsing==3.3.2 + python-dateutil==2.9.0.post0 + pytz==2026.1.post1 + pyyaml==6.0.3 + regex==2026.2.28 + requests==2.32.5 + rich==14.3.3 + safetensors==0.7.0 + scikit-learn==1.8.0 + scipy==1.17.1 + six==1.17.0 + sympy==1.14.0 + tabulate==0.10.0 + threadpoolctl==3.6.0 + tokenizers==0.22.2 + torch==2.10.0 + tqdm==4.67.3 + transformers==4.57.6 + triton==3.6.0 + typing-extensions==4.15.0 + tzdata==2025.3 + urllib3==2.6.3
-----------------------
ERROR-1
  File "/home/arc/miniconda3/envs/optimum-intel/bin/optimum-cli", line 10, in <module>
    sys.exit(main())
  File "/home/arc/miniconda3/envs/optimum-intel/lib/python3.12/site-packages/optimum/commands/optimum_cli.py", line 219, in main
    service.run()
  File "/home/arc/miniconda3/envs/optimum-intel/lib/python3.12/site-packages/optimum/commands/export/openvino.py", line 468, in run
    main_export(
  File "/home/arc/miniconda3/envs/optimum-intel/lib/python3.12/site-packages/optimum/exporters/openvino/__main__.py", line 531, in main_export
    submodel_paths = export_from_model(
  File "/home/arc/miniconda3/envs/optimum-intel/lib/python3.12/site-packages/optimum/exporters/openvino/convert.py", line 771, in export_from_model
    export_models(
  File "/home/arc/miniconda3/envs/optimum-intel/lib/python3.12/site-packages/optimum/exporters/openvino/convert.py", line 531, in export_models
    export(
  File "/home/arc/miniconda3/envs/optimum-intel/lib/python3.12/site-packages/optimum/exporters/openvino/convert.py", line 209, in export
    raise ValueError(
ValueError: The current version of Transformers does not allow for the export of the model. Maximum required is 4.53.3, got: 4.57.6
-----------------------
LOG-2
(optimum-intel) arc@xpu:~$ uv pip install "transformers<=4.53.3" --force-reinstall
Using Python 3.12.12 environment at: miniconda3/envs/optimum-intel
Resolved 18 packages in 471ms
Prepared 18 packages in 1.23s
Uninstalled 18 packages in 254ms
Installed 18 packages in 114ms
~ certifi==2026.2.25 ~ charset-normalizer==3.4.4 ~ filelock==3.25.0 ~ fsspec==2026.2.0 ~ hf-xet==1.3.2 ~ huggingface-hub==0.36.2 ~ idna==3.11
- numpy==2.2.6 + numpy==2.4.2
- packaging==25.0 (from file:///home/task_176104874243446/conda-bld/packaging_1761049080023/work) + packaging==26.0
~ pyyaml==6.0.3 ~ regex==2026.2.28 ~ requests==2.32.5 ~ safetensors==0.7.0
- tokenizers==0.22.2 + tokenizers==0.21.4
~ tqdm==4.67.3
- transformers==4.57.6 + transformers==4.53.3
~ typing-extensions==4.15.0 ~ urllib3==2.6.3
-----------------------
LOG-3
(optimum-intel) arc@xpu:~$ optimum-cli export openvino --model "/mnt/data2/models/ai-sage/GigaChat3-10B-A1.8B-bf16" --task text-generation-with-past --weight-format int4 "/mnt/data2/models/ov/GigaChat3-10B-A1.8B-ov-int4"
Loading checkpoint shards: 100% 11/11 [01:42<00:00, 9.30s/it]
Some weights of the model checkpoint at /mnt/data2/models/ai-sage/GigaChat3-10B-A1.8B-bf16 were not used when initializing DeepseekV3ForCausalLM:
['model.layers.26.eh_proj.weight', 'model.layers.26.embed_tokens.weight', 'model.layers.26.enorm.weight', 'model.layers.26.hnorm.weight', 'model.layers.26.input_layernorm.weight', 'model.layers.26.mlp.experts.0.down_proj.weight', 'model.layers.26.mlp.experts.0.gate_proj.weight', 'model.layers.26.mlp.experts.0.up_proj.weight', 'model.layers.26.mlp.experts.1.down_proj.weight', 'model.layers.26.mlp.experts.1.gate_proj.weight', 'model.layers.26.mlp.experts.1.up_proj.weight', 'model.layers.26.mlp.experts.10.down_proj.weight',
'model.layers.26.mlp.experts.{0..63}.{down_proj,gate_proj,up_proj}.weight' (every one of the 64 experts is listed individually in the original output; condensed here),
'model.layers.26.mlp.gate.e_score_correction_bias', 'model.layers.26.mlp.gate.weight',
'model.layers.26.mlp.shared_experts.down_proj.weight', 'model.layers.26.mlp.shared_experts.gate_proj.weight', 'model.layers.26.mlp.shared_experts.up_proj.weight',
'model.layers.26.post_attention_layernorm.weight',
'model.layers.26.self_attn.kv_a_layernorm.weight', 'model.layers.26.self_attn.kv_a_proj_with_mqa.weight', 'model.layers.26.self_attn.kv_b_proj.weight', 'model.layers.26.self_attn.o_proj.weight', 'model.layers.26.self_attn.q_proj.weight',
'model.layers.26.shared_head.head.weight', 'model.layers.26.shared_head.norm.weight']
- This IS expected if you are initializing DeepseekV3ForCausalLM from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DeepseekV3ForCausalLM from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
`loss_type=None` was set in the config but it is unrecognised. Using the default loss: `ForCausalLMLoss`.
TracerWarning ("Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!") raised at:
- transformers/cache_utils.py:568: or not self.key_cache[layer_idx].numel()  # the layer has no cache
- transformers/masking_utils.py:187: if (padding_length := kv_length + kv_offset - attention_mask.shape[-1]) > 0:
- transformers/cache_utils.py:552: not self.key_cache[layer_idx].numel()  # prefers not t.numel() to len(t) == 0 to export the model
- transformers/integrations/sdpa_attention.py:59: is_causal = query.shape[2] > 1 and attention_mask is None and getattr(module, "is_causal", True)
- transformers/models/deepseek_v3/modeling_deepseek_v3.py:184: if token_indices.numel() > 0:
TracerWarning ("torch.tensor results are registered as constants in the trace. You can safely ignore this warning if you use this function to create tensors out of constant variables that would be the same every time you call this function. In any other case, this might cause the trace to be incorrect.") raised at:
- optimum/exporters/openvino/model_patcher.py:233: torch.tensor(0.0, device=mask.device, dtype=dtype),
- optimum/exporters/openvino/model_patcher.py:234: torch.tensor(torch.finfo(torch.float16).min, device=mask.device, dtype=dtype),
INFO:nncf:Statistics of the bitwidth distribution:
| Weight compression mode   | % all parameters (layers) | % ratio-defining parameters (layers) |
| int8_asym, per-channel    | 14% (2 / 965)             | 0% (0 / 963)                         |
| int4_asym, group size 128 | 86% (963 / 965)           | 100% (963 / 963)                     |
Applying Weight Compression: 100% in 0:00:44
(optimum-intel) arc@xpu:~$
-----------------------
LOG-4
https://github.com/SearchSavior/OpenArc?tab=readme-ov-file#openarc-add
openarc add --model-name GigaChat3-10B-A1.8B-ov-int4 --model-path /mnt/data2/models/ov/GigaChat3-10B-A1.8B-ov-int4 --engine ovgenai --model-type llm --device GPU.0
openarc add --model-name GigaChat3-10B-A1.8B-ov-int8 --model-path /mnt/data2/models/ov/GigaChat3-10B-A1.8B-ov-int8 --engine ovgenai --model-type llm --device GPU.1
openarc load GigaChat3-10B-A1.8B-ov-int4
openarc load GigaChat3-10B-A1.8B-ov-int8
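Note on ERROR-1: the exporter enforces a per-architecture ceiling on the transformers version (here 4.53.3), which is why the 4.57.6 pulled in by LOG-1 had to be downgraded in LOG-2. A stdlib-only pre-flight check can catch this before a 100-second checkpoint load. This is a sketch, not the exporter's actual check; `parse_version` is a minimal stand-in that only compares release segments (use `packaging.version.Version` for anything serious):

```python
# Pre-flight check for the version gate hit in ERROR-1.
def parse_version(v: str) -> tuple[int, ...]:
    """Split '4.57.6' into (4, 57, 6); stops at the first non-release segment."""
    parts = []
    for piece in v.split("."):
        digits = ""
        for ch in piece:
            if ch.isdigit():
                digits += ch
            else:
                break
        if not digits:
            break
        parts.append(int(digits))
    return tuple(parts)

MAX_SUPPORTED = "4.53.3"  # ceiling reported by the exporter in ERROR-1

def export_supported(installed: str, maximum: str = MAX_SUPPORTED) -> bool:
    """True if the installed transformers version is within the exporter's ceiling."""
    return parse_version(installed) <= parse_version(maximum)

print(export_supported("4.57.6"))  # False: the version LOG-1 installed
print(export_supported("4.53.3"))  # True: the version LOG-2 pinned
```

Running this right after step 3 would have flagged the mismatch before step 4 was attempted.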
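Note on the "weights not used" wall of text in LOG-3: every skipped tensor lives under `model.layers.26.*`, which appears to be GigaChat3's extra multi-token-prediction head on top of the 26 regular decoder layers (0-25); the names `eh_proj`, `enorm`, `hnorm`, and `shared_head` match the MTP module in DeepSeek-V3-style checkpoints, so dropping it for a plain `text-generation-with-past` export is the expected outcome, as the log itself says. A quick sanity check is to confirm the skipped names collapse to a single layer index; the sketch below uses a toy subset of the reported names:

```python
# Verify that all "not used" weight names belong to one layer (the MTP head).
unused = [  # small sample of the names printed in LOG-3
    "model.layers.26.eh_proj.weight",
    "model.layers.26.embed_tokens.weight",
    "model.layers.26.mlp.experts.0.down_proj.weight",
    "model.layers.26.mlp.gate.weight",
    "model.layers.26.self_attn.q_proj.weight",
    "model.layers.26.shared_head.head.weight",
]

def dropped_layers(names: list[str]) -> set[int]:
    """Collect the layer index of every 'model.layers.<i>.*' key."""
    layers = set()
    for name in names:
        if name.startswith("model.layers."):
            layers.add(int(name.split(".")[2]))
    return layers

print(dropped_layers(unused))  # {26}: only the extra head layer is skipped
```

If the set ever contained indices below 26, that would signal genuinely missing base-model weights rather than a harmlessly discarded auxiliary head.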
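Once a model is loaded in LOGS-4, it can be queried over HTTP. Assuming OpenArc exposes an OpenAI-compatible `/v1/chat/completions` endpoint as its README describes (the host and port below are placeholders; check your actual `openarc serve` configuration), a stdlib-only client sketch:

```python
# Build an OpenAI-style chat request for the model loaded via `openarc load`.
import json
import urllib.request

def build_chat_request(model: str, prompt: str,
                       base_url: str = "http://localhost:8000") -> urllib.request.Request:
    """Construct (but do not send) a POST to the chat completions endpoint."""
    body = json.dumps({
        "model": model,
        "messages": [{"role": "user", "content": prompt}],
    }).encode()
    return urllib.request.Request(
        f"{base_url}/v1/chat/completions",
        data=body,
        headers={"Content-Type": "application/json"},
        method="POST",
    )

req = build_chat_request("GigaChat3-10B-A1.8B-ov-int4", "Hello")
print(req.full_url)                      # http://localhost:8000/v1/chat/completions
print(json.loads(req.data)["model"])     # GigaChat3-10B-A1.8B-ov-int4
# To actually send: urllib.request.urlopen(req)  (requires the server running)
```

The model name must match the `--model-name` passed to `openarc add`; sending to the int8 copy on GPU.1 is just a matter of swapping the name.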