# polarengine-vllm/pyproject.toml (caiovicentino/polarengine-vllm, branch main)

# Build backend configuration (PEP 517/518).
# The [project] table declares `license = "Apache-2.0"` as a plain SPDX string
# (PEP 639). setuptools only accepts that form from 77.0.3 onward; older
# releases reject it during metadata validation, so the floor must be at
# least that version.
# `wheel` is not listed: setuptools >= 70.1 ships its own bdist_wheel command.
[build-system]
requires = ["setuptools>=77.0.3"]
build-backend = "setuptools.build_meta"

# Core distribution metadata (PEP 621).
# The distribution is published as "polarquant"; the import package selected
# under [tool.setuptools.packages.find] is "polarengine_vllm".
[project]
name = "polarquant"
version = "0.5.0"
description = "PolarQuant: Hadamard-rotated Lloyd-Max quantization for LLM compression. Weights + KV cache + CLI."
readme = "README.md"
# SPDX license expression, given in string form.
license = "Apache-2.0"
requires-python = ">=3.9"
authors = [{ name = "Caio Vicentino", email = "caiovicentino@gmail.com" }]
keywords = [
    "quantization",
    "llm",
    "compression",
    "hadamard",
    "transformers",
    "vllm",
    "kv-cache",
]
classifiers = [
    "Development Status :: 4 - Beta",
    "Intended Audience :: Science/Research",
    "Programming Language :: Python :: 3",
    "Topic :: Scientific/Engineering :: Artificial Intelligence",
]
# Requirements installed unconditionally; optional feature sets are declared
# under [project.optional-dependencies].
dependencies = [
    "torch>=2.0",
    "safetensors",
    "scipy",
    "huggingface_hub",
    "transformers",
]

# Links published alongside the package metadata: source repository,
# the accompanying arXiv paper, and the Hugging Face model collection.
[project.urls]
Homepage = "https://github.com/caiovicentino/polarengine-vllm"
Paper = "https://arxiv.org/abs/2603.29078"
Models = "https://huggingface.co/collections/caiovicentino1/polarquant-models-69cbc96292c5174df2088b08"

# Optional feature sets, installed with `pip install "polarquant[<extra>]"`.
# "transformers" is already a core dependency; it is repeated in the extras
# below, which is redundant but harmless.
# NOTE(review): `all` does not pull in the `vllm` or `triton` extras.
# Confirm this omission is intentional.
[project.optional-dependencies]
vllm = [
    "vllm>=0.8.0",
]
triton = [
    "triton>=2.0",
]
chat = [
    "gradio>=4.0",
    "torchao",
    "transformers",
    "accelerate",
    "sentencepiece",
]
serve = [
    "fastapi",
    "uvicorn",
    "torchao",
    "transformers",
    "accelerate",
    "sentencepiece",
]
all = [
    "gradio>=4.0",
    "torchao",
    "fastapi",
    "uvicorn",
    "transformers",
    "accelerate",
    "sentencepiece",
]

# Console entry point: installing the package creates a `polarquant` command
# that calls `main` from the `polarengine_vllm.cli` module.
[project.scripts]
polarquant = "polarengine_vllm.cli:main"

# Entry point registered in the "vllm.general_plugins" group, pointing at
# `register_polar_quant` in the top-level `polarengine_vllm` package.
# The group name must stay quoted because it contains a dot.
# NOTE(review): presumably vLLM discovers and calls this at startup to register
# the quantization method — confirm against the vLLM plugin documentation.
[project.entry-points."vllm.general_plugins"]
polarengine = "polarengine_vllm:register_polar_quant"

# Package discovery: ship only `polarengine_vllm` and anything matching the
# `polarengine_vllm*` glob (its subpackages); other top-level directories are
# left out of the distribution.
[tool.setuptools.packages.find]
include = ["polarengine_vllm*"]