-
Notifications
You must be signed in to change notification settings - Fork 3
Expand file tree
/
Copy pathpyproject.toml
More file actions
49 lines (43 loc) · 1.59 KB
/
pyproject.toml
File metadata and controls
49 lines (43 loc) · 1.59 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
[build-system]
# setuptools 77.0.3 is the first release that accepts `project.license` as a
# plain SPDX expression string (PEP 639), which is the form this file uses.
# Older releases reject that key during the build, so the floor must not be
# lower than this.
requires = ["setuptools>=77.0.3", "wheel"]
build-backend = "setuptools.build_meta"

[project]
# Distribution identity and summary shown on the package index.
name = "polarquant"
version = "0.5.0"
description = "PolarQuant: Hadamard-rotated Lloyd-Max quantization for LLM compression. Weights + KV cache + CLI."
readme = "README.md"
license = "Apache-2.0"  # SPDX expression (PEP 639 string form)
requires-python = ">=3.9"
authors = [
    { name = "Caio Vicentino", email = "caiovicentino@gmail.com" },
]
keywords = [
    "quantization",
    "llm",
    "compression",
    "hadamard",
    "transformers",
    "vllm",
    "kv-cache",
]
classifiers = [
    "Development Status :: 4 - Beta",
    "Intended Audience :: Science/Research",
    "Programming Language :: Python :: 3",
    "Topic :: Scientific/Engineering :: Artificial Intelligence",
]
# Requirements installed unconditionally; optional integrations are declared
# as extras in [project.optional-dependencies].
dependencies = [
    "torch>=2.0",
    "safetensors",
    "scipy",
    "huggingface_hub",
    "transformers",
]

[project.urls]
# Labelled links published with the package metadata.
Homepage = "https://github.com/caiovicentino/polarengine-vllm"
Paper = "https://arxiv.org/abs/2603.29078"
Models = "https://huggingface.co/collections/caiovicentino1/polarquant-models-69cbc96292c5174df2088b08"

[project.optional-dependencies]
# Each key is an extra, selected at install time as `polarquant[<extra>]`.
vllm = ["vllm>=0.8.0"]
triton = ["triton>=2.0"]
chat = [
    "gradio>=4.0",
    "torchao",
    "transformers",
    "accelerate",
    "sentencepiece",
]
serve = [
    "fastapi",
    "uvicorn",
    "torchao",
    "transformers",
    "accelerate",
    "sentencepiece",
]
# NOTE(review): `all` is the union of `chat` and `serve` only; it does not pull
# in the `vllm` or `triton` extras. Confirm that omission is intentional.
all = [
    "gradio>=4.0",
    "torchao",
    "fastapi",
    "uvicorn",
    "transformers",
    "accelerate",
    "sentencepiece",
]

[project.scripts]
# Console command `polarquant` -> callable `main` in module `polarengine_vllm.cli`.
polarquant = "polarengine_vllm.cli:main"

[project.entry-points."vllm.general_plugins"]
# Advertises `polarengine_vllm.register_polar_quant` under the
# `vllm.general_plugins` entry-point group. The group name contains a dot, so
# it must stay quoted to remain a single key.
polarengine = "polarengine_vllm:register_polar_quant"

[tool.setuptools.packages.find]
# Package discovery is limited to names matching this glob, i.e.
# `polarengine_vllm` and anything whose name starts with it (subpackages).
include = [
    "polarengine_vllm*",
]