Skip to content
Open
Show file tree
Hide file tree
Changes from 3 commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
d8fc7cc
refactor and support for multi algs fusion
n1ck-guo May 22, 2026
18e8b15
Merge branch 'main' into hengguo/refactor_algs
n1ck-guo May 26, 2026
4de34b8
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] May 26, 2026
bcf3633
fix bugs
n1ck-guo May 26, 2026
92a9723
fix and handle shared config
n1ck-guo May 26, 2026
bc6569d
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] May 26, 2026
a9fc7df
merge main
n1ck-guo May 26, 2026
f0e8483
Merge branch 'hengguo/refactor_algs' of https://github.com/intel/auto…
n1ck-guo May 26, 2026
5a71dbf
relocate awq
n1ck-guo May 27, 2026
dae7221
Merge remote-tracking branch 'origin/main' into hengguo/refactor_algs
n1ck-guo May 27, 2026
533f12b
refactor scheme and entry
n1ck-guo May 28, 2026
6a6f97a
modify by comments
n1ck-guo May 28, 2026
d3a391c
merge main
n1ck-guo May 28, 2026
ecefed9
Merge remote-tracking branch 'origin/main' into hengguo/refactor_algs
n1ck-guo May 29, 2026
cab89be
fix ut
n1ck-guo May 29, 2026
8b2af76
fix
n1ck-guo May 29, 2026
6f3abc7
Merge remote-tracking branch 'origin/main' into hengguo/refactor_algs
n1ck-guo May 29, 2026
0ea5774
add llmc api
n1ck-guo May 29, 2026
9c7853f
Merge remote-tracking branch 'origin/main' into hengguo/refactor_algs
n1ck-guo Jun 3, 2026
4c5c470
fix gguf error
n1ck-guo Jun 3, 2026
8e12461
fix ut
n1ck-guo Jun 4, 2026
c470601
Merge remote-tracking branch 'origin/main' into hengguo/refactor_algs
n1ck-guo Jun 4, 2026
8e4fd0e
fix
n1ck-guo Jun 4, 2026
1fdb43a
Merge branch 'main' into hengguo/refactor_algs
n1ck-guo Jun 4, 2026
40a67fa
Merge remote-tracking branch 'origin/main' into hengguo/refactor_algs
n1ck-guo Jun 4, 2026
b4ae543
Merge branch 'hengguo/refactor_algs' of https://github.com/intel/auto…
n1ck-guo Jun 4, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
876 changes: 3 additions & 873 deletions auto_round/__main__.py

Large diffs are not rendered by default.

872 changes: 0 additions & 872 deletions auto_round/alg_ext.py

This file was deleted.

17 changes: 16 additions & 1 deletion auto_round/algorithms/quantization/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,23 @@
# See the License for the specific language governing permissions and
# limitations under the License.

from auto_round.algorithms.quantization.base import BaseQuantizers
from auto_round.algorithms.quantization.base import (
BasePipelineMember,
BaseWeightTransformer,
BaseQuantizer,
DiffusionMixin,
RTNLayerFallbackMixin,
)
from auto_round.algorithms.quantization.config import QuantizationConfig
from auto_round.algorithms.quantization.pipeline import (
ActCalibPolicy,
CalibTiming,
InputSource,
BlockContext,
QuantizationPipeline,
RunContext,
merge_policies,
)
from auto_round.algorithms.quantization.sign_round.config import SignRoundConfig
from auto_round.algorithms.quantization.sign_round.quantizer import SignRoundQuantizer
from auto_round.algorithms.quantization.sign_roundv2 import SignRoundV2Quantizer
Expand Down
5 changes: 5 additions & 0 deletions auto_round/algorithms/quantization/awq/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,3 +11,8 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from auto_round.algorithms.quantization.awq.config import AWQConfig
from auto_round.algorithms.quantization.awq.quantizer import AWQQuantizer

__all__ = ["AWQConfig", "AWQQuantizer"]
75 changes: 46 additions & 29 deletions auto_round/algorithms/quantization/awq/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,31 +18,20 @@
class AWQConfig(QuantizationConfig):
"""Configuration for AWQ (Activation-Aware Weight Quantization).

AWQ protects salient weight channels by analyzing activation patterns and
applying channel-wise scaling to reduce quantization error. The scaling
factors are computed offline via a grid search over calibration data.
After smoothing, standard RTN quantization is applied to the adjusted weights.
AWQ is a **pre-processing** algorithm (``role="preprocess"``). It
protects salient weight channels by analyzing activation patterns and
applying channel-wise scaling to reduce quantization error. After
smoothing, a separate ``block_quantizer`` (RTN, SignRound, …) performs
the actual weight compression.

Args:
duo_scaling: Use both activations and weights for the scaling factor.
True: always use duo scaling. False: only activation scaling.
"both": search both modes and pick the best.
n_grid: Number of grid points for the scaling-ratio search.
seqlen: Calibration sequence length.
nsamples: Number of calibration samples. Grid search time scales
linearly with nsamples (each batch triggers one parent forward
per grid point).
batch_size: Batch size for calibration forward passes.
apply_smooth: Whether to apply AWQ smoothing (channel scaling).
True: run both smoothing and quantization (default AWQ behavior).
False: skip smoothing, only run RTN quantization.
mappings: Explicit AWQ mappings. Each mapping is a dict with keys
``smooth_layer`` (str) and ``balance_layers`` (list[str]).
If None, mappings are inferred automatically from the model structure.
**kwargs: Forwarded to ``QuantizationConfig`` (bits, group_size, sym, …).
The quantization parameters (``bits``, ``group_size``, ``sym``,
``data_type``, …) on this config are used *only* for the internal grid
search loss calculation (quantize-dequantize during scale selection).
The definitive quantization parameters for the final weight compression
step come from the pipeline's ``block_quantizer`` config.
"""

_alg_cls = "AWQQuantizer"
_alg_cls: str = "AWQQuantizer"

def __init__(
self,
Expand All @@ -53,13 +42,38 @@ def __init__(
nsamples: int = 128,
batch_size: int = 8,
apply_smooth: bool = True,
mappings: list[dict] = None,
mappings: list[dict] | None = None,
**kwargs,
):
"""Initialize an AWQ configuration.

Args:
duo_scaling: Whether AWQ should use activation-aware and
weight-aware scaling together. Use True to always enable
duo scaling, False to use activation-only scaling, or
"both" to search both modes and keep the better result.
n_grid: Number of grid-search points used when searching the
AWQ scaling ratio.
seqlen: Calibration sequence length retained for compatibility
with standalone AWQ entry points.
nsamples: Number of calibration samples retained for compatibility
with standalone AWQ entry points.
batch_size: Batch size retained for compatibility with standalone
AWQ entry points.
apply_smooth: Whether to apply AWQ smoothing before the downstream
block quantizer.
mappings: Optional explicit AWQ smooth/balance mappings. Each
item should contain ``smooth_layer`` and
``balance_layers`` entries. If None, mappings are inferred
automatically from the model structure.
**kwargs: Common quantization arguments forwarded to
QuantizationConfig, such as bits, group_size, sym,
data_type, and activation quantization fields.
"""
super().__init__(**kwargs)

if isinstance(duo_scaling, str) and duo_scaling != "both":
raise ValueError(f"duo_scaling must be True, False, or 'both', got '{duo_scaling}'")
raise ValueError(f"duo_scaling must be True, False, or 'both', got '{duo_scaling!r}'")
self.duo_scaling = duo_scaling
self.n_grid = n_grid
self.seqlen = seqlen
Expand All @@ -69,9 +83,12 @@ def __init__(
self.mappings = mappings
self.infer_bs_coeff = 1
self.batch_dim = None
# NOTE: enable_quanted_input is NOT set here. It belongs to the
# block_quantizer (RTN/AutoRound), not to AWQ. See §3.7.1.

# TODO adjust those args defaults after architecture refactoring:
self.enable_quanted_input = False # AWQ doesn't cascade quantized block outputs
# AWQ uses plain RTN (no iterative optimization) for the quantization step.
self.disable_opt_rtn = True
self.orig_disable_opt_rtn = True
def __repr__(self) -> str:
    """Return a compact one-line summary of this AWQ configuration.

    The summary lists ``duo_scaling`` (shown via ``repr`` so the string
    ``'both'`` is distinguishable from a boolean), ``n_grid``, ``bits``,
    ``group_size`` and ``sym``. The mappings themselves are never printed:
    a truthy ``mappings`` value is rendered as ``<explicit>``, while
    ``None`` or an empty list is rendered as ``auto``.
    """
    # Attributes are read in the same order in which they appear in the output.
    summary = ", ".join(
        (
            f"duo_scaling={self.duo_scaling!r}",
            f"n_grid={self.n_grid}",
            f"bits={self.bits}",
            f"group_size={self.group_size}",
            f"sym={self.sym}",
            "mappings=" + ("<explicit>" if self.mappings else "auto"),
        )
    )
    return f"AWQConfig({summary})"
2 changes: 1 addition & 1 deletion auto_round/algorithms/quantization/awq/mappings.py
Original file line number Diff line number Diff line change
Expand Up @@ -386,7 +386,7 @@ def _build_hybrid_attention_mappings(model: torch.nn.Module) -> list[AWQMapping]

mappings.append(AWQMapping(r"up_proj$", [r"down_proj$"]))

logger.warning(
logger.info(
f"Built dynamic hybrid-attention AWQ mappings: "
f"{len(full_indices)} full-attention, {len(linear_indices)} linear-attention, "
f"projections={linear_proj_names}, MoE={is_moe}"
Expand Down
Loading
Loading