Skip to content

llmcompressor.observers.moving_base

Classes:

MovingAverageObserverBase

MovingAverageObserverBase(
    base_name: str,
    args: QuantizationArgs,
    module: Optional[Module] = None,
    **observer_kwargs,
)

Bases: Observer

Compute quantization parameters by taking the moving average of min/max values

Parameters:

  • base_name

    (str) –

    str used to name the observer attribute

  • args

    (QuantizationArgs) –

    quantization args used to calibrate and quantize the observed value

  • module

    (Optional[Module], default: None ) –

    optional module with attached quantization parameters. This argument is required to utilize existing qparams such as global_scale or g_idx

  • **observer_kwargs

    keyword arguments for observer initialization

Methods:

  • get_current_global_min_max

    Calculate the min and max values of the observed value (without moving average) for the purposes of global scale calculation

  • get_current_min_max

    Calculate the min and max value of the observed value (without moving average)

  • get_global_min_max

    Calculate moving average of min and max values from observed value for the purposes of global scale calculation

  • get_min_max

    Calculate moving average of min and max values from observed value

Source code in llmcompressor/observers/moving_base.py
def __init__(
    self,
    base_name: str,
    args: QuantizationArgs,
    module: Optional[torch.nn.Module] = None,
    **observer_kwargs,
):
    """
    Set up the observer and its (initially unset) moving-average state

    :param base_name: str used to name the observer attribute
    :param args: quantization args used to calibrate and quantize the
        observed value
    :param module: optional module with attached quantization parameters.
        This argument is required to utilize existing qparams such as
        global_scale or g_idx
    :param **observer_kwargs: keyword arguments for observer initialization
    """
    super().__init__(base_name, args, module, **observer_kwargs)

    # weight handed to `_lerp` when a new observation is folded into the
    # running values; read from the quantization args, 0.01 if not given
    configured_kwargs = self.args.observer_kwargs
    self.avg_constant = configured_kwargs.get("averaging_constant", 0.01)

    # running values stay unset until the first observation is seen
    self.past_min_vals = self.past_max_vals = None
    self.past_global_min_vals = self.past_global_max_vals = None

get_current_global_min_max abstractmethod

get_current_global_min_max(observed: Tensor) -> MinMaxTuple

Calculate the min and max value of the observed value (without moving average) for the purposes of global scale calculation

Source code in llmcompressor/observers/moving_base.py
@abstractmethod
def get_current_global_min_max(self, observed: torch.Tensor) -> MinMaxTuple:
    """
    Return the min and max of the observed value alone, with no moving
    average applied, for the purposes of global scale calculation

    Abstract: the base implementation only raises and must be overridden.

    :param observed: value being observed
    :return: minimum value and maximum value of this observation
    """
    raise NotImplementedError

get_current_min_max abstractmethod

get_current_min_max(observed: Tensor) -> MinMaxTuple

Calculate the min and max value of the observed value (without moving average)

Source code in llmcompressor/observers/moving_base.py
@abstractmethod
def get_current_min_max(self, observed: torch.Tensor) -> MinMaxTuple:
    """
    Return the min and max of the observed value alone, with no moving
    average applied

    Abstract: the base implementation only raises and must be overridden.

    :param observed: value being observed
    :return: minimum value and maximum value of this observation
    """
    raise NotImplementedError

get_global_min_max

get_global_min_max(observed: Tensor) -> MinMaxTuple

Calculate moving average of min and max values from observed value for the purposes of global scale calculation

Parameters:

  • observed

    (Tensor) –

    value being observed whose shape is (num_observations, 1, group_size)

Returns:

  • MinMaxTuple

    minimum value and maximum value whose shapes are (1, )

Source code in llmcompressor/observers/moving_base.py
def get_global_min_max(self, observed: torch.Tensor) -> MinMaxTuple:
    """
    Update and return the running global min and max values, which are
    used for the purposes of global scale calculation

    The current values come from `get_current_global_min_max`. When no
    earlier values are stored, or when `avg_constant` is exactly 1.0, they
    are used unchanged; otherwise they are combined with the stored values
    through `_lerp`, weighted by `avg_constant`. The result is stored for
    the next call.

    :param observed: value being observed whose shape is
        (num_observations, 1, group_size)
    :return: minimum value and maximum value whose shapes are (1, )
    """
    current_min, current_max = self.get_current_global_min_max(observed)

    no_history = self.past_global_min_vals is None
    if no_history or self.avg_constant == 1.0:
        # nothing to blend with, or the weight keeps only the new values
        updated_min, updated_max = current_min, current_max
    else:
        # FUTURE: consider scaling by num observations (first dim)
        #         rather than reducing by first dim
        weight = self.avg_constant
        updated_min = self._lerp(self.past_global_min_vals, current_min, weight)
        updated_max = self._lerp(self.past_global_max_vals, current_max, weight)

    # remember the result so the next observation is averaged against it
    self.past_global_min_vals, self.past_global_max_vals = updated_min, updated_max
    return updated_min, updated_max

get_min_max

get_min_max(observed: Tensor) -> MinMaxTuple

Calculate moving average of min and max values from observed value

Parameters:

  • observed

    (Tensor) –

    value being observed whose shape is (num_observations, *qparam_shape, group_size)

Returns:

  • MinMaxTuple

    minimum value and maximum value whose shapes are (*qparam_shape, )

Source code in llmcompressor/observers/moving_base.py
def get_min_max(self, observed: torch.Tensor) -> MinMaxTuple:
    """
    Update and return the running min and max values for the observed value

    The current values come from `get_current_min_max`. When no earlier
    values are stored, or when `avg_constant` is exactly 1.0, they are used
    unchanged; otherwise they are combined with the stored values through
    `_lerp`, weighted by `avg_constant`. The result is stored for the next
    call.

    :param observed: value being observed whose shape is
        (num_observations, *qparam_shape, group_size)
    :return: minimum value and maximum value whose shapes are (*qparam_shape, )
    """
    new_min, new_max = self.get_current_min_max(observed)

    skip_averaging = self.past_min_vals is None or self.avg_constant == 1.0
    if not skip_averaging:
        # FUTURE: consider scaling by num observations (first dim)
        #         rather than reducing by first dim
        weight = self.avg_constant
        new_min = self._lerp(self.past_min_vals, new_min, weight)
        new_max = self._lerp(self.past_max_vals, new_max, weight)

    # remember the result so the next observation is averaged against it
    self.past_min_vals, self.past_max_vals = new_min, new_max
    return new_min, new_max