llmcompressor.observers.moving_base

Classes:

MovingAverageObserverBase –

Compute quantization parameters by taking the moving average of min/max values

MovingAverageObserverBase

MovingAverageObserverBase(
    base_name: str,
    args: QuantizationArgs,
    module: Optional[Module] = None,
    **observer_kwargs,
)

Bases: Observer

Compute quantization parameters by taking the moving average of min/max values

Parameters:

base_name
(str) –

str used to name the observer attribute
args
(QuantizationArgs) –

quantization args used to calibrate and quantize the observed value
module
(Optional[Module], default: None ) –

optional module with attached quantization parameters. This argument is required to utilize existing qparams such as global_scale or g_idx
**observer_kwargs
–

keyword arguments for observer initialization

Methods:

get_current_global_min_max –

Calculate the min and max value of the observed value (without moving average) for the purposes of global scale calculation
get_current_min_max –

Calculate the min and max value of the observed value (without moving average)
get_global_min_max –

Calculate moving average of min and max values from observed value for the purposes of global scale calculation
get_min_max –

Calculate moving average of min and max values from observed value

Source code in llmcompressor/observers/moving_base.py

def __init__(
    self,
    base_name: str,
    args: QuantizationArgs,
    module: Optional[torch.nn.Module] = None,
    **observer_kwargs,
):
    """
    Initialize the observer and reset its moving-average state

    :param base_name: str used to name the observer attribute
    :param args: quantization args used to calibrate and quantize the
        observed value
    :param module: optional module with attached quantization parameters
    :param observer_kwargs: keyword arguments forwarded to the base
        observer initializer
    """
    super().__init__(base_name, args, module, **observer_kwargs)

    # The averaging weight is looked up on the kwargs stored on `self.args`
    # (not on the `observer_kwargs` received above); 0.01 is the fallback.
    configured_kwargs = self.args.observer_kwargs
    self.avg_constant = configured_kwargs.get("averaging_constant", 0.01)

    # No history exists until the first observation has been processed.
    self.past_min_vals = self.past_max_vals = None
    self.past_global_min_vals = self.past_global_max_vals = None

get_current_global_min_max `abstractmethod`

get_current_global_min_max(observed: Tensor) -> MinMaxTuple

Calculate the min and max value of the observed value (without moving average) for the purposes of global scale calculation

Source code in llmcompressor/observers/moving_base.py

@abstractmethod
def get_current_global_min_max(self, observed: torch.Tensor) -> MinMaxTuple:
    """
    Compute the min and max of the observed value for this call only
    (no moving average applied), for use in global scale calculation.

    Intended to be overridden by subclasses.

    :param observed: value being observed
    :return: minimum value and maximum value of the current observation
    :raises NotImplementedError: always, in this base implementation
    """
    raise NotImplementedError

get_current_min_max `abstractmethod`

get_current_min_max(observed: Tensor) -> MinMaxTuple

Calculate the min and max value of the observed value (without moving average)

Source code in llmcompressor/observers/moving_base.py

@abstractmethod
def get_current_min_max(self, observed: torch.Tensor) -> MinMaxTuple:
    """
    Compute the min and max of the observed value for this call only
    (no moving average applied).

    Intended to be overridden by subclasses.

    :param observed: value being observed
    :return: minimum value and maximum value of the current observation
    :raises NotImplementedError: always, in this base implementation
    """
    raise NotImplementedError

get_global_min_max

get_global_min_max(observed: Tensor) -> MinMaxTuple

Calculate moving average of min and max values from observed value for the purposes of global scale calculation

Parameters:

observed
(Tensor) –

value being observed whose shape is (num_observations, 1, group_size)

Returns:

MinMaxTuple –

minimum value and maximum value whose shapes are (1, )

Source code in llmcompressor/observers/moving_base.py

def get_global_min_max(self, observed: torch.Tensor) -> MinMaxTuple:
    """
    Update and return the running min/max used for global scale calculation

    The current min/max come from ``get_current_global_min_max``. When
    earlier global values exist and ``avg_constant`` is not 1.0, they are
    combined with the current ones through ``self._lerp`` using
    ``avg_constant`` as the weight. The result is stored as the new history.

    :param observed: value being observed whose shape is
        (num_observations, 1, group_size)
    :return: minimum value and maximum value whose shapes are (1, )
    """
    global_min, global_max = self.get_current_global_min_max(observed)

    # The first observation has nothing to blend with, and a constant of
    # exactly 1.0 skips blending altogether.
    should_blend = (
        self.past_global_min_vals is not None and self.avg_constant != 1.0
    )
    if should_blend:
        # FUTURE: weighting by the number of observations (first dim) may be
        #         preferable to reducing over that dim
        weight = self.avg_constant
        global_min = self._lerp(self.past_global_min_vals, global_min, weight)
        global_max = self._lerp(self.past_global_max_vals, global_max, weight)

    # Remember the result so the next call can blend against it.
    self.past_global_min_vals, self.past_global_max_vals = global_min, global_max

    return global_min, global_max

get_min_max

get_min_max(observed: Tensor) -> MinMaxTuple

Calculate moving average of min and max values from observed value

Parameters:

observed
(Tensor) –

value being observed whose shape is (num_observations, *qparam_shape, group_size)

Returns:

MinMaxTuple –

minimum value and maximum value whose shapes are (*qparam_shape, )

Source code in llmcompressor/observers/moving_base.py

def get_min_max(self, observed: torch.Tensor) -> MinMaxTuple:
    """
    Update and return the running min/max of the observed value

    The current min/max come from ``get_current_min_max``. When earlier
    values exist and ``avg_constant`` is not 1.0, they are combined with the
    current ones through ``self._lerp`` using ``avg_constant`` as the
    weight. The result is stored as the new history.

    :param observed: value being observed whose shape is
        (num_observations, *qparam_shape, group_size)
    :return: minimum value and maximum value whose shapes are (*qparam_shape, )
    """
    cur_min, cur_max = self.get_current_min_max(observed)

    # The first observation has nothing to blend with, and a constant of
    # exactly 1.0 skips blending altogether.
    should_blend = self.past_min_vals is not None and self.avg_constant != 1.0
    if should_blend:
        # FUTURE: weighting by the number of observations (first dim) may be
        #         preferable to reducing over that dim
        weight = self.avg_constant
        cur_min = self._lerp(self.past_min_vals, cur_min, weight)
        cur_max = self._lerp(self.past_max_vals, cur_max, weight)

    # Remember the result so the next call can blend against it.
    self.past_min_vals, self.past_max_vals = cur_min, cur_max

    return cur_min, cur_max

llmcompressor.observers.moving_base

MovingAverageObserverBase

`base_name`

`args`

`module`

`**observer_kwargs`

get_current_global_min_max `abstractmethod`

get_current_min_max `abstractmethod`

get_global_min_max

`observed`

get_min_max

`observed`

llmcompressor.observers.moving_base

MovingAverageObserverBase

base_name

args

module

**observer_kwargs

get_current_global_min_max abstractmethod

get_current_min_max abstractmethod

get_global_min_max

observed

get_min_max

observed

`base_name`

`args`

`module`

`**observer_kwargs`

get_current_global_min_max `abstractmethod`

get_current_min_max `abstractmethod`

`observed`

`observed`