Source code for dgenerate.pipelinewrapper.uris.sdnqquantizeruri

# Copyright (c) 2023, Teriks
#
# dgenerate is distributed under the following BSD 3-Clause License
#
# Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in
#    the documentation and/or other materials provided with the distribution.
#
# 3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
# ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import torch

import dgenerate.textprocessing as _textprocessing
import dgenerate.types as _types
from dgenerate.extras.sdnq import SDNQConfig
from dgenerate.pipelinewrapper.uris import exceptions as _exceptions

_sdnq_quantizer_uri_parser = _textprocessing.ConceptUriParser(
    'SDNQ Quantizer',
    [
        'type',
        'group-size',
        'quant-conv',
        'quantized-matmul',
        'quantized-matmul-conv'
    ])


[docs] class SDNQQuantizerUri: """ Representation of ``--quantizer`` URI for SDNQ backend. """ _valid_weight_dtypes = [ "int8", "int7", "int6", "int5", "int4", "int3", "int2", "uint8", "uint7", "uint6", "uint5", "uint4", "uint3", "uint2", "uint1", "bool", "float8_e4m3fn", "float8_e4m3fnuz", "float8_e5m2", "float8_e5m2fnuz" ] # pipelinewrapper.uris.util.get_uri_accepted_args_schema metadata NAMES = ['sdnq']
[docs] @staticmethod def help(): return """ SD.Next quantization backend configuration. This backend can be specified as "sdnq" in the URI. URI Format: sdnq;argument1=value1;argument2=value2 Example: sdnq;type=int4;group-size=8;quant-conv=true The argument "type" is the target data type for weights after quantization. NOWRAP! Integer types: - int8 (default), - int7 - int6 - int5 - int4 - int3 - int2 NOWRAP! Unsigned integer types: - uint8 - uint7 - uint6 - uint5 - uint4 - uint3 - uint2 - uint1 - bool NOWRAP! Floating point types: - float8_e4m3fn - float8_e4m3fnuz - float8_e5m2 - float8_e5m2fnuz The argument "group-size" is used to decide how many elements of a tensor will share the same quantization group. Must be >= 0. When 0 (default), uses per-tensor quantization. When > 0, groups tensor elements for more granular quantization scaling. The argument "quant-conv" is enables quantization of convolutional layers in UNet models. When True, quantizes Conv2d layers in addition to Linear layers. Only affects UNet architectures. The argument "quantized-matmul" is enables use of quantized INT8 or FP8 matrix multiplication instead of BF16/FP16. When True, uses optimized quantized matmul operations for improved performance and reduced memory usage. The argument "quantized-matmul-conv" is enables quantized matrix multiplication for convolutional layers. Same as quantized-matmul but specifically for convolutional layers in UNets like SDXL. """
OPTION_ARGS = { 'type': _valid_weight_dtypes } # ===
[docs] def __init__(self, type: str = "int8", group_size: int = 0, quant_conv: bool = False, quantized_matmul: bool = False, quantized_matmul_conv: bool = False): if type not in self._valid_weight_dtypes: raise _exceptions.InvalidSDNQQuantizerUriError( f'SDNQ type must be one of: ' f'{_textprocessing.oxford_comma(self._valid_weight_dtypes, "or")}.') if group_size < 0: raise _exceptions.InvalidSDNQQuantizerUriError( 'SDNQ group-size must be >= 0.') self.type = type self.group_size = group_size self.quant_conv = quant_conv self.quantized_matmul = quantized_matmul self.quantized_matmul_conv = quantized_matmul_conv
[docs] def to_config(self, compute_dtype: str | torch.dtype | None = None) -> SDNQConfig: return SDNQConfig( weights_dtype=self.type, group_size=self.group_size, quant_conv=self.quant_conv, use_quantized_matmul=self.quantized_matmul, use_quantized_matmul_conv=self.quantized_matmul_conv )
[docs] @staticmethod def parse(uri: _types.Uri) -> 'SDNQQuantizerUri': try: r = _sdnq_quantizer_uri_parser.parse(uri) if r.concept not in {'sdnq'}: raise _exceptions.InvalidSDNQQuantizerUriError( f'Unknown quantization backend: {r.concept}' ) weights_dtype = r.args.get('type', 'int8') group_size = int(r.args.get('group-size', 0)) quant_conv = _types.parse_bool(r.args.get('quant-conv', False)) quantized_matmul = _types.parse_bool(r.args.get('quantized-matmul', False)) quantized_matmul_conv = _types.parse_bool(r.args.get('quantized-matmul-conv', False)) return SDNQQuantizerUri( type=weights_dtype, group_size=group_size, quant_conv=quant_conv, quantized_matmul=quantized_matmul, quantized_matmul_conv=quantized_matmul_conv ) except _textprocessing.ConceptUriParseError as e: raise _exceptions.InvalidSDNQQuantizerUriError(e) from e