Source code for bentoml._internal.frameworks.diffusers

from __future__ import annotations

import logging
import os
import shutil
import typing as t
from typing import TYPE_CHECKING

import attr

import bentoml
from bentoml import Tag
from bentoml.exceptions import BentoMLException
from bentoml.exceptions import MissingDependencyException
from bentoml.exceptions import NotFound
from bentoml.models import ModelContext

from ..models.model import PartialKwargsModelOptions
from .utils.transformers import extract_commit_hash

if TYPE_CHECKING:
    from types import ModuleType

    from bentoml.types import ModelSignature
    from bentoml.types import ModelSignatureDict


try:
    import diffusers
    import torch
    from diffusers.loaders import LoraLoaderMixin
    from diffusers.loaders import TextualInversionLoaderMixin
    from diffusers.utils.import_utils import is_accelerate_available
    from diffusers.utils.import_utils import is_torch_version
    from diffusers.utils.import_utils import is_xformers_available
except ImportError:  # pragma: no cover
    raise MissingDependencyException(
        "'diffusers' and 'transformers' is required in order to use module 'bentoml.diffusers', install diffusers and its dependencies with 'pip install --upgrade diffusers transformers accelerate'. For more information, refer to https://github.com/huggingface/diffusers",
    )


MODULE_NAME = "bentoml.diffusers"
DIFFUSION_MODEL_FOLDER = "diffusion_model"
DIFFUSION_MODEL_CONFIG_FILE = "model_index.json"
API_VERSION = "v1"

logger = logging.getLogger(__name__)

LoraOptionType = t.Union[str, t.Dict[str, str]]
TextualInversionOptionType = t.Union[str, t.Dict[str, str]]


@attr.define
class DiffusersOptions(PartialKwargsModelOptions):
    """Options for the diffusers model."""

    pipeline_class: str | type[diffusers.DiffusionPipeline] | None = None
    scheduler_class: str | type[diffusers.SchedulerMixin] | None = None
    torch_dtype: str | torch.dtype | None = None
    device_map: str | dict[str, int | str | torch.device] | None = None
    custom_pipeline: str | None = None
    enable_xformers: bool | None = None
    enable_attention_slicing: int | str | None = None
    enable_model_cpu_offload: bool | None = None
    enable_sequential_cpu_offload: bool | None = None
    enable_torch_compile: bool | None = None
    low_cpu_mem_usage: bool | None = None
    variant: str | None = None
    load_pretrained_extra_kwargs: dict[str, t.Any] | None = None
    lora_dir: str | None = None
    lora_weights: LoraOptionType | list[LoraOptionType] | None = None
    textual_inversions: (
        TextualInversionOptionType | list[TextualInversionOptionType] | None
    ) = None
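
# Illustrative sketch (not part of the original module): these options are usually
# supplied through ``bentoml.Model.with_options`` before turning the stored model
# into a runner. The model tag and option values below are hypothetical:
#
#   model = bentoml.diffusers.get("my_sd_model:latest").with_options(
#       pipeline_class="diffusers.StableDiffusionPipeline",
#       torch_dtype=torch.float16,
#       enable_xformers=True,
#   )
#   runner = model.to_runner()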


def _prepare_lora_args(
    raw_arg: LoraOptionType, lora_dir: str | None = None
) -> tuple[str, dict[str, str]]:
    if lora_dir is None:
        lora_dir = os.getcwd()

    lora_dir = os.path.expanduser(lora_dir)

    # If the user only provides a string, we first try to use it as a
    # path (either absolute or relative) to the weight file. If no
    # such file is found, the string is treated as a huggingface
    # repository identifier.
    if isinstance(raw_arg, str):
        raw_arg = raw_arg.strip()
        arg_path = os.path.expanduser(raw_arg)
        weight_path = None

        # absolute path case
        if os.path.isabs(arg_path) and os.path.exists(arg_path):
            weight_path = arg_path

        # relative path case
        tmp_path = os.path.join(lora_dir, arg_path)
        if os.path.exists(tmp_path):
            weight_path = tmp_path

        if weight_path:
            model_name = os.path.dirname(weight_path)
            weight_name = os.path.basename(weight_path)
            kwargs = {"weight_name": weight_name}
            return (model_name, kwargs)

        # repo id case
        lst = raw_arg.split("/")
        if len(lst) <= 2:
            raise ValueError(f"{raw_arg} is not a valid huggingface LoRA path")
        model_name = "/".join(lst[:2])
        weight_name = "/".join(lst[2:])
        kwargs = {"weight_name": weight_name}
        return (model_name, kwargs)

    model_name = raw_arg.pop("model_name")
    return (model_name, raw_arg)


def _load_lora_weights_to_pipeline(
    pipeline: diffusers.DiffusionPipeline,
    lora_weights: LoraOptionType | list[LoraOptionType],
    lora_dir: str | None = None,
):
    if not isinstance(lora_weights, list):
        lora_weights = [lora_weights]

    if len(lora_weights) > 1:
        logger.warning(
            "Currently diffusers only support single lora weight loading. The first lora weight will be loaded and the rest will be discarded"
        )

    lora_weight = lora_weights[0]
    model_name, kwargs = _prepare_lora_args(lora_weight, lora_dir=lora_dir)
    pipeline.load_lora_weights(model_name, **kwargs)


def _prepare_textual_inversion_args(
    raw_arg: TextualInversionOptionType,
) -> tuple[str, dict[str, str]]:
    if isinstance(raw_arg, str):
        # if the user only provides a string, we treat it as a path to
        # the weight file
        model_name = "."
        kwargs = {"weight_name": raw_arg}
        return (model_name, kwargs)

    model_name = raw_arg.pop("model_name")
    return (model_name, raw_arg)


def _str2cls(
    full_cls_str: str,
) -> type[diffusers.DiffusionPipeline | diffusers.SchedulerMixin]:
    import importlib

    module_name, _, class_name = full_cls_str.rpartition(".")

    # if the user only provides something like "StableDiffusionPipeline"
    # without the module name, fall back to the top-level "diffusers" module
    if not module_name:
        module_name = "diffusers"

    module = importlib.import_module(module_name)
    cls = getattr(module, class_name)
    return cls
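
# Illustrative sketch (not part of the original module): a bare class name is looked
# up in the top-level ``diffusers`` namespace, while a dotted path is imported
# explicitly.
#
#   _str2cls("StableDiffusionPipeline")
#   # -> diffusers.StableDiffusionPipeline
#
#   _str2cls("diffusers.DPMSolverMultistepScheduler")
#   # -> diffusers.DPMSolverMultistepScheduler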


def get(tag_like: str | Tag) -> bentoml.Model:
    """
    Get the BentoML model with the given tag.

    Args:
        tag_like: The tag of the model to retrieve from the model store.

    Returns:
        :obj:`~bentoml.Model`: A BentoML :obj:`~bentoml.Model` with the matching tag.

    Example:

    .. code-block:: python

        import bentoml
        # target model must be from the BentoML model store
        model = bentoml.diffusers.get("my_stable_diffusion_model")
    """
    model = bentoml.models.get(tag_like)
    if model.info.module not in (MODULE_NAME, __name__):
        raise NotFound(
            f"Model {model.tag} was saved with module {model.info.module}, not loading with {MODULE_NAME}."
        )
    return model


def load_model(
    bento_model: str | Tag | bentoml.Model,
    device_id: str | torch.device | None = None,
    pipeline_class: (
        str | type[diffusers.pipelines.DiffusionPipeline]
    ) = diffusers.DiffusionPipeline,
    device_map: str | dict[str, int | str | torch.device] | None = None,
    custom_pipeline: str | None = None,
    scheduler_class: type[diffusers.SchedulerMixin] | None = None,
    torch_dtype: str | torch.dtype | None = None,
    low_cpu_mem_usage: bool | None = None,
    enable_xformers: bool = False,
    enable_attention_slicing: int | str | None = None,
    enable_model_cpu_offload: bool | None = None,
    enable_sequential_cpu_offload: bool | None = None,
    enable_torch_compile: bool | None = None,
    variant: str | None = None,
    lora_weights: LoraOptionType | list[LoraOptionType] | None = None,
    textual_inversions: (
        TextualInversionOptionType | list[TextualInversionOptionType] | None
    ) = None,
    load_pretrained_extra_kwargs: dict[str, t.Any] | None = None,
) -> diffusers.DiffusionPipeline:
    """
    Load a Diffusion model and convert it to a diffusers
    `Pipeline <https://huggingface.co/docs/diffusers/api/pipelines/overview>`_
    with the given tag from the local BentoML model store.

    Args:
        bento_model:
            Either the tag of the model to get from the store, or a BentoML
            ``~bentoml.Model`` instance to load the model from.
        device_id (:code:`str`, `optional`, default to :code:`None`):
            Optional devices to put the given model on. Refer to
            `device attributes <https://pytorch.org/docs/stable/tensor_attributes.html#torch.torch.device>`_.
        pipeline_class (:code:`type[diffusers.DiffusionPipeline]`, `optional`):
            DiffusionPipeline class used to load the saved diffusion model, default to
            ``diffusers.DiffusionPipeline``. For more pipeline types, refer to
            `Pipeline Overview <https://huggingface.co/docs/diffusers/api/pipelines/overview>`_
        device_map (:code:`None | str | Dict[str, Union[int, str, torch.device]]`, `optional`):
            A map that specifies where each submodule should go. For more information, refer to
            `device_map <https://huggingface.co/docs/diffusers/main/en/api/diffusion_pipeline#diffusers.DiffusionPipeline.from_pretrained.device_map>`_
        custom_pipeline (:code:`None | str`, `optional`):
            An identifier of a custom pipeline hosted on GitHub. For a list of community-maintained
            custom pipelines, refer to https://github.com/huggingface/diffusers/tree/main/examples/community
        scheduler_class (:code:`type[diffusers.SchedulerMixin]`, `optional`):
            Scheduler class to be used by the DiffusionPipeline
        torch_dtype (:code:`str | torch.dtype`, `optional`):
            Override the default `torch.dtype` and load the model under this dtype.
        low_cpu_mem_usage (:code:`bool`, `optional`):
            Speed up model loading by not initializing the weights and only loading the
            pre-trained weights. Defaults to `True` if the torch version is >= 1.9.0, else `False`.
        enable_xformers (:code:`bool`, `optional`):
            Use xformers optimization if it is available. For more info, refer to
            https://github.com/facebookresearch/xformers
        variant (:code:`str`, *optional*):
            If specified, load weights from the `variant` filename, *e.g.* pytorch_model.<variant>.bin.
        lora_weights (:code:`LoraOptionType | list[LoraOptionType]`, *optional*):
            LoRA weights to be loaded. :code:`LoraOptionType` can be either a string or a
            dictionary. When it is a string, it represents a path to the weight file. When it
            is a dictionary, it contains a key :code:`"model_name"` pointing to a huggingface
            repository or a local directory, a key :code:`weight_name` pointing to the weight
            file, and other keys that will be passed to the pipeline's
            :code:`load_lora_weights` method.
        textual_inversions (:code:`TextualInversionOptionType | list[TextualInversionOptionType]`, *optional*):
            Textual inversions to be loaded. :code:`TextualInversionOptionType` can be either
            a string or a dictionary. When it is a string, it represents a path to the weight
            file. When it is a dictionary, it contains a key :code:`"model_name"` pointing to
            a huggingface repository or a local directory, a key :code:`weight_name` pointing
            to the weight file, and other keys that will be passed to the pipeline's
            :code:`load_textual_inversion` method.
        load_pretrained_extra_kwargs (:code:`dict[str, t.Any]`, *optional*):
            Extra kwargs passed to the pipeline class's :code:`from_pretrained` method

    Returns:
        The Diffusion model loaded as a diffusers pipeline from the BentoML model store.

    Example:

    .. code-block:: python

        import bentoml
        pipeline = bentoml.diffusers.load_model('my_diffusers_model:latest')
        pipeline(prompt)
    """  # noqa
    if not isinstance(bento_model, bentoml.Model):
        bento_model = get(bento_model)

    if bento_model.info.module not in (MODULE_NAME, __name__):
        raise NotFound(
            f"Model {bento_model.tag} was saved with module {bento_model.info.module}, not loading with {MODULE_NAME}."
        )

    if isinstance(pipeline_class, str):
        pipeline_class = t.cast(
            type[diffusers.DiffusionPipeline], _str2cls(pipeline_class)
        )

    if lora_weights:
        if not issubclass(pipeline_class, LoraLoaderMixin):
            raise NotImplementedError(
                f"Class {pipeline_class} is not a subclass of LoraLoaderMixin, cannot load lora weights"
            )

    if textual_inversions:
        if not issubclass(pipeline_class, TextualInversionLoaderMixin):
            raise NotImplementedError(
                f"Class {pipeline_class} is not a subclass of TextualInversionLoaderMixin, cannot load textual inversions"
            )

    diffusion_model_dir = bento_model.path_of(DIFFUSION_MODEL_FOLDER)

    if low_cpu_mem_usage is None:
        if is_torch_version(">=", "1.9.0") and is_accelerate_available():
            low_cpu_mem_usage = True
        else:
            low_cpu_mem_usage = False

    load_pretrained_extra_kwargs = load_pretrained_extra_kwargs or {}

    pipeline: diffusers.DiffusionPipeline = pipeline_class.from_pretrained(
        diffusion_model_dir,
        torch_dtype=torch_dtype,
        low_cpu_mem_usage=low_cpu_mem_usage,
        device_map=device_map,
        custom_pipeline=custom_pipeline,
        variant=variant,
        **load_pretrained_extra_kwargs,
    )

    if scheduler_class:
        scheduler: diffusers.SchedulerMixin = scheduler_class.from_config(
            pipeline.scheduler.config
        )
        pipeline.scheduler = scheduler

    if device_id is not None:
        move_model_to_device = True

        if str(device_id).lower().startswith("cuda"):
            # when device_map is not None, we should not move the
            # pipeline to gpu again, see
            # https://github.com/huggingface/diffusers/issues/2782
            if device_map is not None:
                move_model_to_device = False
            if enable_sequential_cpu_offload:
                move_model_to_device = False
            if enable_model_cpu_offload:
                move_model_to_device = False

        if move_model_to_device:
            pipeline = pipeline.to(device_id)

    if enable_xformers:
        pipeline.enable_xformers_memory_efficient_attention()

    if enable_sequential_cpu_offload:
        pipeline.enable_sequential_cpu_offload()

    if enable_model_cpu_offload:
        pipeline.enable_model_cpu_offload()

    if enable_attention_slicing is not None:
        pipeline.enable_attention_slicing(enable_attention_slicing)

    if enable_torch_compile:
        logger.info("Run torch compile on unet")
        pipeline.unet = torch.compile(
            pipeline.unet, mode="reduce-overhead", fullgraph=True
        )

    if lora_weights:
        _load_lora_weights_to_pipeline(pipeline, lora_weights)

    if textual_inversions:
        if not isinstance(textual_inversions, list):
            textual_inversions = [textual_inversions]
        for textual_inversion in textual_inversions:
            model_name, kwargs = _prepare_textual_inversion_args(textual_inversion)
            pipeline.load_textual_inversion(model_name, **kwargs)

    return pipeline


def import_model(
    name: Tag | str,
    model_name_or_path: str | os.PathLike[str],
    *,
    proxies: dict[str, str] | None = None,
    revision: str = "main",
    variant: str | None = None,
    pipeline_class: str | type[diffusers.DiffusionPipeline] | None = None,
    sync_with_hub_version: bool = False,
    signatures: dict[str, ModelSignatureDict | ModelSignature] | None = None,
    labels: dict[str, str] | None = None,
    custom_objects: dict[str, t.Any] | None = None,
    external_modules: t.List[ModuleType] | None = None,
    metadata: dict[str, t.Any] | None = None,
    # ...
) -> bentoml.Model:
    """
    Import a Diffusion model from an artifact URI to the BentoML model store.

    Args:
        name:
            The name to give to the model in the BentoML store. This must be a valid
            :obj:`~bentoml.Tag` name.
        model_name_or_path:
            Can be either:

            - A string, the *repo id* of a pretrained pipeline hosted inside a model repo
              on https://huggingface.co/. Valid repo ids have to be located under a user
              or organization name, like `CompVis/ldm-text2im-large-256`.
            - A path to a *directory* containing pipeline weights saved using
              [`~DiffusionPipeline.save_pretrained`], e.g., `./my_pipeline_directory/`.
        proxies (`Dict[str, str]`, *optional*):
            A dictionary of proxy servers to use by protocol or endpoint, e.g.,
            `{'http': 'foo.bar:3128', 'http://hostname': 'foo.bar:4012'}`. The proxies are
            used on each request.
        revision (`str`, *optional*, defaults to `"main"`):
            The specific model version to use. It can be a branch name, a tag name, or a
            commit id; since we use a git-based system for storing models and other
            artifacts on huggingface.co, `revision` can be any identifier allowed by git.
        variant (`str`, *optional*):
            Variant of the model to import. For example, there are "fp16" and "fp32"
            variants for "DeepFloyd/IF-I-XL-v1.0". This may save download bandwidth and
            local disk space.
        sync_with_hub_version (`bool`, default to False):
            If sync_with_hub_version is true, the version of the imported model will be
            synced with the commit hash reported by the huggingface hub.
        signatures:
            Signatures of predict methods to be used. If not provided, the signatures
            default to {"__call__": {"batchable": False}}. See
            :obj:`~bentoml.types.ModelSignature` for more details.
        labels:
            A default set of management labels to be associated with the model. For
            example: ``{"training-set": "data-v1"}``.
        custom_objects:
            Custom objects to be saved with the model. An example is
            ``{"my-normalizer": normalizer}``. Custom objects are serialized with
            cloudpickle.
        metadata:
            Metadata to be associated with the model. An example is ``{"param_a": .2}``.

            Metadata is intended for display in a model management UI and therefore all
            values in metadata dictionary must be a primitive Python type, such as ``str``
            or ``int``.

    Returns:
        A :obj:`~bentoml.Model` instance referencing a saved model in the local BentoML
        model store.

    Example:

    .. code-block:: python

        import bentoml

        bentoml.diffusers.import_model(
            'my_sd15_model',
            "runwayml/stable-diffusion-v1-5",
            signatures={
                "__call__": {"batchable": False},
            }
        )
    """
    tag = Tag.from_taglike(name)

    try:
        model = bentoml.models.get(tag)
        return model
    except bentoml.exceptions.NotFound:
        pass

    if sync_with_hub_version:
        if tag.version is not None:
            logger.warning(
                f"sync_with_hub_version is True, user-provided version {tag.version} may be overridden by huggingface hub's commit hash"
            )

    context = ModelContext(
        framework_name="diffusers",
        framework_versions={"diffusers": diffusers.__version__},
    )

    if signatures is None:
        signatures = {
            "__call__": {"batchable": False},
        }
        logger.info(
            'Using the default model signature for diffusers (%s) for model "%s".',
            signatures,
            name,
        )

    if pipeline_class and isinstance(pipeline_class, str):
        pipeline_class = t.cast(
            type[diffusers.DiffusionPipeline], _str2cls(pipeline_class)
        )

    options_dict: dict[str, str] = {}
    if pipeline_class:
        cls_str = f"{pipeline_class.__module__}.{pipeline_class.__name__}"
        options_dict["pipeline_class"] = cls_str
    if variant:
        options_dict["variant"] = variant
    options = DiffusersOptions(**options_dict) if options_dict else None

    if os.path.isdir(model_name_or_path):
        src_dir = model_name_or_path
        if sync_with_hub_version:
            raise BentoMLException(
                "Cannot sync version with huggingface hub when importing a local model"
            )
    elif pipeline_class:
        src_dir = pipeline_class.download(
            model_name_or_path, proxies=proxies, revision=revision, variant=variant
        )
        if sync_with_hub_version:
            from huggingface_hub.file_download import REGEX_COMMIT_HASH

            version = extract_commit_hash(src_dir, REGEX_COMMIT_HASH)
            if version is not None:
                if variant is not None:
                    version = version + "-" + variant
                tag.version = version
    else:
        from huggingface_hub import snapshot_download

        src_dir = snapshot_download(
            model_name_or_path,
            proxies=proxies,
            revision=revision,
        )
        if sync_with_hub_version:
            from huggingface_hub.file_download import REGEX_COMMIT_HASH

            version = extract_commit_hash(src_dir, REGEX_COMMIT_HASH)
            if version is not None:
                tag.version = version

    with bentoml.models._create(  # type: ignore
        tag,
        module=MODULE_NAME,
        api_version=API_VERSION,
        signatures=signatures,
        labels=labels,
        options=options,
        custom_objects=custom_objects,
        external_modules=external_modules,
        metadata=metadata,
        context=context,
    ) as bento_model:
        diffusion_model_dir = bento_model.path_of(DIFFUSION_MODEL_FOLDER)
        ignore = shutil.ignore_patterns(".git")
        model_config_file = os.path.join(src_dir, DIFFUSION_MODEL_CONFIG_FILE)
        if not os.path.exists(model_config_file):
            raise BentoMLException(f'artifact "{src_dir}" is not a Diffusion model')
        shutil.copytree(src_dir, diffusion_model_dir, symlinks=False, ignore=ignore)

        return bento_model


def save_model(
    name: Tag | str,
    pipeline: diffusers.DiffusionPipeline,
    *,
    signatures: dict[str, ModelSignatureDict | ModelSignature] | None = None,
    labels: dict[str, str] | None = None,
    custom_objects: dict[str, t.Any] | None = None,
    external_modules: t.List[ModuleType] | None = None,
    metadata: dict[str, t.Any] | None = None,
) -> bentoml.Model:
    """
    Save a DiffusionPipeline to the BentoML model store.

    Args:
        name:
            The name to give to the model in the BentoML store. This must be a valid
            :obj:`~bentoml.Tag` name.
        pipeline:
            Instance of the Diffusers pipeline to be saved
        signatures:
            Signatures of predict methods to be used. If not provided, the signatures
            default to {"__call__": {"batchable": False}}. See
            :obj:`~bentoml.types.ModelSignature` for more details.
        labels:
            A default set of management labels to be associated with the model. For
            example: ``{"training-set": "data-v1"}``.
        custom_objects:
            Custom objects to be saved with the model. An example is
            ``{"my-normalizer": normalizer}``. Custom objects are serialized with
            cloudpickle.
        metadata:
            Metadata to be associated with the model. An example is ``{"param_a": .2}``.

            Metadata is intended for display in a model management UI and therefore all
            values in metadata dictionary must be a primitive Python type, such as ``str``
            or ``int``.

    Returns:
        A :obj:`~bentoml.Model` instance referencing a saved model in the local BentoML
        model store.
    """
    if not isinstance(pipeline, diffusers.DiffusionPipeline):
        raise BentoMLException(
            "'pipeline' must be an instance of 'diffusers.DiffusionPipeline'."
        )

    context = ModelContext(
        framework_name="diffusers",
        framework_versions={"diffusers": diffusers.__version__},
    )

    if signatures is None:
        signatures = {
            "__call__": {"batchable": False},
        }
        logger.info(
            'Using the default model signature for diffusers (%s) for model "%s".',
            signatures,
            name,
        )

    with bentoml.models._create(  # type: ignore
        name,
        module=MODULE_NAME,
        api_version=API_VERSION,
        signatures=signatures,
        labels=labels,
        options=None,
        custom_objects=custom_objects,
        external_modules=external_modules,
        metadata=metadata,
        context=context,
    ) as bento_model:
        diffusion_model_dir = bento_model.path_of(DIFFUSION_MODEL_FOLDER)
        pipeline.save_pretrained(diffusion_model_dir)

        return bento_model

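
# Illustrative sketch (not part of the original module): saving a pipeline that is
# already in memory with save_model. The tag "my_sd15_model" is hypothetical.
#
#   import bentoml
#   import diffusers
#
#   pipeline = diffusers.StableDiffusionPipeline.from_pretrained(
#       "runwayml/stable-diffusion-v1-5"
#   )
#   bento_model = bentoml.diffusers.save_model("my_sd15_model", pipeline)
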
def get_runnable(bento_model: bentoml.Model) -> t.Type[bentoml.Runnable]:
    """
    Private API: use :obj:`~bentoml.Model.to_runnable` instead.
    """
    bento_options = t.cast(DiffusersOptions, bento_model.info.options)
    partial_kwargs: dict[str, t.Any] = bento_options.partial_kwargs  # type: ignore

    pipeline_class: str | type[diffusers.DiffusionPipeline] = (
        bento_options.pipeline_class or diffusers.DiffusionPipeline
    )
    if isinstance(pipeline_class, str):
        pipeline_class = t.cast(
            type[diffusers.DiffusionPipeline], _str2cls(pipeline_class)
        )

    scheduler_class: str | type[diffusers.SchedulerMixin] | None = (
        bento_options.scheduler_class
    )
    if scheduler_class and isinstance(scheduler_class, str):
        scheduler_class = t.cast(
            type[diffusers.SchedulerMixin], _str2cls(scheduler_class)
        )

    custom_pipeline: str | None = bento_options.custom_pipeline
    _enable_xformers: bool | None = bento_options.enable_xformers
    enable_attention_slicing: int | str | None = bento_options.enable_attention_slicing
    enable_sequential_cpu_offload: bool | None = (
        bento_options.enable_sequential_cpu_offload
    )
    enable_model_cpu_offload: bool | None = bento_options.enable_model_cpu_offload
    enable_torch_compile: bool | None = bento_options.enable_torch_compile
    low_cpu_mem_usage: bool | None = bento_options.low_cpu_mem_usage
    variant: str | None = bento_options.variant
    _torch_dtype: str | torch.dtype | None = bento_options.torch_dtype
    device_map: str | dict[str, int | str | torch.device] | None = (
        bento_options.device_map
    )
    load_pretrained_extra_kwargs = bento_options.load_pretrained_extra_kwargs

    support_lora = issubclass(pipeline_class, LoraLoaderMixin)
    support_textual_inversion = issubclass(pipeline_class, TextualInversionLoaderMixin)

    lora_dir = bento_options.lora_dir
    lora_weights = bento_options.lora_weights
    textual_inversions = bento_options.textual_inversions

    if not support_lora and lora_weights:
        raise NotImplementedError(
            f"Class {pipeline_class} is not a subclass of LoraLoaderMixin, cannot load lora weights. "
            "Try using `bento_model.with_options(pipeline_class=diffusers.StableDiffusionPipeline)` to specify the pipeline's class"
        )

    if not support_textual_inversion and textual_inversions:
        raise NotImplementedError(
            f"Class {pipeline_class} is not a subclass of TextualInversionLoaderMixin, cannot load textual inversions. "
            "Try using `bento_model.with_options(pipeline_class=diffusers.StableDiffusionPipeline)` to specify the pipeline's class"
        )

    class DiffusersRunnable(bentoml.Runnable):
        SUPPORTED_RESOURCES = ("nvidia.com/gpu", "cpu")
        SUPPORTS_CPU_MULTI_THREADING = True

        def __init__(self):
            super().__init__()

            if torch.cuda.is_available() and _torch_dtype is None:
                torch_dtype = torch.float16
            else:
                torch_dtype = _torch_dtype

            enable_xformers: bool = False
            if torch.cuda.is_available() and _enable_xformers is None:
                if is_xformers_available():
                    enable_xformers = True

            device_id: str | None = None
            if torch.cuda.is_available():
                device_id = "cuda"

            self.lora_dir = lora_dir
            self.pipeline: diffusers.DiffusionPipeline = load_model(
                bento_model,
                device_id=device_id,
                device_map=device_map,
                pipeline_class=pipeline_class,
                scheduler_class=scheduler_class,
                torch_dtype=torch_dtype,
                custom_pipeline=custom_pipeline,
                enable_xformers=enable_xformers,
                enable_attention_slicing=enable_attention_slicing,
                enable_sequential_cpu_offload=enable_sequential_cpu_offload,
                enable_model_cpu_offload=enable_model_cpu_offload,
                enable_torch_compile=enable_torch_compile,
                low_cpu_mem_usage=low_cpu_mem_usage,
                variant=variant,
                lora_weights=lora_weights,
                textual_inversions=textual_inversions,
                load_pretrained_extra_kwargs=load_pretrained_extra_kwargs,
            )

        @bentoml.Runnable.method(batchable=False)
        def _replace_scheduler(self, scheduler_txt: str):
            try:
                scheduler_cls = _str2cls(scheduler_txt)
                if isinstance(self.pipeline.scheduler, scheduler_cls):
                    return dict(success=True)
                if scheduler_cls in self.pipeline.scheduler.compatibles:
                    self.pipeline.scheduler = scheduler_cls.from_config(
                        self.pipeline.scheduler.config,
                    )
                    return dict(success=True)
                else:
                    return dict(
                        success=False,
                        error_message="scheduler class is incompatible with this pipeline",
                    )
            except (ModuleNotFoundError, ValueError, AttributeError):
                logger.info(f"Cannot import {scheduler_txt}")
                return dict(
                    success=False,
                    error_message="cannot import scheduler class",
                )

    if support_lora:

        def _load_lora_weights(
            self: DiffusersRunnable,
            lora_weights: LoraOptionType | list[LoraOptionType],
        ):
            _load_lora_weights_to_pipeline(self.pipeline, lora_weights, self.lora_dir)

        def _unload_lora_weights(
            self: DiffusersRunnable,
        ):
            self.pipeline.unload_lora_weights()
            # clear cached lora weights from GPU memory
            torch.cuda.empty_cache()

    else:

        def _load_lora_weights(
            self: DiffusersRunnable,
            lora_args: LoraOptionType | list[LoraOptionType],
        ):
            raise NotImplementedError(
                f"Class {pipeline_class} is not a subclass of LoraLoaderMixin, cannot load lora weights"
            )

        def _unload_lora_weights(
            self: DiffusersRunnable,
        ):
            raise NotImplementedError(
                f"Class {pipeline_class} is not a subclass of LoraLoaderMixin, cannot unload lora weights"
            )

    setattr(DiffusersRunnable, "_load_lora_weights", _load_lora_weights)
    setattr(DiffusersRunnable, "_unload_lora_weights", _unload_lora_weights)

    def make_run_method(
        method_name: str, method_partial_kwargs: dict[str, t.Any] | None
    ) -> t.Callable[..., t.Any]:
        if support_lora:

            def _run_method(
                runnable_self: DiffusersRunnable, *args: t.Any, **kwargs: t.Any
            ) -> t.Any:
                if method_partial_kwargs is not None:
                    kwargs = dict(method_partial_kwargs, **kwargs)

                lora_weights: LoraOptionType | list[LoraOptionType] | None = kwargs.pop(
                    "lora_weights", None
                )
                try:
                    if lora_weights is not None:
                        runnable_self._load_lora_weights(lora_weights)
                    raw_method = getattr(runnable_self.pipeline, method_name)
                    res = raw_method(*args, **kwargs)
                finally:
                    torch.cuda.empty_cache()
                    if lora_weights is not None:
                        runnable_self._unload_lora_weights()

                # BaseOutput cannot be serialized yet; convert it to a plain tuple
                if isinstance(res, diffusers.utils.BaseOutput):
                    res = res.to_tuple()
                return res

        else:

            def _run_method(
                runnable_self: DiffusersRunnable,
                *args: t.Any,
                **kwargs: t.Any,
            ) -> t.Any:
                if method_partial_kwargs is not None:
                    kwargs = dict(method_partial_kwargs, **kwargs)

                raw_method = getattr(runnable_self.pipeline, method_name)
                res = raw_method(*args, **kwargs)

                # BaseOutput cannot be serialized yet; convert it to a plain tuple
                if isinstance(res, diffusers.utils.BaseOutput):
                    res = res.to_tuple()
                return res

        return _run_method

    for method_name, options in bento_model.info.signatures.items():
        method_partial_kwargs = partial_kwargs.get(method_name)
        DiffusersRunnable.add_method(
            make_run_method(method_name, method_partial_kwargs),
            name=method_name,
            batchable=options.batchable,
            batch_dim=options.batch_dim,
            input_spec=options.input_spec,
            output_spec=options.output_spec,
        )

    return DiffusersRunnable
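
# Illustrative sketch (not part of the original module): get_runnable is reached
# through the public ``bentoml.Model.to_runnable``/``to_runner`` API rather than
# called directly. The tag and prompt below are hypothetical:
#
#   runner = bentoml.diffusers.get("my_sd15_model:latest").to_runner()
#   runner.init_local()  # or let a bentoml.Service manage the runner
#   images, _ = runner.run(prompt="a photo of an astronaut riding a horse")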