From 9c7cb44f24a1656bcff88f7727f9b728e721c30b Mon Sep 17 00:00:00 2001 From: Wagner Bruna Date: Thu, 14 Aug 2025 08:16:52 -0300 Subject: [PATCH 1/2] fix: avoid segfault for pix2pix models with no reference images --- stable-diffusion.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/stable-diffusion.cpp b/stable-diffusion.cpp index db4e07cb..21460b0e 100644 --- a/stable-diffusion.cpp +++ b/stable-diffusion.cpp @@ -2322,6 +2322,10 @@ sd_image_t* generate_image(sd_ctx_t* sd_ctx, const sd_img_gen_params_t* sd_img_g if (sd_img_gen_params->ref_images_count > 0) { LOG_INFO("EDIT mode"); } + else if (sd_ctx->sd->version == VERSION_SD1_PIX2PIX || sd_ctx->sd->version == VERSION_SDXL_PIX2PIX) { + LOG_ERROR("This model needs at least one reference image"); + return NULL; + } std::vector ref_latents; for (int i = 0; i < sd_img_gen_params->ref_images_count; i++) { From 9dae12fd4ea01e8801e4914dce02ff8a10711e26 Mon Sep 17 00:00:00 2001 From: Wagner Bruna Date: Fri, 15 Aug 2025 16:42:40 -0300 Subject: [PATCH 2/2] fix: default to empty reference on pix2pix models to avoid segfault --- stable-diffusion.cpp | 35 ++++++++++++++++++++++++++--------- 1 file changed, 26 insertions(+), 9 deletions(-) diff --git a/stable-diffusion.cpp b/stable-diffusion.cpp index 21460b0e..ba741bfa 100644 --- a/stable-diffusion.cpp +++ b/stable-diffusion.cpp @@ -448,6 +448,10 @@ class StableDiffusionGGML { diffusion_model->alloc_params_buffer(); diffusion_model->get_param_tensors(tensors); + if (sd_version_is_unet_edit(version)) { + vae_decode_only = false; + } + if (high_noise_diffusion_model) { high_noise_diffusion_model->alloc_params_buffer(); high_noise_diffusion_model->get_param_tensors(tensors); @@ -2319,23 +2323,36 @@ sd_image_t* generate_image(sd_ctx_t* sd_ctx, const sd_img_gen_params_t* sd_img_g init_latent = generate_init_latent(sd_ctx, work_ctx, width, height); } + sd_guidance_params_t guidance = sd_img_gen_params->sample_params.guidance; + std::vector ref_images; + for (int i = 0; i < 
sd_img_gen_params->ref_images_count; i++) { + ref_images.push_back(&sd_img_gen_params->ref_images[i]); + } + + std::vector empty_image_data; + sd_image_t empty_image = {(uint32_t)width, (uint32_t)height, 3, nullptr}; + if (ref_images.empty() && sd_version_is_unet_edit(sd_ctx->sd->version)) + { + LOG_WARN("This model needs at least one reference image; using an empty reference"); + empty_image_data.resize(width * height * 3); /* resize, not reserve: data() must cover width*height*3 value-initialized elements; reserve leaves size()==0 */ + ref_images.push_back(&empty_image); + empty_image.data = empty_image_data.data(); + guidance.img_cfg = 0.f; + } + if (sd_img_gen_params->ref_images_count > 0) { LOG_INFO("EDIT mode"); } - else if (sd_ctx->sd->version == VERSION_SD1_PIX2PIX || sd_ctx->sd->version == VERSION_SDXL_PIX2PIX) { - LOG_ERROR("This model needs at least one reference image"); - return NULL; - } std::vector ref_latents; - for (int i = 0; i < sd_img_gen_params->ref_images_count; i++) { + for (int i = 0; i < ref_images.size(); i++) { ggml_tensor* img = ggml_new_tensor_4d(work_ctx, GGML_TYPE_F32, - sd_img_gen_params->ref_images[i].width, - sd_img_gen_params->ref_images[i].height, + ref_images[i]->width, + ref_images[i]->height, 3, 1); - sd_image_to_tensor(sd_img_gen_params->ref_images[i].data, img); + sd_image_to_tensor(ref_images[i]->data, img); ggml_tensor* latent = NULL; if (sd_ctx->sd->use_tiny_autoencoder) { @@ -2368,7 +2385,7 @@ sd_image_t* generate_image(sd_ctx_t* sd_ctx, const sd_img_gen_params_t* sd_img_g SAFE_STR(sd_img_gen_params->prompt), SAFE_STR(sd_img_gen_params->negative_prompt), sd_img_gen_params->clip_skip, - sd_img_gen_params->sample_params.guidance, + guidance, sd_img_gen_params->sample_params.eta, width, height,