From 6a1eab5d312dbf6bc11acd9be9f8ffd9cfc6a7f6 Mon Sep 17 00:00:00 2001
From: leejet
Date: Sun, 7 Sep 2025 12:47:00 +0800
Subject: [PATCH 1/4] feat: add tensor loading time stat

---
 model.cpp | 50 ++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 50 insertions(+)

diff --git a/model.cpp b/model.cpp
index 1be05715..fda5dc9b 100644
--- a/model.cpp
+++ b/model.cpp
@@ -1966,6 +1966,16 @@ std::vector<TensorStorage> remove_duplicates(const std::vector<TensorStorage>& v
 }
 
 bool ModelLoader::load_tensors(on_new_tensor_cb_t on_new_tensor_cb) {
+    int64_t proccess_time_ms = 0;
+    int64_t read_time_ms = 0;
+    int64_t memcpy_time_ms = 0;
+    int64_t copy_to_backend_time_ms = 0;
+    int64_t convert_time_ms = 0;
+
+    int64_t prev_time_ms = 0;
+    int64_t curr_time_ms = 0;
+    int64_t start_time = ggml_time_ms();
+    prev_time_ms = start_time;
     std::vector<TensorStorage> processed_tensor_storages;
     for (auto& tensor_storage : tensor_storages) {
         // LOG_DEBUG("%s", name.c_str());
@@ -1978,6 +1988,9 @@ bool ModelLoader::load_tensors(on_new_tensor_cb_t on_new_tensor_cb) {
     }
     std::vector<TensorStorage> dedup = remove_duplicates(processed_tensor_storages);
     processed_tensor_storages = dedup;
+    curr_time_ms = ggml_time_ms();
+    proccess_time_ms = curr_time_ms - prev_time_ms;
+    prev_time_ms = curr_time_ms;
 
     bool success = true;
     for (size_t file_index = 0; file_index < file_paths_.size(); file_index++) {
@@ -2019,15 +2032,27 @@ bool ModelLoader::load_tensors(on_new_tensor_cb_t on_new_tensor_cb) {
             size_t entry_size = zip_entry_size(zip);
             if (entry_size != n) {
                 read_buffer.resize(entry_size);
+                prev_time_ms = ggml_time_ms();
                 zip_entry_noallocread(zip, (void*)read_buffer.data(), entry_size);
+                curr_time_ms = ggml_time_ms();
+                read_time_ms += curr_time_ms - prev_time_ms;
+                prev_time_ms = curr_time_ms;
                 memcpy((void*)buf, (void*)(read_buffer.data() + tensor_storage.offset), n);
+                curr_time_ms = ggml_time_ms();
+                memcpy_time_ms += curr_time_ms - prev_time_ms;
             } else {
+                prev_time_ms = ggml_time_ms();
                 zip_entry_noallocread(zip, (void*)buf, n);
+                curr_time_ms = ggml_time_ms();
+                read_time_ms += curr_time_ms - prev_time_ms;
             }
             zip_entry_close(zip);
         } else {
+            prev_time_ms = ggml_time_ms();
             file.seekg(tensor_storage.offset);
             file.read(buf, n);
+            curr_time_ms = ggml_time_ms();
+            read_time_ms += curr_time_ms - prev_time_ms;
             if (!file) {
                 LOG_ERROR("read tensor data failed: '%s'", file_path.c_str());
                 return false;
@@ -2072,6 +2097,7 @@ bool ModelLoader::load_tensors(on_new_tensor_cb_t on_new_tensor_cb) {
                     read_data(tensor_storage, (char*)dst_tensor->data, nbytes_to_read);
                 }
 
+                prev_time_ms = ggml_time_ms();
                 if (tensor_storage.is_bf16) {
                     // inplace op
                     bf16_to_f32_vec((uint16_t*)dst_tensor->data, (float*)dst_tensor->data, tensor_storage.nelements());
@@ -2086,10 +2112,13 @@ bool ModelLoader::load_tensors(on_new_tensor_cb_t on_new_tensor_cb) {
                 } else if (tensor_storage.is_i64) {
                     i64_to_i32_vec((int64_t*)read_buffer.data(), (int32_t*)dst_tensor->data, tensor_storage.nelements());
                 }
+                curr_time_ms = ggml_time_ms();
+                convert_time_ms += curr_time_ms - prev_time_ms;
             } else {
                 read_buffer.resize(std::max(tensor_storage.nbytes(), tensor_storage.nbytes_to_read()));
                 read_data(tensor_storage, (char*)read_buffer.data(), nbytes_to_read);
 
+                prev_time_ms = ggml_time_ms();
                 if (tensor_storage.is_bf16) {
                     // inplace op
                     bf16_to_f32_vec((uint16_t*)read_buffer.data(), (float*)read_buffer.data(), tensor_storage.nelements());
@@ -2109,11 +2138,14 @@ bool ModelLoader::load_tensors(on_new_tensor_cb_t on_new_tensor_cb) {
 
                 convert_tensor((void*)read_buffer.data(), tensor_storage.type, dst_tensor->data, dst_tensor->type,
                                (int)tensor_storage.nelements() / (int)tensor_storage.ne[0], (int)tensor_storage.ne[0]);
+                curr_time_ms = ggml_time_ms();
+                convert_time_ms += curr_time_ms - prev_time_ms;
             }
         } else {
             read_buffer.resize(std::max(tensor_storage.nbytes(), tensor_storage.nbytes_to_read()));
             read_data(tensor_storage, (char*)read_buffer.data(), nbytes_to_read);
 
+            prev_time_ms = ggml_time_ms();
             if (tensor_storage.is_bf16) {
                 // inplace op
                 bf16_to_f32_vec((uint16_t*)read_buffer.data(), (float*)read_buffer.data(), tensor_storage.nelements());
@@ -2133,14 +2165,24 @@ bool ModelLoader::load_tensors(on_new_tensor_cb_t on_new_tensor_cb) {
 
             if (tensor_storage.type == dst_tensor->type) {
                 // copy to device memory
+                curr_time_ms = ggml_time_ms();
+                convert_time_ms += curr_time_ms - prev_time_ms;
+                prev_time_ms = curr_time_ms;
                 ggml_backend_tensor_set(dst_tensor, read_buffer.data(), 0, ggml_nbytes(dst_tensor));
+                curr_time_ms = ggml_time_ms();
+                copy_to_backend_time_ms += curr_time_ms - prev_time_ms;
             } else {
                 // convert first, then copy to device memory
                 convert_buffer.resize(ggml_nbytes(dst_tensor));
                 convert_tensor((void*)read_buffer.data(), tensor_storage.type,
                                (void*)convert_buffer.data(), dst_tensor->type,
                                (int)tensor_storage.nelements() / (int)tensor_storage.ne[0], (int)tensor_storage.ne[0]);
+                curr_time_ms = ggml_time_ms();
+                convert_time_ms += curr_time_ms - prev_time_ms;
+                prev_time_ms = curr_time_ms;
                 ggml_backend_tensor_set(dst_tensor, convert_buffer.data(), 0, ggml_nbytes(dst_tensor));
+                curr_time_ms = ggml_time_ms();
+                copy_to_backend_time_ms += curr_time_ms - prev_time_ms;
             }
         }
         ++tensor_count;
@@ -2170,6 +2212,14 @@ bool ModelLoader::load_tensors(on_new_tensor_cb_t on_new_tensor_cb) {
             break;
         }
     }
+    int64_t end_time = ggml_time_ms();
+    LOG_INFO("loading tensors completed, taking %.2fs (process: %.2f, read: %.2fs, memcpy: %.2fs, convert: %.2fs, copy_to_backend: %.2fs)",
+             (end_time - start_time) / 1000.f,
+             proccess_time_ms / 1000.f,
+             read_time_ms / 1000.f,
+             memcpy_time_ms / 1000.f,
+             convert_time_ms / 1000.f,
+             copy_to_backend_time_ms / 1000.f);
     return success;
 }
 

From 35ac8ed47390c4c27defb189744eae44736c9f5b Mon Sep 17 00:00:00 2001
From: leejet
Date: Sun, 7 Sep 2025 12:49:45 +0800
Subject: [PATCH 2/4] fix typo

---
 model.cpp | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/model.cpp b/model.cpp
index fda5dc9b..ce9e9b7a 100644
--- a/model.cpp
+++ b/model.cpp
@@ -1966,7 +1966,7 @@ std::vector<TensorStorage> remove_duplicates(const std::vector<TensorStorage>& v
 }
 
 bool ModelLoader::load_tensors(on_new_tensor_cb_t on_new_tensor_cb) {
-    int64_t proccess_time_ms = 0;
+    int64_t process_time_ms = 0;
     int64_t read_time_ms = 0;
     int64_t memcpy_time_ms = 0;
     int64_t copy_to_backend_time_ms = 0;
@@ -1989,7 +1989,7 @@ bool ModelLoader::load_tensors(on_new_tensor_cb_t on_new_tensor_cb) {
     std::vector<TensorStorage> dedup = remove_duplicates(processed_tensor_storages);
     processed_tensor_storages = dedup;
     curr_time_ms = ggml_time_ms();
-    proccess_time_ms = curr_time_ms - prev_time_ms;
+    process_time_ms = curr_time_ms - prev_time_ms;
     prev_time_ms = curr_time_ms;
 
     bool success = true;
@@ -2215,7 +2215,7 @@ bool ModelLoader::load_tensors(on_new_tensor_cb_t on_new_tensor_cb) {
     int64_t end_time = ggml_time_ms();
     LOG_INFO("loading tensors completed, taking %.2fs (process: %.2f, read: %.2fs, memcpy: %.2fs, convert: %.2fs, copy_to_backend: %.2fs)",
              (end_time - start_time) / 1000.f,
-             proccess_time_ms / 1000.f,
+             process_time_ms / 1000.f,
              read_time_ms / 1000.f,
              memcpy_time_ms / 1000.f,
             convert_time_ms / 1000.f,

From 5ed25a8cdd8c742d146135470bf828640f8d5502 Mon Sep 17 00:00:00 2001
From: leejet
Date: Sun, 7 Sep 2025 12:55:30 +0800
Subject: [PATCH 3/4] add missing s

---
 model.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/model.cpp b/model.cpp
index ce9e9b7a..66e04313 100644
--- a/model.cpp
+++ b/model.cpp
@@ -2213,7 +2213,7 @@ bool ModelLoader::load_tensors(on_new_tensor_cb_t on_new_tensor_cb) {
         }
     }
     int64_t end_time = ggml_time_ms();
-    LOG_INFO("loading tensors completed, taking %.2fs (process: %.2f, read: %.2fs, memcpy: %.2fs, convert: %.2fs, copy_to_backend: %.2fs)",
+    LOG_INFO("loading tensors completed, taking %.2fs (process: %.2fs, read: %.2fs, memcpy: %.2fs, convert: %.2fs, copy_to_backend: %.2fs)",
              (end_time - start_time) / 1000.f,
              process_time_ms / 1000.f,
              read_time_ms / 1000.f,

From 854fa231f137f9cf8f061c0c4758062adc7c7a09 Mon Sep 17 00:00:00 2001
From: leejet
Date: Sun, 7 Sep 2025 22:45:45 +0800
Subject: [PATCH 4/4] remove redundant model loading log

---
 model.cpp            | 4 ++--
 stable-diffusion.cpp | 6 ------
 2 files changed, 2 insertions(+), 8 deletions(-)

diff --git a/model.cpp b/model.cpp
index 66e04313..4e42018c 100644
--- a/model.cpp
+++ b/model.cpp
@@ -1966,7 +1966,7 @@ std::vector<TensorStorage> remove_duplicates(const std::vector<TensorStorage>& v
 }
 
 bool ModelLoader::load_tensors(on_new_tensor_cb_t on_new_tensor_cb) {
-    int64_t process_time_ms = 0;
+    int64_t process_time_ms = 0;
     int64_t read_time_ms = 0;
     int64_t memcpy_time_ms = 0;
     int64_t copy_to_backend_time_ms = 0;
@@ -1989,7 +1989,7 @@ bool ModelLoader::load_tensors(on_new_tensor_cb_t on_new_tensor_cb) {
     std::vector<TensorStorage> dedup = remove_duplicates(processed_tensor_storages);
     processed_tensor_storages = dedup;
     curr_time_ms = ggml_time_ms();
-    process_time_ms = curr_time_ms - prev_time_ms;
+    process_time_ms = curr_time_ms - prev_time_ms;
     prev_time_ms = curr_time_ms;
 
     bool success = true;
diff --git a/stable-diffusion.cpp b/stable-diffusion.cpp
index 64164a2f..db234a1a 100644
--- a/stable-diffusion.cpp
+++ b/stable-diffusion.cpp
@@ -557,8 +557,6 @@ class StableDiffusionGGML {
         // load weights
         LOG_DEBUG("loading weights");
 
-        int64_t t0 = ggml_time_ms();
-
         std::set<std::string> ignore_tensors;
         tensors["alphas_cumprod"] = alphas_cumprod_tensor;
         if (use_tiny_autoencoder) {
@@ -656,11 +654,7 @@ class StableDiffusionGGML {
                      ggml_backend_is_cpu(clip_backend) ? "RAM" : "VRAM");
         }
 
-        int64_t t1 = ggml_time_ms();
-        LOG_INFO("loading model from '%s' completed, taking %.2fs", SAFE_STR(sd_ctx_params->model_path), (t1 - t0) * 1.0f / 1000);
-
         // check is_using_v_parameterization_for_sd2
-
         if (sd_version_is_sd2(version)) {
             if (is_using_v_parameterization_for_sd2(ctx, sd_version_is_inpaint(version))) {
                 is_using_v_parameterization = true;
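
The instrumentation added in PATCH 1/4 follows a simple phase-timing pattern: take a wall-clock stamp before and after each phase (read, memcpy, convert, copy to backend), accumulate the difference into a per-phase counter, and emit one summary line once all tensors are loaded. The standalone sketch below illustrates that pattern outside the patch series; it substitutes std::chrono for ggml_time_ms() and uses placeholder sleep calls in place of the real read/convert work, so it compiles and runs on its own.

// Minimal sketch of the phase-timing pattern used by the patches above.
// Not part of the patch series; time_ms() and the sleep calls stand in for
// ggml_time_ms() and the real per-tensor work.
#include <chrono>
#include <cstdint>
#include <cstdio>
#include <thread>

static int64_t time_ms() {
    using namespace std::chrono;
    return duration_cast<milliseconds>(steady_clock::now().time_since_epoch()).count();
}

int main() {
    int64_t read_time_ms    = 0;
    int64_t convert_time_ms = 0;

    int64_t start_time = time_ms();
    for (int i = 0; i < 3; i++) {
        // "read" phase: stamp, do the work, add the delta to the read counter
        int64_t prev_time_ms = time_ms();
        std::this_thread::sleep_for(std::chrono::milliseconds(5));  // stands in for reading a tensor
        int64_t curr_time_ms = time_ms();
        read_time_ms += curr_time_ms - prev_time_ms;

        // "convert" phase: reuse the last stamp as the new start point
        prev_time_ms = curr_time_ms;
        std::this_thread::sleep_for(std::chrono::milliseconds(2));  // stands in for converting it
        curr_time_ms = time_ms();
        convert_time_ms += curr_time_ms - prev_time_ms;
    }
    int64_t end_time = time_ms();

    // one summary line, mirroring the LOG_INFO added in PATCH 1/4
    std::printf("loading completed, taking %.2fs (read: %.2fs, convert: %.2fs)\n",
                (end_time - start_time) / 1000.f,
                read_time_ms / 1000.f,
                convert_time_ms / 1000.f);
    return 0;
}

Accumulating deltas per phase rather than logging each tensor keeps the overhead to one clock read per phase and a single log line per model load.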