@@ -1141,14 +1141,23 @@ __STATIC_INLINE__ struct ggml_tensor* ggml_ext_ones(struct ggml_context* ctx,
11411141}
11421142
11431143__STATIC_INLINE__ ggml_tensor* ggml_ext_cast_f32 (ggml_context* ctx, ggml_tensor* a) {
1144+ #ifdef SD_USE_VULKAN
1145+ auto zero_index = ggml_get_tensor (ctx, " ggml_runner_build_in_tensor:zero_int" );
1146+ auto out = ggml_reshape_1d (ctx, a, ggml_nelements (a));
1147+ out = ggml_get_rows (ctx, out, zero_index);
1148+ out = ggml_reshape (ctx, out, a);
1149+ // auto out = ggml_cast(ctx, a, GGML_TYPE_F32);
1150+ return out;
1151+ #else
11441152 auto out = ggml_reshape_2d (ctx, a, 1 , ggml_nelements (a));
11451153 ggml_tensor* one = ggml_ext_ones (ctx, 1 , 1 , 1 , 1 ); // [1,]
11461154 if (ggml_is_transposed (out)) {
11471155 out = ggml_mul_mat (ctx, one, out);
11481156 } else {
11491157 out = ggml_mul_mat (ctx, out, one);
11501158 }
1151- out = ggml_reshape (ctx, out, a);
1159+ out = ggml_reshape (ctx, out, a);
1160+ #endif
11521161 return out;
11531162}
11541163
@@ -1556,6 +1565,9 @@ struct GGMLRunner {
15561565 std::vector<float > one_vec = {1 .f };
15571566 ggml_tensor* one_tensor = nullptr ;
15581567
1568+ std::vector<int > zero_int_vec = {0 };
1569+ ggml_tensor* zero_int_tensor = nullptr ;
1570+
15591571 std::map<struct ggml_tensor *, const void *> backend_tensor_data_map;
15601572 std::map<std::string, struct ggml_tensor *> cache_tensor_map; // name -> tensor
15611573 const std::string final_result_name = " ggml_runner_final_result_tensor" ;
@@ -1626,10 +1638,15 @@ struct GGMLRunner {
16261638 one_tensor = ggml_new_tensor_1d (compute_ctx, GGML_TYPE_F32, 1 );
16271639 ggml_set_name (one_tensor, " ggml_runner_build_in_tensor:one" );
16281640 set_backend_tensor_data (one_tensor, one_vec.data ());
1641+
1642+ zero_int_tensor = ggml_new_tensor_1d (compute_ctx, GGML_TYPE_I32, 1 );
1643+ ggml_set_name (zero_int_tensor, " ggml_runner_build_in_tensor:zero_int" );
1644+ set_backend_tensor_data (zero_int_tensor, zero_int_vec.data ());
16291645 }
16301646
16311647 void prepare_build_in_tensor_after (struct ggml_cgraph * gf) {
16321648 ggml_build_forward_expand (gf, one_tensor);
1649+ ggml_build_forward_expand (gf, zero_int_tensor);
16331650 }
16341651
16351652 struct ggml_cgraph * new_graph_custom (size_t graph_size) {
0 commit comments