@@ -877,10 +877,6 @@ static ggml_cgraph * clip_image_build_graph_legacy(clip_ctx * ctx, const clip_im
877
877
auto inp_1 = ggml_conv_2d (ctx0, model.patch_embeddings_1 , inp_raw, patch_size, patch_size, 0 , 0 , 1 , 1 );
878
878
inp = ggml_add (ctx0, inp, inp_1);
879
879
880
- // ggml_build_forward_expand(gf, inp);
881
- // ggml_free(ctx0);
882
- // return gf;
883
-
884
880
inp = ggml_cont (ctx0, ggml_permute (ctx0, inp, 1 , 2 , 0 , 3 )); // [w, h, c, b] -> [c, w, h, b]
885
881
inp = ggml_reshape_4d (
886
882
ctx0, inp,
@@ -892,10 +888,6 @@ static ggml_cgraph * clip_image_build_graph_legacy(clip_ctx * ctx, const clip_im
892
888
inp = ggml_reshape_3d (
893
889
ctx0, inp,
894
890
hidden_size, patches_w * patches_h, batch_size);
895
-
896
- // ggml_build_forward_expand(gf, inp);
897
- // ggml_free(ctx0);
898
- // return gf;
899
891
}
900
892
else {
901
893
inp = ggml_reshape_3d (ctx0, inp, num_patches, hidden_size, batch_size);
@@ -984,18 +976,6 @@ static ggml_cgraph * clip_image_build_graph_legacy(clip_ctx * ctx, const clip_im
984
976
embeddings = ggml_reshape_2d (ctx0, embeddings, hidden_size * 4 , patches_w * patches_h * batch_size / 4 );
985
977
embeddings = ggml_get_rows (ctx0, embeddings, inv_window_idx);
986
978
embeddings = ggml_reshape_3d (ctx0, embeddings, hidden_size, patches_w * patches_h, batch_size);
987
-
988
- // positions = ggml_reshape_2d(ctx0, positions, num_position_ids / 4, 4);
989
- // positions = ggml_cont(ctx0, ggml_permute(ctx0, positions, 1, 0, 2, 3));
990
- // positions = ggml_reshape_2d(ctx0, positions, 16, num_position_ids / 16);
991
- // positions = ggml_get_rows(ctx0, positions, inv_window_idx);
992
- // positions = ggml_reshape_2d(ctx0, positions, 4, num_position_ids / 4);
993
- // positions = ggml_cont(ctx0, ggml_permute(ctx0, positions, 1, 0, 2, 3));
994
- // positions = ggml_reshape_1d(ctx0, positions, num_position_ids);
995
-
996
- // ggml_build_forward_expand(gf, embeddings);
997
- // ggml_free(ctx0);
998
- // return gf;
999
979
}
1000
980
1001
981
for (int il = 0 ; il < ctx->max_feature_layer ; il++) {
@@ -1019,12 +999,6 @@ static ggml_cgraph * clip_image_build_graph_legacy(clip_ctx * ctx, const clip_im
1019
999
cur = ggml_add (ctx0, ggml_mul (ctx0, cur, model.layers [il].ln_1_w ),
1020
1000
model.layers [il].ln_1_b );
1021
1001
}
1022
- // if ( il == 0) {
1023
- // // build the graph
1024
- // ggml_build_forward_expand(gf, cur);
1025
- // ggml_free(ctx0);
1026
- // return gf;
1027
- // }
1028
1002
1029
1003
// self-attention
1030
1004
{
@@ -1068,17 +1042,10 @@ static ggml_cgraph * clip_image_build_graph_legacy(clip_ctx * ctx, const clip_im
1068
1042
KQ = ggml_soft_max_inplace (ctx0, KQ);
1069
1043
} else {
1070
1044
KQ = ggml_soft_max_ext (ctx0, KQ, window_mask, 1 .0f , 0 .0f );
1071
-
1072
1045
// KQ = ggml_scale_inplace(ctx0, KQ, 1.0f / sqrt((float)d_head));
1073
1046
// KQ = ggml_add(ctx0, KQ, window_mask);
1074
1047
// KQ = ggml_soft_max_inplace(ctx0, KQ);
1075
1048
}
1076
- // if ( il == 0) {
1077
- // // build the graph
1078
- // ggml_build_forward_expand(gf, KQ);
1079
- // ggml_free(ctx0);
1080
- // return gf;
1081
- // }
1082
1049
1083
1050
struct ggml_tensor * KQV = ggml_mul_mat (ctx0, V, KQ);
1084
1051
KQV = ggml_reshape_4d (ctx0, KQV, d_head, num_positions, n_head, batch_size);
@@ -1094,12 +1061,6 @@ static ggml_cgraph * clip_image_build_graph_legacy(clip_ctx * ctx, const clip_im
1094
1061
cur = ggml_add (ctx0, cur, embeddings);
1095
1062
1096
1063
embeddings = cur; // embeddings = residual, cur = hidden_states
1097
- // if ( il == 0) {
1098
- // // build the graph
1099
- // ggml_build_forward_expand(gf, cur);
1100
- // ggml_free(ctx0);
1101
- // return gf;
1102
- // }
1103
1064
1104
1065
// layernorm2
1105
1066
if (ctx->use_rms_norm ) {
@@ -1151,19 +1112,8 @@ static ggml_cgraph * clip_image_build_graph_legacy(clip_ctx * ctx, const clip_im
1151
1112
cur = ggml_add (ctx0, embeddings, cur);
1152
1113
1153
1114
embeddings = cur;
1154
-
1155
- // if ( il == 0) {
1156
- // // build the graph
1157
- // ggml_build_forward_expand(gf, embeddings);
1158
- // ggml_free(ctx0);
1159
- // return gf;
1160
- // }
1161
1115
}
1162
1116
1163
- // ggml_build_forward_expand(gf, embeddings);
1164
- // ggml_free(ctx0);
1165
- // return gf;
1166
-
1167
1117
// post-layernorm
1168
1118
if (ctx->has_post_norm ) {
1169
1119
if (ctx->use_rms_norm ) {
@@ -3161,9 +3111,6 @@ bool clip_image_batch_encode(clip_ctx * ctx, const int n_threads, const clip_ima
3161
3111
}
3162
3112
3163
3113
struct ggml_tensor * positions = ggml_graph_get_tensor (gf, " positions" );
3164
-
3165
- // const int pw = image_size_width / patch_size;
3166
- // const int ph = image_size_height / patch_size;
3167
3114
const int mpow = (merge_ratio * merge_ratio);
3168
3115
int * positions_data = (int *)malloc (ggml_nbytes (positions));
3169
3116
@@ -3176,6 +3123,7 @@ bool clip_image_batch_encode(clip_ctx * ctx, const int n_threads, const clip_ima
3176
3123
for (int dx = 0 ; dx < 2 ; dx++) {
3177
3124
auto remap = idx[ptr / mpow];
3178
3125
remap = remap * mpow + (ptr % mpow);
3126
+ // auto remap = ptr;
3179
3127
3180
3128
positions_data[remap] = y + dy;
3181
3129
positions_data[num_patches + remap] = x + dx;
@@ -3187,7 +3135,7 @@ bool clip_image_batch_encode(clip_ctx * ctx, const int n_threads, const clip_ima
3187
3135
}
3188
3136
}
3189
3137
3190
- if (positions) ggml_backend_tensor_set (positions, positions_data, 0 , ggml_nbytes (positions));
3138
+ ggml_backend_tensor_set (positions, positions_data, 0 , ggml_nbytes (positions));
3191
3139
free (positions_data);
3192
3140
}
3193
3141
else if (ctx->proj_type == PROJECTOR_TYPE_GEMMA3) {
0 commit comments