
Commit 6569cd9

cleaning up
1 parent 432938d commit 6569cd9

3 files changed: +8 -60 lines

3 files changed

+8
-60
lines changed

examples/llava/clip.cpp

Lines changed: 2 additions & 54 deletions
@@ -877,10 +877,6 @@ static ggml_cgraph * clip_image_build_graph_legacy(clip_ctx * ctx, const clip_im
  auto inp_1 = ggml_conv_2d(ctx0, model.patch_embeddings_1, inp_raw, patch_size, patch_size, 0, 0, 1, 1);
  inp = ggml_add(ctx0, inp, inp_1);

- // ggml_build_forward_expand(gf, inp);
- // ggml_free(ctx0);
- // return gf;
-
  inp = ggml_cont(ctx0, ggml_permute(ctx0, inp, 1, 2, 0, 3));  // [w, h, c, b] -> [c, w, h, b]
  inp = ggml_reshape_4d(
      ctx0, inp,
@@ -892,10 +888,6 @@ static ggml_cgraph * clip_image_build_graph_legacy(clip_ctx * ctx, const clip_im
  inp = ggml_reshape_3d(
      ctx0, inp,
      hidden_size, patches_w * patches_h, batch_size);
-
- // ggml_build_forward_expand(gf, inp);
- // ggml_free(ctx0);
- // return gf;
  }
  else {
      inp = ggml_reshape_3d(ctx0, inp, num_patches, hidden_size, batch_size);
@@ -984,18 +976,6 @@ static ggml_cgraph * clip_image_build_graph_legacy(clip_ctx * ctx, const clip_im
  embeddings = ggml_reshape_2d(ctx0, embeddings, hidden_size * 4, patches_w * patches_h * batch_size / 4);
  embeddings = ggml_get_rows(ctx0, embeddings, inv_window_idx);
  embeddings = ggml_reshape_3d(ctx0, embeddings, hidden_size, patches_w * patches_h, batch_size);
-
- // positions = ggml_reshape_2d(ctx0, positions, num_position_ids / 4, 4);
- // positions = ggml_cont(ctx0, ggml_permute(ctx0, positions, 1, 0, 2, 3));
- // positions = ggml_reshape_2d(ctx0, positions, 16, num_position_ids / 16);
- // positions = ggml_get_rows(ctx0, positions, inv_window_idx);
- // positions = ggml_reshape_2d(ctx0, positions, 4, num_position_ids / 4);
- // positions = ggml_cont(ctx0, ggml_permute(ctx0, positions, 1, 0, 2, 3));
- // positions = ggml_reshape_1d(ctx0, positions, num_position_ids);
-
- // ggml_build_forward_expand(gf, embeddings);
- // ggml_free(ctx0);
- // return gf;
  }

  for (int il = 0; il < ctx->max_feature_layer; il++) {
@@ -1019,12 +999,6 @@ static ggml_cgraph * clip_image_build_graph_legacy(clip_ctx * ctx, const clip_im
  cur = ggml_add(ctx0, ggml_mul(ctx0, cur, model.layers[il].ln_1_w),
                 model.layers[il].ln_1_b);
  }
- // if ( il == 0) {
- // // build the graph
- // ggml_build_forward_expand(gf, cur);
- // ggml_free(ctx0);
- // return gf;
- // }

  // self-attention
  {
@@ -1068,17 +1042,10 @@ static ggml_cgraph * clip_image_build_graph_legacy(clip_ctx * ctx, const clip_im
  KQ = ggml_soft_max_inplace(ctx0, KQ);
  } else {
  KQ = ggml_soft_max_ext(ctx0, KQ, window_mask, 1.0f, 0.0f);
-
  // KQ = ggml_scale_inplace(ctx0, KQ, 1.0f / sqrt((float)d_head));
  // KQ = ggml_add(ctx0, KQ, window_mask);
  // KQ = ggml_soft_max_inplace(ctx0, KQ);
  }
- // if ( il == 0) {
- // // build the graph
- // ggml_build_forward_expand(gf, KQ);
- // ggml_free(ctx0);
- // return gf;
- // }

  struct ggml_tensor * KQV = ggml_mul_mat(ctx0, V, KQ);
  KQV = ggml_reshape_4d(ctx0, KQV, d_head, num_positions, n_head, batch_size);
@@ -1094,12 +1061,6 @@ static ggml_cgraph * clip_image_build_graph_legacy(clip_ctx * ctx, const clip_im
  cur = ggml_add(ctx0, cur, embeddings);

  embeddings = cur; // embeddings = residual, cur = hidden_states
- // if ( il == 0) {
- // // build the graph
- // ggml_build_forward_expand(gf, cur);
- // ggml_free(ctx0);
- // return gf;
- // }

  // layernorm2
  if (ctx->use_rms_norm) {
@@ -1151,19 +1112,8 @@ static ggml_cgraph * clip_image_build_graph_legacy(clip_ctx * ctx, const clip_im
  cur = ggml_add(ctx0, embeddings, cur);

  embeddings = cur;
-
- // if ( il == 0) {
- // // build the graph
- // ggml_build_forward_expand(gf, embeddings);
- // ggml_free(ctx0);
- // return gf;
- // }
  }

- // ggml_build_forward_expand(gf, embeddings);
- // ggml_free(ctx0);
- // return gf;
-
  // post-layernorm
  if (ctx->has_post_norm) {
  if (ctx->use_rms_norm) {
@@ -3161,9 +3111,6 @@ bool clip_image_batch_encode(clip_ctx * ctx, const int n_threads, const clip_ima
  }

  struct ggml_tensor * positions = ggml_graph_get_tensor(gf, "positions");
-
- // const int pw = image_size_width / patch_size;
- // const int ph = image_size_height / patch_size;
  const int mpow = (merge_ratio * merge_ratio);
  int* positions_data = (int*)malloc(ggml_nbytes(positions));

@@ -3176,6 +3123,7 @@ bool clip_image_batch_encode(clip_ctx * ctx, const int n_threads, const clip_ima
  for (int dx = 0; dx < 2; dx++) {
      auto remap = idx[ptr / mpow];
      remap = remap * mpow + (ptr % mpow);
+     // auto remap = ptr;

      positions_data[remap] = y + dy;
      positions_data[num_patches + remap] = x + dx;
@@ -3187,7 +3135,7 @@ bool clip_image_batch_encode(clip_ctx * ctx, const int n_threads, const clip_ima
      }
  }

- if (positions) ggml_backend_tensor_set(positions, positions_data, 0, ggml_nbytes(positions));
+ ggml_backend_tensor_set(positions, positions_data, 0, ggml_nbytes(positions));
  free(positions_data);
  }
  else if (ctx->proj_type == PROJECTOR_TYPE_GEMMA3) {
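The index remapping kept by this cleanup, remap = idx[ptr / mpow] * mpow + (ptr % mpow), sends each group of merge_ratio * merge_ratio sub-patches to the slot chosen by the window permutation idx while preserving the order of the sub-patches inside a group. A minimal, self-contained C++ sketch of just that arithmetic (the idx values below are invented for illustration, not taken from a real model):

#include <cstdio>
#include <vector>

int main() {
    const int merge_ratio = 2;
    const int mpow = merge_ratio * merge_ratio;   // 4 sub-patches per merged patch
    const std::vector<int> idx = {2, 0, 3, 1};    // hypothetical window permutation of groups
    const int num_patches = (int) idx.size() * mpow;

    for (int ptr = 0; ptr < num_patches; ++ptr) {
        // Same arithmetic as in clip_image_batch_encode(): the group index follows the
        // permutation, the offset inside the group (ptr % mpow) is preserved.
        const int remap = idx[ptr / mpow] * mpow + (ptr % mpow);
        printf("ptr=%2d -> remap=%2d\n", ptr, remap);
    }
    return 0;
}

With idx = {2, 0, 3, 1}, sub-patches 0-3 land in slots 8-11, 4-7 in slots 0-3, 8-11 in slots 12-15, and 12-15 in slots 4-7.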

examples/llava/qwen2_vl_surgery.py

Lines changed: 1 addition & 1 deletion
@@ -102,7 +102,7 @@ def main(args):
          np_dtype = np.float32
          ftype = 0
      elif args.data_type == 'fp16':
-         dtype = torch.float32
+         dtype = torch.float16
          np_dtype = np.float16
          ftype = 1
      else:

examples/llava/qwen2vl-cli.cpp

Lines changed: 5 additions & 5 deletions
@@ -771,10 +771,10 @@ enum model_output_type {
  };

  static void debug_dump_img_embed(struct llava_context * ctx_llava, model_output_type output_type) {
-     int ih = 140;
-     int iw = 196;
-     // int ih = 56;
-     // int iw = 56;
+     constexpr int ih = 140;
+     constexpr int iw = 196;
+     // constexpr int ih = 56;
+     // constexpr int iw = 56;
      // int n_embd = llama_model_n_embd(llama_get_model(ctx_llava->ctx_llama));
      int n_embd = 1280;
      int merge = 1;
@@ -954,7 +954,7 @@ int main(int argc, char ** argv) {

      // debug_test_mrope_2d();
      debug_dump_img_embed(ctx_llava, model_output_type::final_layer);
-     // debug_dump_img_embed(ctx_llava, model_output_type::conv3d);
+     // debug_dump_img_embed(ctx_llava, model_output_type::last_attn_layer);
      // debug_test_get_rows();
      // dump_win_attn_mask();
      // debug_patch_layout();
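The constexpr change in debug_dump_img_embed only promotes the hard-coded debug dimensions to compile-time constants; behaviour is unchanged. A small sketch of what that allows, assuming a 14-pixel patch size purely for illustration (this helper is not part of the CLI):

#include <array>
#include <cstdio>

constexpr int ih = 140;      // debug image height, as in the diff
constexpr int iw = 196;      // debug image width, as in the diff
constexpr int patch = 14;    // assumed ViT patch size, illustration only
constexpr int n_patches = (ih / patch) * (iw / patch);

// constexpr values can be checked and used at compile time,
// which a plain `int ih = 140;` cannot be.
static_assert(ih % patch == 0 && iw % patch == 0, "dims must be patch-aligned");

int main() {
    std::array<float, n_patches> scratch{};   // array size fixed at compile time
    printf("patch grid: %d x %d = %zu patches\n", ih / patch, iw / patch, scratch.size());
    return 0;
}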
