
Commit 9303bbf

Chenxiaotao03 committed:

Delete the depthwise_conv_2d and permute_cpy related code, replacing both with the existing functions; optimize the ldp definition; and add a LLAMA_PERF option for CMake.

1 parent ea6cdcc commit 9303bbf

File tree: 5 files changed, +46 / -246 lines

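The core substitution in this commit is mechanical: the fork-local ggml_permute_cpy helper permuted a tensor and materialized the result in one call, and the same effect is obtained with the stock ggml ops ggml_permute (which only rewrites the view) followed by ggml_cont (which copies into contiguous memory). A minimal sketch of the equivalence, with the argument order taken from the call sites in the clip.cpp diff below:

#include "ggml.h"

// Stock-op equivalent of the removed fork-local ggml_permute_cpy helper:
// permute the axes, then force a contiguous copy so that follow-up ops
// such as ggml_reshape_4d (which expect contiguous data) stay valid.
static struct ggml_tensor * permute_cpy_equiv(struct ggml_context * ctx,
                                              struct ggml_tensor * t,
                                              int axis0, int axis1, int axis2, int axis3) {
    return ggml_cont(ctx, ggml_permute(ctx, t, axis0, axis1, axis2, axis3));
}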

CMakeLists.txt

Lines changed: 7 additions & 0 deletions

@@ -107,6 +107,13 @@ option(LLAMA_BUILD_TESTS "llama: build tests" ${LLAMA_STANDALONE})
 option(LLAMA_BUILD_EXAMPLES "llama: build examples" ${LLAMA_STANDALONE})
 option(LLAMA_BUILD_SERVER "llama: build server example" ON)
 
+
+# add perf arguments
+option(LLAMA_PERF "llama: enable perf" OFF)
+if (LLAMA_PERF)
+    add_definitions(-DGGML_PERF)
+endif()
+
 # Required for relocatable CMake package
 include(${CMAKE_CURRENT_SOURCE_DIR}/scripts/build-info.cmake)
 
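For context, GGML_PERF is a compile-time switch in ggml: when -DGGML_PERF is defined, graph execution accumulates per-node timing counters that ggml_graph_print reports after a run. A minimal sketch of how the new option would be exercised from C code, assuming the CPU backend and ggml's single-context compute helper (counter fields and output format vary by ggml revision):

#include "ggml.h"

int main(void) {
    // small scratch context; enough for two 64x64 matrices and the graph
    struct ggml_init_params params = {
        /*.mem_size   =*/ 16 * 1024 * 1024,
        /*.mem_buffer =*/ NULL,
        /*.no_alloc   =*/ false,
    };
    struct ggml_context * ctx = ggml_init(params);

    struct ggml_tensor * a = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 64, 64);
    struct ggml_tensor * b = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 64, 64);
    ggml_set_f32(a, 1.0f);
    ggml_set_f32(b, 2.0f);

    struct ggml_cgraph * gf = ggml_new_graph(ctx);
    ggml_build_forward_expand(gf, ggml_mul_mat(ctx, a, b));
    ggml_graph_compute_with_ctx(ctx, gf, /*n_threads=*/4);

    // with LLAMA_PERF=ON (and therefore GGML_PERF) the per-node timing
    // columns in this dump are populated instead of staying at zero
    ggml_graph_print(gf);

    ggml_free(ctx);
    return 0;
}

Configuring with cmake -DLLAMA_PERF=ON is what turns the define on via the add_definitions call above.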

android/build_64.sh

Lines changed: 1 addition & 1 deletion

@@ -3,6 +3,6 @@ cmake ../../ \
 -DCMAKE_TOOLCHAIN_FILE=$ANDROID_NDK/build/cmake/android.toolchain.cmake \
 -DCMAKE_BUILD_TYPE=Release \
 -DANDROID_ABI="arm64-v8a" \
--DANDROID_PLATFORM=android-23
+-DANDROID_PLATFORM=android-23 $1
 
 make -j4
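With the trailing $1, the wrapper now forwards one extra argument straight to cmake, so the new switch can be enabled from the Android build with, for example, ./build_64.sh -DLLAMA_PERF=ON.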

examples/llava/clip.cpp

Lines changed: 27 additions & 24 deletions

@@ -583,25 +583,24 @@ static ggml_cgraph * clip_image_build_graph(clip_ctx * ctx, const clip_image_f32
         mlp_1 = ggml_gelu(ctx0, mlp_1);
         struct ggml_tensor * mlp_3 = ggml_mul_mat(ctx0, model.mm_model_mlp_3_w, mlp_1);
         mlp_3 = ggml_add(ctx0, mlp_3, model.mm_model_mlp_3_b);
-        // transpose from [1, 576, 2048] --> [1, 24, 24, 2048] --> [1, 2048, 24, 24]
-        mlp_3 = ggml_reshape_4d(ctx0, mlp_3, mlp_3->ne[0], n_patch, n_patch, mlp_3->ne[3]);
-        // permute logic is src idxs 0,1,2,3 perm to dst idxs
-        mlp_3 = ggml_permute_cpy(ctx0, mlp_3, 2, 0, 1, 3);
-        // mlp_3 shape = [1, 2048, 24, 24], ne = [24, 24, 2048, 1]
+        // mlp_3 shape = [1, 576, 2048], ne = [2048, 576, 1, 1]
 
         // block 1
         struct ggml_tensor * block_1 = nullptr;
         {
+            // transpose from [1, 576, 2048] --> [1, 2048, 576] --> [1, 2048, 24, 24]
+            mlp_3 = ggml_cont(ctx0, ggml_permute(ctx0, mlp_3, 1, 0, 2, 3));
+            mlp_3 = ggml_reshape_4d(ctx0, mlp_3, n_patch, n_patch, mlp_3->ne[1], mlp_3->ne[2]);
             // stride = 1, padding = 1, bias is nullptr
             block_1 = ggml_conv_depthwise_2d(ctx0, model.mm_model_block_1_block_0_0_w, mlp_3, nullptr, 1, 1, 1, 1, 1, 1);
 
             // layer norm
             // // block_1 shape = [1, 2048, 24, 24], ne = [24, 24, 2048, 1]
-            block_1 = ggml_permute_cpy(ctx0, block_1, 1, 2, 0, 3);
+            block_1 = ggml_cont(ctx0, ggml_permute(ctx0, block_1, 1, 2, 0, 3));
             // block_1 shape = [1, 24, 24, 2048], ne = [2048, 24, 24, 1]
             block_1 = ggml_norm(ctx0, block_1, eps);
             block_1 = ggml_add(ctx0, ggml_mul(ctx0, block_1, model.mm_model_block_1_block_0_1_w), model.mm_model_block_1_block_0_1_b);
-            block_1 = ggml_permute_cpy(ctx0, block_1, 2, 0, 1, 3);
+            block_1 = ggml_cont(ctx0, ggml_permute(ctx0, block_1, 2, 0, 1, 3));
 
             // block_1 shape = [1, 2048, 24, 24], ne = [24, 24, 2048, 1]
             // hardswish
@@ -621,17 +620,18 @@ static ggml_cgraph * clip_image_build_graph(clip_ctx * ctx, const clip_image_f32
             block_1 = ggml_reshape_4d(ctx0, block_1, 1, 1, block_1->ne[0], block_1->ne[1]);
             block_1 = ggml_mul(ctx0, block_1_hw, block_1);
 
-            // block_1 shape = [1, 2048, 24, 24], ne = [24, 24, 2048, 1]
-            struct ggml_tensor* block_2_0_w_4d = ggml_reshape_4d(ctx0, model.mm_model_block_1_block_2_0_w, 1, 1,
-                model.mm_model_block_1_block_2_0_w->ne[0], model.mm_model_block_1_block_2_0_w->ne[1]);
-            block_1 = ggml_conv_2d(ctx0, block_2_0_w_4d, block_1, 1, 1, 0, 0, 1, 1);
+            int w = block_1->ne[0], h = block_1->ne[1];
+            block_1 = ggml_reshape_3d(ctx0, block_1, w*h, block_1->ne[2], block_1->ne[3]);
+            block_1 = ggml_cont(ctx0, ggml_permute(ctx0, block_1, 1, 0, 2, 3));
+
+            // block_1 shape = [1, 24*24, 2048], ne = [24*24, 2048, 1]
+            block_1 = ggml_mul_mat(ctx0, model.mm_model_block_1_block_2_0_w, block_1);
+            block_1 = ggml_reshape_4d(ctx0, block_1, block_1->ne[0], w, h, block_1->ne[3]);
 
-            // layernorm
-            block_1 = ggml_permute_cpy(ctx0, block_1, 1, 2, 0, 3);
             // block_1 shape = [1, 24, 24, 2048], ne = [2048, 24, 24, 1]
             block_1 = ggml_norm(ctx0, block_1, eps);
             block_1 = ggml_add(ctx0, ggml_mul(ctx0, block_1, model.mm_model_block_1_block_2_1_w), model.mm_model_block_1_block_2_1_b);
-            block_1 = ggml_permute_cpy(ctx0, block_1, 2, 0, 1, 3);
+            block_1 = ggml_cont(ctx0, ggml_permute(ctx0, block_1, 2, 0, 1, 3));
             // block1 shape = [1, 2048, 24, 24], ne = [24, 24, 2048, 1]
             // residual
             block_1 = ggml_add(ctx0, mlp_3, block_1);
@@ -644,11 +644,11 @@ static ggml_cgraph * clip_image_build_graph(clip_ctx * ctx, const clip_image_f32
 
             // block_1 shape = [1, 2048, 12, 12], ne = [12, 12, 2048, 1]
             // layer norm
-            block_1 = ggml_permute_cpy(ctx0, block_1, 1, 2, 0, 3);
+            block_1 = ggml_cont(ctx0, ggml_permute(ctx0, block_1, 1, 2, 0, 3));
             // block_1 shape = [1, 12, 12, 2048], ne = [2048, 12, 12, 1]
             block_1 = ggml_norm(ctx0, block_1, eps);
             block_1 = ggml_add(ctx0, ggml_mul(ctx0, block_1, model.mm_model_block_2_block_0_1_w), model.mm_model_block_2_block_0_1_b);
-            block_1 = ggml_permute_cpy(ctx0, block_1, 2, 0, 1, 3);
+            block_1 = ggml_cont(ctx0, ggml_permute(ctx0, block_1, 2, 0, 1, 3));
             // block_1 shape = [1, 2048, 12, 12], ne = [12, 12, 2048, 1]
             // hardswish
             struct ggml_tensor * block_1_hw = ggml_hardswish(ctx0, block_1);
@@ -664,22 +664,25 @@ static ggml_cgraph * clip_image_build_graph(clip_ctx * ctx, const clip_image_f32
             block_1 = ggml_mul_mat(ctx0, model.mm_model_block_2_block_1_fc2_w, block_1);
             block_1 = ggml_add(ctx0, block_1, model.mm_model_block_2_block_1_fc2_b);
             block_1 = ggml_hardsigmoid(ctx0, block_1);
-
+
             // block_1_hw shape = [1, 2048, 12, 12], ne = [12, 12, 2048, 1], block_1 shape = [1, 2048, 1, 1], ne = [1, 1, 2048, 1]
             block_1 = ggml_reshape_4d(ctx0, block_1, 1, 1, block_1->ne[0], block_1->ne[1]);
             block_1 = ggml_mul(ctx0, block_1_hw, block_1);
-            // block_1 shape = [1, 2048, 12, 12], ne = [12, 12, 2048, 1]
-            struct ggml_tensor* block_2_0_w_4d = ggml_reshape_4d(ctx0, model.mm_model_block_2_block_2_0_w, 1, 1,
-                model.mm_model_block_2_block_2_0_w->ne[0], model.mm_model_block_1_block_2_0_w->ne[1]);
-            block_1 = ggml_conv_2d(ctx0, block_2_0_w_4d, block_1, 1, 1, 0, 0, 1, 1);
-            // layernorm
-            block_1 = ggml_permute_cpy(ctx0, block_1, 1, 2, 0, 3);
+
+            int w = block_1->ne[0], h = block_1->ne[1];
+            block_1 = ggml_reshape_3d(ctx0, block_1, w*h, block_1->ne[2], block_1->ne[3]);
+            block_1 = ggml_cont(ctx0, ggml_permute(ctx0, block_1, 1, 0, 2, 3));
+            // block_1 shape = [1, 24*24, 2048], ne = [24*24, 2048, 1]
+            block_1 = ggml_mul_mat(ctx0, model.mm_model_block_2_block_2_0_w, block_1);
+            block_1 = ggml_reshape_4d(ctx0, block_1, block_1->ne[0], w, h, block_1->ne[3]);
+
+
             // block_1 shape = [1, 12, 12, 2048], ne = [2048, 12, 12, 1]
             block_1 = ggml_norm(ctx0, block_1, eps);
             block_1 = ggml_add(ctx0, ggml_mul(ctx0, block_1, model.mm_model_block_2_block_2_1_w), model.mm_model_block_2_block_2_1_b);
             block_1 = ggml_reshape_3d(ctx0, block_1, block_1->ne[0], block_1->ne[1] * block_1->ne[2], block_1->ne[3]);
             // block_1 shape = [1, 144, 2048], ne = [2048, 144, 1]
-        }
+        }
         embeddings = block_1;
     }
     else {
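The larger rewrite in these hunks removes the ggml_conv_2d calls entirely: since mm_model_block_1_block_2_0_w and mm_model_block_2_block_2_0_w act as 1x1 pointwise convolutions, the same projection can be computed with the existing ggml_mul_mat by flattening the spatial dimensions, moving channels to the inner dimension, and reshaping back, which is exactly what the added lines do. A standalone sketch of that equivalence (the helper name and tensor layout comments are illustrative, not taken from the repository):

#include "ggml.h"

// 1x1 (pointwise) convolution expressed through ggml_mul_mat,
// mirroring the replacement applied in clip.cpp above.
//   w: ne = [C_in, C_out]      (the flattened 1x1 kernel)
//   x: ne = [W, H, C_in, N]    (feature map)
static struct ggml_tensor * pointwise_conv_as_mul_mat(struct ggml_context * ctx,
                                                      struct ggml_tensor * w,
                                                      struct ggml_tensor * x) {
    const int64_t W = x->ne[0], H = x->ne[1];
    x = ggml_reshape_3d(ctx, x, W * H, x->ne[2], x->ne[3]);      // ne = [W*H, C_in, N]
    x = ggml_cont(ctx, ggml_permute(ctx, x, 1, 0, 2, 3));        // ne = [C_in, W*H, N]
    x = ggml_mul_mat(ctx, w, x);                                 // ne = [C_out, W*H, N]
    // keep channels innermost so a following ggml_norm runs per pixel over C_out
    return ggml_reshape_4d(ctx, x, x->ne[0], W, H, x->ne[2]);    // ne = [C_out, W, H, N]
}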
