@@ -567,76 +567,6 @@ static void debug_dump_img_embed(struct llava_context * ctx_llava, model_output_
567
567
}
568
568
}
569
569
570
-
571
// Debug helper: builds the window-attention mask for one hard-coded example
// image (196x140 pixels, 14-pixel patches, 112-pixel attention windows, 2x2
// patch merge) and dumps it as raw float32 values to the relative path stored
// in `output_path`.
//
// The mask is a square, row-major [n_tokens, n_tokens] matrix, where
// n_tokens = ipw * iph. Rows and columns are laid out in window-grouped
// order: all tokens of the first window come first, then all tokens of the
// next window, and so on. An entry is 0.0f when its row and column belong to
// the same window and std::numeric_limits<float>::lowest() otherwise.
//
// Takes no arguments and returns nothing; success or failure is reported on
// std::cout / std::cerr.
static void dump_win_attn_mask() {
    constexpr int image_size_width  = 196;
    constexpr int image_size_height = 140;
    constexpr int patch_size        = 14;
    constexpr int attn_window_size  = 112;
    constexpr int merge_ratio       = 2;

    constexpr int ipw = image_size_width  / patch_size;
    constexpr int iph = image_size_height / patch_size;
    constexpr int pw  = ipw / merge_ratio;
    constexpr int ph  = iph / merge_ratio;
    constexpr int grid_window = attn_window_size / patch_size / merge_ratio;
    /*
        pw  * ph  = number of tokens output by ViT after applying the patch merger
        ipw * iph = number of vision tokens processed inside ViT
    */
    constexpr int merge_area = merge_ratio * merge_ratio;
    constexpr int n_tokens   = ipw * iph;

    // The row/column arithmetic below is only in bounds when every merged
    // token expands to exactly merge_area vision tokens and windows advance.
    static_assert(grid_window > 0, "attention window must cover at least one merged token");
    static_assert(pw * ph * merge_area == n_tokens, "patch grid must be divisible by merge_ratio");

    // [num_vision_tokens, num_vision_tokens] attention mask tensor.
    // Integer multiplication instead of pow(): no round trip through double.
    const std::size_t ne = static_cast<std::size_t>(n_tokens) * static_cast<std::size_t>(n_tokens);
    std::vector<float> mask(ne, std::numeric_limits<float>::lowest());

    int dst      = 0; // merged tokens already assigned to earlier windows
    int mask_row = 0; // next mask row to fill

    for (int y = 0; y < ph; y += grid_window) {
        for (int x = 0; x < pw; x += grid_window) {
            // windows on the right/bottom border may be clipped
            const int win_h = std::min(grid_window, ph - y);
            const int win_w = std::min(grid_window, pw - x);

            // All tokens that belong to the same window are grouped together
            // into one contiguous range [dst_0, dst) of merged-token indices.
            const int dst_0 = dst;
            dst += win_h * win_w;
            assert(dst <= ph * pw);

            // Every vision token of this window may attend to every vision
            // token of the same window: open that column range in each row.
            const int col_begin = dst_0 * merge_area;
            const int col_end   = dst   * merge_area;
            for (int r = 0; r < win_h * win_w * merge_area; r++) {
                assert(mask_row < n_tokens);
                const std::size_t row_offset = static_cast<std::size_t>(mask_row) * n_tokens;
                std::fill(
                    mask.begin() + row_offset + col_begin,
                    mask.begin() + row_offset + col_end,
                    0.0f);
                mask_row++;
            }
        }
    }
    assert(mask_row == n_tokens);

    auto output_path = " win_attn_mask_fp32.bin";

    std::ofstream outFile(output_path, std::ios::binary);
    if (outFile.is_open()) {
        outFile.write(reinterpret_cast<const char *>(mask.data()),
                      static_cast<std::streamsize>(mask.size() * sizeof(float)));
        outFile.close();

        // write()/close() set failbit on error; do not claim success then
        if (outFile.fail()) {
            std::cerr << " Error writing file!" << '\n';
        } else {
            std::cout << " Data successfully written to " << output_path << '\n';
        }
    } else {
        std::cerr << " Error opening file!" << '\n';
    }
}
638
-
639
-
640
570
#endif
641
571
642
572
0 commit comments