@@ -85,9 +85,9 @@ package runtime
 import (
     "internal/abi"
     "internal/goarch"
-    "internal/goexperiment"
     "internal/runtime/atomic"
     "runtime/internal/math"
+    "runtime/internal/sys"
     "unsafe"
 )
 
@@ -224,14 +224,11 @@ func init() {
 // userArenaChunkReserveBytes returns the amount of additional bytes to reserve for
 // heap metadata.
 func userArenaChunkReserveBytes() uintptr {
-    if goexperiment.AllocHeaders {
-        // In the allocation headers experiment, we reserve the end of the chunk for
-        // a pointer/scalar bitmap. We also reserve space for a dummy _type that
-        // refers to the bitmap. The PtrBytes field of the dummy _type indicates how
-        // many of those bits are valid.
-        return userArenaChunkBytes/goarch.PtrSize/8 + unsafe.Sizeof(_type{})
-    }
-    return 0
+    // In the allocation headers experiment, we reserve the end of the chunk for
+    // a pointer/scalar bitmap. We also reserve space for a dummy _type that
+    // refers to the bitmap. The PtrBytes field of the dummy _type indicates how
+    // many of those bits are valid.
+    return userArenaChunkBytes/goarch.PtrSize/8 + unsafe.Sizeof(_type{})
 }
 
 type userArena struct {
@@ -549,6 +546,202 @@ func userArenaHeapBitsSetSliceType(typ *_type, n int, ptr unsafe.Pointer, s *msp
     }
 }
 
+// userArenaHeapBitsSetType is the equivalent of heapSetType but for
+// non-slice-backing-store Go values allocated in a user arena chunk. It
+// sets up the type metadata for the value with type typ allocated at address ptr.
+// base is the base address of the arena chunk.
+func userArenaHeapBitsSetType(typ *_type, ptr unsafe.Pointer, s *mspan) {
+    base := s.base()
+    h := s.writeUserArenaHeapBits(uintptr(ptr))
+
+    p := typ.GCData // start of 1-bit pointer mask (or GC program)
+    var gcProgBits uintptr
+    if typ.Kind_&abi.KindGCProg != 0 {
+        // Expand gc program, using the object itself for storage.
+        gcProgBits = runGCProg(addb(p, 4), (*byte)(ptr))
+        p = (*byte)(ptr)
+    }
+    nb := typ.PtrBytes / goarch.PtrSize
+
+    for i := uintptr(0); i < nb; i += ptrBits {
+        k := nb - i
+        if k > ptrBits {
+            k = ptrBits
+        }
+        // N.B. On big endian platforms we byte swap the data that we
+        // read from GCData, which is always stored in little-endian order
+        // by the compiler. writeUserArenaHeapBits handles data in
+        // a platform-ordered way for efficiency, but stores back the
+        // data in little endian order, since we expose the bitmap through
+        // a dummy type.
+        h = h.write(s, readUintptr(addb(p, i/8)), k)
+    }
+    // Note: we call pad here to ensure we emit explicit 0 bits
+    // for the pointerless tail of the object. This ensures that
+    // there's only a single noMorePtrs mark for the next object
+    // to clear. We don't need to do this to clear stale noMorePtrs
+    // markers from previous uses because arena chunk pointer bitmaps
+    // are always fully cleared when reused.
+    h = h.pad(s, typ.Size_-typ.PtrBytes)
+    h.flush(s, uintptr(ptr), typ.Size_)
+
+    if typ.Kind_&abi.KindGCProg != 0 {
+        // Zero out temporary ptrmask buffer inside object.
+        memclrNoHeapPointers(ptr, (gcProgBits+7)/8)
+    }
+
+    // Update the PtrBytes value in the type information. After this
+    // point, the GC will observe the new bitmap.
+    s.largeType.PtrBytes = uintptr(ptr) - base + typ.PtrBytes
+
+    // Double-check that the bitmap was written out correctly.
+    const doubleCheck = false
+    if doubleCheck {
+        doubleCheckHeapPointersInterior(uintptr(ptr), uintptr(ptr), typ.Size_, typ.Size_, typ, &s.largeType, s)
+    }
+}
+
+type writeUserArenaHeapBits struct {
+    offset uintptr // offset in span that the low bit of mask represents the pointer state of.
+    mask   uintptr // some pointer bits starting at the address addr.
+    valid  uintptr // number of bits in buf that are valid (including low)
+    low    uintptr // number of low-order bits to not overwrite
+}
+
+func (s *mspan) writeUserArenaHeapBits(addr uintptr) (h writeUserArenaHeapBits) {
+    offset := addr - s.base()
+
+    // We start writing bits maybe in the middle of a heap bitmap word.
+    // Remember how many bits into the word we started, so we can be sure
+    // not to overwrite the previous bits.
+    h.low = offset / goarch.PtrSize % ptrBits
+
+    // round down to heap word that starts the bitmap word.
+    h.offset = offset - h.low*goarch.PtrSize
+
+    // We don't have any bits yet.
+    h.mask = 0
+    h.valid = h.low
+
+    return
+}
+
+// write appends the pointerness of the next valid pointer slots
+// using the low valid bits of bits. 1=pointer, 0=scalar.
+func (h writeUserArenaHeapBits) write(s *mspan, bits, valid uintptr) writeUserArenaHeapBits {
+    if h.valid+valid <= ptrBits {
+        // Fast path - just accumulate the bits.
+        h.mask |= bits << h.valid
+        h.valid += valid
+        return h
+    }
+    // Too many bits to fit in this word. Write the current word
+    // out and move on to the next word.
+
+    data := h.mask | bits<<h.valid       // mask for this word
+    h.mask = bits >> (ptrBits - h.valid) // leftover for next word
+    h.valid += valid - ptrBits           // have h.valid+valid bits, writing ptrBits of them
+
+    // Flush mask to the memory bitmap.
+    idx := h.offset / (ptrBits * goarch.PtrSize)
+    m := uintptr(1)<<h.low - 1
+    bitmap := s.heapBits()
+    bitmap[idx] = bswapIfBigEndian(bswapIfBigEndian(bitmap[idx])&m | data)
+    // Note: no synchronization required for this write because
+    // the allocator has exclusive access to the page, and the bitmap
+    // entries are all for a single page. Also, visibility of these
+    // writes is guaranteed by the publication barrier in mallocgc.
+
+    // Move to next word of bitmap.
+    h.offset += ptrBits * goarch.PtrSize
+    h.low = 0
+    return h
+}
+
+// Add padding of size bytes.
+func (h writeUserArenaHeapBits) pad(s *mspan, size uintptr) writeUserArenaHeapBits {
+    if size == 0 {
+        return h
+    }
+    words := size / goarch.PtrSize
+    for words > ptrBits {
+        h = h.write(s, 0, ptrBits)
+        words -= ptrBits
+    }
+    return h.write(s, 0, words)
+}
+
+// Flush the bits that have been written, and add zeros as needed
+// to cover the full object [addr, addr+size).
+func (h writeUserArenaHeapBits) flush(s *mspan, addr, size uintptr) {
+    offset := addr - s.base()
+
+    // zeros counts the number of bits needed to represent the object minus the
+    // number of bits we've already written. This is the number of 0 bits
+    // that need to be added.
+    zeros := (offset+size-h.offset)/goarch.PtrSize - h.valid
+
+    // Add zero bits up to the bitmap word boundary
+    if zeros > 0 {
+        z := ptrBits - h.valid
+        if z > zeros {
+            z = zeros
+        }
+        h.valid += z
+        zeros -= z
+    }
+
+    // Find word in bitmap that we're going to write.
+    bitmap := s.heapBits()
+    idx := h.offset / (ptrBits * goarch.PtrSize)
+
+    // Write remaining bits.
+    if h.valid != h.low {
+        m := uintptr(1)<<h.low - 1      // don't clear existing bits below "low"
+        m |= ^(uintptr(1)<<h.valid - 1) // don't clear existing bits above "valid"
+        bitmap[idx] = bswapIfBigEndian(bswapIfBigEndian(bitmap[idx])&m | h.mask)
+    }
+    if zeros == 0 {
+        return
+    }
+
+    // Advance to next bitmap word.
+    h.offset += ptrBits * goarch.PtrSize
+
+    // Continue on writing zeros for the rest of the object.
+    // For standard use of the ptr bits this is not required, as
+    // the bits are read from the beginning of the object. Some uses,
+    // like noscan spans, oblets, bulk write barriers, and cgocheck, might
+    // start mid-object, so these writes are still required.
+    for {
+        // Write zero bits.
+        idx := h.offset / (ptrBits * goarch.PtrSize)
+        if zeros < ptrBits {
+            bitmap[idx] = bswapIfBigEndian(bswapIfBigEndian(bitmap[idx]) &^ (uintptr(1)<<zeros - 1))
+            break
+        } else if zeros == ptrBits {
+            bitmap[idx] = 0
+            break
+        } else {
+            bitmap[idx] = 0
+            zeros -= ptrBits
+        }
+        h.offset += ptrBits * goarch.PtrSize
+    }
+}
+
+// bswapIfBigEndian swaps the byte order of the uintptr on goarch.BigEndian platforms,
+// and leaves it alone elsewhere.
+func bswapIfBigEndian(x uintptr) uintptr {
+    if goarch.BigEndian {
+        if goarch.PtrSize == 8 {
+            return uintptr(sys.Bswap64(uint64(x)))
+        }
+        return uintptr(sys.Bswap32(uint32(x)))
+    }
+    return x
+}
+
 // newUserArenaChunk allocates a user arena chunk, which maps to a single
 // heap arena and single span. Returns a pointer to the base of the chunk
 // (this is really important: we need to keep the chunk alive) and the span.
@@ -607,9 +800,7 @@ func newUserArenaChunk() (unsafe.Pointer, *mspan) {
     // TODO(mknyszek): Track individual objects.
     rzSize := computeRZlog(span.elemsize)
     span.elemsize -= rzSize
-    if goexperiment.AllocHeaders {
-        span.largeType.Size_ = span.elemsize
-    }
+    span.largeType.Size_ = span.elemsize
     rzStart := span.base() + span.elemsize
     span.userArenaChunkFree = makeAddrRange(span.base(), rzStart)
     asanpoison(unsafe.Pointer(rzStart), span.limit-rzStart)
@@ -924,13 +1115,12 @@ func (h *mheap) allocUserArenaChunk() *mspan {
     // visible to the background sweeper.
     h.central[spc].mcentral.fullSwept(h.sweepgen).push(s)
 
-    if goexperiment.AllocHeaders {
-        // Set up an allocation header. Avoid write barriers here because this type
-        // is not a real type, and it exists in an invalid location.
-        *(*uintptr)(unsafe.Pointer(&s.largeType)) = uintptr(unsafe.Pointer(s.limit))
-        *(*uintptr)(unsafe.Pointer(&s.largeType.GCData)) = s.limit + unsafe.Sizeof(_type{})
-        s.largeType.PtrBytes = 0
-        s.largeType.Size_ = s.elemsize
-    }
+    // Set up an allocation header. Avoid write barriers here because this type
+    // is not a real type, and it exists in an invalid location.
+    *(*uintptr)(unsafe.Pointer(&s.largeType)) = uintptr(unsafe.Pointer(s.limit))
+    *(*uintptr)(unsafe.Pointer(&s.largeType.GCData)) = s.limit + unsafe.Sizeof(_type{})
+    s.largeType.PtrBytes = 0
+    s.largeType.Size_ = s.elemsize
+
     return s
 }
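
Below is a minimal, standalone sketch (not part of this commit) of the bit-packing idea behind writeUserArenaHeapBits above: pointer/scalar bits are accumulated into a word-sized mask and flushed to the bitmap one word at a time, with any leftover bits carried into the next word. The bitWriter type and the fixed 64-bit word size are invented for illustration only; the runtime version additionally tracks a span-relative offset, preserves pre-existing low bits in a partially written word, and byte-swaps on big-endian platforms.

package main

import "fmt"

const wordBits = 64 // stand-in for ptrBits on a 64-bit platform

type bitWriter struct {
    bitmap []uint64 // destination bitmap, one bit per pointer-sized slot
    idx    int      // next bitmap word to write
    mask   uint64   // bits accumulated so far for bitmap[idx]
    valid  uint     // number of valid low bits in mask
}

// write appends the low n bits of bits (1=pointer, 0=scalar).
func (w *bitWriter) write(bits uint64, n uint) {
    if w.valid+n <= wordBits {
        // Fast path: accumulate into the current word.
        w.mask |= bits << w.valid
        w.valid += n
        return
    }
    // Current word is full: flush it and carry the leftover bits.
    w.bitmap[w.idx] = w.mask | bits<<w.valid
    w.idx++
    w.mask = bits >> (wordBits - w.valid)
    w.valid += n - wordBits
}

// flush writes out any remaining accumulated bits, padding the tail
// of the final word with zeros.
func (w *bitWriter) flush() {
    if w.valid > 0 {
        w.bitmap[w.idx] = w.mask
        w.idx++
    }
    w.mask, w.valid = 0, 0
}

func main() {
    w := &bitWriter{bitmap: make([]uint64, 2)}
    w.write(0b1011, 4) // object A: slots 0, 1, 3 are pointers, slot 2 is a scalar
    w.write(0b01, 2)   // object B: slot 0 is a pointer, slot 1 is a scalar
    w.flush()
    fmt.Printf("%064b\n", w.bitmap[0])
}

The example packs a four-slot mask for one object followed by a two-slot mask for the next, so the printed word holds 0b011011 in its low bits, mirroring how consecutive arena allocations share bitmap words.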