@@ -289,12 +289,48 @@ func (o options) generateBody(name string, executeSingleTriple func(ctx *execute
289
289
290
290
// Update states, max tablelog 28
291
291
{
292
- Comment ("Update Literal Length State" )
293
- o .updateState (name + "_llState" , llState , brValue , brBitsRead , "llTable" )
294
- Comment ("Update Match Length State" )
295
- o .updateState (name + "_mlState" , mlState , brValue , brBitsRead , "mlTable" )
296
- Comment ("Update Offset State" )
297
- o .updateState (name + "_ofState" , ofState , brValue , brBitsRead , "ofTable" )
292
+ if o .bmi2 {
293
+ // Get total number of bits (it is safe, as nBits is <= 9, thus 3*9 < 255)
294
+ total := GP64 ()
295
+ LEAQ (Mem {Base : llState , Index : mlState , Scale : 1 }, total )
296
+ ADDQ (ofState , total )
297
+ MOVBQZX (total .As8 (), total ) // total = llState.As8() + mlState.As8() + ofState.As8()
298
+
299
+ // Read `total` bits
300
+ bits := o .getBitsValue (name + "_getBits" , total , brValue , brBitsRead )
301
+
302
+ // Update states
303
+ Comment ("Update Offset State" )
304
+ {
305
+ nBits := ofState // Note: SHRXQ uses lower 6 bits of shift amount and BZHIQ lower 8 bits of count
306
+ lowBits := GP64 ()
307
+ BZHIQ (nBits , bits , lowBits ) // lowBits = bits & ((1 << nBits) - 1))
308
+ SHRXQ (nBits , bits , bits ) // bits >>= nBits
309
+ o .nextState (name + "_ofState" , ofState , lowBits , "ofTable" )
310
+ }
311
+ Comment ("Update Match Length State" )
312
+ {
313
+ nBits := mlState
314
+ lowBits := GP64 ()
315
+ BZHIQ (nBits , bits , lowBits ) // lowBits = bits & ((1 << nBits) - 1))
316
+ SHRXQ (nBits , bits , bits ) // lowBits >>= nBits
317
+ o .nextState (name + "_mlState" , mlState , lowBits , "mlTable" )
318
+ }
319
+ Comment ("Update Literal Length State" )
320
+ {
321
+ nBits := llState
322
+ lowBits := GP64 ()
323
+ BZHIQ (nBits , bits , lowBits ) // lowBits = bits & ((1 << nBits) - 1))
324
+ o .nextState (name + "_llState" , llState , lowBits , "llTable" )
325
+ }
326
+ } else {
327
+ Comment ("Update Literal Length State" )
328
+ o .updateState (name + "_llState" , llState , brValue , brBitsRead , "llTable" )
329
+ Comment ("Update Match Length State" )
330
+ o .updateState (name + "_mlState" , mlState , brValue , brBitsRead , "mlTable" )
331
+ Comment ("Update Offset State" )
332
+ o .updateState (name + "_ofState" , ofState , brValue , brBitsRead , "ofTable" )
333
+ }
298
334
}
299
335
Label (name + "_skip_update" )
300
336
@@ -624,6 +660,39 @@ func (o options) updateState(name string, state, brValue, brBitsRead reg.GPVirtu
624
660
MOVQ (Mem {Base : tablePtr , Index : DX , Scale : 8 }, state )
625
661
}
626
662
663
+ func (o options ) nextState (name string , state , lowBits reg.GPVirtual , table string ) {
664
+ DX := GP64 ()
665
+ if o .bmi2 {
666
+ tmp := GP64 ()
667
+ MOVQ (U32 (16 | (16 << 8 )), tmp )
668
+ BEXTRQ (tmp , state , DX )
669
+ } else {
670
+ MOVQ (state , DX )
671
+ SHRQ (U8 (16 ), DX )
672
+ MOVWQZX (DX .As16 (), DX )
673
+ }
674
+
675
+ ADDQ (lowBits , DX )
676
+
677
+ // Load table pointer
678
+ tablePtr := GP64 ()
679
+ Comment ("Load ctx." + table )
680
+ ctx := Dereference (Param ("ctx" ))
681
+ tableA , err := ctx .Field (table ).Base ().Resolve ()
682
+ if err != nil {
683
+ panic (err )
684
+ }
685
+ MOVQ (tableA .Addr , tablePtr )
686
+
687
+ // Check if below tablelog
688
+ assert (func (ok LabelRef ) {
689
+ CMPQ (DX , U32 (512 ))
690
+ JB (ok )
691
+ })
692
+ // Load new state
693
+ MOVQ (Mem {Base : tablePtr , Index : DX , Scale : 8 }, state )
694
+ }
695
+
627
696
// getBits will return nbits bits from brValue.
628
697
// If nbits == 0 it *may* jump to jmpZero, otherwise 0 is returned.
629
698
func (o options ) getBits (name string , nBits , brValue , brBitsRead reg.GPVirtual , jmpZero LabelRef ) reg.GPVirtual {
@@ -649,6 +718,33 @@ func (o options) getBits(name string, nBits, brValue, brBitsRead reg.GPVirtual,
649
718
return BX
650
719
}
651
720
721
+ // getBits will return nbits bits from brValue.
722
+ // If nbits == 0 then 0 is returned.
723
+ func (o options ) getBitsValue (name string , nBits , brValue , brBitsRead reg.GPVirtual ) reg.GPVirtual {
724
+ BX := GP64 ()
725
+ CX := reg .CL
726
+ if o .bmi2 {
727
+ LEAQ (Mem {Base : brBitsRead , Index : nBits , Scale : 1 }, CX .As64 ())
728
+ MOVQ (brValue , BX )
729
+ MOVQ (CX .As64 (), brBitsRead )
730
+ ROLQ (CX , BX )
731
+ BZHIQ (nBits , BX , BX )
732
+ } else {
733
+ XORQ (BX , BX )
734
+ CMPQ (nBits , U8 (0 ))
735
+ JZ (LabelRef (name + "_get_bits_value_zero" ))
736
+ MOVQ (brBitsRead , CX .As64 ())
737
+ ADDQ (nBits , brBitsRead )
738
+ MOVQ (brValue , BX )
739
+ SHLQ (CX , BX )
740
+ MOVQ (nBits , CX .As64 ())
741
+ NEGQ (CX .As64 ())
742
+ SHRQ (CX , BX )
743
+ Label (name + "_get_bits_value_zero" )
744
+ }
745
+ return BX
746
+ }
747
+
652
748
func (o options ) adjustOffset (name string , moP , llP Mem , offsetB reg.GPVirtual , offsets * [3 ]reg.GPVirtual ) (offset reg.GPVirtual ) {
653
749
offset = GP64 ()
654
750
MOVQ (moP , offset )
0 commit comments