@@ -362,6 +362,105 @@ static bool tryAndOfLogicalImmediates(uint64_t UImm,
362
362
return false ;
363
363
}
364
364
365
+ // Check whether the constant can be represented by exclusive-or of two 64-bit
366
+ // logical immediates. If so, materialize it with an ORR instruction followed
367
+ // by an EOR instruction.
368
+ //
369
+ // This encoding allows all remaining repeated byte patterns, and many repeated
370
+ // 16-bit values, to be encoded without needing four instructions. It can also
371
+ // represent some irregular bitmasks (although those would mostly only need
372
+ // three instructions otherwise).
373
+ static bool tryEorOfLogicalImmediates (uint64_t Imm,
374
+ SmallVectorImpl<ImmInsnModel> &Insn) {
375
+ // Determine the larger repetition size of the two possible logical
376
+ // immediates, by finding the repetition size of Imm.
377
+ unsigned BigSize = 64 ;
378
+
379
+ do {
380
+ BigSize /= 2 ;
381
+ uint64_t Mask = (1ULL << BigSize) - 1 ;
382
+
383
+ if ((Imm & Mask) != ((Imm >> BigSize) & Mask)) {
384
+ BigSize *= 2 ;
385
+ break ;
386
+ }
387
+ } while (BigSize > 2 );
388
+
389
+ uint64_t BigMask = ((uint64_t )-1LL ) >> (64 - BigSize);
390
+
391
+ // Find the last bit of each run of ones, circularly. For runs which wrap
392
+ // around from bit 0 to bit 63, this is the bit before the most-significant
393
+ // zero, otherwise it is the least-significant bit in the run of ones.
394
+ uint64_t RunStarts = Imm & ~rotl<uint64_t >(Imm, 1 );
395
+
396
+ // Find the smaller repetition size of the two possible logical immediates by
397
+ // counting the number of runs of one-bits within the BigSize-bit value. Both
398
+ // sizes may be the same. The EOR may add one or subtract one from the
399
+ // power-of-two count that can be represented by a logical immediate, or it
400
+ // may be left unchanged.
401
+ int RunsPerBigChunk = popcount (RunStarts & BigMask);
402
+
403
+ static const int8_t BigToSmallSizeTable[32 ] = {
404
+ -1 , -1 , 0 , 1 , 2 , 2 , -1 , 3 , 3 , 3 , -1 , -1 , -1 , -1 , -1 , 4 ,
405
+ 4 , 4 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , 5 ,
406
+ };
407
+
408
+ int BigToSmallShift = BigToSmallSizeTable[RunsPerBigChunk];
409
+
410
+ // Early-exit if the big chunk couldn't be a power-of-two number of runs
411
+ // EORed with another single run.
412
+ if (BigToSmallShift == -1 )
413
+ return false ;
414
+
415
+ unsigned SmallSize = BigSize >> BigToSmallShift;
416
+
417
+ // 64-bit values with a bit set every (1 << index) bits.
418
+ static const uint64_t RepeatedOnesTable[] = {
419
+ 0xffffffffffffffff , 0x5555555555555555 , 0x1111111111111111 ,
420
+ 0x0101010101010101 , 0x0001000100010001 , 0x0000000100000001 ,
421
+ 0x0000000000000001 ,
422
+ };
423
+
424
+ // This RepeatedOnesTable lookup is a faster implementation of the division
425
+ // 0xffffffffffffffff / ((1 << SmallSize) - 1), and can be thought of as
426
+ // dividing the 64-bit value into fields of width SmallSize, and placing a
427
+ // one in the least significant bit of each field.
428
+ uint64_t SmallOnes = RepeatedOnesTable[countr_zero (SmallSize)];
429
+
430
+ // Now we try to find the number of ones in each of the smaller repetitions,
431
+ // by looking at runs of ones in Imm. This can take three attempts, as the
432
+ // EOR may have changed the length of the first two runs we find.
433
+
434
+ // Rotate a run of ones so we can count the number of trailing set bits.
435
+ int Rotation = countr_zero (RunStarts);
436
+ uint64_t RotatedImm = rotr<uint64_t >(Imm, Rotation);
437
+ for (int Attempt = 0 ; Attempt < 3 ; ++Attempt) {
438
+ unsigned RunLength = countr_one (RotatedImm);
439
+
440
+ // Construct candidate values BigImm and SmallImm, such that if these two
441
+ // values are encodable, we have a solution. (SmallImm is constructed to be
442
+ // encodable, but this isn't guaranteed when RunLength >= SmallSize)
443
+ uint64_t SmallImm =
444
+ rotl<uint64_t >((SmallOnes << RunLength) - SmallOnes, Rotation);
445
+ uint64_t BigImm = Imm ^ SmallImm;
446
+
447
+ uint64_t BigEncoding = 0 ;
448
+ uint64_t SmallEncoding = 0 ;
449
+ if (AArch64_AM::processLogicalImmediate (BigImm, 64 , BigEncoding) &&
450
+ AArch64_AM::processLogicalImmediate (SmallImm, 64 , SmallEncoding)) {
451
+ Insn.push_back ({AArch64::ORRXri, 0 , SmallEncoding});
452
+ Insn.push_back ({AArch64::EORXri, 1 , BigEncoding});
453
+ return true ;
454
+ }
455
+
456
+ // Rotate to the next run of ones
457
+ Rotation += countr_zero (rotr<uint64_t >(RunStarts, Rotation) & ~1 );
458
+ RotatedImm = rotr<uint64_t >(Imm, Rotation);
459
+ }
460
+
461
+ return false ;
462
+ }
463
+
365
464
/// \brief Expand a MOVi32imm or MOVi64imm pseudo instruction to a
366
465
/// MOVZ or MOVN of width BitSize followed by up to 3 MOVK instructions.
367
466
static inline void expandMOVImmSimple (uint64_t Imm, unsigned BitSize,
@@ -503,6 +602,10 @@ void AArch64_IMM::expandMOVImm(uint64_t Imm, unsigned BitSize,
503
602
if (tryAndOfLogicalImmediates (Imm, Insn))
504
603
return ;
505
604
605
+ // Attempt to use a sequence of ORR-immediate followed by EOR-immediate.
606
+ if (tryEorOfLogicalImmediates (UImm, Insn))
607
+ return ;
608
+
506
609
// FIXME: Add more two-instruction sequences.
507
610
508
611
// Three instruction sequences.
0 commit comments