@@ -27,7 +27,7 @@ use rustc_session::config::{
 };
 use rustc_span::{BytePos, InnerSpan, Pos, SpanData, SyntaxContext, sym};
 use rustc_target::spec::{CodeModel, FloatAbi, RelocModel, SanitizerSet, SplitDebuginfo, TlsModel};
-use tracing::debug;
+use tracing::{debug, trace};
 
 use crate::back::lto::ThinBuffer;
 use crate::back::owned_target_machine::OwnedTargetMachine;
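The `trace` import added above is consumed by the new `trace!(?unroll_loops, ?vectorize_slp, ?vectorize_loop)` call in the next hunk. As a reference, here is a minimal, self-contained sketch of `tracing`'s `?field` shorthand, which records a variable as a field under its own name using its `Debug` impl; the subscriber setup and the sample output are illustrative and not part of this PR:

```rust
// Illustration of the `?field` shorthand used by the new `trace!` call.
// Assumes the `tracing` and `tracing-subscriber` crates as dependencies.
use tracing::trace;

fn main() {
    // Install a subscriber that prints TRACE-level events so the call below is visible.
    tracing_subscriber::fmt().with_max_level(tracing::Level::TRACE).init();

    let unroll_loops = false;
    let vectorize_slp = false;
    let vectorize_loop = false;
    // `?x` records `x` formatted with `Debug`; the output looks roughly like:
    // TRACE ...: unroll_loops=false vectorize_slp=false vectorize_loop=false
    trace!(?unroll_loops, ?vectorize_slp, ?vectorize_loop);
}
```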
@@ -537,9 +537,35 @@ pub(crate) unsafe fn llvm_optimize(
     config: &ModuleConfig,
     opt_level: config::OptLevel,
     opt_stage: llvm::OptStage,
+    skip_size_increasing_opts: bool,
 ) -> Result<(), FatalError> {
-    let unroll_loops =
-        opt_level != config::OptLevel::Size && opt_level != config::OptLevel::SizeMin;
+    // Enzyme:
+    // The whole point of compiler based AD is to differentiate optimized IR instead of unoptimized
+    // source code. However, benchmarks show that optimizations increasing the code size
+    // tend to reduce AD performance. Therefore deactivate them before AD, then differentiate the code
+    // and finally re-optimize the module, now with all optimizations available.
+    // FIXME(ZuseZ4): In a future update we could figure out how to only optimize individual functions getting
+    // differentiated.
+
+    let unroll_loops;
+    let vectorize_slp;
+    let vectorize_loop;
+
+    // When we build rustc with enzyme/autodiff support, we want to postpone size-increasing
+    // optimizations until after differentiation. FIXME(ZuseZ4): Before shipping on nightly,
+    // we should make this more granular, or at least check that the user has at least one autodiff
+    // call in their code, to justify altering the compilation pipeline.
+    if skip_size_increasing_opts && cfg!(llvm_enzyme) {
+        unroll_loops = false;
+        vectorize_slp = false;
+        vectorize_loop = false;
+    } else {
+        unroll_loops =
+            opt_level != config::OptLevel::Size && opt_level != config::OptLevel::SizeMin;
+        vectorize_slp = config.vectorize_slp;
+        vectorize_loop = config.vectorize_loop;
+    }
+    trace!(?unroll_loops, ?vectorize_slp, ?vectorize_loop);
     let using_thin_buffers = opt_stage == llvm::OptStage::PreLinkThinLTO || config.bitcode_needed();
     let pgo_gen_path = get_pgo_gen_path(config);
     let pgo_use_path = get_pgo_use_path(config);
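In short, the hunk above threads a `skip_size_increasing_opts` flag into `llvm_optimize` and, when rustc is built with the `llvm_enzyme` cfg, turns off loop unrolling and both vectorizers so Enzyme later differentiates smaller IR. A standalone sketch of just that gating decision, with `OptLevel` and `Config` as simplified stand-ins for rustc's `config::OptLevel` and `ModuleConfig` (not the real types):

```rust
#[derive(PartialEq, Clone, Copy)]
#[allow(dead_code)]
enum OptLevel {
    No,
    Less,
    Default,
    Aggressive,
    Size,
    SizeMin,
}

struct Config {
    vectorize_slp: bool,
    vectorize_loop: bool,
}

/// Returns `(unroll_loops, vectorize_slp, vectorize_loop)`.
fn size_increasing_opts(
    skip_size_increasing_opts: bool,
    llvm_enzyme: bool, // stand-in for `cfg!(llvm_enzyme)`
    opt_level: OptLevel,
    config: &Config,
) -> (bool, bool, bool) {
    if skip_size_increasing_opts && llvm_enzyme {
        // Before autodiff: keep the IR small so Enzyme has less code to differentiate.
        (false, false, false)
    } else {
        // Normal pipeline: honour the opt-level and the configured vectorizers.
        let unroll = opt_level != OptLevel::Size && opt_level != OptLevel::SizeMin;
        (unroll, config.vectorize_slp, config.vectorize_loop)
    }
}

fn main() {
    let cfg = Config { vectorize_slp: true, vectorize_loop: true };
    // With enzyme support and the skip flag, every size-increasing opt is off.
    assert_eq!(size_increasing_opts(true, true, OptLevel::Default, &cfg), (false, false, false));
    // Without the skip flag, the usual rules apply (no unrolling at SizeMin).
    assert_eq!(size_increasing_opts(false, true, OptLevel::SizeMin, &cfg), (false, true, true));
}
```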
@@ -603,8 +629,8 @@ pub(crate) unsafe fn llvm_optimize(
             using_thin_buffers,
             config.merge_functions,
             unroll_loops,
-            config.vectorize_slp,
-            config.vectorize_loop,
+            vectorize_slp,
+            vectorize_loop,
             config.no_builtins,
             config.emit_lifetime_markers,
             sanitizer_options.as_ref(),
@@ -648,14 +674,29 @@ pub(crate) unsafe fn optimize(
         unsafe { llvm::LLVMWriteBitcodeToFile(llmod, out.as_ptr()) };
     }
 
+    // FIXME(ZuseZ4): support SanitizeHWAddress and prevent illegal/unsupported opts
+
     if let Some(opt_level) = config.opt_level {
         let opt_stage = match cgcx.lto {
             Lto::Fat => llvm::OptStage::PreLinkFatLTO,
             Lto::Thin | Lto::ThinLocal => llvm::OptStage::PreLinkThinLTO,
             _ if cgcx.opts.cg.linker_plugin_lto.enabled() => llvm::OptStage::PreLinkThinLTO,
             _ => llvm::OptStage::PreLinkNoLTO,
         };
-        return unsafe { llvm_optimize(cgcx, dcx, module, config, opt_level, opt_stage) };
+
+        // If we know that we will later run AD, then we disable vectorization and loop unrolling
+        let skip_size_increasing_opts = cfg!(llvm_enzyme);
+        return unsafe {
+            llvm_optimize(
+                cgcx,
+                dcx,
+                module,
+                config,
+                opt_level,
+                opt_stage,
+                skip_size_increasing_opts,
+            )
+        };
     }
     Ok(())
 }
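Putting the hunks together, the caller (`optimize`) derives the flag from `cfg!(llvm_enzyme)` alone for now, so a rustc built without Enzyme support is unaffected. Below is a hedged sketch of the overall pipeline the in-code comments describe (optimize without size-increasing passes, differentiate, then re-optimize); the function names are placeholders, not rustc or Enzyme APIs:

```rust
// Placeholder module handle; stands in for the LLVM module being compiled.
struct Module;

fn optimize_without_size_increasing_opts(_m: &mut Module) {
    // First `llvm_optimize` run: loop unrolling and both vectorizers disabled.
}

fn differentiate_with_enzyme(_m: &mut Module) {
    // Enzyme differentiates the already-optimized (but still small) IR.
}

fn reoptimize(_m: &mut Module) {
    // Second optimization run with all size-increasing passes available again.
}

fn main() {
    let mut module = Module;
    optimize_without_size_increasing_opts(&mut module);
    differentiate_with_enzyme(&mut module);
    reoptimize(&mut module);
}
```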