@@ -162,24 +162,22 @@ PyUnstable_SetOptimizer(_PyOptimizerObject *optimizer)
162
162
}
163
163
164
164
int
165
- _PyOptimizer_BackEdge (_PyInterpreterFrame * frame , _Py_CODEUNIT * src , _Py_CODEUNIT * dest , PyObject * * stack_pointer )
165
+ _PyOptimizer_Optimize (_PyInterpreterFrame * frame , _Py_CODEUNIT * start , PyObject * * stack_pointer )
166
166
{
167
- assert (src -> op .code == JUMP_BACKWARD );
168
167
PyCodeObject * code = (PyCodeObject * )frame -> f_executable ;
169
168
assert (PyCode_Check (code ));
170
169
PyInterpreterState * interp = _PyInterpreterState_GET ();
171
- if (!has_space_for_executor (code , src )) {
170
+ if (!has_space_for_executor (code , start )) {
172
171
return 0 ;
173
172
}
174
173
_PyOptimizerObject * opt = interp -> optimizer ;
175
174
_PyExecutorObject * executor = NULL ;
176
- /* Start optimizing at the destination to guarantee forward progress */
177
- int err = opt -> optimize (opt , code , dest , & executor , (int )(stack_pointer - _PyFrame_Stackbase (frame )));
175
+ int err = opt -> optimize (opt , code , start , & executor , (int )(stack_pointer - _PyFrame_Stackbase (frame )));
178
176
if (err <= 0 ) {
179
177
assert (executor == NULL );
180
178
return err ;
181
179
}
182
- int index = get_index_for_executor (code , src );
180
+ int index = get_index_for_executor (code , start );
183
181
if (index < 0 ) {
184
182
/* Out of memory. Don't raise and assume that the
185
183
* error will show up elsewhere.
@@ -190,7 +188,7 @@ _PyOptimizer_BackEdge(_PyInterpreterFrame *frame, _Py_CODEUNIT *src, _Py_CODEUNI
190
188
Py_DECREF (executor );
191
189
return 0 ;
192
190
}
193
- insert_executor (code , src , index , executor );
191
+ insert_executor (code , start , index , executor );
194
192
Py_DECREF (executor );
195
193
return 1 ;
196
194
}
@@ -316,38 +314,6 @@ BRANCH_TO_GUARD[4][2] = {
316
314
/* Starting value of the `confidence` counter that translate_bytecode_to_trace()
 * keeps while building a trace; that counter is adjusted by branch instructions. */
#define CONFIDENCE_RANGE 1000
317
315
/* NOTE(review): the use of this constant is not visible in this excerpt;
 * presumably it is the confidence level (on the CONFIDENCE_RANGE scale)
 * below which trace projection stops -- confirm against the branch handling. */
#define CONFIDENCE_CUTOFF 333
318
316
319
- /* Returns 1 on success,
320
- * 0 if it failed to produce a worthwhile trace,
321
- * and -1 on an error.
322
- */
323
- static int
324
- translate_bytecode_to_trace (
325
- PyCodeObject * code ,
326
- _Py_CODEUNIT * instr ,
327
- _PyUOpInstruction * trace ,
328
- int buffer_size ,
329
- _PyBloomFilter * dependencies )
330
- {
331
- PyCodeObject * initial_code = code ;
332
- _Py_BloomFilter_Add (dependencies , initial_code );
333
- _Py_CODEUNIT * initial_instr = instr ;
334
- int trace_length = 0 ;
335
- int max_length = buffer_size ;
336
- struct {
337
- PyCodeObject * code ;
338
- _Py_CODEUNIT * instr ;
339
- } trace_stack [TRACE_STACK_SIZE ];
340
- int trace_stack_depth = 0 ;
341
- int confidence = CONFIDENCE_RANGE ; // Adjusted by branch instructions
342
-
343
- #ifdef Py_DEBUG
344
- char * python_lltrace = Py_GETENV ("PYTHON_LLTRACE" );
345
- int lltrace = 0 ;
346
- if (python_lltrace != NULL && * python_lltrace >= '0' ) {
347
- lltrace = * python_lltrace - '0' ; // TODO: Parse an int and all that
348
- }
349
- #endif
350
-
351
317
#ifdef Py_DEBUG
352
318
#define DPRINTF (level , ...) \
353
319
if (lltrace >= (level)) { printf(__VA_ARGS__); }
@@ -403,13 +369,47 @@ translate_bytecode_to_trace(
403
369
code = trace_stack[trace_stack_depth].code; \
404
370
instr = trace_stack[trace_stack_depth].instr;
405
371
372
+ /* Returns 1 on success,
373
+ * 0 if it failed to produce a worthwhile trace,
374
+ * and -1 on an error.
375
+ */
376
+ static int
377
+ translate_bytecode_to_trace (
378
+ PyCodeObject * code ,
379
+ _Py_CODEUNIT * instr ,
380
+ _PyUOpInstruction * trace ,
381
+ int buffer_size ,
382
+ _PyBloomFilter * dependencies )
383
+ {
384
+ bool progress_needed = true;
385
+ PyCodeObject * initial_code = code ;
386
+ _Py_BloomFilter_Add (dependencies , initial_code );
387
+ _Py_CODEUNIT * initial_instr = instr ;
388
+ int trace_length = 0 ;
389
+ int max_length = buffer_size ;
390
+ struct {
391
+ PyCodeObject * code ;
392
+ _Py_CODEUNIT * instr ;
393
+ } trace_stack [TRACE_STACK_SIZE ];
394
+ int trace_stack_depth = 0 ;
395
+ int confidence = CONFIDENCE_RANGE ; // Adjusted by branch instructions
396
+
397
+ #ifdef Py_DEBUG
398
+ char * python_lltrace = Py_GETENV ("PYTHON_LLTRACE" );
399
+ int lltrace = 0 ;
400
+ if (python_lltrace != NULL && * python_lltrace >= '0' ) {
401
+ lltrace = * python_lltrace - '0' ; // TODO: Parse an int and all that
402
+ }
403
+ #endif
404
+
406
405
DPRINTF (4 ,
407
406
"Optimizing %s (%s:%d) at byte offset %d\n" ,
408
407
PyUnicode_AsUTF8 (code -> co_qualname ),
409
408
PyUnicode_AsUTF8 (code -> co_filename ),
410
409
code -> co_firstlineno ,
411
410
2 * INSTR_IP (initial_instr , code ));
412
411
uint32_t target = 0 ;
412
+
413
413
top : // Jump here after _PUSH_FRAME or likely branches
414
414
for (;;) {
415
415
target = INSTR_IP (instr , code );
@@ -421,6 +421,15 @@ translate_bytecode_to_trace(
421
421
uint32_t oparg = instr -> op .arg ;
422
422
uint32_t extended = 0 ;
423
423
424
+ if (opcode == ENTER_EXECUTOR ) {
425
+ assert (oparg < 256 );
426
+ _PyExecutorObject * executor =
427
+ (_PyExecutorObject * )code -> co_executors -> executors [oparg ];
428
+ opcode = executor -> vm_data .opcode ;
429
+ DPRINTF (2 , " * ENTER_EXECUTOR -> %s\n" , _PyOpcode_OpName [opcode ]);
430
+ oparg = executor -> vm_data .oparg ;
431
+ }
432
+
424
433
if (opcode == EXTENDED_ARG ) {
425
434
instr ++ ;
426
435
extended = 1 ;
@@ -431,13 +440,23 @@ translate_bytecode_to_trace(
431
440
goto done ;
432
441
}
433
442
}
434
-
435
- if (opcode == ENTER_EXECUTOR ) {
436
- _PyExecutorObject * executor =
437
- (_PyExecutorObject * )code -> co_executors -> executors [oparg & 255 ];
438
- opcode = executor -> vm_data .opcode ;
439
- DPRINTF (2 , " * ENTER_EXECUTOR -> %s\n" , _PyOpcode_OpName [opcode ]);
440
- oparg = (oparg & 0xffffff00 ) | executor -> vm_data .oparg ;
443
+ assert (opcode != ENTER_EXECUTOR && opcode != EXTENDED_ARG );
444
+
445
+ /* Special case the first instruction,
446
+ * so that we can guarantee forward progress */
447
+ if (progress_needed ) {
448
+ progress_needed = false;
449
+ if (opcode == JUMP_BACKWARD || opcode == JUMP_BACKWARD_NO_INTERRUPT ) {
450
+ instr += 1 + _PyOpcode_Caches [opcode ] - (int32_t )oparg ;
451
+ initial_instr = instr ;
452
+ continue ;
453
+ }
454
+ else {
455
+ if (OPCODE_HAS_DEOPT (opcode )) {
456
+ opcode = _PyOpcode_Deopt [opcode ];
457
+ }
458
+ assert (!OPCODE_HAS_DEOPT (opcode ));
459
+ }
441
460
}
442
461
443
462
switch (opcode ) {
@@ -480,7 +499,9 @@ translate_bytecode_to_trace(
480
499
case JUMP_BACKWARD :
481
500
case JUMP_BACKWARD_NO_INTERRUPT :
482
501
{
483
- if (instr + 2 - oparg == initial_instr && code == initial_code ) {
502
+ _Py_CODEUNIT * target = instr + 1 + _PyOpcode_Caches [opcode ] - (int )oparg ;
503
+ if (target == initial_instr ) {
504
+ /* We have looped round to the start */
484
505
RESERVE (1 );
485
506
ADD_TO_TRACE (_JUMP_TO_TOP , 0 , 0 , 0 );
486
507
}
@@ -641,35 +662,33 @@ translate_bytecode_to_trace(
641
662
}
642
663
assert (code == initial_code );
643
664
// Skip short traces like _SET_IP, LOAD_FAST, _SET_IP, _EXIT_TRACE
644
- if (trace_length > 4 ) {
645
- ADD_TO_TRACE (_EXIT_TRACE , 0 , 0 , target );
646
- DPRINTF (1 ,
647
- "Created a trace for %s (%s:%d) at byte offset %d -- length %d\n" ,
648
- PyUnicode_AsUTF8 (code -> co_qualname ),
649
- PyUnicode_AsUTF8 (code -> co_filename ),
650
- code -> co_firstlineno ,
651
- 2 * INSTR_IP (initial_instr , code ),
652
- trace_length );
653
- OPT_HIST (trace_length + buffer_size - max_length , trace_length_hist );
654
- return 1 ;
655
- }
656
- else {
665
+ if (progress_needed || trace_length < 5 ) {
657
666
OPT_STAT_INC (trace_too_short );
658
667
DPRINTF (4 ,
659
668
"No trace for %s (%s:%d) at byte offset %d\n" ,
660
669
PyUnicode_AsUTF8 (code -> co_qualname ),
661
670
PyUnicode_AsUTF8 (code -> co_filename ),
662
671
code -> co_firstlineno ,
663
672
2 * INSTR_IP (initial_instr , code ));
673
+ return 0 ;
664
674
}
665
- return 0 ;
675
+ ADD_TO_TRACE (_EXIT_TRACE , 0 , 0 , target );
676
+ DPRINTF (1 ,
677
+ "Created a trace for %s (%s:%d) at byte offset %d -- length %d\n" ,
678
+ PyUnicode_AsUTF8 (code -> co_qualname ),
679
+ PyUnicode_AsUTF8 (code -> co_filename ),
680
+ code -> co_firstlineno ,
681
+ 2 * INSTR_IP (initial_instr , code ),
682
+ trace_length );
683
+ OPT_HIST (trace_length + buffer_size - max_length , trace_length_hist );
684
+ return 1 ;
685
+ }
666
686
667
687
#undef RESERVE
668
688
#undef RESERVE_RAW
669
689
#undef INSTR_IP
670
690
#undef ADD_TO_TRACE
671
691
#undef DPRINTF
672
- }
673
692
674
693
/* Clear bit number `bit` in the bitmap `array`, which holds 32 bits per
 * element (assumes 32-bit unsigned elements -- confirm at the call sites).
 * The mask is built from an unsigned 1 so that bit 31 is never shifted into
 * the sign bit of an int, which would be undefined behavior. */
#define UNSET_BIT(array, bit) ((array)[(bit)>>5] &= ~(1U<<((bit)&31)))
675
694
/* Set bit number `bit` in the bitmap `array`, which holds 32 bits per
 * element (assumes 32-bit unsigned elements -- confirm at the call sites).
 * The mask is built from an unsigned 1 so that bit 31 is never shifted into
 * the sign bit of an int, which would be undefined behavior. */
#define SET_BIT(array, bit) ((array)[(bit)>>5] |= (1U<<((bit)&31)))
@@ -854,10 +873,20 @@ counter_optimize(
854
873
int Py_UNUSED (curr_stackentries )
855
874
)
856
875
{
876
+ int oparg = instr -> op .arg ;
877
+ while (instr -> op .code == EXTENDED_ARG ) {
878
+ instr ++ ;
879
+ oparg = (oparg << 8 ) | instr -> op .arg ;
880
+ }
881
+ if (instr -> op .code != JUMP_BACKWARD ) {
882
+ /* Counter optimizer can only handle backward edges */
883
+ return 0 ;
884
+ }
885
+ _Py_CODEUNIT * target = instr + 1 + _PyOpcode_Caches [JUMP_BACKWARD ] - oparg ;
857
886
_PyUOpInstruction buffer [3 ] = {
858
887
{ .opcode = _LOAD_CONST_INLINE_BORROW , .operand = (uintptr_t )self },
859
888
{ .opcode = _INTERNAL_INCREMENT_OPT_COUNTER },
860
- { .opcode = _EXIT_TRACE , .target = (uint32_t )(instr - _PyCode_CODE (code )) }
889
+ { .opcode = _EXIT_TRACE , .target = (uint32_t )(target - _PyCode_CODE (code )) }
861
890
};
862
891
_PyBloomFilter empty ;
863
892
_Py_BloomFilter_Init (& empty );
0 commit comments