@@ -24,8 +24,10 @@
 import subprocess
 import zipfile
 import tarfile
+import statistics
+from pathlib import Path
 from threading import Thread
-from typing import List, Any, Optional, Dict
+from typing import List, Any, Optional, Dict, Tuple, Set

 import constants
 import oss_fuzz
@@ -34,7 +36,7 @@
 DB_JSON_ALL_PROJECT_TIMESTAMP = 'all-project-timestamps.json'
 DB_JSON_ALL_FUNCTIONS = 'all-functions-db-{PROJ}.json'
 DB_JSON_ALL_CONSTRUCTORS = 'all-constructors-db-{PROJ}.json'
-DB_JSON_ALL_CURRENT_FUNCS = 'all-project-current.json'
+DB_JSON_ALL_CURRENT = 'all-project-current.json'
 DB_JSON_ALL_BRANCH_BLOCKERS = 'all-branch-blockers.json'
 DB_BUILD_STATUS_JSON = 'build-status.json'
 #DB_RAW_INTROSPECTOR_REPORTS = 'raw-introspector-reports'
@@ -44,7 +46,7 @@
     DB_JSON_ALL_PROJECT_TIMESTAMP,
     DB_JSON_ALL_FUNCTIONS,
     DB_JSON_ALL_CONSTRUCTORS,
-    DB_JSON_ALL_CURRENT_FUNCS,
+    DB_JSON_ALL_CURRENT,
 ]

 INTROSPECTOR_WEBAPP_ZIP = (
@@ -53,6 +55,9 @@
 FI_EXCLUDE_ALL_NON_MUSTS = bool(int(os.getenv('FI_EXCLUDE_ALL_NON_MUSTS',
                                               '0')))

+NUM_RECENT_DAYS = 30
+FUZZER_COVERAGE_IS_DEGRADED = 5  # 5% or more is a degradation
+
 MUST_INCLUDES = set()
 MUST_INCLUDE_WITH_LANG: List[Any] = []

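An aside on the configuration block above: the env-var pattern treats any non-zero integer string as true, and the two new constants feed the recent-results analysis added further down in this diff. A quick illustration of how the flag parsing behaves (the demo value is made up):

import os

# Unset or '0' (the default) parses to False; '1' parses to True.
os.environ['FI_EXCLUDE_ALL_NON_MUSTS'] = '1'
flag = bool(int(os.getenv('FI_EXCLUDE_ALL_NON_MUSTS', '0')))
print(flag)  # True; a non-numeric value such as 'yes' would raise ValueError
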
@@ -283,9 +288,8 @@ def extract_and_refine_functions(all_function_list, date_str):
     return refined_proj_list


-def extract_code_coverage_data(code_coverage_summary, project_name, date_str,
-                               project_language) -> Optional[Dict[str, Any]]:
-    """Gets coverage URL and line coverage total of a project"""
+def extract_code_coverage_data(code_coverage_summary):
+    """Extract the coverage data from a loaded coverage summary.json"""
     # Extract data from the code coverage reports
     if code_coverage_summary is None:
         return None
@@ -307,8 +311,19 @@ def extract_code_coverage_data(code_coverage_summary, project_name, date_str,
     except:
         pass

+    return line_total_summary
+
+
+def prepare_code_coverage_dict(
+        code_coverage_summary, project_name: str, date_str: str,
+        project_language: str) -> Optional[Dict[str, Any]]:
+    """Gets coverage URL and line coverage total of a project"""
+    line_total_summary = extract_code_coverage_data(code_coverage_summary)
+    if line_total_summary is None:
+        return None
+
     coverage_url = oss_fuzz.get_coverage_report_url(project_name,
-                                                    date_str.replace("-", ""),
+                                                    date_str.replace('-', ''),
                                                     project_language)
     code_coverage_data_dict = {
         'coverage_url': coverage_url,
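
The refactor in this hunk splits the old function in two: extract_code_coverage_data now only pulls the line totals out of an already-loaded summary.json, while prepare_code_coverage_dict layers the coverage report URL on top. A minimal sketch of the extraction half, assuming the llvm-cov export layout that OSS-Fuzz coverage builds typically produce; extract_line_totals and the sample data are illustrative, not the code in this diff:

from typing import Any, Dict, Optional


def extract_line_totals(summary) -> Optional[Dict[str, Any]]:
    """Return the 'lines' totals from a loaded summary.json, or None."""
    if summary is None:
        return None
    try:
        # llvm-cov export format: data[0].totals.lines.{count, covered, ...}
        return summary['data'][0]['totals']['lines']
    except (KeyError, IndexError, TypeError):
        return None


# A summary where 80 of 100 lines are covered.
fake_summary = {'data': [{'totals': {'lines': {'count': 100, 'covered': 80}}}]}
print(extract_line_totals(fake_summary))  # {'count': 100, 'covered': 80}
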
@@ -442,7 +457,7 @@ def extract_local_project_data(project_name, oss_fuzz_path,
         project_name
     }

-    code_coverage_data_dict = extract_code_coverage_data(
+    code_coverage_data_dict = prepare_code_coverage_dict(
         code_coverage_summary, project_name, '', project_language)

     if cov_fuzz_stats is not None:
@@ -465,8 +480,8 @@ def extract_local_project_data(project_name, oss_fuzz_path,
     dictionary_key = '%s###%s' % (project_name, '')
     manager_return_dict[dictionary_key] = {
         'project_timestamp': project_timestamp,
-        "introspector-data-dict": introspector_data_dict,
-        "coverage-data-dict": code_coverage_data_dict,
+        'introspector-data-dict': introspector_data_dict,
+        'coverage-data-dict': code_coverage_data_dict,
         'all-header-files': all_header_files,
     }

@@ -704,20 +719,30 @@ def extract_project_data(project_name, date_str, should_include_details,
         'project_name': project_name
     }

-    code_coverage_data_dict = extract_code_coverage_data(
+    code_coverage_data_dict = prepare_code_coverage_dict(
         code_coverage_summary, project_name, date_str, project_language)

+    per_fuzzer_cov = {}
     if cov_fuzz_stats is not None:
         all_fuzzers = cov_fuzz_stats.split("\n")
         if all_fuzzers[-1] == '':
             all_fuzzers = all_fuzzers[0:-1]
         amount_of_fuzzers = len(all_fuzzers)
+        for ff in all_fuzzers:
+            try:
+                fuzzer_cov = oss_fuzz.get_fuzzer_code_coverage_summary(
+                    project_name, date_str.replace("-", ""), ff)
+                fuzzer_cov_data = extract_code_coverage_data(fuzzer_cov)
+                per_fuzzer_cov[ff] = fuzzer_cov_data
+            except:
+                pass

     project_timestamp = {
         "project_name": project_name,
         "date": date_str,
         'language': project_language,
         'coverage-data': code_coverage_data_dict,
+        'per-fuzzer-coverage-data': per_fuzzer_cov,
         'introspector-data': introspector_data_dict,
         'fuzzer-count': amount_of_fuzzers,
         'project_repository': project_repository,
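
The loop above gives each project timestamp a new 'per-fuzzer-coverage-data' map from fuzzer name to its line totals (or None when the per-fuzzer summary could not be fetched). A hedged sketch of how the analysis code added later turns such an entry into a percentage; the fuzzer names and numbers are made up:

per_fuzzer_cov = {
    'fuzz_parser': {'count': 2400, 'covered': 1800},
    'fuzz_decoder': None,  # summary fetch failed for this fuzzer
}

for name, totals in per_fuzzer_cov.items():
    try:
        perc = round(100 * totals['covered'] / totals['count'], 2)
    except (TypeError, KeyError, ZeroDivisionError):
        perc = 0  # mirrors the bare-except fallback used in the diff
    print(f'{name}: {perc}%')  # fuzz_parser: 75.0%, fuzz_decoder: 0%
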
@@ -878,11 +903,105 @@ def extend_db_timestamps(db_timestamp, output_directory):
         json.dump(existing_timestamps, f)


-def extend_db_json_files(project_timestamps, output_directory):
+def per_fuzzer_coverage_analysis(project_name: str,
+                                 coverages: Dict[str, List[Tuple[int, str]]],
+                                 lost_fuzzers):
+    """Go through the recent coverage results and combine them into a short
+    summary, including an assessment of whether the fuzzer got worse over time.
+    """
+
+    # TODO This might not be a good metric when coverage is not meaningful,
+    # for example for very small projects or projects that have low coverage
+    # already. Though, this might not be too bad as we are looking at
+    # per-fuzzer coverage, which should already be normalized to what
+    # can be reached.
+    # TODO What would be a good percentage to mark as coverage degradation?
+    # Taking 5% for now, but this should be observed; maybe it should be
+    # configurable per project as well.
+    results = {}
+    for ff, data in coverages.items():
+        if len(data) > 0:
+            values = [dd[0] for dd in data]
+            dates = [dd[1] for dd in data]
+            latest_date_with_value = next(
+                (dd[1] for dd in reversed(data) if dd[0] is not None), None)
+            if latest_date_with_value is not None:
+                report_url = oss_fuzz.get_fuzzer_code_coverage_summary_url(
+                    project_name, latest_date_with_value.replace('-', ''), ff)
+                report_url = report_url[:-len('summary.json')] + 'index.html'
+            else:
+                report_url = None
+            max_cov = max(values[:-1], default=0)
+            avg_cov = round(statistics.fmean(values), 2)
+            current = values[-1]
+            results[ff] = {
+                'report_url': report_url,
+                'report_date': latest_date_with_value,
+                'coverages_values': values,
+                'coverages_dates': dates,
+                'max': max_cov,
+                'avg': avg_cov,
+                'current': current,
+                'has_degraded':
+                (max_cov - current) > FUZZER_COVERAGE_IS_DEGRADED,
+                'got_lost': ff in lost_fuzzers,
+            }
+    return results
+
+
+def calculate_recent_results(projects_with_new_results, timestamps,
+                             num_days: int):
+    """Analyse recent project data to detect possible degradations of fuzzer efficiency."""
+    from collections import defaultdict
+
+    data: Dict[str, Dict[str, Dict[str, Any]]] = defaultdict(dict)
+    for pt in timestamps:
+        project_name = pt['project_name']
+        if project_name in projects_with_new_results:
+            data[project_name][pt['date']] = pt
+
+    results = {}
+    for project_name, project_data in data.items():
+        fuzzers_past = set()
+        fuzzers_current: Set[str] = set()
+        per_fuzzer_coverages = defaultdict(list)
+
+        for do in (get_date_at_offset_as_str(ii)
+                   for ii in range(-num_days, 0, 1)):
+            try:
+                date_data = project_data[do]
+                per_fuzzer_coverage_data = date_data[
+                    'per-fuzzer-coverage-data']
+
+                fuzzers_past |= fuzzers_current
+                fuzzers_current = set(per_fuzzer_coverage_data.keys())
+
+                for ff, cov_data in per_fuzzer_coverage_data.items():
+                    try:
+                        perc = round(
+                            100 * cov_data['covered'] / cov_data['count'], 2)
+                    except:
+                        perc = 0
+
+                    per_fuzzer_coverages[ff].append((perc, do))
+            except:
+                continue
+
+        fuzzer_diff = fuzzers_past - fuzzers_current
+        per_fuzzer_coverages = per_fuzzer_coverage_analysis(
+            project_name, per_fuzzer_coverages, fuzzer_diff)

+        results[project_name] = per_fuzzer_coverages
+
+    return results
+
+
+def extend_db_json_files(project_timestamps, output_directory,
+                         should_include_details):
     """Extends a set of DB .json files."""

     existing_timestamps = []
-    logging.info('Loading existing timestamps 1')
+    logging.info('Loading existing timestamps')
     if os.path.isfile(
             os.path.join(output_directory, DB_JSON_ALL_PROJECT_TIMESTAMP)):
         with open(
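
To make the degradation rule in per_fuzzer_coverage_analysis concrete, here is a small worked example with made-up daily coverage percentages: the check compares the best value seen before the latest day against the latest value, using the 5-point FUZZER_COVERAGE_IS_DEGRADED threshold.

import statistics

FUZZER_COVERAGE_IS_DEGRADED = 5
values = [62.0, 64.5, 63.8, 64.1, 55.2]  # hypothetical daily line coverage %

max_cov = max(values[:-1], default=0)  # 64.5, best result before the latest day
avg_cov = round(statistics.fmean(values), 2)  # 61.92
current = values[-1]  # 55.2
has_degraded = (max_cov - current) > FUZZER_COVERAGE_IS_DEGRADED
print(has_degraded)  # True: coverage dropped 9.3 points from its earlier peak
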
@@ -901,10 +1020,11 @@ def extend_db_json_files(project_timestamps, output_directory):
     existing_timestamp_mapping = dict()

     for es in existing_timestamps:
-        if not es['project_name'] in existing_timestamp_mapping:
+        if es['project_name'] not in existing_timestamp_mapping:
             existing_timestamp_mapping[es['project_name']] = set()
         existing_timestamp_mapping[es['project_name']].add(es['date'])

+    projects_with_new_results = set()
     for new_ts in project_timestamps:
         to_add = True
@@ -914,24 +1034,44 @@ def extend_db_json_files(project_timestamps, output_directory):
                 to_add = False
         if to_add:
             existing_timestamps.append(new_ts)
+            projects_with_new_results.add(new_ts['project_name'])
             have_added = True

     if FI_EXCLUDE_ALL_NON_MUSTS:
-        new_timestamps = []
+        # Filter existing timestamps down to only those in MUST_INCLUDES.
+        kept_timestamps = []
         for ts in existing_timestamps:
             if ts['project_name'] in MUST_INCLUDES:
-                new_timestamps.append(ts)
-        existing_timestamps = new_timestamps
+                kept_timestamps.append(ts)
+        existing_timestamps = kept_timestamps

-        new_project_stamps = []
+        # Also filter the current project results.
+        kept_project_stamps = []
         for project_stamp in project_timestamps:
             if project_stamp['project_name'] in MUST_INCLUDES:
-                new_project_stamps.append(project_stamp)
-        project_timestamps = new_project_stamps
+                kept_project_stamps.append(project_stamp)
+        project_timestamps = kept_project_stamps
+
+    if should_include_details:
+        recent_results = calculate_recent_results(projects_with_new_results,
+                                                  existing_timestamps,
+                                                  NUM_RECENT_DAYS)
+        # TODO these results might detect issues that should be communicated
+        # with project maintainers. The best approach might be to load the
+        # project_timestamps file (all-project-current.json) separately,
+        # load the recent results there, and maybe issue warnings.
+        for pt in project_timestamps:
+            try:
+                pt['recent_results'] = recent_results.get(pt['project_name'])
+            except Exception as exc:
+                logging.warning(
+                    f'Could not get recent results for {pt["project_name"]}: {exc}'
+                )
+    else:
+        recent_results = None

-    logging.info('Dumping all current projects')
-    with open(os.path.join(output_directory, DB_JSON_ALL_CURRENT_FUNCS),
-              'w') as f:
+    logging.info('Dumping current project data')
+    with open(os.path.join(output_directory, DB_JSON_ALL_CURRENT), 'w') as f:
         json.dump(project_timestamps, f)

     # Remove any light-introspector files because they should not be saved in the
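
Picking up the TODO in this hunk, a hedged sketch of what a separate consumer could look like: it loads the all-project-current.json written above and flags fuzzers whose summary says they degraded or disappeared. The file name and field names come from this diff; the surrounding script is hypothetical.

import json

with open('all-project-current.json') as f:
    current_projects = json.load(f)

for proj in current_projects:
    recent = proj.get('recent_results') or {}
    for fuzzer, summary in recent.items():
        if summary and (summary.get('has_degraded') or summary.get('got_lost')):
            print(f"{proj['project_name']}/{fuzzer}: "
                  f"max={summary.get('max')} current={summary.get('current')} "
                  f"lost={summary.get('got_lost')}")
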
@@ -999,7 +1139,8 @@ def update_db_files(db_timestamp,
         f.write(json.dumps(all_header_files))

     logging.info('Extending DB json files')
-    extend_db_json_files(project_timestamps, output_directory)
+    extend_db_json_files(project_timestamps, output_directory,
+                         should_include_details)

     logging.info('Extending DB time stamps')
     extend_db_timestamps(db_timestamp, output_directory)