Skip to content

Commit a0435e6

Browse files
authored
per fuzzer coverage information in project overview (#2102)
* per fuzzer coverage plots in overview Signed-off-by: phi-go <[email protected]> * Use single quotes and fix formatting. Signed-off-by: phi-go <[email protected]> * Rework project reports Signed-off-by: phi-go <[email protected]> Signed-off-by: phi-go <[email protected]> * add missing return none Signed-off-by: phi-go <[email protected]> --------- Signed-off-by: phi-go <[email protected]>
1 parent c6f69b1 commit a0435e6

File tree

6 files changed

+317
-57
lines changed

6 files changed

+317
-57
lines changed

tools/web-fuzzing-introspection/app/static/assets/db/oss_fuzz.py

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -154,6 +154,12 @@ def get_code_coverage_summary_url(project_name, datestr):
154154
return project_url
155155

156156

157+
def get_fuzzer_code_coverage_summary_url(project_name, datestr, fuzzer):
    """Return the URL of the coverage summary.json of a single fuzzer.

    The URL is assembled from the project name, the date string and the
    fuzzer name, in that order, below the oss-fuzz-coverage bucket.
    """
    return ('https://storage.googleapis.com/oss-fuzz-coverage/'
            f'{project_name}/reports-by-target/{datestr}/{fuzzer}'
            '/linux/summary.json')
161+
162+
157163
def get_coverage_report_url(project_name, datestr, language):
158164
if language == 'java' or language == 'python' or language == 'go':
159165
file_report = "index.html"
@@ -256,6 +262,20 @@ def get_code_coverage_summary(project_name, datestr):
256262
return None
257263

258264

265+
def get_fuzzer_code_coverage_summary(project_name, datestr, fuzzer):
    """Download and decode the coverage summary.json of a single fuzzer.

    Returns the decoded JSON document, or None when the download fails or
    the response body is not valid JSON.
    """
    cov_summary_url = get_fuzzer_code_coverage_summary_url(
        project_name, datestr, fuzzer)
    try:
        coverage_summary_raw = requests.get(cov_summary_url, timeout=20).text
    except Exception:
        # Best effort: any download problem means "no summary available".
        # Unlike a bare except this lets KeyboardInterrupt and SystemExit
        # propagate.
        return None
    try:
        return json.loads(coverage_summary_raw)
    except ValueError:
        # json.JSONDecodeError is a subclass of ValueError.
        return None
277+
278+
259279
def extract_new_introspector_functions(project_name, date_str):
260280
introspector_functions_url = get_introspector_report_url_all_functions(
261281
project_name, date_str.replace("-", ""))

tools/web-fuzzing-introspection/app/static/assets/db/web_db_creator_from_summary.py

Lines changed: 165 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -24,8 +24,10 @@
2424
import subprocess
2525
import zipfile
2626
import tarfile
27+
import statistics
28+
from pathlib import Path
2729
from threading import Thread
28-
from typing import List, Any, Optional, Dict
30+
from typing import List, Any, Optional, Dict, Tuple, Set
2931

3032
import constants
3133
import oss_fuzz
@@ -34,7 +36,7 @@
3436
DB_JSON_ALL_PROJECT_TIMESTAMP = 'all-project-timestamps.json'
3537
DB_JSON_ALL_FUNCTIONS = 'all-functions-db-{PROJ}.json'
3638
DB_JSON_ALL_CONSTRUCTORS = 'all-constructors-db-{PROJ}.json'
37-
DB_JSON_ALL_CURRENT_FUNCS = 'all-project-current.json'
39+
DB_JSON_ALL_CURRENT = 'all-project-current.json'
3840
DB_JSON_ALL_BRANCH_BLOCKERS = 'all-branch-blockers.json'
3941
DB_BUILD_STATUS_JSON = 'build-status.json'
4042
#DB_RAW_INTROSPECTOR_REPORTS = 'raw-introspector-reports'
@@ -44,7 +46,7 @@
4446
DB_JSON_ALL_PROJECT_TIMESTAMP,
4547
DB_JSON_ALL_FUNCTIONS,
4648
DB_JSON_ALL_CONSTRUCTORS,
47-
DB_JSON_ALL_CURRENT_FUNCS,
49+
DB_JSON_ALL_CURRENT,
4850
]
4951

5052
INTROSPECTOR_WEBAPP_ZIP = (
@@ -53,6 +55,9 @@
5355
FI_EXCLUDE_ALL_NON_MUSTS = bool(int(os.getenv('FI_EXCLUDE_ALL_NON_MUSTS',
5456
'0')))
5557

58+
NUM_RECENT_DAYS = 30
59+
FUZZER_COVERAGE_IS_DEGRADED = 5 # 5% or more is a degradation
60+
5661
MUST_INCLUDES = set()
5762
MUST_INCLUDE_WITH_LANG: List[Any] = []
5863

@@ -283,9 +288,8 @@ def extract_and_refine_functions(all_function_list, date_str):
283288
return refined_proj_list
284289

285290

286-
def extract_code_coverage_data(code_coverage_summary, project_name, date_str,
287-
project_language) -> Optional[Dict[str, Any]]:
288-
"""Gets coverage URL and line coverage total of a project"""
291+
def extract_code_coverage_data(code_coverage_summary):
292+
"""Extract the coverage data from a loaded coverage summary.json"""
289293
# Extract data from the code coverage reports
290294
if code_coverage_summary is None:
291295
return None
@@ -307,8 +311,19 @@ def extract_code_coverage_data(code_coverage_summary, project_name, date_str,
307311
except:
308312
pass
309313

314+
return line_total_summary
315+
316+
317+
def prepare_code_coverage_dict(
318+
code_coverage_summary, project_name: str, date_str: str,
319+
project_language: str) -> Optional[Dict[str, Any]]:
320+
"""Gets coverage URL and line coverage total of a project"""
321+
line_total_summary = extract_code_coverage_data(code_coverage_summary)
322+
if line_total_summary is None:
323+
return None
324+
310325
coverage_url = oss_fuzz.get_coverage_report_url(project_name,
311-
date_str.replace("-", ""),
326+
date_str.replace('-', ''),
312327
project_language)
313328
code_coverage_data_dict = {
314329
'coverage_url': coverage_url,
@@ -442,7 +457,7 @@ def extract_local_project_data(project_name, oss_fuzz_path,
442457
project_name
443458
}
444459

445-
code_coverage_data_dict = extract_code_coverage_data(
460+
code_coverage_data_dict = prepare_code_coverage_dict(
446461
code_coverage_summary, project_name, '', project_language)
447462

448463
if cov_fuzz_stats is not None:
@@ -465,8 +480,8 @@ def extract_local_project_data(project_name, oss_fuzz_path,
465480
dictionary_key = '%s###%s' % (project_name, '')
466481
manager_return_dict[dictionary_key] = {
467482
'project_timestamp': project_timestamp,
468-
"introspector-data-dict": introspector_data_dict,
469-
"coverage-data-dict": code_coverage_data_dict,
483+
'introspector-data-dict': introspector_data_dict,
484+
'coverage-data-dict': code_coverage_data_dict,
470485
'all-header-files': all_header_files,
471486
}
472487

@@ -704,20 +719,30 @@ def extract_project_data(project_name, date_str, should_include_details,
704719
'project_name': project_name
705720
}
706721

707-
code_coverage_data_dict = extract_code_coverage_data(
722+
code_coverage_data_dict = prepare_code_coverage_dict(
708723
code_coverage_summary, project_name, date_str, project_language)
709724

725+
per_fuzzer_cov = {}
710726
if cov_fuzz_stats is not None:
711727
all_fuzzers = cov_fuzz_stats.split("\n")
712728
if all_fuzzers[-1] == '':
713729
all_fuzzers = all_fuzzers[0:-1]
714730
amount_of_fuzzers = len(all_fuzzers)
731+
for ff in all_fuzzers:
732+
try:
733+
fuzzer_cov = oss_fuzz.get_fuzzer_code_coverage_summary(
734+
project_name, date_str.replace("-", ""), ff)
735+
fuzzer_cov_data = extract_code_coverage_data(fuzzer_cov)
736+
per_fuzzer_cov[ff] = fuzzer_cov_data
737+
except:
738+
pass
715739

716740
project_timestamp = {
717741
"project_name": project_name,
718742
"date": date_str,
719743
'language': project_language,
720744
'coverage-data': code_coverage_data_dict,
745+
'per-fuzzer-coverage-data': per_fuzzer_cov,
721746
'introspector-data': introspector_data_dict,
722747
'fuzzer-count': amount_of_fuzzers,
723748
'project_repository': project_repository,
@@ -878,11 +903,105 @@ def extend_db_timestamps(db_timestamp, output_directory):
878903
json.dump(existing_timestamps, f)
879904

880905

881-
def extend_db_json_files(project_timestamps, output_directory):
906+
def per_fuzzer_coverage_analysis(project_name: str,
                                 coverages: Dict[str,
                                                 List[Tuple[Optional[float],
                                                            str]]],
                                 lost_fuzzers):
    """Go through the recent coverage results and combine them into a short summary.

    Including an assessment if the fuzzer got worse over time.

    Args:
        project_name: Project the fuzzers belong to; used for the report URL.
        coverages: Maps a fuzzer name to a list of (coverage value, date)
            tuples. The last entry is treated as the current result.
            Fuzzers with an empty list are skipped.
        lost_fuzzers: Container of fuzzer names; membership is reported as
            'got_lost'.

    Returns:
        A dict mapping each fuzzer name to a dict with the keys 'report_url',
        'report_date', 'coverages_values', 'coverages_dates', 'max', 'avg',
        'current', 'has_degraded' and 'got_lost'. 'max' is taken over all
        entries except the current one, and 'has_degraded' is set when the
        current value is more than FUZZER_COVERAGE_IS_DEGRADED below 'max'.
    """

    # TODO This might not be a good metric when coverage is not meaningful,
    # for example for very small projects or projects that have low coverage
    # already. Though, this might not be super bad as we are taking a look
    # at per fuzzer coverage, which should already be normalized to what
    # can be reached.
    # TODO What would be a good percentage to mark as coverage degradation,
    # taking 5% for now but should be observed, maybe it should be
    # configurable per project as well.
    results = {}
    for ff, data in coverages.items():
        if not data:
            continue

        values = [dd[0] for dd in data]
        dates = [dd[1] for dd in data]

        # Without the explicit default, next() raises StopIteration when no
        # entry has a value and the None check below could never trigger.
        latest_date_with_value = next(
            (dd[1] for dd in reversed(data) if dd[0] is not None), None)
        if latest_date_with_value is not None:
            report_url = oss_fuzz.get_fuzzer_code_coverage_summary_url(
                project_name, latest_date_with_value.replace('-', ''), ff)
            # Point at the HTML report next to the summary file.
            report_url = report_url[:-len('summary.json')] + 'index.html'
        else:
            report_url = None

        # Missing (None) values are ignored in the statistics so that a gap
        # in the series does not abort the whole analysis.
        known_values = [vv for vv in values if vv is not None]
        known_previous = [vv for vv in values[:-1] if vv is not None]
        max_cov = max(known_previous, default=0)
        avg_cov = round(statistics.fmean(known_values),
                        2) if known_values else 0
        current = values[-1]
        has_degraded = (current is not None and
                        (max_cov - current) > FUZZER_COVERAGE_IS_DEGRADED)

        results[ff] = {
            'report_url': report_url,
            'report_date': latest_date_with_value,
            'coverages_values': values,
            'coverages_dates': dates,
            'max': max_cov,
            'avg': avg_cov,
            'current': current,
            'has_degraded': has_degraded,
            'got_lost': ff in lost_fuzzers,
        }
    return results
950+
951+
952+
def calculate_recent_results(projects_with_new_results, timestamps,
                             num_days: int):
    """Analyse recent project data to detect possible degradations of fuzzer efficiency.

    Args:
        projects_with_new_results: Container of project names; only
            timestamps of these projects are analysed.
        timestamps: Iterable of project timestamp dicts, each with at least
            the keys 'project_name' and 'date'.
        num_days: Number of day offsets to look back. Offsets -num_days up
            to and including -1 are passed to get_date_at_offset_as_str
            (presumably relative to today -- confirm against that helper).

    Returns:
        A dict mapping project name to the result of
        per_fuzzer_coverage_analysis for that project.
    """
    from collections import defaultdict

    # Index the timestamps as project name -> date -> timestamp.
    data: Dict[str, Dict[str, Dict[str, Any]]] = defaultdict(dict)
    for pt in timestamps:
        project_name = pt['project_name']
        if project_name in projects_with_new_results:
            data[project_name][pt['date']] = pt

    results = {}
    for project_name, project_data in data.items():
        fuzzers_past = set()
        fuzzers_current: Set[str] = set()
        per_fuzzer_coverages = defaultdict(list)

        for offset in range(-num_days, 0):
            date_str = get_date_at_offset_as_str(offset)

            # Days without a timestamp, or with a timestamp that carries no
            # usable per-fuzzer data, are skipped.
            date_data = project_data.get(date_str)
            if date_data is None:
                continue
            per_fuzzer_coverage_data = date_data.get(
                'per-fuzzer-coverage-data')
            if not isinstance(per_fuzzer_coverage_data, dict):
                continue

            # Fuzzers seen on any earlier day but not on the latest day with
            # data end up in fuzzers_past - fuzzers_current below.
            fuzzers_past |= fuzzers_current
            fuzzers_current = set(per_fuzzer_coverage_data)

            for ff, cov_data in per_fuzzer_coverage_data.items():
                try:
                    perc = round(
                        100 * cov_data['covered'] / cov_data['count'], 2)
                except (KeyError, TypeError, ZeroDivisionError):
                    # Missing or malformed coverage data counts as 0%.
                    perc = 0

                per_fuzzer_coverages[ff].append((perc, date_str))

        fuzzer_diff = fuzzers_past - fuzzers_current
        results[project_name] = per_fuzzer_coverage_analysis(
            project_name, per_fuzzer_coverages, fuzzer_diff)

    return results
997+
998+
999+
def extend_db_json_files(project_timestamps, output_directory,
1000+
should_include_details):
8821001
"""Extends a set of DB .json files."""
8831002

8841003
existing_timestamps = []
885-
logging.info('Loading existing timestamps 1')
1004+
logging.info('Loading existing timestamps')
8861005
if os.path.isfile(
8871006
os.path.join(output_directory, DB_JSON_ALL_PROJECT_TIMESTAMP)):
8881007
with open(
@@ -901,10 +1020,11 @@ def extend_db_json_files(project_timestamps, output_directory):
9011020
existing_timestamp_mapping = dict()
9021021

9031022
for es in existing_timestamps:
904-
if not es['project_name'] in existing_timestamp_mapping:
1023+
if es['project_name'] not in existing_timestamp_mapping:
9051024
existing_timestamp_mapping[es['project_name']] = set()
9061025
existing_timestamp_mapping[es['project_name']].add(es['date'])
9071026

1027+
projects_with_new_results = set()
9081028
for new_ts in project_timestamps:
9091029
to_add = True
9101030

@@ -914,24 +1034,44 @@ def extend_db_json_files(project_timestamps, output_directory):
9141034
to_add = False
9151035
if to_add:
9161036
existing_timestamps.append(new_ts)
1037+
projects_with_new_results.add(new_ts['project_name'])
9171038
have_added = True
9181039

9191040
if FI_EXCLUDE_ALL_NON_MUSTS:
920-
new_timestamps = []
1041+
# Filter existing timestamps to only those in MUST_INCLUDES.
1042+
kept_timestamps = []
9211043
for ts in existing_timestamps:
9221044
if ts['project_name'] in MUST_INCLUDES:
923-
new_timestamps.append(ts)
924-
existing_timestamps = new_timestamps
1045+
kept_timestamps.append(ts)
1046+
existing_timestamps = kept_timestamps
9251047

926-
new_project_stamps = []
1048+
# Also filter the current project results.
1049+
kept_project_stamps = []
9271050
for project_stamp in project_timestamps:
9281051
if project_stamp['project_name'] in MUST_INCLUDES:
929-
new_project_stamps.append(project_stamp)
930-
project_timestamps = new_project_stamps
1052+
kept_project_stamps.append(project_stamp)
1053+
project_timestamps = kept_project_stamps
1054+
1055+
if should_include_details:
1056+
recent_results = calculate_recent_results(projects_with_new_results,
1057+
existing_timestamps,
1058+
NUM_RECENT_DAYS)
1059+
# TODO these results might detect issues that should be communicated with
1060+
# project maintainers. The best approach might be to load the
1061+
# project_timestamps file (all-project-current.json)
1062+
# separately and load recent results there and maybe issue warnings.
1063+
for pt in project_timestamps:
1064+
try:
1065+
pt['recent_results'] = recent_results.get(pt['project_name'])
1066+
except Exception as exc:
1067+
logger.warning(
1068+
f'Could not get recent results for {pt["project_name"]}: {exc}'
1069+
)
1070+
else:
1071+
recent_results = None
9311072

932-
logging.info('Dumping all current projects')
933-
with open(os.path.join(output_directory, DB_JSON_ALL_CURRENT_FUNCS),
934-
'w') as f:
1073+
logging.info('Dumping current project data')
1074+
with open(os.path.join(output_directory, DB_JSON_ALL_CURRENT), 'w') as f:
9351075
json.dump(project_timestamps, f)
9361076

9371077
# Remove any light-introspector files because they should not be saved in the
@@ -999,7 +1139,8 @@ def update_db_files(db_timestamp,
9991139
f.write(json.dumps(all_header_files))
10001140

10011141
logging.info('Extending DB json files')
1002-
extend_db_json_files(project_timestamps, output_directory)
1142+
extend_db_json_files(project_timestamps, output_directory,
1143+
should_include_details)
10031144

10041145
logging.info('Extending DB time stamps')
10051146
extend_db_timestamps(db_timestamp, output_directory)

tools/web-fuzzing-introspection/app/webapp/__init__.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -85,8 +85,9 @@ def load_db() -> None:
8585
introspector_data=project_timestamp['introspector-data'],
8686
fuzzer_count=project_timestamp['fuzzer-count'],
8787
project_repository=project_timestamp['project_repository'],
88-
light_analysis=project_timestamp.get('light-introspector',
89-
{})))
88+
light_analysis=project_timestamp.get('light-introspector', {}),
89+
recent_results=project_timestamp.get('recent_results'),
90+
))
9091

9192
introspector_data = project_timestamp.get('introspector-data', None)
9293
if introspector_data is None:

tools/web-fuzzing-introspection/app/webapp/models.py

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,8 @@ def __init__(self, name: str, language: str, date: str,
3030
introspector_data: Optional[Dict[str,
3131
Any]], fuzzer_count: int,
3232
project_repository: Optional[str], light_analysis: Dict[Any,
33-
Any]):
33+
Any],
34+
recent_results: Optional[Dict[str, Any]]):
3435
self.name = name
3536
self.language = language
3637
self.date = date
@@ -39,9 +40,13 @@ def __init__(self, name: str, language: str, date: str,
3940
self.fuzzer_count = fuzzer_count
4041
self.project_repository = project_repository
4142
self.light_analysis = light_analysis
43+
self.recent_results = recent_results
4244

4345
def has_introspector(self) -> bool:
44-
return self.introspector_data != None
46+
return self.introspector_data is not None
47+
48+
def has_recent_results(self) -> bool:
49+
return self.recent_results is not None
4550

4651

4752
class DBTimestamp:

tools/web-fuzzing-introspection/app/webapp/routes.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -574,7 +574,8 @@ def project_profile():
574574
coverage_data=None,
575575
introspector_data=None,
576576
project_repository=None,
577-
light_analysis={})
577+
light_analysis={},
578+
recent_results=None)
578579

579580
# Get statistics of the project
580581
project_statistics = data_storage.PROJECT_TIMESTAMPS

0 commit comments

Comments
 (0)