From 344e639d8ca50a2e3082a35516b7944b34ddc70a Mon Sep 17 00:00:00 2001 From: Daniel Harding Date: Sun, 29 May 2022 16:59:49 +0300 Subject: [PATCH 1/4] Make query key functions top-level functions Avoid redefining them every time the SQLPanel.generate_stats() method is called. --- debug_toolbar/panels/sql/panel.py | 33 +++++++++++++++---------------- 1 file changed, 16 insertions(+), 17 deletions(-) diff --git a/debug_toolbar/panels/sql/panel.py b/debug_toolbar/panels/sql/panel.py index 23f453567..6de67f4cb 100644 --- a/debug_toolbar/panels/sql/panel.py +++ b/debug_toolbar/panels/sql/panel.py @@ -48,6 +48,18 @@ def get_transaction_status_display(vendor, level): return choices.get(level) +def _similar_query_key(query): + return query["raw_sql"] + + +def _duplicate_query_key(query): + raw_params = () if query["raw_params"] is None else tuple(query["raw_params"]) + # saferepr() avoids problems because of unhashable types + # (e.g. lists) when used as dictionary keys. + # https://github.com/jazzband/django-debug-toolbar/issues/1091 + return (query["raw_sql"], saferepr(raw_params)) + + class SQLPanel(Panel): """ Panel that displays information about the SQL queries run while processing @@ -147,19 +159,6 @@ def generate_stats(self, request, response): query_similar = defaultdict(lambda: defaultdict(int)) query_duplicates = defaultdict(lambda: defaultdict(int)) - # The keys used to determine similar and duplicate queries. - def similar_key(query): - return query["raw_sql"] - - def duplicate_key(query): - raw_params = ( - () if query["raw_params"] is None else tuple(query["raw_params"]) - ) - # saferepr() avoids problems because of unhashable types - # (e.g. lists) when used as dictionary keys. - # https://github.com/jazzband/django-debug-toolbar/issues/1091 - return (query["raw_sql"], saferepr(raw_params)) - if self._queries: width_ratio_tally = 0 factor = int(256.0 / (len(self._databases) * 2.5)) @@ -181,8 +180,8 @@ def duplicate_key(query): # the last query recorded for each DB alias last_by_alias = {} for alias, query in self._queries: - query_similar[alias][similar_key(query)] += 1 - query_duplicates[alias][duplicate_key(query)] += 1 + query_similar[alias][_similar_query_key(query)] += 1 + query_duplicates[alias][_duplicate_query_key(query)] += 1 trans_id = query.get("trans_id") prev_query = last_by_alias.get(alias, {}) @@ -259,11 +258,11 @@ def duplicate_key(query): try: (query["similar_count"], query["similar_color"]) = query_similar_colors[ alias - ][similar_key(query)] + ][_similar_query_key(query)] ( query["duplicate_count"], query["duplicate_color"], - ) = query_duplicates_colors[alias][duplicate_key(query)] + ) = query_duplicates_colors[alias][_duplicate_query_key(query)] except KeyError: pass From ca1d31ce4aa2ec6ae19232ae96222376c9032375 Mon Sep 17 00:00:00 2001 From: Daniel Harding Date: Sun, 29 May 2022 17:37:30 +0300 Subject: [PATCH 2/4] Simplify similar/duplicate query recording Tweak the data structures to reduce the number of loops required, and pull common code into a function. --- debug_toolbar/panels/sql/panel.py | 74 ++++++++++++------------------- 1 file changed, 28 insertions(+), 46 deletions(-) diff --git a/debug_toolbar/panels/sql/panel.py b/debug_toolbar/panels/sql/panel.py index 6de67f4cb..0ac68d161 100644 --- a/debug_toolbar/panels/sql/panel.py +++ b/debug_toolbar/panels/sql/panel.py @@ -60,6 +60,21 @@ def _duplicate_query_key(query): return (query["raw_sql"], saferepr(raw_params)) +def _process_query_groups(query_groups, databases, colors, name): + counts = defaultdict(int) + for (alias, key), query_group in query_groups.items(): + count = len(query_group) + # Queries are similar / duplicates only if there are at least 2 of them. + if count > 1: + color = next(colors) + for query in query_group: + query[f"{name}_count"] = count + query[f"{name}_color"] = color + counts[alias] += count + for alias, db_info in databases.items(): + db_info[f"{name}_count"] = counts[alias] + + class SQLPanel(Panel): """ Panel that displays information about the SQL queries run while processing @@ -156,8 +171,8 @@ def disable_instrumentation(self): def generate_stats(self, request, response): colors = contrasting_color_generator() trace_colors = defaultdict(lambda: next(colors)) - query_similar = defaultdict(lambda: defaultdict(int)) - query_duplicates = defaultdict(lambda: defaultdict(int)) + similar_query_groups = defaultdict(list) + duplicate_query_groups = defaultdict(list) if self._queries: width_ratio_tally = 0 @@ -180,8 +195,10 @@ def generate_stats(self, request, response): # the last query recorded for each DB alias last_by_alias = {} for alias, query in self._queries: - query_similar[alias][_similar_query_key(query)] += 1 - query_duplicates[alias][_duplicate_query_key(query)] += 1 + similar_query_groups[(alias, _similar_query_key(query))].append(query) + duplicate_query_groups[(alias, _duplicate_query_key(query))].append( + query + ) trans_id = query.get("trans_id") prev_query = last_by_alias.get(alias, {}) @@ -234,48 +251,13 @@ def generate_stats(self, request, response): if final_query.get("trans_id") is not None: final_query["ends_trans"] = True - # Queries are similar / duplicates only if there's as least 2 of them. - # Also, to hide queries, we need to give all the duplicate groups an id - query_colors = contrasting_color_generator() - query_similar_colors = { - alias: { - query: (similar_count, next(query_colors)) - for query, similar_count in queries.items() - if similar_count >= 2 - } - for alias, queries in query_similar.items() - } - query_duplicates_colors = { - alias: { - query: (duplicate_count, next(query_colors)) - for query, duplicate_count in queries.items() - if duplicate_count >= 2 - } - for alias, queries in query_duplicates.items() - } - - for alias, query in self._queries: - try: - (query["similar_count"], query["similar_color"]) = query_similar_colors[ - alias - ][_similar_query_key(query)] - ( - query["duplicate_count"], - query["duplicate_color"], - ) = query_duplicates_colors[alias][_duplicate_query_key(query)] - except KeyError: - pass - - for alias, alias_info in self._databases.items(): - try: - alias_info["similar_count"] = sum( - e[0] for e in query_similar_colors[alias].values() - ) - alias_info["duplicate_count"] = sum( - e[0] for e in query_duplicates_colors[alias].values() - ) - except KeyError: - pass + group_colors = contrasting_color_generator() + _process_query_groups( + similar_query_groups, self._databases, group_colors, "similar" + ) + _process_query_groups( + duplicate_query_groups, self._databases, group_colors, "duplicate" + ) self.record_stats( { From dfaa44fe298aa07b806b3a6122882a7fec2803bb Mon Sep 17 00:00:00 2001 From: Daniel Harding Date: Mon, 30 May 2022 09:14:19 +0300 Subject: [PATCH 3/4] Avoid unnecessary use of saferepr() saferepr() is only necessary to guard against recursive data structures. However, if you pass a recursive data structure as an SQL query parameter, You're Gonna Have A Bad Time. So just use repr() which is faster. --- debug_toolbar/panels/sql/panel.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/debug_toolbar/panels/sql/panel.py b/debug_toolbar/panels/sql/panel.py index 0ac68d161..fd8a16289 100644 --- a/debug_toolbar/panels/sql/panel.py +++ b/debug_toolbar/panels/sql/panel.py @@ -1,7 +1,6 @@ import uuid from collections import defaultdict from copy import copy -from pprint import saferepr from django.db import connections from django.urls import path @@ -54,10 +53,10 @@ def _similar_query_key(query): def _duplicate_query_key(query): raw_params = () if query["raw_params"] is None else tuple(query["raw_params"]) - # saferepr() avoids problems because of unhashable types + # repr() avoids problems because of unhashable types # (e.g. lists) when used as dictionary keys. # https://github.com/jazzband/django-debug-toolbar/issues/1091 - return (query["raw_sql"], saferepr(raw_params)) + return (query["raw_sql"], repr(raw_params)) def _process_query_groups(query_groups, databases, colors, name): From 689723502c4e211ed57dbe50ce85b989f3fc391a Mon Sep 17 00:00:00 2001 From: Daniel Harding Date: Wed, 1 Jun 2022 05:34:53 +0300 Subject: [PATCH 4/4] Add a test for similar/duplicate query grouping --- tests/panels/test_sql.py | 57 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 57 insertions(+) diff --git a/tests/panels/test_sql.py b/tests/panels/test_sql.py index 40ec83dbb..55e723a9a 100644 --- a/tests/panels/test_sql.py +++ b/tests/panels/test_sql.py @@ -507,6 +507,63 @@ def test_nested_template_information(self): self.assertEqual(template_info["context"][0]["content"].strip(), "{{ users }}") self.assertEqual(template_info["context"][0]["highlight"], True) + def test_similar_and_duplicate_grouping(self): + self.assertEqual(len(self.panel._queries), 0) + + User.objects.filter(id=1).count() + User.objects.filter(id=1).count() + User.objects.filter(id=2).count() + User.objects.filter(id__lt=10).count() + User.objects.filter(id__lt=20).count() + User.objects.filter(id__gt=10, id__lt=20).count() + + response = self.panel.process_request(self.request) + self.panel.generate_stats(self.request, response) + + self.assertEqual(len(self.panel._queries), 6) + + queries = self.panel._queries + query = queries[0] + self.assertEqual(query[1]["similar_count"], 3) + self.assertEqual(query[1]["duplicate_count"], 2) + + query = queries[1] + self.assertEqual(query[1]["similar_count"], 3) + self.assertEqual(query[1]["duplicate_count"], 2) + + query = queries[2] + self.assertEqual(query[1]["similar_count"], 3) + self.assertTrue("duplicate_count" not in query[1]) + + query = queries[3] + self.assertEqual(query[1]["similar_count"], 2) + self.assertTrue("duplicate_count" not in query[1]) + + query = queries[4] + self.assertEqual(query[1]["similar_count"], 2) + self.assertTrue("duplicate_count" not in query[1]) + + query = queries[5] + self.assertTrue("similar_count" not in query[1]) + self.assertTrue("duplicate_count" not in query[1]) + + self.assertEqual(queries[0][1]["similar_color"], queries[1][1]["similar_color"]) + self.assertEqual(queries[0][1]["similar_color"], queries[2][1]["similar_color"]) + self.assertEqual( + queries[0][1]["duplicate_color"], queries[1][1]["duplicate_color"] + ) + self.assertNotEqual( + queries[0][1]["similar_color"], queries[0][1]["duplicate_color"] + ) + + self.assertEqual(queries[3][1]["similar_color"], queries[4][1]["similar_color"]) + self.assertNotEqual( + queries[0][1]["similar_color"], queries[3][1]["similar_color"] + ) + self.assertNotEqual( + queries[0][1]["duplicate_color"], queries[3][1]["similar_color"] + ) + class SQLPanelMultiDBTestCase(BaseMultiDBTestCase): panel_id = "SQLPanel"