@@ -38,10 +38,14 @@ def handle(self, *args, **options):
3838 # These shouldn't even exist, but we have quite a few rows like that
3939 # anyway. Since they're most likely just broken reports, we don't care.
4040 result = client .query_and_wait (
41- f"""SELECT r.*, t.language_code, t.translated_text
41+ f"""SELECT
42+ r.*, t.language_code, t.translated_text,
43+ c.label as ml_label, c.probability as ml_probability
4244 FROM `{ settings .BIGQUERY_TABLE } ` as r
4345 LEFT JOIN `{ settings .BIGQUERY_TRANSLATIONS_TABLE } ` t
4446 ON r.uuid = t.report_uuid
47+ LEFT JOIN `{ settings .BIGQUERY_CLASSIFICATION_TABLE } ` c
48+ ON r.uuid = c.report_uuid
4549 WHERE r.url IS NOT NULL
4650 AND r.comments IS NOT NULL
4751 AND r.reported_at >= @since;""" ,
@@ -53,6 +57,20 @@ def handle(self, *args, **options):
5357 )
5458
5559 for row in result :
60+ # The BugBot ML prediction can assign two labels, invalid or valid,
61+ # with a probability between 0 and 1. Having two labels makes
62+ # filtering and sorting harder, so let's transform "invalid 95%"
63+ # into "valid 5%".
64+ # There is a rare chance that a bug will have no score. In this case,
65+ # we just assign None, which will get treated as invalid in the
66+ # frontend.
67+ ml_valid_probability = None
68+ match row .ml_label :
69+ case "invalid" :
70+ ml_valid_probability = 1 - row .ml_probability
71+ case "valid" :
72+ ml_valid_probability = row .ml_probability
73+
5674 report_obj = Report (
5775 app_name = row .app_name ,
5876 app_channel = row .app_channel ,
@@ -66,6 +84,7 @@ def handle(self, *args, **options):
6684 url = urlsplit (row .url ),
6785 os = row .os ,
6886 uuid = row .uuid ,
87+ ml_valid_probability = ml_valid_probability ,
6988 )
7089 with suppress (IntegrityError ):
7190 ReportEntry .objects .create_from_report (report_obj )
0 commit comments