From eab64f53e187e7968cc5f9e4250d2fb6e397ae6d Mon Sep 17 00:00:00 2001
From: Nathaniel Travis <nathaniel.travis@ziprecruiter.com>
Date: Mon, 24 Mar 2025 15:15:33 -0700
Subject: [PATCH 1/7] handle case where not all metrics are present

---
 flair/models/multitask_model.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/flair/models/multitask_model.py b/flair/models/multitask_model.py
index a20ad01ac7..0f1fc1db9c 100644
--- a/flair/models/multitask_model.py
+++ b/flair/models/multitask_model.py
@@ -198,7 +198,8 @@ def evaluate(  # type: ignore[override]
             # Add metrics so they will be available to _publish_eval_result.
             for avg_type in ("micro avg", "macro avg"):
                 for metric_type in ("f1-score", "precision", "recall"):
-                    scores[(task_id, avg_type, metric_type)] = result.classification_report[avg_type][metric_type]
+                    if result.classification_report.get(avg_type) and result.classification_report["avg_type"].get(metric_type):
+                        scores[(task_id, avg_type, metric_type)] = result.classification_report[avg_type][metric_type]
 
         scores["loss"] = loss.item() / len(batch_split)
 

From 699377cadccedc86f3c20e267aa5d70814f6d3c5 Mon Sep 17 00:00:00 2001
From: Nathaniel Travis <nathaniel.travis@ziprecruiter.com>
Date: Mon, 24 Mar 2025 16:09:33 -0700
Subject: [PATCH 2/7] pass explicit None default to .get() in metric check

---
 flair/models/multitask_model.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/flair/models/multitask_model.py b/flair/models/multitask_model.py
index 0f1fc1db9c..77d7e45452 100644
--- a/flair/models/multitask_model.py
+++ b/flair/models/multitask_model.py
@@ -198,7 +198,7 @@ def evaluate(  # type: ignore[override]
             # Add metrics so they will be available to _publish_eval_result.
             for avg_type in ("micro avg", "macro avg"):
                 for metric_type in ("f1-score", "precision", "recall"):
-                    if result.classification_report.get(avg_type) and result.classification_report["avg_type"].get(metric_type):
+                    if result.classification_report.get(avg_type, None) and result.classification_report["avg_type"].get(metric_type, None):
                         scores[(task_id, avg_type, metric_type)] = result.classification_report[avg_type][metric_type]
 
         scores["loss"] = loss.item() / len(batch_split)

From c2e7e7715ceca9a1321d34e39ff8dca69c299f93 Mon Sep 17 00:00:00 2001
From: Nathaniel Travis <nathaniel.travis@ziprecruiter.com>
Date: Mon, 24 Mar 2025 16:12:43 -0700
Subject: [PATCH 3/7] index classification_report by avg_type, not literal "avg_type"

---
 flair/models/multitask_model.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/flair/models/multitask_model.py b/flair/models/multitask_model.py
index 77d7e45452..d9f3b69f46 100644
--- a/flair/models/multitask_model.py
+++ b/flair/models/multitask_model.py
@@ -198,7 +198,7 @@ def evaluate(  # type: ignore[override]
             # Add metrics so they will be available to _publish_eval_result.
             for avg_type in ("micro avg", "macro avg"):
                 for metric_type in ("f1-score", "precision", "recall"):
-                    if result.classification_report.get(avg_type, None) and result.classification_report["avg_type"].get(metric_type, None):
+                    if result.classification_report.get(avg_type) and result.classification_report[avg_type].get(metric_type):
                         scores[(task_id, avg_type, metric_type)] = result.classification_report[avg_type][metric_type]
 
         scores["loss"] = loss.item() / len(batch_split)

From bf608f0926fde28b8799d44a71ad415afa560ac5 Mon Sep 17 00:00:00 2001
From: Nathaniel Travis <nathaniel.travis@ziprecruiter.com>
Date: Mon, 24 Mar 2025 16:53:07 -0700
Subject: [PATCH 4/7] formatting

---
 flair/models/multitask_model.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/flair/models/multitask_model.py b/flair/models/multitask_model.py
index d9f3b69f46..6839783991 100644
--- a/flair/models/multitask_model.py
+++ b/flair/models/multitask_model.py
@@ -198,7 +198,8 @@ def evaluate(  # type: ignore[override]
             # Add metrics so they will be available to _publish_eval_result.
             for avg_type in ("micro avg", "macro avg"):
                 for metric_type in ("f1-score", "precision", "recall"):
-                    if result.classification_report.get(avg_type) and result.classification_report[avg_type].get(metric_type):
+                    if (result.classification_report.get(avg_type) and 
+                        result.classification_report[avg_type].get(metric_type)):
                         scores[(task_id, avg_type, metric_type)] = result.classification_report[avg_type][metric_type]
 
         scores["loss"] = loss.item() / len(batch_split)

From f3ea87dca07d2aaa36349ba7439f15e5cd300c06 Mon Sep 17 00:00:00 2001
From: Nathaniel Travis <nathaniel.travis@ziprecruiter.com>
Date: Mon, 24 Mar 2025 17:11:29 -0700
Subject: [PATCH 5/7] black formatting

---
 flair/models/multitask_model.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/flair/models/multitask_model.py b/flair/models/multitask_model.py
index 6839783991..56d9a6afd7 100644
--- a/flair/models/multitask_model.py
+++ b/flair/models/multitask_model.py
@@ -198,8 +198,9 @@ def evaluate(  # type: ignore[override]
             # Add metrics so they will be available to _publish_eval_result.
             for avg_type in ("micro avg", "macro avg"):
                 for metric_type in ("f1-score", "precision", "recall"):
-                    if (result.classification_report.get(avg_type) and 
-                        result.classification_report[avg_type].get(metric_type)):
+                    if result.classification_report.get(avg_type) and result.classification_report[avg_type].get(
+                        metric_type
+                    ):
                         scores[(task_id, avg_type, metric_type)] = result.classification_report[avg_type][metric_type]
 
         scores["loss"] = loss.item() / len(batch_split)

From aa375f5af4f9a330feadef419dbc89527145a171 Mon Sep 17 00:00:00 2001
From: Nathaniel Travis <nathaniel.travis@ziprecruiter.com>
Date: Tue, 25 Mar 2025 09:07:33 -0700
Subject: [PATCH 6/7] add mse for regression tasks

---
 flair/models/multitask_model.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/flair/models/multitask_model.py b/flair/models/multitask_model.py
index 56d9a6afd7..371fb59f3f 100644
--- a/flair/models/multitask_model.py
+++ b/flair/models/multitask_model.py
@@ -203,6 +203,11 @@ def evaluate(  # type: ignore[override]
                     ):
                         scores[(task_id, avg_type, metric_type)] = result.classification_report[avg_type][metric_type]
 
+            # The above metrics only apply to classification tasks. For regression
+            # tasks add mse.
+            if result.scores.get("mse"):
+                scores[(task_id, "mse")] = result.scores["mse"]
+
         scores["loss"] = loss.item() / len(batch_split)
 
         return Result(

From 6e1e47d5015ec7c51d1dbb6441f5e777d2942169 Mon Sep 17 00:00:00 2001
From: Nathaniel Travis <nathaniel.travis@ziprecruiter.com>
Date: Tue, 25 Mar 2025 12:05:07 -0700
Subject: [PATCH 7/7] add other regression metrics

---
 flair/models/multitask_model.py | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/flair/models/multitask_model.py b/flair/models/multitask_model.py
index 371fb59f3f..8e953e757b 100644
--- a/flair/models/multitask_model.py
+++ b/flair/models/multitask_model.py
@@ -203,10 +203,11 @@ def evaluate(  # type: ignore[override]
                     ):
                         scores[(task_id, avg_type, metric_type)] = result.classification_report[avg_type][metric_type]
 
-            # The above metrics only apply to classification tasks. For regression
-            # tasks add mse.
-            if result.scores.get("mse"):
-                scores[(task_id, "mse")] = result.scores["mse"]
+            # The above metrics only apply to classification tasks. This adds
+            # regression metrics also.
+            for metric_type in ("mse", "mae", "pearson", "spearman"):
+                if result.scores.get(metric_type):
+                    scores[(task_id, metric_type)] = result.scores[metric_type]
 
         scores["loss"] = loss.item() / len(batch_split)