import html
import re
from typing import Any, Iterable, Optional


class Comment():
    """ Pagure "commented" activity reduced to the issue or PR it refers to """

    # Any anchor element; group 1 is the raw attribute text of the tag.
    _ANCHOR = re.compile(r'<a\b([^>]*)>[^<]*</a>', re.IGNORECASE)
    # Attributes are looked up separately so their order does not matter.
    _HREF = re.compile(r'(?:^|\s)href="([^"]*)"', re.IGNORECASE)
    _TITLE = re.compile(r'(?:^|\s)title="([^"]*)"', re.IGNORECASE)
    # e.g. "/rpms/munge/pull-request/10" -> ("rpms/munge", "10")
    _TARGET = re.compile(r'/([^/]+(?:/[^/]+)*)/(?:pull-request|issue)/(\d+)')
    _TAG = re.compile(r'<[^>]+>')

    def __init__(self, data, options, url):
        """
        Build a comment from one activity record.

        data ...... mapping providing "date" and "description_mk" (HTML)
        options ... object whose ``format`` attribute selects the output
        url ....... API url; its "/api/0" part is dropped to build links
        """
        self.options = options
        self.date = data["date"]
        # Avoid "//" in generated links when the url has a trailing slash
        self.base_url = url.replace("/api/0", "").rstrip("/")
        self.href: Optional[str] = None
        self.title: str = ''
        self.project: Optional[str] = None
        self.identifier: Optional[str] = None
        self._parse(data["description_mk"])

    def _parse(self, description_mk: str) -> None:
        """
        Extract href, title, project and id from the HTML description.

        The first anchor whose href points to a pull request or an issue
        wins; other anchors are skipped. When no such anchor exists the
        title falls back to the description with all tags removed, and
        href, project and identifier stay None.
        """
        self.href = None
        self.project = None
        self.identifier = None
        for anchor in self._ANCHOR.finditer(description_mk):
            attributes = anchor.group(1)
            href = self._HREF.search(attributes)
            if href is None:
                continue
            # Attribute values are HTML-escaped, decode before using them
            link = html.unescape(href.group(1))
            target = self._TARGET.search(link)
            if target is None:
                continue
            title = self._TITLE.search(attributes)
            self.href = link
            self.title = html.unescape(title.group(1)) if title else ''
            self.project = target.group(1)
            self.identifier = target.group(2)
            return
        # Nothing identifiable: keep the whole description as plain text
        self.title = html.unescape(
            self._TAG.sub('', description_mk)).strip()

    def __str__(self) -> str:
        """ String representation """
        if self.project and self.identifier:
            label = f"{self.project}#{self.identifier}"
            if self.options.format == "markdown":
                link = f"{self.base_url}{self.href}"
                return f"[{label}]({link}) - {self.title}"
            return f"{label} - {self.title}"
        return f'{self.date} - {self.title}'


def unique_comments(comments: Iterable[Any]) -> list:
    """
    Keep one comment per issue or pull request, sorted by string form.

    Comments whose project or identifier could not be parsed are never
    merged: they would all share the (None, None) key and every one of
    them except the first would be silently dropped. The first comment
    seen for a given issue or pull request is the one that is kept.
    """
    seen = set()
    unique = []
    for comment in comments:
        if comment.project is None or comment.identifier is None:
            unique.append(comment)
            continue
        key = (comment.project, comment.identifier)
        if key in seen:
            continue
        seen.add(key)
        unique.append(comment)
    return sorted(unique, key=str)
a/tests/unit/plugins/test_pagure.py +++ b/tests/unit/plugins/test_pagure.py @@ -96,11 +96,11 @@ def test_pagure_comments(): did.base.Config(CONFIG) option = "--pagure-commented " stats = did.cli.main(option + INTERVAL)[0][0].stats[0].stats[COMMENTS].stats - assert any("2018-11-26 - psss commented on PR" in str(stat) for stat in stats) + assert any("did#2 - Closed Issue" in str(stat) for stat in stats) stats = did.cli.main(option + BEFORE)[0][0].stats[0].stats[COMMENTS].stats - assert any("2018-11-22 - psss commented on PR" in str(stat) for stat in stats) + assert any("fedora-ci/messages#16" in str(stat) for stat in stats) stats = did.cli.main(option + AFTER)[0][0].stats[0].stats[COMMENTS].stats - assert any("2018-11-27 - psss commented on issue" in str(stat) for stat in stats) + assert any("fedora-ci/metadata#3" in str(stat) for stat in stats) def test_pagure_missing_url(caplog: LogCaptureFixture):