Skip to content

Commit b9992cd

Browse files
Refactor metric extraction and clean up unused code
Replaced calls to extract_consensus_metrics_from_discussion with direct use of _extract_metrics_from_text in consensus.py and removed the now-unused extract_consensus_metrics_from_discussion function. Cleaned up debug logging in providers/grok.py and removed unused metadata handling in utils.py. Fixed cache format count key from 'v1.0' to '1.0' in utils.py.
1 parent: 30fe49d · commit: b9992cd

3 files changed

Lines changed: 6 additions & 41 deletions

File tree

python/mllmcelltype/consensus.py

Lines changed: 5 additions & 24 deletions
Original file line number | Diff line number | Diff line change
@@ -666,13 +666,11 @@ def process_controversial_clusters(
666666
# Previously had consensus indicators check here, now using metrics extraction
667667

668668
# Extract consensus proportion and entropy values for the current round
669-
cp_value, h_value = extract_consensus_metrics_from_discussion(response)
669+
cp_value, h_value = _extract_metrics_from_text(response)[:2]
670670

671671
# If unable to extract from discussion, try to extract from consensus check response
672672
if cp_value is None or h_value is None:
673-
cp_value, h_value = extract_consensus_metrics_from_discussion(
674-
consensus_response
675-
)
673+
cp_value, h_value = _extract_metrics_from_text(consensus_response)[:2]
676674

677675
# If still unable to extract, use default values
678676
if cp_value is None:
@@ -710,7 +708,7 @@ def process_controversial_clusters(
710708
)
711709

712710
# Extract CP and H from the discussion if available
713-
cp_value, h_value = extract_consensus_metrics_from_discussion(response)
711+
cp_value, h_value = _extract_metrics_from_text(response)[:2]
714712
if cp_value is not None and h_value is not None:
715713
updated_consensus_proportion[cluster_id] = cp_value
716714
updated_entropy[cluster_id] = h_value
@@ -788,7 +786,7 @@ def process_controversial_clusters(
788786
# if available
789787
if rounds_history:
790788
last_round = rounds_history[-1]
791-
cp_value, h_value = extract_consensus_metrics_from_discussion(last_round)
789+
cp_value, h_value = _extract_metrics_from_text(last_round)[:2]
792790
if cp_value is not None and h_value is not None:
793791
updated_consensus_proportion[cluster_id] = cp_value
794792
updated_entropy[cluster_id] = h_value
@@ -806,7 +804,7 @@ def process_controversial_clusters(
806804
# Extract metrics from the last round if available
807805
if cluster_id not in updated_consensus_proportion and rounds_history:
808806
last_round = rounds_history[-1]
809-
cp_value, h_value = extract_consensus_metrics_from_discussion(last_round)
807+
cp_value, h_value = _extract_metrics_from_text(last_round)[:2]
810808
if cp_value is not None and h_value is not None:
811809
updated_consensus_proportion[cluster_id] = cp_value
812810
updated_entropy[cluster_id] = h_value
@@ -835,22 +833,6 @@ def process_controversial_clusters(
835833
return results, discussion_history, updated_consensus_proportion, updated_entropy
836834

837835

838-
def extract_consensus_metrics_from_discussion(
839-
discussion: str,
840-
) -> tuple[Optional[float], Optional[float]]:
841-
"""Extract consensus proportion (CP) and entropy (H) values from discussion text.
842-
843-
Args:
844-
discussion: Text of the model discussion
845-
846-
Returns:
847-
tuple[Optional[float], Optional[float]]: Extracted CP and H values, or None if not found
848-
849-
"""
850-
cp, h, _ = _extract_metrics_from_text(discussion)
851-
return cp, h
852-
853-
854836
def extract_cell_type_from_discussion(discussion: str) -> Optional[str]:
855837
"""Extract the final cell type determination from a discussion.
856838
@@ -1001,7 +983,6 @@ def interactive_consensus_annotation(
1001983
raise ValueError(error_msg)
1002984

1003985
# Filter marker_genes to only include specified clusters
1004-
original_marker_genes = marker_genes.copy()
1005986
marker_genes = {cluster_id: marker_genes[cluster_id] for cluster_id in valid_clusters}
1006987

1007988
# Log the filtering

python/mllmcelltype/providers/grok.py

Lines changed: 0 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -45,9 +45,6 @@ def process_grok(
4545

4646
write_log(f"Using model: {model}")
4747

48-
# Process all input at once
49-
write_log("Processing input in 1 chunk", level="debug")
50-
5148
# Prepare the request body
5249
body = {"model": model, "messages": [{"role": "user", "content": prompt}]}
5350

@@ -85,8 +82,6 @@ def process_grok(
8582
write_log(f"Got response with {len(res)} lines")
8683
write_log(f"Raw response from Grok:\n{res}", level="debug")
8784

88-
# Success, exit retry loop
89-
write_log("All chunks processed successfully", level="debug")
9085
# Clean up results (remove commas at the end of lines)
9186
return [line.rstrip(",") for line in res]
9287

python/mllmcelltype/utils.py

Lines changed: 1 addition & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -321,23 +321,12 @@ def format_results(results: list[str], clusters: list[str]) -> dict[str, str]:
321321
# Extract annotations from JSON structure
322322
if "annotations" in data and isinstance(data["annotations"], list):
323323
json_result = {}
324-
metadata = {}
325324

326325
for annotation in data["annotations"]:
327326
if "cluster" in annotation and "cell_type" in annotation:
328327
cluster_id = annotation["cluster"]
329328
json_result[cluster_id] = annotation["cell_type"]
330329

331-
# Store additional metadata if available
332-
cluster_metadata = {}
333-
if "confidence" in annotation:
334-
cluster_metadata["confidence"] = annotation["confidence"]
335-
if "key_markers" in annotation:
336-
cluster_metadata["key_markers"] = annotation["key_markers"]
337-
338-
if cluster_metadata:
339-
metadata[cluster_id] = cluster_metadata
340-
341330
# If we found annotations for all clusters, return the result
342331
if len(json_result) == len(clusters):
343332
write_log("Successfully parsed JSON response", level="info")
@@ -699,7 +688,7 @@ def get_cache_stats(cache_dir: Optional[str] = None) -> dict[str, Any]:
699688
oldest = float("inf")
700689
newest = 0
701690
provider_counts = {}
702-
format_counts = {"legacy": 0, "v1.0": 0, "unknown": 0}
691+
format_counts = {"legacy": 0, "1.0": 0, "unknown": 0}
703692
valid_files = 0
704693
invalid_files = 0
705694

0 commit comments

Comments
 (0)