import datasets


# Subject boundaries for the MMLU-SR answer_only configuration,
# based on empirical analysis of the dataset structure.
SUBJECT_BOUNDARIES = {
    "anatomy": (100, 228),  # 129 questions (inclusive row range)
    # Add other subjects as needed
}


def filter_by_subject(dataset: datasets.Dataset, subject: str) -> datasets.Dataset:
    """Filter the dataset down to the questions for a single subject."""
    if subject not in SUBJECT_BOUNDARIES:
        raise ValueError(
            f"Unknown subject: {subject}. Available subjects: {list(SUBJECT_BOUNDARIES.keys())}"
        )

    start_idx, end_idx = SUBJECT_BOUNDARIES[subject]

    # The boundaries above are for the test split (13985 rows). If this is a
    # smaller split (like train), scale them proportionally.
    dataset_size = len(dataset)
    if dataset_size != 13985:  # Not the full test split
        scale_factor = dataset_size / 13985
        start_idx = int(start_idx * scale_factor)
        end_idx = int(end_idx * scale_factor)

        # Clamp so we don't index past the end of the split
        end_idx = min(end_idx, dataset_size - 1)

    if start_idx >= dataset_size:
        # Boundaries are completely out of range; return an empty dataset
        return dataset.select([])

    return dataset.select(range(start_idx, end_idx + 1))
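
# Worked example of the scaling above (illustrative only; the 1000-row split
# size is hypothetical): for anatomy's (100, 228) boundaries,
#   start_idx = int(100 * 1000 / 13985) = 7
#   end_idx   = int(228 * 1000 / 13985) = 16
# so rows 7..16 (inclusive) would be selected.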


def process_docs(dataset: datasets.Dataset) -> datasets.Dataset:
    def _helper(doc):
        # Map the generic column names to the format the task expects:
        # column_0: question, column_1-4: choices, column_5: answer
        out_doc = {
            "questions": doc["column_0"],
            "choices": [doc["column_1"], doc["column_2"], doc["column_3"], doc["column_4"]],
            "answer": doc["column_5"],  # Already in letter format (A, B, C, D)
        }
        return out_doc

    return dataset.map(_helper)
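
# For reference, one processed row (hypothetical values, assuming the raw
# split really uses the generic column_0..column_5 names):
#   in:  {"column_0": "Which bone ...?", "column_1": "femur", ..., "column_5": "A"}
#   out: {"questions": "Which bone ...?", "choices": ["femur", ...], "answer": "A"}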


def doc_to_text(doc):
    return doc["questions"].strip()


def doc_to_target(doc):
    return doc["answer"]


def doc_to_choice(doc):
    return ["A", "B", "C", "D"]
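
# These hooks follow the lm-evaluation-harness convention: a task YAML can
# bind them with `!function` entries. A sketch (the utils module name is an
# assumption):
#   process_docs: !function utils.process_docs_anatomy
#   doc_to_text: !function utils.doc_to_text
#   doc_to_target: !function utils.doc_to_target
#   doc_to_choice: !function utils.doc_to_choice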


def process_docs_anatomy(dataset: datasets.Dataset) -> datasets.Dataset:
    """Process docs specifically for anatomy questions: filter, then format."""
    # Filter to the anatomy slice, then reuse the standard column mapping
    # from process_docs rather than duplicating its helper.
    return process_docs(filter_by_subject(dataset, "anatomy"))
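

if __name__ == "__main__":
    # Minimal smoke test, a sketch only: the dataset path and config below are
    # assumptions -- point them at the actual MMLU-SR answer_only data before
    # running.
    ds = datasets.load_dataset("NiniCat/MMLU-SR", "answer_only", split="test")
    anatomy = process_docs_anatomy(ds)
    print(len(anatomy), "anatomy rows")
    print(doc_to_text(anatomy[0]))
    print("target:", doc_to_target(anatomy[0]))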