Addressing coderabbit comments

mazam-lab · mazam-lab · commit 88ad308b60ed · 2025-10-13T14:47:41.000-04:00
diff --git a/examples/notebooks/memory_estimator_example.ipynb b/examples/notebooks/memory_estimator_example.ipynb
@@ -73,13 +73,13 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 24,
+   "execution_count": null,
    "id": "70462895",
    "metadata": {},
    "outputs": [],
    "source": [
     "num_gpus = 2\n",
-    "gpu_memory = 48 * (2**30) # 80 GB in bytes"
+    "gpu_memory = 48 * (2**30) # 48 GB in bytes"
    ]
   },
   {
diff --git a/src/training_hub/profiling/memory_estimator.py b/src/training_hub/profiling/memory_estimator.py
@@ -1,8 +1,5 @@
-from click import FLOAT
-import torch 
-from typing import Callable, Optional, override
+from typing import override
 from transformers import AutoModel
-from transformers.models.perceiver.modeling_perceiver import PerceiverMultimodalPreprocessor
 
 """
 Code assisted by Cursor/Claude4
@@ -14,8 +11,8 @@
 FLOAT8_BYTES_N: int = 1
 ADAMW_PARAMS_N: int = 2
 
-# Helper lambda to do the rounding when printing 
-ROUNDER = lambda value : str(round(value / 1073741824, 1))
+# Helper function to do the rounding when printing 
+def ROUNDER(value: int) -> str: return str(round(value / 1073741824, 1))
 
 class BasicEstimator:
     """
@@ -43,11 +40,12 @@ def __init__(
         num_gpus: int = 8,
         gpu_memory: int = 85899345920,
         model_path: str = "ibm-granite/granite-3.3-8b-instruct",
-        effective_batch_size: int = None,
-        max_seq_len: int = None,
-        max_tokens_per_gpu: int = None,
+        effective_batch_size: int | None = None,
+        max_seq_len: int | None = None,
+        max_tokens_per_gpu: int | None = None,
         use_liger: bool = False,
         verbose: int = 1,
+        trust_remote_code: bool = False,
     ):
         self.num_gpus = num_gpus
         self.gpu_memory = gpu_memory
@@ -56,7 +54,7 @@ def __init__(
         self.verbose = verbose
         
         # Load model directly
-        self.model = AutoModel.from_pretrained(model_path, trust_remote_code=True)
+        self.model = AutoModel.from_pretrained(model_path, trust_remote_code=trust_remote_code)
 
         # Determine parameters needed for calculations
         self.num_params: int = self.model.num_parameters(only_trainable=False)
@@ -278,9 +276,9 @@ def __init__(
         num_gpus: int = 8,
         gpu_memory: int = 85899345920,
         model_path: str = "ibm-granite/granite-3.3-8b-instruct",
-        effective_batch_size: int = None,
-        max_seq_len: int = None,
-        max_tokens_per_gpu: int = None,
+        effective_batch_size: int | None = None,
+        max_seq_len: int | None = None,
+        max_tokens_per_gpu: int | None = None,
         use_liger: bool = False,
         verbose: int = 1,
     ):
@@ -348,11 +346,12 @@ def estimate(
         num_gpus: int = 8,
         gpu_memory: int = 85899345920,
         model_path: str = "ibm-granite/granite-3.3-8b-instruct",
-        effective_batch_size: int = None,
-        max_seq_len: int = None,
-        max_tokens_per_gpu: int = None,
+        effective_batch_size: int | None = None,
+        max_seq_len: int | None = None,
+        max_tokens_per_gpu: int | None = None,
         use_liger: bool = False,
         verbose: int = 1,
+        trust_remote_code: bool = False
     ):
     """
     Convenience function for performing estimation
@@ -383,7 +382,25 @@ def estimate(
     """
 
     if training_method.lower() == "osft":
-        estimator = OSFTEstimator(num_gpus, gpu_memory, model_path, effective_batch_size, max_seq_len, max_tokens_per_gpu, use_liger, verbose)
+        estimator = OSFTEstimator(num_gpus,
+                                    gpu_memory,
+                                    model_path,
+                                    effective_batch_size,
+                                    max_seq_len,
+                                    max_tokens_per_gpu,
+                                    use_liger,
+                                    verbose,
+                                    trust_remote_code,
+                                )
     else:
-        estimator = BasicEstimator(num_gpus, gpu_memory, model_path, effective_batch_size, max_seq_len, max_tokens_per_gpu, use_liger, verbose)
+        estimator = BasicEstimator(num_gpus,
+                                    gpu_memory,
+                                    model_path,
+                                    effective_batch_size,
+                                    max_seq_len,
+                                    max_tokens_per_gpu,
+                                    use_liger,
+                                    verbose, 
+                                    trust_remote_code
+                                )
     return estimator.estimate()

Original file line number	Diff line number	Diff line change
`@@ -73,13 +73,13 @@`
`73`	`73`	`},`
`74`	`74`	`{`
`75`	`75`	`"cell_type": "code",`
`76`		`- "execution_count": 24,`
	`76`	`+ "execution_count": null,`
`77`	`77`	`"id": "70462895",`
`78`	`78`	`"metadata": {},`
`79`	`79`	`"outputs": [],`
`80`	`80`	`"source": [`
`81`	`81`	`"num_gpus = 2\n",`
`82`		`- "gpu_memory = 48 * (2**30) # 80 GB in bytes"`
	`82`	`+ "gpu_memory = 48 * (2**30) # 48 GB in bytes"`
`83`	`83`	`]`
`84`	`84`	`},`
`85`	`85`	`{`