Commit d087351

👔 Create expected outputs log in longitudinal template flow

1 parent 5f16386

File tree: 4 files changed, +72 -207 lines


CPAC/pipeline/cpac_pipeline.py

Lines changed: 12 additions & 51 deletions
@@ -29,7 +29,6 @@

 import yaml
 import nipype
-from nipype import config, logging
 from flowdump import save_workflow_json, WorkflowJSONMeta
 from indi_aws import aws_utils, fetch_creds

@@ -198,6 +197,7 @@
 from CPAC.utils.monitoring import (
     FMLOGGER,
     getLogger,
+    init_loggers,
     log_nodes_cb,
     log_nodes_initial,
     LOGTAIL,
@@ -221,11 +221,11 @@


 def run_workflow(
-    sub_dict,
-    c,
-    run,
-    pipeline_timing_info=None,
-    p_name=None,
+    sub_dict: dict,
+    c: Configuration,
+    run: bool,
+    pipeline_timing_info: Optional[list] = None,
+    p_name: Optional[str] = None,
     plugin="MultiProc",
     plugin_args=None,
     test_config=False,
@@ -256,8 +256,6 @@ def run_workflow(
     0 for success
     1 for general failure
     """
-    from CPAC.utils.datasource import bidsier_prefix
-
     if plugin is not None and not isinstance(plugin, str):
         msg = (
             'CPAC.pipeline.cpac_pipeline.run_workflow requires a '
@@ -273,36 +271,7 @@ def run_workflow(
     subject_id, p_name, log_dir = set_subject(sub_dict, c)
     c["subject_id"] = subject_id

-    set_up_logger(
-        f"{subject_id}_expectedOutputs",
-        filename=f'{bidsier_prefix(c["subject_id"])}_' 'expectedOutputs.yml',
-        level="info",
-        log_dir=log_dir,
-        mock=True,
-        overwrite_existing=True,
-    )
-    if c.pipeline_setup["Debugging"]["verbose"]:
-        set_up_logger("CPAC.engine", level="debug", log_dir=log_dir, mock=True)
-
-    config.update_config(
-        {
-            "logging": {
-                "log_directory": log_dir,
-                "log_to_file": bool(
-                    getattr(c.pipeline_setup["log_directory"], "run_logging", True)
-                ),
-            },
-            "execution": {
-                "crashfile_format": "txt",
-                "resource_monitor_frequency": 0.2,
-                "stop_on_first_crash": c[
-                    "pipeline_setup", "system_config", "fail_fast"
-                ],
-            },
-        }
-    )
-    config.enable_resource_monitor()
-    logging.update_logging(config)
+    init_loggers(subject_id, c, log_dir, mock=True, longitudinal=False)

     # Start timing here
     pipeline_start_time = time.time()
@@ -555,6 +524,7 @@ def run_workflow(

     workflow_result = None
     exitcode = 0
+    cb_log_filename = os.path.join(log_dir, "callback.log")
     try:
         subject_info["resource_pool"] = []

@@ -566,8 +536,6 @@ def run_workflow(
         subject_info["status"] = "Running"

         # Create callback logger
-        cb_log_filename = os.path.join(log_dir, "callback.log")
-
         try:
             if not os.path.exists(os.path.dirname(cb_log_filename)):
                 os.makedirs(os.path.dirname(cb_log_filename))
@@ -598,8 +566,9 @@ def run_workflow(
             plugin = MultiProcPlugin(plugin_args)

         try:
-            # Actually run the pipeline now, for the current subject
-            workflow_result = workflow.run(plugin=plugin, plugin_args=plugin_args)
+            if run:
+                # Actually run the pipeline now, for the current subject
+                workflow_result = workflow.run(plugin=plugin, plugin_args=plugin_args)
         except UnicodeDecodeError:
             msg = (
                 "C-PAC migrated from Python 2 to Python 3 in v1.6.2 (see "
@@ -814,7 +783,7 @@ def run_workflow(
                 run_start=pipeline_start_datetime,
                 run_finish=strftime("%Y-%m-%d %H:%M:%S"),
                 output_check=check_outputs(
-                    c.pipeline_setup["output_directory"]["path"],
+                    c["pipeline_setup"]["output_directory"]["path"],
                     log_dir,
                     c.pipeline_setup["pipeline_name"],
                     c["subject_id"],
@@ -1073,14 +1042,6 @@ def build_T1w_registration_stack(
     space: Literal["longitudinal", "T1w"] = "T1w",
 ):
     """Build the T1w registration pipeline blocks."""
-    # if space == "longitudinal":
-    #     for using in cfg[
-    #         "registration_workflows", "anatomical_registration", "registration", "using"
-    #     ]:
-    #         if using.lower() != "fsl":
-    #             msg = f"{using} anatomical registration not yet implemented for longitudinal workflows."
-    #             raise NotImplementedError(msg)
-
     if not pipeline_blocks:
         pipeline_blocks = []
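
Note: `init_loggers` itself is defined in `CPAC/utils/monitoring` and its definition is not part of this diff. Reconstructed from the thirty lines it replaces above and from its two call sites, the helper plausibly looks like the sketch below; the handling of the `longitudinal` flag here is an assumption, not confirmed by this commit.

# Hypothetical reconstruction of init_loggers -- the real definition lives
# in CPAC.utils.monitoring and does not appear in this commit.
from nipype import config, logging

from CPAC.utils.configuration import Configuration
from CPAC.utils.datasource import bidsier_prefix
from CPAC.utils.monitoring import set_up_logger


def init_loggers(
    subject_id: str,
    c: Configuration,
    log_dir: str,
    mock: bool = False,
    longitudinal: bool = False,
) -> None:
    """Set up subject loggers and nipype logging config (sketch)."""
    # ASSUMPTION: the longitudinal flag only distinguishes the logger name;
    # the released helper may use it differently.
    infix = "_longitudinal" if longitudinal else ""
    set_up_logger(
        f"{subject_id}{infix}_expectedOutputs",
        filename=f"{bidsier_prefix(subject_id)}_expectedOutputs.yml",
        level="info",
        log_dir=log_dir,
        mock=mock,
        overwrite_existing=True,
    )
    if c.pipeline_setup["Debugging"]["verbose"]:
        set_up_logger("CPAC.engine", level="debug", log_dir=log_dir, mock=mock)
    config.update_config(
        {
            "logging": {
                "log_directory": log_dir,
                "log_to_file": bool(
                    getattr(c.pipeline_setup["log_directory"], "run_logging", True)
                ),
            },
            "execution": {
                "crashfile_format": "txt",
                "resource_monitor_frequency": 0.2,
                "stop_on_first_crash": c[
                    "pipeline_setup", "system_config", "fail_fast"
                ],
            },
        }
    )
    config.enable_resource_monitor()
    logging.update_logging(config)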

CPAC/pipeline/cpac_runner.py

Lines changed: 5 additions & 154 deletions
@@ -29,11 +29,11 @@
 from CPAC.utils.configuration import check_pname, Configuration, set_subject
 from CPAC.utils.configuration.yaml_template import upgrade_pipeline_to_1_8
 from CPAC.utils.ga import track_run
-from CPAC.utils.monitoring import failed_to_start, log_nodes_cb, WFLOGGER
+from CPAC.utils.monitoring import failed_to_start, init_loggers, log_nodes_cb, WFLOGGER


-# Run condor jobs
 def run_condor_jobs(c, config_file, subject_list_file, p_name):
+    """Run condor jobs."""
     # Import packages
     import subprocess
     from time import strftime
@@ -249,6 +249,8 @@ def run_T1w_longitudinal(sublist, cfg: Configuration, dry_run: bool = False):
     # sessions for each participant as value
     for subject_id, sub_list in subject_id_dict.items():
         if len(sub_list) > 1:
+            _, _, log_dir = set_subject(subject_id_dict, cfg)
+            init_loggers(subject_id, cfg, log_dir, mock=True, longitudinal=True)
             anat_longitudinal_wf(subject_id, sub_list, cfg, dry_run=dry_run)
         elif len(sub_list) == 1:
             warnings.warn(
@@ -491,161 +493,10 @@ def run(
     """

     # BEGIN LONGITUDINAL TEMPLATE PIPELINE
-    if (
-        hasattr(c, "longitudinal_template_generation")
-        and c.longitudinal_template_generation["run"]
-    ):
+    if c["longitudinal_template_generation", "run"]:
         run_T1w_longitudinal(sublist, c, dry_run=test_config)
         # TODO functional longitudinal pipeline

-    """
-    if valid_longitudinal_data:
-        rsc_file_list = []
-        for dirpath, dirnames, filenames in os.walk(c.pipeline_setup[
-                'output_directory']['path']):
-            for f in filenames:
-                # TODO is there a better way to check output folder name?
-                if f != '.DS_Store' and 'T1w_longitudinal_pipeline' in dirpath:
-                    rsc_file_list.append(os.path.join(dirpath, f))
-
-        subject_specific_dict = {subj: [] for subj in subject_id_dict.keys()}
-        session_specific_dict = {os.path.join(session['subject_id'], session['unique_id']): [] for session in sublist}
-        for rsc_path in rsc_file_list:
-            key = [s for s in session_specific_dict.keys() if s in rsc_path]
-            if key:
-                session_specific_dict[key[0]].append(rsc_path)
-            else:
-                subj = [s for s in subject_specific_dict.keys() if s in rsc_path]
-                if subj:
-                    subject_specific_dict[subj[0]].append(rsc_path)
-
-        # update individual-specific outputs:
-        # anatomical_brain, anatomical_brain_mask and anatomical_reorient
-        for key in session_specific_dict.keys():
-            for f in session_specific_dict[key]:
-                sub, ses = key.split('/')
-                ses_list = [subj for subj in sublist if sub in subj['subject_id'] and ses in subj['unique_id']]
-                if len(ses_list) > 1:
-                    raise Exception("There are several files containing " + f)
-                if len(ses_list) == 1:
-                    ses = ses_list[0]
-                    subj_id = ses['subject_id']
-                    tmp = f.split(c.pipeline_setup['output_directory']['path'])[-1]
-                    keys = tmp.split(os.sep)
-                    if keys[0] == '':
-                        keys = keys[1:]
-                    if len(keys) > 1:
-                        if ses.get('resource_pool') is None:
-                            ses['resource_pool'] = {
-                                keys[0].split(c.pipeline_setup['pipeline_name'] + '_')[-1]: {
-                                    keys[-2]: f
-                                }
-                            }
-                        else:
-                            strat_key = keys[0].split(c.pipeline_setup['pipeline_name'] + '_')[-1]
-                            if ses['resource_pool'].get(strat_key) is None:
-                                ses['resource_pool'].update({
-                                    strat_key: {
-                                        keys[-2]: f
-                                    }
-                                })
-                            else:
-                                ses['resource_pool'][strat_key].update({
-                                    keys[-2]: f
-                                })
-
-        for key in subject_specific_dict:
-            for f in subject_specific_dict[key]:
-                ses_list = [subj for subj in sublist if key in subj['anat']]
-                for ses in ses_list:
-                    tmp = f.split(c.pipeline_setup['output_directory']['path'])[-1]
-                    keys = tmp.split(os.sep)
-                    if keys[0] == '':
-                        keys = keys[1:]
-                    if len(keys) > 1:
-                        if ses.get('resource_pool') is None:
-                            ses['resource_pool'] = {
-                                keys[0].split(c.pipeline_setup['pipeline_name'] + '_')[-1]: {
-                                    keys[-2]: f
-                                }
-                            }
-                        else:
-                            strat_key = keys[0].split(c.pipeline_setup['pipeline_name'] + '_')[-1]
-                            if ses['resource_pool'].get(strat_key) is None:
-                                ses['resource_pool'].update({
-                                    strat_key: {
-                                        keys[-2]: f
-                                    }
-                                })
-                            else:
-                                if keys[-2] == 'anatomical_brain' or keys[-2] == 'anatomical_brain_mask' or keys[-2] == 'anatomical_skull_leaf':
-                                    pass
-                                elif 'apply_warp_anat_longitudinal_to_standard' in keys[-2] or 'fsl_apply_xfm_longitudinal' in keys[-2]:
-                                    # TODO update!!!
-                                    # it assumes session id == last key (ordered by session count instead of session id) + 1
-                                    # might cause problem if session id is not continuous
-                                    def replace_index(target1, target2, file_path):
-                                        index1 = file_path.index(target1)+len(target1)
-                                        index2 = file_path.index(target2)+len(target2)
-                                        file_str_list = list(file_path)
-                                        file_str_list[index1] = "*"
-                                        file_str_list[index2] = "*"
-                                        file_path_updated = "".join(file_str_list)
-                                        file_list = glob.glob(file_path_updated)
-                                        file_list.sort()
-                                        return file_list
-                                    if ses['unique_id'] == str(int(keys[-2][-1])+1):
-                                        if keys[-3] == 'seg_probability_maps':
-                                            f_list = replace_index('seg_probability_maps_', 'segment_prob_', f)
-                                            ses['resource_pool'][strat_key].update({
-                                                keys[-3]: f_list
-                                            })
-                                        elif keys[-3] == 'seg_partial_volume_files':
-                                            f_list = replace_index('seg_partial_volume_files_', 'segment_pve_', f)
-                                            ses['resource_pool'][strat_key].update({
-                                                keys[-3]: f_list
-                                            })
-                                        else:
-                                            ses['resource_pool'][strat_key].update({
-                                                keys[-3]: f # keys[-3]: 'anatomical_to_standard'
-                                            })
-                                elif keys[-2] != 'warp_list':
-                                    ses['resource_pool'][strat_key].update({
-                                        keys[-2]: f
-                                    })
-                                elif keys[-2] == 'warp_list':
-                                    if 'ses-'+ses['unique_id'] in tmp:
-                                        ses['resource_pool'][strat_key].update({
-                                            keys[-2]: f
-                                        })
-        for key in subject_specific_dict:
-            ses_list = [subj for subj in sublist if key in subj['anat']]
-            for ses in ses_list:
-                for reg_strat in strat_list:
-                    try:
-                        ss_strat_list = list(ses['resource_pool'])
-                        for strat_key in ss_strat_list:
-                            try:
-                                ses['resource_pool'][strat_key].update({
-                                    'registration_method': reg_strat['registration_method']
-                                })
-                            except KeyError:
-                                pass
-                    except KeyError:
-                        pass
-
-        yaml.dump(sublist, open(os.path.join(c.pipeline_setup['working_directory']['path'],'data_config_longitudinal.yml'), 'w'), default_flow_style=False)
-        WFLOGGER.info("\n\nLongitudinal pipeline completed.\n\n")
-
-        # skip main preprocessing
-        if (
-            not c.anatomical_preproc['run'] and
-            not c.functional_preproc['run']
-        ):
-            sys.exit()
-    """
-    # END LONGITUDINAL TEMPLATE PIPELINE
-
     # If it only allows one, run it linearly
     if c.pipeline_setup["system_config"]["num_participants_at_once"] == 1:
         for sub in sublist:
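
Note: the simplified condition relies on `Configuration.__getitem__` accepting a tuple of nested keys (the same pattern as `c["pipeline_setup", "system_config", "fail_fast"]` in cpac_pipeline.py above), so the `hasattr` guard is no longer needed once the key is guaranteed by the config defaults. A minimal stand-in for the access pattern, purely illustrative and not the real class:

# Toy stand-in for CPAC's Configuration tuple-key lookup -- illustrative only.
from typing import Any, Union


class TupleKeyedConfig:
    """Minimal example of nested lookup via a tuple of keys."""

    def __init__(self, d: dict) -> None:
        self._d = d

    def __getitem__(self, key: Union[str, tuple]) -> Any:
        if isinstance(key, tuple):
            value = self._d
            for k in key:  # walk one nesting level per tuple element
                value = value[k]
            return value
        return self._d[key]


cfg = TupleKeyedConfig({"longitudinal_template_generation": {"run": True}})
assert cfg["longitudinal_template_generation", "run"] is True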

CPAC/utils/monitoring/__init__.py

Lines changed: 2 additions & 0 deletions
@@ -26,6 +26,7 @@
     FMLOGGER,
     getLogger,
     IFLOGGER,
+    init_loggers,
     set_up_logger,
     UTLOGGER,
     WFLOGGER,
@@ -44,6 +45,7 @@
     "FMLOGGER",
     "getLogger",
     "IFLOGGER",
+    "init_loggers",
     "LoggingHTTPServer",
     "LoggingRequestHandler",
     "log_nodes_cb",
