diff --git a/autoPyTorch/api/tabular_classification.py b/autoPyTorch/api/tabular_classification.py
index 28d01027d..0d737d7c0 100644
--- a/autoPyTorch/api/tabular_classification.py
+++ b/autoPyTorch/api/tabular_classification.py
@@ -14,7 +14,6 @@
 from autoPyTorch.datasets.base_dataset import BaseDatasetPropertiesType
 from autoPyTorch.datasets.resampling_strategy import (
     HoldoutValTypes,
-    CrossValTypes,
     ResamplingStrategies,
 )
 from autoPyTorch.datasets.tabular_dataset import TabularDataset
@@ -384,13 +383,6 @@ def search(
             dataset_name=dataset_name
         )
 
-        if not isinstance(self.resampling_strategy, (CrossValTypes, HoldoutValTypes)):
-            raise ValueError(
-                'Hyperparameter optimization requires a validation split. '
-                'Expected `self.resampling_strategy` to be either '
-                '(CrossValTypes, HoldoutValTypes), but got {}'.format(self.resampling_strategy)
-            )
-
         return self._search(
             dataset=self.dataset,
             optimize_metric=optimize_metric,
diff --git a/autoPyTorch/api/tabular_regression.py b/autoPyTorch/api/tabular_regression.py
index 9cc74227d..c18a90c42 100644
--- a/autoPyTorch/api/tabular_regression.py
+++ b/autoPyTorch/api/tabular_regression.py
@@ -14,7 +14,6 @@
 from autoPyTorch.datasets.base_dataset import BaseDatasetPropertiesType
 from autoPyTorch.datasets.resampling_strategy import (
     HoldoutValTypes,
-    CrossValTypes,
     ResamplingStrategies,
 )
 from autoPyTorch.datasets.tabular_dataset import TabularDataset
@@ -384,13 +383,6 @@ def search(
             dataset_name=dataset_name
         )
 
-        if not isinstance(self.resampling_strategy, (CrossValTypes, HoldoutValTypes)):
-            raise ValueError(
-                'Hyperparameter optimization requires a validation split. '
-                'Expected `self.resampling_strategy` to be either '
-                '(CrossValTypes, HoldoutValTypes), but got {}'.format(self.resampling_strategy)
-            )
-
         return self._search(
             dataset=self.dataset,
             optimize_metric=optimize_metric,
diff --git a/autoPyTorch/data/tabular_feature_validator.py b/autoPyTorch/data/tabular_feature_validator.py
index 6895e8478..3f853a653 100644
--- a/autoPyTorch/data/tabular_feature_validator.py
+++ b/autoPyTorch/data/tabular_feature_validator.py
@@ -14,14 +14,13 @@
 from sklearn.exceptions import NotFittedError
 from sklearn.impute import SimpleImputer
 from sklearn.pipeline import make_pipeline
-from sklearn.preprocessing import OneHotEncoder, StandardScaler
+from sklearn.preprocessing import OrdinalEncoder
 
 from autoPyTorch.data.base_feature_validator import BaseFeatureValidator, SUPPORTED_FEAT_TYPES
 
 
 def _create_column_transformer(
     preprocessors: Dict[str, List[BaseEstimator]],
-    numerical_columns: List[str],
     categorical_columns: List[str],
 ) -> ColumnTransformer:
     """
@@ -32,8 +31,6 @@ def _create_column_transformer(
     Args:
         preprocessors (Dict[str, List[BaseEstimator]]):
             Dictionary containing list of numerical and categorical preprocessors.
-        numerical_columns (List[str]):
-            List of names of numerical columns
         categorical_columns (List[str]):
             List of names of categorical columns
 
@@ -41,17 +38,11 @@ def _create_column_transformer(
         ColumnTransformer
     """
 
-    numerical_pipeline = 'drop'
-    categorical_pipeline = 'drop'
-    if len(numerical_columns) > 0:
-        numerical_pipeline = make_pipeline(*preprocessors['numerical'])
-    if len(categorical_columns) > 0:
-        categorical_pipeline = make_pipeline(*preprocessors['categorical'])
+    categorical_pipeline = make_pipeline(*preprocessors['categorical'])
 
     return ColumnTransformer([
-        ('categorical_pipeline', categorical_pipeline, categorical_columns),
-        ('numerical_pipeline', numerical_pipeline, numerical_columns)],
-        remainder='drop'
+        ('categorical_pipeline', categorical_pipeline, categorical_columns)],
+        remainder='passthrough'
     )
 
 
@@ -59,22 +50,17 @@ def get_tabular_preprocessors() -> Dict[str, List[BaseEstimator]]:
     """
     This function creates a Dictionary containing a list
     of numerical and categorical preprocessors
-
     Returns:
         Dict[str, List[BaseEstimator]]
     """
     preprocessors: Dict[str, List[BaseEstimator]] = dict()
 
     # Categorical Preprocessors
-    onehot_encoder = OneHotEncoder(categories='auto', sparse=False, handle_unknown='ignore')
+    ordinal_encoder = OrdinalEncoder(handle_unknown='use_encoded_value',
+                                     unknown_value=-1)
     categorical_imputer = SimpleImputer(strategy='constant', copy=False)
 
-    # Numerical Preprocessors
-    numerical_imputer = SimpleImputer(strategy='median', copy=False)
-    standard_scaler = StandardScaler(with_mean=True, with_std=True, copy=False)
-
-    preprocessors['categorical'] = [categorical_imputer, onehot_encoder]
-    preprocessors['numerical'] = [numerical_imputer, standard_scaler]
+    preprocessors['categorical'] = [categorical_imputer, ordinal_encoder]
 
     return preprocessors
 
@@ -161,31 +147,47 @@ def _fit(
 
             X = cast(pd.DataFrame, X)
 
-            self.all_nan_columns = set([column for column in X.columns if X[column].isna().all()])
+            all_nan_columns = X.columns[X.isna().all()]
+            for col in all_nan_columns:
+                X[col] = pd.to_numeric(X[col])
+
+            # Handle objects if possible
+            exist_object_columns = has_object_columns(X.dtypes.values)
+            if exist_object_columns:
+                X = self.infer_objects(X)
 
-            categorical_columns, numerical_columns, feat_type = self._get_columns_info(X)
+            self.dtypes = [dt.name for dt in X.dtypes]  # Also note this change in self.dtypes
+            self.all_nan_columns = set(all_nan_columns)
 
-            self.enc_columns = categorical_columns
+            self.enc_columns, self.feat_type = self._get_columns_info(X)
 
-            preprocessors = get_tabular_preprocessors()
-            self.column_transformer = _create_column_transformer(
-                preprocessors=preprocessors,
-                numerical_columns=numerical_columns,
-                categorical_columns=categorical_columns,
-            )
+            if len(self.enc_columns) > 0:
 
-            # Mypy redefinition
-            assert self.column_transformer is not None
-            self.column_transformer.fit(X)
+                preprocessors = get_tabular_preprocessors()
+                self.column_transformer = _create_column_transformer(
+                    preprocessors=preprocessors,
+                    categorical_columns=self.enc_columns,
+                )
 
-            # The column transformer reorders the feature types
-            # therefore, we need to change the order of columns as well
-            # This means categorical columns are shifted to the left
+                # Mypy redefinition
+                assert self.column_transformer is not None
+                self.column_transformer.fit(X)
 
-            self.feat_type = sorted(
-                feat_type,
-                key=functools.cmp_to_key(self._comparator)
-            )
+                # The column transformer moves categorical columns before all numerical columns,
+                # therefore we need to sort the feature types so that they comply with this change
+
+                self.feat_type = sorted(
+                    self.feat_type,
+                    key=functools.cmp_to_key(self._comparator)
+                )
+
+                encoded_categories = self.column_transformer.\
+                    named_transformers_['categorical_pipeline'].\
+                    named_steps['ordinalencoder'].categories_
+                self.categories = [
+                    list(range(len(cat)))
+                    for cat in encoded_categories
+                ]
 
             # differently to categorical_columns and numerical_columns,
             # this saves the index of the column.
@@ -265,6 +267,23 @@ def transform(
         if hasattr(X, "iloc") and not scipy.sparse.issparse(X):
             X = cast(Type[pd.DataFrame], X)
 
+            if self.all_nan_columns is None:
+                raise ValueError('_fit must be called before calling transform')
+
+            for col in list(self.all_nan_columns):
+                X[col] = np.nan
+                X[col] = pd.to_numeric(X[col])
+
+        if len(self.categorical_columns) > 0:
+            # Some categorical columns may contain values in the training set
+            # but be entirely NaN in the testing or validation set.
+            # Such columns are cast to `object` dtype here
+            # so that they are changed to the appropriate dtype
+            # in self.infer_objects
+            all_nan_cat_cols = set(X[self.enc_columns].columns[X[self.enc_columns].isna().all()])
+            dtype_dict = {col: 'object' for col in self.enc_columns if col in all_nan_cat_cols}
+            X = X.astype(dtype_dict)
+
         # Check the data here so we catch problems on new test data
         self._check_data(X)
 
@@ -273,11 +292,6 @@ def transform(
         # We need to convert the column in test data to
         # object otherwise the test column is interpreted as float
         if self.column_transformer is not None:
-            if len(self.categorical_columns) > 0:
-                categorical_columns = self.column_transformer.transformers_[0][-1]
-                for column in categorical_columns:
-                    if X[column].isna().all():
-                        X[column] = X[column].astype('object')
             X = self.column_transformer.transform(X)
 
         # Sparse related transformations
@@ -361,7 +375,6 @@ def _check_data(
                 self.column_order = column_order
 
             dtypes = [dtype.name for dtype in X.dtypes]
-
             diff_cols = X.columns[[s_dtype != dtype for s_dtype, dtype in zip(self.dtypes, dtypes)]]
             if len(self.dtypes) == 0:
                 self.dtypes = dtypes
@@ -373,7 +386,7 @@ def _check_data(
     def _get_columns_info(
         self,
         X: pd.DataFrame,
-    ) -> Tuple[List[str], List[str], List[str]]:
+    ) -> Tuple[List[str], List[str]]:
         """
         Return the columns to be encoded from a pandas dataframe
 
@@ -392,15 +405,12 @@ def _get_columns_info(
         """
 
         # Register if a column needs encoding
-        numerical_columns = []
         categorical_columns = []
         # Also, register the feature types for the estimator
         feat_type = []
 
         # Make sure each column is a valid type
         for i, column in enumerate(X.columns):
-            if self.all_nan_columns is not None and column in self.all_nan_columns:
-                continue
             column_dtype = self.dtypes[i]
             err_msg = "Valid types are `numerical`, `categorical` or `boolean`, " \
                       "but input column {} has an invalid type `{}`.".format(column, column_dtype)
@@ -411,7 +421,6 @@ def _get_columns_info(
             # TypeError: data type not understood in certain pandas types
             elif is_numeric_dtype(column_dtype):
                 feat_type.append('numerical')
-                numerical_columns.append(column)
             elif column_dtype == 'object':
                 # TODO verify how would this happen when we always convert the object dtypes to category
                 raise TypeError(
@@ -437,7 +446,7 @@ def _get_columns_info(
                     "before feeding it to AutoPyTorch.".format(err_msg)
                 )
 
-        return categorical_columns, numerical_columns, feat_type
+        return categorical_columns, feat_type
 
     def list_to_pandas(
         self,
@@ -507,22 +516,26 @@ def infer_objects(self, X: pd.DataFrame) -> pd.DataFrame:
             pd.DataFrame
         """
         if hasattr(self, 'object_dtype_mapping'):
-            # Mypy does not process the has attr. This dict is defined below
-            for key, dtype in self.object_dtype_mapping.items():  # type: ignore[has-type]
-                # honor the training data types
-                try:
-                    X[key] = X[key].astype(dtype.name)
-                except Exception as e:
-                    # Try inference if possible
-                    self.logger.warning(f'Casting the column {key} to {dtype} caused the exception {e}')
-                    pass
+            # honor the training data types
+            try:
+                # Mypy does not process the has attr.
+                X = X.astype(self.object_dtype_mapping)  # type: ignore[has-type]
+            except Exception as e:
+                # Try inference if possible
+                self.logger.warning(f'Casting the columns to training dtypes '  # type: ignore[has-type]
+                                    f'{self.object_dtype_mapping} caused the exception {e}')
+                pass
         else:
-            # Calling for the first time to infer the categories
-            X = X.infer_objects()
-            for column, data_type in zip(X.columns, X.dtypes):
-                if not is_numeric_dtype(data_type):
-                    X[column] = X[column].astype('category')
-
+            if len(self.dtypes) != 0:
+                # when train data has no object dtype, but test does
+                # we prioritise the datatype given in training data
+                dtype_dict = {col: dtype for col, dtype in zip(X.columns, self.dtypes)}
+                X = X.astype(dtype_dict)
+            else:
+                # Calling for the first time to infer the categories
+                X = X.infer_objects()
+                dtype_dict = {col: 'category' for col, dtype in zip(X.columns, X.dtypes) if not is_numeric_dtype(dtype)}
+                X = X.astype(dtype_dict)
             # only numerical attributes and categories
             self.object_dtype_mapping = {column: data_type for column, data_type in zip(X.columns, X.dtypes)}
 
diff --git a/autoPyTorch/datasets/base_dataset.py b/autoPyTorch/datasets/base_dataset.py
index 803ee7cd8..ddf7c8ddf 100644
--- a/autoPyTorch/datasets/base_dataset.py
+++ b/autoPyTorch/datasets/base_dataset.py
@@ -125,7 +125,6 @@ def __init__(
         self.holdout_validators: Dict[str, HoldOutFunc] = {}
         self.no_resampling_validators: Dict[str, NoResamplingFunc] = {}
         self.random_state = np.random.RandomState(seed=seed)
-        self.no_resampling_validators: Dict[str, NoResamplingFunc] = {}
         self.shuffle = shuffle
         self.resampling_strategy = resampling_strategy
         self.resampling_strategy_args = resampling_strategy_args
@@ -143,10 +142,6 @@ def __init__(
             else:
                 self.output_shape = self.train_tensors[1].shape[-1] if self.train_tensors[1].ndim > 1 else 1
 
-        # TODO: Look for a criteria to define small enough to preprocess
-        # False for the regularization cocktails initially
-        self.is_small_preprocess = False
-
         # Make sure cross validation splits are created once
         self.cross_validators = CrossValFuncs.get_cross_validators(*CrossValTypes)
         self.holdout_validators = HoldOutFuncs.get_holdout_validators(*HoldoutValTypes)
diff --git a/autoPyTorch/datasets/resampling_strategy.py b/autoPyTorch/datasets/resampling_strategy.py
index c246b4427..78447a04e 100644
--- a/autoPyTorch/datasets/resampling_strategy.py
+++ b/autoPyTorch/datasets/resampling_strategy.py
@@ -39,13 +39,6 @@ def __call__(self, random_state: np.random.RandomState, val_share: float,
         ...
 
 
-class NoResamplingFunc(Protocol):
-    def __call__(self,
-                 random_state: np.random.RandomState,
-                 indices: np.ndarray) -> np.ndarray:
-        ...
-
-
 class CrossValTypes(IntEnum):
     """The type of cross validation
 
diff --git a/autoPyTorch/evaluation/fit_evaluator.py b/autoPyTorch/evaluation/fit_evaluator.py
deleted file mode 100644
index 52c47b4fa..000000000
--- a/autoPyTorch/evaluation/fit_evaluator.py
+++ /dev/null
@@ -1,378 +0,0 @@
-import time
-from multiprocessing.queues import Queue
-from typing import Any, Dict, List, Optional, Tuple, Union
-
-from ConfigSpace.configuration_space import Configuration
-
-import numpy as np
-
-from sklearn.base import BaseEstimator
-
-from smac.tae import StatusType
-
-from autoPyTorch.automl_common.common.utils.backend import Backend
-from autoPyTorch.datasets.resampling_strategy import NoResamplingStrategyTypes
-from autoPyTorch.evaluation.abstract_evaluator import (
-    AbstractEvaluator,
-    fit_and_suppress_warnings
-)
-from autoPyTorch.evaluation.utils import DisableFileOutputParameters
-from autoPyTorch.pipeline.components.training.metrics.base import autoPyTorchMetric
-from autoPyTorch.utils.common import subsampler
-from autoPyTorch.utils.hyperparameter_search_space_update import HyperparameterSearchSpaceUpdates
-
-
-class FitEvaluator(AbstractEvaluator):
-    def __init__(self, backend: Backend, queue: Queue,
-                 metric: autoPyTorchMetric,
-                 budget: float,
-                 budget_type: str = None,
-                 pipeline_config: Optional[Dict[str, Any]] = None,
-                 configuration: Optional[Configuration] = None,
-                 seed: int = 1,
-                 output_y_hat_optimization: bool = False,
-                 num_run: Optional[int] = None,
-                 include: Optional[Dict[str, Any]] = None,
-                 exclude: Optional[Dict[str, Any]] = None,
-                 disable_file_output: Optional[List[Union[str, DisableFileOutputParameters]]] = None,
-                 init_params: Optional[Dict[str, Any]] = None,
-                 logger_port: Optional[int] = None,
-                 keep_models: Optional[bool] = None,
-                 all_supported_metrics: bool = True,
-                 search_space_updates: Optional[HyperparameterSearchSpaceUpdates] = None) -> None:
-        super().__init__(
-            backend=backend,
-            queue=queue,
-            configuration=configuration,
-            metric=metric,
-            seed=seed,
-            output_y_hat_optimization=output_y_hat_optimization,
-            num_run=num_run,
-            include=include,
-            exclude=exclude,
-            disable_file_output=disable_file_output,
-            init_params=init_params,
-            budget=budget,
-            budget_type=budget_type,
-            logger_port=logger_port,
-            all_supported_metrics=all_supported_metrics,
-            pipeline_config=pipeline_config,
-            search_space_updates=search_space_updates
-        )
-        if not isinstance(self.datamanager.resampling_strategy, NoResamplingStrategyTypes):
-            raise ValueError(
-                "FitEvaluator needs to be fitted on the whole dataset and resampling_strategy "
-                "must be `NoResamplingStrategyTypes`, but got {}".format(
-                    self.datamanager.resampling_strategy
-                ))
-
-        self.splits = self.datamanager.splits
-        self.Y_target: Optional[np.ndarray] = None
-        self.Y_train_targets: np.ndarray = np.ones(self.y_train.shape) * np.NaN
-        self.pipeline: Optional[BaseEstimator] = None
-
-        self.logger.debug("Search space updates :{}".format(self.search_space_updates))
-        self.keep_models = keep_models
-
-    def fit_predict_and_loss(self) -> None:
-        """Fit, predict and compute the loss for no resampling strategy"""
-        assert self.splits is not None, "Can't fit pipeline in {} is datamanager.splits is None" \
-            .format(self.__class__.__name__)
-        additional_run_info: Optional[Dict] = None
-        split_id = 0
-        self.logger.info("Starting fit {}".format(split_id))
-
-        pipeline = self._get_pipeline()
-
-        train_split, test_split = self.splits[split_id]
-        assert test_split is None
-        self.Y_actual_train = self.y_train[train_split]
-        y_train_pred, y_valid_pred, y_test_pred = self._fit_and_predict(pipeline, split_id,
-                                                                        train_indices=train_split,
-                                                                        test_indices=test_split,
-                                                                        add_pipeline_to_self=True)
-        train_loss = self._loss(self.y_train[train_split], y_train_pred)
-        if y_valid_pred is not None:
-            loss = self._loss(self.y_valid, y_valid_pred)
-        elif y_test_pred is not None:
-            loss = self._loss(self.y_test, y_test_pred)
-        else:
-            loss = train_loss
-
-        additional_run_info = pipeline.get_additional_run_info() if hasattr(
-            pipeline, 'get_additional_run_info') else {}
-
-        status = StatusType.SUCCESS
-
-        self.logger.debug("In train evaluator fit_predict_and_loss, num_run: {} loss:{}".format(
-            self.num_run,
-            loss
-        ))
-        self.finish_up(
-            loss=loss,
-            train_loss=train_loss,
-            valid_pred=y_valid_pred,
-            test_pred=y_test_pred,
-            additional_run_info=additional_run_info,
-            file_output=True,
-            status=status,
-            opt_pred=None
-        )
-
-    def _fit_and_predict(self, pipeline: BaseEstimator, fold: int, train_indices: Union[np.ndarray, List],
-                         test_indices: None,
-                         add_pipeline_to_self: bool
-                         ) -> Tuple[np.ndarray, Optional[np.ndarray], Optional[np.ndarray]]:
-
-        X = {'train_indices': train_indices,
-             'val_indices': test_indices,
-             'split_id': fold,
-             'num_run': self.num_run,
-             **self.fit_dictionary}  # fit dictionary
-        y = None
-        fit_and_suppress_warnings(self.logger, pipeline, X, y)
-        self.logger.info("Model fitted, now predicting")
-        (
-            Y_train_pred,
-            Y_valid_pred,
-            Y_test_pred
-        ) = self._predict(
-            pipeline,
-            train_indices=train_indices,
-        )
-
-        if add_pipeline_to_self:
-            self.pipeline = pipeline
-
-        return Y_train_pred, Y_valid_pred, Y_test_pred
-
-    def _predict(self, pipeline: BaseEstimator,
-                 train_indices: Union[np.ndarray, List]
-                 ) -> Tuple[np.ndarray, Optional[np.ndarray], Optional[np.ndarray]]:
-
-        train_pred = self.predict_function(subsampler(self.X_train, train_indices), pipeline,
-                                           self.y_train[train_indices])
-
-        if self.X_valid is not None:
-            valid_pred = self.predict_function(self.X_valid, pipeline,
-                                               self.y_valid)
-        else:
-            valid_pred = None
-
-        if self.X_test is not None:
-            test_pred = self.predict_function(self.X_test, pipeline,
-                                              self.y_train[train_indices])
-        else:
-            test_pred = None
-
-        return train_pred, valid_pred, test_pred
-
-    def finish_up(self, loss: Dict[str, float], train_loss: Dict[str, float],
-                  valid_pred: Optional[np.ndarray],
-                  test_pred: Optional[np.ndarray], additional_run_info: Optional[Dict],
-                  file_output: bool, status: StatusType,
-                  opt_pred: Optional[np.ndarray]
-                  ) -> Optional[Tuple[float, float, int, Dict]]:
-        """This function does everything necessary after the fitting is done:
-
-        * predicting
-        * saving the necessary files
-        We use it as the signal handler so we can recycle the code for the
-        normal usecase and when the runsolver kills us here :)"""
-
-        self.duration = time.time() - self.starttime
-
-        if file_output:
-            loss_, additional_run_info_ = self.file_output(
-                None, valid_pred, test_pred,
-            )
-        else:
-            loss_ = None
-            additional_run_info_ = {}
-
-        validation_loss, test_loss = self.calculate_auxiliary_losses(
-            valid_pred, test_pred
-        )
-
-        if loss_ is not None:
-            return self.duration, loss_, self.seed, additional_run_info_
-
-        cost = loss[self.metric.name]
-
-        additional_run_info = (
-            {} if additional_run_info is None else additional_run_info
-        )
-        for metric_name, value in loss.items():
-            additional_run_info[metric_name] = value
-        additional_run_info['duration'] = self.duration
-        additional_run_info['num_run'] = self.num_run
-        if train_loss is not None:
-            additional_run_info['train_loss'] = train_loss
-        if validation_loss is not None:
-            additional_run_info['validation_loss'] = validation_loss
-        if test_loss is not None:
-            additional_run_info['test_loss'] = test_loss
-
-        rval_dict = {'loss': cost,
-                     'additional_run_info': additional_run_info,
-                     'status': status}
-
-        self.queue.put(rval_dict)
-        return None
-
-    def file_output(
-        self,
-        Y_optimization_pred: np.ndarray,
-        Y_valid_pred: np.ndarray,
-        Y_test_pred: np.ndarray,
-    ) -> Tuple[Optional[float], Dict]:
-
-        # Abort if predictions contain NaNs
-        for y, s in [
-            [Y_valid_pred, 'validation'],
-            [Y_test_pred, 'test']
-        ]:
-            if y is not None and not np.all(np.isfinite(y)):
-                return (
-                    1.0,
-                    {
-                        'error':
-                            'Model predictions for %s set contains NaNs.' % s
-                    },
-                )
-
-        # Abort if we don't want to output anything.
-        if 'all' in self.disable_file_output:
-            return None, {}
-
-        if getattr(self, 'pipeline', None) is not None:
-            if 'pipeline' not in self.disable_file_output:
-                pipeline = self.pipeline
-            else:
-                pipeline = None
-        else:
-            pipeline = None
-
-        self.logger.debug("Saving model {}_{}_{} to disk".format(self.seed, self.num_run, self.budget))
-        self.backend.save_numrun_to_dir(
-            seed=int(self.seed),
-            idx=int(self.num_run),
-            budget=float(self.budget),
-            model=pipeline,
-            cv_model=None,
-            ensemble_predictions=None,
-            valid_predictions=(
-                Y_valid_pred if 'y_valid' not in
-                                self.disable_file_output else None
-            ),
-            test_predictions=(
-                Y_test_pred if 'y_test' not in
-                               self.disable_file_output else None
-            ),
-        )
-
-        return None, {}
-
-
-# create closure for evaluating an algorithm
-def eval_function(
-    backend: Backend,
-    queue: Queue,
-    metric: autoPyTorchMetric,
-    budget: float,
-    config: Optional[Configuration],
-    seed: int,
-    num_run: int,
-    include: Optional[Dict[str, Any]],
-    exclude: Optional[Dict[str, Any]],
-    output_y_hat_optimization: bool = False,
-    disable_file_output: Optional[List[Union[str, DisableFileOutputParameters]]] = None,
-    pipeline_config: Optional[Dict[str, Any]] = None,
-    budget_type: str = None,
-    init_params: Optional[Dict[str, Any]] = None,
-    logger_port: Optional[int] = None,
-    all_supported_metrics: bool = True,
-    search_space_updates: Optional[HyperparameterSearchSpaceUpdates] = None,
-    instance: str = None,
-) -> None:
-    """
-    This closure allows the communication between the ExecuteTaFuncWithQueue and the
-    pipeline trainer (TrainEvaluator).
-
-    Fundamentally, smac calls the ExecuteTaFuncWithQueue.run() method, which internally
-    builds a TrainEvaluator. The TrainEvaluator builds a pipeline, stores the output files
-    to disc via the backend, and puts the performance result of the run in the queue.
-
-
-    Attributes:
-        backend (Backend):
-            An object to interface with the disk storage. In particular, allows to
-            access the train and test datasets
-        queue (Queue):
-            Each worker available will instantiate an evaluator, and after completion,
-            it will return the evaluation result via a multiprocessing queue
-        metric (autoPyTorchMetric):
-            A scorer object that is able to evaluate how good a pipeline was fit. It
-            is a wrapper on top of the actual score method (a wrapper on top of scikit
-            lean accuracy for example) that formats the predictions accordingly.
-        budget: (float):
-            The amount of epochs/time a configuration is allowed to run.
-        budget_type  (str):
-            The budget type, which can be epochs or time
-        pipeline_config (Optional[Dict[str, Any]]):
-            Defines the content of the pipeline being evaluated. For example, it
-            contains pipeline specific settings like logging name, or whether or not
-            to use tensorboard.
-        config (Union[int, str, Configuration]):
-            Determines the pipeline to be constructed.
-        seed (int):
-            A integer that allows for reproducibility of results
-        output_y_hat_optimization (bool):
-            Whether this worker should output the target predictions, so that they are
-            stored on disk. Fundamentally, the resampling strategy might shuffle the
-            Y_train targets, so we store the split in order to re-use them for ensemble
-            selection.
-        num_run (Optional[int]):
-            An identifier of the current configuration being fit. This number is unique per
-            configuration.
-        include (Optional[Dict[str, Any]]):
-            An optional dictionary to include components of the pipeline steps.
-        exclude (Optional[Dict[str, Any]]):
-            An optional dictionary to exclude components of the pipeline steps.
-        disable_file_output (Union[bool, List[str]]):
-            By default, the model, it's predictions and other metadata is stored on disk
-            for each finished configuration. This argument allows the user to skip
-            saving certain file type, for example the model, from being written to disk.
-        init_params (Optional[Dict[str, Any]]):
-            Optional argument that is passed to each pipeline step. It is the equivalent of
-            kwargs for the pipeline steps.
-        logger_port (Optional[int]):
-            Logging is performed using a socket-server scheme to be robust against many
-            parallel entities that want to write to the same file. This integer states the
-            socket port for the communication channel. If None is provided, a traditional
-            logger is used.
-        instance (str):
-            An instance on which to evaluate the current pipeline. By default we work
-            with a single instance, being the provided X_train, y_train of a single dataset.
-            This instance is a compatibility argument for SMAC, that is capable of working
-            with multiple datasets at the same time.
-    """
-    evaluator = FitEvaluator(
-        backend=backend,
-        queue=queue,
-        metric=metric,
-        configuration=config,
-        seed=seed,
-        num_run=num_run,
-        include=include,
-        exclude=exclude,
-        disable_file_output=disable_file_output,
-        init_params=init_params,
-        budget=budget,
-        budget_type=budget_type,
-        logger_port=logger_port,
-        all_supported_metrics=all_supported_metrics,
-        pipeline_config=pipeline_config,
-        search_space_updates=search_space_updates
-    )
-    evaluator.fit_predict_and_loss()
diff --git a/autoPyTorch/evaluation/train_evaluator.py b/autoPyTorch/evaluation/train_evaluator.py
index 5c044b9eb..95614c22c 100644
--- a/autoPyTorch/evaluation/train_evaluator.py
+++ b/autoPyTorch/evaluation/train_evaluator.py
@@ -152,13 +152,6 @@ def __init__(self, backend: Backend, queue: Queue,
             search_space_updates=search_space_updates
         )
 
-        if not isinstance(self.datamanager.resampling_strategy, (CrossValTypes, HoldoutValTypes)):
-            raise ValueError(
-                'TrainEvaluator expect to have (CrossValTypes, HoldoutValTypes) as '
-                'resampling_strategy, but got {}'.format(self.datamanager.resampling_strategy)
-            )
-
-
         if not isinstance(self.datamanager.resampling_strategy, (CrossValTypes, HoldoutValTypes)):
             resampling_strategy = self.datamanager.resampling_strategy
             raise ValueError(
@@ -428,10 +421,10 @@ def eval_train_function(
     budget: float,
     config: Optional[Configuration],
     seed: int,
+    output_y_hat_optimization: bool,
     num_run: int,
     include: Optional[Dict[str, Any]],
     exclude: Optional[Dict[str, Any]],
-    output_y_hat_optimization: bool,
     disable_file_output: Optional[List[Union[str, DisableFileOutputParameters]]] = None,
     pipeline_config: Optional[Dict[str, Any]] = None,
     budget_type: str = None,
diff --git a/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/TabularColumnTransformer.py b/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/TabularColumnTransformer.py
index e8f95ab57..05bede68a 100644
--- a/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/TabularColumnTransformer.py
+++ b/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/TabularColumnTransformer.py
@@ -3,14 +3,14 @@
 import numpy as np
 
 from sklearn.compose import ColumnTransformer
-# from sklearn.pipeline import make_pipeline
+from sklearn.pipeline import make_pipeline
 
 import torch
 
 from autoPyTorch.pipeline.components.preprocessing.tabular_preprocessing.base_tabular_preprocessing import (
     autoPyTorchTabularPreprocessingComponent
 )
-# from autoPyTorch.pipeline.components.preprocessing.tabular_preprocessing.utils import get_tabular_preprocessers
+from autoPyTorch.pipeline.components.preprocessing.tabular_preprocessing.utils import get_tabular_preprocessers
 from autoPyTorch.utils.common import FitRequirement, subsampler
 
 
@@ -52,11 +52,11 @@ def fit(self, X: Dict[str, Any], y: Any = None) -> "TabularColumnTransformer":
         numerical_pipeline = 'passthrough'
         categorical_pipeline = 'passthrough'
 
-        # preprocessors = get_tabular_preprocessers(X)
-        # if len(X['dataset_properties']['numerical_columns']):
-        #     numerical_pipeline = make_pipeline(*preprocessors['numerical'])
-        # if len(X['dataset_properties']['categorical_columns']):
-        #     categorical_pipeline = make_pipeline(*preprocessors['categorical'])
+        preprocessors = get_tabular_preprocessers(X)
+        if len(X['dataset_properties']['numerical_columns']):
+            numerical_pipeline = make_pipeline(*preprocessors['numerical'])
+        if len(X['dataset_properties']['categorical_columns']):
+            categorical_pipeline = make_pipeline(*preprocessors['categorical'])
 
         self.preprocessor = ColumnTransformer([
             ('numerical_pipeline', numerical_pipeline, X['dataset_properties']['numerical_columns']),
diff --git a/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/encoding/NoEncoder.py b/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/encoding/NoEncoder.py
index d62ee26d2..929e99048 100644
--- a/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/encoding/NoEncoder.py
+++ b/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/encoding/NoEncoder.py
@@ -40,7 +40,7 @@ def transform(self, X: Dict[str, Any]) -> Dict[str, Any]:
         Returns:
             (Dict[str, Any]): the updated 'X' dictionary
         """
-        # X.update({'encoder': self.preprocessor})
+        X.update({'encoder': self.preprocessor})
         return X
 
     @staticmethod
diff --git a/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/encoding/base_encoder.py b/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/encoding/base_encoder.py
index 9829cadcd..eadc0a188 100644
--- a/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/encoding/base_encoder.py
+++ b/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/encoding/base_encoder.py
@@ -28,5 +28,5 @@ def transform(self, X: Dict[str, Any]) -> Dict[str, Any]:
         if self.preprocessor['numerical'] is None and self.preprocessor['categorical'] is None:
             raise ValueError("cant call transform on {} without fitting first."
                              .format(self.__class__.__name__))
-        # X.update({'encoder': self.preprocessor})
+        X.update({'encoder': self.preprocessor})
         return X
diff --git a/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/imputation/base_imputer.py b/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/imputation/base_imputer.py
index ac0648481..b65f3c229 100644
--- a/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/imputation/base_imputer.py
+++ b/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/imputation/base_imputer.py
@@ -29,5 +29,5 @@ def transform(self, X: Dict[str, Any]) -> Dict[str, Any]:
         if self.preprocessor['numerical'] is None and self.preprocessor['categorical'] is None:
             raise ValueError("cant call transform on {} without fitting first."
                              .format(self.__class__.__name__))
-        # X.update({'imputer': self.preprocessor})
+        X.update({'imputer': self.preprocessor})
         return X
diff --git a/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/scaling/NoScaler.py b/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/scaling/NoScaler.py
index 9775d17dd..9d50aa8f5 100644
--- a/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/scaling/NoScaler.py
+++ b/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/scaling/NoScaler.py
@@ -43,7 +43,7 @@ def transform(self, X: Dict[str, Any]) -> Dict[str, Any]:
         Returns:
             np.ndarray: Transformed features
         """
-        # X.update({'scaler': self.preprocessor})
+        X.update({'scaler': self.preprocessor})
         return X
 
     @staticmethod
diff --git a/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/scaling/base_scaler.py b/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/scaling/base_scaler.py
index 270fac246..39834dd2b 100644
--- a/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/scaling/base_scaler.py
+++ b/autoPyTorch/pipeline/components/preprocessing/tabular_preprocessing/scaling/base_scaler.py
@@ -28,5 +28,5 @@ def transform(self, X: Dict[str, Any]) -> Dict[str, Any]:
         if self.preprocessor['numerical'] is None and self.preprocessor['categorical'] is None:
             raise ValueError("cant call transform on {} without fitting first."
                              .format(self.__class__.__name__))
-        # X.update({'scaler': self.preprocessor})
+        X.update({'scaler': self.preprocessor})
         return X
diff --git a/autoPyTorch/pipeline/components/setup/early_preprocessor/EarlyPreprocessing.py b/autoPyTorch/pipeline/components/setup/early_preprocessor/EarlyPreprocessing.py
index 7fbf33f99..c25ea6bb0 100644
--- a/autoPyTorch/pipeline/components/setup/early_preprocessor/EarlyPreprocessing.py
+++ b/autoPyTorch/pipeline/components/setup/early_preprocessor/EarlyPreprocessing.py
@@ -20,7 +20,7 @@ def __init__(self, random_state: Optional[np.random.RandomState] = None) -> None
         super().__init__()
         self.random_state = random_state
         self.add_fit_requirements([
-            FitRequirement('is_small_preprocess', (bool,), user_defined=True, dataset_property=True),
+            # FitRequirement('is_small_preprocess', (bool,), user_defined=True, dataset_property=True),
             FitRequirement('X_train', (np.ndarray, pd.DataFrame, csr_matrix), user_defined=True,
                            dataset_property=False)])
 
@@ -32,14 +32,13 @@ def fit(self, X: Dict[str, Any], y: Any = None) -> "EarlyPreprocessing":
     def transform(self, X: Dict[str, Any]) -> Dict[str, Any]:
 
         transforms = get_preprocess_transforms(X)
-        if X['dataset_properties']['is_small_preprocess']:
-            if 'X_train' in X:
-                X_train = X['X_train']
-            else:
-                # Incorporate the transform to the dataset
-                X_train = X['backend'].load_datamanager().train_tensors[0]
-
-            X['X_train'] = preprocess(dataset=X_train, transforms=transforms)
+        if 'X_train' in X:
+            X_train = X['X_train']
+        else:
+            # Incorporate the transform to the dataset
+            X_train = X['backend'].load_datamanager().train_tensors[0]
+
+        X['X_train'] = preprocess(dataset=X_train, transforms=transforms)
 
         # We need to also save the preprocess transforms for inference
         X.update({'preprocess_transforms': transforms})
diff --git a/autoPyTorch/pipeline/components/setup/lr_scheduler/base_scheduler.py b/autoPyTorch/pipeline/components/setup/lr_scheduler/base_scheduler.py
index 671a70f6a..bc53e2e1f 100644
--- a/autoPyTorch/pipeline/components/setup/lr_scheduler/base_scheduler.py
+++ b/autoPyTorch/pipeline/components/setup/lr_scheduler/base_scheduler.py
@@ -46,7 +46,7 @@ def transform(self, X: Dict[str, Any]) -> Dict[str, Any]:
         X.update(
             lr_scheduler=self.scheduler,
             step_interval=self.step_interval,
-            is_cyclic_scheduler= self.get_properties()['cyclic']
+            is_cyclic_scheduler=self.get_properties()['cyclic']
         )
         return X
 
diff --git a/autoPyTorch/pipeline/components/setup/network/base_network.py b/autoPyTorch/pipeline/components/setup/network/base_network.py
index 02782e7a2..7ec872b96 100644
--- a/autoPyTorch/pipeline/components/setup/network/base_network.py
+++ b/autoPyTorch/pipeline/components/setup/network/base_network.py
@@ -1,5 +1,4 @@
-from typing import Any, Dict, Optional, Union
-from typing import Any, Dict, List, Optional
+from typing import Any, Dict, List, Optional, Union
 
 from ConfigSpace.configuration_space import ConfigurationSpace
 
diff --git a/autoPyTorch/pipeline/components/setup/network_backbone/base_network_backbone.py b/autoPyTorch/pipeline/components/setup/network_backbone/base_network_backbone.py
index 1a04d6645..e82f72abb 100644
--- a/autoPyTorch/pipeline/components/setup/network_backbone/base_network_backbone.py
+++ b/autoPyTorch/pipeline/components/setup/network_backbone/base_network_backbone.py
@@ -28,7 +28,6 @@ def __init__(self,
                  **kwargs: Any):
         super().__init__()
         self.add_fit_requirements([
-            FitRequirement('is_small_preprocess', (bool,), user_defined=True, dataset_property=True),
             FitRequirement('X_train', (np.ndarray, pd.DataFrame, csr_matrix), user_defined=True,
                            dataset_property=False),
             FitRequirement('input_shape', (Iterable,), user_defined=True, dataset_property=True),
@@ -52,12 +51,7 @@ def fit(self, X: Dict[str, Any], y: Any = None) -> BaseEstimator:
         self.check_requirements(X, y)
         X_train = X['X_train']
 
-        if X["dataset_properties"]["is_small_preprocess"]:
-            input_shape = X_train.shape[1:]
-        else:
-            # get input shape by transforming first two elements of the training set
-            column_transformer = X['tabular_transformer'].preprocessor
-            input_shape = column_transformer.transform(X_train[:1]).shape[1:]
+        input_shape = X_train.shape[1:]
 
         input_shape = get_output_shape(X['network_embedding'], input_shape=input_shape)
         self.input_shape = input_shape
diff --git a/autoPyTorch/pipeline/components/setup/network_embedding/base_network_embedding.py b/autoPyTorch/pipeline/components/setup/network_embedding/base_network_embedding.py
index 844a4616b..998055d2b 100644
--- a/autoPyTorch/pipeline/components/setup/network_embedding/base_network_embedding.py
+++ b/autoPyTorch/pipeline/components/setup/network_embedding/base_network_embedding.py
@@ -1,4 +1,4 @@
-# import copy
+import copy
 from typing import Any, Dict, Optional, Tuple
 
 import numpy as np
@@ -17,11 +17,11 @@ def __init__(self, random_state: Optional[np.random.RandomState] = None):
 
     def fit(self, X: Dict[str, Any], y: Any = None) -> BaseEstimator:
 
-        num_numerical_columns, num_input_features = self._get_args(X)
+        num_numerical_columns, num_input_features = self._get_required_info_from_data(X)
 
         self.embedding = self.build_embedding(
             num_input_features=num_input_features,
-            num_numerical_features=num_numerical_columns)  # type: ignore[arg-type]
+            num_numerical_features=num_numerical_columns)
         return self
 
     def transform(self, X: Dict[str, Any]) -> Dict[str, Any]:
@@ -31,22 +31,39 @@ def transform(self, X: Dict[str, Any]) -> Dict[str, Any]:
     def build_embedding(self, num_input_features: np.ndarray, num_numerical_features: int) -> nn.Module:
         raise NotImplementedError
 
-    def _get_args(self, X: Dict[str, Any]) -> Tuple[None, None]:  # Tuple[int, np.ndarray]:
+    def _get_required_info_from_data(self, X: Dict[str, Any]) -> Tuple[int, np.ndarray]:
+        """
+        Returns the number of numerical columns after preprocessing and
+        an array of size equal to the number of input features
+        containing zeros for numerical data and number of categories
+        for categorical data. This is required to build the embedding.
+
+        Args:
+            X (Dict[str, Any]):
+                Fit dictionary
+
+        Returns:
+            Tuple[int, np.ndarray]:
+                number of numerical columns and array indicating
+                number of categories for categorical columns and
+                0 for numerical columns
+        """
         # Feature preprocessors can alter numerical columns
-        # if len(X['dataset_properties']['numerical_columns']) == 0:
-        #     num_numerical_columns = 0
-        # else:
-        #     X_train = copy.deepcopy(X['backend'].load_datamanager().train_tensors[0][:2])
-        #
-        #     numerical_column_transformer = X['tabular_transformer'].preprocessor. \
-        #         named_transformers_['numerical_pipeline']
-        #     num_numerical_columns = numerical_column_transformer.transform(
-        #         X_train[:, X['dataset_properties']['numerical_columns']]).shape[1]
-        # num_input_features = np.zeros((num_numerical_columns + len(X['dataset_properties']['categorical_columns'])),
-        #                               dtype=int)
-        # categories = X['dataset_properties']['categories']
-        #
-        # for i, category in enumerate(categories):
-        #     num_input_features[num_numerical_columns + i, ] = len(category)
-        # return num_numerical_columns, num_input_features
-        return None, None
+        if len(X['dataset_properties']['numerical_columns']) == 0:
+            num_numerical_columns = 0
+        else:
+            X_train = copy.deepcopy(X['backend'].load_datamanager().train_tensors[0][:2])
+
+            numerical_column_transformer = X['tabular_transformer'].preprocessor. \
+                named_transformers_['numerical_pipeline']
+            num_numerical_columns = numerical_column_transformer.transform(
+                X_train[:, X['dataset_properties']['numerical_columns']]).shape[1]
+
+        num_cols = num_numerical_columns + len(X['dataset_properties']['categorical_columns'])
+        num_input_feats = np.zeros(num_cols, dtype=np.int32)
+
+        categories = X['dataset_properties']['categories']
+        for idx, cats in enumerate(categories, start=num_numerical_columns):
+            num_input_feats[idx] = len(cats)
+
+        return num_numerical_columns, num_input_feats
diff --git a/autoPyTorch/pipeline/components/training/data_loader/base_data_loader.py b/autoPyTorch/pipeline/components/training/data_loader/base_data_loader.py
index 113726870..0cea0b2c7 100644
--- a/autoPyTorch/pipeline/components/training/data_loader/base_data_loader.py
+++ b/autoPyTorch/pipeline/components/training/data_loader/base_data_loader.py
@@ -56,8 +56,8 @@ def __init__(self, batch_size: int = 64,
         # Define fit requirements
         self.add_fit_requirements([
             FitRequirement("split_id", (int,), user_defined=True, dataset_property=False),
-            FitRequirement("Backend", (Backend,), user_defined=True, dataset_property=False),
-            FitRequirement("is_small_preprocess", (bool,), user_defined=True, dataset_property=True)])
+            FitRequirement("Backend", (Backend,), user_defined=True, dataset_property=False)
+        ])
 
     def transform(self, X: Dict[str, Any]) -> Dict[str, Any]:
         """The transform function calls the transform function of the
@@ -102,10 +102,9 @@ def fit(self, X: Dict[str, Any], y: Any = None) -> torch.utils.data.DataLoader:
             self.val_transform,
             train=False,
         )
-        if X['dataset_properties']["is_small_preprocess"]:
-            # This parameter indicates that the data has been pre-processed for speed
-            # Overwrite the datamanager with the pre-processes data
-            datamanager.replace_data(X['X_train'], X['X_test'] if 'X_test' in X else None)
+        # The data has been pre-processed for speed, so
+        # overwrite the datamanager with the pre-processed data
+        datamanager.replace_data(X['X_train'], X['X_test'] if 'X_test' in X else None)
 
         train_dataset = datamanager.get_dataset(split_id=X['split_id'], train=True)
 
@@ -221,10 +220,6 @@ def check_requirements(self, X: Dict[str, Any], y: Any = None) -> None:
         if 'backend' not in X:
             raise ValueError("backend is needed to load the data from disk")
 
-        if 'is_small_preprocess' not in X['dataset_properties']:
-            raise ValueError("is_small_pre-process is required to know if the data was preprocessed"
-                             " or if the data-loader should transform it while loading a batch")
-
         # We expect this class to be a base for image/tabular/time
         # And the difference among this data types should be mainly
         # in the transform, so we delegate for special transformation checking
diff --git a/autoPyTorch/pipeline/components/training/data_loader/feature_data_loader.py b/autoPyTorch/pipeline/components/training/data_loader/feature_data_loader.py
index 4e41ec838..d6f3081a0 100644
--- a/autoPyTorch/pipeline/components/training/data_loader/feature_data_loader.py
+++ b/autoPyTorch/pipeline/components/training/data_loader/feature_data_loader.py
@@ -72,7 +72,7 @@ def build_transform(self, X: Dict[str, Any], mode: str) -> torchvision.transform
         # distinction is performed
         candidate_transformations: List[Callable] = []
 
-        if 'test' in mode or not X['dataset_properties']['is_small_preprocess']:
+        if 'test' in mode:
             candidate_transformations.append((ExpandTransform()))
             candidate_transformations.extend(X['preprocess_transforms'])
             candidate_transformations.append((ContractTransform()))
@@ -93,5 +93,5 @@ def _check_transform_requirements(self, X: Dict[str, Any], y: Any = None) -> Non
                 mechanism, in which during a transform, a components adds relevant information
                 so that further stages can be properly fitted
         """
-        if not X['dataset_properties']['is_small_preprocess'] and 'preprocess_transforms' not in X:
+        if 'preprocess_transforms' not in X:
             raise ValueError("Cannot find the preprocess_transforms in the fit dictionary")
diff --git a/autoPyTorch/pipeline/components/training/data_loader/image_data_loader.py b/autoPyTorch/pipeline/components/training/data_loader/image_data_loader.py
index 21cc05447..38cdd48b0 100644
--- a/autoPyTorch/pipeline/components/training/data_loader/image_data_loader.py
+++ b/autoPyTorch/pipeline/components/training/data_loader/image_data_loader.py
@@ -41,7 +41,7 @@ def build_transform(self, X: Dict[str, Any], mode: str) -> torchvision.transform
         # check if data set is small enough to be preprocessed.
         # If it is, then no need to add preprocess_transforms to
         # the data loader as the data is already preprocessed
-        if 'test' in mode or not X['dataset_properties']['is_small_preprocess']:
+        if 'test' in mode:
             transformations.append(X['preprocess_transforms'])
 
         # Transform to tensor
@@ -63,5 +63,5 @@ def _check_transform_requirements(self, X: Dict[str, Any], y: Any = None) -> Non
         if not X['image_augmenter'] and 'image_augmenter' not in X:
             raise ValueError("Cannot find the image_augmenter in the fit dictionary")
 
-        if not X['dataset_properties']['is_small_preprocess'] and 'preprocess_transforms' not in X:
+        if 'preprocess_transforms' not in X:
             raise ValueError("Cannot find the preprocess_transforms in the fit dictionary")
diff --git a/autoPyTorch/pipeline/components/training/trainer/AdversarialTrainer.py b/autoPyTorch/pipeline/components/training/trainer/AdversarialTrainer.py
index 67ae71188..fc78e4655 100644
--- a/autoPyTorch/pipeline/components/training/trainer/AdversarialTrainer.py
+++ b/autoPyTorch/pipeline/components/training/trainer/AdversarialTrainer.py
@@ -76,7 +76,7 @@ def criterion_preparation(self, y_a: np.ndarray, y_b: np.ndarray = None, lam: fl
         # Initial implementation, consider the adversarial loss and the normal network loss
         # equally.
         return lambda criterion, pred, adversarial_pred: 0.5 * criterion(pred, y_a) + \
-                                                         0.5 * criterion(adversarial_pred, y_a)
+            0.5 * criterion(adversarial_pred, y_a)
 
     def train_step(self, data: np.ndarray, targets: np.ndarray) -> Tuple[float, torch.Tensor]:
         """
diff --git a/autoPyTorch/pipeline/components/training/trainer/__init__.py b/autoPyTorch/pipeline/components/training/trainer/__init__.py
index 28220fdcd..50869d000 100755
--- a/autoPyTorch/pipeline/components/training/trainer/__init__.py
+++ b/autoPyTorch/pipeline/components/training/trainer/__init__.py
@@ -86,7 +86,7 @@ def get_fit_requirements(self) -> Optional[List[FitRequirement]]:
 
     def get_available_components(
         self,
-        dataset_properties: Optional[Dict[str, str]] = None,
+        dataset_properties: Optional[Dict[str, BaseDatasetPropertiesType]] = None,
         include: Optional[List[str]] = None,
         exclude: Optional[List[str]] = None,
     ) -> Dict[str, autoPyTorchComponent]:
diff --git a/autoPyTorch/pipeline/components/training/trainer/base_trainer.py b/autoPyTorch/pipeline/components/training/trainer/base_trainer.py
index 574b2002f..517ae08bb 100644
--- a/autoPyTorch/pipeline/components/training/trainer/base_trainer.py
+++ b/autoPyTorch/pipeline/components/training/trainer/base_trainer.py
@@ -26,7 +26,6 @@
 from autoPyTorch.pipeline.components.setup.lr_scheduler.constants import StepIntervalUnit
 from autoPyTorch.pipeline.components.training.base_training import autoPyTorchTrainingComponent
 from autoPyTorch.pipeline.components.training.metrics.metrics import CLASSIFICATION_METRICS, REGRESSION_METRICS
-from autoPyTorch.pipeline.components.training.trainer.utils import Lookahead
 from autoPyTorch.pipeline.components.training.metrics.utils import calculate_score
 from autoPyTorch.pipeline.components.training.trainer.utils import Lookahead, swa_update
 from autoPyTorch.utils.common import FitRequirement, HyperparameterSearchSpace, add_hyperparameter, get_hyperparameter
diff --git a/test/test_data/test_feature_validator.py b/test/test_data/test_feature_validator.py
index c8e05182c..048514559 100644
--- a/test/test_data/test_feature_validator.py
+++ b/test/test_data/test_feature_validator.py
@@ -237,7 +237,7 @@ def test_featurevalidator_categorical_nan(input_data_featuretest):
     transformed_X = validator.transform(input_data_featuretest)
     assert any(pd.isna(input_data_featuretest))
     categories_ = validator.column_transformer.\
-        named_transformers_['categorical_pipeline'].named_steps['onehotencoder'].categories_
+        named_transformers_['categorical_pipeline'].named_steps['ordinalencoder'].categories_
     assert any(('0' in categories) or (0 in categories) or ('missing_value' in categories) for categories in
                categories_)
     assert np.issubdtype(transformed_X.dtype, np.number)
@@ -313,9 +313,8 @@ def test_featurevalidator_get_columns_to_encode():
 
     validator.fit(df)
 
-    categorical_columns, numerical_columns, feat_type = validator._get_columns_info(df)
+    categorical_columns, feat_type = validator._get_columns_info(df)
 
-    assert numerical_columns == ['int', 'float']
     assert categorical_columns == ['category', 'bool']
     assert feat_type == ['numerical', 'numerical', 'categorical', 'categorical']
 
@@ -327,8 +326,8 @@ def feature_validator_remove_nan_catcolumns(df_train: pd.DataFrame, df_test: pd.
     transformed_df_train = validator.transform(df_train)
     transformed_df_test = validator.transform(df_test)
 
-    assert np.array_equal(transformed_df_train, ans_train)
-    assert np.array_equal(transformed_df_test, ans_test)
+    np.testing.assert_array_equal(transformed_df_train, ans_train)
+    np.testing.assert_array_equal(transformed_df_test, ans_test)
 
 
 def test_feature_validator_remove_nan_catcolumns():
@@ -373,7 +372,7 @@ def test_feature_validator_remove_nan_catcolumns():
         ],
         dtype='category',
     )
-    ans_train = np.array([[0, 1], [1, 0], [0, 1]], dtype=np.float64)
+    ans_train = np.array([[1, np.nan, np.nan], [0, np.nan, np.nan], [1, np.nan, np.nan]], dtype=np.float64)
     df_test = pd.DataFrame(
         [
             {'A': np.nan, 'B': np.nan, 'C': 5},
@@ -382,7 +381,7 @@ def test_feature_validator_remove_nan_catcolumns():
         ],
         dtype='category',
     )
-    ans_test = np.array([[1, 0], [1, 0], [0, 1]], dtype=np.float64)
+    ans_test = np.array([[0, np.nan, np.nan], [0, np.nan, np.nan], [1, np.nan, np.nan]], dtype=np.float64)
     feature_validator_remove_nan_catcolumns(df_train, df_test, ans_train, ans_test)
 
     # Second case, there exist null columns (B and C) in the training set and
@@ -395,7 +394,7 @@ def test_feature_validator_remove_nan_catcolumns():
         ],
         dtype='category',
     )
-    ans_train = np.array([[0, 1], [1, 0], [0, 1]], dtype=np.float64)
+    ans_train = np.array([[1, np.nan, np.nan], [0, np.nan, np.nan], [1, np.nan, np.nan]], dtype=np.float64)
     df_test = pd.DataFrame(
         [
             {'A': np.nan, 'B': np.nan, 'C': np.nan},
@@ -404,7 +403,7 @@ def test_feature_validator_remove_nan_catcolumns():
         ],
         dtype='category',
     )
-    ans_test = np.array([[1, 0], [1, 0], [0, 1]], dtype=np.float64)
+    ans_test = np.array([[0, np.nan, np.nan], [0, np.nan, np.nan], [1, np.nan, np.nan]], dtype=np.float64)
     feature_validator_remove_nan_catcolumns(df_train, df_test, ans_train, ans_test)
 
     # Third case, there exist no null columns in the training set and
@@ -416,7 +415,7 @@ def test_feature_validator_remove_nan_catcolumns():
         ],
         dtype='category',
     )
-    ans_train = np.array([[1, 0, 1, 0], [0, 1, 0, 1]], dtype=np.float64)
+    ans_train = np.array([[0, 0], [1, 1]], dtype=np.float64)
     df_test = pd.DataFrame(
         [
             {'A': np.nan, 'B': np.nan},
@@ -424,7 +423,7 @@ def test_feature_validator_remove_nan_catcolumns():
         ],
         dtype='category',
     )
-    ans_test = np.array([[0, 0, 0, 0], [0, 0, 0, 0]], dtype=np.float64)
+    ans_test = np.array([[-1, -1], [-1, -1]], dtype=np.float64)
     feature_validator_remove_nan_catcolumns(df_train, df_test, ans_train, ans_test)
 
 
@@ -504,7 +503,7 @@ def test_column_transformer_created(input_data_featuretest):
 
     # Make sure that the encoded features are actually encoded. Categorical columns are at
     # the start after transformation. In our fixtures, this is also honored prior encode
-    cat_columns, _, feature_types = validator._get_columns_info(input_data_featuretest)
+    cat_columns, feature_types = validator._get_columns_info(input_data_featuretest)
 
     # At least one categorical
     assert 'categorical' in validator.feat_type
@@ -513,13 +512,20 @@ def test_column_transformer_created(input_data_featuretest):
     if np.any([pd.api.types.is_numeric_dtype(input_data_featuretest[col]
                                              ) for col in input_data_featuretest.columns]):
         assert 'numerical' in validator.feat_type
-        # we expect this input to be the fixture 'pandas_mixed_nan'
-        np.testing.assert_array_equal(transformed_X, np.array([[1., 0., -1.], [0., 1., 1.]]))
-    else:
-        np.testing.assert_array_equal(transformed_X, np.array([[1., 0., 1., 0.], [0., 1., 0., 1.]]))
-
-    if not all([feat_type in ['numerical', 'categorical'] for feat_type in feature_types]):
-        raise ValueError("Expected only numerical and categorical feature types")
+    for i, feat_type in enumerate(feature_types):
+        if 'numerical' in feat_type:
+            np.testing.assert_array_equal(
+                transformed_X[:, i],
+                input_data_featuretest[input_data_featuretest.columns[i]].to_numpy()
+            )
+        elif 'categorical' in feat_type:
+            np.testing.assert_array_equal(
+                transformed_X[:, i],
+                # Expect always 0, 1... because we use an ordinal encoder
+                np.array([0, 1])
+            )
+        else:
+            raise ValueError(feat_type)
 
 
 def test_no_new_category_after_fit():
@@ -554,7 +560,7 @@ def test_unknown_encode_value():
     # The first row should have a 0, 0 as we added a
     # new categorical there and one hot encoder marks
     # it as all zeros for the transformed column
-    expected_row = [0.0, 0.0, -0.5584294383572701, 0.5000000000000004, -1.5136598016833485]
+    expected_row = [-1, -41, -3, -987.2]
     assert expected_row == x_t[0].tolist()
 
 
@@ -678,16 +684,11 @@ def test_feature_validator_imbalanced_data():
     validator.fit(X_train)
 
     train_feature_types = copy.deepcopy(validator.feat_type)
-    assert train_feature_types == ['numerical']
+    assert train_feature_types == ['numerical', 'numerical', 'numerical', 'numerical']
     # validator will throw an error if the column types are not the same
     transformed_X_test = validator.transform(X_test)
     transformed_X_test = pd.DataFrame(transformed_X_test)
     assert sorted(validator.all_nan_columns) == sorted(['A', 'C', 'D'])
-    # as there are no categorical columns, we can make such an
-    # assertion. We only expect to drop the all nan columns
-    total_all_nan_columns = len(validator.all_nan_columns)
-    total_columns = len(validator.column_order)
-    assert total_columns - total_all_nan_columns == len(transformed_X_test.columns)
 
     # Columns with not all null values in the train split and
     # completely null on the test split.
diff --git a/test/test_data/test_validation.py b/test/test_data/test_validation.py
index 08d848e0e..97ef8cdae 100644
--- a/test/test_data/test_validation.py
+++ b/test/test_data/test_validation.py
@@ -84,7 +84,7 @@ def test_sparse_data_validation_for_regression():
 
     validator.fit(X_train=X_sp, y_train=y)
 
-    X_t, y_t = validator.transform(X, y)
+    X_t, y_t = validator.transform(X_sp, y)
     # make sure everything was encoded to number
     assert np.issubdtype(X_t.dtype, np.number)
     assert np.issubdtype(y_t.dtype, np.number)
diff --git a/test/test_datasets/test_tabular_dataset.py b/test/test_datasets/test_tabular_dataset.py
index 2ee8b608e..710111f9c 100644
--- a/test/test_datasets/test_tabular_dataset.py
+++ b/test/test_datasets/test_tabular_dataset.py
@@ -28,7 +28,6 @@ def test_get_dataset_properties(backend, fit_dictionary_tabular):
         'categorical_columns',
         'numerical_columns',
         'issparse',
-        'is_small_preprocess',
         'task_type',
         'output_type',
         'input_shape',
diff --git a/test/test_evaluation/test_fit_evaluator.py b/test/test_evaluation/test_fit_evaluator.py
deleted file mode 100644
index 1515ba74f..000000000
--- a/test/test_evaluation/test_fit_evaluator.py
+++ /dev/null
@@ -1,206 +0,0 @@
-import multiprocessing
-import os
-import queue
-import shutil
-import sys
-import unittest
-import unittest.mock
-
-from ConfigSpace import Configuration
-
-import numpy as np
-
-from sklearn.base import BaseEstimator
-
-from smac.tae import StatusType
-
-from autoPyTorch.automl_common.common.utils.backend import create
-from autoPyTorch.datasets.resampling_strategy import NoResamplingStrategyTypes
-from autoPyTorch.evaluation.fit_evaluator import FitEvaluator
-from autoPyTorch.evaluation.utils import read_queue
-from autoPyTorch.pipeline.base_pipeline import BasePipeline
-from autoPyTorch.pipeline.components.training.metrics.metrics import accuracy
-
-this_directory = os.path.dirname(__file__)
-sys.path.append(this_directory)
-from evaluation_util import (  # noqa (E402: module level import not at top of file)
-    BaseEvaluatorTest,
-    get_binary_classification_datamanager,
-    get_multiclass_classification_datamanager,
-    get_regression_datamanager,
-)  # noqa (E402: module level import not at top of file)
-
-
-class BackendMock(object):
-    def load_datamanager(self):
-        return get_multiclass_classification_datamanager()
-
-
-class Dummy(object):
-    def __init__(self):
-        self.name = 'dummy'
-
-
-class DummyPipeline(BasePipeline):
-    def __init__(self):
-        mocked_estimator = unittest.mock.Mock(spec=BaseEstimator)
-        self.steps = [('MockStep', mocked_estimator)]
-        pass
-
-    def predict_proba(self, X, batch_size=None):
-        return np.tile([0.6, 0.4], (len(X), 1))
-
-    def get_additional_run_info(self):
-        return {}
-
-
-class TestFitEvaluator(BaseEvaluatorTest, unittest.TestCase):
-    _multiprocess_can_split_ = True
-
-    def setUp(self):
-        """
-        Creates a backend mock
-        """
-        tmp_dir_name = self.id()
-        self.ev_path = os.path.join(this_directory, '.tmp_evaluations', tmp_dir_name)
-        if os.path.exists(self.ev_path):
-            shutil.rmtree(self.ev_path)
-        os.makedirs(self.ev_path, exist_ok=False)
-        dummy_model_files = [os.path.join(self.ev_path, str(n)) for n in range(100)]
-        dummy_pred_files = [os.path.join(self.ev_path, str(n)) for n in range(100, 200)]
-        dummy_cv_model_files = [os.path.join(self.ev_path, str(n)) for n in range(200, 300)]
-        backend_mock = unittest.mock.Mock()
-        backend_mock.get_model_dir.return_value = self.ev_path
-        backend_mock.get_cv_model_dir.return_value = self.ev_path
-        backend_mock.get_model_path.side_effect = dummy_model_files
-        backend_mock.get_cv_model_path.side_effect = dummy_cv_model_files
-        backend_mock.get_prediction_output_path.side_effect = dummy_pred_files
-        backend_mock.temporary_directory = self.ev_path
-        self.backend_mock = backend_mock
-
-        self.tmp_dir = os.path.join(self.ev_path, 'tmp_dir')
-        self.output_dir = os.path.join(self.ev_path, 'out_dir')
-
-    def tearDown(self):
-        if os.path.exists(self.ev_path):
-            shutil.rmtree(self.ev_path)
-
-    @unittest.mock.patch('autoPyTorch.pipeline.tabular_classification.TabularClassificationPipeline')
-    def test_no_resampling(self, pipeline_mock):
-        # Binary iris, contains 69 train samples, 31 test samples
-        D = get_binary_classification_datamanager(NoResamplingStrategyTypes.no_resampling)
-        pipeline_mock.predict_proba.side_effect = \
-            lambda X, batch_size=None: np.tile([0.6, 0.4], (len(X), 1))
-        pipeline_mock.side_effect = lambda **kwargs: pipeline_mock
-        pipeline_mock.get_additional_run_info.return_value = None
-        pipeline_mock.get_default_pipeline_options.return_value = {'budget_type': 'epochs', 'epochs': 10}
-
-        configuration = unittest.mock.Mock(spec=Configuration)
-        backend_api = create(self.tmp_dir, self.output_dir, 'autoPyTorch')
-        backend_api.load_datamanager = lambda: D
-        queue_ = multiprocessing.Queue()
-
-        evaluator = FitEvaluator(backend_api, queue_, configuration=configuration, metric=accuracy, budget=0)
-        evaluator.file_output = unittest.mock.Mock(spec=evaluator.file_output)
-        evaluator.file_output.return_value = (None, {})
-
-        evaluator.fit_predict_and_loss()
-
-        rval = read_queue(evaluator.queue)
-        self.assertEqual(len(rval), 1)
-        result = rval[0]['loss']
-        self.assertEqual(len(rval[0]), 3)
-        self.assertRaises(queue.Empty, evaluator.queue.get, timeout=1)
-
-        self.assertEqual(evaluator.file_output.call_count, 1)
-        self.assertEqual(result, 0.5806451612903225)
-        self.assertEqual(pipeline_mock.fit.call_count, 1)
-        # 2 calls because of train and test set
-        self.assertEqual(pipeline_mock.predict_proba.call_count, 2)
-        self.assertEqual(evaluator.file_output.call_count, 1)
-        # Should be none as no val preds are mentioned
-        self.assertIsNone(evaluator.file_output.call_args[0][0])
-        # Number of y_test_preds and Y_test should be the same
-        self.assertEqual(evaluator.file_output.call_args[0][2].shape[0],
-                         D.test_tensors[1].shape[0])
-        self.assertEqual(evaluator.pipeline.fit.call_count, 1)
-
-    @unittest.mock.patch.object(FitEvaluator, '_loss')
-    def test_file_output(self, loss_mock):
-
-        D = get_regression_datamanager(NoResamplingStrategyTypes.no_resampling)
-        D.name = 'test'
-        self.backend_mock.load_datamanager.return_value = D
-        configuration = unittest.mock.Mock(spec=Configuration)
-        queue_ = multiprocessing.Queue()
-        loss_mock.return_value = None
-
-        evaluator = FitEvaluator(self.backend_mock, queue_, configuration=configuration, metric=accuracy, budget=0)
-
-        self.backend_mock.get_model_dir.return_value = True
-        evaluator.pipeline = 'model'
-        evaluator.Y_optimization = D.train_tensors[1]
-        rval = evaluator.file_output(
-            D.train_tensors[1],
-            None,
-            D.test_tensors[1],
-        )
-
-        self.assertEqual(rval, (None, {}))
-        # These targets are not saved as Fit evaluator is not used to make an ensemble
-        self.assertEqual(self.backend_mock.save_targets_ensemble.call_count, 0)
-        self.assertEqual(self.backend_mock.save_numrun_to_dir.call_count, 1)
-        self.assertEqual(self.backend_mock.save_numrun_to_dir.call_args_list[-1][1].keys(),
-                         {'seed', 'idx', 'budget', 'model', 'cv_model',
-                          'ensemble_predictions', 'valid_predictions', 'test_predictions'})
-        self.assertIsNotNone(self.backend_mock.save_numrun_to_dir.call_args_list[-1][1]['model'])
-        self.assertIsNone(self.backend_mock.save_numrun_to_dir.call_args_list[-1][1]['cv_model'])
-
-        # Check for not containing NaNs - that the models don't predict nonsense
-        # for unseen data
-        D.test_tensors[1][0] = np.NaN
-        rval = evaluator.file_output(
-            D.train_tensors[1],
-            None,
-            D.test_tensors[1],
-        )
-        self.assertEqual(
-            rval,
-            (
-                1.0,
-                {
-                    'error':
-                    'Model predictions for test set contains NaNs.'
-                },
-            )
-        )
-
-    @unittest.mock.patch('autoPyTorch.pipeline.tabular_classification.TabularClassificationPipeline')
-    def test_predict_proba_binary_classification(self, mock):
-        D = get_binary_classification_datamanager(NoResamplingStrategyTypes.no_resampling)
-        self.backend_mock.load_datamanager.return_value = D
-        mock.predict_proba.side_effect = lambda y, batch_size=None: np.array(
-            [[0.1, 0.9]] * y.shape[0]
-        )
-        mock.side_effect = lambda **kwargs: mock
-        mock.get_default_pipeline_options.return_value = {'budget_type': 'epochs', 'epochs': 10}
-        configuration = unittest.mock.Mock(spec=Configuration)
-        queue_ = multiprocessing.Queue()
-
-        evaluator = FitEvaluator(self.backend_mock, queue_, configuration=configuration, metric=accuracy, budget=0)
-
-        evaluator.fit_predict_and_loss()
-        Y_test_pred = self.backend_mock.save_numrun_to_dir.call_args_list[0][1][
-            'test_predictions']
-
-        for i in range(7):
-            self.assertEqual(0.9, Y_test_pred[i][1])
-
-    def test_get_results(self):
-        queue_ = multiprocessing.Queue()
-        for i in range(5):
-            queue_.put((i * 1, 1 - (i * 0.2), 0, "", StatusType.SUCCESS))
-        result = read_queue(queue_)
-        self.assertEqual(len(result), 5)
-        self.assertEqual(result[0][0], 0)
-        self.assertAlmostEqual(result[0][1], 1.0)
diff --git a/test/test_pipeline/components/preprocessing/test_encoders.py b/test/test_pipeline/components/preprocessing/test_encoders.py
index ac796291c..a901823ba 100644
--- a/test/test_pipeline/components/preprocessing/test_encoders.py
+++ b/test/test_pipeline/components/preprocessing/test_encoders.py
@@ -10,8 +10,6 @@
 from autoPyTorch.pipeline.components.preprocessing.tabular_preprocessing.encoding.OneHotEncoder import OneHotEncoder
 
 
-# TODO: fix in preprocessing PR
-@unittest.skip("Skipping tests as preprocessing is not finalised")
 class TestEncoders(unittest.TestCase):
 
     def test_one_hot_encoder_no_unknown(self):
diff --git a/test/test_pipeline/components/preprocessing/test_imputers.py b/test/test_pipeline/components/preprocessing/test_imputers.py
index d2de6d7d3..18b43bfa6 100644
--- a/test/test_pipeline/components/preprocessing/test_imputers.py
+++ b/test/test_pipeline/components/preprocessing/test_imputers.py
@@ -11,8 +11,6 @@
 from autoPyTorch.pipeline.components.preprocessing.tabular_preprocessing.imputation.SimpleImputer import SimpleImputer
 
 
-# TODO: fix in preprocessing PR
-@unittest.skip("Skipping tests as preprocessing is not finalised")
 class TestSimpleImputer(unittest.TestCase):
 
     def test_get_config_space(self):
diff --git a/test/test_pipeline/components/preprocessing/test_scalers.py b/test/test_pipeline/components/preprocessing/test_scalers.py
index cd41308fa..94ba0f2dc 100644
--- a/test/test_pipeline/components/preprocessing/test_scalers.py
+++ b/test/test_pipeline/components/preprocessing/test_scalers.py
@@ -12,8 +12,6 @@
 from autoPyTorch.pipeline.components.preprocessing.tabular_preprocessing.scaling.StandardScaler import StandardScaler
 
 
-# TODO: fix in preprocessing PR
-@unittest.skip("Skipping tests as preprocessing is not finalised")
 class TestNormalizer(unittest.TestCase):
 
     def test_l2_norm(self):
@@ -131,8 +129,6 @@ def test_max_norm(self):
                                                [0.84615385, 0.92307692, 1]]))
 
 
-# TODO: fix in preprocessing PR
-@unittest.skip("Skipping tests as preprocessing is not finalised")
 class TestMinMaxScaler(unittest.TestCase):
 
     def test_minmax_scaler(self):
@@ -174,8 +170,6 @@ def test_minmax_scaler(self):
                                                [0.76923077, 0.76923077, 0.76923077]]))
 
 
-# TODO: fix in preprocessing PR
-@unittest.skip("Skipping tests as preprocessing is not finalised")
 class TestStandardScaler(unittest.TestCase):
 
     def test_standard_scaler(self):
@@ -218,8 +212,6 @@ def test_standard_scaler(self):
                                                [0.8396642, 0.8396642, 0.8396642]]))
 
 
-# TODO: fix in preprocessing PR
-@unittest.skip("Skipping tests as preprocessing is not finalised")
 class TestNoneScaler(unittest.TestCase):
 
     def test_none_scaler(self):
diff --git a/test/test_pipeline/components/preprocessing/test_tabular_column_transformer.py b/test/test_pipeline/components/preprocessing/test_tabular_column_transformer.py
index d7a59383c..c4d8ccd50 100644
--- a/test/test_pipeline/components/preprocessing/test_tabular_column_transformer.py
+++ b/test/test_pipeline/components/preprocessing/test_tabular_column_transformer.py
@@ -14,13 +14,14 @@
 
 
 # TODO: fix in preprocessing PR
-@pytest.mark.skip("Skipping tests as preprocessing is not finalised")
+# NOTE: the pytest skip marker is disabled here, so these tests run again.
 @pytest.mark.parametrize("fit_dictionary_tabular", ['classification_numerical_only',
                                                     'classification_categorical_only',
                                                     'classification_numerical_and_categorical'], indirect=True)
 class TestTabularTransformer:
     def test_tabular_preprocess(self, fit_dictionary_tabular):
         pipeline = TabularPipeline(dataset_properties=fit_dictionary_tabular['dataset_properties'])
+        X_train = fit_dictionary_tabular['X_train'].copy()
         pipeline = pipeline.fit(fit_dictionary_tabular)
         X = pipeline.transform(fit_dictionary_tabular)
         column_transformer = X['tabular_transformer']
@@ -32,17 +33,17 @@ def test_tabular_preprocess(self, fit_dictionary_tabular):
         # as the later is not callable and runs into error in the compose transform
         assert isinstance(column_transformer, TabularColumnTransformer)
 
-        data = column_transformer.preprocessor.fit_transform(X['X_train'])
+        data = column_transformer.preprocessor.fit_transform(X_train)
         assert isinstance(data, np.ndarray)
 
         # Make sure no columns are unintentionally dropped after preprocessing
         if len(fit_dictionary_tabular['dataset_properties']["numerical_columns"]) == 0:
             categorical_pipeline = column_transformer.preprocessor.named_transformers_['categorical_pipeline']
-            categorical_data = categorical_pipeline.transform(X['X_train'])
+            categorical_data = categorical_pipeline.transform(X_train)
             assert data.shape[1] == categorical_data.shape[1]
         elif len(fit_dictionary_tabular['dataset_properties']["categorical_columns"]) == 0:
             numerical_pipeline = column_transformer.preprocessor.named_transformers_['numerical_pipeline']
-            numerical_data = numerical_pipeline.transform(X['X_train'])
+            numerical_data = numerical_pipeline.transform(X_train)
             assert data.shape[1] == numerical_data.shape[1]
 
     def test_sparse_data(self, fit_dictionary_tabular):
diff --git a/test/test_pipeline/components/training/test_feature_data_loader.py b/test/test_pipeline/components/training/test_feature_data_loader.py
index 7d4c9d80d..7e97494a4 100644
--- a/test/test_pipeline/components/training/test_feature_data_loader.py
+++ b/test/test_pipeline/components/training/test_feature_data_loader.py
@@ -9,13 +9,13 @@
 
 
 class TestFeatureDataLoader(unittest.TestCase):
-    def test_build_transform_small_preprocess_true(self):
+    def test_build_transform(self):
         """
         Makes sure a proper composition is created
         """
         loader = FeatureDataLoader()
 
-        fit_dictionary = {'dataset_properties': {'is_small_preprocess': True}}
+        fit_dictionary = {'dataset_properties': {}}
         for thing in ['imputer', 'scaler', 'encoder']:
             fit_dictionary[thing] = [unittest.mock.Mock()]
 
@@ -25,19 +25,3 @@ def test_build_transform_small_preprocess_true(self):
 
         # No preprocessing needed here as it was done before
         self.assertEqual(len(compose.transforms), 1)
-
-    def test_build_transform_small_preprocess_false(self):
-        """
-        Makes sure a proper composition is created
-        """
-        loader = FeatureDataLoader()
-
-        fit_dictionary = {'dataset_properties': {'is_small_preprocess': False},
-                          'preprocess_transforms': [unittest.mock.Mock()]}
-
-        compose = loader.build_transform(fit_dictionary, mode='train')
-
-        self.assertIsInstance(compose, torchvision.transforms.Compose)
-
-        # We expect the to tensor, the preproces transforms and the check_array
-        self.assertEqual(len(compose.transforms), 4)
diff --git a/test/test_pipeline/components/training/test_training.py b/test/test_pipeline/components/training/test_training.py
index 034ea71d7..5b7be7f49 100644
--- a/test/test_pipeline/components/training/test_training.py
+++ b/test/test_pipeline/components/training/test_training.py
@@ -93,12 +93,6 @@ def test_check_requirements(self):
                                     'backend is needed to load the data from'):
             loader.fit(fit_dictionary)
 
-        # Then the is small fit
-        fit_dictionary.update({'backend': unittest.mock.Mock()})
-        with self.assertRaisesRegex(ValueError,
-                                    'is_small_pre-process is required to know if th'):
-            loader.fit(fit_dictionary)
-
     def test_fit_transform(self):
         """ Makes sure that fit and transform work as intended """
         backend = unittest.mock.Mock()