diff --git a/d2go/config/config.py b/d2go/config/config.py index bcc601d7..3cc896ce 100644 --- a/d2go/config/config.py +++ b/d2go/config/config.py @@ -8,16 +8,14 @@ import mock import yaml -from d2go.registry.builtin import CONFIG_UPDATER_REGISTRY from detectron2.config import CfgNode as _CfgNode from fvcore.common.registry import Registry -from .utils import reroute_config_path +from .utils import reroute_config_path, resolve_default_config logger = logging.getLogger(__name__) CONFIG_CUSTOM_PARSE_REGISTRY = Registry("CONFIG_CUSTOM_PARSE") -DEFAULTS_GENERATOR_KEY = "_DEFAULTS_" def _opts_to_dict(opts: List[str]): @@ -84,7 +82,7 @@ def _run_custom_processing(self, is_dump=False): def get_default_cfg(self): """Return the defaults for this instance of CfgNode""" - return _resolve_default_config(self) + return resolve_default_config(self) @contextlib.contextmanager @@ -175,29 +173,6 @@ def auto_scale_world_size(cfg, new_world_size): logger.info("Auto-scaled the config according to the actual world size: \n" + table) -def _resolve_default_config(cfg: CfgNode) -> CfgNode: - if DEFAULTS_GENERATOR_KEY not in cfg: - raise ValueError( - f"Can't resolved default config because `{DEFAULTS_GENERATOR_KEY}` is" - f" missing from cfg: \n{cfg}" - ) - - updater_names: List[str] = cfg[DEFAULTS_GENERATOR_KEY] - assert isinstance(updater_names, list), updater_names - assert [isinstance(x, str) for x in updater_names], updater_names - - # starting from a empty CfgNode, sequentially apply the generator - cfg = CfgNode() - for name in updater_names: - updater = CONFIG_UPDATER_REGISTRY.get(name) - cfg = updater(cfg) - - # the resolved default config should keep the same default generator - cfg[DEFAULTS_GENERATOR_KEY] = updater_names - - return cfg - - def load_full_config_from_file(filename: str) -> CfgNode: loaded_cfg = CfgNode.load_yaml_with_base(filename) loaded_cfg = CfgNode(loaded_cfg) # cast Dict to CfgNode diff --git a/d2go/config/utils.py b/d2go/config/utils.py index 6bfa3ce6..76dd6999 100644 --- a/d2go/config/utils.py +++ b/d2go/config/utils.py @@ -1,13 +1,19 @@ #!/usr/bin/env python3 # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved +import logging import os from enum import Enum from typing import Any, Dict, List import pkg_resources +from d2go.registry.builtin import CONFIG_UPDATER_REGISTRY from mobile_cv.common.misc.oss_utils import fb_overwritable +logger = logging.getLogger(__name__) + +DEFAULTS_KEY = "_DEFAULTS_" + @fb_overwritable() def reroute_config_path(path: str) -> str: @@ -189,7 +195,18 @@ def get_diff_cfg_rec(old_cfg, new_cfg, out): return out out = new_cfg.__class__() - return get_diff_cfg_rec(old_cfg, new_cfg, out) + diff_cfg = get_diff_cfg_rec(old_cfg, new_cfg, out) + + # Keep the `_DEFAULTS_` even though they should be the same + old_defaults = old_cfg.get(DEFAULTS_KEY, None) + new_defaults = new_cfg.get(DEFAULTS_KEY, None) + assert ( + old_defaults == new_defaults + ), f"{DEFAULTS_KEY} doesn't match! old ({old_defaults}) vs new ({new_defaults})" + if new_defaults is not None: + diff_cfg[DEFAULTS_KEY] = new_defaults + + return diff_cfg def namedtuple_to_dict(obj: Any): @@ -202,3 +219,27 @@ def namedtuple_to_dict(obj: Any): else: res[k] = v return res + + +def resolve_default_config(cfg): + if DEFAULTS_KEY not in cfg: + raise ValueError( + f"Can't resolved default config because `{DEFAULTS_KEY}` is" + f" missing from cfg: \n{cfg}" + ) + + updater_names: List[str] = cfg[DEFAULTS_KEY] + assert isinstance(updater_names, list), updater_names + assert [isinstance(x, str) for x in updater_names], updater_names + + logger.info(f"Resolving default config by applying updaters: {updater_names} ...") + # starting from a empty CfgNode, sequentially apply the generator + cfg = type(cfg)() + for name in updater_names: + updater = CONFIG_UPDATER_REGISTRY.get(name) + cfg = updater(cfg) + + # the resolved default config should keep the same default generator + cfg[DEFAULTS_KEY] = updater_names + + return cfg diff --git a/d2go/quantization/modeling.py b/d2go/quantization/modeling.py index 3db95c16..4250b203 100644 --- a/d2go/quantization/modeling.py +++ b/d2go/quantization/modeling.py @@ -10,6 +10,7 @@ import detectron2.utils.comm as comm import torch from d2go.quantization import learnable_qat +from d2go.registry.builtin import CONFIG_UPDATER_REGISTRY from detectron2.checkpoint import DetectionCheckpointer from detectron2.engine import HookBase, SimpleTrainer from mobile_cv.arch.quantization.observer import update_stat as observer_update_stat @@ -79,6 +80,7 @@ def _load_model(self, checkpoint): return super()._load_model(checkpoint) +@CONFIG_UPDATER_REGISTRY.register("core:quantization") def add_quantization_default_configs(_C): CfgNode = type(_C) _C.QUANTIZATION = CfgNode() @@ -136,6 +138,8 @@ def add_quantization_default_configs(_C): _C.register_deprecated_key("QUANTIZATION.SILICON_QAT") _C.register_deprecated_key("QUANTIZATION.SILICON_QAT.ENABLED") + return _C + # TODO: model.to(device) might not work for detection meta-arch, this function is the # workaround, in general, we might need a meta-arch API for this if needed. diff --git a/d2go/runner/debug_runner.py b/d2go/runner/debug_runner.py index 8c478fe1..d695a7ed 100644 --- a/d2go/runner/debug_runner.py +++ b/d2go/runner/debug_runner.py @@ -8,21 +8,30 @@ import torch import torch.nn as nn from d2go.quantization.modeling import QATCheckpointer +from d2go.registry.builtin import CONFIG_UPDATER_REGISTRY from d2go.runner.default_runner import BaseRunner -from d2go.utils.get_default_cfg import add_tensorboard_default_configs +from d2go.runner.defaults import ( + add_base_runner_default_cfg, + add_tensorboard_default_configs, +) from detectron2.utils.file_io import PathManager -class DebugRunner(BaseRunner): - def get_default_cfg(self): - _C = super().get_default_cfg() +@CONFIG_UPDATER_REGISTRY.register("DebugRunner") +def add_debug_runner_default_cfg(cfg): + assert len(cfg) == 0, "start from scratch, but previous cfg is non-empty!" + _C = add_base_runner_default_cfg(cfg) + + # _C.TENSORBOARD... + add_tensorboard_default_configs(_C) - # _C.TENSORBOARD... - add_tensorboard_default_configs(_C) + # target metric + _C.TEST.TARGET_METRIC = "dataset0:dummy0:metric1" + return _C - # target metric - _C.TEST.TARGET_METRIC = "dataset0:dummy0:metric1" - return _C + +class DebugRunner(BaseRunner): + get_default_cfg = None def build_model(self, cfg, eval_only=False): return nn.Sequential() diff --git a/d2go/runner/default_runner.py b/d2go/runner/default_runner.py index 03f9b6d8..96e574d5 100644 --- a/d2go/runner/default_runner.py +++ b/d2go/runner/default_runner.py @@ -27,9 +27,13 @@ from d2go.modeling.model_freezing_utils import freeze_matched_bn, set_requires_grad from d2go.optimizer import build_optimizer_mapper from d2go.quantization.modeling import QATCheckpointer, QATHook, setup_qat_model +from d2go.runner.defaults import ( + add_base_runner_default_cfg, + add_detectron2go_runner_default_cfg, + add_generalized_rcnn_runner_default_cfg, +) from d2go.runner.training_hooks import update_hooks_from_registry from d2go.utils.flop_calculator import attach_profilers -from d2go.utils.get_default_cfg import get_default_cfg from d2go.utils.helper import D2Trainer, TensorboardXWriter from d2go.utils.misc import get_tensorboard_log_dir from d2go.utils.visualization import DataLoaderVisWrapper, VisualizationEvaluator @@ -129,11 +133,6 @@ def default_scale_quantization_configs(cfg, new_world_size): ) -@fb_overwritable() -def add_fb_base_runner_default_configs(cfg: CfgNode) -> CfgNode: - return cfg - - @fb_overwritable() def prepare_fb_model(cfg: CfgNode, model: torch.nn.Module) -> torch.nn.Module: return model @@ -162,19 +161,7 @@ def register(self, cfg): @staticmethod def get_default_cfg(): - """ - Override `get_default_cfg` for adding non common config. - """ - from detectron2.config import get_cfg as get_d2_cfg - - cfg = get_d2_cfg() - cfg = CfgNode.cast_from_other_class( - cfg - ) # upgrade from D2's CfgNode to D2Go's CfgNode - - cfg.SOLVER.AUTO_SCALING_METHODS = ["default_scale_d2_configs"] - - return cfg + return add_base_runner_default_cfg(CfgNode()) def build_model(self, cfg, eval_only=False): # cfg may need to be reused to build trace model again, thus clone @@ -213,12 +200,7 @@ def register(self, cfg): @staticmethod def get_default_cfg(): - cfg = super(Detectron2GoRunner, Detectron2GoRunner).get_default_cfg() - - cfg.PROFILERS = ["default_flop_counter"] - cfg = add_fb_base_runner_default_configs(cfg) - - return get_default_cfg(cfg) + return add_detectron2go_runner_default_cfg(CfgNode()) # temporary API def _build_model(self, cfg, eval_only=False): @@ -618,24 +600,7 @@ def _create_qat_hook(self, cfg) -> Optional[QATHook]: return QATHook(cfg, self.build_detection_train_loader) -def _add_rcnn_default_config(_C): - _C.EXPORT_CAFFE2 = CfgNode() - _C.EXPORT_CAFFE2.USE_HEATMAP_MAX_KEYPOINT = False - - # Options about how to export the model - _C.RCNN_EXPORT = CfgNode() - # whether or not to include the postprocess (GeneralizedRCNN._postprocess) step - # inside the exported model - _C.RCNN_EXPORT.INCLUDE_POSTPROCESS = False - - _C.RCNN_PREPARE_FOR_EXPORT = "default_rcnn_prepare_for_export" - _C.RCNN_PREPARE_FOR_QUANT = "default_rcnn_prepare_for_quant" - _C.RCNN_PREPARE_FOR_QUANT_CONVERT = "default_rcnn_prepare_for_quant_convert" - - class GeneralizedRCNNRunner(Detectron2GoRunner): @staticmethod def get_default_cfg(): - _C = super(GeneralizedRCNNRunner, GeneralizedRCNNRunner).get_default_cfg() - _add_rcnn_default_config(_C) - return _C + return add_generalized_rcnn_runner_default_cfg(CfgNode()) diff --git a/d2go/utils/get_default_cfg.py b/d2go/runner/defaults.py similarity index 62% rename from d2go/utils/get_default_cfg.py rename to d2go/runner/defaults.py index e71b0407..70f5cdd0 100644 --- a/d2go/utils/get_default_cfg.py +++ b/d2go/runner/defaults.py @@ -1,6 +1,8 @@ #!/usr/bin/env python3 # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -from d2go.config import CfgNode as CN + + +from d2go.config import CfgNode, CfgNode as CN from d2go.data.build import ( add_random_subset_training_sampler_default_configs, add_weighted_training_sampler_default_configs, @@ -12,10 +14,30 @@ from d2go.modeling.model_freezing_utils import add_model_freezing_configs from d2go.modeling.subclass import add_subclass_configs from d2go.quantization.modeling import add_quantization_default_configs +from d2go.registry.builtin import CONFIG_UPDATER_REGISTRY +from detectron2.config import get_cfg as get_d2_cfg from mobile_cv.common.misc.oss_utils import fb_overwritable -@fb_overwritable() +@CONFIG_UPDATER_REGISTRY.register("BaseRunner") +def add_base_runner_default_cfg(cfg): + assert len(cfg) == 0, "start from scratch, but previous cfg is non-empty!" + + cfg = get_d2_cfg() + # upgrade from D2's CfgNode to D2Go's CfgNode + cfg = CfgNode.cast_from_other_class(cfg) + + cfg.SOLVER.AUTO_SCALING_METHODS = ["default_scale_d2_configs"] + + # Set find_unused_parameters for DistributedDataParallel. + cfg.MODEL.DDP_FIND_UNUSED_PARAMETERS = False + # Set FP16 gradient compression for DistributedDataParallel. + cfg.MODEL.DDP_FP16_GRAD_COMPRESS = False + + return cfg + + +@CONFIG_UPDATER_REGISTRY.register("core:tensorboard") def add_tensorboard_default_configs(_C): _C.TENSORBOARD = CN() # Output from dataloader will be written to tensorboard at this frequency @@ -29,17 +51,22 @@ def add_tensorboard_default_configs(_C): # TENSORBOARD.LOG_DIR will be determined solely by OUTPUT_DIR _C.register_deprecated_key("TENSORBOARD.LOG_DIR") + return _C + -@fb_overwritable() def add_abnormal_checker_configs(_C): _C.ABNORMAL_CHECKER = CN() # check and log the iteration with bad losses if enabled _C.ABNORMAL_CHECKER.ENABLED = False -@fb_overwritable() -def get_default_cfg(_C): - # _C.MODEL.FBNET... +@CONFIG_UPDATER_REGISTRY.register("Detectron2GoRunner") +def add_detectron2go_runner_default_cfg(cfg): + assert len(cfg) == 0, "start from scratch, but previous cfg is non-empty!" + + _C = add_base_runner_default_cfg(cfg) + + # _C.MODEL.FBNET_V2... add_fbnet_v2_default_configs(_C) # _C.MODEL.FROZEN_LAYER_REG_EXP add_model_freezing_configs(_C) @@ -64,11 +91,6 @@ def get_default_cfg(_C): # _C.MODEL.FCOS add_fcos_configs(_C) - # Set find_unused_parameters for DistributedDataParallel. - _C.MODEL.DDP_FIND_UNUSED_PARAMETERS = False - # Set FP16 gradient compression for DistributedDataParallel. - _C.MODEL.DDP_FP16_GRAD_COMPRESS = False - # Set default optimizer _C.SOLVER.OPTIMIZER = "sgd" _C.SOLVER.LR_MULTIPLIER_OVERWRITE = [] @@ -94,4 +116,40 @@ def get_default_cfg(_C): # List of modeling hook names _C.MODEL.MODELING_HOOKS = [] + # Profiler + _C.PROFILERS = ["default_flop_counter"] + + # Add FB specific configs + _add_detectron2go_runner_default_fb_cfg(_C) + + return _C + + +@fb_overwritable() +def _add_detectron2go_runner_default_fb_cfg(_C): + return _C + + +def _add_rcnn_default_config(_C): + _C.EXPORT_CAFFE2 = CfgNode() + _C.EXPORT_CAFFE2.USE_HEATMAP_MAX_KEYPOINT = False + + # Options about how to export the model + _C.RCNN_EXPORT = CfgNode() + # whether or not to include the postprocess (GeneralizedRCNN._postprocess) step + # inside the exported model + _C.RCNN_EXPORT.INCLUDE_POSTPROCESS = False + + _C.RCNN_PREPARE_FOR_EXPORT = "default_rcnn_prepare_for_export" + _C.RCNN_PREPARE_FOR_QUANT = "default_rcnn_prepare_for_quant" + _C.RCNN_PREPARE_FOR_QUANT_CONVERT = "default_rcnn_prepare_for_quant_convert" + + +@CONFIG_UPDATER_REGISTRY.register("GeneralizedRCNNRunner") +def add_generalized_rcnn_runner_default_cfg(cfg): + assert len(cfg) == 0, "start from scratch, but previous cfg is non-empty!" + + _C = add_detectron2go_runner_default_cfg(cfg) + _add_rcnn_default_config(_C) + return _C diff --git a/d2go/setup.py b/d2go/setup.py index 58dc37f9..ef626a14 100644 --- a/d2go/setup.py +++ b/d2go/setup.py @@ -13,6 +13,7 @@ from d2go.config import ( auto_scale_world_size, CfgNode, + load_full_config_from_file, reroute_config_path, temp_defrost, ) @@ -165,15 +166,22 @@ def prepare_for_launch(args): logger.info(args) runner = create_runner(args.runner) - cfg = runner.get_default_cfg() + # `Runner::get_default_cfg` will be deprecated, during the transition, the `Runner` + # class should declare the deprecation by setting `get_default_cfg = None`. + if runner.get_default_cfg is not None: + cfg = runner.get_default_cfg() - if args.config_file: - with PathManager.open(reroute_config_path(args.config_file), "r") as f: - print("Loaded config file {}:\n{}".format(args.config_file, f.read())) - cfg.merge_from_file(args.config_file) - cfg.merge_from_list(args.opts) + if args.config_file: + with PathManager.open(reroute_config_path(args.config_file), "r") as f: + print("Loaded config file {}:\n{}".format(args.config_file, f.read())) + cfg.merge_from_file(args.config_file) + cfg.merge_from_list(args.opts) + else: + cfg = create_cfg_from_cli_args(args, default_cfg=cfg) else: - cfg = create_cfg_from_cli_args(args, default_cfg=cfg) + cfg = load_full_config_from_file(reroute_config_path(args.config_file)) + cfg.merge_from_list(args.opts) + cfg.freeze() assert args.output_dir or args.config_file @@ -222,7 +230,7 @@ def setup_after_launch( logger.info("Running with runner: {}".format(runner)) # save the diff config - if runner: + if runner and runner.get_default_cfg is not None: default_cfg = runner.get_default_cfg() dump_cfg( get_diff_cfg(default_cfg, cfg), @@ -230,6 +238,7 @@ def setup_after_launch( ) else: # TODO: support getting default_cfg without runner. + # TODO: support lightning task pass # scale the config after dumping so that dumped config files keep original world size