# Belle II Software development
# config.py

import basf2
import yaml
import collections.abc
14
15def _update_config_dict(d, u):
16 """
17 Updates the config dictionary.
18
19 .. seealso:: Need a
20 `recursive solution <https://stackoverflow.com/questions/3232943/update-value-of-a-nested-dictionary-of-varying-depth>`_
21 because it's a nested dict of varying depth.
22 Args:
23 d (dict): Dictionary to update.
24 u (dict): Dictionary of configs.
25
26 Returns:
27 dict: Updated dictionary of configs.
28 """
29 for k, v in u.items():
30 if isinstance(v, collections.abc.Mapping):
31 d[k] = _update_config_dict(d.get(k, {}), v)
32 else:
33 d[k] = v
34 return d
35
36
def load_config(cfg_path=None, model=None, dataset=None, run_name=None, samples=None, **kwargs):
    """
    Load default configs followed by user configs and populate dataset tags.

    Args:
        cfg_path(str or Path): Path to user config yaml.
        model(str): Name of model to use (overwrites loaded config).
        dataset(int): Individual dataset to load (overwrites loaded config).
        run_name(str): Name of training run (overwrites loaded config).
        samples(int): Number of samples to train on (overwrites loaded config).

    Returns:
        dict, list: Loaded training configuration dictionary
        and list of tuples containing (tag name, dataset path, tag key).
    """
    # Start from the shipped defaults
    with open(basf2.find_file('data/analysis/grafei_config.yaml')) as f:
        configs = yaml.safe_load(f)

    # Merge the user's yaml on top of the defaults (nested-dict aware,
    # so a plain dict.update would clobber whole sub-sections)
    if cfg_path is not None:
        with open(cfg_path) as f:
            user_configs = yaml.safe_load(f)
        configs = _update_config_dict(configs, user_configs)

    # Command-line style overrides take precedence over any file
    if model is not None:
        configs['train']['model'] = model

    # Select the dataset(s) to load; when neither the caller nor the config
    # specifies one, store None which means "load all"
    if dataset is not None or 'datasets' not in configs['dataset']:
        configs['dataset']['datasets'] = dataset

    if run_name is not None:
        configs['output']['run_name'] = run_name

    # Build the (tag name, path, key) tuples used throughout training/eval
    return configs, _generate_dataset_tags(configs, samples)
79
80
81def _generate_dataset_tags(configs, samples=None):
82 """
83 Generate the different dataset tags and assign their file paths.
84 This helps us keep track of the train/val datasets throughout training and evaluation.
85
86 Args:
87 config (dict): Training configuration dictionary.
88 samples (dict): Number of training samples.
89
90 Returns:
91 list: List of tuples containing (tag name, dataset path, tag key).
92 """
93 # Fetch whichever data source we're loading
94 source_confs = configs['dataset']
95
96 # Set up appropriate dataset tags
97 tags = [
98 ("Training", source_confs['path'], 'train'),
99 ("Validation", source_confs['path'], 'val'),
100 ]
101
102 # And overwrite any data source specific configs
103 source_confs['config']['samples'] = samples
104
105 return tags
106