Skip to content

Processes

behavysis_pipeline.processes.CalculateParams

summary

Source code in behavysis_pipeline/processes/calculate_params.py
class CalculateParams:
    """
    Static methods that calculate experiment parameters (start frame, stop frame,
    experiment duration, and px-per-mm conversion) from a keypoints dataframe and
    write the results to the experiment's auto configs.
    """

    @staticmethod
    def start_frame(
        dlc_fp: str,
        configs_fp: str,
    ) -> str:
        """
        Determine the starting frame of the experiment based on when the subject "likely" entered
        the footage.

        This is done by looking at a sliding window of time. If the median likelihood of the subject
        existing in each frame across the sliding window is greater than the defined pcutoff, then
        this is determined as the start time.

        Notes
        -----
        The config file must contain the following parameters:
        ```
        - user
            - calculate_params
                - start_frame
                    - window_sec: float
                    - pcutoff: float
        ```
        """
        outcome = ""
        # Getting necessary config parameters
        configs = ExperimentConfigs.read_json(configs_fp)
        configs_filt = Model_check_exists(**configs.user.calculate_params.start_frame)
        bpts = configs.get_ref(configs_filt.bodyparts)
        window_sec = configs.get_ref(configs_filt.window_sec)
        pcutoff = configs.get_ref(configs_filt.pcutoff)
        fps = configs.auto.formatted_vid.fps
        # Asserting that the necessary auto configs are valid
        assert fps is not None, "fps is None. Please calculate fps first."
        # Deriving more parameters
        window_frames = int(np.round(fps * window_sec, 0))
        # Loading dataframe
        dlc_df = KeypointsMixin.clean_headings(KeypointsMixin.read_feather(dlc_fp))
        # Getting likelihoods of subject (given bpts) existing in each frame
        df_lhoods = calc_likelihoods(dlc_df, bpts, window_frames)
        # Determining start time. Start frame is the first frame of the rolling window's range
        df_lhoods["exists"] = df_lhoods["rolling"] > pcutoff
        # Getting when subject first exists in video
        start_frame = 0
        if np.all(df_lhoods["exists"] == 0):
            # If subject never exists (no True values in "exists" column), warn and
            # fall back to frame 0.
            # BUGFIX: added the missing space between the two sentences of the warning.
            outcome += (
                "WARNING: The subject was not detected in any frames - using the first frame. "
                + "Please check the video.\n"
            )
        else:
            start_frame = df_lhoods[df_lhoods["exists"]].index[0]
        # Writing to configs (re-read so concurrent edits are not clobbered)
        configs = ExperimentConfigs.read_json(configs_fp)
        configs.auto.start_frame = start_frame
        # configs.auto.start_sec = start_frame / fps
        configs.write_json(configs_fp)
        return outcome

    @staticmethod
    def stop_frame(dlc_fp: str, configs_fp: str) -> str:
        """
        Calculates the stop frame according to the following equation:

        ```
        stop_frame = start_frame + dur_sec * fps
        ```

        Notes
        -----
        The config file must contain the following parameters:
        ```
        - user
            - calculate_params
                - stop_frame
                    - dur_sec: float
        ```
        """
        outcome = ""
        # Getting necessary config parameters
        configs = ExperimentConfigs.read_json(configs_fp)
        configs_filt = Model_stop_frame(**configs.user.calculate_params.stop_frame)
        dur_sec = configs.get_ref(configs_filt.dur_sec)
        start_frame = configs.auto.start_frame
        fps = configs.auto.formatted_vid.fps
        auto_stop_frame = configs.auto.formatted_vid.total_frames
        # Asserting that the necessary auto configs are valid
        assert (
            start_frame is not None
        ), "start_frame is None. Please calculate start_frame first."
        assert fps is not None, "fps is None. Please calculate fps first."
        assert (
            auto_stop_frame is not None
        ), "total_frames is None. Please calculate total_frames first."
        # Calculating stop_frame
        dur_frames = int(dur_sec * fps)
        stop_frame = start_frame + dur_frames
        # BUGFIX: removed the unreachable `if auto_stop_frame is None` branch - the
        # assert above already guarantees auto_stop_frame is not None.
        # Warn if the user-specified dur_sec exceeds the actual video length.
        if stop_frame > auto_stop_frame:
            outcome += (
                "WARNING: The user specified dur_sec in the configs file is greater "
                + "than the actual length of the video. Please check to see if this video is "
                + "too short or if the dur_sec value is incorrect.\n"
            )
        # Writing to config
        configs = ExperimentConfigs.read_json(configs_fp)
        configs.auto.stop_frame = stop_frame
        # configs.auto.stop_sec = stop_frame / fps
        configs.write_json(configs_fp)
        return outcome

    @staticmethod
    def exp_dur(dlc_fp: str, configs_fp: str) -> str:
        """
        Calculates the experiment duration in frames, from the time the specified
        bodyparts appeared to the time they disappeared.
        Appear/disappear is calculated from likelihood.
        """
        outcome = ""
        # Getting necessary config parameters
        configs = ExperimentConfigs.read_json(configs_fp)
        configs_filt = Model_check_exists(**configs.user.calculate_params.exp_dur)
        bpts = configs.get_ref(configs_filt.bodyparts)
        window_sec = configs.get_ref(configs_filt.window_sec)
        pcutoff = configs.get_ref(configs_filt.pcutoff)
        fps = configs.auto.formatted_vid.fps
        # Asserting that the necessary auto configs are valid
        assert fps is not None, "fps is None. Please calculate fps first."
        # Deriving more parameters
        window_frames = int(np.round(fps * window_sec, 0))
        # Loading dataframe
        dlc_df = KeypointsMixin.clean_headings(KeypointsMixin.read_feather(dlc_fp))
        # Getting likelihoods of subject (given bpts) existing in each frame
        df_lhoods = calc_likelihoods(dlc_df, bpts, window_frames)
        # Thresholding rolling likelihood to get per-frame existence
        df_lhoods["exists"] = df_lhoods["rolling"] > pcutoff
        # Getting when subject first and last exists in video
        exp_dur_frames = 0
        if np.all(df_lhoods["exists"] == 0):
            # If subject never exists (no True values in "exists" column), warn and
            # fall back to a zero-frame duration.
            # BUGFIX: added the missing space between the two sentences of the warning.
            outcome += (
                "WARNING: The subject was not detected in any frames - using the first frame. "
                + "Please check the video.\n"
            )
        else:
            start_frame = df_lhoods[df_lhoods["exists"]].index[0]
            stop_frame = df_lhoods[df_lhoods["exists"]].index[-1]
            exp_dur_frames = stop_frame - start_frame
        # Writing to configs
        configs = ExperimentConfigs.read_json(configs_fp)
        configs.auto.exp_dur_frames = exp_dur_frames
        # configs.auto.exp_dur_secs = exp_dur_frames / fps
        configs.write_json(configs_fp)
        return outcome

    @staticmethod
    def px_per_mm(dlc_fp: str, configs_fp: str) -> str:
        """
        Calculates the pixels per mm conversion for the video.

        This is done by taking two reference keypoints (pt_a, pt_b) whose real-world
        separation, dist_mm, is known, interpolating low-likelihood detections,
        computing the mean pixel distance between the two points across all frames,
        and dividing that pixel distance by dist_mm.

        Notes
        -----
        The config file must contain the following parameters:
        ```
        - user
            - calculate_params
                - px_per_mm
                    - pt_a: str
                    - pt_b: str
                    - pcutoff: float
                    - dist_mm: float
        ```
        """
        outcome = ""
        # Getting necessary config parameters
        configs = ExperimentConfigs.read_json(configs_fp)
        configs_filt = Model_px_per_mm(**configs.user.calculate_params.px_per_mm)
        pt_a = configs.get_ref(configs_filt.pt_a)
        pt_b = configs.get_ref(configs_filt.pt_b)
        pcutoff = configs.get_ref(configs_filt.pcutoff)
        dist_mm = configs.get_ref(configs_filt.dist_mm)
        # Loading dataframe
        dlc_df = KeypointsMixin.clean_headings(KeypointsMixin.read_feather(dlc_fp))
        # Imputing missing values with 0 (only really relevant for `likelihood` columns)
        dlc_df = dlc_df.fillna(0)
        # Checking that the two reference points are valid
        KeypointsMixin.check_bpts_exist(dlc_df, [pt_a, pt_b])
        # Getting calibration points (x, y, likelihood) values.
        # BUGFIX: .copy() so the NaN-masking below writes to an independent frame
        # instead of chained-assigning through a possible view of dlc_df.
        pt_a_df = dlc_df[IndivColumns.SINGLE.value, pt_a].copy()
        pt_b_df = dlc_df[IndivColumns.SINGLE.value, pt_b].copy()
        # Interpolating points which are below a likelihood threshold (linear)
        pt_a_df.loc[pt_a_df[Coords.LIKELIHOOD.value] < pcutoff] = np.nan
        pt_a_df = pt_a_df.interpolate(method="linear", axis=0).bfill()
        pt_b_df.loc[pt_b_df[Coords.LIKELIHOOD.value] < pcutoff] = np.nan
        pt_b_df = pt_b_df.interpolate(method="linear", axis=0).bfill()
        # Getting mean Euclidean pixel distance between the calibration points
        dist_px = np.nanmean(
            np.sqrt(
                np.square(pt_a_df["x"] - pt_b_df["x"])
                + np.square(pt_a_df["y"] - pt_b_df["y"])
            )
        )
        # Finding pixels per mm conversion, using the known dist_mm as calibration
        px_per_mm = dist_px / dist_mm
        # Saving to configs file
        configs = ExperimentConfigs.read_json(configs_fp)
        configs.auto.px_per_mm = px_per_mm
        configs.write_json(configs_fp)
        return outcome

exp_dur(dlc_fp, configs_fp) staticmethod

Calculates the experiment duration in frames, from the time the specified bodyparts appeared to the time they disappeared. Appear/disappear is calculated from likelihood.

Source code in behavysis_pipeline/processes/calculate_params.py
@staticmethod
def exp_dur(dlc_fp: str, configs_fp: str) -> str:
    """
    Calculates the experiment duration in frames, from the time the specified
    bodyparts appeared to the time they disappeared.
    Appear/disappear is calculated from likelihood.
    """
    outcome = ""
    # Getting necessary config parameters
    configs = ExperimentConfigs.read_json(configs_fp)
    configs_filt = Model_check_exists(**configs.user.calculate_params.exp_dur)
    bpts = configs.get_ref(configs_filt.bodyparts)
    window_sec = configs.get_ref(configs_filt.window_sec)
    pcutoff = configs.get_ref(configs_filt.pcutoff)
    fps = configs.auto.formatted_vid.fps
    # Asserting that the necessary auto configs are valid
    assert fps is not None, "fps is None. Please calculate fps first."
    # Deriving more parameters
    window_frames = int(np.round(fps * window_sec, 0))
    # Loading dataframe
    dlc_df = KeypointsMixin.clean_headings(KeypointsMixin.read_feather(dlc_fp))
    # Getting likelihoods of subject (given bpts) existing in each frame
    df_lhoods = calc_likelihoods(dlc_df, bpts, window_frames)
    # Thresholding rolling likelihood to get per-frame existence
    df_lhoods["exists"] = df_lhoods["rolling"] > pcutoff
    # Getting when subject first and last exists in video
    exp_dur_frames = 0
    if np.all(df_lhoods["exists"] == 0):
        # If subject never exists (no True values in "exists" column), warn and
        # fall back to a zero-frame duration.
        # BUGFIX: added the missing space between the two sentences of the warning.
        outcome += (
            "WARNING: The subject was not detected in any frames - using the first frame. "
            + "Please check the video.\n"
        )
    else:
        start_frame = df_lhoods[df_lhoods["exists"]].index[0]
        stop_frame = df_lhoods[df_lhoods["exists"]].index[-1]
        exp_dur_frames = stop_frame - start_frame
    # Writing to configs
    configs = ExperimentConfigs.read_json(configs_fp)
    configs.auto.exp_dur_frames = exp_dur_frames
    # configs.auto.exp_dur_secs = exp_dur_frames / fps
    configs.write_json(configs_fp)
    return outcome

px_per_mm(dlc_fp, configs_fp) staticmethod

Calculates the pixels per mm conversion for the video.

This is done by averaging the (x, y) coordinates of each corner, finding the average x difference for the widths in pixels and y distance for the heights in pixels, dividing these pixel distances by their respective mm distances (from the *config.json file), and taking the average of these width and height conversions to estimate the px to mm conversion.

Notes

The config file must contain the following parameters:

- user
    - calculate_params
        - px_per_mm
            - point_a: str
            - point_b: str
            - dist_mm: float

Source code in behavysis_pipeline/processes/calculate_params.py
@staticmethod
def px_per_mm(dlc_fp: str, configs_fp: str) -> str:
    """
    Calculates the pixels per mm conversion for the video.

    This is done by taking two reference keypoints (pt_a, pt_b) whose real-world
    separation, dist_mm, is known, interpolating low-likelihood detections,
    computing the mean pixel distance between the two points across all frames,
    and dividing that pixel distance by dist_mm.

    Notes
    -----
    The config file must contain the following parameters:
    ```
    - user
        - calculate_params
            - px_per_mm
                - pt_a: str
                - pt_b: str
                - pcutoff: float
                - dist_mm: float
    ```
    """
    outcome = ""
    # Getting necessary config parameters
    configs = ExperimentConfigs.read_json(configs_fp)
    configs_filt = Model_px_per_mm(**configs.user.calculate_params.px_per_mm)
    pt_a = configs.get_ref(configs_filt.pt_a)
    pt_b = configs.get_ref(configs_filt.pt_b)
    pcutoff = configs.get_ref(configs_filt.pcutoff)
    dist_mm = configs.get_ref(configs_filt.dist_mm)
    # Loading dataframe
    dlc_df = KeypointsMixin.clean_headings(KeypointsMixin.read_feather(dlc_fp))
    # Imputing missing values with 0 (only really relevant for `likelihood` columns)
    dlc_df = dlc_df.fillna(0)
    # Checking that the two reference points are valid
    KeypointsMixin.check_bpts_exist(dlc_df, [pt_a, pt_b])
    # Getting calibration points (x, y, likelihood) values.
    # BUGFIX: .copy() so the NaN-masking below writes to an independent frame
    # instead of chained-assigning through a possible view of dlc_df.
    pt_a_df = dlc_df[IndivColumns.SINGLE.value, pt_a].copy()
    pt_b_df = dlc_df[IndivColumns.SINGLE.value, pt_b].copy()
    # Interpolating points which are below a likelihood threshold (linear)
    pt_a_df.loc[pt_a_df[Coords.LIKELIHOOD.value] < pcutoff] = np.nan
    pt_a_df = pt_a_df.interpolate(method="linear", axis=0).bfill()
    pt_b_df.loc[pt_b_df[Coords.LIKELIHOOD.value] < pcutoff] = np.nan
    pt_b_df = pt_b_df.interpolate(method="linear", axis=0).bfill()
    # Getting mean Euclidean pixel distance between the calibration points
    dist_px = np.nanmean(
        np.sqrt(
            np.square(pt_a_df["x"] - pt_b_df["x"])
            + np.square(pt_a_df["y"] - pt_b_df["y"])
        )
    )
    # Finding pixels per mm conversion, using the known dist_mm as calibration
    px_per_mm = dist_px / dist_mm
    # Saving to configs file
    configs = ExperimentConfigs.read_json(configs_fp)
    configs.auto.px_per_mm = px_per_mm
    configs.write_json(configs_fp)
    return outcome

start_frame(dlc_fp, configs_fp) staticmethod

Determine the starting frame of the experiment based on when the subject "likely" entered the footage.

This is done by looking at a sliding window of time. If the median likelihood of the subject existing in each frame across the sliding window is greater than the defined pcutoff, then this is determined as the start time.

Notes

The config file must contain the following parameters:

- user
    - calculate_params
        - start_frame
            - window_sec: float
            - pcutoff: float

Source code in behavysis_pipeline/processes/calculate_params.py
@staticmethod
def start_frame(
    dlc_fp: str,
    configs_fp: str,
) -> str:
    """
    Determine the starting frame of the experiment based on when the subject "likely" entered
    the footage.

    This is done by looking at a sliding window of time. If the median likelihood of the subject
    existing in each frame across the sliding window is greater than the defined pcutoff, then
    this is determined as the start time.

    Notes
    -----
    The config file must contain the following parameters:
    ```
    - user
        - calculate_params
            - start_frame
                - window_sec: float
                - pcutoff: float
    ```
    """
    outcome = ""
    # Getting necessary config parameters
    configs = ExperimentConfigs.read_json(configs_fp)
    configs_filt = Model_check_exists(**configs.user.calculate_params.start_frame)
    bpts = configs.get_ref(configs_filt.bodyparts)
    window_sec = configs.get_ref(configs_filt.window_sec)
    pcutoff = configs.get_ref(configs_filt.pcutoff)
    fps = configs.auto.formatted_vid.fps
    # Asserting that the necessary auto configs are valid
    assert fps is not None, "fps is None. Please calculate fps first."
    # Deriving more parameters
    window_frames = int(np.round(fps * window_sec, 0))
    # Loading dataframe
    dlc_df = KeypointsMixin.clean_headings(KeypointsMixin.read_feather(dlc_fp))
    # Getting likelihoods of subject (given bpts) existing in each frame
    df_lhoods = calc_likelihoods(dlc_df, bpts, window_frames)
    # Determining start time. Start frame is the first frame of the rolling window's range
    df_lhoods["exists"] = df_lhoods["rolling"] > pcutoff
    # Getting when subject first exists in video
    start_frame = 0
    if np.all(df_lhoods["exists"] == 0):
        # If subject never exists (no True values in "exists" column), warn and
        # fall back to frame 0.
        # BUGFIX: added the missing space between the two sentences of the warning.
        outcome += (
            "WARNING: The subject was not detected in any frames - using the first frame. "
            + "Please check the video.\n"
        )
    else:
        start_frame = df_lhoods[df_lhoods["exists"]].index[0]
    # Writing to configs
    configs = ExperimentConfigs.read_json(configs_fp)
    configs.auto.start_frame = start_frame
    # configs.auto.start_sec = start_frame / fps
    configs.write_json(configs_fp)
    return outcome

stop_frame(dlc_fp, configs_fp) staticmethod

Calculates the end time according to the following equation:

stop_frame = start_frame + experiment_duration
Notes

The config file must contain the following parameters:

TODO

Source code in behavysis_pipeline/processes/calculate_params.py
@staticmethod
def stop_frame(dlc_fp: str, configs_fp: str) -> str:
    """
    Calculates the stop frame according to the following equation:

    ```
    stop_frame = start_frame + dur_sec * fps
    ```

    Notes
    -----
    The config file must contain the following parameters:
    ```
    - user
        - calculate_params
            - stop_frame
                - dur_sec: float
    ```
    """
    outcome = ""
    # Getting necessary config parameters
    configs = ExperimentConfigs.read_json(configs_fp)
    configs_filt = Model_stop_frame(**configs.user.calculate_params.stop_frame)
    dur_sec = configs.get_ref(configs_filt.dur_sec)
    start_frame = configs.auto.start_frame
    fps = configs.auto.formatted_vid.fps
    auto_stop_frame = configs.auto.formatted_vid.total_frames
    # Asserting that the necessary auto configs are valid
    assert (
        start_frame is not None
    ), "start_frame is None. Please calculate start_frame first."
    assert fps is not None, "fps is None. Please calculate fps first."
    assert (
        auto_stop_frame is not None
    ), "total_frames is None. Please calculate total_frames first."
    # Calculating stop_frame
    dur_frames = int(dur_sec * fps)
    stop_frame = start_frame + dur_frames
    # BUGFIX: removed the unreachable `if auto_stop_frame is None` branch - the
    # assert above already guarantees auto_stop_frame is not None.
    # Warn if the user-specified dur_sec exceeds the actual video length.
    if stop_frame > auto_stop_frame:
        outcome += (
            "WARNING: The user specified dur_sec in the configs file is greater "
            + "than the actual length of the video. Please check to see if this video is "
            + "too short or if the dur_sec value is incorrect.\n"
        )
    # Writing to config
    configs = ExperimentConfigs.read_json(configs_fp)
    configs.auto.stop_frame = stop_frame
    # configs.auto.stop_sec = stop_frame / fps
    configs.write_json(configs_fp)
    return outcome

behavysis_pipeline.processes.ClassifyBehaviours

summary

Source code in behavysis_pipeline/processes/classify_behaviours.py
class ClassifyBehaviours:
    """
    Static methods for running trained behaviour classifiers over extracted
    feature dataframes and saving the binary predictions.
    """

    @staticmethod
    @IOMixin.overwrite_check()
    def classify_behaviours(
        features_fp: str,
        out_fp: str,
        configs_fp: str,
        overwrite: bool,
    ) -> str:
        """
        Given model config files in the BehavClassifier format, generates behaviour predictions
        on the given extracted features dataframe.

        Parameters
        ----------
        features_fp : str
            Filepath of the extracted features dataframe (feather).
        out_fp : str
            Filepath to save the behaviour predictions dataframe to (feather).
        configs_fp : str
            Filepath of the experiment configs JSON file.
        overwrite : bool
            Whether to overwrite the output file (if it exists).

        Returns
        -------
        str
            Description of the function's outcome.

        Notes
        -----
        The config file must contain the following parameters:
        ```
        - user
            - classify_behaviours
                - models: list[str]
        ```
        Where the `models` list is a list of `model_config.json` filepaths.
        """
        outcome = ""
        # Getting necessary config parameters
        configs = ExperimentConfigs.read_json(configs_fp)
        models_ls = configs.user.classify_behaviours
        # Getting features data
        features_df = DFIOMixin.read_feather(features_fp)
        # Getting predictions for each classifier model and collecting them in a
        # list of pd.DataFrames.
        # IDIOM: a plain Python list replaces the numpy object array - pd.concat
        # accepts any iterable of DataFrames.
        behav_preds_ls = []
        for model_config in models_ls:
            # Getting model (clf, pcutoff, min_window_frames)
            model_fp = configs.get_ref(model_config.model_fp)
            model = BehavClassifier.load(model_fp)
            pcutoff = configs.get_ref(model_config.pcutoff)
            # Falling back to the model's own pcutoff when not set in configs
            pcutoff = model.configs.pcutoff if pcutoff is None else pcutoff
            min_window_frames = configs.get_ref(model_config.min_window_frames)
            # Running the clf pipeline
            df_i = model.pipeline_run(features_df)
            # Getting prob and pred column names
            prob_col = (model.configs.behaviour_name, BehavColumns.PROB.value)
            pred_col = (model.configs.behaviour_name, BehavColumns.PRED.value)
            # Using pcutoff to get binary predictions
            df_i[pred_col] = (df_i[prob_col] > pcutoff).astype(int)
            # Filling in small non-behav bouts
            df_i[pred_col] = merge_bouts(df_i[pred_col], min_window_frames)
            # Adding model predictions df to list
            behav_preds_ls.append(df_i)
            # Logging outcome
            outcome += f"Completed {model.configs.behaviour_name} classification.\n"
        # Concatenating predictions to a single dataframe
        behavs_df = pd.concat(behav_preds_ls, axis=1)
        # Setting the index and column names
        behavs_df.index.names = DFIOMixin.enum_to_list(BehavIN)
        behavs_df.columns.names = DFIOMixin.enum_to_list(BehavCN)
        # Checking df
        BehavMixin.check_df(behavs_df)
        # Saving behav_preds df
        DFIOMixin.write_feather(behavs_df, out_fp)
        # Returning outcome
        return outcome

classify_behaviours(features_fp, out_fp, configs_fp, overwrite) staticmethod

Given model config files in the BehavClassifier format, generates behaviour predictions on the given extracted features dataframe.

Parameters:

Name Type Description Default
features_fp str

description

required
out_fp str

description

required
configs_fp str

description

required
overwrite bool

Whether to overwrite the output file (if it exists).

required

Returns:

Type Description
str

Description of the function's outcome.

Notes

The config file must contain the following parameters:

- user
    - classify_behaviours
        - models: list[str]
Where the models list is a list of model_config.json filepaths.

Source code in behavysis_pipeline/processes/classify_behaviours.py
@staticmethod
@IOMixin.overwrite_check()
def classify_behaviours(
    features_fp: str,
    out_fp: str,
    configs_fp: str,
    overwrite: bool,
) -> str:
    """
    Given model config files in the BehavClassifier format, generates behaviour predictions
    on the given extracted features dataframe.

    Parameters
    ----------
    features_fp : str
        Filepath of the extracted features dataframe (feather).
    out_fp : str
        Filepath to save the behaviour predictions dataframe to (feather).
    configs_fp : str
        Filepath of the experiment configs JSON file.
    overwrite : bool
        Whether to overwrite the output file (if it exists).

    Returns
    -------
    str
        Description of the function's outcome.

    Notes
    -----
    The config file must contain the following parameters:
    ```
    - user
        - classify_behaviours
            - models: list[str]
    ```
    Where the `models` list is a list of `model_config.json` filepaths.
    """
    outcome = ""
    # Getting necessary config parameters
    configs = ExperimentConfigs.read_json(configs_fp)
    models_ls = configs.user.classify_behaviours
    # Getting features data
    features_df = DFIOMixin.read_feather(features_fp)
    # Getting predictions for each classifier model and collecting them in a
    # list of pd.DataFrames.
    # IDIOM: a plain Python list replaces the numpy object array - pd.concat
    # accepts any iterable of DataFrames.
    behav_preds_ls = []
    for model_config in models_ls:
        # Getting model (clf, pcutoff, min_window_frames)
        model_fp = configs.get_ref(model_config.model_fp)
        model = BehavClassifier.load(model_fp)
        pcutoff = configs.get_ref(model_config.pcutoff)
        # Falling back to the model's own pcutoff when not set in configs
        pcutoff = model.configs.pcutoff if pcutoff is None else pcutoff
        min_window_frames = configs.get_ref(model_config.min_window_frames)
        # Running the clf pipeline
        df_i = model.pipeline_run(features_df)
        # Getting prob and pred column names
        prob_col = (model.configs.behaviour_name, BehavColumns.PROB.value)
        pred_col = (model.configs.behaviour_name, BehavColumns.PRED.value)
        # Using pcutoff to get binary predictions
        df_i[pred_col] = (df_i[prob_col] > pcutoff).astype(int)
        # Filling in small non-behav bouts
        df_i[pred_col] = merge_bouts(df_i[pred_col], min_window_frames)
        # Adding model predictions df to list
        behav_preds_ls.append(df_i)
        # Logging outcome
        outcome += f"Completed {model.configs.behaviour_name} classification.\n"
    # Concatenating predictions to a single dataframe
    behavs_df = pd.concat(behav_preds_ls, axis=1)
    # Setting the index and column names
    behavs_df.index.names = DFIOMixin.enum_to_list(BehavIN)
    behavs_df.columns.names = DFIOMixin.enum_to_list(BehavCN)
    # Checking df
    BehavMixin.check_df(behavs_df)
    # Saving behav_preds df
    DFIOMixin.write_feather(behavs_df, out_fp)
    # Returning outcome
    return outcome

behavysis_pipeline.processes.Evaluate

summary

Source code in behavysis_pipeline/processes/evaluate.py
class Evaluate:
    """
    Evaluation utilities for an experiment: plots of keypoint likelihoods and
    behaviour predictions through time, and an annotated evaluation video.
    """

    ###############################################################################################
    #               MAKE KEYPOINTS PLOTS
    ###############################################################################################

    @staticmethod
    def keypoints_plot(
        vid_fp: str,
        dlc_fp: str,
        behavs_fp: str,
        out_dir: str,
        configs_fp: str,
        overwrite: bool,
    ) -> str:
        """
        Make keypoints evaluation plot of likelihood of each bodypart through time.

        Parameters
        ----------
        vid_fp : str
            Video filepath (unused here; kept for a consistent evaluate signature).
        dlc_fp : str
            DLC keypoints dataframe filepath (the plot is named after this file).
        behavs_fp : str
            Behaviours dataframe filepath (unused here).
        out_dir : str
            Base output directory; the plot is saved in a subfolder named after
            this function.
        configs_fp : str
            Experiment configs JSON filepath.
        overwrite : bool
            Whether to overwrite the output file (if it exists).

        Returns
        -------
        str
            Outcome message of the process.
        """
        outcome = ""
        name = IOMixin.get_name(dlc_fp)
        out_dir = os.path.join(out_dir, Evaluate.keypoints_plot.__name__)
        out_fp = os.path.join(out_dir, f"{name}.png")
        os.makedirs(out_dir, exist_ok=True)
        # If overwrite is False, checking if we should skip processing
        if not overwrite and os.path.exists(out_fp):
            return DiagnosticsMixin.warning_msg()

        # Getting necessary config parameters
        configs = ExperimentConfigs.read_json(configs_fp)
        configs_filt = configs.user.evaluate.keypoints_plot
        bpts = configs.get_ref(configs_filt.bodyparts)
        fps = configs.auto.formatted_vid.fps

        # Read the file
        df = KeypointsMixin.clean_headings(KeypointsMixin.read_feather(dlc_fp))
        # Checking the bodyparts specified in the configs exist in the dataframe
        KeypointsMixin.check_bpts_exist(df, bpts)
        # Making data long-ways (one row per frame/individual/bodypart)
        idx = pd.IndexSlice
        df = (
            df.loc[:, idx[:, bpts]]
            .stack([KeypointsCN.INDIVIDUALS.value, KeypointsCN.BODYPARTS.value])
            .reset_index()
        )
        # Adding the timestamp column (frame number / fps)
        df["timestamp"] = df[BehavIN.FRAME.value] / fps
        # Making plot: one facet row per individual, lines coloured by bodypart
        g = sns.FacetGrid(
            df,
            row=KeypointsCN.INDIVIDUALS.value,
            height=5,
            aspect=10,
        )
        g.map_dataframe(
            sns.lineplot,
            x="timestamp",
            y=Coords.LIKELIHOOD.value,
            hue=KeypointsCN.BODYPARTS.value,
            alpha=0.4,
        )
        g.add_legend()
        # Saving plot and clearing the figure to free memory
        g.savefig(out_fp)
        g.figure.clf()
        # Returning outcome string
        return outcome

    ###############################################################################################
    # MAKE BEHAVIOUR PLOTS
    ###############################################################################################

    @staticmethod
    def behav_plot(
        vid_fp: str,
        dlc_fp: str,
        behavs_fp: str,
        out_dir: str,
        configs_fp: str,
        overwrite: bool,
    ) -> str:
        """
        Make behaviour evaluation plot of the predicted and actual behaviours through time.

        Parameters
        ----------
        vid_fp : str
            Video filepath (unused here; kept for a consistent evaluate signature).
        dlc_fp : str
            DLC keypoints dataframe filepath (unused here).
        behavs_fp : str
            Behaviours dataframe filepath (the plot is named after this file).
        out_dir : str
            Base output directory; the plot is saved in a subfolder named after
            this function.
        configs_fp : str
            Experiment configs JSON filepath.
        overwrite : bool
            Whether to overwrite the output file (if it exists).

        Returns
        -------
        str
            Outcome message of the process.
        """
        outcome = ""
        name = IOMixin.get_name(behavs_fp)
        out_dir = os.path.join(out_dir, Evaluate.behav_plot.__name__)
        out_fp = os.path.join(out_dir, f"{name}.png")
        os.makedirs(out_dir, exist_ok=True)
        # If overwrite is False, checking if we should skip processing
        if not overwrite and os.path.exists(out_fp):
            return DiagnosticsMixin.warning_msg()

        # Getting necessary config parameters
        configs = ExperimentConfigs.read_json(configs_fp)
        # configs_filt = configs.user.evaluate.behav_plot
        fps = configs.auto.formatted_vid.fps

        # Read the file
        df = BehavMixin.read_feather(behavs_fp)
        # Making data long-ways (one row per frame/behaviour/outcome)
        df = (
            df.stack([BehavCN.BEHAVIOURS.value, BehavCN.OUTCOMES.value])
            .reset_index()
            .rename(columns={0: "value"})
        )
        # Adding the timestamp column (frame number / fps)
        df["timestamp"] = df[BehavIN.FRAME.value] / fps
        # Making plot: one facet row per behaviour, lines coloured by outcome
        g = sns.FacetGrid(
            df,
            row=BehavCN.BEHAVIOURS.value,
            height=5,
            aspect=10,
        )
        g.map_dataframe(
            sns.lineplot,
            x="timestamp",
            y="value",
            hue=BehavCN.OUTCOMES.value,
            alpha=0.4,
        )
        g.add_legend()
        # Saving plot and clearing the figure to free memory
        g.savefig(out_fp)
        g.figure.clf()
        # Returning outcome string
        return outcome

    ###############################################################################################
    #               MAKE KEYPOINTS VIDEO
    ###############################################################################################

    @staticmethod
    def eval_vid(
        vid_fp: str,
        dlc_fp: str,
        behavs_fp: str,
        out_dir: str,
        configs_fp: str,
        overwrite: bool,
    ) -> str:
        """
        Run the DLC model on the formatted video to generate a DLC annotated video and DLC file for
        all experiments. The DLC model's config.yaml filepath must be specified in the `config_path`
        parameter in the `user` section of the config file.

        Parameters
        ----------
        vid_fp : str
            Formatted video filepath (the output video is named after this file).
        dlc_fp : str
            DLC keypoints dataframe filepath.
        behavs_fp : str
            Behaviours dataframe filepath. If missing, a warning is added to the
            outcome and behaviour annotation is skipped.
        out_dir : str
            Base output directory; the video is saved in a subfolder named after
            this function.
        configs_fp : str
            Experiment configs JSON filepath.
        overwrite : bool
            Whether to overwrite the output file (if it exists).

        Returns
        -------
        str
            Outcome message of the process.
        """
        outcome = ""
        name = IOMixin.get_name(vid_fp)
        out_dir = os.path.join(out_dir, Evaluate.eval_vid.__name__)
        out_fp = os.path.join(out_dir, f"{name}.mp4")
        os.makedirs(out_dir, exist_ok=True)
        # If overwrite is False, checking if we should skip processing
        if not overwrite and os.path.exists(out_fp):
            return DiagnosticsMixin.warning_msg()

        # Getting necessary config parameters
        configs = ExperimentConfigs.read_json(configs_fp)
        configs_filt = configs.user.evaluate.eval_vid
        funcs_names = configs.get_ref(configs_filt.funcs)
        pcutoff = configs.get_ref(configs_filt.pcutoff)
        colour_level = configs.get_ref(configs_filt.colour_level)
        radius = configs.get_ref(configs_filt.radius)
        cmap = configs.get_ref(configs_filt.cmap)

        # Modifying dlc_df and making list of how to select dlc_df components to optimise processing
        dlc_df = KeypointsMixin.clean_headings(KeypointsMixin.read_feather(dlc_fp))
        # Filtering out IndivColumns.PROCESS.value columns
        if IndivColumns.PROCESS.value in dlc_df.columns.unique("individuals"):
            # BUGFIX: DataFrame.drop is not in-place - the result must be reassigned
            dlc_df = dlc_df.drop(
                columns=IndivColumns.PROCESS.value, level="individuals"
            )
        # Getting (indivs, bpts) MultiIndex
        indivs_bpts_ls = dlc_df.columns.droplevel("coords").unique()
        # Rounding and converting to correct dtypes - "x" and "y" values are ints
        dlc_df = dlc_df.fillna(0)
        columns = dlc_df.columns[
            dlc_df.columns.get_level_values("coords").isin(["x", "y"])
        ]
        dlc_df[columns] = dlc_df[columns].round(0).astype(int)
        # Changing the columns MultiIndex to a single-level index. For speedup
        dlc_df.columns = [
            f"{indiv}_{bpt}_{coord}" for indiv, bpt, coord in dlc_df.columns
        ]
        # Making the corresponding colours list for each bodypart instance
        # (colours depend on indiv/bpt)
        colours_i, _ = pd.factorize(indivs_bpts_ls.get_level_values(colour_level))
        # Guarding against 0/0 when there is only a single colour group
        colours_max = colours_i.max() if colours_i.max() > 0 else 1
        colours = (plt.get_cmap(cmap)(colours_i / colours_max) * 255)[
            :, [2, 1, 0, 3]
        ]

        # Getting behavs df
        try:
            behavs_df = BehavMixin.read_feather(behavs_fp)
        except FileNotFoundError:
            outcome += (
                "WARNING: behavs file not found or could not be loaded."
                + "Disregarding behaviour."
                + "If you have run the behaviour classifier, please check this file.\n"
            )
            behavs_df = BehavMixin.init_df(dlc_df.index)
        # Getting list of behaviours
        behavs_ls = behavs_df.columns.unique("behaviours")
        # Making sure all relevant behaviour outcome columns exist
        for behav in behavs_ls:
            for col in BehavColumns:
                if (behav, col.value) not in behavs_df:
                    behavs_df[(behav, col.value)] = 0
        # Changing the columns MultiIndex to a single-level index. For speedup
        behavs_df.columns = [
            f"{behav_name}_{outcome_name}"
            for behav_name, outcome_name in behavs_df.columns
        ]

        # MAKING ANNOTATED VIDEO
        # Setting the funcs for how to annotate the video
        funcs: list[Callable[[np.ndarray, int], np.ndarray]] = list()
        for f_name in funcs_names:
            if f_name == "johansson":
                outcome += f"Added {f_name} to video. \n"
                funcs.append(lambda frame, i: annot_johansson(frame))
            elif f_name == "keypoints":
                outcome += f"Added {f_name} to video. \n"
                funcs.append(
                    lambda frame, i: annot_keypoints(
                        frame, dlc_df.loc[i], indivs_bpts_ls, colours, pcutoff, radius
                    )
                )
            elif f_name == "behavs":
                outcome += f"Added {f_name} to video. \n"
                funcs.append(
                    lambda frame, i: annot_behav(frame, behavs_df.loc[i], behavs_ls)
                )
            else:
                continue
        # Open the input video
        in_cap = cv2.VideoCapture(vid_fp)
        fps = in_cap.get(cv2.CAP_PROP_FPS)
        width = int(in_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(in_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        total_frames = int(in_cap.get(cv2.CAP_PROP_FRAME_COUNT))
        # Define the codec and create VideoWriter object
        out_cap = cv2.VideoWriter(
            out_fp, cv2.VideoWriter_fourcc(*"mp4v"), fps, (width, height)
        )
        # Annotating each frame using the created functions
        outcome += annotate(in_cap, out_cap, funcs, total_frames)
        # Release video objects
        in_cap.release()
        out_cap.release()
        # Returning outcome string
        return outcome

behav_plot(vid_fp, dlc_fp, behavs_fp, out_dir, configs_fp, overwrite) staticmethod

Make behaviour evaluation plot of the predicted and actual behaviours through time.

Source code in behavysis_pipeline/processes/evaluate.py
@staticmethod
def behav_plot(
    vid_fp: str,
    dlc_fp: str,
    behavs_fp: str,
    out_dir: str,
    configs_fp: str,
    overwrite: bool,
) -> str:
    """
    Make behaviour evaluation plot of the predicted and actual behaviours through time.
    """
    outcome = ""
    # Building the output filepath: <out_dir>/behav_plot/<name>.png
    name = IOMixin.get_name(behavs_fp)
    plot_dir = os.path.join(out_dir, Evaluate.behav_plot.__name__)
    out_fp = os.path.join(plot_dir, f"{name}.png")
    os.makedirs(plot_dir, exist_ok=True)
    # Skipping processing when the output exists and overwrite is off
    if os.path.exists(out_fp) and not overwrite:
        return DiagnosticsMixin.warning_msg()

    # Reading the fps from the auto configs
    configs = ExperimentConfigs.read_json(configs_fp)
    # configs_filt = configs.user.evaluate.behav_plot
    fps = configs.auto.formatted_vid.fps

    # Reading the behaviours dataframe and reshaping it to long format
    behavs_df = BehavMixin.read_feather(behavs_fp)
    long_df = behavs_df.stack(
        [BehavCN.BEHAVIOURS.value, BehavCN.OUTCOMES.value]
    ).reset_index()
    long_df = long_df.rename(columns={0: "value"})
    # Deriving the timestamp (seconds) from the frame number
    long_df["timestamp"] = long_df[BehavIN.FRAME.value] / fps
    # One facet row per behaviour, lines coloured by outcome type
    grid = sns.FacetGrid(
        long_df,
        row=BehavCN.BEHAVIOURS.value,
        height=5,
        aspect=10,
    )
    grid.map_dataframe(
        sns.lineplot,
        x="timestamp",
        y="value",
        hue=BehavCN.OUTCOMES.value,
        alpha=0.4,
    )
    grid.add_legend()
    # Writing the plot to disk and clearing the figure
    grid.savefig(out_fp)
    grid.figure.clf()
    return outcome

eval_vid(vid_fp, dlc_fp, behavs_fp, out_dir, configs_fp, overwrite) staticmethod

Run the DLC model on the formatted video to generate a DLC annotated video and DLC file for all experiments. The DLC model's config.yaml filepath must be specified in the config_path parameter in the user section of the config file.

Source code in behavysis_pipeline/processes/evaluate.py
@staticmethod
def eval_vid(
    vid_fp: str,
    dlc_fp: str,
    behavs_fp: str,
    out_dir: str,
    configs_fp: str,
    overwrite: bool,
) -> str:
    """
    Run the DLC model on the formatted video to generate a DLC annotated video and DLC file for
    all experiments. The DLC model's config.yaml filepath must be specified in the `config_path`
    parameter in the `user` section of the config file.

    Parameters
    ----------
    vid_fp : str
        Formatted video filepath (the output video is named after this file).
    dlc_fp : str
        DLC keypoints dataframe filepath.
    behavs_fp : str
        Behaviours dataframe filepath. If missing, a warning is added to the
        outcome and behaviour annotation is skipped.
    out_dir : str
        Base output directory; the video is saved in a subfolder named after
        this function.
    configs_fp : str
        Experiment configs JSON filepath.
    overwrite : bool
        Whether to overwrite the output file (if it exists).

    Returns
    -------
    str
        Outcome message of the process.
    """
    outcome = ""
    name = IOMixin.get_name(vid_fp)
    out_dir = os.path.join(out_dir, Evaluate.eval_vid.__name__)
    out_fp = os.path.join(out_dir, f"{name}.mp4")
    os.makedirs(out_dir, exist_ok=True)
    # If overwrite is False, checking if we should skip processing
    if not overwrite and os.path.exists(out_fp):
        return DiagnosticsMixin.warning_msg()

    # Getting necessary config parameters
    configs = ExperimentConfigs.read_json(configs_fp)
    configs_filt = configs.user.evaluate.eval_vid
    funcs_names = configs.get_ref(configs_filt.funcs)
    pcutoff = configs.get_ref(configs_filt.pcutoff)
    colour_level = configs.get_ref(configs_filt.colour_level)
    radius = configs.get_ref(configs_filt.radius)
    cmap = configs.get_ref(configs_filt.cmap)

    # Modifying dlc_df and making list of how to select dlc_df components to optimise processing
    dlc_df = KeypointsMixin.clean_headings(KeypointsMixin.read_feather(dlc_fp))
    # Filtering out IndivColumns.PROCESS.value columns
    if IndivColumns.PROCESS.value in dlc_df.columns.unique("individuals"):
        # BUGFIX: DataFrame.drop is not in-place - the result must be reassigned
        dlc_df = dlc_df.drop(
            columns=IndivColumns.PROCESS.value, level="individuals"
        )
    # Getting (indivs, bpts) MultiIndex
    indivs_bpts_ls = dlc_df.columns.droplevel("coords").unique()
    # Rounding and converting to correct dtypes - "x" and "y" values are ints
    dlc_df = dlc_df.fillna(0)
    columns = dlc_df.columns[
        dlc_df.columns.get_level_values("coords").isin(["x", "y"])
    ]
    dlc_df[columns] = dlc_df[columns].round(0).astype(int)
    # Changing the columns MultiIndex to a single-level index. For speedup
    dlc_df.columns = [
        f"{indiv}_{bpt}_{coord}" for indiv, bpt, coord in dlc_df.columns
    ]
    # Making the corresponding colours list for each bodypart instance
    # (colours depend on indiv/bpt)
    colours_i, _ = pd.factorize(indivs_bpts_ls.get_level_values(colour_level))
    # Guarding against 0/0 when there is only a single colour group
    colours_max = colours_i.max() if colours_i.max() > 0 else 1
    colours = (plt.get_cmap(cmap)(colours_i / colours_max) * 255)[
        :, [2, 1, 0, 3]
    ]

    # Getting behavs df
    try:
        behavs_df = BehavMixin.read_feather(behavs_fp)
    except FileNotFoundError:
        outcome += (
            "WARNING: behavs file not found or could not be loaded."
            + "Disregarding behaviour."
            + "If you have run the behaviour classifier, please check this file.\n"
        )
        behavs_df = BehavMixin.init_df(dlc_df.index)
    # Getting list of behaviours
    behavs_ls = behavs_df.columns.unique("behaviours")
    # Making sure all relevant behaviour outcome columns exist
    for behav in behavs_ls:
        for col in BehavColumns:
            if (behav, col.value) not in behavs_df:
                behavs_df[(behav, col.value)] = 0
    # Changing the columns MultiIndex to a single-level index. For speedup
    behavs_df.columns = [
        f"{behav_name}_{outcome_name}"
        for behav_name, outcome_name in behavs_df.columns
    ]

    # MAKING ANNOTATED VIDEO
    # Setting the funcs for how to annotate the video
    funcs: list[Callable[[np.ndarray, int], np.ndarray]] = list()
    for f_name in funcs_names:
        if f_name == "johansson":
            outcome += f"Added {f_name} to video. \n"
            funcs.append(lambda frame, i: annot_johansson(frame))
        elif f_name == "keypoints":
            outcome += f"Added {f_name} to video. \n"
            funcs.append(
                lambda frame, i: annot_keypoints(
                    frame, dlc_df.loc[i], indivs_bpts_ls, colours, pcutoff, radius
                )
            )
        elif f_name == "behavs":
            outcome += f"Added {f_name} to video. \n"
            funcs.append(
                lambda frame, i: annot_behav(frame, behavs_df.loc[i], behavs_ls)
            )
        else:
            continue
    # Open the input video
    in_cap = cv2.VideoCapture(vid_fp)
    fps = in_cap.get(cv2.CAP_PROP_FPS)
    width = int(in_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(in_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    total_frames = int(in_cap.get(cv2.CAP_PROP_FRAME_COUNT))
    # Define the codec and create VideoWriter object
    out_cap = cv2.VideoWriter(
        out_fp, cv2.VideoWriter_fourcc(*"mp4v"), fps, (width, height)
    )
    # Annotating each frame using the created functions
    outcome += annotate(in_cap, out_cap, funcs, total_frames)
    # Release video objects
    in_cap.release()
    out_cap.release()
    # Returning outcome string
    return outcome

keypoints_plot(vid_fp, dlc_fp, behavs_fp, out_dir, configs_fp, overwrite) staticmethod

Make keypoints evaluation plot of likelihood of each bodypart through time.

Source code in behavysis_pipeline/processes/evaluate.py
@staticmethod
def keypoints_plot(
    vid_fp: str,
    dlc_fp: str,
    behavs_fp: str,
    out_dir: str,
    configs_fp: str,
    overwrite: bool,
) -> str:
    """
    Make keypoints evaluation plot of likelihood of each bodypart through time.
    """
    outcome = ""
    # Building the output filepath: <out_dir>/keypoints_plot/<name>.png
    name = IOMixin.get_name(dlc_fp)
    plot_dir = os.path.join(out_dir, Evaluate.keypoints_plot.__name__)
    out_fp = os.path.join(plot_dir, f"{name}.png")
    os.makedirs(plot_dir, exist_ok=True)
    # Skipping processing when the output exists and overwrite is off
    if os.path.exists(out_fp) and not overwrite:
        return DiagnosticsMixin.warning_msg()

    # Reading bodyparts and fps from the configs
    configs = ExperimentConfigs.read_json(configs_fp)
    bpts = configs.get_ref(configs.user.evaluate.keypoints_plot.bodyparts)
    fps = configs.auto.formatted_vid.fps

    # Reading the keypoints dataframe
    keypoints_df = KeypointsMixin.clean_headings(
        KeypointsMixin.read_feather(dlc_fp)
    )
    # Verifying that every configured bodypart exists in the dataframe
    KeypointsMixin.check_bpts_exist(keypoints_df, bpts)
    # Selecting the configured bodyparts and reshaping to long format
    long_df = (
        keypoints_df.loc[:, pd.IndexSlice[:, bpts]]
        .stack([KeypointsCN.INDIVIDUALS.value, KeypointsCN.BODYPARTS.value])
        .reset_index()
    )
    # Deriving the timestamp (seconds) from the frame number
    long_df["timestamp"] = long_df[BehavIN.FRAME.value] / fps
    # One facet row per individual, lines coloured by bodypart
    grid = sns.FacetGrid(
        long_df,
        row=KeypointsCN.INDIVIDUALS.value,
        height=5,
        aspect=10,
    )
    grid.map_dataframe(
        sns.lineplot,
        x="timestamp",
        y=Coords.LIKELIHOOD.value,
        hue=KeypointsCN.BODYPARTS.value,
        alpha=0.4,
    )
    grid.add_legend()
    # Writing the plot to disk and clearing the figure
    grid.savefig(out_fp)
    grid.figure.clf()
    return outcome

behavysis_pipeline.processes.ExtractFeatures

summary

Source code in behavysis_pipeline/processes/extract_features.py
class ExtractFeatures:
    """__summary__"""

    @staticmethod
    @IOMixin.overwrite_check()
    def extract_features(
        dlc_fp: str,
        out_fp: str,
        configs_fp: str,
        temp_dir: str,
        overwrite: bool,
    ) -> str:
        """
        Extract features from a preprocessed DLC dataframe using SimBA
        processes.

        Parameters
        ----------
        dlc_fp : str
            Preprocessed DLC filepath.
        out_fp : str
            Filepath to save extracted_features dataframe.
        configs_fp : str
            Configs JSON filepath.
        temp_dir : str
            Temporary directory path. Used during intermediate SimBA processes.
        overwrite : bool
            Whether to overwrite the out_fp file (if it exists).

        Returns
        -------
        str
            The outcome of the process.
        """
        outcome = ""
        # Resolving every directory/file path used below
        exp_name = IOMixin.get_name(dlc_fp)
        proc_id = MultiprocMixin.get_cpid()
        configs_dir = os.path.split(configs_fp)[0]
        simba_in_dir = os.path.join(temp_dir, f"input_{proc_id}")
        simba_dir = os.path.join(temp_dir, f"simba_proj_{proc_id}")
        features_from_dir = os.path.join(
            simba_dir, "project_folder", "csv", "features_extracted"
        )
        # Preparing the DLC dataframe as input to the SimBA project
        os.makedirs(simba_in_dir, exist_ok=True)
        simba_in_fp = os.path.join(simba_in_dir, f"{exp_name}.csv")
        # Selecting bodyparts for SimBA (8 bpts, 2 indivs)
        keypoints_df = select_cols(KeypointsMixin.read_feather(dlc_fp), configs_fp)
        # Keeping the index so it can be re-attached to the SimBA output later
        index = keypoints_df.index
        # SimBA only imports correctly when the index is unnamed
        keypoints_df.index.name = None
        keypoints_df.to_csv(simba_in_fp)
        # Clearing out any stale SimBA project folder
        IOMixin.silent_rm(simba_dir)
        # Running the SimBA env/script for feature extraction
        outcome += run_simba_subproc(
            simba_dir, simba_in_dir, configs_dir, temp_dir, proc_id
        )
        # Converting the SimBA csv output to feather
        simba_out_fp = os.path.join(features_from_dir, f"{exp_name}.csv")
        export_2_feather(simba_out_fp, out_fp, index)
        # Cleaning up the temporary folders
        IOMixin.silent_rm(simba_in_dir)
        IOMixin.silent_rm(simba_dir)
        return outcome

extract_features(dlc_fp, out_fp, configs_fp, temp_dir, overwrite) staticmethod

Extracting features from preprocessed DLC dataframe using SimBA processes.

Parameters:

Name Type Description Default
dlc_fp str

Preprocessed DLC filepath.

required
out_fp str

Filepath to save extracted_features dataframe.

required
configs_fp str

Configs JSON filepath.

required
temp_dir str

Temporary directory path. Used during intermediate SimBA processes.

required
overwrite bool

Whether to overwrite the out_fp file (if it exists).

required

Returns:

Type Description
str

The outcome of the process.

Source code in behavysis_pipeline/processes/extract_features.py
@staticmethod
@IOMixin.overwrite_check()
def extract_features(
    dlc_fp: str,
    out_fp: str,
    configs_fp: str,
    temp_dir: str,
    overwrite: bool,
) -> str:
    """
    Extract features from a preprocessed DLC dataframe using SimBA
    processes.

    Parameters
    ----------
    dlc_fp : str
        Preprocessed DLC filepath.
    out_fp : str
        Filepath to save extracted_features dataframe.
    configs_fp : str
        Configs JSON filepath.
    temp_dir : str
        Temporary directory path. Used during intermediate SimBA processes.
    overwrite : bool
        Whether to overwrite the out_fp file (if it exists).

    Returns
    -------
    str
        The outcome of the process.
    """
    outcome = ""
    # Resolving every directory/file path used below
    exp_name = IOMixin.get_name(dlc_fp)
    proc_id = MultiprocMixin.get_cpid()
    configs_dir = os.path.split(configs_fp)[0]
    simba_in_dir = os.path.join(temp_dir, f"input_{proc_id}")
    simba_dir = os.path.join(temp_dir, f"simba_proj_{proc_id}")
    features_from_dir = os.path.join(
        simba_dir, "project_folder", "csv", "features_extracted"
    )
    # Preparing the DLC dataframe as input to the SimBA project
    os.makedirs(simba_in_dir, exist_ok=True)
    simba_in_fp = os.path.join(simba_in_dir, f"{exp_name}.csv")
    # Selecting bodyparts for SimBA (8 bpts, 2 indivs)
    keypoints_df = select_cols(KeypointsMixin.read_feather(dlc_fp), configs_fp)
    # Keeping the index so it can be re-attached to the SimBA output later
    index = keypoints_df.index
    # SimBA only imports correctly when the index is unnamed
    keypoints_df.index.name = None
    keypoints_df.to_csv(simba_in_fp)
    # Clearing out any stale SimBA project folder
    IOMixin.silent_rm(simba_dir)
    # Running the SimBA env/script for feature extraction
    outcome += run_simba_subproc(
        simba_dir, simba_in_dir, configs_dir, temp_dir, proc_id
    )
    # Converting the SimBA csv output to feather
    simba_out_fp = os.path.join(features_from_dir, f"{exp_name}.csv")
    export_2_feather(simba_out_fp, out_fp, index)
    # Cleaning up the temporary folders
    IOMixin.silent_rm(simba_in_dir)
    IOMixin.silent_rm(simba_dir)
    return outcome

behavysis_pipeline.processes.FormatVid

Class for formatting videos based on given parameters.

Source code in behavysis_pipeline/processes/format_vid.py
class FormatVid:
    """
    Class for formatting videos based on given parameters.
    """

    @staticmethod
    @IOMixin.overwrite_check()
    def format_vid(in_fp: str, out_fp: str, configs_fp: str, overwrite: bool) -> str:
        """
        Formats the input video with the given parameters.

        Parameters
        ----------
        in_fp : str
            The input video filepath.
        out_fp : str
            The output video filepath.
        configs_fp : str
            The JSON configs filepath.
        overwrite : bool
            Whether to overwrite the output file (if it exists).

        Returns
        -------
        str
            Description of the function's outcome.
        """
        # Reading the formatting parameters from the experiment configs
        configs = ExperimentConfigs.read_json(configs_fp)
        format_params = configs.user.format_vid
        # Processing the video with the configured dimensions, fps and trim times
        outcome = ProcessVidMixin.process_vid(
            in_fp=in_fp,
            out_fp=out_fp,
            height_px=format_params.height_px,
            width_px=format_params.width_px,
            fps=format_params.fps,
            start_sec=format_params.start_sec,
            stop_sec=format_params.stop_sec,
        )
        # Storing the video metadata back into the configs file
        outcome += FormatVid.get_vid_metadata(in_fp, out_fp, configs_fp, overwrite)
        return outcome

    @staticmethod
    def get_vid_metadata(
        in_fp: str, out_fp: str, configs_fp: str, overwrite: bool
    ) -> str:
        """
        Finds the video metadata/parameters for either the raw or formatted video,
        and stores this data in the experiment's config file.

        Parameters
        ----------
        in_fp : str
            The input video filepath.
        out_fp : str
            The output video filepath.
        configs_fp : str
            The JSON configs filepath.
        overwrite : bool
            Whether to overwrite the output file (if it exists). IGNORED

        Returns
        -------
        str
            Description of the function's outcome.
        """
        outcome = ""
        configs = ExperimentConfigs.read_json(configs_fp)
        # Probing both the raw and the formatted video; a missing/unreadable
        # video only produces a warning rather than aborting
        for attr_name, vid_fp in (("raw_vid", in_fp), ("formatted_vid", out_fp)):
            try:
                setattr(
                    configs.auto, attr_name, ProcessVidMixin.get_vid_metadata(vid_fp)
                )
            except ValueError as e:
                outcome += f"WARNING: {str(e)}\n"
        outcome += "Video metadata stored in config file.\n"
        # Persisting the updated configs
        configs.write_json(configs_fp)
        return outcome

format_vid(in_fp, out_fp, configs_fp, overwrite) staticmethod

Formats the input video with the given parameters.

Parameters:

Name Type Description Default
in_fp str

The input video filepath.

required
out_fp str

The output video filepath.

required
configs_fp str

The JSON configs filepath.

required
overwrite bool

Whether to overwrite the output file (if it exists).

required

Returns:

Type Description
str

Description of the function's outcome.

Source code in behavysis_pipeline/processes/format_vid.py
@staticmethod
@IOMixin.overwrite_check()
def format_vid(in_fp: str, out_fp: str, configs_fp: str, overwrite: bool) -> str:
    """
    Formats the input video with the given parameters.

    Parameters
    ----------
    in_fp : str
        The input video filepath.
    out_fp : str
        The output video filepath.
    configs_fp : str
        The JSON configs filepath.
    overwrite : bool
        Whether to overwrite the output file (if it exists).

    Returns
    -------
    str
        Description of the function's outcome.
    """
    # Reading the formatting parameters from the experiment configs
    configs = ExperimentConfigs.read_json(configs_fp)
    format_params = configs.user.format_vid
    # Processing the video with the configured dimensions, fps and trim times
    outcome = ProcessVidMixin.process_vid(
        in_fp=in_fp,
        out_fp=out_fp,
        height_px=format_params.height_px,
        width_px=format_params.width_px,
        fps=format_params.fps,
        start_sec=format_params.start_sec,
        stop_sec=format_params.stop_sec,
    )
    # Storing the video metadata back into the configs file
    outcome += FormatVid.get_vid_metadata(in_fp, out_fp, configs_fp, overwrite)
    return outcome

get_vid_metadata(in_fp, out_fp, configs_fp, overwrite) staticmethod

Finds the video metadata/parameters for either the raw or formatted video, and stores this data in the experiment's config file.

Parameters:

Name Type Description Default
in_fp str

The input video filepath.

required
out_fp str

The output video filepath.

required
configs_fp str

The JSON configs filepath.

required
overwrite bool

Whether to overwrite the output file (if it exists). IGNORED

required

Returns:

Type Description
str

Description of the function's outcome.

Source code in behavysis_pipeline/processes/format_vid.py
@staticmethod
def get_vid_metadata(
    in_fp: str, out_fp: str, configs_fp: str, overwrite: bool
) -> str:
    """
    Finds the video metadata/parameters for either the raw or formatted video,
    and stores this data in the experiment's config file.

    Parameters
    ----------
    in_fp : str
        The input video filepath.
    out_fp : str
        The output video filepath.
    configs_fp : str
        The JSON configs filepath.
    overwrite : bool
        Whether to overwrite the output file (if it exists). IGNORED

    Returns
    -------
    str
        Description of the function's outcome.
    """
    outcome = ""
    configs = ExperimentConfigs.read_json(configs_fp)
    # Probing both the raw and the formatted video; a missing/unreadable
    # video only produces a warning rather than aborting
    for attr_name, vid_fp in (("raw_vid", in_fp), ("formatted_vid", out_fp)):
        try:
            setattr(
                configs.auto, attr_name, ProcessVidMixin.get_vid_metadata(vid_fp)
            )
        except ValueError as e:
            outcome += f"WARNING: {str(e)}\n"
    outcome += "Video metadata stored in config file.\n"
    # Persisting the updated configs
    configs.write_json(configs_fp)
    return outcome

behavysis_pipeline.processes.Preprocess

summary

Source code in behavysis_pipeline/processes/preprocess.py
class Preprocess:
    """
    Preprocessing steps for DLC keypoints dataframes:
    trimming to the experiment's start/stop frames, interpolating
    low-likelihood points, and refining subject identities.
    """

    @staticmethod
    @IOMixin.overwrite_check()
    def start_stop_trim(
        in_fp: str, out_fp: str, configs_fp: str, overwrite: bool
    ) -> str:
        """
        Filters the rows of a DLC formatted dataframe to include only rows within the start
        and end time of the experiment, given a corresponding configs dict.

        Parameters
        ----------
        in_fp : str
            The file path of the input DLC formatted dataframe.
        out_fp : str
            The file path of the output trimmed dataframe.
        configs_fp : str
            The file path of the configs dict.
        overwrite : bool
            If True, overwrite the output file if it already exists. If False, skip processing
            if the output file already exists (handled by the `overwrite_check` decorator).

        Returns
        -------
        str
            An outcome message indicating the result of the trimming process.

        Notes
        -----
        The config file must contain the following (auto-calculated) parameters:
        ```
        - auto
            - start_frame: int
            - stop_frame: int
        ```
        """
        outcome = ""
        # Getting necessary config parameters.
        # NOTE: the frames are read from the `auto` section (filled in by the
        # calculate_params step), not from `user.preprocess.start_stop_trim`.
        configs = ExperimentConfigs.read_json(configs_fp)
        start_frame = configs.auto.start_frame
        stop_frame = configs.auto.stop_frame
        # Reading file
        df = KeypointsMixin.read_feather(in_fp)
        # Trimming dataframe (label-based slice, so both endpoints are inclusive)
        df = df.loc[start_frame:stop_frame, :]
        # Writing file
        DFIOMixin.write_feather(df, out_fp)
        return outcome

    @staticmethod
    @IOMixin.overwrite_check()
    def interpolate(in_fp: str, out_fp: str, configs_fp: str, overwrite: bool) -> str:
        """
        "Smooths" out noticeable jitter of points, where the likelihood (and accuracy) of
        a point's coordinates are low (e.g., when the subject's head goes out of view). It
        does this by linearly interpolating the frames of a body part that are below a given
        likelihood pcutoff.

        Notes
        -----
        The config file must contain the following parameters:
        ```
        - user
            - preprocess
                - interpolate
                    - pcutoff: float
        ```
        """
        outcome = ""
        # Getting necessary config parameters
        configs = ExperimentConfigs.read_json(configs_fp)
        configs_filt = Model_interpolate(**configs.user.preprocess.interpolate)
        # Reading file
        df = KeypointsMixin.read_feather(in_fp)
        # Getting the unique (scorer, individual, bodypart) column groups.
        unique_cols = df.columns.droplevel(["coords"]).unique()
        # Setting low-likelihood points to NaN to later interpolate
        for scorer, indiv, bp in unique_cols:
            lhood_col = (scorer, indiv, bp, Coords.LIKELIHOOD.value)
            # Imputing NaN likelihood points with 0.
            # BUGFIX: assign the result back instead of `inplace=True` on a column
            # selection - that is chained assignment and may update a copy, not `df`.
            df[lhood_col] = df[lhood_col].fillna(value=0)
            # Setting x and y coordinates of points that have low likelihood to NaN
            to_remove = df[lhood_col] < configs_filt.pcutoff
            df.loc[to_remove, (scorer, indiv, bp, Coords.X.value)] = np.nan
            df.loc[to_remove, (scorer, indiv, bp, Coords.Y.value)] = np.nan
        # Linearly interpolating NaN x and y points,
        # backfilling points at the start and forward filling points at the end.
        df = df.interpolate(method="linear", axis=0).bfill().ffill()
        # If an ENTIRE column is NaN, the above leaves it NaN - impute with 0
        df = df.fillna(0)
        # Writing file
        DFIOMixin.write_feather(df, out_fp)
        return outcome

    @staticmethod
    @IOMixin.overwrite_check()
    def refine_ids(in_fp: str, out_fp: str, configs_fp: str, overwrite: bool) -> str:
        """
        Ensures that the identity is correctly tracked for maDLC.
        Assumes interpolatePoints and calcBodyCentre has already been run.

        Notes
        -----
        The config file must contain the following parameters:
        ```
        - user
            - preprocess
                - refine_ids
                    - marked: str
                    - unmarked: str
                    - marking: str
                    - window_sec: float
                    - bodyparts: list[str]
                    - metric: ["current", "rolling", "binned"]
        ```
        """
        outcome = ""
        # Reading file
        df = KeypointsMixin.read_feather(in_fp)
        # Getting necessary config parameters
        configs = ExperimentConfigs.read_json(configs_fp)
        configs_filt = Model_refine_ids(**configs.user.preprocess.refine_ids)
        marked = configs.get_ref(configs_filt.marked)
        unmarked = configs.get_ref(configs_filt.unmarked)
        marking = configs.get_ref(configs_filt.marking)
        window_sec = configs.get_ref(configs_filt.window_sec)
        bpts = configs.get_ref(configs_filt.bodyparts)
        metric = configs.get_ref(configs_filt.metric)
        fps = configs.auto.formatted_vid.fps
        # Calculating more parameters
        window_frames = int(np.round(fps * window_sec, 0))
        # Error checking for invalid/non-existent column names marked, unmarked, and marking
        for column, level in [
            (marked, "individuals"),
            (unmarked, "individuals"),
            (marking, "bodyparts"),
        ]:
            if column not in df.columns.unique(level):
                # BUGFIX: report the offending level, not always "marking"
                raise ValueError(
                    f'The {level} value in the config file, "{column}",'
                    + " is not a column name in the DLC file."
                )
        # Checking that bodyparts are all valid
        KeypointsMixin.check_bpts_exist(df, bpts)
        # Calculating the distances between the bodycentres and the marking
        df_aggr = aggregate_df(df, marking, [marked, unmarked], bpts)
        # Getting "to_switch" decision series for each frame
        # (sic: the helper is named `decice_switch` in the codebase)
        df_switch = decice_switch(df_aggr, window_frames, marked, unmarked)
        # Updating df with the switched values
        df_switched = switch_identities(df, df_switch[metric], marked, unmarked)
        # Writing to file
        DFIOMixin.write_feather(df_switched, out_fp)
        return outcome

interpolate(in_fp, out_fp, configs_fp, overwrite) staticmethod

"Smooths" out noticeable jitter of points, where the likelihood (and accuracy) of a point's coordinates are low (e.g., when the subject's head goes out of view). It does this by linearly interpolating the frames of a body part that are below a given likelihood pcutoff.

Notes

The config file must contain the following parameters:

- user
    - preprocess
        - interpolate
            - pcutoff: float

Source code in behavysis_pipeline/processes/preprocess.py
@staticmethod
@IOMixin.overwrite_check()
def interpolate(in_fp: str, out_fp: str, configs_fp: str, overwrite: bool) -> str:
    """
    "Smooths" out noticeable jitter of points, where the likelihood (and accuracy) of
    a point's coordinates are low (e.g., when the subject's head goes out of view). It
    does this by linearly interpolating the frames of a body part that are below a given
    likelihood pcutoff.

    Notes
    -----
    The config file must contain the following parameters:
    ```
    - user
        - preprocess
            - interpolate
                - pcutoff: float
    ```
    """
    outcome = ""
    # Getting necessary config parameters
    configs = ExperimentConfigs.read_json(configs_fp)
    configs_filt = Model_interpolate(**configs.user.preprocess.interpolate)
    # Reading file
    df = KeypointsMixin.read_feather(in_fp)
    # Getting the unique (scorer, individual, bodypart) column groups.
    unique_cols = df.columns.droplevel(["coords"]).unique()
    # Setting low-likelihood points to NaN to later interpolate
    for scorer, indiv, bp in unique_cols:
        lhood_col = (scorer, indiv, bp, Coords.LIKELIHOOD.value)
        # Imputing NaN likelihood points with 0.
        # BUGFIX: assign the result back instead of `inplace=True` on a column
        # selection - that is chained assignment and may update a copy, not `df`.
        df[lhood_col] = df[lhood_col].fillna(value=0)
        # Setting x and y coordinates of points that have low likelihood to NaN
        to_remove = df[lhood_col] < configs_filt.pcutoff
        df.loc[to_remove, (scorer, indiv, bp, Coords.X.value)] = np.nan
        df.loc[to_remove, (scorer, indiv, bp, Coords.Y.value)] = np.nan
    # Linearly interpolating NaN x and y points,
    # backfilling points at the start and forward filling points at the end.
    df = df.interpolate(method="linear", axis=0).bfill().ffill()
    # If an ENTIRE column is NaN, the above leaves it NaN - impute with 0
    df = df.fillna(0)
    # Writing file
    DFIOMixin.write_feather(df, out_fp)
    return outcome

refine_ids(in_fp, out_fp, configs_fp, overwrite) staticmethod

Ensures that the identity is correctly tracked for maDLC. Assumes interpolatePoints and calcBodyCentre has already been run.

Notes

The config file must contain the following parameters:

- user
    - preprocess
        - refine_ids
            - marked: str
            - unmarked: str
            - marking: str
            - window_sec: float
            - metric: ["current", "rolling", "binned"]

Source code in behavysis_pipeline/processes/preprocess.py
@staticmethod
@IOMixin.overwrite_check()
def refine_ids(in_fp: str, out_fp: str, configs_fp: str, overwrite: bool) -> str:
    """
    Ensures that the identity is correctly tracked for maDLC.
    Assumes interpolatePoints and calcBodyCentre has already been run.

    Notes
    -----
    The config file must contain the following parameters:
    ```
    - user
        - preprocess
            - refine_ids
                - marked: str
                - unmarked: str
                - marking: str
                - window_sec: float
                - bodyparts: list[str]
                - metric: ["current", "rolling", "binned"]
    ```
    """
    outcome = ""
    # Reading file
    df = KeypointsMixin.read_feather(in_fp)
    # Getting necessary config parameters
    configs = ExperimentConfigs.read_json(configs_fp)
    configs_filt = Model_refine_ids(**configs.user.preprocess.refine_ids)
    marked = configs.get_ref(configs_filt.marked)
    unmarked = configs.get_ref(configs_filt.unmarked)
    marking = configs.get_ref(configs_filt.marking)
    window_sec = configs.get_ref(configs_filt.window_sec)
    bpts = configs.get_ref(configs_filt.bodyparts)
    metric = configs.get_ref(configs_filt.metric)
    fps = configs.auto.formatted_vid.fps
    # Calculating more parameters
    window_frames = int(np.round(fps * window_sec, 0))
    # Error checking for invalid/non-existent column names marked, unmarked, and marking
    for column, level in [
        (marked, "individuals"),
        (unmarked, "individuals"),
        (marking, "bodyparts"),
    ]:
        if column not in df.columns.unique(level):
            # BUGFIX: report the offending level, not always "marking"
            raise ValueError(
                f'The {level} value in the config file, "{column}",'
                + " is not a column name in the DLC file."
            )
    # Checking that bodyparts are all valid
    KeypointsMixin.check_bpts_exist(df, bpts)
    # Calculating the distances between the bodycentres and the marking
    df_aggr = aggregate_df(df, marking, [marked, unmarked], bpts)
    # Getting "to_switch" decision series for each frame
    # (sic: the helper is named `decice_switch` in the codebase)
    df_switch = decice_switch(df_aggr, window_frames, marked, unmarked)
    # Updating df with the switched values
    df_switched = switch_identities(df, df_switch[metric], marked, unmarked)
    # Writing to file
    DFIOMixin.write_feather(df_switched, out_fp)
    return outcome

start_stop_trim(in_fp, out_fp, configs_fp, overwrite) staticmethod

Filters the rows of a DLC formatted dataframe to include only rows within the start and end time of the experiment, given a corresponding configs dict.

Parameters:

Name Type Description Default
in_fp str

The file path of the input DLC formatted dataframe.

required
out_fp str

The file path of the output trimmed dataframe.

required
configs_fp str

The file path of the configs dict.

required
overwrite bool

If True, overwrite the output file if it already exists. If False, skip processing if the output file already exists.

required

Returns:

Type Description
str

An outcome message indicating the result of the trimming process.

Notes

The config file must contain the following parameters:

- auto
    - start_frame: int
    - stop_frame: int

Source code in behavysis_pipeline/processes/preprocess.py
@staticmethod
@IOMixin.overwrite_check()
def start_stop_trim(
    in_fp: str, out_fp: str, configs_fp: str, overwrite: bool
) -> str:
    """
    Filters the rows of a DLC formatted dataframe to include only rows within the start
    and end time of the experiment, given a corresponding configs dict.

    Parameters
    ----------
    in_fp : str
        The file path of the input DLC formatted dataframe.
    out_fp : str
        The file path of the output trimmed dataframe.
    configs_fp : str
        The file path of the configs dict.
    overwrite : bool
        If True, overwrite the output file if it already exists. If False, skip processing
        if the output file already exists.

    Returns
    -------
    str
        An outcome message indicating the result of the trimming process.

    Notes
    -----
    The config file must contain the following (auto-calculated) parameters:
    ```
    - auto
        - start_frame: int
        - stop_frame: int
    ```
    """
    outcome = ""
    # Getting necessary config parameters.
    # NOTE: the frames are read from the `auto` section (filled in by the
    # calculate_params step), not from `user.preprocess.start_stop_trim`.
    configs = ExperimentConfigs.read_json(configs_fp)
    start_frame = configs.auto.start_frame
    stop_frame = configs.auto.stop_frame

    # Reading file
    df = KeypointsMixin.read_feather(in_fp)

    # Trimming dataframe (label-based slice, so both endpoints are inclusive)
    df = df.loc[start_frame:stop_frame, :]

    # Writing file
    DFIOMixin.write_feather(df, out_fp)

    return outcome

behavysis_pipeline.processes.RunDLC

summary

Source code in behavysis_pipeline/processes/run_dlc.py
class RunDLC:
    """
    Runners for DeepLabCut keypoint inference: a single video, or a batch of
    videos sharing one DLC model config.
    """

    @staticmethod
    @IOMixin.overwrite_check()
    def ma_dlc_analyse_single(
        in_fp: str,
        out_fp: str,
        configs_fp: str,
        temp_dir: str,
        gputouse: int | None,
        overwrite: bool,
    ) -> str:
        """
        Running custom DLC script to generate a DLC keypoints dataframe from a single video.

        Parameters
        ----------
        in_fp : str
            The input video filepath.
        out_fp : str
            The output keypoints dataframe filepath.
        configs_fp : str
            The JSON configs filepath (must contain `user.run_dlc.model_fp`).
        temp_dir : str
            Directory for intermediate DLC output.
        gputouse : int | None
            The GPU id to use, or None to run without a specific GPU.
        overwrite : bool
            Whether to overwrite the output file (handled by `overwrite_check`).

        Returns
        -------
        str
            Description of the function's outcome.
        """
        outcome = ""
        # Specifying the GPU to use.
        # BUGFIX: compare against None explicitly - `not gputouse` is also True
        # for GPU id 0, which is a valid device.
        gputouse = "None" if gputouse is None else gputouse
        # Getting model_fp
        configs = ExperimentConfigs.read_json(configs_fp)
        model_fp = configs.get_ref(configs.user.run_dlc.model_fp)
        # Derive more parameters
        dlc_out_dir = os.path.join(temp_dir, f"dlc_{gputouse}")
        out_dir = os.path.dirname(out_fp)
        # Making output directories
        os.makedirs(dlc_out_dir, exist_ok=True)

        # Assertion: the config.yaml file must exist.
        if not os.path.isfile(model_fp):
            raise ValueError(
                f'The given model_fp file does not exist: "{model_fp}".\n'
                + 'Check this file and specify a DLC ".yaml" config file.'
            )

        # Running the DLC subprocess (in a separate conda env)
        run_dlc_subproc(model_fp, [in_fp], dlc_out_dir, temp_dir, gputouse)

        # Exporting the h5 to feather the out_dir
        export_2_feather(in_fp, dlc_out_dir, out_dir)
        # IOMixin.silent_rm(dlc_out_dir)

        return outcome

    @staticmethod
    def ma_dlc_analyse_batch(
        in_fp_ls: list[str],
        out_dir: str,
        configs_dir: str,
        temp_dir: str,
        gputouse: int | None,
        overwrite: bool,
    ) -> str:
        """
        Running custom DLC script to generate DLC keypoints dataframes for a batch of
        videos. All experiments in the batch must share the same `user.run_dlc.model_fp`.
        """
        outcome = ""

        # Specifying the GPU to use.
        # BUGFIX: compare against None explicitly - `not gputouse` is also True
        # for GPU id 0, which is a valid device.
        if gputouse is None:
            gputouse = "None"
        # Making output directories
        dlc_out_dir = os.path.join(temp_dir, f"dlc_{gputouse}")
        os.makedirs(dlc_out_dir, exist_ok=True)

        # If overwrite is False, filtering for only experiments that need processing
        if not overwrite:
            # Getting only the in_fp_ls elements that do not exist in out_dir
            in_fp_ls = [
                i
                for i in in_fp_ls
                if not os.path.exists(
                    os.path.join(out_dir, f"{IOMixin.get_name(i)}.feather")
                )
            ]

        # If there are no videos to process, return
        if len(in_fp_ls) == 0:
            return outcome

        # Getting each experiment's model_fp from its corresponding configs file
        dlc_fp_ls = [IOMixin.get_name(i) for i in in_fp_ls]
        dlc_fp_ls = [os.path.join(configs_dir, f"{i}.json") for i in dlc_fp_ls]
        dlc_fp_ls = [ExperimentConfigs.read_json(i) for i in dlc_fp_ls]
        dlc_fp_ls = [i.user.run_dlc.model_fp for i in dlc_fp_ls]
        dlc_fp_set = set(dlc_fp_ls)
        # Assertion: all model_fp must be the same (DLC is invoked once per batch)
        assert len(dlc_fp_set) == 1, (
            "All experiments in a batch must use the same run_dlc.model_fp, "
            + f"but found: {dlc_fp_set}."
        )
        # Getting the model_fp
        model_fp = dlc_fp_set.pop()
        # Assertion: the config.yaml file must exist.
        assert os.path.isfile(model_fp), (
            f'The given model_fp file does not exist: "{model_fp}".\n'
            + 'Check this file and specify a DLC ".yaml" config file.'
        )

        # Running the DLC subprocess (in a separate conda env)
        run_dlc_subproc(model_fp, in_fp_ls, dlc_out_dir, temp_dir, gputouse)

        # Exporting the h5 to feather the out_dir
        for in_fp in in_fp_ls:
            export_2_feather(in_fp, dlc_out_dir, out_dir)
        IOMixin.silent_rm(dlc_out_dir)
        # Returning outcome
        return outcome

ma_dlc_analyse_batch(in_fp_ls, out_dir, configs_dir, temp_dir, gputouse, overwrite) staticmethod

Running custom DLC script to generate a DLC keypoints dataframe from a single video.

Source code in behavysis_pipeline/processes/run_dlc.py
@staticmethod
def ma_dlc_analyse_batch(
    in_fp_ls: list[str],
    out_dir: str,
    configs_dir: str,
    temp_dir: str,
    gputouse: int | None,
    overwrite: bool,
) -> str:
    """
    Running custom DLC script to generate DLC keypoints dataframes for a batch of
    videos. All experiments in the batch must share the same `user.run_dlc.model_fp`.
    """
    outcome = ""

    # Specifying the GPU to use.
    # BUGFIX: compare against None explicitly - `not gputouse` is also True
    # for GPU id 0, which is a valid device.
    if gputouse is None:
        gputouse = "None"
    # Making output directories
    dlc_out_dir = os.path.join(temp_dir, f"dlc_{gputouse}")
    os.makedirs(dlc_out_dir, exist_ok=True)

    # If overwrite is False, filtering for only experiments that need processing
    if not overwrite:
        # Getting only the in_fp_ls elements that do not exist in out_dir
        in_fp_ls = [
            i
            for i in in_fp_ls
            if not os.path.exists(
                os.path.join(out_dir, f"{IOMixin.get_name(i)}.feather")
            )
        ]

    # If there are no videos to process, return
    if len(in_fp_ls) == 0:
        return outcome

    # Getting each experiment's model_fp from its corresponding configs file
    dlc_fp_ls = [IOMixin.get_name(i) for i in in_fp_ls]
    dlc_fp_ls = [os.path.join(configs_dir, f"{i}.json") for i in dlc_fp_ls]
    dlc_fp_ls = [ExperimentConfigs.read_json(i) for i in dlc_fp_ls]
    dlc_fp_ls = [i.user.run_dlc.model_fp for i in dlc_fp_ls]
    dlc_fp_set = set(dlc_fp_ls)
    # Assertion: all model_fp must be the same (DLC is invoked once per batch)
    assert len(dlc_fp_set) == 1, (
        "All experiments in a batch must use the same run_dlc.model_fp, "
        + f"but found: {dlc_fp_set}."
    )
    # Getting the model_fp
    model_fp = dlc_fp_set.pop()
    # Assertion: the config.yaml file must exist.
    assert os.path.isfile(model_fp), (
        f'The given model_fp file does not exist: "{model_fp}".\n'
        + 'Check this file and specify a DLC ".yaml" config file.'
    )

    # Running the DLC subprocess (in a separate conda env)
    run_dlc_subproc(model_fp, in_fp_ls, dlc_out_dir, temp_dir, gputouse)

    # Exporting the h5 to feather the out_dir
    for in_fp in in_fp_ls:
        export_2_feather(in_fp, dlc_out_dir, out_dir)
    IOMixin.silent_rm(dlc_out_dir)
    # Returning outcome
    return outcome

ma_dlc_analyse_single(in_fp, out_fp, configs_fp, temp_dir, gputouse, overwrite) staticmethod

Running custom DLC script to generate a DLC keypoints dataframe from a single video.

Source code in behavysis_pipeline/processes/run_dlc.py
@staticmethod
@IOMixin.overwrite_check()
def ma_dlc_analyse_single(
    in_fp: str,
    out_fp: str,
    configs_fp: str,
    temp_dir: str,
    gputouse: int | None,
    overwrite: bool,
) -> str:
    """
    Running custom DLC script to generate a DLC keypoints dataframe from a single video.

    Parameters
    ----------
    in_fp : str
        The input video filepath.
    out_fp : str
        The output keypoints dataframe filepath.
    configs_fp : str
        The JSON configs filepath (must contain `user.run_dlc.model_fp`).
    temp_dir : str
        Directory for intermediate DLC output.
    gputouse : int | None
        The GPU id to use, or None to run without a specific GPU.
    overwrite : bool
        Whether to overwrite the output file (handled by `overwrite_check`).

    Returns
    -------
    str
        Description of the function's outcome.
    """
    outcome = ""
    # Specifying the GPU to use.
    # BUGFIX: compare against None explicitly - `not gputouse` is also True
    # for GPU id 0, which is a valid device.
    gputouse = "None" if gputouse is None else gputouse
    # Getting model_fp
    configs = ExperimentConfigs.read_json(configs_fp)
    model_fp = configs.get_ref(configs.user.run_dlc.model_fp)
    # Derive more parameters
    dlc_out_dir = os.path.join(temp_dir, f"dlc_{gputouse}")
    out_dir = os.path.dirname(out_fp)
    # Making output directories
    os.makedirs(dlc_out_dir, exist_ok=True)

    # Assertion: the config.yaml file must exist.
    if not os.path.isfile(model_fp):
        raise ValueError(
            f'The given model_fp file does not exist: "{model_fp}".\n'
            + 'Check this file and specify a DLC ".yaml" config file.'
        )

    # Running the DLC subprocess (in a separate conda env)
    run_dlc_subproc(model_fp, [in_fp], dlc_out_dir, temp_dir, gputouse)

    # Exporting the h5 to feather the out_dir
    export_2_feather(in_fp, dlc_out_dir, out_dir)
    # IOMixin.silent_rm(dlc_out_dir)

    return outcome

behavysis_pipeline.processes.UpdateConfigs

summary

Source code in behavysis_pipeline/processes/update_configs.py
class UpdateConfigs:
    """Experiment-config initialisation/refresh step."""

    @staticmethod
    def update_configs(
        configs_fp: str,
        default_configs_fp: str,
        overwrite: Literal["user", "all"],
    ) -> str:
        """
        Initialises the config files with the given `default_configs`.
        The different types of overwriting are:
        - "user": Only the user parameters are updated.
        - "all": All parameters are updated.

        Parameters
        ----------
        configs_fp : str
            The filepath of the existing config file.
        default_configs_fp : str
            The filepath of the default config file to use.
        overwrite : Literal["user", "all"]
            Specifies how to update the config files.

        Returns
        -------
        str
            Description of the function's outcome.
        """
        outcome = ""
        # Loading the experiment's existing configs; falling back to a blank
        # ExperimentConfigs when the file is missing or fails validation.
        try:
            configs = ExperimentConfigs.read_json(configs_fp)
        except (FileNotFoundError, ValidationError):
            configs = ExperimentConfigs()
        # Loading the default configs to copy from
        default_configs = ExperimentConfigs.read_json(default_configs_fp)
        # Applying the requested overwrite strategy
        if overwrite == "all":
            configs = default_configs
            outcome += "Updating all configs.\n"
        elif overwrite == "user":
            configs.user = default_configs.user
            configs.ref = default_configs.ref
            outcome += "Updating user and ref configs.\n"
        else:
            raise ValueError(
                f'Invalid value "{overwrite}" passed to function. '
                + 'The value must be either "user", or "all".'
            )
        # Persisting the merged configs back to disk
        configs.write_json(configs_fp)
        return outcome

update_configs(configs_fp, default_configs_fp, overwrite) staticmethod

Initialises the config files with the given default_configs. The different types of overwriting are: - "user": Only the user parameters are updated. - "all": All parameters are updated.

Parameters:

Name Type Description Default
configs_fp str

The filepath of the existing config file.

required
default_configs_fp str

The filepath of the default config file to use.

required
overwrite Literal['user', 'all']

Specifies how to update the config files.

required

Returns:

Type Description
str

Description of the function's outcome.

Source code in behavysis_pipeline/processes/update_configs.py
@staticmethod
def update_configs(
    configs_fp: str,
    default_configs_fp: str,
    overwrite: Literal["user", "all"],
) -> str:
    """
    Initialises the config files with the given `default_configs`.
    The different types of overwriting are:
    - "user": Only the user parameters are updated.
    - "all": All parameters are updated.

    Parameters
    ----------
    configs_fp : str
        The filepath of the existing config file.
    default_configs_fp : str
        The filepath of the default config file to use.
    overwrite : Literal["user", "all"]
        Specifies how to update the config files.

    Returns
    -------
    str
        Description of the function's outcome.
    """
    outcome = ""
    # Loading the experiment's existing configs; a missing or invalid file
    # falls back to a fresh ExperimentConfigs instance.
    try:
        configs = ExperimentConfigs.read_json(configs_fp)
    except (FileNotFoundError, ValidationError):
        configs = ExperimentConfigs()
    # Loading the defaults to copy from
    default_configs = ExperimentConfigs.read_json(default_configs_fp)
    # Applying the requested overwrite strategy
    if overwrite == "all":
        configs = default_configs
        outcome += "Updating all configs.\n"
    elif overwrite == "user":
        configs.user = default_configs.user
        configs.ref = default_configs.ref
        outcome += "Updating user and ref configs.\n"
    else:
        raise ValueError(
            f'Invalid value "{overwrite}" passed to function. '
            + 'The value must be either "user", or "all".'
        )
    # Writing the resulting configs back to the JSON file
    configs.write_json(configs_fp)
    return outcome