Source code for cosinorage.datahandlers.utils.calc_enmo

###########################################################################
# Copyright (C) 2025 ETH Zurich
# CosinorAge: Prediction of biological age based on accelerometer data
# using the CosinorAge method proposed by Shim, Fleisch and Barata
# (https://www.nature.com/articles/s41746-024-01111-x)
#
# Authors: Jacob Leo Oskar Hunecke
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#         http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
##########################################################################

from typing import Optional

import numpy as np
import pandas as pd


def calculate_enmo(data: pd.DataFrame, verbose: bool = False) -> np.ndarray:
    """
    Calculate the Euclidean Norm Minus One (ENMO) metric from accelerometer data.

    For every row, the Euclidean norm of the (x, y, z) acceleration vector is
    computed, 1 is subtracted, and negative results are truncated to 0.

    Parameters
    ----------
    data : pd.DataFrame
        DataFrame containing accelerometer data with columns:
        - 'x': X-axis acceleration values
        - 'y': Y-axis acceleration values
        - 'z': Z-axis acceleration values
        All values should be in g units (1g = 9.81 m/s²).
    verbose : bool, default=False
        If True, prints the number of processed records after a successful
        calculation.

    Returns
    -------
    numpy.ndarray
        One-dimensional array with one ENMO value per input row. Negative
        values are set to 0.

        Two special cases deviate from this type:
        - If `data` is empty, an empty ``pd.DataFrame`` is returned.
        - If the calculation fails (e.g. a required column is missing), the
          error is printed and the scalar ``np.nan`` is returned.

    Notes
    -----
    - ENMO = max(sqrt(x² + y² + z²) - 1, 0)
    - ENMO represents acceleration in excess of 1g (gravity)
    - Errors are not raised; they are reported via ``print`` and signalled
      by the ``np.nan`` return value

    Examples
    --------
    >>> import pandas as pd
    >>>
    >>> data = pd.DataFrame({
    ...     'x': [0.1, 0.2, 0.3],
    ...     'y': [0.1, 0.2, 0.3],
    ...     'z': [1.0, 1.1, 1.2]
    ... })
    >>> enmo_values = calculate_enmo(data)
    >>> # enmo_values is approximately [0.0100, 0.1358, 0.2728]
    """
    if data.empty:
        # Kept as an empty DataFrame (not an empty array) so existing callers
        # that check `.empty` on the result keep working.
        return pd.DataFrame()

    try:
        acc_vectors = data[["x", "y", "z"]].values
        # Row-wise vector magnitude minus 1, floored at zero.
        enmo_vals = np.maximum(np.linalg.norm(acc_vectors, axis=1) - 1, 0)
    except Exception as e:
        # Deliberate best-effort: report the problem and signal failure with
        # NaN instead of raising. Return right away so the success message
        # below is never printed for a failed calculation.
        print(f"Error calculating ENMO: {e}")
        return np.nan

    if verbose:
        print(f"Calculated ENMO for {data.shape[0]} accelerometer records")

    return enmo_vals
def calculate_minute_level_enmo(
    data: pd.DataFrame, meta_dict: Optional[dict] = None, verbose: bool = False
) -> pd.DataFrame:
    """
    Resample high-frequency ENMO data to minute-level by averaging over each minute.

    Parameters
    ----------
    data : pd.DataFrame
        DataFrame with a datetime index and an 'enmo' column containing
        high-frequency ENMO data. An optional 'wear' column is aggregated
        the same way.
    meta_dict : dict, optional
        Dictionary containing metadata. Only the key 'sf' (sampling frequency
        in Hz) is read; it defaults to 25 when absent. The value is used for
        validation only and does not influence the aggregation. ``None``
        (the default) is treated as an empty dictionary.
    verbose : bool, default=False
        If True, prints the number of resulting minute-level records.

    Returns
    -------
    pd.DataFrame
        DataFrame containing minute-level aggregated data with:
        - 'enmo': Mean ENMO value for each minute
        - 'wear': Mean wear value for each minute (only if `data` has a
          'wear' column)
        An empty DataFrame is returned if `data` is empty or if resampling
        fails (the error is printed, not raised).

    Raises
    ------
    ValueError
        If the sampling frequency is less than 1/60 Hz (less than one sample
        per minute). This check runs before the empty-input check.

    Notes
    -----
    - Uses pandas ``resample('min').mean()`` for aggregation
    - The index of the result is passed through ``pd.to_datetime``

    Examples
    --------
    >>> import numpy as np
    >>> import pandas as pd
    >>>
    >>> # One hour of second-level data
    >>> dates = pd.date_range('2023-01-01 00:00:00', periods=3600, freq='s')
    >>> data = pd.DataFrame({
    ...     'enmo': np.random.uniform(0, 0.1, 3600),
    ...     'wear': np.random.choice([0, 1], 3600)
    ... }, index=dates)
    >>>
    >>> minute_data = calculate_minute_level_enmo(data, meta_dict={'sf': 1})
    >>> len(minute_data)
    60
    """
    # A None sentinel replaces the former `{}` default so that no mutable
    # object is shared between calls.
    if meta_dict is None:
        meta_dict = {}

    # Get sampling frequency from meta_dict or use default
    sf = meta_dict.get("sf", 25)  # Default to 25Hz if not specified

    # Below one sample per minute there is nothing to average per minute.
    if sf < 1 / 60:
        raise ValueError("Sampling frequency must be at least 1 minute")

    if data.empty:
        return pd.DataFrame()

    try:
        minute_level_enmo_df = (
            data["enmo"].resample("min").mean().to_frame(name="enmo")
        )

        # check if data has a wear column
        if "wear" in data.columns:
            minute_level_enmo_df["wear"] = data["wear"].resample("min").mean()

    except Exception as e:
        # Deliberate best-effort: report the problem and fall back to an
        # empty result instead of raising.
        print(f"Error resampling ENMO data: {e}")
        minute_level_enmo_df = pd.DataFrame()

    minute_level_enmo_df.index = pd.to_datetime(minute_level_enmo_df.index)

    if verbose:
        print(
            f"Aggregated ENMO values at the minute level leading to {minute_level_enmo_df.shape[0]} records"
        )

    return minute_level_enmo_df