# Source code for cosinorage.datahandlers.utils.calc_enmo
###########################################################################
# Copyright (C) 2025 ETH Zurich
# CosinorAge: Prediction of biological age based on accelerometer data
# using the CosinorAge method proposed by Shim, Fleisch and Barata
# (https://www.nature.com/articles/s41746-024-01111-x)
#
# Authors: Jacob Leo Oskar Hunecke
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
##########################################################################
from typing import Optional

import numpy as np
import pandas as pd
# [docs] (documentation-viewer link marker; not part of the module source)
def calculate_enmo(data: pd.DataFrame, verbose: bool = False) -> np.ndarray:
    """
    Calculate the Euclidean Norm Minus One (ENMO) metric from accelerometer data.

    For every row, the Euclidean norm of the ``(x, y, z)`` acceleration vector
    is computed, 1 is subtracted from it, and negative results are replaced
    by 0.

    Parameters
    ----------
    data : pd.DataFrame
        DataFrame containing accelerometer data with columns:

        - 'x': X-axis acceleration values
        - 'y': Y-axis acceleration values
        - 'z': Z-axis acceleration values

        The subtraction of 1 only removes gravity if the values are expressed
        in g units (1g = 9.81 m/s²).
    verbose : bool, default=False
        If True, prints the number of processed records after a successful
        calculation.

    Returns
    -------
    numpy.ndarray
        One ENMO value per input row, truncated at 0.

        Two special cases deviate from this type and are kept for backward
        compatibility:

        - an empty input returns an empty ``pd.DataFrame``;
        - if the calculation raises (e.g. a required column is missing), the
          error is printed and the scalar ``np.nan`` is returned.

    Notes
    -----
    - ENMO = max(sqrt(x² + y² + z²) - 1, 0)
    - NaN values in the input propagate to the corresponding output entries.

    Examples
    --------
    >>> import pandas as pd
    >>> data = pd.DataFrame({
    ...     'x': [0.0, 3.0, 0.0],
    ...     'y': [0.0, 4.0, 0.0],
    ...     'z': [1.0, 0.0, 0.5],
    ... })
    >>> calculate_enmo(data)
    array([0., 4., 0.])
    """
    if data.empty:
        # Kept as an empty DataFrame (not an empty array) so existing callers
        # that check ``.empty`` on the result keep working.
        return pd.DataFrame()

    try:
        _acc_vectors = data[["x", "y", "z"]].values
        _enmo_vals = np.linalg.norm(_acc_vectors, axis=1) - 1
        # Norms below 1 would give negative ENMO; clamp them to zero.
        _enmo_vals = np.maximum(_enmo_vals, 0)
    except Exception as e:
        # Deliberate best-effort: report the problem and hand back NaN
        # instead of propagating the exception to the caller.
        print(f"Error calculating ENMO: {e}")
        return np.nan

    # Only report success when the calculation actually succeeded.
    if verbose:
        print(f"Calculated ENMO for {data.shape[0]} accelerometer records")
    return _enmo_vals
# [docs] (documentation-viewer link marker; not part of the module source)
def calculate_minute_level_enmo(
    data: pd.DataFrame, meta_dict: Optional[dict] = None, verbose: bool = False
) -> pd.DataFrame:
    """
    Resample high-frequency ENMO data to minute-level by averaging over each minute.

    Parameters
    ----------
    data : pd.DataFrame
        DataFrame with a datetime index and an 'enmo' column containing
        high-frequency ENMO data. An optional 'wear' column is averaged per
        minute in the same way.
    meta_dict : dict, optional
        Dictionary containing metadata. Only the key 'sf' (sampling frequency
        in Hz) is read, and only to validate it; it defaults to 25 when the
        key is missing or when ``meta_dict`` is None.
    verbose : bool, default=False
        If True, prints the number of minute-level records after a successful
        aggregation.

    Returns
    -------
    pd.DataFrame
        DataFrame containing minute-level aggregated data with:

        - 'enmo': Mean ENMO value for each minute
        - 'wear': Mean wear value for each minute (only if the input has a
          'wear' column)

        The index is a datetime index at minute resolution. An empty
        DataFrame is returned when the input is empty or when resampling
        fails (the error is printed rather than raised).

    Raises
    ------
    ValueError
        If the sampling frequency is less than 1/60 Hz (less than one sample
        per minute).

    Notes
    -----
    - Uses pandas ``resample('min').mean()`` for aggregation.
    - The sampling-frequency check runs before the empty-input check, so an
      invalid 'sf' raises even for empty data.

    Examples
    --------
    >>> import numpy as np
    >>> import pandas as pd
    >>>
    >>> # One hour of second-level data
    >>> dates = pd.date_range('2023-01-01 00:00:00', periods=3600, freq='s')
    >>> data = pd.DataFrame({
    ...     'enmo': np.random.uniform(0, 0.1, 3600),
    ...     'wear': np.random.choice([0, 1], 3600),
    ... }, index=dates)
    >>>
    >>> minute_data = calculate_minute_level_enmo(data, meta_dict={'sf': 1})
    >>> len(minute_data)
    60
    """
    # A None default avoids sharing one mutable dict across calls.
    if meta_dict is None:
        meta_dict = {}

    # Get sampling frequency from meta_dict or use default
    sf = meta_dict.get("sf", 25)  # Default to 25Hz if not specified
    if sf < 1 / 60:
        raise ValueError(
            "Sampling frequency must be at least 1/60 Hz (one sample per minute)"
        )

    if data.empty:
        return pd.DataFrame()

    try:
        minute_level_enmo_df = (
            data["enmo"].resample("min").mean().to_frame(name="enmo")
        )
        # check if data has a wear column
        if "wear" in data.columns:
            minute_level_enmo_df["wear"] = data["wear"].resample("min").mean()
    except Exception as e:
        # Deliberate best-effort: report the problem and return an empty
        # frame, the same result an empty input produces.
        print(f"Error resampling ENMO data: {e}")
        return pd.DataFrame()

    minute_level_enmo_df.index = pd.to_datetime(minute_level_enmo_df.index)

    # Only report success when the aggregation actually succeeded.
    if verbose:
        print(
            f"Aggregated ENMO values at the minute level leading to {minute_level_enmo_df.shape[0]} records"
        )
    return minute_level_enmo_df