###########################################################################
# Copyright (C) 2025 ETH Zurich
# CosinorAge: Prediction of biological age based on accelerometer data
# using the CosinorAge method proposed by Shim, Fleisch and Barata
# (https://www.nature.com/articles/s41746-024-01111-x)
#
# Authors: Jacob Leo Oskar Hunecke
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
##########################################################################
from typing import List
import numpy as np
import pandas as pd
[docs]
def IS(data: pd.Series) -> float:
r"""Calculate the interdaily stability (IS) for the entire dataset.
Interdaily stability quantifies the strength of coupling between the
rest-activity rhythm and environmental zeitgebers. It compares the
24-hour pattern across days.
Parameters
----------
data : pd.Series
Time series data containing activity measurements with datetime index
and 'ENMO' column. Should contain multiple days of minute-level data.
Returns
-------
float
Interdaily stability value ranging from 0 to 1, where:
- 0 indicates no stability (random activity patterns)
- 1 indicates perfect stability (identical daily patterns)
Returns np.nan if insufficient data or calculation fails.
Notes
-----
- Resamples data to hourly resolution for calculation
- IS = (D * sum((hourly_means - overall_mean)²)) / sum((all_values - overall_mean)²)
- Higher values indicate more consistent daily activity patterns
- Requires multiple days of data for meaningful calculation
- Used in circadian rhythm analysis to assess rhythm stability
Examples
--------
>>> import pandas as pd
>>> import numpy as np
>>>
>>> # Create sample multi-day activity data
>>> dates = pd.date_range('2023-01-01', periods=4320, freq='min') # 3 days
>>> # Simulate consistent daily pattern
>>> hours = dates.hour
>>> enmo = pd.Series(np.sin(hours * np.pi / 12) + 1 + np.random.normal(0, 0.1, 4320), index=dates)
>>>
>>> # Calculate interdaily stability
>>> is_value = IS(enmo)
>>> print(f"Interdaily Stability: {is_value:.3f}")
>>> # Higher values indicate more consistent daily patterns
"""
if len(data) == 0:
return np.nan
data_ = data.copy()[["enmo"]]
data_ = data_.resample("h").mean()
data_["hour"] = data_.index.hour
# Calculate key values
H = 24 # Hours per day
D = len(pd.unique(data_.index.date)) # Number of days
z_mean = data_["enmo"].mean() # Overall mean
# Calculate hourly means across days
hourly_means = data_.groupby("hour")["enmo"].mean()
# Calculate numerator
numerator = D * np.sum(np.power(hourly_means - z_mean, 2), axis=0)
# Calculate denominator
denominator = np.sum(np.power(data_["enmo"] - z_mean, 2), axis=0)
if denominator == 0 or np.isnan(denominator) or np.isinf(denominator):
return np.nan
IS = float(numerator / denominator)
return IS
[docs]
def IV(data: pd.Series) -> float:
r"""Calculate the intradaily variability (IV) for the entire dataset.
Intradaily variability quantifies the fragmentation of rest-activity patterns
within each 24-hour period. It is calculated as the ratio of the mean squared
first derivative to the variance.
Parameters
----------
data : pd.Series
Time series data containing activity measurements with datetime index
and 'ENMO' column. Should contain multiple days of minute-level data.
Returns
-------
float
Intradaily variability value, where:
- Lower values indicate less fragmented activity patterns
- Higher values indicate more fragmented activity patterns
Returns np.nan if insufficient data or calculation fails.
Notes
-----
- Resamples data to hourly resolution for calculation
- IV = (P * sum((z_p - z_{p-1})²)) / ((P-1) * sum((z_p - z_mean)²))
- Lower values indicate more consolidated rest-activity periods
- Higher values indicate more fragmented sleep and activity patterns
- Used in circadian rhythm analysis to assess rhythm fragmentation
Examples
--------
>>> import pandas as pd
>>> import numpy as np
>>>
>>> # Create sample multi-day activity data
>>> dates = pd.date_range('2023-01-01', periods=4320, freq='min') # 3 days
>>> # Simulate fragmented activity pattern
>>> hours = dates.hour
>>> enmo = pd.Series(np.random.uniform(0, 1, 4320), index=dates) # Random activity
>>>
>>> # Calculate intradaily variability
>>> iv_value = IV(enmo)
>>> print(f"Intradaily Variability: {iv_value:.3f}")
>>> # Higher values indicate more fragmented activity patterns
"""
if len(data) == 0:
return np.nan
data_ = data.copy()[["enmo"]]
P = len(data_)
# resample to hourly data
data_ = data_.resample("h").mean()
# Calculate numerator: P * sum((z_p - z_{p-1})^2)
first_derivative_squared = np.sum(
np.power(
data_[1:].reset_index(drop=True)
- data_[:-1].reset_index(drop=True),
2,
),
axis=0,
)
numerator = float(P * first_derivative_squared.iloc[0])
# Calculate denominator: (P-1) * sum((z_p - z_mean)^2)
deviations_squared = np.sum(np.power(data_ - data_.mean(), 2), axis=0)
denominator = float((P - 1) * deviations_squared.iloc[0])
if denominator == 0 or np.isnan(denominator) or np.isinf(denominator):
return np.nan
IV = numerator / denominator
return IV
[docs]
def M10(data: pd.Series) -> List[float]:
r"""Calculate the M10 (mean activity during the 10 most active hours)
and the start time of the 10 most active hours (M10_start) for each day.
M10 provides information about the most active period during each day,
which typically corresponds to the main activity phase.
Parameters
----------
data : pd.Series
Time series data containing activity measurements with datetime index
and 'ENMO' column. Should contain minute-level data for multiple days.
Returns
-------
tuple
Tuple containing two lists:
- m10: List of mean activity values during the 10 most active hours for each day
- m10_start: List of start times (datetime) of the 10 most active hours for each day
Returns empty lists if insufficient data.
Notes
-----
- Uses rolling 10-hour windows (600 minutes) to find the most active period
- Calculates mean activity within each window and finds the maximum
- Returns both the activity level and start time of the most active period
- Used in circadian rhythm analysis to identify the main activity phase
- Typically corresponds to daytime activity periods
Examples
--------
>>> import pandas as pd
>>> import numpy as np
>>>
>>> # Create sample multi-day activity data
>>> dates = pd.date_range('2023-01-01', periods=4320, freq='min') # 3 days
>>> # Simulate activity with peak during day
>>> hours = dates.hour
>>> enmo = pd.Series(np.sin(hours * np.pi / 12) + 1 + np.random.normal(0, 0.1, 4320), index=dates)
>>>
>>> # Calculate M10 for each day
>>> m10_values, m10_starts = M10(enmo)
>>> print(f"M10 values: {m10_values}")
>>> print(f"M10 start times: {m10_starts}")
"""
if len(data) == 0:
return [], []
data_ = data.copy()[["enmo"]]
daily_groups = data_.groupby(data_.index.date)
m10 = []
m10_start = []
for date, day_data in daily_groups:
# calculate the rolling mean over 10-hour windows
window_size = 600 # 10 hours * 60 minutes
rolling_means = (
day_data[::-1]
.rolling(window=window_size, center=False)
.mean()[::-1]
.dropna()
)
# Find the window with maximum activity
max_mean = float(rolling_means.max().iloc[0])
max_start_idx = rolling_means.idxmax().iloc[0]
if pd.isna(max_mean) or np.isnan(max_mean) or np.isinf(max_mean):
m10.append(np.nan)
m10_start.append(np.nan)
else:
m10.append(max_mean)
m10_start.append(max_start_idx)
return m10, m10_start
[docs]
def L5(data: pd.Series) -> List[float]:
r"""Calculate the L5 (mean activity during the 5 least active hours)
and the start time of the 5 least active hours (L5_start) for each day.
L5 provides information about the least active period during each day,
which typically corresponds to the main rest phase.
Parameters
----------
data : pd.Series
Time series data containing activity measurements with datetime index
and 'ENMO' column. Should contain minute-level data for multiple days.
Returns
-------
tuple
Tuple containing two lists:
- l5: List of mean activity values during the 5 least active hours for each day
- l5_start: List of start times (datetime) of the 5 least active hours for each day
Returns empty lists if insufficient data.
Notes
-----
- Uses rolling 5-hour windows (300 minutes) to find the least active period
- Calculates mean activity within each window and finds the minimum
- Returns both the activity level and start time of the least active period
- Used in circadian rhythm analysis to identify the main rest phase
- Typically corresponds to nighttime sleep periods
Examples
--------
>>> import pandas as pd
>>> import numpy as np
>>>
>>> # Create sample multi-day activity data
>>> dates = pd.date_range('2023-01-01', periods=4320, freq='min') # 3 days
>>> # Simulate activity with low during night
>>> hours = dates.hour
>>> enmo = pd.Series(np.sin(hours * np.pi / 12) + 1 + np.random.normal(0, 0.1, 4320), index=dates)
>>>
>>> # Calculate L5 for each day
>>> l5_values, l5_starts = L5(enmo)
>>> print(f"L5 values: {l5_values}")
>>> print(f"L5 start times: {l5_starts}")
"""
if len(data) == 0:
return [], []
data_ = data.copy()[["enmo"]]
daily_groups = data_.groupby(data_.index.date)
l5 = []
l5_start = []
for date, day_data in daily_groups:
# calculate the rolling mean over 5-hour windows
window_size = 300 # 5 hours * 60 minutes
rolling_means = (
day_data[::-1]
.rolling(window=window_size, center=False)
.mean()[::-1]
.dropna()
)
# Find the window with minimum activity
min_mean = float(rolling_means.min().iloc[0])
min_start_idx = rolling_means.idxmin().iloc[0]
if pd.isna(min_mean) or np.isnan(min_mean) or np.isinf(min_mean):
l5.append(np.nan)
l5_start.append(np.nan)
else:
l5.append(min_mean)
l5_start.append(min_start_idx)
return l5, l5_start
[docs]
def RA(m10: List[float], l5: List[float]) -> List[float]:
r"""Calculate the relative amplitude (RA) for each day.
Relative amplitude is calculated as the difference between the most active
10-hour period and least active 5-hour period, divided by their sum.
This provides a normalized measure of the daily activity rhythm strength.
Parameters
----------
m10 : List[float]
List of M10 values (mean activity during 10 most active hours) for each day.
Should be output from the M10() function.
l5 : List[float]
List of L5 values (mean activity during 5 least active hours) for each day.
Should be output from the L5() function.
Returns
-------
List[float]
List of relative amplitude values for each day, where:
- Values range from 0 to 1
- Higher values indicate stronger daily activity rhythms
- Lower values indicate weaker daily activity rhythms
Returns empty list if input lists are empty.
Notes
-----
- RA = (M10 - L5) / (M10 + L5)
- Normalized measure that accounts for overall activity level
- Higher values indicate more pronounced rest-activity cycles
- Used in circadian rhythm analysis to assess rhythm strength
- Requires both M10 and L5 values from the same dataset
Examples
--------
>>> import pandas as pd
>>> import numpy as np
>>>
>>> # Create sample multi-day activity data
>>> dates = pd.date_range('2023-01-01', periods=4320, freq='min') # 3 days
>>> hours = dates.hour
>>> enmo = pd.Series(np.sin(hours * np.pi / 12) + 1 + np.random.normal(0, 0.1, 4320), index=dates)
>>>
>>> # Calculate M10 and L5 first
>>> m10_values, m10_starts = M10(enmo)
>>> l5_values, l5_starts = L5(enmo)
>>>
>>> # Calculate relative amplitude
>>> ra_values = RA(m10_values, l5_values)
>>> print(f"Relative Amplitude values: {ra_values}")
>>> # Higher values indicate stronger daily activity rhythms
"""
if len(m10) == 0 or len(l5) == 0:
return []
if len(m10) != len(l5):
raise ValueError("m10 and l5 must have the same length")
ra = []
for i in range(len(m10)):
denominator = m10[i] + l5[i]
if denominator == 0 or np.isnan(denominator) or np.isinf(denominator):
ra.append(np.nan)
else:
ra.append((m10[i] - l5[i]) / denominator)
return ra