Source code for cosinorage.features.utils.nonparam_analysis

###########################################################################
# Copyright (C) 2025 ETH Zurich
# CosinorAge: Prediction of biological age based on accelerometer data
# using the CosinorAge method proposed by Shim, Fleisch and Barata
# (https://www.nature.com/articles/s41746-024-01111-x)
#
# Authors: Jacob Leo Oskar Hunecke
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#         http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
##########################################################################

from typing import List

import numpy as np
import pandas as pd


[docs] def IS(data: pd.Series) -> float: r"""Calculate the interdaily stability (IS) for the entire dataset. Interdaily stability quantifies the strength of coupling between the rest-activity rhythm and environmental zeitgebers. It compares the 24-hour pattern across days. Parameters ---------- data : pd.Series Time series data containing activity measurements with datetime index and 'ENMO' column. Should contain multiple days of minute-level data. Returns ------- float Interdaily stability value ranging from 0 to 1, where: - 0 indicates no stability (random activity patterns) - 1 indicates perfect stability (identical daily patterns) Returns np.nan if insufficient data or calculation fails. Notes ----- - Resamples data to hourly resolution for calculation - IS = (D * sum((hourly_means - overall_mean)²)) / sum((all_values - overall_mean)²) - Higher values indicate more consistent daily activity patterns - Requires multiple days of data for meaningful calculation - Used in circadian rhythm analysis to assess rhythm stability Examples -------- >>> import pandas as pd >>> import numpy as np >>> >>> # Create sample multi-day activity data >>> dates = pd.date_range('2023-01-01', periods=4320, freq='min') # 3 days >>> # Simulate consistent daily pattern >>> hours = dates.hour >>> enmo = pd.Series(np.sin(hours * np.pi / 12) + 1 + np.random.normal(0, 0.1, 4320), index=dates) >>> >>> # Calculate interdaily stability >>> is_value = IS(enmo) >>> print(f"Interdaily Stability: {is_value:.3f}") >>> # Higher values indicate more consistent daily patterns """ if len(data) == 0: return np.nan data_ = data.copy()[["enmo"]] data_ = data_.resample("h").mean() data_["hour"] = data_.index.hour # Calculate key values H = 24 # Hours per day D = len(pd.unique(data_.index.date)) # Number of days z_mean = data_["enmo"].mean() # Overall mean # Calculate hourly means across days hourly_means = data_.groupby("hour")["enmo"].mean() # Calculate numerator numerator = D * np.sum(np.power(hourly_means - z_mean, 2), axis=0) # Calculate denominator denominator = np.sum(np.power(data_["enmo"] - z_mean, 2), axis=0) if denominator == 0 or np.isnan(denominator) or np.isinf(denominator): return np.nan IS = float(numerator / denominator) return IS
[docs] def IV(data: pd.Series) -> float: r"""Calculate the intradaily variability (IV) for the entire dataset. Intradaily variability quantifies the fragmentation of rest-activity patterns within each 24-hour period. It is calculated as the ratio of the mean squared first derivative to the variance. Parameters ---------- data : pd.Series Time series data containing activity measurements with datetime index and 'ENMO' column. Should contain multiple days of minute-level data. Returns ------- float Intradaily variability value, where: - Lower values indicate less fragmented activity patterns - Higher values indicate more fragmented activity patterns Returns np.nan if insufficient data or calculation fails. Notes ----- - Resamples data to hourly resolution for calculation - IV = (P * sum((z_p - z_{p-1})²)) / ((P-1) * sum((z_p - z_mean)²)) - Lower values indicate more consolidated rest-activity periods - Higher values indicate more fragmented sleep and activity patterns - Used in circadian rhythm analysis to assess rhythm fragmentation Examples -------- >>> import pandas as pd >>> import numpy as np >>> >>> # Create sample multi-day activity data >>> dates = pd.date_range('2023-01-01', periods=4320, freq='min') # 3 days >>> # Simulate fragmented activity pattern >>> hours = dates.hour >>> enmo = pd.Series(np.random.uniform(0, 1, 4320), index=dates) # Random activity >>> >>> # Calculate intradaily variability >>> iv_value = IV(enmo) >>> print(f"Intradaily Variability: {iv_value:.3f}") >>> # Higher values indicate more fragmented activity patterns """ if len(data) == 0: return np.nan data_ = data.copy()[["enmo"]] P = len(data_) # resample to hourly data data_ = data_.resample("h").mean() # Calculate numerator: P * sum((z_p - z_{p-1})^2) first_derivative_squared = np.sum( np.power( data_[1:].reset_index(drop=True) - data_[:-1].reset_index(drop=True), 2, ), axis=0, ) numerator = float(P * first_derivative_squared.iloc[0]) # Calculate denominator: (P-1) * sum((z_p - z_mean)^2) deviations_squared = np.sum(np.power(data_ - data_.mean(), 2), axis=0) denominator = float((P - 1) * deviations_squared.iloc[0]) if denominator == 0 or np.isnan(denominator) or np.isinf(denominator): return np.nan IV = numerator / denominator return IV
[docs] def M10(data: pd.Series) -> List[float]: r"""Calculate the M10 (mean activity during the 10 most active hours) and the start time of the 10 most active hours (M10_start) for each day. M10 provides information about the most active period during each day, which typically corresponds to the main activity phase. Parameters ---------- data : pd.Series Time series data containing activity measurements with datetime index and 'ENMO' column. Should contain minute-level data for multiple days. Returns ------- tuple Tuple containing two lists: - m10: List of mean activity values during the 10 most active hours for each day - m10_start: List of start times (datetime) of the 10 most active hours for each day Returns empty lists if insufficient data. Notes ----- - Uses rolling 10-hour windows (600 minutes) to find the most active period - Calculates mean activity within each window and finds the maximum - Returns both the activity level and start time of the most active period - Used in circadian rhythm analysis to identify the main activity phase - Typically corresponds to daytime activity periods Examples -------- >>> import pandas as pd >>> import numpy as np >>> >>> # Create sample multi-day activity data >>> dates = pd.date_range('2023-01-01', periods=4320, freq='min') # 3 days >>> # Simulate activity with peak during day >>> hours = dates.hour >>> enmo = pd.Series(np.sin(hours * np.pi / 12) + 1 + np.random.normal(0, 0.1, 4320), index=dates) >>> >>> # Calculate M10 for each day >>> m10_values, m10_starts = M10(enmo) >>> print(f"M10 values: {m10_values}") >>> print(f"M10 start times: {m10_starts}") """ if len(data) == 0: return [], [] data_ = data.copy()[["enmo"]] daily_groups = data_.groupby(data_.index.date) m10 = [] m10_start = [] for date, day_data in daily_groups: # calculate the rolling mean over 10-hour windows window_size = 600 # 10 hours * 60 minutes rolling_means = ( day_data[::-1] .rolling(window=window_size, center=False) .mean()[::-1] .dropna() ) # Find the window with maximum activity max_mean = float(rolling_means.max().iloc[0]) max_start_idx = rolling_means.idxmax().iloc[0] if pd.isna(max_mean) or np.isnan(max_mean) or np.isinf(max_mean): m10.append(np.nan) m10_start.append(np.nan) else: m10.append(max_mean) m10_start.append(max_start_idx) return m10, m10_start
[docs] def L5(data: pd.Series) -> List[float]: r"""Calculate the L5 (mean activity during the 5 least active hours) and the start time of the 5 least active hours (L5_start) for each day. L5 provides information about the least active period during each day, which typically corresponds to the main rest phase. Parameters ---------- data : pd.Series Time series data containing activity measurements with datetime index and 'ENMO' column. Should contain minute-level data for multiple days. Returns ------- tuple Tuple containing two lists: - l5: List of mean activity values during the 5 least active hours for each day - l5_start: List of start times (datetime) of the 5 least active hours for each day Returns empty lists if insufficient data. Notes ----- - Uses rolling 5-hour windows (300 minutes) to find the least active period - Calculates mean activity within each window and finds the minimum - Returns both the activity level and start time of the least active period - Used in circadian rhythm analysis to identify the main rest phase - Typically corresponds to nighttime sleep periods Examples -------- >>> import pandas as pd >>> import numpy as np >>> >>> # Create sample multi-day activity data >>> dates = pd.date_range('2023-01-01', periods=4320, freq='min') # 3 days >>> # Simulate activity with low during night >>> hours = dates.hour >>> enmo = pd.Series(np.sin(hours * np.pi / 12) + 1 + np.random.normal(0, 0.1, 4320), index=dates) >>> >>> # Calculate L5 for each day >>> l5_values, l5_starts = L5(enmo) >>> print(f"L5 values: {l5_values}") >>> print(f"L5 start times: {l5_starts}") """ if len(data) == 0: return [], [] data_ = data.copy()[["enmo"]] daily_groups = data_.groupby(data_.index.date) l5 = [] l5_start = [] for date, day_data in daily_groups: # calculate the rolling mean over 5-hour windows window_size = 300 # 5 hours * 60 minutes rolling_means = ( day_data[::-1] .rolling(window=window_size, center=False) .mean()[::-1] .dropna() ) # Find the window with minimum activity min_mean = float(rolling_means.min().iloc[0]) min_start_idx = rolling_means.idxmin().iloc[0] if pd.isna(min_mean) or np.isnan(min_mean) or np.isinf(min_mean): l5.append(np.nan) l5_start.append(np.nan) else: l5.append(min_mean) l5_start.append(min_start_idx) return l5, l5_start
[docs] def RA(m10: List[float], l5: List[float]) -> List[float]: r"""Calculate the relative amplitude (RA) for each day. Relative amplitude is calculated as the difference between the most active 10-hour period and least active 5-hour period, divided by their sum. This provides a normalized measure of the daily activity rhythm strength. Parameters ---------- m10 : List[float] List of M10 values (mean activity during 10 most active hours) for each day. Should be output from the M10() function. l5 : List[float] List of L5 values (mean activity during 5 least active hours) for each day. Should be output from the L5() function. Returns ------- List[float] List of relative amplitude values for each day, where: - Values range from 0 to 1 - Higher values indicate stronger daily activity rhythms - Lower values indicate weaker daily activity rhythms Returns empty list if input lists are empty. Notes ----- - RA = (M10 - L5) / (M10 + L5) - Normalized measure that accounts for overall activity level - Higher values indicate more pronounced rest-activity cycles - Used in circadian rhythm analysis to assess rhythm strength - Requires both M10 and L5 values from the same dataset Examples -------- >>> import pandas as pd >>> import numpy as np >>> >>> # Create sample multi-day activity data >>> dates = pd.date_range('2023-01-01', periods=4320, freq='min') # 3 days >>> hours = dates.hour >>> enmo = pd.Series(np.sin(hours * np.pi / 12) + 1 + np.random.normal(0, 0.1, 4320), index=dates) >>> >>> # Calculate M10 and L5 first >>> m10_values, m10_starts = M10(enmo) >>> l5_values, l5_starts = L5(enmo) >>> >>> # Calculate relative amplitude >>> ra_values = RA(m10_values, l5_values) >>> print(f"Relative Amplitude values: {ra_values}") >>> # Higher values indicate stronger daily activity rhythms """ if len(m10) == 0 or len(l5) == 0: return [] if len(m10) != len(l5): raise ValueError("m10 and l5 must have the same length") ra = [] for i in range(len(m10)): denominator = m10[i] + l5[i] if denominator == 0 or np.isnan(denominator) or np.isinf(denominator): ra.append(np.nan) else: ra.append((m10[i] - l5[i]) / denominator) return ra