Source code for cosinorage.datahandlers.utils.frequency_detection

###########################################################################
# Copyright (C) 2025 ETH Zurich
# CosinorAge: Prediction of biological age based on accelerometer data
# using the CosinorAge method proposed by Shim, Fleisch and Barata
# (https://www.nature.com/articles/s41746-024-01111-x)
#
# Authors: Jacob Leo Oskar Hunecke
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#         http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
##########################################################################

import pandas as pd


def detect_frequency_from_timestamps(timestamps: pd.Series) -> float:
    """
    Detect sampling frequency by finding the most common time delta.

    The differences between consecutive timestamps are computed in the order
    given (the input is not sorted), and the most frequently occurring
    difference is taken as the sampling interval.

    Parameters
    ----------
    timestamps : pd.Series
        Series, index or array-like of timestamps. Values that are not
        already of a datetime64 dtype are passed through ``pd.to_datetime``
        with ``errors="coerce"``; entries that cannot be parsed become NaT
        and are dropped before the deltas are computed.

    Returns
    -------
    float
        Sampling frequency in Hz (samples per second).

    Raises
    ------
    ValueError
        If fewer than two valid timestamps remain after dropping NaT.
        If no time deltas can be calculated.
        If the mode of the time deltas cannot be determined.
        If the most common time delta is zero.
        If the most common time delta is negative (e.g. the timestamps are
        in descending order).

    Notes
    -----
    - Time deltas are converted to seconds with sub-second precision.
    - The most common (mode) time delta determines the frequency, so
      occasional gaps in the data do not affect the result.
    - If several deltas are equally common, the first value returned by
      ``Series.mode()`` (the smallest, as the modes are sorted) is used.
    - Frequency is calculated as ``1.0 / most_common_delta``.

    Examples
    --------
    >>> import pandas as pd
    >>>
    >>> # Regular 25 Hz sampling
    >>> timestamps = pd.date_range('2023-01-01', periods=100, freq='40ms')
    >>> freq = detect_frequency_from_timestamps(timestamps)
    >>> print(f"Detected frequency: {freq:.1f} Hz")
    Detected frequency: 25.0 Hz
    >>>
    >>> # Irregular sampling with some missing points
    >>> irregular_times = pd.to_datetime([
    ...     '2023-01-01 00:00:00.000',
    ...     '2023-01-01 00:00:00.040',
    ...     '2023-01-01 00:00:00.080',
    ...     '2023-01-01 00:00:00.120',
    ...     '2023-01-01 00:00:00.200',  # Gap here
    ...     '2023-01-01 00:00:00.240'
    ... ])
    >>> freq = detect_frequency_from_timestamps(irregular_times)
    >>> print(f"Detected frequency: {freq:.1f} Hz")
    Detected frequency: 25.0 Hz
    """
    # Convert to datetime if needed; unparseable entries become NaT.
    if not pd.api.types.is_datetime64_any_dtype(timestamps):
        timestamps = pd.to_datetime(timestamps, errors="coerce")

    # Wrapping in a Series lets index/array inputs use diff(); NaT values are
    # removed so they cannot produce NaT deltas further down.
    timestamps = pd.Series(timestamps).dropna()

    if len(timestamps) < 2:
        raise ValueError(
            "At least two timestamps are required to detect frequency."
        )

    # Differences between consecutive timestamps; the first diff is always
    # NaT and is dropped.
    time_deltas = timestamps.diff().dropna()

    # Normalise to a timedelta dtype before converting to seconds so that
    # sub-second resolution is preserved whatever dtype diff() produced
    # (casting to whole-second timedeltas would turn e.g. 40 ms into 0 s).
    time_deltas_seconds = pd.to_timedelta(time_deltas).dt.total_seconds()

    # Find the most common delta (majority)
    if time_deltas_seconds.empty:
        raise ValueError("Not enough time deltas to determine frequency.")

    mode = time_deltas_seconds.mode()
    if mode.empty:
        raise ValueError("Could not determine the most common time delta.")

    most_common_delta = mode.iloc[0]

    if most_common_delta == 0:
        raise ValueError(
            "Most common time delta is zero, cannot determine frequency."
        )

    # A negative interval would yield a meaningless negative frequency.
    if most_common_delta < 0:
        raise ValueError(
            "Most common time delta is negative, cannot determine frequency. "
            "Timestamps must be in ascending order."
        )

    # Calculate frequency; float() returns a plain Python float as annotated.
    return float(1.0 / most_common_delta)