# Source code for cosinorage.datahandlers.galaxydatahandler

###########################################################################
# Copyright (C) 2025 ETH Zurich
# CosinorAge: Prediction of biological age based on accelerometer data
# using the CosinorAge method proposed by Shim, Fleisch and Barata
# (https://www.nature.com/articles/s41746-024-01111-x)
#
# Authors: Jacob Leo Oskar Hunecke
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#         http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
##########################################################################

import os
from typing import Union

from .datahandler import DataHandler, clock
from .utils.calc_enmo import calculate_minute_level_enmo
from .utils.galaxy_binary import (filter_galaxy_binary_data,
                                  preprocess_galaxy_binary_data,
                                  read_galaxy_binary_data,
                                  resample_galaxy_binary_data)
from .utils.galaxy_csv import (filter_galaxy_csv_data,
                               preprocess_galaxy_csv_data,
                               read_galaxy_csv_data, resample_galaxy_csv_data)


class GalaxyDataHandler(DataHandler):
    """
    Unified data handler for Samsung Galaxy Watch accelerometer data.

    This class handles loading, filtering, and processing of Galaxy Watch
    accelerometer data in both binary and CSV formats.

    Currently supports:

    - Binary format with accelerometer data type
    - CSV format with ENMO data type

    Attributes
    ----------
    galaxy_file_path : str
        Path to the Galaxy Watch data file (for CSV) or directory (for binary).
    data_format : str
        Format of the data ('csv' or 'binary').
    data_type : str
        Type of the data ('enmo' or 'accelerometer').
    time_column : str
        Name of the timestamp column.
    data_columns : list
        Names of the data columns.
    preprocess_args : dict
        Arguments for preprocessing.
    raw_data
        Result of the format-specific read function.
    sf_data
        Result of the format-specific filter, resample and preprocess
        functions, applied in that order to ``raw_data``.
    ml_data
        Result of ``calculate_minute_level_enmo`` applied to ``sf_data``.
    """

    # Human-readable labels stored in ``meta_dict``.
    _FORMAT_LABELS = {"csv": "CSV", "binary": "Binary"}
    _TYPE_LABELS = {"enmo": "ENMO", "accelerometer": "Accelerometer"}

    def __init__(
        self,
        galaxy_file_path: str,
        data_format: str = "binary",
        data_type: Union[str, None] = None,
        time_column: Union[str, None] = None,
        data_columns: Union[list, None] = None,
        preprocess_args: Union[dict, None] = None,
        verbose: bool = False,
    ):
        """
        Validate the configuration and immediately load the data.

        Parameters
        ----------
        galaxy_file_path : str
            Existing file (CSV format) or existing directory (binary format).
        data_format : str, optional
            Either 'csv' or 'binary'. Defaults to 'binary'.
        data_type : str or None, optional
            Either 'enmo' or 'accelerometer'. If None, defaults to 'enmo' for
            CSV and 'accelerometer' for binary.
        time_column : str or None, optional
            Name of the timestamp column. If None, defaults to 'time' for CSV
            and 'unix_timestamp_in_ms' for binary.
        data_columns : list or None, optional
            Names of the data columns. If None, defaults to ['enmo_mg'] for
            ENMO data and the three 'acceleration_x/y/z' columns otherwise.
        preprocess_args : dict or None, optional
            Arguments forwarded to the filter and preprocess functions.
            None (the default) is treated as an empty dict.
        verbose : bool, optional
            Whether to print processing information. Defaults to False.

        Raises
        ------
        ValueError
            If data_format or data_type is not a supported value, if the
            format/type combination is unsupported, if data_columns has the
            wrong number of entries, or if galaxy_file_path is not an existing
            file (CSV) or directory (binary).
        """
        super().__init__()

        if data_format not in ["csv", "binary"]:
            raise ValueError("data_format must be either 'csv' or 'binary'")

        # Set default data_type based on data_format if not provided
        if data_type is None:
            data_type = "enmo" if data_format == "csv" else "accelerometer"

        if data_type not in ["enmo", "accelerometer"]:
            raise ValueError(
                "data_type must be either 'enmo' or 'accelerometer'"
            )

        # Set default column names based on data_format and data_type
        if time_column is None:
            if data_format == "csv":
                time_column = "time"  # Only ENMO is supported for CSV
            else:  # binary
                time_column = "unix_timestamp_in_ms"

        if data_columns is None:
            if data_type == "enmo":
                data_columns = ["enmo_mg"]
            else:  # accelerometer (binary only)
                data_columns = [
                    "acceleration_x",
                    "acceleration_y",
                    "acceleration_z",
                ]

        # Validate format-type combinations
        if data_format == "csv" and data_type != "enmo":
            raise ValueError(
                "CSV format currently only supports 'enmo' data_type"
            )
        if data_format == "binary" and data_type != "accelerometer":
            raise ValueError(
                "Binary format currently only supports 'accelerometer' data_type"
            )

        # Validate data_columns based on data_type
        if data_type == "enmo" and len(data_columns) != 1:
            raise ValueError(
                "For 'enmo' data_type, data_columns should contain exactly one column name"
            )
        if data_type == "accelerometer" and len(data_columns) != 3:
            raise ValueError(
                "For 'accelerometer' data_type, data_columns should contain exactly three column names"
            )

        # CSV input is a single file, binary input is a directory.
        if data_format == "csv":
            if not os.path.isfile(galaxy_file_path):
                raise ValueError(
                    "For CSV format, galaxy_file_path should be a file path. Please also ensure that the file is existing."
                )
        else:  # binary
            if not os.path.isdir(galaxy_file_path):
                raise ValueError(
                    "For binary format, galaxy_file_path should be a directory path. Please also ensure that the directory is existing."
                )

        # A fresh dict per instance: a shared ``{}`` default would be the same
        # object on every handler created without explicit preprocess_args.
        if preprocess_args is None:
            preprocess_args = {}

        self.galaxy_file_path = galaxy_file_path
        self.data_format = data_format
        self.data_type = data_type
        self.time_column = time_column
        self.data_columns = data_columns
        self.preprocess_args = preprocess_args

        # NOTE(review): ``meta_dict`` is not created here; presumably it is
        # set up by DataHandler.__init__ -- confirm against the base class.
        self.meta_dict["datasource"] = "Samsung Galaxy Smartwatch"
        self.meta_dict["data_format"] = self._FORMAT_LABELS.get(
            data_format, "Unknown"
        )
        self.meta_dict["raw_data_type"] = self._TYPE_LABELS.get(
            data_type, "Unknown"
        )
        self.meta_dict["time_column"] = time_column
        self.meta_dict["data_columns"] = data_columns

        self.__load_data(verbose=verbose)

    @clock
    def __load_data(self, verbose: bool = False):
        """
        Internal method to load and process Galaxy Watch data.

        Selects the CSV or binary helper functions according to
        ``data_format``/``data_type`` and runs them as a fixed pipeline:
        read -> filter -> resample -> preprocess -> minute-level ENMO.
        Populates ``raw_data``, ``sf_data`` and ``ml_data``.

        Parameters
        ----------
        verbose : bool, optional
            Whether to print processing information. Defaults to False.

        Raises
        ------
        ValueError
            If the format/type combination is not supported.
        """
        if self.data_format == "csv" and self.data_type == "enmo":
            # Use CSV processing functions for ENMO data
            read_fn = read_galaxy_csv_data
            filter_fn = filter_galaxy_csv_data
            resample_fn = resample_galaxy_csv_data
            preprocess_fn = preprocess_galaxy_csv_data
        elif (
            self.data_format == "binary"
            and self.data_type == "accelerometer"
        ):
            # Use binary processing functions for accelerometer data
            read_fn = read_galaxy_binary_data
            filter_fn = filter_galaxy_binary_data
            resample_fn = resample_galaxy_binary_data
            preprocess_fn = preprocess_galaxy_binary_data
        else:
            # This should not happen due to validation in __init__, but just in case
            raise ValueError(
                f"Unsupported combination: data_format='{self.data_format}', data_type='{self.data_type}'"
            )

        self.raw_data = read_fn(
            self.galaxy_file_path,
            meta_dict=self.meta_dict,
            time_column=self.time_column,
            data_columns=self.data_columns,
            verbose=verbose,
        )
        self.sf_data = filter_fn(
            self.raw_data,
            meta_dict=self.meta_dict,
            verbose=verbose,
            preprocess_args=self.preprocess_args,
        )
        self.sf_data = resample_fn(
            self.sf_data, meta_dict=self.meta_dict, verbose=verbose
        )
        self.sf_data = preprocess_fn(
            self.sf_data,
            preprocess_args=self.preprocess_args,
            meta_dict=self.meta_dict,
            verbose=verbose,
        )
        self.ml_data = calculate_minute_level_enmo(
            self.sf_data, self.meta_dict, verbose=verbose
        )