Source code for core.preprocessing.downsample

#!/usr/bin/env python3
# -*- coding: utf-8 -*-


def downsample(dataset, target_frequency, algorithm="mean"):
    """
    Downsample every room of a core.data.dataset.Dataset in place
    (decrease the number of rows).

    For each room, the rows are grouped into consecutive time bins that are
    ``target_frequency`` wide, counted from the first timestamp of the room.
    Each bin is reduced to a single row with the selected algorithm, and bins
    that received no row are filled by linear interpolation between the
    neighbouring bins. The occupancy column goes through the same process and
    is rounded afterwards. Finally the values, the occupancy and the room
    mapping of the dataset are replaced with the downsampled result.

    Argument values are validated before the dataset type is checked.

    :parameter dataset: Dataset object to downsample
    :type dataset: core.data.dataset.Dataset

    :parameter target_frequency: width of one output time bin, expressed in the
                                 unit of the time column (seconds according to
                                 the original documentation). Must be positive
    :type target_frequency: int

    :parameter algorithm: downsampling algorithm. Only ``'mean'`` is available for now
    :type algorithm: str

    :raises ValueError: if ``algorithm`` is not supported or
                        ``target_frequency`` is not positive
    :raises TypeError: if ``dataset`` is not a core.data.dataset.Dataset

    :return: None
    """
    # Fail fast on bad argument values. Without these checks an unknown
    # algorithm leaves the resampling grid untouched and the function dies
    # later with an unrelated error, and a non-positive bin width breaks the
    # bin arithmetic below.
    if algorithm != "mean":
        raise ValueError(
            "Unsupported downsampling algorithm {!r}; only 'mean' is available".format(algorithm))
    if not target_frequency > 0:
        raise ValueError(
            "target_frequency has to be positive, got {!r}".format(target_frequency))

    from ..data import Dataset
    from numpy import array, concatenate, full, nan, isnan, interp
    from pandas import DataFrame

    if not isinstance(dataset, Dataset):
        raise TypeError("Dataset has to be class core.data.dataset.Dataset")

    # Empty accumulators with the right number of columns so that every room
    # can simply be appended along axis 0.
    new_data = array([], dtype=float)
    new_data.shape = (0, len(dataset.feature_list))
    new_occupancy = array([], dtype=float)
    new_occupancy.shape = (0, 1)

    rooms = dataset.room_list
    detail_room = dataset.room_mapping
    time_col = dataset.time_column_index

    for room in rooms:
        data, occupancy = dataset[room]
        # Carry occupancy as one extra trailing column so it is binned and
        # interpolated together with the features. concatenate returns a new
        # array, so the in-place edits below do not touch what dataset[room]
        # returned.
        data = concatenate((data, occupancy), axis=1)

        # NOTE(review): using the first and last row as the time range
        # presumes the rows of a room are in chronological order - confirm
        # against Dataset.
        start_t = data[0, time_col]
        end_t = data[-1, time_col]
        n_bins = int((end_t - start_t) // target_frequency) + 1

        # One row per time bin; NaN marks bins that no source row falls into.
        edited_data = full([n_bins, new_data.shape[1] + 1], nan)

        # Replace each timestamp with the index of the bin it belongs to.
        data[:, time_col] = ((data[:, time_col] - start_t) // target_frequency).astype(int)

        # 'mean' reduction (the only supported algorithm, validated above):
        # average every column over the rows sharing a bin index.
        df = DataFrame(data).groupby(time_col).mean()
        bins = array(df.index, dtype=int)
        # groupby moved the time column into the index, so the frame is one
        # column short. Append a spare column, shift everything from time_col
        # onwards one step to the right and put the bin index back in place.
        binned = concatenate((array(df, dtype=float), full((bins.shape[0], 1), 0)), axis=1)
        binned[:, time_col + 1:] = binned[:, time_col:-1]
        binned[:, time_col] = bins.astype(float)
        edited_data[bins, :] = binned

        # Fill the empty bins by linear interpolation. Working on the
        # transposed array lays each column out contiguously in the flattened
        # index space, so interpolating over flat positions interpolates along
        # time within a column.
        edited_data = edited_data.T
        missing = isnan(edited_data)
        xp = (~missing).ravel().nonzero()[0]
        fp = edited_data[~missing]
        x = missing.ravel().nonzero()[0]
        edited_data[missing] = interp(x, xp, fp)
        edited_data = edited_data.T

        # Convert bin indices back to timestamps.
        edited_data[:, time_col] = edited_data[:, time_col] * target_frequency + start_t

        # Record the row span this room occupies in the rebuilt value array.
        first_row = new_data.shape[0]
        detail_room[room] = (first_row, first_row + edited_data.shape[0])

        new_data = concatenate((new_data, edited_data[:, :-1]), axis=0)
        # Averaged / interpolated occupancy is rounded to the nearest integer
        # and reshaped into a single column.
        occupancy = edited_data[:, -1].round().reshape(-1, 1)
        new_occupancy = concatenate((new_occupancy, occupancy), axis=0)

    dataset.change_values(new_data)
    dataset.change_occupancy(new_occupancy)
    dataset.change_room_mapping(detail_room)