Source code for core.stats.dropout_rate

#!/usr/bin/env python3
# -*- coding: utf-8 -*-


def _invalid_row_fraction(data):
    """
    Return the fraction of rows in ``data`` that contain at least one NaN.

    :parameter data: two-dimensional numeric block with one data point per
        row (assumed to be a numpy array or compatible -- confirm against
        ``core.data.dataset.Dataset``)
    :return: number of rows holding a NaN divided by ``data.shape[0]``; a
        fraction between 0 and 1, not multiplied by 100
    """
    from numpy import isnan

    # A row counts as dropped as soon as any one of its columns is NaN.
    dropped_rows = isnan(data).any(axis=1).sum()
    # Zero-row input is not special-cased: the division is then 0 / 0.
    return dropped_rows / data.shape[0]


def dropout_rate(dataset, dataset_level=False):
    """
    Compute the dropout rate of a dataset.

    The dropout rate is the fraction of rows that are invalid, i.e. rows in
    which at least one value is NaN. It is returned as a ratio
    (dropped rows / total rows), not as a percentage and not as a string.

    :parameter dataset: Dataset object whose dropout rate is computed
    :type dataset: core.data.dataset.Dataset
    :parameter dataset_level: if True, compute a single rate over
        ``dataset.data``; if False, compute one rate per entry of
        ``dataset.room_list``
    :type dataset_level: bool
    :rtype: float or dict
    :return: the overall dropout rate when ``dataset_level`` is True,
        otherwise a dict mapping each room in ``dataset.room_list`` to the
        dropout rate of that room's data
    :raises TypeError: if ``dataset`` is not a
        ``core.data.dataset.Dataset`` instance
    """
    # Imported at call time, as in the original code; presumably to avoid a
    # circular import between core.stats and core.data -- confirm.
    from core.data.dataset import Dataset

    if not isinstance(dataset, Dataset):
        raise TypeError("Dataset has to be class core.data.dataset.Dataset")

    if dataset_level:
        return _invalid_row_fraction(dataset.data)

    # Indexing the dataset by room yields a sequence whose first element is
    # the room's data block; the remaining elements are not used here.
    return {
        room: _invalid_row_fraction(dataset[room][0])
        for room in dataset.room_list
    }