Source code for core.data.import_data

#!/usr/bin/env python3
# -*- coding: utf-8 -*-


def import_data(file_name, time_column_index=None, mode='csv', header=True, room_name=None, tz=0):
    """
    Load raw data from the disk.

    Every cell is converted to ``float``; cells that are empty or cannot be
    parsed as a number become ``nan``. The cell in ``time_column_index`` is
    parsed as a date/time and stored as a POSIX timestamp shifted by ``tz``
    hours. The last column of the file is passed to the data set as the
    occupancy column and all preceding columns as the feature columns.

    :type file_name: str
    :param file_name: the name of the raw data file

    :type time_column_index: int
    :param time_column_index: the column index for the timestamp in given raw data file

    :type mode: str
    :param mode: the format for raw data. Currently only support ``csv``

    :type header: bool
    :param header: indicate whether the raw data contains a header on the first row. If ``False``,
                   then assign unique index for each feature column

    :type room_name: str or None
    :param room_name: the name of the room. If ``None``, then assign unique number for the room

    :type tz: int
    :param tz: the time zone offset (in hours) that need to fix in the raw data file

    :rtype: core.data.dataset.Dataset
    :return: The structured data set with one raw input data

    :raises ValueError: if ``mode`` is not ``'csv'``
    """
    # Reject unsupported formats before touching the disk, instead of
    # silently falling through without producing a data set.
    if mode != 'csv':
        raise ValueError("unsupported mode {!r}; only 'csv' is currently supported".format(mode))

    # Imports are kept local to the function, as in the original module.
    from csv import reader
    from dateutil.parser import parse
    from numpy import nan, asarray
    from .dataset import Dataset

    seconds_per_hour = 60 * 60
    feature_name = []
    rows = []

    # newline='' is what the csv module expects so that it can handle line
    # endings (including newlines embedded in quoted fields) by itself.
    with open(file_name, 'r', newline='') as input_file:
        csv_reader = reader(input_file, delimiter=',')

        if header:
            # The last column is the occupancy column, not a feature.
            feature_name = next(csv_reader)[:-1]

        for line in csv_reader:
            if not line:
                # Skip blank lines.
                continue

            for i, cell in enumerate(line):
                if i == time_column_index:
                    line[i] = parse(cell).timestamp() + tz * seconds_per_hour
                else:
                    # float('') also raises ValueError, so empty cells and
                    # non-numeric cells both end up as nan.
                    try:
                        line[i] = float(cell)
                    except ValueError:
                        line[i] = nan
            rows.append(line)

    data = asarray(rows, dtype=float)

    if not feature_name:
        # One generated name per feature column; the trailing occupancy
        # column is excluded, consistent with the header branch above.
        feature_name = list(range(data.shape[1] - 1))

    dataset = Dataset()
    dataset.add_room(data[:, :-1], occupancy=data[:, -1], header=False, room_name=room_name)
    dataset.set_feature_name(feature_name)
    dataset.time_column_index = time_column_index
    return dataset