#!/usr/bin/env python3
# -*- coding: utf-8 -*-
from .model.superclass import *
from .evaluation.superclass import *
from .data.load_sample import load_sample
def easy_experiment(source,
target_test,
target_retrain=None,
domain_adaptive=False,
models="all",
binary_evaluation=True,
evaluation_metrics="all",
thread_num=4,
remove_time=True,
plot=False
):
"""
    A convenience function for researchers to quickly test all models on one dataset and
    evaluate them with all metrics.

    :parameter source: the source-domain Dataset with full ground truth for training the model
    :type source: core.data.dataset.Dataset
    :parameter target_retrain: the labelled ground-truth Dataset in the target domain for re-training the model
    :type target_retrain: ``None`` or core.data.dataset.Dataset
    :parameter target_test: the Dataset from the rest of the target domain, used for testing with sensor data only
    :type target_test: core.data.dataset.Dataset
    :parameter domain_adaptive: whether to use the domain-adaptive semi-supervised learning model
        instead of the normal supervised learning model
    :type domain_adaptive: bool
    :parameter binary_evaluation: whether to use binary evaluation metrics
        instead of occupancy-count metrics
    :type binary_evaluation: bool
    :parameter models: the models to use in this experiment. If ``'all'``, every model of the
        selected superclass is added to the experiment.
    :type models: str, list(str)
    :parameter evaluation_metrics: the evaluation metrics to use in this experiment. If ``'all'``,
        every metric of the selected superclass is added to the experiment.
    :type evaluation_metrics: str, list(str)
    :parameter thread_num: the maximum number of threads that may be used to speed up the experiment
    :type thread_num: int
    :parameter remove_time: whether to remove the time column before predicting the occupancy level
    :type remove_time: bool
    :parameter plot: unused; the plotting code is currently commented out
    :type plot: bool
    :rtype: tuple(dict(str, dict(str, score)), dict(str, numpy.ndarray))
    :return: first, the final score of every metric for every model;
        second, the prediction result of every model
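
    Example (a minimal sketch; ``my_source`` and ``my_target`` are placeholder names for
    ``core.data.dataset.Dataset`` objects you have already loaded, so adapt them to your own data)::

        scores, predictions = easy_experiment(my_source, my_target)
        # ``scores`` maps each model name to a dict of metric scores;
        # ``predictions`` maps each model name to its predicted occupancy array.
        for model_name, metric_scores in scores.items():
            print(model_name, metric_scores)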
"""
if domain_adaptive and target_retrain is None:
raise ValueError("Domain Adaptive model must have target_retrain dataset")
# test_time = target_test.data[:, target_test.time_column].flatten()
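    # Optionally drop the time column from every dataset so the models do not train on raw timestamps.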
if remove_time:
if source.time_column_index is not None:
source.remove_feature([source.feature_mapping[source.time_column_index]])
if target_test.time_column_index is not None:
target_test.remove_feature([target_test.feature_mapping[target_test.time_column_index]])
if target_retrain is not None and target_retrain.time_column_index is not None:
target_retrain.remove_feature([target_retrain.feature_mapping[target_retrain.time_column_index]])
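    # Build the model wrapper: domain-adaptive (re-trained on target_retrain) or plain supervised.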
if domain_adaptive:
model = DomainAdaptiveModel(source, target_retrain, target_test, thread_num=thread_num)
else:
model = NormalModel(source, target_test, thread_num=thread_num)
if models == "all":
model.get_all_model()
else:
model.add_model(models)
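    # Run every selected model; ``results`` maps each model name to its predictions on target_test.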
results = model.run_all_model()
# if plot:
# plot_dict = dict()
# from pickle import dump
# for model in results:
# print(results[model].shape, test_time.shape)
# # plot_dict[model] = test_time[results[model].flatten() > 0]
# # with open(model, 'wb') as file:
# # dump(plot_dict[model], file)
# # dump(results[model], file)
# plot_dict["Truth"] = test_time[target_test.occupancy.flatten() > 0]
# with open("Truth", 'wb') as file:
# dump(plot_dict["Truth"], file)
#
# # plot_occupancy_distribution(plot_dict, orientation="horizontal",
# # evaluation=True, size=2, swarm=True)
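    # Evaluate each model's predictions against the ground-truth occupancy of target_test.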
total_result = dict()
    for model_result in results:
if binary_evaluation:
metrics = BinaryEvaluation(results[model_result], target_test.occupancy)
else:
metrics = OccupancyEvaluation(results[model_result], target_test.occupancy)
if evaluation_metrics == "all":
metrics.get_all_metrics()
else:
metrics.add_metrics(evaluation_metrics)
total_result[model_result] = metrics.run_all_metrics()
return total_result, results
def easy_set_experiment(source_set,
target_test_set=None,
split_percentage=0.8,
target_retrain=None,
domain_adaptive=False,
models="all",
binary_evaluation=True,
evaluation_metrics="all",
thread_num=4,
remove_time=True,
plot=False):
"""
    A convenience function for researchers to quickly test all models on a set of datasets and
    evaluate them with all metrics.
    Please make sure all keys in *source_set*, *target_test_set*, and *target_retrain* are the same.

    :parameter source_set: the set of source-domain Datasets with full ground truth for training the
        models. If ``'all'``, the sample datasets are loaded via ``load_sample``.
    :type source_set: str or dict(str, core.data.dataset.Dataset)
    :parameter target_retrain: the labelled ground-truth Datasets in the target domains for re-training the models
    :type target_retrain: ``None`` or dict(str, core.data.dataset.Dataset)
    :parameter target_test_set: the set of Datasets from the rest of the target domains, used for
        testing with sensor data only. If ``None`` (or missing a key), the corresponding source
        Dataset is split into a new source Dataset and a target Dataset.
    :type target_test_set: ``None`` or dict(str, core.data.dataset.Dataset)
    :parameter split_percentage: the fraction of rows placed in the first (source) part when a source Dataset is split
    :type split_percentage: float
    :parameter domain_adaptive: whether to use the domain-adaptive semi-supervised learning model
        instead of the normal supervised learning model
    :type domain_adaptive: bool
    :parameter binary_evaluation: whether to use binary evaluation metrics
        instead of occupancy-count metrics
    :type binary_evaluation: bool
    :parameter models: the models to use in this experiment. If ``'all'``, every model of the
        selected superclass is added to the experiment.
    :type models: str, list(str)
    :parameter evaluation_metrics: the evaluation metrics to use in this experiment. If ``'all'``,
        every metric of the selected superclass is added to the experiment.
    :type evaluation_metrics: str, list(str)
    :parameter thread_num: the maximum number of threads that may be used to speed up the experiment
    :type thread_num: int
    :parameter remove_time: whether to remove the time column before predicting the occupancy level
    :type remove_time: bool
    :parameter plot: unused; the plotting code is currently commented out
    :type plot: bool
    :rtype: tuple(dict(str, dict(str, dict(str, score))), dict(str, dict(str, numpy.ndarray)))
    :return: first, the final score of every metric for every model on every Dataset;
        second, the prediction result of every model on every Dataset
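
    Example (a minimal sketch using the sample datasets; passing ``'all'`` makes the function load
    them via ``load_sample`` and, with no ``target_test_set``, split each one 80/20 into source and target)::

        scores, predictions = easy_set_experiment("all", split_percentage=0.8)
        # ``scores[dataset_name][model_name]`` holds the metric scores of one model on one dataset.
        for dataset_name, model_scores in scores.items():
            print(dataset_name, model_scores)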
"""
if source_set == "all":
source_set = load_sample(source_set)
elif not isinstance(source_set, dict):
raise TypeError("Source must be 'all' or a dictionary")
if target_retrain is None:
target_retrain = dict()
elif not isinstance(target_retrain, dict):
raise TypeError("Target_retrain must be None or a dictionary")
if target_test_set is None:
target_test_set = dict()
elif not isinstance(target_test_set, dict):
raise TypeError("Target_test_set must be None or a dictionary")
results = dict()
pred = dict()
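    # Run easy_experiment once per dataset, splitting the source when no matching target test set is given.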
    for dataset in source_set:
        if target_test_set.get(dataset) is None:
source, target = source_set[dataset].split(split_percentage)
results[dataset], pred[dataset] = easy_experiment(source,
target,
target_retrain=target_retrain.get(dataset, None),
domain_adaptive=domain_adaptive,
models=models,
binary_evaluation=binary_evaluation,
evaluation_metrics=evaluation_metrics,
thread_num=thread_num,
remove_time=remove_time,
plot=plot)
else:
results[dataset], pred[dataset] = easy_experiment(source_set[dataset],
target_test_set[dataset],
target_retrain=target_retrain.get(dataset, None),
domain_adaptive=domain_adaptive,
models=models,
binary_evaluation=binary_evaluation,
evaluation_metrics=evaluation_metrics,
thread_num=thread_num,
remove_time=remove_time,
plot=plot)
return results, pred