---
title: External data
keywords: fastai
sidebar: home_sidebar
summary: "Helper functions used to download and extract common time series datasets."
description: "Helper functions used to download and extract common time series datasets."
nb_path: "nbs/012_data.external.ipynb"
---
from fastai.data.transforms import get_files
PATH = Path('.')
dsids = ['ECGFiveDays', 'AtrialFibrillation']  # univariate and multivariate

# For each dataset: remove any local copy, load it, load it a second time,
# and check both the files left on disk and the arrays that come back.
for dsid in dsids:
    print(dsid)
    tgt_dir = PATH/f'data/UCR/{dsid}'

    # Remove any existing copy of the dataset directory before the first load.
    if tgt_dir.is_dir():
        shutil.rmtree(tgt_dir)
    test_eq(len(get_files(tgt_dir)), 0)  # no file left

    # First load: the target directory must end up holding exactly six .npy
    # files and nothing else.
    X_train, y_train, X_valid, y_valid = get_UCR_data(dsid)
    test_eq(len(get_files(tgt_dir, '.npy')), 6)
    test_eq(len(get_files(tgt_dir, '.npy')), len(get_files(tgt_dir)))  # test no left file/ dir
    del X_train, y_train, X_valid, y_valid

    # Second load: must complete in under one second (presumably because the
    # .npy files written above are reused -- confirm against get_UCR_data).
    # perf_counter is monotonic, so the measurement cannot be skewed by
    # system clock adjustments.
    start = time.perf_counter()
    X_train, y_train, X_valid, y_valid = get_UCR_data(dsid)
    elapsed = time.perf_counter() - start
    test_eq(elapsed < 1, True)
    test_eq(X_train.ndim, 3)
    test_eq(y_train.ndim, 1)
    test_eq(X_valid.ndim, 3)
    test_eq(y_valid.ndim, 1)
    # The second load must not add or leave behind any extra files.
    test_eq(len(get_files(tgt_dir, '.npy')), 6)
    test_eq(len(get_files(tgt_dir, '.npy')), len(get_files(tgt_dir)))  # test no left file/ dir
    test_eq(X_train.dtype, np.float32)
    # With the default arguments the returned array is a memmap ...
    test_eq(X_train.__class__.__name__, 'memmap')
    del X_train, y_train, X_valid, y_valid

    # ... whereas on_disk=False must return a plain ndarray.
    X_train, y_train, X_valid, y_valid = get_UCR_data(dsid, on_disk=False)
    test_eq(X_train.__class__.__name__, 'ndarray')
    del X_train, y_train, X_valid, y_valid
# 'natops': loading the data already split and loading it whole together
# with split indices must yield the same train/valid contents and types.
dsid = 'natops'
X_train, y_train, X_valid, y_valid = get_UCR_data(dsid)
# Same call again, this time with verbose output enabled.
X_train, y_train, X_valid, y_valid = get_UCR_data(dsid, verbose=True)
X, y, splits = get_UCR_data(dsid, split_data=False)
# splits[0] indexes the training items, splits[1] the validation items.
test_eq(X[splits[0]], X_train)
test_eq(y[splits[1]], y_valid)
test_type(X, X_train)
test_type(y, y_train)
# Run check_data on 'ECGFiveDays' with the inputs in several different states.
dsid = 'ECGFiveDays'
# Keyword arguments shared by every load in this section.
load_opts = dict(split_data=False, on_disk=False, force_download=False)

# 1) Data exactly as loaded, then with index 0 selected along the second axis of X.
X, y, splits = get_UCR_data(dsid, **load_opts)
check_data(X, y, splits)
check_data(X[:, 0], y, splits)

# 2) Labels cast to float32, then with NaN written into the first 10 labels.
y = y.astype(np.float32)
check_data(X, y, splits)
y[:10] = np.nan
check_data(X[:, 0], y, splits)

# 3) Fresh load, with the splits replaced by the result of get_splits(y, 3).
X, y, splits = get_UCR_data(dsid, **load_opts)
splits = get_splits(y, 3)
check_data(X, y, splits)
check_data(X[:, 0], y, splits)
y[:5] = np.nan
check_data(X[:, 0], y, splits)

# Load once more so X, y and splits are rebound to freshly loaded data.
X, y, splits = get_UCR_data(dsid, **load_opts)
# Monash regression dataset: load it split and unsplit, then verify the
# array shapes for whichever loads returned data.
dsid = "Covid3Month"
X_train, y_train, X_valid, y_valid = get_Monash_regression_data(
    dsid,
    on_disk=False,
    split_data=True,
    force_download=False,
)
X, y, splits = get_Monash_regression_data(
    dsid,
    on_disk=True,
    split_data=False,
    force_download=False,
    verbose=True,
)
# A None result skips the corresponding check instead of failing it.
for loaded, expected_shape in ((X_train, (140, 1, 84)), (X, (201, 1, 84))):
    if loaded is not None:
        test_eq(loaded.shape, expected_shape)
# Forecasting time series: check the expected number of rows for each
# dataset. Both checks are skipped when the loader returns None, so a failed
# load does not surface as a TypeError from len(None).
ts = get_forecasting_time_series("sunspots", force_download=False)
if ts is not None:
    test_eq(len(ts), 3235)
ts  # bare expression: shown as cell output when run in a notebook

ts = get_forecasting_time_series("weather", force_download=False)
if ts is not None:
    test_eq(len(ts), 70091)
    print(ts)
# Monash forecasting dataset: verify the array shape when the load returns data.
dsid = 'm1_yearly_dataset'
X = get_Monash_forecasting_data(dsid, force_download=False)
if X is not None:
    # A None result skips the check instead of failing it.
    test_eq(X.shape, (181, 1, 58))