torchcnnbuilder.preprocess

View Source

1from torchcnnbuilder.preprocess._dynamic_window import (
2    multi_output_tensor,
3    single_output_tensor,
4)
5
6__all__ = [
7    "single_output_tensor",
8    "multi_output_tensor",
9]

def single_output_tensor( data: Sequence[Any], forecast_len: int, additional_x: Optional[Sequence[Any]] = None, additional_is_array: bool = False, additional_x_stack: bool = True, threshold: Union[bool, float] = False, x_binarize: bool = False) -> torch.utils.data.dataset.TensorDataset: View Source

 8def single_output_tensor(
 9    data: Sequence[Any],
10    forecast_len: int,
11    additional_x: Union[Sequence[Any], None] = None,
12    additional_is_array: bool = False,
13    additional_x_stack: bool = True,
14    threshold: Union[bool, float] = False,
15    x_binarize: bool = False,
16) -> TensorDataset:
17    """
18    Preprocesses a time series into a tensor with input (X) and output (Y) parts for single-step predictions.
19    See the tensor transformation diagram below:
20
21    ![image](../../.docs/media/single_output_tensor.png)
22
23    This function prepares input and output tensors for training a model with a given forecast length. Additional
24    optional data can be provided to expand the input features with extra time series.
25
26    Args:
27        data (Sequence[Any]): The time series data in an N-dimensional format (e.g., list, numpy array, or tensor).
28        forecast_len (int): Number of time steps for each target output tensor.
29        additional_x (Union[Sequence[Any], None], optional): Extra data to add as additional input features.
30            Defaults to None.
31        additional_is_array (bool, optional): If True, treats `additional_x` as a collection of separate
32            time series. Defaults to False.
33        additional_x_stack (bool, optional): If True, stacks `additional_x` features onto input data (X).
34            If False and `additional_is_array` is also False, `additional_x` will be returned separately.
35            Defaults to True.
36        threshold (Union[bool, float], optional): Threshold for binarizing output tensor (Y). If set to a float,
37            values above the threshold are set to 1, and values below are set to 0. Defaults to False.
38        x_binarize (bool, optional): If True, applies the same binarization as `threshold` to the input tensor (X).
39            Defaults to False.
40
41    Returns:
42       A dataset containing the input and output tensors for training.
43
44    Raises:
45        ValueError: If `forecast_len` is greater than the data length.
46        TypeError: If the types of `data` or `additional_x` are unsupported.
47    """
48    tensors = torch.Tensor(data)
49
50    y_train_tensor = tensors[-forecast_len:]
51    x_train_tensor = tensors[:-forecast_len]
52
53    if additional_x is not None:
54        additional_x = torch.Tensor(additional_x)
55
56        if not additional_is_array:
57            extra_x_train_tensor = additional_x[:-forecast_len]
58            if additional_x_stack:
59                x_train_tensor = torch.stack([x_train_tensor, extra_x_train_tensor], dim=1)
60
61        else:
62            channels = [x_train_tensor]
63            for array in additional_x:
64                x_array = array[:-forecast_len]
65                channels.append(x_array)
66
67            x_train_tensor = torch.stack(channels, dim=1)
68
69    if not isinstance(threshold, bool):
70        y_train_tensor[y_train_tensor > threshold] = 1
71        y_train_tensor[y_train_tensor <= threshold] = 0
72
73        if x_binarize:
74            x_train_tensor[x_train_tensor > threshold] = 1
75            x_train_tensor[x_train_tensor <= threshold] = 0
76
77            if additional_x is not None and not additional_is_array and not additional_x_stack:
78                extra_x_train_tensor[extra_x_train_tensor > threshold] = 1
79                extra_x_train_tensor[extra_x_train_tensor <= threshold] = 0
80
81    if additional_x is not None and not additional_is_array and not additional_x_stack:
82        return TensorDataset(x_train_tensor[None], extra_x_train_tensor[None], y_train_tensor[None])
83
84    return TensorDataset(x_train_tensor[None], y_train_tensor[None])

Preprocesses a time series into a tensor with input (X) and output (Y) parts for single-step predictions. See the tensor transformation diagram below:

This function prepares input and output tensors for training a model with a given forecast length. Additional optional data can be provided to expand the input features with extra time series.

Arguments:

data (Sequence[Any]): The time series data in an N-dimensional format (e.g., list, numpy array, or tensor).
forecast_len (int): Number of time steps for each target output tensor.
additional_x (Union[Sequence[Any], None], optional): Extra data to add as additional input features. Defaults to None.
additional_is_array (bool, optional): If True, treats additional_x as a collection of separate time series. Defaults to False.
additional_x_stack (bool, optional): If True, stacks additional_x features onto input data (X). If False and additional_is_array is also False, additional_x will be returned separately. Defaults to True.
threshold (Union[bool, float], optional): Threshold for binarizing output tensor (Y). If set to a float, values strictly above the threshold are set to 1, and values at or below it are set to 0. Defaults to False (no binarization).
x_binarize (bool, optional): If True, applies the same binarization as threshold to the input tensor (X). Defaults to False.

Returns:

A dataset containing the input and output tensors for training.

Raises:

ValueError: If forecast_len is greater than the data length.
TypeError: If the types of data or additional_x are unsupported.

def multi_output_tensor( data: Sequence[Any], forecast_len: int, pre_history_len: int, additional_x: Optional[Sequence[Any]] = None, additional_is_array: bool = False, additional_x_stack: bool = True, threshold: Union[bool, float] = False, x_binarize: bool = False) -> torch.utils.data.dataset.TensorDataset: View Source

 87def multi_output_tensor(
 88    data: Sequence[Any],
 89    forecast_len: int,
 90    pre_history_len: int,
 91    additional_x: Union[Sequence[Any], None] = None,
 92    additional_is_array: bool = False,
 93    additional_x_stack: bool = True,
 94    threshold: Union[bool, float] = False,
 95    x_binarize: bool = False,
 96) -> TensorDataset:
 97    """
 98    Preprocesses a time series into a sliding-window tensor with input (X) and output (Y)
 99    parts for multi-step predictions. See the tensor transformation diagram below:
100
101    ![image](../../.docs/media/single_output_tensor.png)
102
103    This function prepares input and output tensors for training a model with a given forecast length. Additional
104    optional data can be provided to expand the input features with extra time series.
105
106    Args:
107        data (Sequence[Any]): Time series data in an N-dimensional format (e.g., list, numpy array, or tensor).
108        forecast_len (int): Number of time steps for each target output tensor.
109        pre_history_len (int): Length of the time window for input tensors.
110        additional_x (Union[Sequence[Any], None], optional): Additional input data to augment features.
111            Defaults to None.
112        additional_is_array (bool, optional): If True, treats `additional_x` as separate time series. Defaults to False.
113        additional_x_stack (bool, optional): If True, stacks `additional_x` features onto input data (X). If False
114            and `additional_is_array` is also False, `additional_x` is returned separately. Defaults to True.
115        threshold (Union[bool, float], optional): Threshold for binarizing the output tensor (Y). If set to a float,
116            values above the threshold are set to 1, and values below are set to 0. Defaults to False.
117        x_binarize (bool, optional): If True, applies binarization to the input tensor (X) as per `threshold`.
118            Defaults to False.
119
120    Returns:
121        A dataset containing input and output tensors for training.
122
123    Raises:
124        ValueError: If `forecast_len` or `pre_history_len` exceed the data length.
125        TypeError: If the types of `data` or `additional_x` are unsupported.
126    """
127    tensors = torch.Tensor(data)
128
129    x_train_list, y_train_list = [], []
130
131    if additional_x is not None:
132        if additional_is_array:
133            additional_x = torch.stack(list(map(torch.Tensor, additional_x)))
134        else:
135            additional_x = torch.Tensor(additional_x)
136        extra_x_train_list = []
137
138    for i in range(tensors.shape[0] - forecast_len - pre_history_len):
139        x = tensors[i : i + pre_history_len]
140        y = tensors[i + pre_history_len : i + pre_history_len + forecast_len]
141
142        if additional_x is not None:
143            if not additional_is_array:
144                extra_x = additional_x[i : i + pre_history_len]
145                if additional_x_stack:
146                    x = torch.stack([x, extra_x], dim=1)
147                else:
148                    extra_x_train_list.append(extra_x)
149
150            else:
151                channels = [x]
152                for array in additional_x:
153                    x_array = array[i : i + pre_history_len]
154                    channels.append(x_array)
155                x = torch.stack(channels, dim=1)
156
157        if not isinstance(threshold, bool):
158            y[y > threshold] = 1
159            y[y <= threshold] = 0
160
161            if x_binarize:
162                x[x > threshold] = 1
163                x[x <= threshold] = 0
164
165                if additional_x is not None and not additional_is_array and not additional_x_stack:
166                    extra_x[extra_x > threshold] = 1
167                    extra_x[extra_x <= threshold] = 0
168
169        x_train_list.append(x)
170        y_train_list.append(y)
171
172    x_train_tensor, y_train_tensor = torch.stack(x_train_list), torch.stack(y_train_list)
173
174    if additional_x is not None and not additional_is_array and not additional_x_stack:
175        extra_x_train_tensor = torch.stack(extra_x_train_list)
176        return TensorDataset(x_train_tensor, extra_x_train_tensor, y_train_tensor)
177
178    return TensorDataset(x_train_tensor, y_train_tensor)

Preprocesses a time series into a sliding-window tensor with input (X) and output (Y) parts for multi-step predictions. See the tensor transformation diagram below:

This function prepares input and output tensors for training a model with a given forecast length. Additional optional data can be provided to expand the input features with extra time series.

Arguments:

data (Sequence[Any]): Time series data in an N-dimensional format (e.g., list, numpy array, or tensor).
forecast_len (int): Number of time steps for each target output tensor.
pre_history_len (int): Length of the time window for input tensors.
additional_x (Union[Sequence[Any], None], optional): Additional input data to augment features. Defaults to None.
additional_is_array (bool, optional): If True, treats additional_x as separate time series. Defaults to False.
additional_x_stack (bool, optional): If True, stacks additional_x features onto input data (X). If False and additional_is_array is also False, additional_x is returned separately. Defaults to True.
threshold (Union[bool, float], optional): Threshold for binarizing the output tensor (Y). If set to a float, values strictly above the threshold are set to 1, and values at or below it are set to 0. Defaults to False (no binarization).
x_binarize (bool, optional): If True, applies binarization to the input tensor (X) as per threshold. Defaults to False.

Returns:

A dataset containing input and output tensors for training.

Raises:

ValueError: If forecast_len or pre_history_len exceed the data length.
TypeError: If the types of data or additional_x are unsupported.