diff --git a/.devcontainer/Dockerfile b/.devcontainer/Dockerfile
new file mode 100644
index 0000000..42b35de
--- /dev/null
+++ b/.devcontainer/Dockerfile
@@ -0,0 +1,71 @@
+# Base stage: Build your environment with CUDA and Python dependencies
+FROM nvidia/cuda:12.8.0-devel-ubuntu24.04 AS base
+
+# Runtime env for CUDA/Python; DEBIAN_FRONTEND is a build-only ARG so it does not leak into the running container
+ARG DEBIAN_FRONTEND=noninteractive
+ENV PYTHONUNBUFFERED=1 \
+    PYTHONDONTWRITEBYTECODE=1 \
+    NVIDIA_VISIBLE_DEVICES=all \
+    NVIDIA_DRIVER_CAPABILITIES=compute,utility
+
+# Install required packages: one entry per package, sorted alphabetically so
+# additions and removals show up as clean single-line diffs
+RUN apt-get update \
+    && apt-get upgrade -y \
+    && apt-get install -y --no-install-recommends \
+        apt-utils \
+        build-essential \
+        git \
+        libatk-bridge2.0-0 \
+        libatk1.0-0 \
+        libgbm1 \
+        libgtk-3-0 \
+        libnss3 \
+        libpq-dev \
+        libxcomposite1 \
+        libxdamage1 \
+        libxrandr2 \
+        nano \
+        python3-dev \
+        python3-pip \
+        python3-venv \
+        sudo \
+        wget \
+        xvfb \
+    && rm -rf /var/lib/apt/lists/*
+
+# Ensure "python" points to "python3" (-f keeps the step from failing if the link already exists)
+RUN ln -sf /usr/bin/python3 /usr/bin/python
+
+# Upgrade pip and install TensorFlow with GPU support (--no-cache-dir keeps pip's download cache out of the layer)
+RUN pip install --no-cache-dir --break-system-packages --upgrade --ignore-installed pip setuptools wheel \
+    && pip install --no-cache-dir --break-system-packages "tensorflow[and-cuda]==2.19.0"
+
+# Define user parameters
+ARG USERNAME=vscode
+ARG USER_UID=1001
+ARG USER_GID=1001
+
+# Create group and user if they don't exist, then grant the user passwordless sudo
+RUN if ! getent group "$USER_GID" >/dev/null; then groupadd --gid "$USER_GID" "$USERNAME"; fi && \
+    if ! id -u "$USERNAME" >/dev/null 2>&1; then \
+        useradd -s /bin/bash --uid "$USER_UID" --gid "$USER_GID" -m "$USERNAME"; \
+    fi && \
+    echo "$USERNAME ALL=(root) NOPASSWD:ALL" > "/etc/sudoers.d/$USERNAME" && \
+    chmod 0440 "/etc/sudoers.d/$USERNAME"
+
+# Set working directory and default user in base stage
+WORKDIR /home/$USERNAME
+USER $USERNAME
+
+# Final stage: Use the base image with all modifications
+FROM base AS dev_containers_target_stage
+
+# Explicitly set the user and working directory (this ensures the user entry is present)
+USER vscode
+WORKDIR /home/vscode
+
+# Let pip install into the system Python for this user by setting break-system-packages (bypasses the PEP 668 "externally managed environment" error); only appropriate inside a container image
+RUN python -m pip config set global.break-system-packages true
+
+CMD ["/bin/bash"]
\ No newline at end of file
diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json
new file mode 100644
index 0000000..a53f3f1
--- /dev/null
+++ b/.devcontainer/devcontainer.json
@@ -0,0 +1,32 @@
+{
+    "name": "Python 3",
+    "context": "..",
+    "dockerFile": "Dockerfile",
+    
+    "remoteUser": "vscode",
+
+    // Put the mounted repository on PYTHONPATH; remoteEnv is the supported property for this
+    "remoteEnv": { "PYTHONPATH": "${containerWorkspaceFolder}:${containerEnv:PYTHONPATH}" },
+
+    "customizations": {
+        "vscode": {
+            "extensions": [
+                "ms-python.python",
+                "mhutchie.git-graph",
+                "vscode-icons-team.vscode-icons"
+            ]
+        }
+    },
+
+    // Pass --gpus to `docker run` so the container can use the host GPUs
+    "runArgs": [
+        "--gpus", "all"  // Expose every host GPU to the container
+    ],
+
+    // Ensures NVIDIA runtime is used
+    "hostRequirements": {
+        "gpu": true
+    }
+	// Uncomment the next line to run commands after the container is created.
+	// "postCreateCommand": "bash .devcontainer/setup.sh"
+}
\ No newline at end of file
diff --git a/.gitignore b/.gitignore
index e665169..7ccf541 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,7 +1,7 @@
 __pycache__
 *.egg-info
 *.pyc
-venv
+venv*
 
 Datasets/*
 Models/*
diff --git a/CHANGELOG.md b/CHANGELOG.md
index b9988a4..8707d4c 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,3 +1,21 @@
+## [2.0.0.dev1] - 2025-03-31
+### Added
+- New `use_cache` parameter in the handwriting recognition tutorial
+- Type annotations and metadata handling in various callbacks and preprocessors
+- Added `.devcontainer` configuration for VSCode, enabling a consistent Docker development environment
+
+### Changed
+- Version bump from 1.2.5 to 2.0.0.dev1 (updating mltu compatibility with TensorFlow 2.19)
+- `.gitignore` updated to ignore `venv*`
+- Tutorials now assert `mltu` must be version 1.2.5 or lower for backward compatibility
+- Training configs updated (e.g., batch size from 16 to 128)
+- Code transitioned from `tf.keras` to core `keras` modules
+- Data providers updated with multiprocessing and other kwargs
+- Various logging, import handling, and minor bug fixes throughout the codebase
+
+### TODO
+- Remove `# pyright: ignore` comments that temporarily suppress type checking errors
+
 ## [1.2.5] - 2024-05-04
 ### Added
 - Added exception in `mltu.dataProvider.DataProvider` to raise ValueError when dataset is not iterable
diff --git a/Tutorials/01_image_to_word/train.py b/Tutorials/01_image_to_word/train.py
index 2fd214a..7b358dd 100644
--- a/Tutorials/01_image_to_word/train.py
+++ b/Tutorials/01_image_to_word/train.py
@@ -1,3 +1,7 @@
+# For this to work you need to install mltu version 1.2.5 or lower
+import mltu
+assert tuple(int(part) for part in mltu.__version__.split(".")[:3]) <= (1, 2, 5), "mltu version must be 1.2.5 or lower"
+
 import os
 from tqdm import tqdm
 import tensorflow as tf
diff --git a/Tutorials/02_captcha_to_text/train.py b/Tutorials/02_captcha_to_text/train.py
index 4ceb03e..86967c2 100644
--- a/Tutorials/02_captcha_to_text/train.py
+++ b/Tutorials/02_captcha_to_text/train.py
@@ -1,3 +1,7 @@
+# For this to work you need to install mltu version 1.2.5 or lower
+import mltu
+assert tuple(int(part) for part in mltu.__version__.split(".")[:3]) <= (1, 2, 5), "mltu version must be 1.2.5 or lower"
+
 import tensorflow as tf
 try: [tf.config.experimental.set_memory_growth(gpu, True) for gpu in tf.config.experimental.list_physical_devices("GPU")]
 except: pass
diff --git a/Tutorials/03_handwriting_recognition/configs.py b/Tutorials/03_handwriting_recognition/configs.py
index db01bf6..06b6680 100644
--- a/Tutorials/03_handwriting_recognition/configs.py
+++ b/Tutorials/03_handwriting_recognition/configs.py
@@ -11,7 +11,7 @@ def __init__(self):
         self.height = 32
         self.width = 128
         self.max_text_length = 0
-        self.batch_size = 16
-        self.learning_rate = 0.0005
+        self.batch_size = 128
+        self.learning_rate = 0.001
         self.train_epochs = 1000
         self.train_workers = 20
\ No newline at end of file
diff --git a/Tutorials/03_handwriting_recognition/train.py b/Tutorials/03_handwriting_recognition/train.py
index 2a00f29..b359526 100644
--- a/Tutorials/03_handwriting_recognition/train.py
+++ b/Tutorials/03_handwriting_recognition/train.py
@@ -4,6 +4,10 @@
 
 from keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau, TensorBoard
 
+# For this to work you need to install mltu version 1.2.5 or lower
+import mltu
+assert tuple(int(part) for part in mltu.__version__.split(".")[:3]) <= (1, 2, 5), "mltu version must be 1.2.5 or lower"
+
 from mltu.preprocessors import ImageReader
 from mltu.transformers import ImageResizer, LabelIndexer, LabelPadding, ImageShowCV2
 from mltu.augmentors import RandomBrightness, RandomRotate, RandomErodeDilate, RandomSharpen
@@ -88,6 +92,7 @@ def download_and_unzip(url, extract_to="Datasets", chunk_size=1024*1024):
         LabelIndexer(configs.vocab),
         LabelPadding(max_word_length=configs.max_text_length, padding_value=len(configs.vocab)),
         ],
+        use_cache=True
 )
 
 # Split the dataset into training and validation sets
diff --git a/Tutorials/04_sentence_recognition/train.py b/Tutorials/04_sentence_recognition/train.py
index 0bce0f7..b8d7c29 100644
--- a/Tutorials/04_sentence_recognition/train.py
+++ b/Tutorials/04_sentence_recognition/train.py
@@ -1,3 +1,7 @@
+# For this to work you need to install mltu version 1.2.5 or lower
+import mltu
+assert tuple(int(part) for part in mltu.__version__.split(".")[:3]) <= (1, 2, 5), "mltu version must be 1.2.5 or lower"
+
 import tensorflow as tf
 try: [tf.config.experimental.set_memory_growth(gpu, True) for gpu in tf.config.experimental.list_physical_devices("GPU")]
 except: pass
diff --git a/Tutorials/05_sound_to_text/train.py b/Tutorials/05_sound_to_text/train.py
index 91ad68e..fe941cd 100644
--- a/Tutorials/05_sound_to_text/train.py
+++ b/Tutorials/05_sound_to_text/train.py
@@ -1,3 +1,7 @@
+# For this to work you need to install mltu version 1.2.5 or lower
+import mltu
+assert tuple(int(part) for part in mltu.__version__.split(".")[:3]) <= (1, 2, 5), "mltu version must be 1.2.5 or lower"
+
 import tensorflow as tf
 try: [tf.config.experimental.set_memory_growth(gpu, True) for gpu in tf.config.experimental.list_physical_devices("GPU")]
 except: pass
diff --git a/Tutorials/06_pytorch_introduction/train.py b/Tutorials/06_pytorch_introduction/train.py
index f5155f0..93eb0ff 100644
--- a/Tutorials/06_pytorch_introduction/train.py
+++ b/Tutorials/06_pytorch_introduction/train.py
@@ -1,3 +1,7 @@
+# For this to work you need to install mltu version 1.2.5 or lower
+import mltu
+assert tuple(int(part) for part in mltu.__version__.split(".")[:3]) <= (1, 2, 5), "mltu version must be 1.2.5 or lower"
+
 import os
 import cv2
 import numpy as np
diff --git a/Tutorials/07_pytorch_wrapper/train.py b/Tutorials/07_pytorch_wrapper/train.py
index 9f17c85..32250a9 100644
--- a/Tutorials/07_pytorch_wrapper/train.py
+++ b/Tutorials/07_pytorch_wrapper/train.py
@@ -1,3 +1,7 @@
+# For this to work you need to install mltu version 1.2.5 or lower
+import mltu
+assert tuple(int(part) for part in mltu.__version__.split(".")[:3]) <= (1, 2, 5), "mltu version must be 1.2.5 or lower"
+
 import os
 import numpy as np
 import requests, gzip, os, hashlib
diff --git a/Tutorials/08_handwriting_recognition_torch/train_torch.py b/Tutorials/08_handwriting_recognition_torch/train_torch.py
index 8cf185e..b80e5cb 100644
--- a/Tutorials/08_handwriting_recognition_torch/train_torch.py
+++ b/Tutorials/08_handwriting_recognition_torch/train_torch.py
@@ -1,3 +1,7 @@
+# For this to work you need to install mltu version 1.2.5 or lower
+import mltu
+assert tuple(int(part) for part in mltu.__version__.split(".")[:3]) <= (1, 2, 5), "mltu version must be 1.2.5 or lower"
+
 import os
 import tarfile
 from tqdm import tqdm
diff --git a/Tutorials/09_translation_transformer/train.py b/Tutorials/09_translation_transformer/train.py
index 0658a13..175e4ef 100644
--- a/Tutorials/09_translation_transformer/train.py
+++ b/Tutorials/09_translation_transformer/train.py
@@ -1,3 +1,7 @@
+# For this to work you need to install mltu version 1.2.5 or lower
+import mltu
+assert tuple(int(part) for part in mltu.__version__.split(".")[:3]) <= (1, 2, 5), "mltu version must be 1.2.5 or lower"
+
 import numpy as np
 
 import tensorflow as tf
diff --git a/Tutorials/10_wav2vec2_torch/train.py b/Tutorials/10_wav2vec2_torch/train.py
index eb6c68c..0b8e636 100644
--- a/Tutorials/10_wav2vec2_torch/train.py
+++ b/Tutorials/10_wav2vec2_torch/train.py
@@ -1,3 +1,7 @@
+# For this to work you need to install mltu version 1.2.5 or lower
+import mltu
+assert tuple(int(part) for part in mltu.__version__.split(".")[:3]) <= (1, 2, 5), "mltu version must be 1.2.5 or lower"
+
 import os
 import tarfile
 import pandas as pd
diff --git a/Tutorials/10_wav2vec2_torch/train_tf.py b/Tutorials/10_wav2vec2_torch/train_tf.py
index 93c8fb2..08bd1dc 100644
--- a/Tutorials/10_wav2vec2_torch/train_tf.py
+++ b/Tutorials/10_wav2vec2_torch/train_tf.py
@@ -1,3 +1,7 @@
+# For this to work you need to install mltu version 1.2.5 or lower
+import mltu
+assert tuple(int(part) for part in mltu.__version__.split(".")[:3]) <= (1, 2, 5), "mltu version must be 1.2.5 or lower"
+
 import tensorflow as tf
 try:
     [
diff --git a/Tutorials/12_handwriting_recognition_2/configs.py b/Tutorials/12_handwriting_recognition_2/configs.py
new file mode 100644
index 0000000..612e442
--- /dev/null
+++ b/Tutorials/12_handwriting_recognition_2/configs.py
@@ -0,0 +1,17 @@
+import os
+from datetime import datetime
+
+from mltu.configs import BaseModelConfigs
+
+class ModelConfigs(BaseModelConfigs):
+    """Hyper-parameters and output location for tutorial 12 (handwriting recognition 2)."""
+    def __init__(self):
+        super().__init__()
+        run_stamp = datetime.now().strftime("%Y%m%d%H%M")
+        self.model_path = os.path.join("Models/12_handwriting_recognition_2", run_stamp)
+        self.vocab = ""
+        self.height, self.width = 32, 128
+        self.max_text_length = 0
+        self.batch_size = 128
+        self.learning_rate = 0.002
+        self.train_epochs, self.train_workers = 1000, 20
\ No newline at end of file
diff --git a/Tutorials/12_handwriting_recognition_2/data_utils.py b/Tutorials/12_handwriting_recognition_2/data_utils.py
new file mode 100644
index 0000000..ab5e923
--- /dev/null
+++ b/Tutorials/12_handwriting_recognition_2/data_utils.py
@@ -0,0 +1,127 @@
+"""
+Data utilities for downloading, unzipping, and processing handwriting datasets.
+"""
+
+import os
+import concurrent.futures
+from tqdm import tqdm
+from urllib.request import urlopen
+from io import BytesIO
+from zipfile import ZipFile
+from typing import Optional, Tuple, List, Set
+
+def download_and_unzip(url: str, extract_to: str = "Datasets", chunk_size: int = 1024 * 1024) -> None:
+    """
+    Downloads a ZIP file from a URL into memory and extracts it to ``extract_to``.
+
+    Args:
+        url (str): The URL of the ZIP file to download.
+        extract_to (str): The directory to extract files to. Defaults to "Datasets".
+        chunk_size (int): The size (in bytes) of each chunk to read from the URL.
+            Defaults to 1024 * 1024.
+
+    Raises:
+        ConnectionError: If the URL cannot be opened.
+        OSError: If the download size is unknown or the ZIP file cannot be extracted.
+    """
+    try:
+        http_response = urlopen(url)
+    except Exception as e:
+        raise ConnectionError(f"Failed to open URL: {url}") from e
+
+    # Close the HTTP connection even if the size check or a read fails.
+    with http_response:
+        if getattr(http_response, "length", None) is None:
+            raise OSError("Could not determine file size for download.")
+        # One extra iteration covers the final partial chunk.
+        iterations = (http_response.length // chunk_size) + 1
+        chunks = [http_response.read(chunk_size) for _ in tqdm(range(iterations), desc="Downloading")]
+
+    data = b"".join(chunks)
+    try:
+        with ZipFile(BytesIO(data)) as archive:
+            archive.extractall(path=extract_to)
+    except Exception as e:
+        raise OSError("Failed to extract ZIP file.") from e
+
+
+def process_line(line: str, dataset_path: str) -> Optional[Tuple[str, str]]:
+    """
+    Turn one row of the dataset metadata file into an (image path, label) pair.
+
+    Comment rows, rows flagged "err", rows with fewer than two fields and rows
+    whose image file is missing on disk are all rejected.
+
+    Args:
+        line (str): One raw row of the metadata file.
+        dataset_path (str): Root folder of the dataset.
+
+    Returns:
+        Optional[Tuple[str, str]]:
+            (image_path, label) for a usable row, otherwise None.
+    """
+    fields = line.split(" ")
+    # Short-circuit order matters: the length check guards the fields[1] lookup.
+    if line.startswith("#") or len(fields) < 2 or fields[1] == "err":
+        return None
+    word_id = fields[0]
+    image_path = os.path.join(
+        dataset_path,
+        "words",
+        word_id[:3],
+        "-".join(word_id.split("-")[:2]),
+        word_id + ".png",
+    )
+    if os.path.exists(image_path):
+        return image_path, fields[-1].rstrip("\n")
+    return None
+
+
+def load_dataset(dataset_path: str) -> Tuple[List[List[str]], Set[str], int]:
+    """
+    Build the training sample list from the dataset's 'words.txt' index.
+
+    Only the first 1000 rows of the index are considered (demonstration subset);
+    each row is parsed by ``process_line`` on a thread pool.
+
+    Args:
+        dataset_path (str): Folder that contains 'words.txt'.
+
+    Returns:
+        Tuple[List[List[str]], Set[str], int]:
+            - dataset: [image_path, label] pairs for every usable row.
+            - vocab: the distinct characters seen across all labels.
+            - max_len: length of the longest label (0 if nothing was usable).
+
+    Raises:
+        FileNotFoundError: If 'words.txt' is absent from ``dataset_path``.
+    """
+    words_file = os.path.join(dataset_path, "words.txt")
+    if not os.path.exists(words_file):
+        raise FileNotFoundError(f"Could not find 'words.txt' at {words_file}")
+
+    with open(words_file, "r", encoding="utf-8") as file_obj:
+        # Reduced dataset size for demonstration; remove slicing as needed.
+        lines = file_obj.readlines()[:1000]
+
+    # Bind dataset_path so the pool only has to pass each row.
+    def parse(raw_line: str) -> Optional[Tuple[str, str]]:
+        return process_line(raw_line, dataset_path)
+
+    with concurrent.futures.ThreadPoolExecutor(max_workers=os.cpu_count()) as executor:
+        parsed = executor.map(parse, lines)
+        results = list(
+            tqdm(parsed, total=len(lines), desc="Processing lines")
+        )
+
+    # Keep only rows that produced a sample; executor.map preserves input order.
+    samples = [r for r in results if r is not None]
+
+    dataset: List[List[str]] = [[rel_path, label] for rel_path, label in samples]
+    vocab: Set[str] = set()
+    max_len: int = 0
+    for _, label in samples:
+        vocab.update(list(label))
+        max_len = max(max_len, len(label))
+
+    return dataset, vocab, max_len
\ No newline at end of file
diff --git a/Tutorials/12_handwriting_recognition_2/inferenceModel.py b/Tutorials/12_handwriting_recognition_2/inferenceModel.py
new file mode 100644
index 0000000..c4dd919
--- /dev/null
+++ b/Tutorials/12_handwriting_recognition_2/inferenceModel.py
@@ -0,0 +1,48 @@
+import cv2
+import typing
+import numpy as np
+
+from mltu.inferenceModel import OnnxInferenceModel
+from mltu.utils.text_utils import ctc_decoder, get_cer
+
+class ImageToWordModel(OnnxInferenceModel):
+    """ONNX-backed recognizer that maps a word image to its decoded text."""
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+
+    def predict(self, image: np.ndarray):
+        """Resize ``image`` to the model input size, run the ONNX session and CTC-decode the first result."""
+        # input_shapes[0][1:3] is sliced and reversed to form the size passed to cv2.resize
+        target_size = self.input_shapes[0][1:3][::-1]
+        batch = np.expand_dims(cv2.resize(image, target_size), axis=0).astype(np.float32)
+
+        feeds = {self.input_names[0]: batch}
+        outputs = self.model.run(self.output_names, feeds)[0]
+        return ctc_decoder(outputs, self.model.metadata["vocab"])[0]
+
+if __name__ == "__main__":
+    import pandas as pd
+    from tqdm import tqdm
+
+    model = ImageToWordModel(model_path="Models/12_handwriting_recognition_2/202503281916/model.onnx",)
+
+    df = pd.read_csv("Models/12_handwriting_recognition_2/202503281916/val.csv").values.tolist()
+
+    accum_cer = []
+    for image_path, label in tqdm(df):
+        image = cv2.imread(image_path.replace("\\", "/"))
+
+        prediction_text = model.predict(image)
+
+        cer = get_cer(prediction_text, label)
+        print(f"Image: {image_path}, Label: {label}, Prediction: {prediction_text}, CER: {cer}")
+
+        accum_cer.append(cer)
+
+        # resize by 4x
+        image = cv2.resize(image, (image.shape[1] * 4, image.shape[0] * 4))
+        cv2.imshow("Image", image)
+        cv2.waitKey(0)
+        cv2.destroyAllWindows()
+
+    print(f"Average CER: {np.average(accum_cer)}")
\ No newline at end of file
diff --git a/Tutorials/12_handwriting_recognition_2/model.py b/Tutorials/12_handwriting_recognition_2/model.py
new file mode 100644
index 0000000..d705ca9
--- /dev/null
+++ b/Tutorials/12_handwriting_recognition_2/model.py
@@ -0,0 +1,37 @@
+import keras
+from keras import layers
+
+from mltu.tensorflow.model_utils import residual_block
+
+
+def train_model(input_dim, output_dim, activation="leaky_relu", dropout=0.2):
+    """Assemble the residual CNN + bidirectional LSTM network with a softmax head of output_dim + 1 units."""
+    inputs = layers.Input(shape=input_dim, name="input")
+
+    # normalize images here instead in preprocessing step
+    features = layers.Lambda(lambda x: x / 255)(inputs)
+
+    # (filters, skip_conv, strides) for each residual block, applied in order
+    block_specs = [
+        (16, True, 1),
+        (16, True, 2),
+        (16, False, 1),
+        (32, True, 2),
+        (32, False, 1),
+        (64, True, 2),
+        (64, True, 1),
+        (64, False, 1),
+        (64, False, 1),
+    ]
+    for filters, skip_conv, strides in block_specs:
+        features = residual_block(
+            features, filters, activation=activation, skip_conv=skip_conv, strides=strides, dropout=dropout
+        )
+
+    # merge axes -3 and -2 into a single axis before the recurrent layer
+    sequence = layers.Reshape((features.shape[-3] * features.shape[-2], features.shape[-1]))(features)
+    # tf2onnx is only supporting converting LSTM layer with unroll=True
+    recurrent = layers.Bidirectional(layers.LSTM(128, return_sequences=True, unroll=True))(sequence)
+    recurrent = layers.Dropout(dropout)(recurrent)
+    head = layers.Dense(output_dim + 1, activation="softmax", name="output")(recurrent)
+    return keras.models.Model(inputs=inputs, outputs=head)
diff --git a/Tutorials/12_handwriting_recognition_2/requirements.txt b/Tutorials/12_handwriting_recognition_2/requirements.txt
new file mode 100644
index 0000000..6f3690b
--- /dev/null
+++ b/Tutorials/12_handwriting_recognition_2/requirements.txt
@@ -0,0 +1,10 @@
+tensorflow==2.19.0
+tf-keras==2.19.0
+tqdm==4.67.1
+opencv-python==4.11.0.86
+pillow==11.1.0
+pandas==2.2.3
+PyYAML==6.0.2
+tf2onnx==1.16.1
+onnxruntime==1.21.0
+numpy==2.1.3
\ No newline at end of file
diff --git a/Tutorials/12_handwriting_recognition_2/train.py b/Tutorials/12_handwriting_recognition_2/train.py
new file mode 100644
index 0000000..270bf76
--- /dev/null
+++ b/Tutorials/12_handwriting_recognition_2/train.py
@@ -0,0 +1,111 @@
+import os
+# Use one GPU: respect a CUDA_VISIBLE_DEVICES value exported by the caller, otherwise pick device 0 (index 1 only exists on multi-GPU hosts)
+os.environ.setdefault("CUDA_VISIBLE_DEVICES", "0")
+import tensorflow as tf
+try: [tf.config.experimental.set_memory_growth(gpu, True) for gpu in tf.config.experimental.list_physical_devices("GPU")]
+except: pass
+
+import keras
+
+from mltu.preprocessors import ImageReader
+from mltu.transformers import ImageResizer, LabelIndexer, LabelPadding, ImageShowCV2
+from mltu.augmentors import RandomBrightness, RandomRotate, RandomErodeDilate, RandomSharpen, RandomElasticTransform, RandomGaussianBlur, RandomSaltAndPepper
+from mltu.annotations.images import CVImage
+
+
+from mltu.tensorflow.dataProvider import DataProvider
+from mltu.tensorflow.losses import CTCloss
+from mltu.tensorflow.callbacks import Model2onnx, TrainLogger
+from mltu.tensorflow.metrics import CERMetric, WERMetric
+
+from model import train_model
+from configs import ModelConfigs
+
+import os
+import tarfile
+from data_utils import download_and_unzip, load_dataset
+
+dataset_path = os.path.join("Datasets", "IAM_Words")
+if not os.path.exists(dataset_path):
+    download_and_unzip("https://git.io/J0fjL", extract_to="Datasets")
+
+    with tarfile.open(os.path.join(dataset_path, "words.tgz")) as file:
+        file.extractall(os.path.join(dataset_path, "words"))
+
+dataset, vocab, max_len = load_dataset(dataset_path)
+
+# Create a ModelConfigs object to store model configurations
+configs = ModelConfigs()
+configs.vocab = "".join(sorted(vocab))
+configs.max_text_length = max_len
+configs.save()
+
+# Create a data provider for the dataset
+data_provider = DataProvider(
+    dataset=dataset,
+    skip_validation=True,
+    batch_size=configs.batch_size,
+    data_preprocessors=[ImageReader(CVImage)],
+    transformers=[
+        ImageResizer(configs.width, configs.height, keep_aspect_ratio=False),
+        LabelIndexer(configs.vocab),
+        LabelPadding(max_word_length=configs.max_text_length, padding_value=len(configs.vocab)),
+        ],
+    use_cache=True,
+    workers=configs.train_workers,
+    max_queue_size=10,
+    # use_multiprocessing=True
+)
+
+# Split the dataset into training and validation sets
+train_data_provider, val_data_provider = data_provider.split(split = 0.9)
+
+# Augment training data with random brightness, rotation and erode/dilate
+train_data_provider.augmentors = [
+    RandomBrightness(), 
+    RandomErodeDilate(),
+    RandomSharpen(),
+    RandomRotate(angle=10), 
+    RandomElasticTransform(),
+    RandomGaussianBlur(),
+    RandomSaltAndPepper(),
+]
+
+# Creating TensorFlow model architecture
+model = train_model(
+    input_dim = (configs.height, configs.width, 3),
+    output_dim = len(configs.vocab),
+)
+
+# Compile the model and print summary
+model.compile(
+    optimizer=keras.optimizers.Nadam(learning_rate=configs.learning_rate), # pyright: ignore
+    loss=CTCloss(), 
+    metrics=[
+        CERMetric(vocabulary=configs.vocab),
+        WERMetric(vocabulary=configs.vocab)
+    ],
+    jit_compile=False, # pyright: ignore
+    run_eagerly=False,
+)
+model.summary(line_length=110)
+
+# Define callbacks
+earlystopper = keras.callbacks.EarlyStopping(monitor="val_CER", patience=20, verbose=1, mode="min")
+checkpoint = keras.callbacks.ModelCheckpoint(f"{configs.model_path}/model.h5", monitor="val_CER", verbose=1, save_best_only=True, mode="min")
+trainLogger = TrainLogger(configs.model_path)
+tb_callback = keras.callbacks.TensorBoard(f"{configs.model_path}/logs", update_freq="epoch")
+reduceLROnPlat = keras.callbacks.ReduceLROnPlateau(monitor="val_CER", factor=0.9, min_delta=1e-10, patience=5, verbose=1, mode="min")
+model2onnx = Model2onnx(f"{configs.model_path}/model.h5", metadata={"vocab": configs.vocab}, save_on_epoch_end=True, opset=18)
+
+# Save training and validation datasets as csv files
+train_data_provider.to_csv(os.path.join(configs.model_path, "train.csv"))
+val_data_provider.to_csv(os.path.join(configs.model_path, "val.csv"))
+
+# Train the model
+model.fit(
+    train_data_provider,
+    validation_data=val_data_provider,
+    epochs=configs.train_epochs,
+    callbacks=[earlystopper, checkpoint, trainLogger, reduceLROnPlat, tb_callback, model2onnx],
+)
\ No newline at end of file
diff --git a/mltu/__init__.py b/mltu/__init__.py
index 015215d..543835a 100644
--- a/mltu/__init__.py
+++ b/mltu/__init__.py
@@ -1,4 +1,4 @@
-__version__ = "1.2.5"
+__version__ = "2.0.0.dev1"
 
 from .annotations.images import Image
 from .annotations.images import CVImage
diff --git a/mltu/annotations/audio.py b/mltu/annotations/audio.py
index ec9c2ac..8bb9d55 100644
--- a/mltu/annotations/audio.py
+++ b/mltu/annotations/audio.py
@@ -52,7 +52,7 @@ def numpy(self) -> np.ndarray:
     def __add__(self, other: np.ndarray) -> np.ndarray:
         self._audio = self._audio + other
         self.augmented = True
-        return self
+        return self # pyright: ignore
     
     def __len__(self) -> int:
         return len(self._audio)
diff --git a/mltu/annotations/images.py b/mltu/annotations/images.py
index 929db53..0f0741d 100644
--- a/mltu/annotations/images.py
+++ b/mltu/annotations/images.py
@@ -9,6 +9,9 @@
 
 from PIL import Image as PilImage
 
+import logging
+logger = logging.getLogger(__name__)
+
 
 class Image(ABC):
     def __init__(self) -> None:
@@ -33,11 +36,11 @@ def HSV(self) -> np.ndarray:
         pass
 
     @abstractmethod
-    def update(self, image: np.ndarray):
+    def update(self, image: np.ndarray) -> "Image":
         pass
 
     @abstractmethod
-    def flip(self, axis: int = 0):
+    def flip(self, axis: int = 0) -> "Image":
         pass
 
     @abstractmethod
@@ -67,13 +70,12 @@ def __init__(
             method: int = cv2.IMREAD_COLOR,
             path: str = "",
             color: str = "BGR"
-    ) -> None:
+        ) -> None:
         super().__init__()
         
         if isinstance(image, str):
             if not os.path.exists(image):
                 raise FileNotFoundError(f"Image {image} not found.")
-
             self._image = cv2.imread(image, method)
             self.path = image
             self.color = "BGR"
@@ -130,7 +132,7 @@ def HSV(self) -> np.ndarray:
         else:
             raise ValueError(f"Unknown color format {self.color}")
 
-    def update(self, image: np.ndarray):
+    def update(self, image: np.ndarray) -> "CVImage":
         if isinstance(image, np.ndarray):
             self._image = image
 
@@ -141,10 +143,9 @@ def update(self, image: np.ndarray):
 
             return self
 
-        else:
-            raise TypeError(f"image must be numpy.ndarray, not {type(image)}")
+        raise TypeError(f"image must be numpy.ndarray, not {type(image)}")
 
-    def flip(self, axis: int = 0):
+    def flip(self, axis: int = 0) -> "CVImage":
         """ Flip image along x or y axis
 
         Args:
@@ -178,7 +179,8 @@ class PillowImage(Image):
 
     def __init__(
             self,
-            image: str) -> None:
+            image: typing.Union[str, np.ndarray],
+        ) -> None:
         super().__init__()
 
         if isinstance(image, str):
@@ -189,6 +191,8 @@ def __init__(
             self._image = PilImage.open(image)
 
             self.init_successful = True
+        elif isinstance(image, np.ndarray):
+            raise NotImplementedError("PillowImage does not support numpy.ndarray as input")
         else:
             raise TypeError("Image must be a path to an image")
 
@@ -202,7 +206,7 @@ def __init__(
 
     @property
     def is_animated(self) -> bool:
-        return hasattr(self._image, "is_animated") and self._image.is_animated
+        return isinstance(self._image, PilImage.Image) and getattr(self._image, "is_animated", False)
 
     @property
     def image(self) -> np.ndarray:
@@ -247,7 +251,7 @@ def _init_attributes(self):
         self.height = self.image.shape[0]
         self.channels = 1 if len(self.image.shape) == 2 else self.image.shape[2]
 
-    def update(self, image: PilImage.Image):
+    def update(self, image: typing.Union[PilImage.Image, np.ndarray]) -> "PillowImage":
         if isinstance(image, PilImage.Image):
             self._image = image
         elif isinstance(image, np.ndarray):
diff --git a/mltu/dataProvider.py b/mltu/dataProvider.py
index 06f708d..68be978 100644
--- a/mltu/dataProvider.py
+++ b/mltu/dataProvider.py
@@ -15,18 +15,19 @@ class DataProvider:
     def __init__(
             self,
             dataset: typing.Union[str, list, pd.DataFrame],
-            data_preprocessors: typing.List[typing.Callable] = None,
+            data_preprocessors: typing.Union[typing.List[typing.Callable], None] = None,
             batch_size: int = 4,
             shuffle: bool = True,
             initial_epoch: int = 1,
-            augmentors: typing.List[Augmentor] = None,
-            transformers: typing.List[Transformer] = None,
-            batch_postprocessors: typing.List[typing.Callable] = None,
+            augmentors: typing.Union[typing.List[Augmentor], None] = None,
+            transformers: typing.Union[typing.List[Transformer], None] = None,
+            batch_postprocessors: typing.Union[typing.List[typing.Callable], None] = None,
             skip_validation: bool = True,
-            limit: int = None,
+            limit: typing.Union[int, None]=None,
             use_cache: bool = False,
             log_level: int = logging.INFO,
             numpy: bool = True,
+            **kwargs
     ) -> None:
         """ Standardised object for providing data to a model while training.
 
@@ -137,7 +138,7 @@ def on_epoch_end(self):
         # Remove any samples that were marked for removal
         for remove in self._on_epoch_end_remove:
             self.logger.warning(f"Removing {remove} from dataset.")
-            self._dataset.remove(remove)
+            self._dataset.remove(remove) # pyright: ignore
         self._on_epoch_end_remove = []
 
     def validate_list_dataset(self, dataset: list) -> list:
@@ -148,7 +149,7 @@ def validate_list_dataset(self, dataset: list) -> list:
 
         return validated_data
 
-    def validate(self, dataset: typing.Union[str, list, pd.DataFrame]) -> typing.Union[list, str]:
+    def validate(self, dataset: typing.Union[str, list, pd.DataFrame]) -> typing.Union[list, str]: # pyright: ignore
         """ Validate the dataset and return the dataset """
 
         if isinstance(dataset, str):
diff --git a/mltu/preprocessors.py b/mltu/preprocessors.py
index cb65ca1..c4cebc1 100644
--- a/mltu/preprocessors.py
+++ b/mltu/preprocessors.py
@@ -2,11 +2,9 @@
 import typing
 import importlib
 import numpy as np
-import matplotlib.pyplot as plt
-import matplotlib
 import logging
 
-from . import Image
+from . import Image, CVImage, PillowImage
 from mltu.annotations.audio import Audio
 
 """ Implemented Preprocessors:
@@ -18,12 +16,16 @@
 
 class ImageReader:
     """Read image from path and return image and label"""
-    def __init__(self, image_class: Image, log_level: int = logging.INFO, ) -> None:
+    def __init__(self, image_class: typing.Type[typing.Union[CVImage, PillowImage]] = CVImage, log_level: int = logging.INFO) -> None:
+        self._image_class = image_class
         self.logger = logging.getLogger(self.__class__.__name__)
         self.logger.setLevel(log_level)
-        self._image_class = image_class
 
-    def __call__(self, image_path: typing.Union[str, np.ndarray], label: typing.Any) -> typing.Tuple[Image, typing.Any]:
+    def __call__(
+            self, 
+            image_path: typing.Union[str, np.ndarray, typing.Any], 
+            label: typing.Any
+        ) -> typing.Tuple[typing.Union[Image, None], typing.Any]:
         """ Read image from path and return image and label
         
         Args:
@@ -59,6 +61,7 @@ def import_librosa(object) -> None:
         try:
             object.librosa = importlib.import_module('librosa')
             print("librosa version:", object.librosa.__version__)
+            return object.librosa # pyright: ignore
         except:
             raise ImportError("librosa is required to augment Audio. Please install it with `pip install librosa`.")
 
@@ -71,7 +74,7 @@ class AudioReader:
     """
     def __init__(
             self, 
-            sample_rate = None,
+            sample_rate: int=22050,
             log_level: int = logging.INFO, 
         ) -> None:
         self.sample_rate = sample_rate
@@ -108,7 +111,7 @@ def __call__(self, audio_path: str, label: typing.Any) -> typing.Tuple[np.ndarra
             audio = None
             self.logger.warning(f"Audio {audio_path} could not be read, returning None.")
 
-        return audio, label
+        return audio, label # pyright: ignore
     
 class WavReader:
     """Read wav file with librosa and return audio and label
@@ -130,6 +133,7 @@ def __init__(
         self.frame_step = frame_step
         self.fft_length = fft_length
 
+        matplotlib = importlib.import_module('matplotlib')
         matplotlib.interactive(False)
         # import librosa using importlib
         import_librosa(self)
@@ -150,12 +154,12 @@ def get_spectrogram(wav_path: str, frame_length: int, frame_step: int, fft_lengt
         import_librosa(WavReader)
 
         # Load the wav file and store the audio data in the variable 'audio' and the sample rate in 'orig_sr'
-        audio, orig_sr = WavReader.librosa.load(wav_path) 
+        audio, orig_sr = WavReader.librosa.load(wav_path) # pyright: ignore
 
         # Compute the Short Time Fourier Transform (STFT) of the audio data and store it in the variable 'spectrogram'
         # The STFT is computed with a hop length of 'frame_step' samples, a window length of 'frame_length' samples, and 'fft_length' FFT components.
         # The resulting spectrogram is also transposed for convenience
-        spectrogram = WavReader.librosa.stft(audio, hop_length=frame_step, win_length=frame_length, n_fft=fft_length).T
+        spectrogram = WavReader.librosa.stft(audio, hop_length=frame_step, win_length=frame_length, n_fft=fft_length).T # pyright: ignore
 
         # Take the absolute value of the spectrogram to obtain the magnitude spectrum
         spectrogram = np.abs(spectrogram)
@@ -170,17 +174,18 @@ def get_spectrogram(wav_path: str, frame_length: int, frame_step: int, fft_lengt
         return spectrogram
 
     @staticmethod
-    def plot_raw_audio(wav_path: str, title: str = None, sr: int = 16000) -> None:
+    def plot_raw_audio(wav_path: str, title: str="Audio Plot", sr: int=16000) -> None: 
         """Plot the raw audio of a WAV file
 
         Args:
             wav_path (str): Path to the WAV file.
             sr (int, optional): Sample rate of the WAV file. Defaults to 16000.
-            title (str, optional): Title
+            title (str, optional): Title, defaults to "Audio Plot"
         """
+        plt = importlib.import_module('matplotlib.pyplot')
         import_librosa(WavReader)
         # Load the wav file and store the audio data in the variable 'audio' and the sample rate in 'orig_sr'
-        audio, orig_sr = WavReader.librosa.load(wav_path, sr=sr)
+        audio, orig_sr = WavReader.librosa.load(wav_path, sr=sr) # pyright: ignore
 
         duration = len(audio) / orig_sr
 
@@ -188,7 +193,7 @@ def plot_raw_audio(wav_path: str, title: str = None, sr: int = 16000) -> None:
 
         plt.figure(figsize=(15, 5))
         plt.plot(time, audio)
-        plt.title(title) if title else plt.title("Audio Plot")
+        plt.title(title)
         plt.ylabel("signal wave")
         plt.xlabel("time (s)")
         plt.tight_layout()
@@ -204,6 +209,8 @@ def plot_spectrogram(spectrogram: np.ndarray, title:str = "", transpose: bool =
             transpose (bool, optional): Transpose the spectrogram. Defaults to True.
             invert (bool, optional): Invert the spectrogram. Defaults to True.
         """
+        plt = importlib.import_module('matplotlib.pyplot')
+
         if transpose:
             spectrogram = spectrogram.T
         
diff --git a/mltu/tensorflow/callbacks.py b/mltu/tensorflow/callbacks.py
index ea9479e..86df348 100644
--- a/mltu/tensorflow/callbacks.py
+++ b/mltu/tensorflow/callbacks.py
@@ -1,20 +1,23 @@
 import os
+import typing
 import tensorflow as tf
-from keras.callbacks import Callback
+import keras
 
+from pathlib import Path
 import logging
 
-class Model2onnx(Callback):
+class Model2onnx(keras.callbacks.Callback):
     """ Converts the model to onnx format after training is finished. """
     def __init__(
         self, 
         saved_model_path: str, 
-        metadata: dict=None,
+        metadata: typing.Union[dict, None]=None,
         save_on_epoch_end: bool=False,
+        opset: typing.Union[int, None]=None
         ) -> None:
         """ Converts the model to onnx format after training is finished.
         Args:
-            saved_model_path (str): Path to the saved .h5 model.
+            saved_model_path (str): Path to the saved model.
             metadata (dict, optional): Dictionary containing metadata to be added to the onnx model. Defaults to None.
             save_on_epoch_end (bool, optional): Save the onnx model on every epoch end. Defaults to False.
         """
@@ -22,6 +25,7 @@ def __init__(
         self.saved_model_path = saved_model_path
         self.metadata = metadata
         self.save_on_epoch_end = save_on_epoch_end
+        self.opset = opset
 
         try:
             import tf2onnx
@@ -34,51 +38,48 @@ def __init__(
             raise ImportError("onnx is not installed. Please install it using 'pip install onnx'")
 
     @staticmethod
-    def model2onnx(model: tf.keras.Model, onnx_model_path: str):
-        try:
-            import tf2onnx
+    def model2onnx(model: keras.Model, onnx_model_path: str, opset: typing.Union[int, None]=None):
+        import tf2onnx
 
-            # convert the model to onnx format
-            tf2onnx.convert.from_keras(model, output_path=onnx_model_path)
+        # Handle input signature where model has multiple inputs
+        input_signature = [tf.TensorSpec(shape=t.shape, dtype=t.dtype, name=t.name) for t in model.inputs] # pyright: ignore
 
-        except Exception as e:
-            print(e)
+        # convert the model to onnx format
+        model_proto, external_tensor_storage = tf2onnx.convert.from_keras(model, input_signature=input_signature, opset=opset, output_path=onnx_model_path)
 
-    @staticmethod
-    def include_metadata(onnx_model_path: str, metadata: dict=None):
-        try:
-            if metadata and isinstance(metadata, dict):
+        return model_proto, external_tensor_storage
 
-                import onnx
-                # Load the ONNX model
-                onnx_model = onnx.load(onnx_model_path)
+    @staticmethod
+    def include_metadata(onnx_model_path: str, metadata: typing.Union[dict, None]=None):
+        if metadata and isinstance(metadata, dict):
 
-                # Add the metadata dictionary to the model's metadata_props attribute
-                for key, value in metadata.items():
-                    meta = onnx_model.metadata_props.add()
-                    meta.key = key
-                    meta.value = str(value)
+            import onnx
+            # Load the ONNX model
+            onnx_model = onnx.load(onnx_model_path)
 
-                # Save the modified ONNX model
-                onnx.save(onnx_model, onnx_model_path)
+            # Add the metadata dictionary to the model's metadata_props attribute
+            for key, value in metadata.items():
+                meta = onnx_model.metadata_props.add()
+                meta.key = key
+                meta.value = str(value)
 
-        except Exception as e:
-            print(e)  
+            # Save the modified ONNX model
+            onnx.save(onnx_model, onnx_model_path)
 
-    def on_epoch_end(self, epoch: int, logs: dict=None):
+    def on_epoch_end(self, epoch: int, logs: typing.Union[dict, None]=None):
         """ Converts the model to onnx format on every epoch end. """
         if self.save_on_epoch_end:
             self.on_train_end(logs=logs)
 
-    def on_train_end(self, logs=None):
+    def on_train_end(self, logs: typing.Union[dict, None]=None):
         """ Converts the model to onnx format after training is finished. """
-        self.model.load_weights(self.saved_model_path)
-        onnx_model_path = self.saved_model_path.replace(".h5", ".onnx")
-        self.model2onnx(self.model, onnx_model_path)
+        self.model.load_weights(self.saved_model_path) # pyright: ignore
+        onnx_model_path = str(Path(self.saved_model_path).with_suffix('.onnx'))
+        self.model2onnx(self.model, onnx_model_path, self.opset) # pyright: ignore
         self.include_metadata(onnx_model_path, self.metadata)
 
 
-class TrainLogger(Callback):
+class TrainLogger(keras.callbacks.Callback):
     """Logs training metrics to a file.
     
     Args:
@@ -108,13 +109,14 @@ def __init__(self, log_path: str, log_file: str="logs.log", logLevel=logging.INF
 
         self.logger.addHandler(self.file_handler)
 
-    def on_epoch_end(self, epoch: int, logs: dict=None):
-        epoch_message = f"Epoch {epoch}; "
-        logs_message = "; ".join([f"{key}: {value}" for key, value in logs.items()])
-        self.logger.info(epoch_message + logs_message)
+    def on_epoch_end(self, epoch: int, logs: typing.Union[dict, None]=None):
+        if logs:
+            epoch_message = f"Epoch {epoch}; "
+            logs_message = "; ".join([f"{key}: {value}" for key, value in logs.items()])
+            self.logger.info(epoch_message + logs_message)
 
 
-class WarmupCosineDecay(Callback):
+class WarmupCosineDecay(keras.callbacks.Callback):
     """ Cosine decay learning rate scheduler with warmup
 
     Args:
@@ -141,28 +143,27 @@ def __init__(
         self.decay_epochs = decay_epochs
         self.initial_lr = initial_lr
         self.verbose = verbose
+        self.model: keras.Model
 
-    def on_epoch_begin(self, epoch: int, logs: dict=None):
+    def on_epoch_begin(self, epoch: int, logs: typing.Union[dict, None]=None):
         """ Adjust learning rate at the beginning of each epoch """
 
         if epoch >= self.warmup_epochs + self.decay_epochs:
-            return logs
+            return
 
         if epoch < self.warmup_epochs:
             lr = self.initial_lr + (self.lr_after_warmup - self.initial_lr) * (epoch + 1) / self.warmup_epochs
         else:
             progress = (epoch - self.warmup_epochs) / self.decay_epochs
-            lr = self.final_lr + 0.5 * (self.lr_after_warmup - self.final_lr) * (1 + tf.cos(tf.constant(progress) * 3.14159))
+            lr = self.final_lr + 0.5 * (self.lr_after_warmup - self.final_lr) * (1 + tf.cos(tf.constant(progress) * 3.14159)) # pyright: ignore
 
-        tf.keras.backend.set_value(self.model.optimizer.lr, lr)
+        self.model.optimizer.learning_rate = lr # pyright: ignore  (Keras 3 optimizers expose `learning_rate`, not `lr`)
         
         if self.verbose:
             print(f"Epoch {epoch + 1} - Learning Rate: {lr}")
     
-    def on_epoch_end(self, epoch: int, logs: dict=None):
+    def on_epoch_end(self, epoch: int, logs: typing.Union[dict, None]=None):
         logs = logs or {}
         
         # Log the learning rate value
-        logs["lr"] = self.model.optimizer.lr
-        
-        return logs
\ No newline at end of file
+        logs["lr"] = self.model.optimizer.learning_rate
\ No newline at end of file
diff --git a/mltu/tensorflow/dataProvider.py b/mltu/tensorflow/dataProvider.py
index d5f46ae..4e7f807 100644
--- a/mltu/tensorflow/dataProvider.py
+++ b/mltu/tensorflow/dataProvider.py
@@ -1,7 +1,10 @@
-import tensorflow as tf
+import keras
 
 from ..dataProvider import DataProvider as dataProvider
 
-class DataProvider(dataProvider, tf.keras.utils.Sequence):
+class DataProvider(dataProvider, keras.utils.Sequence):
     def __init__(self, *args, **kwargs):
+        self.workers = kwargs.pop("workers", 10)
+        self.use_multiprocessing = kwargs.pop("use_multiprocessing", False)
+        self.max_queue_size = kwargs.pop("max_queue_size", 10)
         super().__init__(*args, **kwargs)
diff --git a/mltu/tensorflow/model_utils.py b/mltu/tensorflow/model_utils.py
index 1040f7b..c544c3c 100644
--- a/mltu/tensorflow/model_utils.py
+++ b/mltu/tensorflow/model_utils.py
@@ -1,10 +1,8 @@
+import keras
 import typing
 import tensorflow as tf
-from tensorflow import keras
-from keras import layers
-from keras.models import Model
 
-class CustomModel(Model):
+class CustomModel(keras.models.Model):
     """ Custom TensorFlow model for debugging training process purposes
     """
     def train_step(self, train_data):
@@ -17,7 +15,7 @@ def train_step(self, train_data):
             gradients = tape.gradient(loss, self.trainable_weights)
 
         # Applying the gradients on the model using the specified optimizer
-        self.optimizer.apply_gradients(zip(gradients, self.trainable_weights))
+        self.optimizer.apply_gradients(zip(gradients, self.trainable_weights)) # pyright: ignore
 
         # Update the metrics.
         # Metrics are configured in `compile()`.
@@ -41,19 +39,19 @@ def test_step(self, test_data):
         return {m.name: m.result() for m in self.metrics}
 
 
-def activation_layer(layer, activation: str="relu", alpha: float=0.1) -> tf.Tensor:
+def activation_layer(layer, activation: str="relu", negative_slope: float=0.1) -> keras.layers.Layer:
     """ Activation layer wrapper for LeakyReLU and ReLU activation functions
     Args:
         layer: tf.Tensor
         activation: str, activation function name (default: 'relu')
-        alpha: float (LeakyReLU activation function parameter)
+        negative_slope: float (LeakyReLU activation function parameter)
     Returns:
         tf.Tensor
     """
     if activation == "relu":
-        layer = layers.ReLU()(layer)
+        layer = keras.layers.ReLU()(layer)
     elif activation == "leaky_relu":
-        layer = layers.LeakyReLU(alpha=alpha)(layer)
+        layer = keras.layers.LeakyReLU(negative_slope=negative_slope)(layer)
 
     return layer
 
@@ -67,29 +65,30 @@ def residual_block(
         padding: str = "same",
         kernel_initializer: str = "he_uniform",
         activation: str = "relu",
-        dropout: float = 0.2):
+        dropout: float = 0.2
+    ) -> keras.layers.Layer:
     # Create skip connection tensor
     x_skip = x
 
     # Perform 1-st convolution
-    x = layers.Conv2D(filter_num, kernel_size, padding = padding, strides = strides, kernel_initializer=kernel_initializer)(x)
-    x = layers.BatchNormalization()(x)
+    x = keras.layers.Conv2D(filter_num, kernel_size, padding = padding, strides = strides, kernel_initializer=kernel_initializer)(x)
+    x = keras.layers.BatchNormalization()(x)
     x = activation_layer(x, activation=activation)
 
     # Perform 2-nd convoluti
-    x = layers.Conv2D(filter_num, kernel_size, padding = padding, kernel_initializer=kernel_initializer)(x)
-    x = layers.BatchNormalization()(x)
+    x = keras.layers.Conv2D(filter_num, kernel_size, padding = padding, kernel_initializer=kernel_initializer)(x)
+    x = keras.layers.BatchNormalization()(x)
 
     # Perform 3-rd convolution if skip_conv is True, matchin the number of filters and the shape of the skip connection tensor
     if skip_conv:
-        x_skip = layers.Conv2D(filter_num, 1, padding = padding, strides = strides, kernel_initializer=kernel_initializer)(x_skip)
+        x_skip = keras.layers.Conv2D(filter_num, 1, padding = padding, strides = strides, kernel_initializer=kernel_initializer)(x_skip)
 
     # Add x and skip connection and apply activation function
-    x = layers.Add()([x, x_skip])     
+    x = keras.layers.Add()([x, x_skip])     
     x = activation_layer(x, activation=activation)
 
     # Apply dropout
     if dropout:
-        x = layers.Dropout(dropout)(x)
+        x = keras.layers.Dropout(dropout)(x)
 
     return x
\ No newline at end of file
diff --git a/mltu/tensorflow/transformer/utils.py b/mltu/tensorflow/transformer/utils.py
index 471f92e..13e64b0 100644
--- a/mltu/tensorflow/transformer/utils.py
+++ b/mltu/tensorflow/transformer/utils.py
@@ -1,7 +1,7 @@
+import keras
 import tensorflow as tf
 
-
-class MaskedLoss(tf.keras.losses.Loss):
+class MaskedLoss(keras.losses.Loss):
     """ Masked loss function for Transformer.
 
     Args:
@@ -12,7 +12,7 @@ def __init__(self, mask_value: int=0, reduction: str='none') -> None:
         super(MaskedLoss, self).__init__()
         self.mask_value = mask_value
         self.reduction = reduction
-        self.loss_object = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True, reduction=reduction)
+        self.loss_object = keras.losses.SparseCategoricalCrossentropy(from_logits=True, reduction=reduction)
 
     def __call__(self, y_true: tf.Tensor, y_pred: tf.Tensor, sample_weight=None) -> tf.Tensor:
         """ Calculate masked loss.
@@ -28,13 +28,13 @@ def __call__(self, y_true: tf.Tensor, y_pred: tf.Tensor, sample_weight=None) ->
         loss = self.loss_object(y_true, y_pred)
 
         mask = tf.cast(mask, dtype=loss.dtype)
-        loss *= mask
+        loss *= mask # pyright: ignore
 
         loss = tf.reduce_sum(loss) / tf.reduce_sum(mask)
         return loss
 
 
-class MaskedAccuracy(tf.keras.metrics.Metric):
+class MaskedAccuracy(keras.metrics.Metric):
     """ Masked accuracy metric for Transformer.
 
     Args:
@@ -48,12 +48,13 @@ def __init__(self, mask_value: int=0, name: str='masked_accuracy') -> None:
         self.count = self.add_weight(name='count', initializer='zeros')
 
     @tf.function
-    def update_state(self, y_true: tf.Tensor, y_pred: tf.Tensor, sample_weight=None):
+    def update_state(self, y_true: tf.Tensor, y_pred: tf.Tensor, sample_weight=None): # pyright: ignore
         """ Update state of the metric.
 
         Args:
             y_true (tf.Tensor): True labels.
             y_pred (tf.Tensor): Predicted labels.
+            sample_weight: (Optional) sample weights.
         """
         pred = tf.argmax(y_pred, axis=2)
         label = tf.cast(y_true, pred.dtype)
@@ -80,7 +81,7 @@ def result(self) -> tf.Tensor:
         return self.total / self.count
     
 
-class CERMetric(tf.keras.metrics.Metric):
+class CERMetric(keras.metrics.Metric):
     """A custom TensorFlow metric to compute the Character Error Rate (CER).
     
     Args:
@@ -116,8 +117,8 @@ def get_cer(self, pred, y_true, padding=-1):
         end_token_index = tf.argmax(equal_int, axis=1)
 
         # mask out everything after end token
-        new_range = tf.range(tf.shape(pred)[1], dtype=tf.int64)
-        range_matrix = tf.tile(new_range[None, :], [tf.shape(pred)[0], 1])
+        new_range = tf.range(tf.shape(pred)[1], dtype=tf.int64) # pyright: ignore
+        range_matrix = tf.tile(new_range[None, :], [tf.shape(pred)[0], 1]) # pyright: ignore
 
         mask = range_matrix <= tf.expand_dims(end_token_index, axis=1)
         masked_pred = tf.where(mask, pred, padding)
@@ -151,7 +152,7 @@ def update_state(self, y_true, y_pred, sample_weight=None):
         self.cer_accumulator.assign_add(tf.reduce_sum(distance))
         
         # Increment the batch_counter by the batch size
-        self.batch_counter.assign_add(len(y_true))
+        self.batch_counter.assign_add(y_true.shape[0])
 
     def result(self):
         """ Computes and returns the metric result.
diff --git a/mltu/torch/dataProvider.py b/mltu/torch/dataProvider.py
index 14fb4bd..668ba3a 100644
--- a/mltu/torch/dataProvider.py
+++ b/mltu/torch/dataProvider.py
@@ -13,7 +13,7 @@
 
 
 class ThreadExecutor:
-    def __init__(self, target: typing.Callable, workers: int = os.cpu_count()) -> None:
+    def __init__(self, target: typing.Callable, workers: int=os.cpu_count() or 1) -> None:
         self.target = target
         self.workers = workers
         
@@ -87,7 +87,7 @@ def __exit__(self):
 
 
 class ProcessExecutor:
-    def __init__(self, target: typing.Callable, workers: int = os.cpu_count()) -> None:
+    def __init__(self, target: typing.Callable, workers: int=os.cpu_count() or 1) -> None:
         self.target = target
         self.workers = workers
         self.busy = False
@@ -110,7 +110,7 @@ def __call__(self, data) -> typing.Any:
             for index, data_batch in enumerate(data):
                 for worker in self.mp_workers:
                     if worker.busy == False and results[index] is None:
-                        results[index] = worker.send(data_batch)
+                        results[index] = worker.send(data_batch) # pyright: ignore
                         break
 
             # receive data from workers
@@ -130,24 +130,23 @@ def __call__(self, data) -> typing.Any:
         self.busy = False
         return results
 
-
 class DataProvider(BaseDataProvider):
     """ DataProvider for PyTorch with multiprocessing and multithreading support.
     """
     def __init__(
             self, 
             dataset: typing.Union[str, list, pd.DataFrame],
-            data_preprocessors: typing.List[typing.Callable] = None,
+            data_preprocessors: typing.Optional[typing.List[typing.Callable]] = None,
             batch_size: int = 4,
             shuffle: bool = True,
             initial_epoch: int = 1,
-            augmentors: typing.List[Augmentor] = None,
-            transformers: typing.List[Transformer] = None,
-            batch_postprocessors: typing.List[typing.Callable] = None,
+            augmentors: typing.Optional[typing.List[Augmentor]] = None,
+            transformers: typing.Optional[typing.List[Transformer]] = None,
+            batch_postprocessors: typing.Optional[typing.List[typing.Callable]] = None,
             skip_validation: bool = True,
-            limit: int = None,
+            limit: typing.Union[int, None] = None,
             use_cache: bool = False,
-            workers: int = os.cpu_count(),
+            workers: int = os.cpu_count() or 1,
             use_multiprocessing: bool = False,
             max_queue_size: int = 5,
             **kwargs
diff --git a/mltu/transformers.py b/mltu/transformers.py
index e4cb4bb..d1ebfc1 100644
--- a/mltu/transformers.py
+++ b/mltu/transformers.py
@@ -54,7 +54,7 @@ def __init__(
         width: int, 
         height: int, 
         keep_aspect_ratio: bool=False, 
-        padding_color: typing.Tuple[int]=(0, 0, 0)
+        padding_color: typing.Tuple[int, int, int]=(0, 0, 0)
         ) -> None:
         self._width = width
         self._height = height
@@ -77,7 +77,7 @@ def unpad_maintaining_aspect_ratio(padded_image: np.ndarray, original_width: int
         return original_image
 
     @staticmethod
-    def resize_maintaining_aspect_ratio(image: np.ndarray, width_target: int, height_target: int, padding_color: typing.Tuple[int]=(0, 0, 0)) -> np.ndarray:
+    def resize_maintaining_aspect_ratio(image: np.ndarray, width_target: int, height_target: int, padding_color: typing.Tuple[int, int, int]=(0, 0, 0)) -> np.ndarray:
         """ Resize image maintaining aspect ratio and pad with padding_color.
 
         Args:
@@ -132,7 +132,7 @@ class LabelIndexer(Transformer):
     """
     def __init__(
         self, 
-        vocab: typing.List[str]
+        vocab: typing.Union[str, typing.List[str]]
         ) -> None:
         self.vocab = vocab
 
@@ -150,7 +150,7 @@ class LabelPadding(Transformer):
     def __init__(
         self, 
         padding_value: int,
-        max_word_length: int = None, 
+        max_word_length: typing.Optional[int]=None, 
         use_on_batch: bool = False
         ) -> None:
         self.max_word_length = max_word_length
@@ -172,6 +172,8 @@ def __call__(self, data: np.ndarray, label: np.ndarray):
             return data, padded_labels
 
         label = label[:self.max_word_length]
+        if self.max_word_length is None:
+            raise ValueError("max_word_length must be specified.")
         return data, np.pad(label, (0, self.max_word_length - len(label)), "constant", constant_values=self.padding_value)
 
 
@@ -208,7 +210,7 @@ class SpectrogramPadding(Transformer):
     def __init__(
         self, 
         padding_value: int,
-        max_spectrogram_length: int = None, 
+        max_spectrogram_length: typing.Optional[int] = None, 
         use_on_batch: bool = False
         ) -> None:
         self.max_spectrogram_length = max_spectrogram_length
@@ -247,9 +249,9 @@ def __init__(self, max_audio_length: int, padding_value: int = 0, use_on_batch:
     def __call__(self, audio: Audio, label: typing.Any):
         # batched padding
         if self.use_on_batch:
-            max_len = max([len(a) for a in audio])
+            max_len = max([len(a) for a in audio]) # pyright: ignore
             padded_audios = []
-            for a in audio:
+            for a in audio: # pyright: ignore
                 # limit audio if it exceed max_audio_length
                 padded_audio = np.pad(a, (0, max_len - a.shape[0]), mode="constant", constant_values=self.padding_value)
                 padded_audios.append(padded_audio)
diff --git a/requirements.txt b/requirements.txt
index feec954..bfab0f5 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,9 +1,8 @@
-PyYAML>=6.0
-tqdm
+PyYAML==6.0.2
+tqdm==4.67.1
 qqdm==0.0.7
-pandas
-numpy
-opencv-python
-Pillow>=9.4.0
-onnxruntime>=1.15.0  # onnxruntime-gpu for GPU support
-matplotlib
\ No newline at end of file
+pandas==2.2.3
+numpy==2.1.3
+opencv-python==4.11.0.86
+pillow==11.1.0
+onnxruntime==1.21.0  # onnxruntime-gpu for GPU support
\ No newline at end of file