diff --git a/.dvc/.gitignore b/.dvc/.gitignore new file mode 100644 index 00000000..528f30c7 --- /dev/null +++ b/.dvc/.gitignore @@ -0,0 +1,3 @@ +/config.local +/tmp +/cache diff --git a/.dvc/config b/.dvc/config new file mode 100644 index 00000000..e69de29b diff --git a/.dvcignore b/.dvcignore new file mode 100644 index 00000000..51973055 --- /dev/null +++ b/.dvcignore @@ -0,0 +1,3 @@ +# Add patterns of files dvc should ignore, which could improve +# the performance. Learn more at +# https://dvc.org/doc/user-guide/dvcignore diff --git a/.github/workflows/train.yml b/.github/workflows/train.yml new file mode 100644 index 00000000..d4239944 --- /dev/null +++ b/.github/workflows/train.yml @@ -0,0 +1,18 @@ +name: Train Model +on: [push] +jobs: + train: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - uses: actions/setup-python@v4 + with: + python-version: '3.10' + - run: pip install -r requirements.txt + - run: python -m pytest tests/ + - run: python src/train.py || exit 1 + - run: ls -R mlruns/ + - uses: actions/upload-artifact@v4 + with: + name: model + path: mlruns/ \ No newline at end of file diff --git a/.gitignore b/.gitignore index 3ab04e2f..02ada6a8 100644 --- a/.gitignore +++ b/.gitignore @@ -3,88 +3,44 @@ __pycache__/ *.py[cod] *$py.class -# C extensions -*.so - -# Distribution / packaging -.Python -build/ -develop-eggs/ -dist/ -downloads/ -eggs/ -.eggs/ -lib/ -lib64/ -parts/ -sdist/ -var/ -wheels/ -*.egg-info/ -.installed.cfg -*.egg -MANIFEST -venv/ - -# PyInstaller -# Usually these files are written by a python script from a template -# before PyInstaller builds the exe, so as to inject date/other infos into it. 
-*.manifest -*.spec - -# Installer logs -pip-log.txt -pip-delete-this-directory.txt - -# Unit test / coverage reports -htmlcov/ -.tox/ -.coverage -.coverage.* -.cache -nosetests.xml -coverage.xml -*.cover -.hypothesis/ -.pytest_cache/ -*-testresults.xml -test-output.xml - -# Translations -*.mo -*.pot - -# Django stuff: -*.log -local_settings.py -db.sqlite3 - -# Flask stuff: -instance/ -.webassets-cache - -# Scrapy stuff: -.scrapy - -# Sphinx documentation -docs/_build/ - -# PyBuilder -target/ - -# Jupyter Notebook -.ipynb_checkpoints - -# pyenv -.python-version - -# celery beat schedule file -celerybeat-schedule - -# SageMath parsed files -*.sage.py - -# Environments +# Project-specific ignores +/data/raw/ +/data/interim/ +/models/ +/mlruns/ +/mlartifacts/ + +# Exceptions - keep these directories (empty placeholder files will be tracked) +!/data/processed/ +!/data/processed/.gitkeep +!/models/.gitkeep +!/notebooks/ +!/notebooks/.gitkeep + +# Keep all source code +!/src/ +!/src/*.py + +# Keep documentation and configs +!*.md +!LICENSE +!README.md +!OUTLINE +!TIMELINE +!requirements.txt +!setup.py + +# DVC files (track these) +!.dvc/ +!data/.dvc +!models/.dvc + +# IDE and environment files +.vscode/ +.idea/ +*.swp +*.swo +*~ .env .venv env/ @@ -94,18 +50,36 @@ env.bak/ venv.bak/ *.vscode condaenv.* +pipenv +Pipfile* +poetry.lock -# Spyder project settings -.spyderproject -.spyproject +# Build and distribution files +build/ +dist/ +*.egg-info/ +*.egg +*.whl -# Rope project settings -.ropeproject +# Logs and debug files +*.log +logs/ +debug/ -# mkdocs documentation -/site +# Test and coverage reports +.coverage +htmlcov/ +.pytest_cache/ +test-results.xml -# mypy -.mypy_cache/ +# Jupyter +.ipynb_checkpoints/ +# OS-specific .DS_Store +Thumbs.db + +# Python cache +.mypy_cache/ +.python-version + diff --git a/data/.gitignore b/data/.gitignore new file mode 100644 index 00000000..43ee6de4 --- /dev/null +++ b/data/.gitignore @@ -0,0 +1 @@ +/raw_data.csv diff --git 
"""Evidently-based monitoring: build a data-drift / classification-quality
report comparing current production data against a reference window."""
from evidently import ColumnMapping
from evidently.report import Report
from evidently.metric_preset import DataDriftPreset
from evidently.metrics import ClassificationQualityMetric


import pandas as pd


def generate_report(current_data: pd.DataFrame,
                    reference_data: pd.DataFrame,
                    target_col: str = "target",
                    prediction_col: str = None):
    """Generate a data-drift and classification-quality report.

    Args:
        current_data: latest (production) data window.
        reference_data: baseline data, e.g. the training/validation set.
        target_col: name of the ground-truth label column.
        prediction_col: name of the model-prediction column. If None,
            evidently's default column name ("prediction") is assumed.
            NOTE(review): ClassificationQualityMetric needs a prediction
            column to exist under one of these names — confirm callers
            provide it.

    Returns:
        The evidently Report, already run; callers can use e.g.
        ``.as_dict()`` or ``.save_html()`` on it.
    """
    # BUG FIX: exclude the target (and prediction) columns from
    # numerical_features — the original listed *every* numeric column, so
    # the label itself was analysed as a drifting input feature.
    feature_cols = [
        col for col in current_data.select_dtypes(include='number').columns
        if col not in (target_col, prediction_col)
    ]

    mapping_kwargs = {
        "target": target_col,
        "numerical_features": feature_cols,
    }
    # Only override evidently's default prediction column when one is
    # explicitly given (passing prediction=None would *disable* it).
    if prediction_col is not None:
        mapping_kwargs["prediction"] = prediction_col
    column_mapping = ColumnMapping(**mapping_kwargs)

    report = Report(metrics=[
        DataDriftPreset(),
        ClassificationQualityMetric()
    ])

    report.run(
        current_data=current_data,
        reference_data=reference_data,
        column_mapping=column_mapping
    )

    return report
"""End-to-end training pipeline for the fraud-detection model.

Trains a RandomForest on the processed dataset, logs the run to a local
MLflow tracking store, registers the model, and promotes it through
Staging to Production when it outperforms the current "Champion".
"""
import mlflow
import numpy as np
import pandas as pd
from pathlib import Path
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import (accuracy_score, f1_score, log_loss,
                             precision_score, recall_score,
                             classification_report)
from mlflow.models.signature import infer_signature
from mlflow.tracking import MlflowClient
import logging
import json
from datetime import datetime

# Configure logging.
# NOTE: writes 'training.log' into the current working directory as an
# import-time side effect (kept for compatibility with existing runs).
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    handlers=[
        logging.FileHandler('training.log'),
        logging.StreamHandler()
    ]
)
logger = logging.getLogger(__name__)

# Constants
BASE_DIR = Path(__file__).parent.parent
DATA_PATH = BASE_DIR / "data" / "processed" / "train_data.csv"
MODEL_NAME = "Fraud_Detection_Model"
# Minimum accuracy a new version must reach before it may be promoted.
VALIDATION_THRESHOLD = 0.9
CONFIG = {
    "data": {
        "test_size": 0.2,
        "random_state": 42,
        "target_col": "target"
    },
    "model": {
        "type": "RandomForestClassifier",
        "params": {
            "n_estimators": 150,
            "max_depth": 8,
            "min_samples_split": 2,
            "random_state": 42,
            "class_weight": "balanced"
        }
    }
}


def load_and_validate_data():
    """Load the training CSV, resolve the target column, run sanity checks
    and return a train/test split.

    Returns:
        (X_train, X_test, y_train, y_test) as produced by train_test_split.

    Raises:
        ValueError: if no target column can be found, or fewer than two
            classes are present in the labels.
    """
    data = pd.read_csv(DATA_PATH)

    # Resolve the target column: configured name first, then common
    # fallbacks if it is absent from the file.
    target_col = CONFIG['data'].get('target_col', 'target')
    if target_col not in data.columns:
        for col in ['target', 'label', 'class']:
            if col in data.columns:
                target_col = col
                break
        else:
            raise ValueError(f"No target column found in {DATA_PATH}")

    # Warn once on small datasets. (The original warned twice, with
    # inconsistent defaults of 10 and 100 — deduplicated here.)
    min_samples = CONFIG['data'].get('min_samples', 10)
    if len(data) < min_samples:
        logger.warning(f"Dataset has only {len(data)} samples (min {min_samples})")

    # BUG FIX: index with the *resolved* target_col. The original used
    # CONFIG['data']['target_col'], which raises KeyError when the key is
    # missing and reads the wrong column when a fallback name was used.
    class_counts = data[target_col].value_counts()
    if len(class_counts) < 2:
        raise ValueError(f"Need at least 2 classes, found {class_counts.index.tolist()}")

    return train_test_split(
        data.drop(target_col, axis=1),
        data[target_col],
        test_size=CONFIG['data'].get('test_size', 0.2),
        random_state=CONFIG['data'].get('random_state', 42)
    )


def evaluate_model(model, X_test, y_test):
    """Compute evaluation metrics for a fitted classifier.

    Returns:
        (metrics, y_pred): ``metrics`` is a flat dict of scalar scores plus
        the full per-class classification report; ``y_pred`` are the
        test-set predictions.
    """
    y_pred = model.predict(X_test)

    proba = model.predict_proba(X_test) if hasattr(model, "predict_proba") else None

    report = classification_report(y_test, y_pred, output_dict=True, zero_division=0)

    metrics = {
        "accuracy": accuracy_score(y_test, y_pred),
        "f1_score": f1_score(y_test, y_pred, average='weighted'),
        "precision": precision_score(y_test, y_pred, average='weighted'),
        "recall": recall_score(y_test, y_pred, average='weighted'),
        "classification_report": report  # Keep the full report
    }

    # Flatten per-class report entries into scalar metrics.
    # (The original comprehension shadowed `v` in its inner loop.)
    class_metrics = {
        f"class_{label}_{metric_name}": value
        for label, class_report in report.items()
        if isinstance(class_report, dict)
        for metric_name, value in class_report.items()
    }
    metrics.update(class_metrics)

    if proba is not None and len(np.unique(y_test)) > 1:
        # BUG FIX: log_loss takes the positive-class column for binary
        # problems but the full probability matrix for multiclass; the
        # original always passed the last column.
        y_proba = proba[:, -1] if proba.shape[1] == 2 else proba
        metrics["log_loss"] = log_loss(y_test, y_proba, labels=model.classes_)

    return metrics, y_pred


def setup_mlflow():
    """Configure MLflow tracking against a local file store."""
    mlflow.set_tracking_uri(f"file:{str(BASE_DIR / 'mlruns')}")
    mlflow.set_experiment("Fraud_Detection")

    # Enable autologging.
    # NOTE(review): autolog(log_models=True) plus the explicit
    # mlflow.sklearn.log_model() in train_and_register logs the model
    # artifact twice; only the explicit one is registered — confirm this
    # duplication is intended.
    mlflow.sklearn.autolog(
        log_input_examples=True,
        log_model_signatures=True,
        log_models=True
    )


def register_and_promote_model(client, run_id, metrics):
    """Tag the newly registered version and promote it if it passes
    validation.

    Args:
        client: an MlflowClient.
        run_id: id of the training run (currently informational only; the
            newest "None"-stage version is assumed to belong to it).
        metrics: metrics dict from evaluate_model; must contain "accuracy".

    Raises:
        Re-raises any registry error after logging it.
    """
    try:
        # Get the newly created version.
        # NOTE(review): get_latest_versions(stages=...) is deprecated in
        # newer MLflow; matching the version by run_id would be more robust.
        new_version = client.get_latest_versions(MODEL_NAME, stages=["None"])[0]

        # Add comprehensive metadata.
        client.set_model_version_tag(
            name=MODEL_NAME,
            version=new_version.version,
            key="validation_status",
            value="Pending"
        )

        client.set_model_version_tag(
            name=MODEL_NAME,
            version=new_version.version,
            key="deployment_ready",
            value=str(metrics["accuracy"] >= VALIDATION_THRESHOLD).lower()
        )

        # Evaluate promotion criteria.
        if metrics["accuracy"] >= VALIDATION_THRESHOLD:
            promote_model(client, new_version, metrics)
        else:
            client.set_model_version_tag(
                name=MODEL_NAME,
                version=new_version.version,
                key="validation_status",
                value="Rejected"
            )
            logger.warning(f"Model accuracy {metrics['accuracy']:.2f} below threshold {VALIDATION_THRESHOLD}")

    except Exception as e:
        logger.error(f"Model registration failed: {str(e)}")
        raise


def promote_model(client, new_version, metrics):
    """Promote a validated version: Staging first, then Production if it
    beats the current Champion (or no Champion exists yet)."""
    try:
        # Transition to Staging and mark it as the Challenger.
        client.transition_model_version_stage(
            name=MODEL_NAME,
            version=new_version.version,
            stage="Staging"
        )
        client.set_registered_model_alias(
            name=MODEL_NAME,
            alias="Challenger",
            version=new_version.version
        )

        # Compare against the current Champion, if any.
        try:
            champion_version = client.get_model_version_by_alias(MODEL_NAME, "Champion")
            champion_run = client.get_run(champion_version.run_id)
            champion_metrics = champion_run.data.metrics

            if metrics["accuracy"] > champion_metrics["accuracy"]:
                # Archive old champion.
                client.transition_model_version_stage(
                    name=MODEL_NAME,
                    version=champion_version.version,
                    stage="Archived"
                )

                # Promote new champion.
                client.transition_model_version_stage(
                    name=MODEL_NAME,
                    version=new_version.version,
                    stage="Production"
                )
                client.set_registered_model_alias(
                    name=MODEL_NAME,
                    alias="Champion",
                    version=new_version.version
                )
                logger.info(f"New champion! Version {new_version.version} promoted to Production")

        except Exception as e:
            # First deployment — no champion yet; promote directly.
            logger.warning(f"No existing champion found: {str(e)}")
            client.transition_model_version_stage(
                name=MODEL_NAME,
                version=new_version.version,
                stage="Production"
            )
            client.set_registered_model_alias(
                name=MODEL_NAME,
                alias="Champion",
                version=new_version.version
            )

    except Exception as e:
        logger.error(f"Model promotion failed: {str(e)}")
        raise


def train_and_register():
    """End-to-end pipeline: setup tracking, load data, train, evaluate,
    log, register and (conditionally) promote the model."""
    try:
        # Setup tracking
        setup_mlflow()

        # Load data
        X_train, X_test, y_train, y_test = load_and_validate_data()

        # Train model
        with mlflow.start_run(run_name=f"challenger_{datetime.now().strftime('%Y%m%d_%H%M%S')}"):
            # Log config
            mlflow.log_dict(CONFIG, "config.json")

            # Initialize and train the model
            model = RandomForestClassifier(**CONFIG['model']['params'])
            model.fit(X_train, y_train)

            # Evaluate
            metrics, y_pred = evaluate_model(model, X_test, y_test)

            # Log scalar metrics only; the nested report goes out as text.
            mlflow.log_metrics({
                k: v for k, v in metrics.items()
                if not k.endswith('_report') and isinstance(v, (int, float))
            })

            if 'classification_report' in metrics:
                mlflow.log_text(
                    json.dumps(metrics['classification_report'], indent=2),
                    "classification_report.json"
                )

            # Log and register the model
            signature = infer_signature(X_train, y_pred)
            mlflow.sklearn.log_model(
                sk_model=model,
                artifact_path="model",
                signature=signature,
                input_example=X_train.iloc[:1],
                registered_model_name=MODEL_NAME
            )

            # Register metadata and promote if it passes validation
            client = MlflowClient()
            register_and_promote_model(client, mlflow.active_run().info.run_id, metrics)

    except Exception as e:
        logger.error(f"Training pipeline failed: {str(e)}", exc_info=True)
        raise


if __name__ == "__main__":
    train_and_register()
"""Tests for src.train (combined view of tests/conftest.py and
tests/test_train.py)."""
import pytest
import pandas as pd
import numpy as np
from copy import deepcopy

from src.train import CONFIG, load_and_validate_data


@pytest.fixture
def sample_data():
    """200-row frame matching the schema src.train expects."""
    return pd.DataFrame({
        'feature1': np.random.normal(0, 1, 200),
        'feature2': np.random.uniform(0, 1, 200),
        'target': np.random.randint(0, 2, 200)
    })


@pytest.fixture
def original_config():
    """Snapshot of CONFIG taken before the test body runs."""
    return deepcopy(CONFIG)


@pytest.fixture(autouse=True)
def restore_config(original_config):
    """Auto-restore CONFIG in place after each test (tests mutate it)."""
    yield
    CONFIG.clear()
    CONFIG.update(original_config)


def test_data_loading(sample_data, tmp_path, monkeypatch):
    """load_and_validate_data reads the patched CSV and returns a split."""
    # Save test data
    test_path = tmp_path / "test_data.csv"
    sample_data.to_csv(test_path, index=False)

    # Point the loader at the temporary CSV. load_and_validate_data reads
    # the module-global DATA_PATH at *call* time, so no re-import is needed
    # (the original imported the function a second time "AFTER patching",
    # which was a no-op). pd.read_csv accepts a Path directly.
    monkeypatch.setattr('src.train.DATA_PATH', test_path)
    CONFIG['data']['target_col'] = 'target'  # matches the fixture column
    CONFIG['data']['min_samples'] = 5        # lower threshold for tests

    X_train, X_test, y_train, y_test = load_and_validate_data()

    assert len(X_train) > 0
    assert len(y_test) > 0