mrbestnaija
diff --git a/dvc.lock b/dvc.lock
Lines changed: 132 additions & 0 deletions
diff --git a/dvc.yaml b/dvc.yaml
Lines changed: 54 additions & 0 deletions
@@ -18,3 +18,135 @@ stages:
       md5: 4224576f0267bf88902f87f0f6200967
       size: 2757
       isexec: true
+  featurize:
+    cmd: python src/stages/featurize.py --config=params.yaml
+    deps:
+    - path: data/raw/iris.csv
+      md5: 4224576f0267bf88902f87f0f6200967
+      size: 2757
+    - path: src/stages/featurize.py
+      md5: d1cc78e9ae6c9a43099cf2b43e377975
+      size: 1395
+    params:
+      params.yaml:
+        base:
+          random_state: 42
+          log_level: INFO
+        featurize:
+          features_path: data/processed/featured_iris.csv
+          target_column: target
+    outs:
+    - path: data/processed/featured_iris.csv
+      md5: 5d03a1564b3038fc35a842f8e4bde491
+      size: 7260
+      isexec: true
+  data_split:
+    cmd: python src/stages/data_split.py --config=params.yaml
+    deps:
+    - path: data/processed/featured_iris.csv
+      md5: 5d03a1564b3038fc35a842f8e4bde491
+      size: 7260
+    - path: src/stages/data_split.py
+      md5: 146a803b3261f01f798da85b49cfe00e
+      size: 1401
+    params:
+      params.yaml:
+        base:
+          random_state: 42
+          log_level: INFO
+        data_split:
+          test_size: 0.2
+          trainset_path: data/processed/train_iris.csv
+          testset_path: data/processed/test_iris.csv
+        featurize:
+          features_path: data/processed/featured_iris.csv
+          target_column: target
+    outs:
+    - path: data/processed/test_iris.csv
+      md5: b5e45593a772fc66629488e1806505c4
+      size: 1492
+      isexec: true
+    - path: data/processed/train_iris.csv
+      md5: ed8a7e5ba0a211251bdee6c498fe3eb4
+      size: 5724
+      isexec: true
+  train:
+    cmd: python src/stages/train.py --config=params.yaml
+    deps:
+    - path: data/processed/test_iris.csv
+      md5: b5e45593a772fc66629488e1806505c4
+      size: 1492
+    - path: data/processed/train_iris.csv
+      md5: ed8a7e5ba0a211251bdee6c498fe3eb4
+      size: 5724
+    - path: src/stages/train.py
+      md5: c8a0d71871c74e8abfa118bb165588f5
+      size: 1490
+    params:
+      params.yaml:
+        base:
+          random_state: 42
+          log_level: INFO
+        train:
+          cv: 3
+          estimator_name: logreg
+          estimators:
+            logreg:
+              param_grid:
+                C:
+                - 0.001
+                max_iter:
+                - 100
+                solver:
+                - lbfgs
+                multi_class:
+                - multinomial
+            svm:
+              param_grid:
+                C:
+                - 0.1
+                - 1.0
+                kernel:
+                - rbf
+                - linear
+                gamma:
+                - scale
+                degree:
+                - 3
+                - 5
+          model_path: models/model.joblib
+    outs:
+    - path: models/model.joblib
+      md5: 485ee3fb7877070a51a6b07d07d6244c
+      size: 2883
+      isexec: true
+  evaluate:
+    cmd: python src/stages/evaluate.py --config=params.yaml
+    deps:
+    - path: data/processed/test_iris.csv
+      md5: b5e45593a772fc66629488e1806505c4
+      size: 1492
+    - path: models/model.joblib
+      md5: 485ee3fb7877070a51a6b07d07d6244c
+      size: 2883
+    - path: src/stages/evaluate.py
+      md5: eab9636bc1bf222815f1941a3abfc99e
+      size: 2492
+    params:
+      params.yaml:
+        base:
+          random_state: 42
+          log_level: INFO
+        evaluate:
+          reports_dir: reports
+          metrics_file: metrics.json
+          confusion_matrix_image: confusion_matrix.png
+    outs:
+    - path: reports/confusion_matrix.png
+      md5: 64609d4d2fe8d2718531f253d881dde6
+      size: 24999
+      isexec: true
+    - path: reports/metrics.json
+      md5: d533847a0ca14ca93752b1b1f1df349e
+      size: 32
+      isexec: true
@@ -1,4 +1,6 @@
+# DAG of all the stages in the pipeline
 stages:
+  # Stage 1 (data_load): the first stage of the pipeline
   data_load:
     cmd: python src/stages/data_load.py --config=params.yaml
     deps:
@@ -8,3 +10,55 @@ stages:
     - data_load
     outs:
     - data/raw/iris.csv
+  # Stage 2 (featurize): runs src/stages/featurize.py with params.yaml as
+  # its config. Declares the raw CSV and the script as dependencies, tracks
+  # the `base` and `featurize` parameter sections, and declares
+  # data/processed/featured_iris.csv as its output.
+  featurize:
+    cmd: python src/stages/featurize.py --config=params.yaml
+    deps:
+    - data/raw/iris.csv
+    - src/stages/featurize.py
+    params:
+    - base
+    - featurize
+    outs:
+    - data/processed/featured_iris.csv
+  # Stage 3 (data_split): runs src/stages/data_split.py with params.yaml as
+  # its config. Depends on the featurized CSV and the script, and declares
+  # the test and train CSVs as outputs.
+  # NOTE(review): the `featurize` params section is tracked here in addition
+  # to `base` and `data_split` - presumably the script reads values from it;
+  # confirm against src/stages/data_split.py.
+  data_split:
+    cmd: python src/stages/data_split.py --config=params.yaml
+    deps:
+    - data/processed/featured_iris.csv
+    - src/stages/data_split.py
+    params:
+    - base
+    - data_split
+    - featurize
+    outs:
+    - data/processed/test_iris.csv
+    - data/processed/train_iris.csv
+  # Stage 4 (train): runs src/stages/train.py with params.yaml as its
+  # config, tracks the `base` and `train` parameter sections, and declares
+  # models/model.joblib as its output.
+  # NOTE(review): test_iris.csv is listed as a dependency, so a change to
+  # the test split also invalidates this stage - confirm train.py reads it.
+  train:
+    cmd: python src/stages/train.py --config=params.yaml
+    deps:
+    - data/processed/test_iris.csv
+    - data/processed/train_iris.csv
+    - src/stages/train.py
+    params:
+    - base
+    - train
+    outs:
+    - models/model.joblib
+  # Stage 5 (evaluate): runs src/stages/evaluate.py with params.yaml as its
+  # config. Depends on the trained model, the test split and the script, and
+  # tracks the `base` and `evaluate` parameter sections.
+  evaluate:
+    cmd: python src/stages/evaluate.py --config=params.yaml
+    deps:
+    - models/model.joblib
+    - data/processed/test_iris.csv
+    - src/stages/evaluate.py
+    params:
+    - base
+    - evaluate
+    # Declared under `metrics` (same path, still cached) rather than plain
+    # `outs` so that `dvc metrics show` / `dvc metrics diff` can read it.
+    metrics:
+    - reports/metrics.json
+    outs:
+    - reports/confusion_matrix.png