Skip to content

Commit 97c8237

Browse files
committed
Updated the project to automate the data_load, featurize, data_split, train, and evaluate stages
1 parent 6f370b6 commit 97c8237

File tree

5 files changed

+251
-279
lines changed

5 files changed

+251
-279
lines changed

‎dvc.lock‎

Lines changed: 132 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,3 +18,135 @@ stages:
1818
md5: 4224576f0267bf88902f87f0f6200967
1919
size: 2757
2020
isexec: true
21+
featurize:
22+
cmd: python src/stages/featurize.py --config=params.yaml
23+
deps:
24+
- path: data/raw/iris.csv
25+
md5: 4224576f0267bf88902f87f0f6200967
26+
size: 2757
27+
- path: src/stages/featurize.py
28+
md5: d1cc78e9ae6c9a43099cf2b43e377975
29+
size: 1395
30+
params:
31+
params.yaml:
32+
base:
33+
random_state: 42
34+
log_level: INFO
35+
featurize:
36+
features_path: data/processed/featured_iris.csv
37+
target_column: target
38+
outs:
39+
- path: data/processed/featured_iris.csv
40+
md5: 5d03a1564b3038fc35a842f8e4bde491
41+
size: 7260
42+
isexec: true
43+
data_split:
44+
cmd: python src/stages/data_split.py --config=params.yaml
45+
deps:
46+
- path: data/processed/featured_iris.csv
47+
md5: 5d03a1564b3038fc35a842f8e4bde491
48+
size: 7260
49+
- path: src/stages/data_split.py
50+
md5: 146a803b3261f01f798da85b49cfe00e
51+
size: 1401
52+
params:
53+
params.yaml:
54+
base:
55+
random_state: 42
56+
log_level: INFO
57+
data_split:
58+
test_size: 0.2
59+
trainset_path: data/processed/train_iris.csv
60+
testset_path: data/processed/test_iris.csv
61+
featurize:
62+
features_path: data/processed/featured_iris.csv
63+
target_column: target
64+
outs:
65+
- path: data/processed/test_iris.csv
66+
md5: b5e45593a772fc66629488e1806505c4
67+
size: 1492
68+
isexec: true
69+
- path: data/processed/train_iris.csv
70+
md5: ed8a7e5ba0a211251bdee6c498fe3eb4
71+
size: 5724
72+
isexec: true
73+
train:
74+
cmd: python src/stages/train.py --config=params.yaml
75+
deps:
76+
- path: data/processed/test_iris.csv
77+
md5: b5e45593a772fc66629488e1806505c4
78+
size: 1492
79+
- path: data/processed/train_iris.csv
80+
md5: ed8a7e5ba0a211251bdee6c498fe3eb4
81+
size: 5724
82+
- path: src/stages/train.py
83+
md5: c8a0d71871c74e8abfa118bb165588f5
84+
size: 1490
85+
params:
86+
params.yaml:
87+
base:
88+
random_state: 42
89+
log_level: INFO
90+
train:
91+
cv: 3
92+
estimator_name: logreg
93+
estimators:
94+
logreg:
95+
param_grid:
96+
C:
97+
- 0.001
98+
max_iter:
99+
- 100
100+
solver:
101+
- lbfgs
102+
multi_class:
103+
- multinomial
104+
svm:
105+
param_grid:
106+
C:
107+
- 0.1
108+
- 1.0
109+
kernel:
110+
- rbf
111+
- linear
112+
gamma:
113+
- scale
114+
degree:
115+
- 3
116+
- 5
117+
model_path: models/model.joblib
118+
outs:
119+
- path: models/model.joblib
120+
md5: 485ee3fb7877070a51a6b07d07d6244c
121+
size: 2883
122+
isexec: true
123+
evaluate:
124+
cmd: python src/stages/evaluate.py --config=params.yaml
125+
deps:
126+
- path: data/processed/test_iris.csv
127+
md5: b5e45593a772fc66629488e1806505c4
128+
size: 1492
129+
- path: models/model.joblib
130+
md5: 485ee3fb7877070a51a6b07d07d6244c
131+
size: 2883
132+
- path: src/stages/evaluate.py
133+
md5: eab9636bc1bf222815f1941a3abfc99e
134+
size: 2492
135+
params:
136+
params.yaml:
137+
base:
138+
random_state: 42
139+
log_level: INFO
140+
evaluate:
141+
reports_dir: reports
142+
metrics_file: metrics.json
143+
confusion_matrix_image: confusion_matrix.png
144+
outs:
145+
- path: reports/confusion_matrix.png
146+
md5: 64609d4d2fe8d2718531f253d881dde6
147+
size: 24999
148+
isexec: true
149+
- path: reports/metrics.json
150+
md5: d533847a0ca14ca93752b1b1f1df349e
151+
size: 32
152+
isexec: true

‎dvc.yaml‎

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,6 @@
1+
# DAG of all the stages in the pipeline
12
stages:
3+
# The first stage of the pipeline
24
data_load:
35
cmd: python src/stages/data_load.py --config=params.yaml
46
deps:
@@ -8,3 +10,55 @@ stages:
810
- data_load
911
outs:
1012
- data/raw/iris.csv
13+
# The second stage of the pipeline
14+
featurize:
15+
cmd: python src/stages/featurize.py --config=params.yaml
16+
deps:
17+
- data/raw/iris.csv
18+
- src/stages/featurize.py
19+
params:
20+
- base
21+
- featurize
22+
outs:
23+
- data/processed/featured_iris.csv
24+
# The third stage of the pipeline
25+
data_split:
26+
cmd: python src/stages/data_split.py --config=params.yaml
27+
deps:
28+
- data/processed/featured_iris.csv
29+
- src/stages/data_split.py
30+
params:
31+
- base
32+
- data_split
33+
- featurize
34+
outs:
35+
- data/processed/test_iris.csv
36+
- data/processed/train_iris.csv
37+
# The fourth stage of the pipeline
38+
train:
39+
cmd: python src/stages/train.py --config=params.yaml
40+
deps:
41+
- data/processed/test_iris.csv
42+
- data/processed/train_iris.csv
43+
- src/stages/train.py
44+
params:
45+
- base
46+
- train
47+
outs:
48+
- models/model.joblib
49+
# The fifth stage of the pipeline
50+
evaluate:
51+
cmd: python src/stages/evaluate.py --config=params.yaml
52+
deps:
53+
- models/model.joblib
54+
- data/processed/test_iris.csv
55+
- src/stages/evaluate.py
56+
57+
params:
58+
- base
59+
- evaluate
60+
outs:
61+
- reports/metrics.json
62+
- reports/confusion_matrix.png
63+
64+

0 commit comments

Comments
 (0)