Skip to content

Commit 73464b1

Browse files
committed
update app
1 parent a9dfd5b commit 73464b1

File tree

4 files changed: 27 additions and 6 deletions.

‎.gitignore‎

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,8 @@ ctcdecode/
33
wandb/
44
yttm*
55

6+
cache/
7+
68
# Byte-compiled / optimized / DLL files
79
__pycache__/
810
*.py[cod]

‎demo/app.py‎

Lines changed: 20 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -163,7 +163,6 @@ def action(file_uploader, transformations):
163163
st.balloons()
164164

165165

166-
167166
def recognize(file_path, audio):
168167
ds = {}
169168
ds["speech"] = audio
@@ -172,12 +171,14 @@ def recognize(file_path, audio):
172171
input_values = processor(ds["speech"], return_tensors="pt", padding="longest").input_values # Batch size 1
173172

174173
# retrieve logits
175-
logits = model(input_values).logits
174+
logits = model(input_values).logits[0]
176175

177176
# take argmax and decode
178-
predicted_ids = torch.argmax(logits, dim=-1)
179-
transcription = processor.batch_decode(predicted_ids)
180-
return transcription[0]
177+
# predicted_ids = torch.argmax(logits, dim=-1)
178+
# transcription = processor.batch_decode(predicted_ids)
179+
transcription = ngram_lm_model.decode(logits.cpu().detach().numpy(), beam_width=500)
180+
181+
return transcription
181182

182183

183184
def main():
@@ -188,7 +189,19 @@ def main():
188189
"Once you have chosen augmentation techniques, select or upload an audio file\n. "
189190
'Then click "Apply" to start! \n\n'
190191
)
192+
191193
if True:
194+
col1, col2, col3 = st.columns([1,9,1])
195+
196+
with col1:
197+
st.write("")
198+
199+
with col2:
200+
st.image("demo/assets/demoo.gif")
201+
202+
with col3:
203+
st.write("")
204+
192205
st.subheader("Team members:")
193206
members = '''
194207
Pham Hung Manh\n
@@ -197,7 +210,9 @@ def main():
197210
Nguyen Nhu Toan\n
198211
Ho Nguyen Khang\n'''
199212
st.markdown(members)
213+
200214
st.success("Manh Ph")
215+
# st.sidebar.image("demo/assets/demoo.gif")
201216
st.sidebar.markdown("Choose the transformations here:")
202217
gaussian_noise = st.sidebar.checkbox("GaussianNoise")
203218
frequency_mask = st.sidebar.checkbox("FrequencyMask")

‎demo/assets/demoo.gif‎

138 KB
Loading

‎requirements.txt‎

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,4 +8,8 @@ tqdm
88
librosa
99
python-Levenshtein
1010
audiomentations
11-
pytorch_warmup
11+
pytorch_warmup
12+
kenlm
13+
datasets
14+
transformers
15+
pyctcdecode

0 commit comments

Comments
 (0)