Skip to content

Commit ab6052a

Browse files
initial commit
1 parent 84f6f6d commit ab6052a

File tree

6 files changed

+241
-1
lines changed

6 files changed

+241
-1
lines changed

‎README.md‎

Lines changed: 80 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,81 @@
11
# AlexaLikeWhisper
2-
Implement of audio recognition "Whisper" released by OpenAI triggered on Wakeup word detection
2+
An implementation of OpenAI's speech recognition model "Whisper", triggered by wake-word detection.
3+
4+
# Demo
5+
![AlexaLikeWhisper](https://www.techlife-hacking.com/wp-content/uploads/2022/10/whisper.gif)
6+
After the wake word is detected, Whisper recognizes your speech, just like Alexa!
7+
Using the recognized words, you can control avatar robots, IoT devices, and more!
8+
9+
# System
10+
![System](https://www.techlife-hacking.com/wp-content/uploads/2022/10/whisper_en.png)
11+
Users : Say a wake word such as "Hey, Siri", followed by some speech
12+
PC : Captures the speech with a microphone and recognizes it with Whisper
13+
IoT : Performs tasks based on the recognized words
14+
15+
16+
# PC Spec
17+
OS : Ubuntu 20.04
18+
GPU : Geforce RTX 2080Ti
19+
# Setup
20+
## PC
21+
### Install build dependencies
22+
install pytorch
23+
Install a PyTorch build that matches your GPU, CUDA, and cuDNN versions.
24+
[Pytorch](https://www.techlife-hacking.com/?p=1325)
25+
26+
```
27+
# install transformers
28+
pip install transformers
29+
30+
# install whisper
31+
sudo apt update && sudo apt install ffmpeg
32+
pip install git+https://github.com/openai/whisper.git
33+
34+
# install pyaudio
35+
sudo apt-get install portaudio19-dev
36+
pip install pyaudio
37+
38+
# install pvporcupine
39+
pip install pvporcupine
40+
```
41+
42+
To use pvporcupine, you need to register with [PICOVOICE](https://console.picovoice.ai/) and get an AccessKey.
43+
Then download a wake-word model file (.ppn) and place it in AlexaLikeWhisper/model.
44+
45+
# Usage
46+
```
47+
# get source of alexa like whisper
48+
git clone https://github.com/tech-life-hacking/AlexaLikeWhisper.git
49+
```
50+
Place a model file(.ppn) in AlexaLikeWhisper/model.
51+
52+
```python
53+
import os
import sys

# Make the cloned AlexaLikeWhisper directory (next to this script) importable.
sys.path.append(os.path.join(os.path.dirname(__file__), 'AlexaLikeWhisper'))
import scripts.alexa_like_whisper as alexa_like_whisper

if __name__ == "__main__":
    # Model sizes offered by whisper
    MODELSIZES = ['tiny', 'base', 'small', 'medium', 'large']

    # AccessKey obtained from Picovoice Console (https://console.picovoice.ai/)
    ACCESS_KEY = "YOUR_ACCESS_KEY"
    # List of paths to the downloaded wake-word model file(s) (.ppn)
    KEYWORD_PATH = ['PPN_FILE_PATH']

    # Recording time after the wake word, in seconds
    RECORDING_TIME = 3

    # MODELSIZES[3] selects the 'medium' model
    alexa_like = alexa_like_whisper.AlexaLikeWhisper(ACCESS_KEY, KEYWORD_PATH, MODELSIZES[3], RECORDING_TIME)

    # Each run() call processes one audio chunk and returns a status string
    # or, once recording has finished, the recognized text.
    while True:
        result = alexa_like.run()
        print(result)
75+
76+
```
77+
78+
`result` contains:
79+
- While waiting for the wake word : "Sleep"
80+
- After the wake word has been detected, while recording : "On recording..."
81+
- Once the speech has been recognized : the recognized text

‎scripts/alexa_like_whisper.py‎

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
import scripts.strategy as strategy
2+
import time
3+
4+
class AlexaLikeWhisper():
    """Drive a recognizer context that alternates between two modes.

    The context starts in its initial mode, switches to 'Whisper' when a
    recognition result equals 'Wake', and switches back to
    'WakeupWordDetection' when the context reports that it has finished.
    """

    def __init__(self, access_key, keyword_path, modelsize, recoding_time=3):
        """Build the recognizer context and initialize it.

        All four arguments are forwarded unchanged to ``strategy.Recognizer``.
        """
        recognizer = strategy.Recognizer(access_key, keyword_path, modelsize, recoding_time)
        self.context = recognizer
        self.context.initialize()

    def run(self):
        """Process one read/recognize cycle and return the recognition result.

        After recognizing, the active recognizer is swapped (and
        re-initialized) for every transition whose condition holds.
        """
        ctx = self.context
        ctx.read()
        fin_flag, result = ctx.recognize()

        # (condition, event) pairs, checked in this order; both may fire.
        transitions = (
            (result == 'Wake', 'Whisper'),
            (fin_flag, 'WakeupWordDetection'),
        )
        for triggered, event in transitions:
            if triggered:
                ctx.change_recognizer(event)
                ctx.initialize()

        return result

‎scripts/hotword_detection.py‎

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
import pvporcupine
2+
3+
class WakeupWordDetection():
    """Hold a handle created by ``pvporcupine.create`` and feed frames to it."""

    def __init__(self, access_key, keyword_paths):
        """Create the engine handle.

        access_key: AccessKey obtained from Picovoice Console
            (https://console.picovoice.ai/).
        keyword_paths: passed through as ``keyword_paths`` to
            ``pvporcupine.create``.
        """
        engine = pvporcupine.create(
            access_key=access_key,
            keyword_paths=keyword_paths,
        )
        self.handle = engine

    def detect(self, pcm):
        """Pass ``pcm`` to ``handle.process`` and return whatever it returns."""
        outcome = self.handle.process(pcm)
        return outcome

‎scripts/input.py‎

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
import pyaudio
2+
3+
class Microphone():
    """Single-channel capture stream opened through PyAudio."""

    def __init__(self):
        """Open a mono 16-bit capture stream at the default input device's rate."""
        self.pa = pyaudio.PyAudio()
        # The stream is opened for capture (input=True), so the sample rate
        # must come from the default *input* device; the output device may
        # run at a rate the microphone does not support.
        device = self.pa.get_default_input_device_info()
        self.audio_stream = self.pa.open(
            rate=int(device['defaultSampleRate']),
            channels=1,
            format=pyaudio.paInt16,
            input=True)

    def initialize(self, rate, format, frame_length):
        """Close the current stream and reopen it with new settings.

        rate: sample rate for the new stream.
        format: PyAudio sample format constant for the new stream.
        frame_length: value used as ``frames_per_buffer``.
        """
        # The old stream has to be stopped and closed before reopening.
        self.audio_stream.stop_stream()
        self.audio_stream.close()
        self.audio_stream = self.pa.open(
            rate=rate,
            channels=1,
            format=format,
            input=True,
            frames_per_buffer=frame_length)

    def read(self, frame_length):
        """Read ``frame_length`` frames from the stream and return the result."""
        return self.audio_stream.read(frame_length)

‎scripts/recognition.py‎

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
import whisper
2+
3+
class Whisper():
    """Wrapper around a model loaded with ``whisper.load_model``."""

    def __init__(self, loaded_model):
        """Load the model named by ``loaded_model`` and announce completion."""
        # Parameter on whisper
        self.model = whisper.load_model(loaded_model)
        print('Finished loading model')

    def recognize(self, audio_path="temp.wav", language="ja"):
        """Transcribe an audio file and return the model's result.

        audio_path: file to transcribe; defaults to "temp.wav", the file the
            recording step writes.
        language: language hint passed to ``transcribe``; defaults to "ja"
            so existing callers keep their behaviour.

        Returns whatever ``self.model.transcribe`` returns (callers in this
        project index it with ``['text']``).
        """
        return self.model.transcribe(audio_path, language=language)

‎scripts/strategy.py‎

Lines changed: 91 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,91 @@
1+
import scripts.hotword_detection as hotword_detection
2+
import scripts.input as input
3+
import scripts.recognition as recognition
4+
5+
import struct
6+
import numpy as np
7+
import scipy.io.wavfile
8+
from abc import ABCMeta, abstractmethod
9+
10+
import torch
11+
12+
WHISPER_RATE = 44100
13+
WHISPER_FRAME_LENGTH = 1024
14+
15+
class Strategy(metaclass=ABCMeta):
    """Abstract interface: concrete strategies must provide read and recognize."""

    @abstractmethod
    def read(self):
        """Acquire the next piece of input (no-op in the base class)."""

    @abstractmethod
    def recognize(self):
        """Produce a recognition result (no-op in the base class)."""
23+
24+
class WaitWakeupWord():
    """Strategy used while waiting for the wake word."""

    def __init__(self, access_key, keyword_path):
        """Create the wake-word detector and a microphone."""
        self.wakeup = hotword_detection.WakeupWordDetection(access_key, keyword_path)
        self.mike = input.Microphone()

    def initialize(self):
        """Reopen the microphone with the detector's rate and frame length."""
        engine = self.wakeup.handle
        self.mike.initialize(engine.sample_rate, input.pyaudio.paInt16, engine.frame_length)

    def read(self):
        """Read one frame and store it in ``self.pcm`` as a tuple of int16 samples."""
        sample_count = self.wakeup.handle.frame_length
        raw = self.mike.read(sample_count)
        # "<n>h" is the counted form of "h" repeated n times.
        self.pcm = struct.unpack_from(f"{sample_count}h", raw)

    def recognize(self):
        """Run detection on the stored frame.

        Returns (False, "Wake") when the engine result is >= 0, otherwise
        (False, "Sleep"). The first element is always False.
        """
        keyword_index = self.wakeup.handle.process(self.pcm)
        state = "Wake" if keyword_index >= 0 else "Sleep"
        return False, state
42+
43+
class WakeupWordDetected():
    """Strategy used after the wake word: record audio, then transcribe it."""

    def __init__(self, modelsize, recoding_time):
        """Create the microphone and the Whisper wrapper.

        modelsize: forwarded to ``recognition.Whisper``.
        recoding_time: recording length in seconds; converted to a number
            of WHISPER_FRAME_LENGTH-sized chunks at WHISPER_RATE.
        """
        # Keep the buffer float32 from the start so appended chunks are not
        # silently promoted to float64.
        self.frames = np.empty(0, dtype=np.float32)
        self.counter = 0
        self.mike = input.Microphone()
        self.whis = recognition.Whisper(modelsize)
        self.recoding_time = int(WHISPER_RATE*recoding_time/WHISPER_FRAME_LENGTH)

    def initialize(self):
        """Clear the recording buffer and reopen the microphone as float32."""
        self.frames = np.empty(0, dtype=np.float32)
        self.counter = 0
        self.mike.initialize(WHISPER_RATE, input.pyaudio.paFloat32, WHISPER_FRAME_LENGTH)

    def read(self):
        """Read one chunk from the microphone and append it to the buffer."""
        self.counter += 1
        audio = self.mike.read(WHISPER_FRAME_LENGTH)
        chunk = np.frombuffer(audio, dtype=np.float32)
        # asarray guards against the buffer having been replaced by a plain
        # list, which would otherwise promote the result to float64.
        self.frames = np.concatenate((np.asarray(self.frames, dtype=np.float32), chunk))

    def recognize(self):
        """Transcribe once enough chunks have been recorded.

        Returns (True, text) after writing the buffer to "temp.wav" and
        running Whisper on it, or (False, "On recording...") while still
        recording.
        """
        # >= rather than ==: a very short recording time yields a chunk
        # count of 0, which the counter (already 1 after the first read)
        # would never equal.
        if self.counter >= self.recoding_time:
            self.frames = np.asarray(self.frames, dtype=np.float32).ravel()
            scipy.io.wavfile.write("temp.wav", WHISPER_RATE, self.frames)
            return True, self.whis.recognize()['text']
        return False, "On recording..."
69+
70+
class Recognizer:
    """Context object that delegates to the currently selected strategy."""

    def __init__(self, access_key, keyword_path, modelsize, recoding_time):
        """Build both strategies; the wake-word strategy is active first."""
        self.wakeup_word_detection = WaitWakeupWord(access_key, keyword_path)
        self.whisper = WakeupWordDetected(modelsize, recoding_time)
        self.strategy = self.wakeup_word_detection

    def change_recognizer(self, recognizer):
        """Select the active strategy by name.

        'WakeupWordDetection' and 'Whisper' are recognized; any other value
        leaves the active strategy unchanged.
        """
        if recognizer == 'WakeupWordDetection':
            self.strategy = self.wakeup_word_detection
            return
        if recognizer == 'Whisper':
            self.strategy = self.whisper
            return
        # Unknown names are deliberately ignored.

    def initialize(self):
        """Initialize the active strategy."""
        active = self.strategy
        active.initialize()

    def read(self):
        """Let the active strategy read its next input."""
        active = self.strategy
        active.read()

    def recognize(self):
        """Return the active strategy's recognition result."""
        active = self.strategy
        return active.recognize()

0 commit comments

Comments
 (0)