tech-life-hacking
diff --git a/‎README.md‎
Lines changed: 80 additions & 1 deletion b/‎README.md‎
Lines changed: 80 additions & 1 deletion
diff --git a/‎scripts/alexa_like_whisper.py‎
Lines changed: 25 additions & 0 deletions b/‎scripts/alexa_like_whisper.py‎
Lines changed: 25 additions & 0 deletions
diff --git a/‎scripts/hotword_detection.py‎
Lines changed: 10 additions & 0 deletions b/‎scripts/hotword_detection.py‎
Lines changed: 10 additions & 0 deletions
diff --git a/‎scripts/input.py‎
Lines changed: 25 additions & 0 deletions b/‎scripts/input.py‎
Lines changed: 25 additions & 0 deletions
diff --git a/‎scripts/recognition.py‎
Lines changed: 10 additions & 0 deletions b/‎scripts/recognition.py‎
Lines changed: 10 additions & 0 deletions
diff --git a/‎scripts/strategy.py‎
Lines changed: 91 additions & 0 deletions b/‎scripts/strategy.py‎
Lines changed: 91 additions & 0 deletions
@@ -1,2 +1,81 @@
 # AlexaLikeWhisper
-Implement of audio recognition "Whisper" released by OpenAI triggered on Wakeup word detection
+An implementation of speech recognition with OpenAI's "Whisper", triggered by wake-word detection
+
+# Demo
+![AlexaLikeWhisper](https://www.techlife-hacking.com/wp-content/uploads/2022/10/whisper.gif)  
+After the wake word is detected, Whisper recognizes the speech that follows, just like Alexa!  
+Using the recognized words, you can control avatar robots, IoT devices, and more!  
+
+# System
+![System](https://www.techlife-hacking.com/wp-content/uploads/2022/10/whisper_en.png)  
+Users : Say a wake word such as "Hey, Siri", followed by some speech  
+PC : Captures the speech with a microphone and recognizes it with Whisper  
+IoT : Performs tasks based on the recognized words  
+
+
+# PC Spec
+OS : Ubuntu 20.04  
+GPU : GeForce RTX 2080 Ti  
+# Setup
+## PC
+### Install build dependencies
+Install PyTorch  
+Install a PyTorch build that matches your GPU, CUDA and cuDNN versions.  
+[Pytorch](https://www.techlife-hacking.com/?p=1325)  
+
+```
+# install transformers
+pip install transformers
+
+# install whisper
+sudo apt update && sudo apt install ffmpeg
+pip install git+https://github.com/openai/whisper.git
+
+# install pyaudio
+sudo apt-get install portaudio19-dev
+pip install pyaudio
+
+# install pvporcupine
+pip install pvporcupine
+```
+
+To use pvporcupine, you need to register at the [Picovoice Console](https://console.picovoice.ai/) and get an AccessKey.  
+Then download a keyword model file (.ppn) and place it in AlexaLikeWhisper/model.  
+
+# Usage
+```
+# get source of alexa like whisper
+git clone https://github.com/tech-life-hacking/AlexaLikeWhisper.git
+```
+Place a model file(.ppn) in AlexaLikeWhisper/model.  
+
+```python
+import os
+import sys
+
+sys.path.append(os.path.join(os.path.dirname(__file__), 'AlexaLikeWhisper'))
+import scripts.alexa_like_whisper as alexa_like_whisper
+
+if __name__ == "__main__":
+    # Modelsizes on whisper
+    MODELSIZES = ['tiny', 'base', 'small', 'medium', 'large']
+
+    # AccessKey obtained from Picovoice Console (https://console.picovoice.ai/)
+    ACCESS_KEY = "YOUR_ACCESS_KEY"
+    KEYWORD_PATH = ['PPN_FILE_PATH']
+
+    # Recording Time(s)
+    RECORDING_TIME = 3
+
+    alexa_like = alexa_like_whisper.AlexaLikeWhisper(ACCESS_KEY, KEYWORD_PATH, MODELSIZES[3], RECORDING_TIME)
+
+    while True:
+        result = alexa_like.run()
+        print(result)
+
+```
+
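+`result` contains:  
+- While waiting for the wake word : "Sleep"
+- On the frame where the wake word is detected : "Wake"
+- After the wake word is detected, while recording : "On recording..."
+- Once the recorded speech has been recognized : the transcribed text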
@@ -0,0 +1,25 @@
+import scripts.strategy as strategy
+import time
+
+class AlexaLikeWhisper():
+    def __init__(self, access_key, keyword_path, modelsize, recoding_time=3):
+        # initialize
+        self.context = strategy.Recognizer(access_key, keyword_path, modelsize, recoding_time)
+        self.context.initialize()
+
+    def run(self):
+        self.context.read()
+        fin_flag, result = self.context.recognize()
+
+        # get event
+        if result == 'Wake':
+            event = 'Whisper'
+            self.context.change_recognizer(event)
+            self.context.initialize()
+
+        if fin_flag:
+            event = 'WakeupWordDetection'
+            self.context.change_recognizer(event)
+            self.context.initialize()
+
+        return result
@@ -0,0 +1,10 @@
+import pvporcupine
+
+class WakeupWordDetection():
+    def __init__(self, access_key, keyword_paths):
+        # AccessKey obtained from Picovoice Console (https://console.picovoice.ai/)
+        access_key = access_key
+        self.handle = pvporcupine.create(access_key=access_key, keyword_paths=keyword_paths)
+
+    def detect(self, pcm):
+        return self.handle.process(pcm)
@@ -0,0 +1,25 @@
+import pyaudio
+
+class Microphone():
+    def __init__(self):
+        self.pa = pyaudio.PyAudio()
+        device = self.pa.get_default_output_device_info()
+        self.audio_stream = self.pa.open(
+            rate=int(device['defaultSampleRate']),
+            channels=1,
+            format=pyaudio.paInt16,
+            input=True)
+
+    def initialize(self, rate, format, frame_length):
+        # initialize
+        self.audio_stream.stop_stream()
+        self.audio_stream.close()
+        self.audio_stream = self.pa.open(
+            rate=rate,
+            channels=1,
+            format=format,
+            input=True,
+            frames_per_buffer=frame_length)
+
+    def read(self, frame_length):
+        return self.audio_stream.read(frame_length)
@@ -0,0 +1,10 @@
+import whisper
+
+class Whisper():
+    def __init__(self, loaded_model):
+        # Parameter on whisper
+        self.model = whisper.load_model(loaded_model)
+        print('Finished loading model')
+
+    def recognize(self):
+        return self.model.transcribe("temp.wav", language="ja")
@@ -0,0 +1,91 @@
+import scripts.hotword_detection as hotword_detection
+import scripts.input as input
+import scripts.recognition as recognition
+
+import struct
+import numpy as np
+import scipy.io.wavfile
+from abc import ABCMeta, abstractmethod
+
+import torch
+
+# Sample rate used to open the microphone while recording and to write temp.wav.
+WHISPER_RATE = 44100
+# Frames requested per microphone read (also the stream's frames_per_buffer) while recording.
+WHISPER_FRAME_LENGTH = 1024
+
+class Strategy(metaclass=ABCMeta):
+    @abstractmethod
+    def read(self):
+        pass
+
+    @abstractmethod
+    def recognize(self):
+        pass
+
+class WaitWakeupWord():
+    def __init__(self, access_key, keyword_path):
+        self.wakeup = hotword_detection.WakeupWordDetection(access_key, keyword_path)
+        self.mike = input.Microphone()
+
+    def initialize(self):
+        self.mike.initialize(self.wakeup.handle.sample_rate, input.pyaudio.paInt16, self.wakeup.handle.frame_length)
+
+    def read(self):
+        audio = self.mike.read(self.wakeup.handle.frame_length)
+        self.pcm = struct.unpack_from("h" * self.wakeup.handle.frame_length, audio)
+
+    def recognize(self):
+        result = self.wakeup.handle.process(self.pcm)
+        if result >= 0:
+            return False, "Wake"
+        else:
+            return False, "Sleep"
+
+class WakeupWordDetected():
+    def __init__(self, modelsize, recoding_time):
+        self.frames = []
+        self.counter = 0
+        self.mike = input.Microphone()
+        self.whis = recognition.Whisper(modelsize)
+        self.recoding_time = int(WHISPER_RATE*recoding_time/WHISPER_FRAME_LENGTH)
+
+    def initialize(self):
+        self.frames = []
+        self.counter = 0
+        self.mike.initialize(WHISPER_RATE, input.pyaudio.paFloat32, WHISPER_FRAME_LENGTH)
+
+    def read(self):
+        self.counter += 1
+        audio = self.mike.read(WHISPER_FRAME_LENGTH)
+        d = np.frombuffer(audio, dtype=np.float32)
+        self.frames = np.append(self.frames, d)
+
+    def recognize(self):
+        if self.counter == self.recoding_time:
+            self.frames = np.array(self.frames).flatten()
+            scipy.io.wavfile.write("temp.wav", WHISPER_RATE, self.frames)
+            return True, self.whis.recognize()['text']
+        else:
+            return False, "On recording..."
+
+class Recognizer:
+    def __init__(self, access_key, keyword_path, modelsize, recoding_time):
+        self.wakeup_word_detection = WaitWakeupWord(access_key, keyword_path)
+        self.whisper = WakeupWordDetected(modelsize, recoding_time)
+        self.strategy = self.wakeup_word_detection
+
+    def change_recognizer(self, recognizer):
+        if recognizer == 'WakeupWordDetection':
+            self.strategy = self.wakeup_word_detection
+        elif recognizer == 'Whisper':
+            self.strategy = self.whisper
+        else:
+            pass
+
+    def initialize(self):
+        self.strategy.initialize()
+
+    def read(self):
+        self.strategy.read()
+
+    def recognize(self):
+        return self.strategy.recognize()