1+ import scripts .hotword_detection as hotword_detection
2+ import scripts .input as input
3+ import scripts .recognition as recognition
4+
5+ import struct
6+ import numpy as np
7+ import scipy .io .wavfile
8+ from abc import ABCMeta , abstractmethod
9+
10+ import torch
11+
# Sample rate handed to the microphone and to the WAV writer while the
# post-wake-word utterance is being recorded.
WHISPER_RATE = 44_100
# Size argument used for microphone initialization and for every microphone
# read during that recording phase.
WHISPER_FRAME_LENGTH = 1_024
14+
class Strategy(metaclass=ABCMeta):
    """Abstract interface for a recognition strategy.

    Concrete strategies must provide both ``read`` and ``recognize``;
    the class cannot be instantiated until both are overridden.
    """

    @abstractmethod
    def read(self):
        """Acquire the next piece of input. The base version does nothing."""

    @abstractmethod
    def recognize(self):
        """Evaluate the acquired input. The base version does nothing."""
23+
class WaitWakeupWord:
    """Strategy used while idle: listens for the wake-up word.

    Owns a ``hotword_detection.WakeupWordDetection`` detector and an
    ``input.Microphone``; the microphone is configured from the detector
    handle's ``sample_rate`` and ``frame_length``.
    """

    def __init__(self, access_key, keyword_path):
        """Build the detector from *access_key* / *keyword_path* and create the microphone."""
        self.wakeup = hotword_detection.WakeupWordDetection(access_key, keyword_path)
        self.mike = input.Microphone()

    def initialize(self):
        """Open the microphone as 16-bit input matching the detector handle."""
        handle = self.wakeup.handle
        self.mike.initialize(handle.sample_rate, input.pyaudio.paInt16, handle.frame_length)

    def read(self):
        """Read one detector-sized frame and store it in ``self.pcm`` as a tuple of int16 values."""
        sample_count = self.wakeup.handle.frame_length
        raw = self.mike.read(sample_count)
        # One "h" (signed 16-bit) per sample the detector expects.
        self.pcm = struct.unpack_from("h" * sample_count, raw)

    def recognize(self):
        """Run the detector on the most recently read frame.

        Returns:
            A ``(False, status)`` tuple; *status* is ``"Wake"`` when the
            detector handle returns a non-negative value and ``"Sleep"``
            otherwise. The first element is always ``False``.
        """
        keyword_index = self.wakeup.handle.process(self.pcm)
        status = "Wake" if keyword_index >= 0 else "Sleep"
        return False, status
42+
class WakeupWordDetected:
    """Strategy used after the wake-up word: records audio and transcribes it.

    Audio is read from an ``input.Microphone`` in float32 frames, buffered,
    written to ``temp.wav`` once enough frames have been collected, and then
    handed to ``recognition.Whisper``.
    """

    def __init__(self, modelsize, recoding_time):
        """Create the microphone and Whisper wrapper.

        Args:
            modelsize: Forwarded to ``recognition.Whisper``.
            recoding_time: Recording duration (presumably seconds -- it is
                multiplied by the sample rate); converted here into the
                number of microphone reads to collect, truncated to an int.
        """
        # List of float32 chunks, one per read(); joined only when writing.
        self.frames = []
        self.counter = 0
        self.mike = input.Microphone()
        self.whis = recognition.Whisper(modelsize)
        self.recoding_time = int(WHISPER_RATE * recoding_time / WHISPER_FRAME_LENGTH)

    def initialize(self):
        """Discard any buffered audio and open the microphone for float32 capture."""
        self.frames = []
        self.counter = 0
        self.mike.initialize(WHISPER_RATE, input.pyaudio.paFloat32, WHISPER_FRAME_LENGTH)

    def read(self):
        """Read one frame from the microphone and buffer it."""
        self.counter += 1
        audio = self.mike.read(WHISPER_FRAME_LENGTH)
        # Appending the chunk to a list is O(1); np.append would copy the
        # whole recording on every frame and promote it to float64.
        self.frames.append(np.frombuffer(audio, dtype=np.float32))

    def recognize(self):
        """Transcribe the recording once enough frames have been read.

        Returns:
            ``(False, "On recording...")`` while fewer than
            ``self.recoding_time`` frames have been read; otherwise
            ``(True, text)`` where *text* is the ``'text'`` entry of the
            Whisper wrapper's result.
        """
        # ">=" rather than "==": a target that truncated to 0, or a counter
        # that already passed the target, must not record forever.
        if self.counter < self.recoding_time:
            return False, "On recording..."

        if self.frames:
            samples = np.concatenate(self.frames)
        else:
            samples = np.zeros(0, dtype=np.float32)
        # Samples stay float32, so the file is written as 32-bit float WAV.
        scipy.io.wavfile.write("temp.wav", WHISPER_RATE, samples)
        # NOTE(review): the Whisper wrapper presumably reads "temp.wav" -- confirm.
        return True, self.whis.recognize()['text']
69+
class Recognizer:
    """Context object that delegates to the currently selected strategy.

    Holds one wake-word strategy and one Whisper strategy; the wake-word
    strategy is active after construction.
    """

    def __init__(self, access_key, keyword_path, modelsize, recoding_time):
        """Build both strategies and select the wake-word one."""
        self.wakeup_word_detection = WaitWakeupWord(access_key, keyword_path)
        self.whisper = WakeupWordDetected(modelsize, recoding_time)
        self.strategy = self.wakeup_word_detection

    def change_recognizer(self, recognizer):
        """Select the active strategy by name.

        ``'WakeupWordDetection'`` and ``'Whisper'`` are recognised; any
        other value leaves the current strategy unchanged.
        """
        if recognizer == 'WakeupWordDetection':
            selected = self.wakeup_word_detection
        elif recognizer == 'Whisper':
            selected = self.whisper
        else:
            # Unknown name: keep whatever is currently active.
            return
        self.strategy = selected

    def initialize(self):
        """Initialize the active strategy."""
        active = self.strategy
        active.initialize()

    def read(self):
        """Have the active strategy read its next input."""
        active = self.strategy
        active.read()

    def recognize(self):
        """Return whatever the active strategy's ``recognize`` returns."""
        active = self.strategy
        return active.recognize()
0 commit comments