I implemented speech recognition using Apple's new Speech framework (`SFSpeechRecognizer`).
However, I cannot find any guidance on how to implement automatic detection of the end of speech.
Does anyone know whether this is possible, and if so, how?
/// Starts a live speech-recognition session fed from the audio engine's input node.
///
/// Steps, in order:
/// 1. Cancels any recognition task that is still stored in `recognitionTask`.
/// 2. Configures and activates the shared `AVAudioSession` for recording.
/// 3. Creates a new `SFSpeechAudioBufferRecognitionRequest` with partial results enabled.
/// 4. Starts a recognition task whose handler writes the best transcription into
///    `textView` and tears the session down on an error or a final result.
/// 5. Installs a tap on the input node that appends captured buffers to the request,
///    then prepares and starts the audio engine.
///
/// Traps with `fatalError` if the audio engine has no input node or the request
/// could not be created.
func startRecording() {
    // Cancel a recognition task that is still running before starting a new one.
    if recognitionTask != nil {
        recognitionTask?.cancel()
        recognitionTask = nil
    }

    // Create an audio session for the audio recording.
    let audioSession = AVAudioSession.sharedInstance()
    do {
        try audioSession.setCategory(AVAudioSessionCategoryRecord)   // recording
        try audioSession.setMode(AVAudioSessionModeMeasurement)      // measurement
        try audioSession.setActive(true, with: .notifyOthersOnDeactivation)
    } catch {
        // Keep going as the original did, but surface the underlying error.
        print("audioSession properties weren't set because of an error: \(error)")
    }

    // The request object that the captured audio buffers are appended to.
    recognitionRequest = SFSpeechAudioBufferRecognitionRequest()

    // Check that the device has an audio input for recording.
    guard let inputNode = audioEngine.inputNode else {
        fatalError("Audio engine has no input node")
    }

    guard let recognitionRequest = recognitionRequest else {
        fatalError("Unable to create an SFSpeechAudioBufferRecognitionRequest object")
    }

    // Partial results are reported too, so the text view updates while speaking.
    recognitionRequest.shouldReportPartialResults = true

    // Start recognition. `self` is captured weakly because `self` stores the task
    // and the task keeps this handler alive.
    recognitionTask = speechRecognizer?.recognitionTask(with: recognitionRequest, resultHandler: { [weak self] result, error in
        guard let strongSelf = self else { return }

        var isFinal = false

        // Bind the result once instead of force-unwrapping an optional chain.
        if let result = result {
            strongSelf.textView.text = result.bestTranscription.formattedString
            isFinal = result.isFinal
        }

        // On an error or a final result, stop everything and re-enable the button.
        if error != nil || isFinal {
            strongSelf.audioEngine.stop()
            inputNode.removeTap(onBus: 0)

            strongSelf.recognitionRequest = nil
            strongSelf.recognitionTask = nil

            strongSelf.microphoneButton.isEnabled = true
        }
    })

    // Feed the microphone audio into the request.
    let recordingFormat = inputNode.outputFormat(forBus: 0)
    inputNode.installTap(onBus: 0, bufferSize: 1024, format: recordingFormat) { [weak self] buffer, when in
        // Once the handler above clears `recognitionRequest`, this becomes a no-op.
        self?.recognitionRequest?.append(buffer)
    }

    // Prepare and start the audio engine.
    audioEngine.prepare()
    do {
        try audioEngine.start()
    } catch {
        print("audioEngine couldn't start because of an error: \(error)")
    }

    textView.text = "Say something, I'm listening!"
}
} // NOTE(review): closes an enclosing declaration that begins outside this excerpt — confirm
source share