Skip to content

Commit 7435eb5

Browse files
authored
feat: upgrade speech models to v2 specification (#6798)
## background speech model implementations need to be updated to `specificationVersion: 'v2'` and to the `SpeechModelV2` types ## summary - update all speech models to use v2 specification version - create v2 speech model type definitions ## verification - type errors resolved for all speech providers - specification versions now match interface requirements ## tasks - [x] update specificationVersion to 'v2' in all speech model implementations - [x] create v2 speech model types and interfaces - [x] update provider exports to use v2 types
1 parent 159f418 commit 7435eb5

22 files changed

+93
-88
lines changed

‎.changeset/odd-peaches-beam.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
---
2+
'@ai-sdk/provider': major
3+
---
4+
5+
feat: upgrade speech models to v2 specification

‎content/docs/07-reference/01-ai-sdk-core/12-generate-speech.mdx

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ console.log(audio);
3737
content={[
3838
{
3939
name: 'model',
40-
type: 'SpeechModelV1',
40+
type: 'SpeechModelV2',
4141
description: 'The speech model to use.',
4242
},
4343
{

‎packages/ai/core/generate-speech/generate-speech.test.ts

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
11
import {
22
JSONValue,
3-
SpeechModelV1,
4-
SpeechModelV1CallWarning,
3+
SpeechModelV2,
4+
SpeechModelV2CallWarning,
55
} from '@ai-sdk/provider';
6-
import { MockSpeechModelV1 } from '../test/mock-speech-model-v1';
6+
import { MockSpeechModelV2 } from '../test/mock-speech-model-v2';
77
import { generateSpeech } from './generate-speech';
88
import {
99
GeneratedAudioFile,
@@ -21,7 +21,7 @@ const sampleText = 'This is a sample text to convert to speech.';
2121

2222
const createMockResponse = (options: {
2323
audio: GeneratedAudioFile;
24-
warnings?: SpeechModelV1CallWarning[];
24+
warnings?: SpeechModelV2CallWarning[];
2525
timestamp?: Date;
2626
modelId?: string;
2727
headers?: Record<string, string>;
@@ -42,10 +42,10 @@ describe('generateSpeech', () => {
4242
const abortController = new AbortController();
4343
const abortSignal = abortController.signal;
4444

45-
let capturedArgs!: Parameters<SpeechModelV1['doGenerate']>[0];
45+
let capturedArgs!: Parameters<SpeechModelV2['doGenerate']>[0];
4646

4747
await generateSpeech({
48-
model: new MockSpeechModelV1({
48+
model: new MockSpeechModelV2({
4949
doGenerate: async args => {
5050
capturedArgs = args;
5151
return createMockResponse({
@@ -73,7 +73,7 @@ describe('generateSpeech', () => {
7373

7474
it('should return warnings', async () => {
7575
const result = await generateSpeech({
76-
model: new MockSpeechModelV1({
76+
model: new MockSpeechModelV2({
7777
doGenerate: async () =>
7878
createMockResponse({
7979
audio: mockFile,
@@ -103,7 +103,7 @@ describe('generateSpeech', () => {
103103

104104
it('should return the audio data', async () => {
105105
const result = await generateSpeech({
106-
model: new MockSpeechModelV1({
106+
model: new MockSpeechModelV2({
107107
doGenerate: async () =>
108108
createMockResponse({
109109
audio: mockFile,
@@ -130,7 +130,7 @@ describe('generateSpeech', () => {
130130
it('should throw NoSpeechGeneratedError when no audio is returned', async () => {
131131
await expect(
132132
generateSpeech({
133-
model: new MockSpeechModelV1({
133+
model: new MockSpeechModelV2({
134134
doGenerate: async () =>
135135
createMockResponse({
136136
audio: new DefaultGeneratedAudioFile({
@@ -157,7 +157,7 @@ describe('generateSpeech', () => {
157157
it('should include response headers in error when no audio generated', async () => {
158158
await expect(
159159
generateSpeech({
160-
model: new MockSpeechModelV1({
160+
model: new MockSpeechModelV2({
161161
doGenerate: async () =>
162162
createMockResponse({
163163
audio: new DefaultGeneratedAudioFile({
@@ -192,7 +192,7 @@ describe('generateSpeech', () => {
192192
const testHeaders = { 'x-test': 'value' };
193193

194194
const result = await generateSpeech({
195-
model: new MockSpeechModelV1({
195+
model: new MockSpeechModelV2({
196196
doGenerate: async () =>
197197
createMockResponse({
198198
audio: mockFile,

‎packages/ai/core/generate-speech/generate-speech.ts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
import { JSONValue, SpeechModelV1 } from '@ai-sdk/provider';
1+
import { JSONValue, SpeechModelV2 } from '@ai-sdk/provider';
22
import { NoSpeechGeneratedError } from '../../src/error/no-speech-generated-error';
33
import {
44
audioMediaTypeSignatures,
@@ -46,7 +46,7 @@ export async function generateSpeech({
4646
/**
4747
The speech model to use.
4848
*/
49-
model: SpeechModelV1;
49+
model: SpeechModelV2;
5050

5151
/**
5252
The text to convert to speech.

‎packages/ai/core/test/mock-speech-model-v1.ts

Lines changed: 0 additions & 24 deletions
This file was deleted.

‎packages/ai/core/test/mock-speech-model-v2.ts

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
import { SpeechModelV2 } from '@ai-sdk/provider';
2+
import { notImplemented } from './not-implemented';
3+
4+
export class MockSpeechModelV2 implements SpeechModelV2 {
5+
readonly specificationVersion = 'v2';
6+
readonly provider: SpeechModelV2['provider'];
7+
readonly modelId: SpeechModelV2['modelId'];
8+
9+
doGenerate: SpeechModelV2['doGenerate'];
10+
11+
constructor({
12+
provider = 'mock-provider',
13+
modelId = 'mock-model-id',
14+
doGenerate = notImplemented,
15+
}: {
16+
provider?: SpeechModelV2['provider'];
17+
modelId?: SpeechModelV2['modelId'];
18+
doGenerate?: SpeechModelV2['doGenerate'];
19+
} = {}) {
20+
this.provider = provider;
21+
this.modelId = modelId;
22+
this.doGenerate = doGenerate;
23+
}
24+
}

‎packages/ai/core/transcribe/transcribe.test.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ import {
33
TranscriptionModelV2,
44
TranscriptionModelV2CallWarning,
55
} from '@ai-sdk/provider';
6-
import { MockTranscriptionModelV2 } from '../test/mock-transcription-model-v1';
6+
import { MockTranscriptionModelV2 } from '../test/mock-transcription-model-v2';
77
import { transcribe } from './transcribe';
88

99
const audioData = new Uint8Array([1, 2, 3, 4]); // Sample audio data
Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,12 @@
1-
import { SpeechModelV1, SpeechModelV1CallWarning } from '@ai-sdk/provider';
1+
import { SpeechModelV2, SpeechModelV2CallWarning } from '@ai-sdk/provider';
22

33
/**
44
Speech model that is used by the AI SDK Core functions.
55
*/
6-
export type SpeechModel = SpeechModelV1;
6+
export type SpeechModel = SpeechModelV2;
77

88
/**
99
Warning from the model provider for this call. The call will proceed, but e.g.
1010
some settings might not be supported, which can lead to suboptimal results.
1111
*/
12-
export type SpeechWarning = SpeechModelV1CallWarning;
12+
export type SpeechWarning = SpeechModelV2CallWarning;

‎packages/hume/src/hume-provider.ts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
import { SpeechModelV1, ProviderV2 } from '@ai-sdk/provider';
1+
import { SpeechModelV2, ProviderV2 } from '@ai-sdk/provider';
22
import { FetchFunction, loadApiKey } from '@ai-sdk/provider-utils';
33
import { HumeSpeechModel } from './hume-speech-model';
44

@@ -10,7 +10,7 @@ export interface HumeProvider extends Pick<ProviderV2, 'speechModel'> {
1010
/**
1111
Creates a model for speech synthesis.
1212
*/
13-
speech(): SpeechModelV1;
13+
speech(): SpeechModelV2;
1414
}
1515

1616
export interface HumeProviderSettings {

0 commit comments

Comments
 (0)