Skip to content

Commit ee38081

Browse files
authored
feat: add support for audio/webm to detect-media-type (#7497)
## Background

Chrome's MediaRecorder produces WebM audio by default. When WebM audio was sent to the OpenAI transcription API, the mediaType was set to audio/wav, which caused an error.

## Summary

I added support for WebM to detect-media-type. The WebM mediaType is now set correctly in the gpt-4o-transcribe parameters.

## Verification

I modified my local installation of the AI package (version 5.0.0-beta.25) with the changes from this PR. I was then able to call transcribe successfully with audio from Chrome's MediaRecorder in my application.
1 parent 36aede2 commit ee38081

File tree

3 files changed

+32
-0
lines changed

3 files changed

+32
-0
lines changed

‎.changeset/polite-rivers-smoke.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
---
2+
'ai': patch
3+
---
4+
5+
Add support for audio/webm to detect-media-type

‎packages/ai/src/util/detect-media-type.test.ts

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -415,6 +415,28 @@ describe('detectMediaType', () => {
415415
});
416416
});
417417

418+
describe('WEBM', () => {
419+
it('should detect WEBM from bytes', () => {
420+
const webmBytes = new Uint8Array([0x1a, 0x45, 0xdf, 0xa3]);
421+
expect(
422+
detectMediaType({
423+
data: webmBytes,
424+
signatures: audioMediaTypeSignatures,
425+
}),
426+
).toBe('audio/webm');
427+
});
428+
429+
it('should detect WEBM from base64', () => {
430+
const webmBase64 = 'GkXfow=='; // Base64 string starting with WEBM signature
431+
expect(
432+
detectMediaType({
433+
data: webmBase64,
434+
signatures: audioMediaTypeSignatures,
435+
}),
436+
).toBe('audio/webm');
437+
});
438+
});
439+
418440
describe('error cases', () => {
419441
it('should return undefined for unknown image formats', () => {
420442
const unknownBytes = new Uint8Array([0x00, 0x01, 0x02, 0x03]);

‎packages/ai/src/util/detect-media-type.ts

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,11 @@ export const audioMediaTypeSignatures = [
8383
bytesPrefix: [0x66, 0x74, 0x79, 0x70],
8484
base64Prefix: 'ZnR5cA',
8585
},
86+
{
87+
mediaType: 'audio/webm',
88+
bytesPrefix: [0x1a, 0x45, 0xdf, 0xa3],
89+
base64Prefix: 'GkXf',
90+
},
8691
] as const;
8792

8893
const stripID3 = (data: Uint8Array | string) => {

0 commit comments

Comments
 (0)