vercel
diff --git a/‎.changeset/beige-ligers-kneel.md
Lines changed: 5 additions & 0 deletions b/‎.changeset/beige-ligers-kneel.md
Lines changed: 5 additions & 0 deletions
diff --git a/‎content/docs/04-ai-sdk-ui/22-smooth-stream-japanese.mdx
Lines changed: 19 additions & 0 deletions b/‎content/docs/04-ai-sdk-ui/22-smooth-stream-japanese.mdx
Lines changed: 19 additions & 0 deletions
diff --git a/‎content/docs/04-ai-sdk-ui/23-smooth-stream-chinese.mdx
Lines changed: 19 additions & 0 deletions b/‎content/docs/04-ai-sdk-ui/23-smooth-stream-chinese.mdx
Lines changed: 19 additions & 0 deletions
diff --git a/‎content/docs/07-reference/01-ai-sdk-core/80-smooth-stream.mdx
Lines changed: 55 additions & 2 deletions b/‎content/docs/07-reference/01-ai-sdk-core/80-smooth-stream.mdx
Lines changed: 55 additions & 2 deletions
diff --git a/‎examples/ai-core/src/stream-text/smooth-stream-chinese.ts
Lines changed: 39 additions & 0 deletions b/‎examples/ai-core/src/stream-text/smooth-stream-chinese.ts
Lines changed: 39 additions & 0 deletions
diff --git a/‎examples/ai-core/src/stream-text/smooth-stream-japanese.ts
Lines changed: 39 additions & 0 deletions b/‎examples/ai-core/src/stream-text/smooth-stream-japanese.ts
Lines changed: 39 additions & 0 deletions
diff --git a/‎packages/ai/core/generate-text/index.ts
Lines changed: 1 addition & 1 deletion b/‎packages/ai/core/generate-text/index.ts
Lines changed: 1 addition & 1 deletion
@@ -0,0 +1,5 @@
+---
+'ai': patch
+---
+
+feat(smooth-stream): chunking callbacks
@@ -0,0 +1,19 @@
+---
+title: Smooth streaming Japanese text
+description: Learn how to smooth stream Japanese text
+---
+
+# Smooth streaming Japanese text
+
+You can smooth stream Japanese text by using the `smoothStream` function with the following regex, which splits either on words or on individual Japanese characters:
+
+```tsx filename="page.tsx"
+import { smoothStream } from 'ai';
+import { useChat } from '@ai-sdk/react';
+
+const { data } = useChat({
+  experimental_transform: smoothStream({
+    chunking: /[\u3040-\u309F\u30A0-\u30FF]|\S+\s+/,
+  }),
+});
+```
@@ -0,0 +1,19 @@
+---
+title: Smooth streaming Chinese text
+description: Learn how to smooth stream Chinese text
+---
+
+# Smooth streaming Chinese text
+
+You can smooth stream Chinese text by using the `smoothStream` function with the following regex, which splits either on words or on individual Chinese characters:
+
+```tsx filename="page.tsx"
+import { smoothStream } from 'ai';
+import { useChat } from '@ai-sdk/react';
+
+const { data } = useChat({
+  experimental_transform: smoothStream({
+    chunking: /[\u4E00-\u9FFF]|\S+\s+/,
+  }),
+});
+```
@@ -42,14 +42,67 @@ const result = streamText({
     },
     {
       name: 'chunking',
-      type: '"word" | "line" | RegExp',
+      type: '"word" | "line" | RegExp | ((buffer: string) => string | undefined | null)',
       isOptional: true,
       description:
-        'Controls how the text is chunked for streaming. Use "word" to stream word by word (default), "line" to stream line by line, or provide a custom RegExp pattern for custom chunking.',
+        'Controls how the text is chunked for streaming. Use "word" to stream word by word (default), "line" to stream line by line, or provide a custom callback or RegExp pattern for custom chunking.',
     },
   ]}
 />
 
+#### Word chunking caveats with non-Latin languages
+
+<Note>
+    Word-based chunking **does not work well** with languages that do not delimit words with spaces.
+
+    For these languages we recommend using a custom regex, like the following:
+
+    - Chinese - `/[\u4E00-\u9FFF]|\S+\s+/`
+    - Japanese - `/[\u3040-\u309F\u30A0-\u30FF]|\S+\s+/`
+
+    For the following languages you could pass your own language-aware chunking function:
+
+    - Vietnamese
+    - Thai
+    - Javanese (Aksara Jawa)
+
+</Note>
+
+#### Regex based chunking
+
+To use regex based chunking, pass a `RegExp` to the `chunking` option.
+
+```ts
+// To split on underscores:
+smoothStream({
+  chunking: /_+/,
+});
+
+// Also can do it like this, same behavior
+smoothStream({
+  chunking: /[^_]*_/,
+});
+```
+
+#### Custom callback chunking
+
+To use a custom callback for chunking, pass a function to the `chunking` option.
+
+```ts
+smoothStream({
+  chunking: text => {
+    const findString = 'some string';
+    const index = text.indexOf(findString);
+
+    if (index === -1) {
+      return null;
+    }
+
+    return text.slice(0, index) + findString;
+  },
+});
+```
+
 ### Returns
 
 Returns a `TransformStream` that:
 
@@ -0,0 +1,39 @@
+import { simulateReadableStream, smoothStream, streamText } from 'ai';
+import { MockLanguageModelV1 } from 'ai/test';
+
+async function main() {
+  const result = streamText({
+    model: new MockLanguageModelV1({
+      doStream: async () => ({
+        stream: simulateReadableStream({
+          chunks: [
+            { type: 'text-delta', textDelta: '你好你好你好你好你好' },
+            { type: 'text-delta', textDelta: '你好你好你好你好你好' },
+            { type: 'text-delta', textDelta: '你好你好你好你好你好' },
+            { type: 'text-delta', textDelta: '你好你好你好你好你好' },
+            { type: 'text-delta', textDelta: '你好你好你好你好你好' },
+            {
+              type: 'finish',
+              finishReason: 'stop',
+              logprobs: undefined,
+              usage: { completionTokens: 10, promptTokens: 3 },
+            },
+          ],
+          chunkDelayInMs: 400,
+        }),
+        rawCall: { rawPrompt: null, rawSettings: {} },
+      }),
+    }),
+
+    prompt: 'Say hello in Chinese!',
+    experimental_transform: smoothStream({
+      chunking: /[\u4E00-\u9FFF]|\S+\s+/,
+    }),
+  });
+
+  for await (const textPart of result.textStream) {
+    process.stdout.write(textPart);
+  }
+}
+
+main().catch(console.error);
@@ -0,0 +1,39 @@
+import { simulateReadableStream, smoothStream, streamText } from 'ai';
+import { MockLanguageModelV1 } from 'ai/test';
+
+async function main() {
+  const result = streamText({
+    model: new MockLanguageModelV1({
+      doStream: async () => ({
+        stream: simulateReadableStream({
+          chunks: [
+            { type: 'text-delta', textDelta: 'こんにちは' },
+            { type: 'text-delta', textDelta: 'こんにちは' },
+            { type: 'text-delta', textDelta: 'こんにちは' },
+            { type: 'text-delta', textDelta: 'こんにちは' },
+            { type: 'text-delta', textDelta: 'こんにちは' },
+            {
+              type: 'finish',
+              finishReason: 'stop',
+              logprobs: undefined,
+              usage: { completionTokens: 10, promptTokens: 3 },
+            },
+          ],
+          chunkDelayInMs: 400,
+        }),
+        rawCall: { rawPrompt: null, rawSettings: {} },
+      }),
+    }),
+
+    prompt: 'Say hello in Japanese!',
+    experimental_transform: smoothStream({
+      chunking: /[\u3040-\u309F\u30A0-\u30FF]|\S+\s+/,
+    }),
+  });
+
+  for await (const textPart of result.textStream) {
+    process.stdout.write(textPart);
+  }
+}
+
+main().catch(console.error);
@@ -6,7 +6,7 @@ export type {
   GeneratedFile,
 } from './generated-file';
 export * as Output from './output';
-export { smoothStream } from './smooth-stream';
+export { smoothStream, type ChunkDetector } from './smooth-stream';
 export type { StepResult } from './step-result';
 export { streamText } from './stream-text';
 export type {
-Original file line number
+Diff line change
@@ @@ -0,0 +1,5 @@ @@
 +---
 +'ai': patch
 +---
++
 +feat(smooth-stream): chunking callbacks