Skip to content

Commit 39d4e60

Browse files
authored
feat(js/plugins/google-genai): added support for tool request streaming for Gemini 3 (#3845)
1 parent 34d81d4 commit 39d4e60

File tree

12 files changed

+813
-117
lines changed

12 files changed

+813
-117
lines changed

‎js/plugins/google-genai/package.json‎

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,12 +31,14 @@
3131
"author": "genkit",
3232
"license": "Apache-2.0",
3333
"dependencies": {
34-
"google-auth-library": "^9.14.2"
34+
"google-auth-library": "^9.14.2",
35+
"jsonpath-plus": "^10.3.0"
3536
},
3637
"peerDependencies": {
3738
"genkit": "workspace:^"
3839
},
3940
"devDependencies": {
41+
"@types/jsonpath-plus": "^5.0.5",
4042
"@types/node": "^20.11.16",
4143
"@types/sinon": "^17.0.4",
4244
"npm-run-all": "^4.1.5",

‎js/plugins/google-genai/src/common/converters.ts‎

Lines changed: 166 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
* limitations under the License.
1515
*/
1616

17-
import { GenkitError, z } from 'genkit';
17+
import { GenkitError, ToolRequest, z } from 'genkit';
1818
import {
1919
CandidateData,
2020
MessageData,
@@ -23,12 +23,14 @@ import {
2323
TextPart,
2424
ToolDefinition,
2525
} from 'genkit/model';
26+
import { JSONPath } from 'jsonpath-plus';
2627
import {
2728
FunctionCallingMode,
2829
FunctionDeclaration,
2930
GenerateContentCandidate as GeminiCandidate,
3031
Content as GeminiContent,
3132
Part as GeminiPart,
33+
PartialArg,
3234
Schema,
3335
SchemaType,
3436
VideoMetadata,
@@ -139,38 +141,41 @@ function toGeminiToolRequest(part: Part): GeminiPart {
139141
if (!part.toolRequest?.input) {
140142
throw Error('Invalid ToolRequestPart: input was missing.');
141143
}
142-
return maybeAddGeminiThoughtSignature(part, {
143-
functionCall: {
144-
name: part.toolRequest.name,
145-
args: part.toolRequest.input,
146-
},
147-
});
144+
const functionCall: GeminiPart['functionCall'] = {
145+
name: part.toolRequest.name,
146+
args: part.toolRequest.input,
147+
};
148+
if (part.toolRequest.ref) {
149+
functionCall.id = part.toolRequest.ref;
150+
}
151+
return maybeAddGeminiThoughtSignature(part, { functionCall });
148152
}
149153

150154
function toGeminiToolResponse(part: Part): GeminiPart {
151155
if (!part.toolResponse?.output) {
152156
throw Error('Invalid ToolResponsePart: output was missing.');
153157
}
154-
return maybeAddGeminiThoughtSignature(part, {
155-
functionResponse: {
158+
const functionResponse: GeminiPart['functionResponse'] = {
159+
name: part.toolResponse.name,
160+
response: {
156161
name: part.toolResponse.name,
157-
response: {
158-
name: part.toolResponse.name,
159-
content: part.toolResponse.output,
160-
},
162+
content: part.toolResponse.output,
161163
},
164+
};
165+
if (part.toolResponse.ref) {
166+
functionResponse.id = part.toolResponse.ref;
167+
}
168+
return maybeAddGeminiThoughtSignature(part, {
169+
functionResponse,
162170
});
163171
}
164172

165173
function toGeminiReasoning(part: Part): GeminiPart {
166174
const out: GeminiPart = { thought: true };
167-
if (typeof part.metadata?.thoughtSignature === 'string') {
168-
out.thoughtSignature = part.metadata.thoughtSignature;
169-
}
170175
if (part.reasoning?.length) {
171176
out.text = part.reasoning;
172177
}
173-
return out;
178+
return maybeAddGeminiThoughtSignature(part, out);
174179
}
175180

176181
function toGeminiCustom(part: Part): GeminiPart {
@@ -354,10 +359,9 @@ function maybeAddThoughtSignature(geminiPart: GeminiPart, part: Part): Part {
354359
}
355360

356361
function fromGeminiThought(part: GeminiPart): Part {
357-
return {
362+
return maybeAddThoughtSignature(part, {
358363
reasoning: part.text || '',
359-
metadata: { thoughtSignature: part.thoughtSignature },
360-
};
364+
});
361365
}
362366

363367
function fromGeminiInlineData(part: GeminiPart): Part {
@@ -400,34 +404,153 @@ function fromGeminiFileData(part: GeminiPart): Part {
400404
});
401405
}
402406

403-
function fromGeminiFunctionCall(part: GeminiPart, ref: string): Part {
407+
/**
 * Applies Gemini partial args to the target object, mutating it in place.
 *
 * Each partial arg names a location with a JSON Path and carries one scalar
 * value (boolean, null, number or string). Missing intermediate containers are
 * created on the way to that location: an array when the next path segment is
 * a non-negative integer, a plain object otherwise. A string value arriving at
 * a location that already holds a string is appended to it, which is how
 * streamed string arguments are accumulated; every other value overwrites.
 *
 * Entries are skipped when they have no `jsonPath`, carry no value, address
 * only the root (`$`), or contain a `__proto__` segment.
 *
 * https://docs.cloud.google.com/vertex-ai/generative-ai/docs/reference/rest/v1/Content#PartialArg
 *
 * @param target Object that receives the values.
 * @param partialArgs Partial args to apply, in order.
 */
export function applyGeminiPartialArgs(
  target: object,
  partialArgs: PartialArg[]
): void {
  const arrayIndex = /^\d+$/;
  const hasOwn = (obj: object, key: string): boolean =>
    Object.prototype.hasOwnProperty.call(obj, key);

  for (const partialArg of partialArgs) {
    if (!partialArg.jsonPath) {
      continue;
    }

    let value: boolean | string | number | null | undefined;
    if (partialArg.boolValue !== undefined) {
      value = partialArg.boolValue;
    } else if (partialArg.nullValue !== undefined) {
      value = null;
    } else if (partialArg.numberValue !== undefined) {
      value = partialArg.numberValue;
    } else if (partialArg.stringValue !== undefined) {
      value = partialArg.stringValue;
    }
    if (value === undefined) {
      continue;
    }

    // ex: for path '$.data[0][0]' toPathArray returns: ['$', 'data', '0', '0']
    // The first entry is the root reference; the rest are dereferenced below.
    const path = JSONPath.toPathArray(partialArg.jsonPath);

    // The path comes from the model, so treat it as untrusted: a root-only
    // path has nothing to assign to, and `__proto__` would re-parent or write
    // through to a shared prototype.
    if (path.length < 2 || path.includes('__proto__')) {
      continue;
    }

    let current = target as Record<string, unknown>;
    for (let i = 1; i < path.length - 1; i++) {
      const key = path[i];
      // Only follow own properties. Following inherited ones (for example
      // `constructor` -> `prototype`) would end up writing to Object.prototype.
      const existing = hasOwn(current, key) ? current[key] : undefined;
      if (typeof existing === 'object' && existing !== null) {
        current = existing as Record<string, unknown>;
        continue;
      }
      // Nothing usable at this segment (absent, null or a scalar): create the
      // container implied by the next segment. Keys such as '2xl' are object
      // keys, not array indices, so require the whole segment to be digits.
      const container: unknown[] | Record<string, unknown> = arrayIndex.test(
        path[i + 1]
      )
        ? []
        : {};
      current[key] = container;
      current = container as Record<string, unknown>;
    }

    const finalKey = path[path.length - 1];
    const previous = hasOwn(current, finalKey) ? current[finalKey] : undefined;
    if (typeof value === 'string' && typeof previous === 'string') {
      // Streamed string chunks for the same path are concatenated.
      current[finalKey] = previous + value;
    } else {
      current[finalKey] = value;
    }
  }
}
462+
463+
function fromGeminiFunctionCall(
464+
part: GeminiPart,
465+
previousChunks?: CandidateData[]
466+
): Part {
404467
if (!part.functionCall) {
405468
throw Error(
406469
'Invalid Gemini Function Call Part: missing function call data'
407470
);
408471
}
409-
return maybeAddThoughtSignature(part, {
410-
toolRequest: {
411-
name: part.functionCall.name,
412-
input: part.functionCall.args,
413-
ref,
414-
},
415-
});
472+
const req: Partial<ToolRequest> = {
473+
name: part.functionCall.name,
474+
input: part.functionCall.args,
475+
};
476+
477+
if (part.functionCall.id) {
478+
req.ref = part.functionCall.id;
479+
}
480+
481+
if (part.functionCall.willContinue) {
482+
req.partial = true;
483+
}
484+
485+
handleFunctionCallPartials(req, part, previousChunks);
486+
487+
const toolRequest: Part = { toolRequest: req as ToolRequest };
488+
489+
return maybeAddThoughtSignature(part, toolRequest);
490+
}
491+
492+
/**
 * Reconciles a streamed Gemini function call chunk with the partial tool
 * request that came just before it, mutating `req` in place.
 *
 * Only the last part of the last entry in `previousChunks` is considered, and
 * only when it is a tool request flagged `partial`.
 *
 * @param req Tool request being assembled for the current chunk.
 * @param part Gemini part carrying the current function call chunk.
 * @param previousChunks Candidates converted from earlier chunks, if any.
 * @throws Error when `part` has no `functionCall`.
 */
function handleFunctionCallPartials(
  req: Partial<ToolRequest>,
  part: GeminiPart,
  previousChunks?: CandidateData[]
) {
  const { functionCall } = part;
  if (!functionCall) {
    throw Error(
      'Invalid Gemini Function Call Part: missing function call data'
    );
  }

  // Look for a still-open (partial) tool request at the tail of the stream.
  const lastPart = previousChunks?.at(-1)?.message.content?.at(-1);
  const pendingPart =
    lastPart?.toolRequest && lastPart?.toolRequest.partial
      ? lastPart
      : undefined;

  // When this chunk carries partialArgs, apply them on top of a deep copy of
  // the pending request's input (or an empty object when there is none).
  if (functionCall.partialArgs) {
    const earlierInput = pendingPart?.toolRequest?.input;
    const mergedInput = earlierInput
      ? JSON.parse(JSON.stringify(earlierInput))
      : {};
    applyGeminiPartialArgs(mergedInput, functionCall.partialArgs);
    req.input = mergedInput;
  }

  if (!pendingPart) {
    return;
  }

  // The API does not repeat these fields on continuation chunks, so carry
  // them over from the pending request.
  const inherited = pendingPart.toolRequest;
  if (!req.name) {
    req.name = inherited.name;
  }
  if (!req.ref) {
    req.ref = inherited.ref;
  }
  // The final chunk of a streamed function call arrives with nothing in it,
  // so the input accumulated so far has to be copied from the pending request.
  if (req.input === undefined) {
    req.input = inherited.input;
  }
}
417537

418-
function fromGeminiFunctionResponse(part: GeminiPart, ref?: string): Part {
538+
function fromGeminiFunctionResponse(part: GeminiPart): Part {
419539
if (!part.functionResponse) {
420540
throw new Error(
421541
'Invalid Gemini Function Call Part: missing function call data'
422542
);
423543
}
424-
return maybeAddThoughtSignature(part, {
544+
const toolResponse: Part = {
425545
toolResponse: {
426546
name: part.functionResponse.name.replace(/__/g, '/'), // restore slashes
427547
output: part.functionResponse.response,
428-
ref,
429548
},
430-
});
549+
};
550+
if (part.functionResponse.id) {
551+
toolResponse.toolResponse.ref = part.functionResponse.id;
552+
}
553+
return maybeAddThoughtSignature(part, toolResponse);
431554
}
432555

433556
function fromExecutableCode(part: GeminiPart): Part {
@@ -462,20 +585,26 @@ function fromGeminiText(part: GeminiPart): Part {
462585
return maybeAddThoughtSignature(part, { text: part.text } as TextPart);
463586
}
464587

465-
function fromGeminiPart(part: GeminiPart, ref: string): Part {
588+
function fromGeminiPart(
589+
part: GeminiPart,
590+
previousChunks?: CandidateData[]
591+
): Part {
466592
if (part.thought) return fromGeminiThought(part as any);
467593
if (typeof part.text === 'string') return fromGeminiText(part);
468594
if (part.inlineData) return fromGeminiInlineData(part);
469595
if (part.fileData) return fromGeminiFileData(part);
470-
if (part.functionCall) return fromGeminiFunctionCall(part, ref);
471-
if (part.functionResponse) return fromGeminiFunctionResponse(part, ref);
596+
if (part.functionCall) return fromGeminiFunctionCall(part, previousChunks);
597+
if (part.functionResponse) return fromGeminiFunctionResponse(part);
472598
if (part.executableCode) return fromExecutableCode(part);
473599
if (part.codeExecutionResult) return fromCodeExecutionResult(part);
474600

475601
throw new Error('Unsupported GeminiPart type ' + JSON.stringify(part));
476602
}
477603

478-
export function fromGeminiCandidate(candidate: GeminiCandidate): CandidateData {
604+
export function fromGeminiCandidate(
605+
candidate: GeminiCandidate,
606+
previousChunks?: CandidateData[]
607+
): CandidateData {
479608
const parts = candidate.content?.parts || [];
480609
const genkitCandidate: CandidateData = {
481610
index: candidate.index || 0,
@@ -484,7 +613,7 @@ export function fromGeminiCandidate(candidate: GeminiCandidate): CandidateData {
484613
content: parts
485614
// the model sometimes returns empty parts, ignore those.
486615
.filter((p) => Object.keys(p).length > 0)
487-
.map((part, index) => fromGeminiPart(part, index.toString())),
616+
.map((part) => fromGeminiPart(part, previousChunks)),
488617
},
489618
finishReason: fromGeminiFinishReason(candidate.finishReason),
490619
finishMessage: candidate.finishMessage,

‎js/plugins/google-genai/src/common/types.ts‎

Lines changed: 36 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -300,18 +300,44 @@ export declare interface GenerativeContentBlob {
300300
* values.
301301
*/
302302
export declare interface FunctionCall {
303+
/**
304+
* The unique id of the function call. If populated, the client is expected to execute the
305+
* `function_call` and return the response with the matching `id`.
306+
*/
307+
id?: string;
303308
/** The name of the function specified in FunctionDeclaration.name. */
304-
name: string;
309+
name?: string;
305310
/** The arguments to pass to the function. */
306-
args: object;
311+
args?: object;
312+
/** Optional. The partial argument value of the function call. If provided, represents the arguments/fields that are streamed incrementally. */
313+
partialArgs?: PartialArg[];
314+
/** Optional. Whether this is not the last part of the FunctionCall. If true, another partial message for the current FunctionCall is expected to follow. */
315+
willContinue?: boolean;
316+
}
317+
318+
/**
 * Partial argument value of the function call: one incremental update to the
 * argument located at `jsonPath`. This data type is not supported in Gemini
 * API.
 */
export declare interface PartialArg {
  /**
   * Required. A JSON Path (RFC 9535) to the argument being streamed, e.g.
   * "$.foo.bar[0].data". See https://datatracker.ietf.org/doc/html/rfc9535.
   */
  jsonPath?: string;
  /** Optional. Represents a string value. */
  stringValue?: string;
  /** Optional. Represents a double value. */
  numberValue?: number;
  /** Optional. Represents a boolean value. */
  boolValue?: boolean;
  /** Optional. Represents a null value. */
  nullValue?: 'NULL_VALUE';
  /**
   * Optional. Whether this is not the last part of the same json_path. If
   * true, another PartialArg message for the current json_path is expected to
   * follow.
   */
  willContinue?: boolean;
}
308-
309333
/**
310334
* The result output of a FunctionCall that contains a string representing
311335
* the FunctionDeclaration.name and a structured JSON object containing any
312336
* output from the function call. It is used as context to the model.
313337
*/
314338
export declare interface FunctionResponse {
339+
/** Optional. The id of the function call this response is for. Populated by the client to match the corresponding function call `id`. */
340+
id?: string;
315341
/** The name of the function specified in FunctionDeclaration.name. */
316342
name: string;
317343
/** The expected response from the model. */
@@ -1056,6 +1082,13 @@ export declare interface FunctionCallingConfig {
10561082
* will predict a function call from the set of function names provided.
10571083
*/
10581084
allowedFunctionNames?: string[];
1085+
1086+
/**
1087+
* When set to true, arguments of a single function call will be streamed out
1088+
* in multiple parts/contents/responses. Partial parameter results will be
1089+
* returned in the [FunctionCall.partial_args] field.
1090+
*/
1091+
streamFunctionCallArguments?: boolean;
10591092
}
10601093

10611094
export declare interface LatLng {

0 commit comments

Comments
 (0)