Built for speed with hardware-specific optimizations for Intel CPUs, GPUs, and NPUs. Advanced techniques like speculative decoding, KV-cache optimization, and others deliver maximum inference performance.
Simple, intuitive APIs in both Python and C++ that hide complexity while providing full control. Get started with just a few lines of code, then customize with advanced features as needed.
Pre-built pipelines for text generation, image creation, speech recognition, speech generation, and visual language processing. No need to build inference loops or handle tokenization - everything works out of the box.
Compatible with popular models including Llama, Mistral, Phi, Qwen, Stable Diffusion, Flux, Whisper, etc. Easy model conversion from Hugging Face and ModelScope.
Minimal dependencies and smaller disk footprint compared to heavyweight frameworks. Perfect for edge deployment, containers, and resource-constrained environments.
Run the same code on Linux, Windows, and macOS. Deploy across different hardware configurations without code changes - from laptops to data center servers.
Create chatbots, text summarization, content generation, and question-answering applications with state-of-the-art Large Language Models (LLMs).
# Load an LLM onto the CPU and answer a single prompt.
import openvino_genai as ov_genai

pipe = ov_genai.LLMPipeline(model_path, "CPU")
answer = pipe.generate("What is OpenVINO?", max_new_tokens=100)
print(answer)
#include "openvino/genai/llm_pipeline.hpp"
#include <iostream>

// Load an LLM onto the CPU and print the answer to a single prompt.
// Usage: <binary> <models_path>
int main(int argc, char* argv[]) {
std::string models_path = argv[1];
// Fix: the original constructed the pipeline from the undeclared
// identifier `model_path`; the declared variable is `models_path`.
ov::genai::LLMPipeline pipe(models_path, "CPU");
std::cout << pipe.generate("What is OpenVINO?", ov::genai::max_new_tokens(100)) << '\n';
}
// Build an LLM pipeline on the CPU and print one generated answer.
import { LLMPipeline } from "openvino-genai-node";

const pipe = await LLMPipeline(model_path, "CPU");
const answer = await pipe.generate("What is OpenVINO?", { max_new_tokens: 100 });
console.log(answer);
Analyze and describe images with Vision Language Models (VLMs) to build AI assistants and tools for legal document review, medical analysis, document processing, and visual content understanding applications.
import openvino_genai as ov_genai
import openvino as ov
from PIL import Image
import numpy as np
from pathlib import Path
def read_image(path: str) -> ov.Tensor:
    """Load one image file as an RGB ov.Tensor with a leading batch axis."""
    rgb = np.array(Image.open(path).convert("RGB"))
    return ov.Tensor(np.expand_dims(rgb, axis=0))
def read_images(path: str) -> list[ov.Tensor]:
    """Read one image file, or every file in a directory sorted by name."""
    entry = Path(path)
    if not entry.is_dir():
        return [read_image(path)]
    return [read_image(str(file)) for file in sorted(entry.iterdir())]
# Run the VLM over the prompt plus every image found under ./images.
pipe = ov_genai.VLMPipeline(model_path, "CPU")
images = read_images("./images")
decoded = pipe.generate(prompt, images=images, max_new_tokens=100)
print(decoded.texts[0])
# To input video frames, use 'videos='; frames tensor layout = [Frame, H, W, C]
# decoded = pipe.generate(prompt, videos=[frames], max_new_tokens=100)
#include "openvino/genai/visual_language/pipeline.hpp"
#include "load_image.hpp"
#include <iostream>

// Run a VLM over a prompt and the image(s) found at images_path.
// Usage: <binary> <models_path> <images_path>
int main(int argc, char* argv[]) {
std::string models_path = argv[1], images_path = argv[2];  // fix: removed stray second ';'
std::vector<ov::Tensor> images = utils::load_images(images_path);
ov::genai::VLMPipeline pipe(models_path, "CPU");
ov::genai::VLMDecodedResults result = pipe.generate(
prompt,
ov::genai::images(images),
ov::genai::max_new_tokens(100)
);
std::cout << result.texts[0] << std::endl;
// To input video frames, use the 'ov::genai::videos' property; frames tensor layout = [Frame, H, W, C]
// pipe.generate(prompt, ov::genai::videos(std::vector<ov::Tensor>{frames}), ov::genai::max_new_tokens(100));
}
import { addon as ov } from "openvino-node";
import { VLMPipeline } from "openvino-genai-node";
import { stat, readdir } from "node:fs/promises";
import sharp from "sharp";
import path from "node:path";
// Decode one image file into a u8 HWC ov.Tensor using sharp.
async function readImage(imagePath) {
  const image = sharp(imagePath);
  const { width, height, channels } = await image.metadata();
  const rawPixels = await image.raw().toBuffer();
  return new ov.Tensor(ov.element.u8, [height, width, channels], rawPixels);
}
// Read one image file, or every file in a directory sorted by name.
async function readImages(imagePath) {
  const info = await stat(imagePath);
  if (!info.isDirectory()) {
    return [await readImage(imagePath)];
  }
  const entries = (await readdir(imagePath)).sort();
  return Promise.all(entries.map((name) => readImage(path.join(imagePath, name))));
}
// Run the VLM over the prompt and every image found under ./images.
const pipe = await VLMPipeline(modelPath, "CPU");
const images = await readImages("./images");
const result = await pipe.generate(prompt, {
  images,
  generationConfig: { max_new_tokens: 100 },
});
console.log(result.texts[0]);
// To input video frames, use the 'videos' option; frames tensor layout = [Frame, H, W, C]
// const result = await pipe.generate(prompt, { videos: [frames], generationConfig: { max_new_tokens: 100 } });
Create and modify images with diffusion models for art generation, product design, and creative applications using Stable Diffusion and similar architectures.
# Generate one image from a text prompt and save it as a BMP file.
import openvino_genai as ov_genai
from PIL import Image

pipe = ov_genai.Text2ImagePipeline(model_path, "CPU")
image_tensor = pipe.generate(prompt)
Image.fromarray(image_tensor.data[0]).save("image.bmp")
#include "openvino/genai/image_generation/text2image_pipeline.hpp"
#include "imwrite.hpp"

// Generate one image from a text prompt and write it to image.bmp.
// Usage: <binary> <models_path> <prompt>
int main(int argc, char* argv[]) {
const std::string models_path = argv[1];
const std::string prompt = argv[2];
ov::genai::Text2ImagePipeline pipe(models_path, "CPU");
ov::Tensor image = pipe.generate(prompt);
imwrite("image.bmp", image, true);
}
Create videos with LTX-Video model using a diffusion-based generation pipeline to produce high-quality clips for creative storytelling, marketing content, product demos, and rapid visual prototyping.
import openvino_genai as ov_genai
import cv2
def save_video(filename: str, video_tensor, fps: int = 25):
    """Write a video tensor to one AVI file per batch item using OpenCV.

    Args:
        filename: Output path. With batch_size > 1, item b is written to
            `<base>_b{b}.<ext>` (extension defaults to "avi" if missing).
        video_tensor: Tensor with shape [Batch, Frame, H, W, C]; assumes RGB
            uint8 frames accessible via `.data` — TODO confirm against pipeline.
        fps: Playback frame rate for the written files.
    """
    batch_size, num_frames, height, width, _ = video_tensor.shape
    video_data = video_tensor.data
    # The MJPG codec id is loop-invariant; compute it once, not per batch item.
    fourcc = cv2.VideoWriter_fourcc(*"MJPG")
    for b in range(batch_size):
        if batch_size == 1:
            output_path = filename
        else:
            base, ext = filename.rsplit(".", 1) if "." in filename else (filename, "avi")
            output_path = f"{base}_b{b}.{ext}"
        writer = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
        try:
            for f in range(num_frames):
                # OpenCV expects BGR; the pipeline emits RGB frames.
                frame_bgr = cv2.cvtColor(video_data[b, f], cv2.COLOR_RGB2BGR)
                writer.write(frame_bgr)
        finally:
            # Release even if a frame conversion/write fails, so the file
            # header is finalized and the handle is not leaked.
            writer.release()
        print(f"Wrote {output_path} ({num_frames} frames, {width}x{height} @ {fps} fps)")
# Generate a short video clip from the prompt and save it as AVI.
pipe = ov_genai.Text2VideoPipeline(model_path, "CPU")
generated = pipe.generate(prompt)
save_video("genai_video.avi", generated.video)
#include "openvino/genai/video_generation/text2video_pipeline.hpp"
#include "imwrite_video.hpp"

// Generate a video clip from a text prompt and write it to genai_video.avi.
// Usage: <binary> <models_path> <prompt>
int main(int argc, char* argv[]) {
const std::string models_path = argv[1];
const std::string prompt = argv[2];
ov::genai::Text2VideoPipeline pipe(models_path, "CPU");
ov::Tensor video = pipe.generate(prompt).video;
imwrite_video("genai_video.avi", video);
}
Convert speech to text using Whisper models for video transcription, meeting notes, multilingual audio content processing, and accessibility applications.
import openvino_genai as ov_genai
import librosa
def read_wav(filepath):
    """Decode an audio file to a list of 16 kHz mono samples for Whisper."""
    samples, _sr = librosa.load(filepath, sr=16000)
    return samples.tolist()
# Transcribe the sample audio file on the CPU.
pipe = ov_genai.WhisperPipeline(model_path, "CPU")
raw_speech = read_wav('sample.wav')
print(pipe.generate(raw_speech, max_new_tokens=100))
#include "openvino/genai/whisper_pipeline.hpp"
#include "audio_utils.hpp"
#include <iostream>

// Transcribe a WAV file with a Whisper pipeline on the CPU.
// Usage: <binary> <models_path> <wav_file_path>
int main(int argc, char* argv[]) {
const std::filesystem::path models_path = argv[1];
const std::string wav_file_path = argv[2];
ov::genai::WhisperPipeline pipe(models_path, "CPU");
ov::genai::RawSpeechInput raw_speech = utils::audio::read_wav(wav_file_path);
std::cout << pipe.generate(raw_speech, ov::genai::max_new_tokens(100)) << std::endl;
}
Convert text to speech using SpeechT5 TTS models.
# Synthesize speech for a fixed phrase and store it as a 16 kHz WAV file.
import openvino_genai
import soundfile as sf

pipeline = openvino_genai.Text2SpeechPipeline(model_path, "CPU")
result = pipeline.generate("Hello OpenVINO GenAI")
# result.speeches holds one waveform tensor per generated utterance.
waveform = result.speeches[0]
sf.write("output_audio.wav", waveform.data[0], samplerate=16000)
#include "audio_utils.hpp"
#include "openvino/genai/speech_generation/text2speech_pipeline.hpp"

// Synthesize speech for a fixed phrase and save it to output_audio.wav.
// Usage: <binary> <models_path>
int main(int argc, char* argv[]) {
std::string models_path = argv[1];
// Fix: the original constructed the pipeline from the undeclared
// identifier `model_path`; the declared variable is `models_path`.
ov::genai::Text2SpeechPipeline pipeline(models_path, "CPU");
auto result = pipeline.generate("Hello OpenVINO GenAI");
auto waveform_size = result.speeches[0].get_size();
auto waveform_ptr = result.speeches[0].data<const float>();
auto bits_per_sample = result.speeches[0].get_element_type().bitwidth();
utils::audio::save_to_wav(waveform_ptr, waveform_size, "output_audio.wav", bits_per_sample);
return 0;
}
Generate vector representations for text using embedding models. Useful for semantic search, retrieval augmented generation (RAG).
# Embed a document collection and a query for semantic retrieval (RAG).
import openvino_genai as ov_genai

pipeline = ov_genai.TextEmbeddingPipeline(
    models_path,
    "CPU",
    pooling_type=ov_genai.TextEmbeddingPipeline.PoolingType.MEAN,
    normalize=True,
)
documents_embeddings = pipeline.embed_documents(documents)
query_embeddings = pipeline.embed_query("What is the capital of France?")
#include "openvino/genai/rag/text_embedding_pipeline.hpp"
#include <iostream>

// Embed a document collection and a query for semantic retrieval (RAG).
// Usage: <binary> <models_path> <document> [<document> ...]
// Fix: the original used a function-try-block on main() without a catch
// handler, which is ill-formed C++ — a handler must follow the try body.
int main(int argc, char* argv[]) try {
std::string models_path = argv[1];
auto documents = std::vector<std::string>(argv + 2, argv + argc);
ov::genai::TextEmbeddingPipeline pipeline(
models_path,
"CPU",
ov::genai::pooling_type(ov::genai::TextEmbeddingPipeline::PoolingType::MEAN),
ov::genai::normalize(true)
);
ov::genai::EmbeddingResults documents_embeddings = pipeline.embed_documents(documents);
ov::genai::EmbeddingResult query_embedding = pipeline.embed_query("What is the capital of France?");
} catch (const std::exception& error) {
std::cerr << error.what() << '\n';
return 1;
}
// Embed a document collection and a query for semantic retrieval (RAG).
import { TextEmbeddingPipeline, PoolingType } from "openvino-genai-node";

const pipeline = await TextEmbeddingPipeline(models_path, "CPU", {
  pooling_type: PoolingType.MEAN,
  normalize: true,
});
const documents_embeddings = await pipeline.embedDocuments(documents);
const query_embeddings = await pipeline.embedQuery("What is the capital of France?");
Boost the relevance and accuracy of your Retrieval-Augmented Generation (RAG) workflows by reranking retrieved documents with the TextRerankPipeline.
# Rerank candidate documents by relevance to the query, keeping the top 3.
import openvino_genai

pipeline = openvino_genai.TextRerankPipeline(model_path, "CPU", top_n=3)
print("Reranked documents:")
for index, score in pipeline.rerank(query, documents):
    print(f"Document {index} (score: {score:.4f}): {documents[index]}")
#include "openvino/genai/rag/text_rerank_pipeline.hpp"
#include <iostream>  // fix: required for std::cout, missing in the original

// Rerank candidate documents against the query and print the top matches.
// Usage: <binary> <models_path> <query> <document> [<document> ...]
int main(int argc, char* argv[]) {
std::vector<std::string> documents(argv + 3, argv + argc);
std::string models_path = argv[1], query = argv[2];
ov::genai::TextRerankPipeline pipeline(models_path, "CPU", ov::genai::top_n(3));
auto rerank_result = pipeline.rerank(query, documents);
std::cout << "Reranked documents:\n";
for (const auto& [index, score] : rerank_result) {
std::cout << "Document " << index << " (score: " << score << "): " << documents[index] << '\n';
}
}
// Rerank candidate documents against the query and print the top matches.
import { TextRerankPipeline } from 'openvino-genai-node';

const pipeline = await TextRerankPipeline(modelPath, {
  device: "CPU",
  config: { top_n: 3 },
});
const rerankResult = await pipeline.rerank(query, documents);
console.log("Reranked documents:");
for (const [index, score] of rerankResult) {
  console.log(`Document ${index} (score: ${score.toFixed(4)}): ${documents[index]}`);
}
Unlock the power of OpenVINO GenAI™ for your projects.
Get started with seamless installation now!
python -m pip install openvino-genai