Documentation
¶
Index ¶
- Variables
- func BackendInit()
- func EnableDebug()
- func FreeModel(model *Model)
- func GetModelArch(modelPath string) (string, error)
- func PrintSystemInfo() string
- func Quantize(infile, outfile string, ftype uint32) error
- func SchemaToGrammar(schema []byte) []byte
- type Batch
- type ClipContext
- type Context
- func (c *Context) Decode(batch *Batch) error
- func (c *Context) GetEmbeddingsIth(i int) []float32
- func (c *Context) GetEmbeddingsSeq(seqId int) []float32
- func (c *Context) KvCacheClear()
- func (c *Context) KvCacheDefrag()
- func (c *Context) KvCacheSeqAdd(seqId int, p0 int, p1 int, delta int)
- func (c *Context) KvCacheSeqCp(srcSeqId int, dstSeqId int, p0 int, p1 int)
- func (c *Context) KvCacheSeqRm(seqId int, p0 int, p1 int) bool
- func (c *Context) Model() *Model
- func (c *Context) SetCrossAttention(state bool)
- func (c *Context) Synchronize()
- type ContextParams
- type MllamaContext
- type Model
- func (m *Model) AddBOSToken() bool
- func (m *Model) ApplyLoraFromFile(context *Context, loraPath string, scale float32, threads int) error
- func (m *Model) NEmbd() int
- func (m *Model) NumVocab() int
- func (m *Model) TokenIsEog(token int) bool
- func (m *Model) TokenToPiece(token int) string
- func (m *Model) Tokenize(text string, addSpecial bool, parseSpecial bool) ([]int, error)
- type ModelParams
- type SamplingContext
- type SamplingParams
Constants ¶
This section is empty.
Variables ¶
var ErrKvCacheFull = errors.New("could not find a kv cache slot")
Functions ¶
func BackendInit ¶
func BackendInit()
func EnableDebug ¶
func EnableDebug()
func GetModelArch ¶
func PrintSystemInfo ¶
func PrintSystemInfo() string
func SchemaToGrammar ¶
SchemaToGrammar converts the provided JSON schema to a grammar. It returns nil if the provided schema is invalid JSON or an invalid JSON schema.
Types ¶
type Batch ¶
type Batch struct {
// contains filtered or unexported fields
}
func NewBatch ¶
NewBatch creates a new batch for either word tokens or image embeddings (if embedSize is non-zero). A batch cannot contain both types at the same time. batchSize is the maximum number of entries that can be added per sequence.
func (*Batch) Add ¶
Add adds either a token or an image embedding to the batch, depending on the type the batch was initialized with; the other argument is ignored. The entry is added at the given position for the given sequence IDs, and can optionally be marked to include logits.
func (*Batch) IsEmbedding ¶
type ClipContext ¶
type ClipContext struct {
// contains filtered or unexported fields
}
vision processing
func NewClipContext ¶
func NewClipContext(llamaContext *Context, modelPath string) (*ClipContext, error)
func (*ClipContext) Free ¶
func (c *ClipContext) Free()
type Context ¶
type Context struct {
// contains filtered or unexported fields
}
func NewContextWithModel ¶
func NewContextWithModel(model *Model, params ContextParams) (*Context, error)
func (*Context) GetEmbeddingsIth ¶
func (*Context) GetEmbeddingsSeq ¶
GetEmbeddingsSeq gets the embeddings for a sequence ID.
func (*Context) KvCacheClear ¶
func (c *Context) KvCacheClear()
func (*Context) KvCacheDefrag ¶
func (c *Context) KvCacheDefrag()
func (*Context) KvCacheSeqAdd ¶
func (*Context) KvCacheSeqCp ¶
func (*Context) SetCrossAttention ¶
func (*Context) Synchronize ¶
func (c *Context) Synchronize()
type ContextParams ¶
type ContextParams struct {
// contains filtered or unexported fields
}
func NewContextParams ¶
type MllamaContext ¶
type MllamaContext struct {
// contains filtered or unexported fields
}
func NewMllamaContext ¶
func NewMllamaContext(llamaContext *Context, modelPath string) (*MllamaContext, error)
func (*MllamaContext) EmbedSize ¶
func (m *MllamaContext) EmbedSize(llamaContext *Context) int
func (*MllamaContext) Free ¶
func (m *MllamaContext) Free()
type Model ¶
type Model struct {
// contains filtered or unexported fields
}
func LoadModelFromFile ¶
func LoadModelFromFile(modelPath string, params ModelParams) (*Model, error)
func (*Model) AddBOSToken ¶
func (*Model) ApplyLoraFromFile ¶
func (*Model) TokenIsEog ¶
func (*Model) TokenToPiece ¶
type ModelParams ¶
type SamplingContext ¶
type SamplingContext struct {
// contains filtered or unexported fields
}
Sampling. TODO: this is a temporary wrapper to allow calling C++ code from cgo.
func NewSamplingContext ¶
func NewSamplingContext(model *Model, params SamplingParams) (*SamplingContext, error)
func (*SamplingContext) Accept ¶
func (s *SamplingContext) Accept(id int, applyGrammar bool)
func (*SamplingContext) Reset ¶
func (s *SamplingContext) Reset()