Large Language Models
- AquilaModel
- llama
- AquilaForCausalLM
- llama
- ArcticForCausalLM
- arctic
- BaiChuanForCausalLM
- baichuan
- BloomForCausalLM
- bloom
- CohereForCausalLM
- commandr
- DbrxForCausalLM
- dbrx
- DeciLMForCausalLM
- decilm
- DeepseekForCausalLM
- deepseek
- DeepseekV2ForCausalLM
- deepseek_v2
- ExaoneForCausalLM
- exaone
- FalconForCausalLM
- falcon
- GemmaForCausalLM
- gemma
- Gemma2ForCausalLM
- gemma2
- GPT2LMHeadModel
- gpt2
- GPTBigCodeForCausalLM
- gpt_bigcode
- GPTJForCausalLM
- gpt_j
- GPTNeoXForCausalLM
- gpt_neox
- GraniteForCausalLM
- granite
- GraniteMoeForCausalLM
- granitemoe
- InternLMForCausalLM
- llama
- InternLM2ForCausalLM
- internlm2
- InternLM2VEForCausalLM
- internlm2_ve
- JAISLMHeadModel
- jais
- JambaForCausalLM
- jamba
- LlamaForCausalLM
- llama
- LLaMAForCausalLM
- llama
- MambaForCausalLM
- mamba
- FalconMambaForCausalLM
- mamba
- MiniCPMForCausalLM
- minicpm
- MiniCPM3ForCausalLM
- minicpm3
- MistralForCausalLM
- llama
- MixtralForCausalLM
- mixtral
- QuantMixtralForCausalLM
- mixtral_quant
- MptForCausalLM
- mpt
- MPTForCausalLM
- mpt
- NemotronForCausalLM
- nemotron
- OlmoForCausalLM
- olmo
- OlmoeForCausalLM
- olmoe
- OPTForCausalLM
- opt
- OrionForCausalLM
- orion
- PersimmonForCausalLM
- persimmon
- PhiForCausalLM
- phi
- Phi3ForCausalLM
- phi3
- Phi3SmallForCausalLM
- phi3_small
- PhiMoEForCausalLM
- phimoe
- Qwen2ForCausalLM
- qwen2
- Qwen2MoeForCausalLM
- qwen2_moe
- RWForCausalLM
- falcon
- StableLMEpochForCausalLM
- stablelm
- StableLmForCausalLM
- stablelm
- Starcoder2ForCausalLM
- starcoder2
- SolarForCausalLM
- solar
- XverseForCausalLM
- xverse
Embedding
- BertModel
- bert
- RobertaModel
- roberta
- XLMRobertaModel
- roberta
- DeciLMForCausalLM
- decilm
- Gemma2Model
- gemma2
- LlamaModel
- llama
- MistralModel
- llama
- Phi3ForCausalLM
- phi3
- Qwen2Model
- qwen2
- Qwen2ForCausalLM
- qwen2
- Qwen2ForRewardModel
- qwen2_rm
- Qwen2ForSequenceClassification
- qwen2_cls
- LlavaNextForConditionalGeneration
- llava_next
- Phi3VForCausalLM
- phi3v
- Qwen2VLForConditionalGeneration
- qwen2_vl
Multimodal
- Blip2ForConditionalGeneration
- blip2
- ChameleonForConditionalGeneration
- chameleon
- ChatGLMModel
- chatglm
- ChatGLMForConditionalGeneration
- chatglm
- FuyuForCausalLM
- fuyu
- H2OVLChatModel
- h2ovl
- InternVLChatModel
- internvl
- Idefics3ForConditionalGeneration
- idefics3
- LlavaForConditionalGeneration
- llava
- LlavaNextForConditionalGeneration
- llava_next
- LlavaNextVideoForConditionalGeneration
- llava_next_video
- LlavaOnevisionForConditionalGeneration
- llava_onevision
- Llama3.2VisionForConditionalGeneration
- llama3.2_vision
- MiniCPMV
- minicpmv
- MolmoForCausalLM
- molmo
- NVLM_D
- nvlm_d
- PaliGemmaForConditionalGeneration
- paligemma
- Phi3VForCausalLM
- phi3v
- PixtralForConditionalGeneration
- pixtral
- Qwen2VLForConditionalGeneration
- qwen2_vl
- Qwen2AudioForConditionalGeneration
- qwen2_audio
- UltravoxModel
- ultravox
- MllamaForConditionalGeneration
- mllama
Speculative Decoding
- EAGLEModel
- eagle
- MedusaModel
- medusa
- MLPSpeculatorPreTrainedModel
- mlp_speculator