- AquilaModel
- llama
- AquilaForCausalLM
- llama
- ArcticForCausalLM
- arctic
- BaiChuanForCausalLM
- baichuan
- BloomForCausalLM
- bloom
- CohereForCausalLM
- commandr
- DbrxForCausalLM
- dbrx
- DeciLMForCausalLM
- decilm
- DeepseekForCausalLM
- deepseek
- DeepseekV2ForCausalLM
- deepseek_v2
- ExaoneForCausalLM
- exaone
- FalconForCausalLM
- falcon
- GemmaForCausalLM
- gemma
- Gemma2ForCausalLM
- gemma2
- GPT2LMHeadModel
- gpt2
- GPTBigCodeForCausalLM
- gpt_bigcode
- GPTJForCausalLM
- gpt_j
- GPTNeoXForCausalLM
- gpt_neox
- GraniteForCausalLM
- granite
- GraniteMoeForCausalLM
- granitemoe
- InternLMForCausalLM
- llama
- InternLM2ForCausalLM
- internlm2
- InternLM2VEForCausalLM
- internlm2_ve
- JAISLMHeadModel
- jais
- JambaForCausalLM
- jamba
- LlamaForCausalLM
- llama
- LLaMAForCausalLM
- llama
- MambaForCausalLM
- mamba
- FalconMambaForCausalLM
- mamba
- MiniCPMForCausalLM
- minicpm
- MiniCPM3ForCausalLM
- minicpm3
- MistralForCausalLM
- llama
- MixtralForCausalLM
- mixtral
- QuantMixtralForCausalLM
- mixtral_quant
- MptForCausalLM
- mpt
- MPTForCausalLM
- mpt
- NemotronForCausalLM
- nemotron
- OlmoForCausalLM
- olmo
- OlmoeForCausalLM
- olmoe
- OPTForCausalLM
- opt
- OrionForCausalLM
- orion
- PersimmonForCausalLM
- persimmon
- PhiForCausalLM
- phi
- Phi3ForCausalLM
- phi3
- Phi3SmallForCausalLM
- phi3_small
- PhiMoEForCausalLM
- phimoe
- Qwen2ForCausalLM
- qwen2
- Qwen2MoeForCausalLM
- qwen2_moe
- RWForCausalLM
- falcon
- StableLMEpochForCausalLM
- stablelm
- StableLmForCausalLM
- stablelm
- Starcoder2ForCausalLM
- starcoder2
- SolarForCausalLM
- solar
- XverseForCausalLM
- xverse