Coverage Report

Created: 2026-01-11 07:13

/src/llama.cpp/src/llama-chat.cpp
Line
Count
Source
1
#include "llama-chat.h"
2
3
#include "llama.h"
4
5
#include <map>
6
#include <sstream>
7
#include <algorithm>
8
9
#if __cplusplus >= 202000L
10
    #define LU8(x) (const char*)(u8##x)
11
#else
12
0
    #define LU8(x) u8##x
13
#endif
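The LU8 macro above is needed because C++20 changes the type of u8 string literals from const char[N] to const char8_t[N], which no longer converts implicitly to const char *. A minimal stand-alone sketch of the same idea (U8C is a hypothetical macro name, not part of this file):

#include <cstdio>

#if __cplusplus >= 202002L   // official C++20 value of __cplusplus
    #define U8C(x) (const char *)(u8##x)   // char8_t* -> char*, same UTF-8 bytes
#else
    #define U8C(x) u8##x                   // already const char*
#endif

int main() {
    std::printf("%s\n", U8C("<用户>"));    // prints the same bytes under C++17 and C++20
    return 0;
}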
14
15
// trim whitespace from the beginning and end of a string
16
0
static std::string trim(const std::string & str) {
17
0
    size_t start = 0;
18
0
    size_t end = str.size();
19
0
    while (start < end && isspace(static_cast<unsigned char>(str[start]))) {
20
0
        start += 1;
21
0
    }
22
0
    while (end > start && isspace(static_cast<unsigned char>(str[end - 1]))) {
23
0
        end -= 1;
24
0
    }
25
0
    return str.substr(start, end - start);
26
0
}
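A quick illustration of the trim() helper above; since it is file-static, a check like this would have to live in the same translation unit (illustrative only, trim_example is not part of the file):

#include <cassert>

static void trim_example() {
    assert(trim("  hello world \n") == "hello world");
    assert(trim("\t \r\n") == "");                    // all-whitespace input becomes empty
    assert(trim("already-clean") == "already-clean"); // nothing to strip
}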
27
28
static const std::map<std::string, llm_chat_template> LLM_CHAT_TEMPLATES = {
29
    { "chatml",            LLM_CHAT_TEMPLATE_CHATML            },
30
    { "llama2",            LLM_CHAT_TEMPLATE_LLAMA_2           },
31
    { "llama2-sys",        LLM_CHAT_TEMPLATE_LLAMA_2_SYS       },
32
    { "llama2-sys-bos",    LLM_CHAT_TEMPLATE_LLAMA_2_SYS_BOS   },
33
    { "llama2-sys-strip",  LLM_CHAT_TEMPLATE_LLAMA_2_SYS_STRIP },
34
    { "mistral-v1",        LLM_CHAT_TEMPLATE_MISTRAL_V1        },
35
    { "mistral-v3",        LLM_CHAT_TEMPLATE_MISTRAL_V3        },
36
    { "mistral-v3-tekken", LLM_CHAT_TEMPLATE_MISTRAL_V3_TEKKEN },
37
    { "mistral-v7",        LLM_CHAT_TEMPLATE_MISTRAL_V7        },
38
    { "mistral-v7-tekken", LLM_CHAT_TEMPLATE_MISTRAL_V7_TEKKEN },
39
    { "phi3",              LLM_CHAT_TEMPLATE_PHI_3             },
40
    { "phi4",              LLM_CHAT_TEMPLATE_PHI_4             },
41
    { "falcon3",           LLM_CHAT_TEMPLATE_FALCON_3          },
42
    { "zephyr",            LLM_CHAT_TEMPLATE_ZEPHYR            },
43
    { "monarch",           LLM_CHAT_TEMPLATE_MONARCH           },
44
    { "gemma",             LLM_CHAT_TEMPLATE_GEMMA             },
45
    { "orion",             LLM_CHAT_TEMPLATE_ORION             },
46
    { "openchat",          LLM_CHAT_TEMPLATE_OPENCHAT          },
47
    { "vicuna",            LLM_CHAT_TEMPLATE_VICUNA            },
48
    { "vicuna-orca",       LLM_CHAT_TEMPLATE_VICUNA_ORCA       },
49
    { "deepseek",          LLM_CHAT_TEMPLATE_DEEPSEEK          },
50
    { "deepseek2",         LLM_CHAT_TEMPLATE_DEEPSEEK_2        },
51
    { "deepseek3",         LLM_CHAT_TEMPLATE_DEEPSEEK_3        },
52
    { "command-r",         LLM_CHAT_TEMPLATE_COMMAND_R         },
53
    { "llama3",            LLM_CHAT_TEMPLATE_LLAMA_3           },
54
    { "chatglm3",          LLM_CHAT_TEMPLATE_CHATGLM_3         },
55
    { "chatglm4",          LLM_CHAT_TEMPLATE_CHATGLM_4         },
56
    { "glmedge",           LLM_CHAT_TEMPLATE_GLMEDGE           },
57
    { "minicpm",           LLM_CHAT_TEMPLATE_MINICPM           },
58
    { "exaone3",           LLM_CHAT_TEMPLATE_EXAONE_3          },
59
    { "exaone4",           LLM_CHAT_TEMPLATE_EXAONE_4          },
60
    { "rwkv-world",        LLM_CHAT_TEMPLATE_RWKV_WORLD        },
61
    { "granite",           LLM_CHAT_TEMPLATE_GRANITE           },
62
    { "gigachat",          LLM_CHAT_TEMPLATE_GIGACHAT          },
63
    { "megrez",            LLM_CHAT_TEMPLATE_MEGREZ            },
64
    { "yandex",            LLM_CHAT_TEMPLATE_YANDEX            },
65
    { "bailing",           LLM_CHAT_TEMPLATE_BAILING           },
66
    { "bailing-think",     LLM_CHAT_TEMPLATE_BAILING_THINK     },
67
    { "bailing2",          LLM_CHAT_TEMPLATE_BAILING2          },
68
    { "llama4",            LLM_CHAT_TEMPLATE_LLAMA4            },
69
    { "smolvlm",           LLM_CHAT_TEMPLATE_SMOLVLM           },
70
    { "hunyuan-moe",       LLM_CHAT_TEMPLATE_HUNYUAN_MOE       },
71
    { "gpt-oss",           LLM_CHAT_TEMPLATE_OPENAI_MOE        },
72
    { "hunyuan-dense",     LLM_CHAT_TEMPLATE_HUNYUAN_DENSE     },
73
    { "kimi-k2",           LLM_CHAT_TEMPLATE_KIMI_K2           },
74
    { "seed_oss",          LLM_CHAT_TEMPLATE_SEED_OSS          },
75
    { "grok-2",            LLM_CHAT_TEMPLATE_GROK_2            },
76
    { "pangu-embedded",    LLM_CHAT_TEMPLATE_PANGU_EMBED       },
77
    { "solar-open",        LLM_CHAT_TEMPLATE_SOLAR_OPEN        },
78
};
79
80
0
llm_chat_template llm_chat_template_from_str(const std::string & name) {
81
0
    return LLM_CHAT_TEMPLATES.at(name);
82
0
}
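llm_chat_template_from_str() is a thin wrapper over std::map::at(), so an unknown name throws std::out_of_range; llm_chat_detect_template() below relies on exactly that. A hedged caller sketch (template_or_unknown is a hypothetical helper, not part of the API):

#include <stdexcept>
#include <string>

static llm_chat_template template_or_unknown(const std::string & name) {
    try {
        return llm_chat_template_from_str(name); // exact alias lookup, e.g. "chatml"
    } catch (const std::out_of_range &) {
        return LLM_CHAT_TEMPLATE_UNKNOWN;        // name is not in LLM_CHAT_TEMPLATES
    }
}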
83
84
0
llm_chat_template llm_chat_detect_template(const std::string & tmpl) {
85
0
    try {
86
0
        return llm_chat_template_from_str(tmpl);
87
0
    } catch (const std::out_of_range &) {
88
        // ignore
89
0
    }
90
91
0
    auto tmpl_contains = [&tmpl](const char * haystack) -> bool {
92
0
        return tmpl.find(haystack) != std::string::npos;
93
0
    };
94
0
    if (tmpl_contains("<|im_start|>")) {
95
0
        return tmpl_contains("<|im_sep|>")
96
0
            ? LLM_CHAT_TEMPLATE_PHI_4
97
0
            : tmpl_contains("<end_of_utterance>")
98
0
                ? LLM_CHAT_TEMPLATE_SMOLVLM // SmolVLM uses <|im_start|> as BOS, but it is NOT chatml
99
0
                : LLM_CHAT_TEMPLATE_CHATML;
100
0
    } else if (tmpl.find("mistral") == 0 || tmpl_contains("[INST]")) {
101
0
        if (tmpl_contains("[SYSTEM_PROMPT]")) {
102
0
            return LLM_CHAT_TEMPLATE_MISTRAL_V7;
103
0
        } else if (
104
            // catches official 'v1' template
105
0
            tmpl_contains("' [INST] ' + system_message")
106
            // catches official 'v3' and 'v3-tekken' templates
107
0
            || tmpl_contains("[AVAILABLE_TOOLS]")
108
0
        ) {
109
            // Official mistral 'v1', 'v3' and 'v3-tekken' templates
110
            // See: https://github.com/mistralai/cookbook/blob/main/concept-deep-dive/tokenization/chat_templates.md
111
            // See: https://github.com/mistralai/cookbook/blob/main/concept-deep-dive/tokenization/templates.md
112
0
            if (tmpl_contains(" [INST]")) {
113
0
                return LLM_CHAT_TEMPLATE_MISTRAL_V1;
114
0
            } else if (tmpl_contains("\"[INST]\"")) {
115
0
                return LLM_CHAT_TEMPLATE_MISTRAL_V3_TEKKEN;
116
0
            }
117
0
            return LLM_CHAT_TEMPLATE_MISTRAL_V3;
118
0
        } else {
119
            // llama2 template and its variants
120
            // [variant] support system message
121
            // See: https://huggingface.co/blog/llama2#how-to-prompt-llama-2
122
0
            bool support_system_message = tmpl_contains("<<SYS>>");
123
0
            bool add_bos_inside_history = tmpl_contains("bos_token + '[INST]");
124
0
            bool strip_message = tmpl_contains("content.strip()");
125
0
            if (strip_message) {
126
0
                return LLM_CHAT_TEMPLATE_LLAMA_2_SYS_STRIP;
127
0
            } else if (add_bos_inside_history) {
128
0
                return LLM_CHAT_TEMPLATE_LLAMA_2_SYS_BOS;
129
0
            } else if (support_system_message) {
130
0
                return LLM_CHAT_TEMPLATE_LLAMA_2_SYS;
131
0
            } else {
132
0
                return LLM_CHAT_TEMPLATE_LLAMA_2;
133
0
            }
134
0
        }
135
0
    } else if (tmpl_contains("<|assistant|>") && tmpl_contains("<|end|>")) {
136
0
        return LLM_CHAT_TEMPLATE_PHI_3;
137
0
    } else if (tmpl_contains("[gMASK]<sop>")) {
138
0
        return LLM_CHAT_TEMPLATE_CHATGLM_4;
139
0
    } else if (tmpl_contains("<|assistant|>") && tmpl_contains("<|user|>")) {
140
0
        return tmpl_contains("</s>") ? LLM_CHAT_TEMPLATE_FALCON_3 : LLM_CHAT_TEMPLATE_GLMEDGE;
141
0
    } else if (tmpl_contains("<|{{ item['role'] }}|>") && tmpl_contains("<|begin_of_image|>")) {
142
0
        return LLM_CHAT_TEMPLATE_GLMEDGE;
143
0
    } else if (tmpl_contains("<|user|>") && tmpl_contains("<|endoftext|>")) {
144
0
        return LLM_CHAT_TEMPLATE_ZEPHYR;
145
0
    } else if (tmpl_contains("bos_token + message['role']")) {
146
0
        return LLM_CHAT_TEMPLATE_MONARCH;
147
0
    } else if (tmpl_contains("<start_of_turn>")) {
148
0
        return LLM_CHAT_TEMPLATE_GEMMA;
149
0
    } else if (tmpl_contains("'\\n\\nAssistant: ' + eos_token")) {
150
        // OrionStarAI/Orion-14B-Chat
151
0
        return LLM_CHAT_TEMPLATE_ORION;
152
0
    } else if (tmpl_contains("GPT4 Correct ")) {
153
        // openchat/openchat-3.5-0106
154
0
        return LLM_CHAT_TEMPLATE_OPENCHAT;
155
0
    } else if (tmpl_contains("USER: ") && tmpl_contains("ASSISTANT: ")) {
156
        // eachadea/vicuna-13b-1.1 (and Orca variant)
157
0
        if (tmpl_contains("SYSTEM: ")) {
158
0
            return LLM_CHAT_TEMPLATE_VICUNA_ORCA;
159
0
        }
160
0
        return LLM_CHAT_TEMPLATE_VICUNA;
161
0
    } else if (tmpl_contains("### Instruction:") && tmpl_contains("<|EOT|>")) {
162
        // deepseek-ai/deepseek-coder-33b-instruct
163
0
        return LLM_CHAT_TEMPLATE_DEEPSEEK;
164
0
    } else if (tmpl_contains("<|START_OF_TURN_TOKEN|>") && tmpl_contains("<|USER_TOKEN|>")) {
165
        // CohereForAI/c4ai-command-r-plus
166
0
        return LLM_CHAT_TEMPLATE_COMMAND_R;
167
0
    } else if (tmpl_contains("<|start_header_id|>") && tmpl_contains("<|end_header_id|>")) {
168
0
        return LLM_CHAT_TEMPLATE_LLAMA_3;
169
0
    } else if (tmpl_contains("[gMASK]sop")) {
170
        // chatglm3-6b
171
0
        return LLM_CHAT_TEMPLATE_CHATGLM_3;
172
0
    } else if (tmpl_contains(LU8("<用户>"))) {
173
        // MiniCPM-3B-OpenHermes-2.5-v2-GGUF
174
0
        return LLM_CHAT_TEMPLATE_MINICPM;
175
0
    } else if (tmpl_contains("'Assistant: ' + message['content'] + eos_token")) {
176
0
        return LLM_CHAT_TEMPLATE_DEEPSEEK_2;
177
0
    } else if (tmpl_contains(LU8("<|Assistant|>")) && tmpl_contains(LU8("<|User|>")) && tmpl_contains(LU8("<|end▁of▁sentence|>"))) {
178
0
        return LLM_CHAT_TEMPLATE_DEEPSEEK_3;
179
0
    } else if (tmpl_contains("[|system|]") && tmpl_contains("[|assistant|]") && tmpl_contains("[|endofturn|]")) {
180
0
        if (tmpl_contains("[|tool|]")) {
181
0
            return LLM_CHAT_TEMPLATE_EXAONE_4;
182
0
        }
183
        // ref: https://huggingface.co/LGAI-EXAONE/EXAONE-3.0-7.8B-Instruct/discussions/8#66bae61b1893d14ee8ed85bb
184
        // EXAONE-3.0-7.8B-Instruct
185
0
        return LLM_CHAT_TEMPLATE_EXAONE_3;
186
0
    } else if (tmpl_contains("rwkv-world") || tmpl_contains("{{- 'User: ' + message['content']|trim + '\\n\\n' -}}")) {
187
0
        return LLM_CHAT_TEMPLATE_RWKV_WORLD;
188
0
    } else if (tmpl_contains("<|start_of_role|>")) {
189
0
        return LLM_CHAT_TEMPLATE_GRANITE;
190
0
    } else if (tmpl_contains("message['role'] + additional_special_tokens[0] + message['content'] + additional_special_tokens[1]")) {
191
0
        return LLM_CHAT_TEMPLATE_GIGACHAT;
192
0
    } else if (tmpl_contains("<|role_start|>")) {
193
0
        return LLM_CHAT_TEMPLATE_MEGREZ;
194
0
    } else if (tmpl_contains(" Ассистент:")) {
195
0
        return LLM_CHAT_TEMPLATE_YANDEX;
196
0
    } else if (tmpl_contains("<role>ASSISTANT</role>") && tmpl_contains("'HUMAN'")) {
197
0
        return LLM_CHAT_TEMPLATE_BAILING;
198
0
    } else if (tmpl_contains("<role>ASSISTANT</role>") && tmpl_contains("\"HUMAN\"") && tmpl_contains("<think>")) {
199
0
        return LLM_CHAT_TEMPLATE_BAILING_THINK;
200
0
    } else if (tmpl_contains("<role>ASSISTANT</role>") && tmpl_contains("<role>HUMAN</role>") && tmpl_contains("<|role_end|>")) {
201
0
        return LLM_CHAT_TEMPLATE_BAILING2;
202
0
    } else if (tmpl_contains("<|header_start|>") && tmpl_contains("<|header_end|>")) {
203
0
        return LLM_CHAT_TEMPLATE_LLAMA4;
204
0
    } else if (tmpl_contains("<|endofuserprompt|>")) {
205
0
        return LLM_CHAT_TEMPLATE_DOTS1;
206
0
    } else if (tmpl_contains("<|extra_0|>") && tmpl_contains("<|extra_4|>")) {
207
0
        return LLM_CHAT_TEMPLATE_HUNYUAN_MOE;
208
0
    } else if (tmpl_contains("<|start|>") && tmpl_contains("<|channel|>")) {
209
0
        return LLM_CHAT_TEMPLATE_OPENAI_MOE;
210
0
    } else if (tmpl_contains("<|hy_Assistant|>") && tmpl_contains("<|hy_place▁holder▁no▁3|>")) {
211
0
        return LLM_CHAT_TEMPLATE_HUNYUAN_DENSE;
212
0
    } else if (tmpl_contains("<|im_assistant|>assistant<|im_middle|>")) {
213
0
        return LLM_CHAT_TEMPLATE_KIMI_K2;
214
0
    } else if (tmpl_contains("<seed:bos>")) {
215
0
        return LLM_CHAT_TEMPLATE_SEED_OSS;
216
0
    } else if (tmpl_contains("'Assistant: '  + message['content'] + '<|separator|>")) {
217
0
        return LLM_CHAT_TEMPLATE_GROK_2;
218
0
    } else if (tmpl_contains(LU8("[unused9]系统:[unused10]"))) {
219
0
        return LLM_CHAT_TEMPLATE_PANGU_EMBED;
220
0
    } else if (tmpl_contains("<|begin|>") && tmpl_contains("<|end|>") && tmpl_contains("<|content|>")) {
221
0
        return LLM_CHAT_TEMPLATE_SOLAR_OPEN;
222
0
    }
223
0
    return LLM_CHAT_TEMPLATE_UNKNOWN;
224
0
}
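Detection first tries an exact alias match and then falls back to substring heuristics over the Jinja source, so either a known alias or the raw chat_template text can be passed. A small sketch under that assumption (detect_examples is illustrative only):

static void detect_examples() {
    // exact alias from LLM_CHAT_TEMPLATES:
    llm_chat_template a = llm_chat_detect_template("llama3");           // LLM_CHAT_TEMPLATE_LLAMA_3

    // fragment of a Jinja template: "<|im_start|>" without "<|im_sep|>"
    // or "<end_of_utterance>" falls through to plain ChatML:
    llm_chat_template b = llm_chat_detect_template(
        "{% for m in messages %}<|im_start|>{{ m['role'] }}\n{{ m['content'] }}<|im_end|>\n{% endfor %}");
    // b == LLM_CHAT_TEMPLATE_CHATML
    (void) a; (void) b;
}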
225
226
// Simple version of "llama_apply_chat_template" that only works with strings
227
// This function uses heuristic checks to detect commonly used templates. It is not a Jinja parser.
228
int32_t llm_chat_apply_template(
229
    llm_chat_template tmpl,
230
    const std::vector<const llama_chat_message *> & chat,
231
0
    std::string & dest, bool add_ass) {
232
    // Taken from the research: https://github.com/ggerganov/llama.cpp/issues/5527
233
0
    std::stringstream ss;
234
0
    if (tmpl == LLM_CHAT_TEMPLATE_CHATML) {
235
        // chatml template
236
0
        for (auto message : chat) {
237
0
            ss << "<|im_start|>" << message->role << "\n" << message->content << "<|im_end|>\n";
238
0
        }
239
0
        if (add_ass) {
240
0
            ss << "<|im_start|>assistant\n";
241
0
        }
242
0
    } else if (tmpl == LLM_CHAT_TEMPLATE_MISTRAL_V7 || tmpl == LLM_CHAT_TEMPLATE_MISTRAL_V7_TEKKEN) {
243
        // Official mistral 'v7' template
244
        // See: https://huggingface.co/mistralai/Mistral-Large-Instruct-2411#basic-instruct-template-v7
245
        //      https://huggingface.co/mistralai/Mistral-Small-3.1-24B-Instruct-2503#basic-instruct-template-v7-tekken
246
0
        const char * trailing_space = tmpl == LLM_CHAT_TEMPLATE_MISTRAL_V7 ? " " : "";
247
0
        for (auto message : chat) {
248
0
            std::string role(message->role);
249
0
            std::string content(message->content);
250
0
            if (role == "system") {
251
0
                ss << "[SYSTEM_PROMPT]" << trailing_space << content << "[/SYSTEM_PROMPT]";
252
0
            } else if (role == "user") {
253
0
                ss << "[INST]" << trailing_space << content << "[/INST]";
254
0
            } else {
255
0
                ss << trailing_space << content << "</s>";
256
0
            }
257
0
        }
258
0
    } else if (tmpl == LLM_CHAT_TEMPLATE_MISTRAL_V1
259
0
            || tmpl == LLM_CHAT_TEMPLATE_MISTRAL_V3
260
0
            || tmpl == LLM_CHAT_TEMPLATE_MISTRAL_V3_TEKKEN) {
261
        // See: https://github.com/mistralai/cookbook/blob/main/concept-deep-dive/tokenization/chat_templates.md
262
        // See: https://github.com/mistralai/cookbook/blob/main/concept-deep-dive/tokenization/templates.md
263
0
        std::string leading_space = tmpl == LLM_CHAT_TEMPLATE_MISTRAL_V1 ? " " : "";
264
0
        std::string trailing_space = tmpl == LLM_CHAT_TEMPLATE_MISTRAL_V3_TEKKEN ? "" : " ";
265
0
        bool trim_assistant_message = tmpl == LLM_CHAT_TEMPLATE_MISTRAL_V3;
266
0
        bool is_inside_turn = false;
267
0
        for (auto message : chat) {
268
0
            if (!is_inside_turn) {
269
0
                ss << leading_space << "[INST]" << trailing_space;
270
0
                is_inside_turn = true;
271
0
            }
272
0
            std::string role(message->role);
273
0
            std::string content(message->content);
274
0
            if (role == "system") {
275
0
                ss << content << "\n\n";
276
0
            } else if (role == "user") {
277
0
                ss << content << leading_space << "[/INST]";
278
0
            } else {
279
0
                ss << trailing_space << (trim_assistant_message ? trim(content) : content) << "</s>";
280
0
                is_inside_turn = false;
281
0
            }
282
0
        }
283
0
    } else if (
284
0
            tmpl == LLM_CHAT_TEMPLATE_LLAMA_2
285
0
            || tmpl == LLM_CHAT_TEMPLATE_LLAMA_2_SYS
286
0
            || tmpl == LLM_CHAT_TEMPLATE_LLAMA_2_SYS_BOS
287
0
            || tmpl == LLM_CHAT_TEMPLATE_LLAMA_2_SYS_STRIP) {
288
        // llama2 template and its variants
289
        // [variant] support system message
290
        // See: https://huggingface.co/blog/llama2#how-to-prompt-llama-2
291
0
        bool support_system_message = tmpl != LLM_CHAT_TEMPLATE_LLAMA_2;
292
        // [variant] add BOS inside history
293
0
        bool add_bos_inside_history = tmpl == LLM_CHAT_TEMPLATE_LLAMA_2_SYS_BOS;
294
        // [variant] trim spaces from the input message
295
0
        bool strip_message = tmpl == LLM_CHAT_TEMPLATE_LLAMA_2_SYS_STRIP;
296
        // construct the prompt
297
0
        bool is_inside_turn = true; // skip BOS at the beginning
298
0
        ss << "[INST] ";
299
0
        for (auto message : chat) {
300
0
            std::string content = strip_message ? trim(message->content) : message->content;
301
0
            std::string role(message->role);
302
0
            if (!is_inside_turn) {
303
0
                is_inside_turn = true;
304
0
                ss << (add_bos_inside_history ? "<s>[INST] " : "[INST] ");
305
0
            }
306
0
            if (role == "system") {
307
0
                if (support_system_message) {
308
0
                    ss << "<<SYS>>\n" << content << "\n<</SYS>>\n\n";
309
0
                } else {
310
                    // if the model does not support a system message, we still include its content in the first message, but without the <<SYS>> markers
311
0
                    ss << content << "\n";
312
0
                }
313
0
            } else if (role == "user") {
314
0
                ss << content << " [/INST]";
315
0
            } else {
316
0
                ss << content << "</s>";
317
0
                is_inside_turn = false;
318
0
            }
319
0
        }
320
0
    } else if (tmpl == LLM_CHAT_TEMPLATE_PHI_3) {
321
        // Phi 3
322
0
        for (auto message : chat) {
323
0
            std::string role(message->role);
324
0
            ss << "<|" << role << "|>\n" << message->content << "<|end|>\n";
325
0
        }
326
0
        if (add_ass) {
327
0
            ss << "<|assistant|>\n";
328
0
        }
329
0
    } else if (tmpl == LLM_CHAT_TEMPLATE_PHI_4) {
330
        // chatml template
331
0
        for (auto message : chat) {
332
0
            ss << "<|im_start|>" << message->role << "<|im_sep|>" << message->content << "<|im_end|>";
333
0
        }
334
0
        if (add_ass) {
335
0
            ss << "<|im_start|>assistant<|im_sep|>";
336
0
        }
337
0
    } else if (tmpl == LLM_CHAT_TEMPLATE_FALCON_3) {
338
        // Falcon 3
339
0
        for (auto message : chat) {
340
0
            std::string role(message->role);
341
0
            ss << "<|" << role << "|>\n" << message->content << "\n";
342
0
        }
343
0
        if (add_ass) {
344
0
            ss << "<|assistant|>\n";
345
0
        }
346
0
    } else if (tmpl == LLM_CHAT_TEMPLATE_ZEPHYR) {
347
        // zephyr template
348
0
        for (auto message : chat) {
349
0
            ss << "<|" << message->role << "|>" << "\n" << message->content << "<|endoftext|>\n";
350
0
        }
351
0
        if (add_ass) {
352
0
            ss << "<|assistant|>\n";
353
0
        }
354
0
    } else if (tmpl == LLM_CHAT_TEMPLATE_MONARCH) {
355
        // mlabonne/AlphaMonarch-7B template (the <s> is included inside history)
356
0
        for (auto message : chat) {
357
0
            std::string bos = (message == chat.front()) ? "" : "<s>"; // skip BOS for first message
358
0
            ss << bos << message->role << "\n" << message->content << "</s>\n";
359
0
        }
360
0
        if (add_ass) {
361
0
            ss << "<s>assistant\n";
362
0
        }
363
0
    } else if (tmpl == LLM_CHAT_TEMPLATE_GEMMA) {
364
        // google/gemma-7b-it
365
0
        std::string system_prompt = "";
366
0
        for (auto message : chat) {
367
0
            std::string role(message->role);
368
0
            if (role == "system") {
369
                // there is no system message for gemma, but we will merge it with user prompt, so nothing is broken
370
0
                system_prompt += trim(message->content);
371
0
                continue;
372
0
            }
373
            // in gemma, "assistant" is "model"
374
0
            role = role == "assistant" ? "model" : message->role;
375
0
            ss << "<start_of_turn>" << role << "\n";
376
0
            if (!system_prompt.empty() && role != "model") {
377
0
                ss << system_prompt << "\n\n";
378
0
                system_prompt = "";
379
0
            }
380
0
            ss << trim(message->content) << "<end_of_turn>\n";
381
0
        }
382
0
        if (add_ass) {
383
0
            ss << "<start_of_turn>model\n";
384
0
        }
385
0
    } else if (tmpl == LLM_CHAT_TEMPLATE_ORION) {
386
        // OrionStarAI/Orion-14B-Chat
387
0
        std::string system_prompt = "";
388
0
        for (auto message : chat) {
389
0
            std::string role(message->role);
390
0
            if (role == "system") {
391
                // there is no system message support, so we merge it with the user prompt
392
0
                system_prompt += message->content;
393
0
                continue;
394
0
            } else if (role == "user") {
395
0
                ss << "Human: ";
396
0
                if (!system_prompt.empty()) {
397
0
                    ss << system_prompt << "\n\n";
398
0
                    system_prompt = "";
399
0
                }
400
0
                ss << message->content << "\n\nAssistant: </s>";
401
0
            } else {
402
0
                ss << message->content << "</s>";
403
0
            }
404
0
        }
405
0
    } else if (tmpl == LLM_CHAT_TEMPLATE_OPENCHAT) {
406
        // openchat/openchat-3.5-0106,
407
0
        for (auto message : chat) {
408
0
            std::string role(message->role);
409
0
            if (role == "system") {
410
0
                ss << message->content << "<|end_of_turn|>";
411
0
            } else {
412
0
                role[0] = toupper(role[0]);
413
0
                ss << "GPT4 Correct " << role << ": " << message->content << "<|end_of_turn|>";
414
0
            }
415
0
        }
416
0
        if (add_ass) {
417
0
            ss << "GPT4 Correct Assistant:";
418
0
        }
419
0
    } else if (tmpl == LLM_CHAT_TEMPLATE_VICUNA || tmpl == LLM_CHAT_TEMPLATE_VICUNA_ORCA) {
420
        // eachadea/vicuna-13b-1.1 (and Orca variant)
421
0
        for (auto message : chat) {
422
0
            std::string role(message->role);
423
0
            if (role == "system") {
424
                // Orca-Vicuna variant uses a system prefix
425
0
                if (tmpl == LLM_CHAT_TEMPLATE_VICUNA_ORCA) {
426
0
                    ss << "SYSTEM: " << message->content << "\n";
427
0
                } else {
428
0
                    ss << message->content << "\n\n";
429
0
                }
430
0
            } else if (role == "user") {
431
0
                ss << "USER: " << message->content << "\n";
432
0
            } else if (role == "assistant") {
433
0
                ss << "ASSISTANT: " << message->content << "</s>\n";
434
0
            }
435
0
        }
436
0
        if (add_ass) {
437
0
            ss << "ASSISTANT:";
438
0
        }
439
0
    } else if (tmpl == LLM_CHAT_TEMPLATE_DEEPSEEK) {
440
        // deepseek-ai/deepseek-coder-33b-instruct
441
0
        for (auto message : chat) {
442
0
            std::string role(message->role);
443
0
            if (role == "system") {
444
0
                ss << message->content;
445
0
            } else if (role == "user") {
446
0
                ss << "### Instruction:\n" << message->content << "\n";
447
0
            } else if (role == "assistant") {
448
0
                ss << "### Response:\n" << message->content << "\n<|EOT|>\n";
449
0
            }
450
0
        }
451
0
        if (add_ass) {
452
0
            ss << "### Response:\n";
453
0
        }
454
0
    } else if (tmpl == LLM_CHAT_TEMPLATE_COMMAND_R) {
455
        // CohereForAI/c4ai-command-r-plus
456
0
        for (auto message : chat) {
457
0
            std::string role(message->role);
458
0
            if (role == "system") {
459
0
                ss << "<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|>" << trim(message->content) << "<|END_OF_TURN_TOKEN|>";
460
0
            } else if (role == "user") {
461
0
                ss << "<|START_OF_TURN_TOKEN|><|USER_TOKEN|>" << trim(message->content) << "<|END_OF_TURN_TOKEN|>";
462
0
            } else if (role == "assistant") {
463
0
                ss << "<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>" << trim(message->content) << "<|END_OF_TURN_TOKEN|>";
464
0
            }
465
0
        }
466
0
        if (add_ass) {
467
0
            ss << "<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>";
468
0
        }
469
0
    } else if (tmpl == LLM_CHAT_TEMPLATE_LLAMA_3) {
470
        // Llama 3
471
0
        for (auto message : chat) {
472
0
            std::string role(message->role);
473
0
            ss << "<|start_header_id|>" << role << "<|end_header_id|>\n\n" << trim(message->content) << "<|eot_id|>";
474
0
        }
475
0
        if (add_ass) {
476
0
            ss << "<|start_header_id|>assistant<|end_header_id|>\n\n";
477
0
        }
478
0
    } else if (tmpl == LLM_CHAT_TEMPLATE_CHATGLM_3) {
479
        // chatglm3-6b
480
0
        ss << "[gMASK]" << "sop";
481
0
        for (auto message : chat) {
482
0
            std::string role(message->role);
483
0
            ss << "<|" << role << "|>" << "\n " << message->content;
484
0
        }
485
0
        if (add_ass) {
486
0
            ss << "<|assistant|>";
487
0
        }
488
0
    } else if (tmpl == LLM_CHAT_TEMPLATE_CHATGLM_4) {
489
0
        ss << "[gMASK]" << "<sop>";
490
0
        for (auto message : chat) {
491
0
            std::string role(message->role);
492
0
            ss << "<|" << role << "|>" << "\n" << message->content;
493
0
        }
494
0
        if (add_ass) {
495
0
            ss << "<|assistant|>\n";
496
0
        }
497
0
    } else if (tmpl == LLM_CHAT_TEMPLATE_GLMEDGE) {
498
0
        for (auto message : chat) {
499
0
            std::string role(message->role);
500
0
            ss << "<|" << role << "|>" << "\n" << message->content;
501
0
        }
502
0
        if (add_ass) {
503
0
            ss << "<|assistant|>";
504
0
        }
505
0
    } else if (tmpl == LLM_CHAT_TEMPLATE_MINICPM) {
506
        // MiniCPM-3B-OpenHermes-2.5-v2-GGUF
507
0
        for (auto message : chat) {
508
0
            std::string role(message->role);
509
0
            if (role == "user") {
510
0
                ss << LU8("<用户>");
511
0
                ss << trim(message->content);
512
0
                ss << "<AI>";
513
0
            } else {
514
0
                ss << trim(message->content);
515
0
            }
516
0
        }
517
0
    } else if (tmpl == LLM_CHAT_TEMPLATE_DEEPSEEK_2) {
518
        // DeepSeek-V2
519
0
        for (auto message : chat) {
520
0
            std::string role(message->role);
521
0
            if (role == "system") {
522
0
                ss << message->content << "\n\n";
523
0
            } else if (role == "user") {
524
0
                ss << "User: " << message->content << "\n\n";
525
0
            } else if (role == "assistant") {
526
0
                ss << "Assistant: " << message->content << LU8("<|end▁of▁sentence|>");
527
0
            }
528
0
        }
529
0
        if (add_ass) {
530
0
            ss << "Assistant:";
531
0
        }
532
0
    } else if (tmpl == LLM_CHAT_TEMPLATE_DEEPSEEK_3) {
533
        // DeepSeek-V3
534
0
        for (auto message : chat) {
535
0
            std::string role(message->role);
536
0
            if (role == "system") {
537
0
                ss << message->content << "\n\n";
538
0
            } else if (role == "user") {
539
0
                ss << LU8("<|User|>") << message->content;
540
0
            } else if (role == "assistant") {
541
0
                ss << LU8("<|Assistant|>") << message->content << LU8("<|end▁of▁sentence|>");
542
0
            }
543
0
        }
544
0
        if (add_ass) {
545
0
            ss << LU8("<|Assistant|>");
546
0
        }
547
0
    } else if (tmpl == LLM_CHAT_TEMPLATE_EXAONE_3) {
548
        // ref: https://huggingface.co/LGAI-EXAONE/EXAONE-3.0-7.8B-Instruct/discussions/8#66bae61b1893d14ee8ed85bb
549
        // EXAONE-3.0-7.8B-Instruct
550
0
        for (auto message : chat) {
551
0
            std::string role(message->role);
552
0
            if (role == "system") {
553
0
                ss << "[|system|]" << trim(message->content) << "[|endofturn|]\n";
554
0
            } else if (role == "user") {
555
0
                ss << "[|user|]" << trim(message->content) << "\n";
556
0
            } else if (role == "assistant") {
557
0
                ss << "[|assistant|]" << trim(message->content) << "[|endofturn|]\n";
558
0
            }
559
0
        }
560
0
        if (add_ass) {
561
0
            ss << "[|assistant|]";
562
0
        }
563
0
    } else if (tmpl == LLM_CHAT_TEMPLATE_EXAONE_4) {
564
0
        for (auto message : chat) {
565
0
            std::string role(message->role);
566
0
            if (role == "system") {
567
0
                ss << "[|system|]" << trim(message->content) << "[|endofturn|]\n";
568
0
            } else if (role == "user") {
569
0
                ss << "[|user|]" << trim(message->content) << "\n";
570
0
            } else if (role == "assistant") {
571
0
                ss << "[|assistant|]" << trim(message->content) << "[|endofturn|]\n";
572
0
            } else if (role == "tool") {
573
0
                ss << "[|tool|]" << trim(message->content) << "[|endofturn|]\n";
574
0
            }
575
0
        }
576
0
        if (add_ass) {
577
0
            ss << "[|assistant|]";
578
0
        }
579
0
    } else if (tmpl == LLM_CHAT_TEMPLATE_RWKV_WORLD) {
580
        // this template requires the model to have "\n\n" as EOT token
581
0
        for (size_t i = 0; i < chat.size(); i++) {
582
0
            std::string role(chat[i]->role);
583
0
            if (role == "system") {
584
0
                ss << "System: " << trim(chat[i]->content) << "\n\n";
585
0
            } else if (role == "user") {
586
0
                ss << "User: " << trim(chat[i]->content) << "\n\n";
587
0
                if (i == chat.size() - 1) {
588
0
                    ss << "Assistant:";
589
0
                }
590
0
            } else if (role == "assistant") {
591
0
                ss << "Assistant: " << trim(chat[i]->content) << "\n\n";
592
0
            }
593
0
        }
594
0
    } else if (tmpl == LLM_CHAT_TEMPLATE_GRANITE) {
595
        // IBM Granite template
596
0
        for (const auto & message : chat) {
597
0
            std::string role(message->role);
598
0
            ss << "<|start_of_role|>" << role << "<|end_of_role|>";
599
0
            if (role == "assistant_tool_call") {
600
0
                ss << "<|tool_call|>";
601
0
            }
602
0
            ss << message->content << "<|end_of_text|>\n";
603
0
        }
604
0
        if (add_ass) {
605
0
            ss << "<|start_of_role|>assistant<|end_of_role|>";
606
0
        }
607
0
    } else if (tmpl == LLM_CHAT_TEMPLATE_GIGACHAT) {
608
        // GigaChat template
609
0
        bool has_system = !chat.empty() && std::string(chat[0]->role) == "system";
610
611
        // Handle system message if present
612
0
        if (has_system) {
613
0
            ss << "<s>" << chat[0]->content << "<|message_sep|>";
614
0
        } else {
615
0
            ss << "<s>";
616
0
        }
617
618
        // Process remaining messages
619
0
        for (size_t i = has_system ? 1 : 0; i < chat.size(); i++) {
620
0
            std::string role(chat[i]->role);
621
0
            if (role == "user") {
622
0
                ss << "user<|role_sep|>" << chat[i]->content << "<|message_sep|>"
623
0
                << "available functions<|role_sep|>[]<|message_sep|>";
624
0
            } else if (role == "assistant") {
625
0
                ss << "assistant<|role_sep|>" << chat[i]->content << "<|message_sep|>";
626
0
            }
627
0
        }
628
629
        // Add generation prompt if needed
630
0
        if (add_ass) {
631
0
            ss << "assistant<|role_sep|>";
632
0
        }
633
0
    } else if (tmpl == LLM_CHAT_TEMPLATE_MEGREZ) {
634
        // Megrez template
635
0
        for (auto message : chat) {
636
0
            std::string role(message->role);
637
0
            ss << "<|role_start|>" << role << "<|role_end|>" << message->content << "<|turn_end|>";
638
0
        }
639
640
0
        if (add_ass) {
641
0
            ss << "<|role_start|>assistant<|role_end|>";
642
0
        }
643
0
    } else if (tmpl == LLM_CHAT_TEMPLATE_YANDEX) {
644
        // Yandex template ("\n\n" is defined as EOT token)
645
646
0
        for (size_t i = 0; i < chat.size(); i++) {
647
0
            std::string role(chat[i]->role);
648
0
            if (role == "user") {
649
0
                ss << " Пользователь: " << chat[i]->content << "\n\n";
650
0
            } else if (role == "assistant") {
651
0
                ss << " Ассистент: " << chat[i]->content << "\n\n";
652
0
            }
653
0
        }
654
655
        // Add generation prompt if needed
656
0
        if (add_ass) {
657
0
            ss << " Ассистент:[SEP]";
658
0
        }
659
0
    } else if (tmpl == LLM_CHAT_TEMPLATE_BAILING || tmpl == LLM_CHAT_TEMPLATE_BAILING_THINK) {
660
        // Bailing (Ling/Ring) template
661
0
        for (auto message : chat) {
662
0
            std::string role(message->role);
663
664
0
            if (role == "user") {
665
0
                role = "HUMAN";
666
0
            } else {
667
0
                std::transform(role.begin(), role.end(), role.begin(), ::toupper);
668
0
            }
669
670
0
            ss << "<role>" << role << "</role>" << message->content;
671
0
        }
672
673
0
        if (add_ass) {
674
0
            ss << "<role>ASSISTANT</role>";
675
676
0
            if (tmpl == LLM_CHAT_TEMPLATE_BAILING_THINK) {
677
0
                ss << "<think>";
678
0
            }
679
0
        }
680
0
    } else if (tmpl == LLM_CHAT_TEMPLATE_BAILING2) {
681
        // Bailing2 (Ling 2.0) template
682
0
        bool has_system = !chat.empty() && std::string(chat[0]->role) == "system";
683
684
0
        if (!has_system) {
685
0
            ss << "<role>SYSTEM</role>detailed thinking off<|role_end|>";
686
0
        }
687
688
0
        for (auto message : chat) {
689
0
            std::string role(message->role);
690
691
0
            if (role == "user") {
692
0
                role = "HUMAN";
693
0
            } else {
694
0
                std::transform(role.begin(), role.end(), role.begin(), ::toupper);
695
0
            }
696
697
0
            ss << "<role>" << role << "</role>" << message->content << "<|role_end|>";
698
0
        }
699
700
0
        if (add_ass) {
701
0
            ss << "<role>ASSISTANT</role>";
702
0
        }
703
0
    } else if (tmpl == LLM_CHAT_TEMPLATE_LLAMA4) {
704
        // Llama 4
705
0
        for (auto message : chat) {
706
0
            std::string role(message->role);
707
0
            ss << "<|header_start|>" << role << "<|header_end|>\n\n" << trim(message->content) << "<|eot|>";
708
0
        }
709
0
        if (add_ass) {
710
0
            ss << "<|header_start|>assistant<|header_end|>\n\n";
711
0
        }
712
0
    } else if (tmpl == LLM_CHAT_TEMPLATE_SMOLVLM) {
713
        // SmolVLM
714
0
        ss << "<|im_start|>"; // uses <|im_start|> as BOS, but the actual content is NOT chatml
715
0
        for (auto message : chat) {
716
0
            std::string role(message->role);
717
0
            if (role == "system") {
718
0
                ss << message->content << "\n\n";
719
0
            } else if (role == "user") {
720
0
                ss << "User: " << message->content << "<end_of_utterance>\n";
721
0
            } else {
722
0
                ss << "Assistant: " << message->content << "<end_of_utterance>\n";
723
0
            }
724
0
        }
725
0
        if (add_ass) {
726
0
            ss << "Assistant:";
727
0
        }
728
0
    } else if (tmpl == LLM_CHAT_TEMPLATE_DOTS1) {
729
        // dots.llm1.inst (DOTS1)
730
0
        for (auto message : chat) {
731
0
            std::string role(message->role);
732
0
            if (role == "system") {
733
0
                ss << "<|system|>" << message->content << "<|endofsystem|>";
734
0
            } else if (role == "user") {
735
0
                ss << "<|userprompt|>" << message->content << "<|endofuserprompt|>";
736
0
            } else {
737
0
                ss << "<|response|>" << message->content << "<|endofresponse|>";
738
0
            }
739
0
        }
740
0
        if (add_ass) {
741
0
            ss << "<|response|>";
742
0
        }
743
0
    } else if (tmpl == LLM_CHAT_TEMPLATE_HUNYUAN_MOE) {
744
        // tencent/Hunyuan-A13B-Instruct
745
0
        for (auto message : chat) {
746
0
            std::string role(message->role);
747
0
            if (role == "system") {
748
0
                ss << "<|startoftext|>" << message->content << "<|extra_4|>";
749
0
            } else if (role == "assistant") {
750
0
                ss << message->content << "<|eos|>";
751
0
            } else {
752
0
                ss << "<|startoftext|>" << message->content << "<|extra_0|>";
753
0
            }
754
0
        }
755
0
    } else if (tmpl == LLM_CHAT_TEMPLATE_OPENAI_MOE) {
756
        // OpenAI MoE (based on Harmony chat template)
757
0
        for (auto message : chat) {
758
0
            std::string role(message->role);
759
0
            ss << "<|start|>" << role << "<|message|>" << message->content;
760
0
            ss << (role == "assistant" ? "<|return|>" : "<|end|>");
761
0
        }
762
0
        if (add_ass) {
763
0
            ss << "<|start|>assistant";
764
0
        }
765
0
    } else if (tmpl == LLM_CHAT_TEMPLATE_HUNYUAN_DENSE) {
766
        // tencent/Hunyuan-4B-Instruct
767
0
        for (size_t i = 0; i < chat.size(); i++) {
768
0
            std::string role(chat[i]->role);
769
0
            if (i == 0) {
770
0
                if (role == "system") {
771
0
                    ss << chat[i]->content << "<|hy_place▁holder▁no▁3|>";
772
0
                }
773
0
            }
774
775
0
            if (role == "assistant") {
776
0
                ss << "<|hy_Assistant|>" << chat[i]->content << "<|hy_place▁holder▁no▁2|>";
777
0
            } else if (role == "user") {
778
0
                ss << "<|hy_User|>" << chat[i]->content << "<|hy_Assistant|>";
779
0
            }
780
0
        }
781
0
    } else if (tmpl == LLM_CHAT_TEMPLATE_KIMI_K2) {
782
        // moonshotai/Kimi-K2-Instruct
783
0
        for (auto message : chat) {
784
0
            std::string role(message->role);
785
0
            if (role == "system") {
786
0
                ss << "<|im_system|>system<|im_middle|>";
787
0
            } else if (role == "user") {
788
0
                ss << "<|im_user|>user<|im_middle|>";
789
0
            } else if (role == "assistant") {
790
0
                ss << "<|im_assistant|>assistant<|im_middle|>";
791
0
            } else if (role == "tool") {
792
0
                ss << "<|im_system|>tool<|im_middle|>";
793
0
            }
794
795
0
            ss << message->content << "<|im_end|>";
796
0
        }
797
0
        if (add_ass) {
798
0
            ss << "<|im_assistant|>assistant<|im_middle|>";
799
0
        }
800
0
    } else if (tmpl == LLM_CHAT_TEMPLATE_SEED_OSS) {
801
0
        for (auto message: chat) {
802
0
            std::string role(message->role);
803
0
            ss << "<seed:bos>" << role << "\n" << (role == "assistant" ? trim(message->content) : message->content) << "<seed:eos>";
804
0
        }
805
0
        if (add_ass) {
806
0
            ss << "<seed:bos>assistant\n";
807
0
        }
808
0
    } else if (tmpl == LLM_CHAT_TEMPLATE_GROK_2) {
809
0
        for (auto message : chat) {
810
0
            std::string role(message->role);
811
0
            if (role == "system") {
812
0
                ss << "System: " << trim(message->content) << "<|separator|>\n\n";
813
0
            } else if (role == "user") {
814
0
                ss << "Human: " << trim(message->content) << "<|separator|>\n\n";
815
0
            } else if (role == "assistant") {
816
0
                ss << "Assistant: " << message->content << "<|separator|>\n\n";
817
0
            }
818
0
        }
819
0
        if (add_ass) {
820
0
            ss << "Assistant:";
821
0
        }
822
0
    } else if (tmpl == LLM_CHAT_TEMPLATE_PANGU_EMBED) {
823
        // [unused9]系统:xxx[unused10]
824
        // [unused9]用户:xxx[unused10]
825
        // [unused9]助手:xxx[unused10]
826
        // ...
827
0
        for (size_t i = 0; i < chat.size(); ++i) {
828
0
            const auto & msg = chat[i];
829
0
            const std::string & role = msg->role;
830
0
            const std::string & content = msg->content;
831
832
0
            if (i == 0 && role != "system") {
833
0
                ss << "[unused9]系统:[unused10]";
834
0
            }
835
836
0
            if (role == "system") {
837
0
                ss << "[unused9]系统:" << content << "[unused10]";
838
0
            } else if (role == "user") {
839
0
                ss << "[unused9]用户:" << content << "[unused10]";
840
0
            } else if (role == "assistant") {
841
0
                ss << "[unused9]助手:" << content << "[unused10]";
842
0
            } else if (role == "tool") {
843
0
                ss << "[unused9]工具:" << content << "[unused10]";
844
0
            } else if (role == "function") {
845
0
                ss << "[unused9]方法:" << content << "[unused10]";
846
0
            }
847
0
        }
848
0
        if (add_ass) {
849
0
            ss << "[unused9]助手:";
850
0
        }
851
0
    } else if (tmpl == LLM_CHAT_TEMPLATE_SOLAR_OPEN) {
852
0
        for (auto message : chat) {
853
0
            std::string role(message->role);
854
0
            ss << "<|begin|>" << role << "<|content|>" << message->content << "<|end|>";
855
0
        }
856
0
        if (add_ass) {
857
0
            ss << "<|begin|>assistant";
858
0
        }
859
0
    } else {
860
        // template not supported
861
0
        return -1;
862
0
    }
863
0
    dest = ss.str();
864
0
    return dest.size();
865
0
}
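A hedged usage sketch of llm_chat_apply_template() above, assuming llama_chat_message from llama.h is the plain {role, content} pair of C strings (apply_example is illustrative, not part of the file):

#include <cstdint>
#include <cstdio>
#include <string>
#include <vector>

static void apply_example() {
    llama_chat_message msgs[] = {
        { "system", "You are a helpful assistant." },
        { "user",   "Hello!"                       },
    };
    std::vector<const llama_chat_message *> chat = { &msgs[0], &msgs[1] };

    std::string prompt;
    // add_ass = true appends the assistant generation prompt ("<|im_start|>assistant\n" for ChatML).
    int32_t n = llm_chat_apply_template(LLM_CHAT_TEMPLATE_CHATML, chat, prompt, /*add_ass=*/true);
    if (n < 0) {
        std::printf("template not supported\n");   // the final else branch returns -1
    } else {
        std::printf("%s", prompt.c_str());         // n == (int32_t) prompt.size()
    }
}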
866
867
// public interface
868
869
0
int32_t llama_chat_builtin_templates(const char ** output, size_t len) {
870
0
    auto it = LLM_CHAT_TEMPLATES.begin();
871
0
    for (size_t i = 0; i < std::min(len, LLM_CHAT_TEMPLATES.size()); i++) {
872
0
        output[i] = it->first.c_str();
873
0
        std::advance(it, 1);
874
0
    }
875
0
    return (int32_t) LLM_CHAT_TEMPLATES.size();
876
0
}
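llama_chat_builtin_templates() follows the usual size-query pattern: it always returns the total number of builtin templates and writes at most len names, so it can be called once with len = 0 to size a buffer. A usage sketch (list_builtin_templates is illustrative only):

#include <cstdint>
#include <cstdio>
#include <vector>

static void list_builtin_templates() {
    const int32_t n = llama_chat_builtin_templates(nullptr, 0);    // query the count only
    std::vector<const char *> names(static_cast<size_t>(n));
    llama_chat_builtin_templates(names.data(), names.size());      // fill the alias names
    for (const char * name : names) {
        std::printf("%s\n", name);                                 // aliases in lexicographic (std::map) order
    }
}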