Coverage Report

Created: 2025-11-28 06:56

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/llama.cpp/src/llama-chat.cpp
Line
Count
Source
1
#include "llama-chat.h"
2
3
#include "llama.h"
4
5
#include <map>
6
#include <sstream>
7
#include <algorithm>
8
9
#if __cplusplus >= 202000L
10
    #define LU8(x) (const char*)(u8##x)
11
#else
12
2.02k
    #define LU8(x) u8##x
13
#endif
14
15
// trim whitespace from the beginning and end of a string
16
2.32k
static std::string trim(const std::string & str) {
17
2.32k
    size_t start = 0;
18
2.32k
    size_t end = str.size();
19
325k
    while (start < end && isspace(static_cast<unsigned char>(str[start]))) {
20
323k
        start += 1;
21
323k
    }
22
1.05M
    while (end > start && isspace(static_cast<unsigned char>(str[end - 1]))) {
23
1.05M
        end -= 1;
24
1.05M
    }
25
2.32k
    return str.substr(start, end - start);
26
2.32k
}
27
28
static const std::map<std::string, llm_chat_template> LLM_CHAT_TEMPLATES = {
29
    { "chatml",            LLM_CHAT_TEMPLATE_CHATML            },
30
    { "llama2",            LLM_CHAT_TEMPLATE_LLAMA_2           },
31
    { "llama2-sys",        LLM_CHAT_TEMPLATE_LLAMA_2_SYS       },
32
    { "llama2-sys-bos",    LLM_CHAT_TEMPLATE_LLAMA_2_SYS_BOS   },
33
    { "llama2-sys-strip",  LLM_CHAT_TEMPLATE_LLAMA_2_SYS_STRIP },
34
    { "mistral-v1",        LLM_CHAT_TEMPLATE_MISTRAL_V1        },
35
    { "mistral-v3",        LLM_CHAT_TEMPLATE_MISTRAL_V3        },
36
    { "mistral-v3-tekken", LLM_CHAT_TEMPLATE_MISTRAL_V3_TEKKEN },
37
    { "mistral-v7",        LLM_CHAT_TEMPLATE_MISTRAL_V7        },
38
    { "mistral-v7-tekken", LLM_CHAT_TEMPLATE_MISTRAL_V7_TEKKEN },
39
    { "phi3",              LLM_CHAT_TEMPLATE_PHI_3             },
40
    { "phi4",              LLM_CHAT_TEMPLATE_PHI_4             },
41
    { "falcon3",           LLM_CHAT_TEMPLATE_FALCON_3          },
42
    { "zephyr",            LLM_CHAT_TEMPLATE_ZEPHYR            },
43
    { "monarch",           LLM_CHAT_TEMPLATE_MONARCH           },
44
    { "gemma",             LLM_CHAT_TEMPLATE_GEMMA             },
45
    { "orion",             LLM_CHAT_TEMPLATE_ORION             },
46
    { "openchat",          LLM_CHAT_TEMPLATE_OPENCHAT          },
47
    { "vicuna",            LLM_CHAT_TEMPLATE_VICUNA            },
48
    { "vicuna-orca",       LLM_CHAT_TEMPLATE_VICUNA_ORCA       },
49
    { "deepseek",          LLM_CHAT_TEMPLATE_DEEPSEEK          },
50
    { "deepseek2",         LLM_CHAT_TEMPLATE_DEEPSEEK_2        },
51
    { "deepseek3",         LLM_CHAT_TEMPLATE_DEEPSEEK_3        },
52
    { "command-r",         LLM_CHAT_TEMPLATE_COMMAND_R         },
53
    { "llama3",            LLM_CHAT_TEMPLATE_LLAMA_3           },
54
    { "chatglm3",          LLM_CHAT_TEMPLATE_CHATGLM_3         },
55
    { "chatglm4",          LLM_CHAT_TEMPLATE_CHATGLM_4         },
56
    { "glmedge",           LLM_CHAT_TEMPLATE_GLMEDGE           },
57
    { "minicpm",           LLM_CHAT_TEMPLATE_MINICPM           },
58
    { "exaone3",           LLM_CHAT_TEMPLATE_EXAONE_3          },
59
    { "exaone4",           LLM_CHAT_TEMPLATE_EXAONE_4          },
60
    { "rwkv-world",        LLM_CHAT_TEMPLATE_RWKV_WORLD        },
61
    { "granite",           LLM_CHAT_TEMPLATE_GRANITE           },
62
    { "gigachat",          LLM_CHAT_TEMPLATE_GIGACHAT          },
63
    { "megrez",            LLM_CHAT_TEMPLATE_MEGREZ            },
64
    { "yandex",            LLM_CHAT_TEMPLATE_YANDEX            },
65
    { "bailing",           LLM_CHAT_TEMPLATE_BAILING           },
66
    { "bailing-think",     LLM_CHAT_TEMPLATE_BAILING_THINK     },
67
    { "bailing2",          LLM_CHAT_TEMPLATE_BAILING2          },
68
    { "llama4",            LLM_CHAT_TEMPLATE_LLAMA4            },
69
    { "smolvlm",           LLM_CHAT_TEMPLATE_SMOLVLM           },
70
    { "hunyuan-moe",       LLM_CHAT_TEMPLATE_HUNYUAN_MOE       },
71
    { "gpt-oss",           LLM_CHAT_TEMPLATE_OPENAI_MOE        },
72
    { "hunyuan-dense",     LLM_CHAT_TEMPLATE_HUNYUAN_DENSE     },
73
    { "kimi-k2",           LLM_CHAT_TEMPLATE_KIMI_K2           },
74
    { "seed_oss",          LLM_CHAT_TEMPLATE_SEED_OSS          },
75
    { "grok-2",            LLM_CHAT_TEMPLATE_GROK_2            },
76
    { "pangu-embedded",    LLM_CHAT_TEMPLATE_PANGU_EMBED       },
77
};
78
79
1.51k
llm_chat_template llm_chat_template_from_str(const std::string & name) {
80
1.51k
    return LLM_CHAT_TEMPLATES.at(name);
81
1.51k
}
82
83
1.51k
llm_chat_template llm_chat_detect_template(const std::string & tmpl) {
84
1.51k
    try {
85
1.51k
        return llm_chat_template_from_str(tmpl);
86
1.51k
    } catch (const std::out_of_range &) {
87
        // ignore
88
982
    }
89
90
27.4k
    auto tmpl_contains = [&tmpl](const char * haystack) -> bool {
91
27.4k
        return tmpl.find(haystack) != std::string::npos;
92
27.4k
    };
93
982
    if (tmpl_contains("<|im_start|>")) {
94
4
        return tmpl_contains("<|im_sep|>")
95
4
            ? LLM_CHAT_TEMPLATE_PHI_4
96
4
            : tmpl_contains("<end_of_utterance>")
97
2
                ? LLM_CHAT_TEMPLATE_SMOLVLM // SmolVLM uses <|im_start|> as BOS, but it is NOT chatml
98
2
                : LLM_CHAT_TEMPLATE_CHATML;
99
978
    } else if (tmpl.find("mistral") == 0 || tmpl_contains("[INST]")) {
100
238
        if (tmpl_contains("[SYSTEM_PROMPT]")) {
101
6
            return LLM_CHAT_TEMPLATE_MISTRAL_V7;
102
232
        } else if (
103
            // catches official 'v1' template
104
232
            tmpl_contains("' [INST] ' + system_message")
105
            // catches official 'v3' and 'v3-tekken' templates
106
224
            || tmpl_contains("[AVAILABLE_TOOLS]")
107
232
        ) {
108
            // Official mistral 'v1', 'v3' and 'v3-tekken' templates
109
            // See: https://github.com/mistralai/cookbook/blob/main/concept-deep-dive/tokenization/chat_templates.md
110
            // See: https://github.com/mistralai/cookbook/blob/main/concept-deep-dive/tokenization/templates.md
111
43
            if (tmpl_contains(" [INST]")) {
112
18
                return LLM_CHAT_TEMPLATE_MISTRAL_V1;
113
25
            } else if (tmpl_contains("\"[INST]\"")) {
114
5
                return LLM_CHAT_TEMPLATE_MISTRAL_V3_TEKKEN;
115
5
            }
116
20
            return LLM_CHAT_TEMPLATE_MISTRAL_V3;
117
189
        } else {
118
            // llama2 template and its variants
119
            // [variant] support system message
120
            // See: https://huggingface.co/blog/llama2#how-to-prompt-llama-2
121
189
            bool support_system_message = tmpl_contains("<<SYS>>");
122
189
            bool add_bos_inside_history = tmpl_contains("bos_token + '[INST]");
123
189
            bool strip_message = tmpl_contains("content.strip()");
124
189
            if (strip_message) {
125
63
                return LLM_CHAT_TEMPLATE_LLAMA_2_SYS_STRIP;
126
126
            } else if (add_bos_inside_history) {
127
14
                return LLM_CHAT_TEMPLATE_LLAMA_2_SYS_BOS;
128
112
            } else if (support_system_message) {
129
4
                return LLM_CHAT_TEMPLATE_LLAMA_2_SYS;
130
108
            } else {
131
108
                return LLM_CHAT_TEMPLATE_LLAMA_2;
132
108
            }
133
189
        }
134
740
    } else if (tmpl_contains("<|assistant|>") && tmpl_contains("<|end|>")) {
135
9
        return LLM_CHAT_TEMPLATE_PHI_3;
136
731
    } else if (tmpl_contains("[gMASK]<sop>")) {
137
12
        return LLM_CHAT_TEMPLATE_CHATGLM_4;
138
719
    } else if (tmpl_contains("<|assistant|>") && tmpl_contains("<|user|>")) {
139
9
        return tmpl_contains("</s>") ? LLM_CHAT_TEMPLATE_FALCON_3 : LLM_CHAT_TEMPLATE_GLMEDGE;
140
710
    } else if (tmpl_contains("<|{{ item['role'] }}|>") && tmpl_contains("<|begin_of_image|>")) {
141
2
        return LLM_CHAT_TEMPLATE_GLMEDGE;
142
708
    } else if (tmpl_contains("<|user|>") && tmpl_contains("<|endoftext|>")) {
143
2
        return LLM_CHAT_TEMPLATE_ZEPHYR;
144
706
    } else if (tmpl_contains("bos_token + message['role']")) {
145
3
        return LLM_CHAT_TEMPLATE_MONARCH;
146
703
    } else if (tmpl_contains("<start_of_turn>")) {
147
2
        return LLM_CHAT_TEMPLATE_GEMMA;
148
701
    } else if (tmpl_contains("'\\n\\nAssistant: ' + eos_token")) {
149
        // OrionStarAI/Orion-14B-Chat
150
7
        return LLM_CHAT_TEMPLATE_ORION;
151
694
    } else if (tmpl_contains("GPT4 Correct ")) {
152
        // openchat/openchat-3.5-0106
153
3
        return LLM_CHAT_TEMPLATE_OPENCHAT;
154
691
    } else if (tmpl_contains("USER: ") && tmpl_contains("ASSISTANT: ")) {
155
        // eachadea/vicuna-13b-1.1 (and Orca variant)
156
6
        if (tmpl_contains("SYSTEM: ")) {
157
1
            return LLM_CHAT_TEMPLATE_VICUNA_ORCA;
158
1
        }
159
5
        return LLM_CHAT_TEMPLATE_VICUNA;
160
685
    } else if (tmpl_contains("### Instruction:") && tmpl_contains("<|EOT|>")) {
161
        // deepseek-ai/deepseek-coder-33b-instruct
162
1
        return LLM_CHAT_TEMPLATE_DEEPSEEK;
163
684
    } else if (tmpl_contains("<|START_OF_TURN_TOKEN|>") && tmpl_contains("<|USER_TOKEN|>")) {
164
        // CohereForAI/c4ai-command-r-plus
165
4
        return LLM_CHAT_TEMPLATE_COMMAND_R;
166
680
    } else if (tmpl_contains("<|start_header_id|>") && tmpl_contains("<|end_header_id|>")) {
167
2
        return LLM_CHAT_TEMPLATE_LLAMA_3;
168
678
    } else if (tmpl_contains("[gMASK]sop")) {
169
        // chatglm3-6b
170
4
        return LLM_CHAT_TEMPLATE_CHATGLM_3;
171
674
    } else if (tmpl_contains(LU8("<用户>"))) {
172
        // MiniCPM-3B-OpenHermes-2.5-v2-GGUF
173
12
        return LLM_CHAT_TEMPLATE_MINICPM;
174
662
    } else if (tmpl_contains("'Assistant: ' + message['content'] + eos_token")) {
175
5
        return LLM_CHAT_TEMPLATE_DEEPSEEK_2;
176
657
    } else if (tmpl_contains(LU8("<|Assistant|>")) && tmpl_contains(LU8("<|User|>")) && tmpl_contains(LU8("<|end▁of▁sentence|>"))) {
177
1
        return LLM_CHAT_TEMPLATE_DEEPSEEK_3;
178
656
    } else if (tmpl_contains("[|system|]") && tmpl_contains("[|assistant|]") && tmpl_contains("[|endofturn|]")) {
179
7
        if (tmpl_contains("[|tool|]")) {
180
1
            return LLM_CHAT_TEMPLATE_EXAONE_4;
181
1
        }
182
        // ref: https://huggingface.co/LGAI-EXAONE/EXAONE-3.0-7.8B-Instruct/discussions/8#66bae61b1893d14ee8ed85bb
183
        // EXAONE-3.0-7.8B-Instruct
184
6
        return LLM_CHAT_TEMPLATE_EXAONE_3;
185
649
    } else if (tmpl_contains("rwkv-world") || tmpl_contains("{{- 'User: ' + message['content']|trim + '\\n\\n' -}}")) {
186
5
        return LLM_CHAT_TEMPLATE_RWKV_WORLD;
187
644
    } else if (tmpl_contains("<|start_of_role|>")) {
188
6
        return LLM_CHAT_TEMPLATE_GRANITE;
189
638
    } else if (tmpl_contains("message['role'] + additional_special_tokens[0] + message['content'] + additional_special_tokens[1]")) {
190
0
        return LLM_CHAT_TEMPLATE_GIGACHAT;
191
638
    } else if (tmpl_contains("<|role_start|>")) {
192
3
        return LLM_CHAT_TEMPLATE_MEGREZ;
193
635
    } else if (tmpl_contains(" Ассистент:")) {
194
2
        return LLM_CHAT_TEMPLATE_YANDEX;
195
633
    } else if (tmpl_contains("<role>ASSISTANT</role>") && tmpl_contains("'HUMAN'")) {
196
1
        return LLM_CHAT_TEMPLATE_BAILING;
197
632
    } else if (tmpl_contains("<role>ASSISTANT</role>") && tmpl_contains("\"HUMAN\"") && tmpl_contains("<think>")) {
198
1
        return LLM_CHAT_TEMPLATE_BAILING_THINK;
199
631
    } else if (tmpl_contains("<role>ASSISTANT</role>") && tmpl_contains("<role>HUMAN</role>") && tmpl_contains("<|role_end|>")) {
200
1
        return LLM_CHAT_TEMPLATE_BAILING2;
201
630
    } else if (tmpl_contains("<|header_start|>") && tmpl_contains("<|header_end|>")) {
202
1
        return LLM_CHAT_TEMPLATE_LLAMA4;
203
629
    } else if (tmpl_contains("<|endofuserprompt|>")) {
204
2
        return LLM_CHAT_TEMPLATE_DOTS1;
205
627
    } else if (tmpl_contains("<|extra_0|>") && tmpl_contains("<|extra_4|>")) {
206
1
        return LLM_CHAT_TEMPLATE_HUNYUAN_MOE;
207
626
    } else if (tmpl_contains("<|start|>") && tmpl_contains("<|channel|>")) {
208
1
        return LLM_CHAT_TEMPLATE_OPENAI_MOE;
209
625
    } else if (tmpl_contains("<|hy_Assistant|>") && tmpl_contains("<|hy_place▁holder▁no▁3|>")) {
210
1
        return LLM_CHAT_TEMPLATE_HUNYUAN_DENSE;
211
624
    } else if (tmpl_contains("<|im_assistant|>assistant<|im_middle|>")) {
212
2
        return LLM_CHAT_TEMPLATE_KIMI_K2;
213
622
    } else if (tmpl_contains("<seed:bos>")) {
214
13
        return LLM_CHAT_TEMPLATE_SEED_OSS;
215
609
    } else if (tmpl_contains("'Assistant: '  + message['content'] + '<|separator|>")) {
216
3
        return LLM_CHAT_TEMPLATE_GROK_2;
217
606
    } else if (tmpl_contains(LU8("[unused9]系统:[unused10]"))) {
218
11
        return LLM_CHAT_TEMPLATE_PANGU_EMBED;
219
11
    }
220
595
    return LLM_CHAT_TEMPLATE_UNKNOWN;
221
982
}
222
223
// Simple version of "llama_apply_chat_template" that only works with strings
224
// This function uses heuristic checks to determine commonly used template. It is not a jinja parser.
225
int32_t llm_chat_apply_template(
226
    llm_chat_template tmpl,
227
    const std::vector<const llama_chat_message *> & chat,
228
917
    std::string & dest, bool add_ass) {
229
    // Taken from the research: https://github.com/ggerganov/llama.cpp/issues/5527
230
917
    std::stringstream ss;
231
917
    if (tmpl == LLM_CHAT_TEMPLATE_CHATML) {
232
        // chatml template
233
18
        for (auto message : chat) {
234
18
            ss << "<|im_start|>" << message->role << "\n" << message->content << "<|im_end|>\n";
235
18
        }
236
3
        if (add_ass) {
237
3
            ss << "<|im_start|>assistant\n";
238
3
        }
239
914
    } else if (tmpl == LLM_CHAT_TEMPLATE_MISTRAL_V7 || tmpl == LLM_CHAT_TEMPLATE_MISTRAL_V7_TEKKEN) {
240
        // Official mistral 'v7' template
241
        // See: https://huggingface.co/mistralai/Mistral-Large-Instruct-2411#basic-instruct-template-v7
242
        //      https://huggingface.co/mistralai/Mistral-Small-3.1-24B-Instruct-2503#basic-instruct-template-v7-tekken
243
79
        const char * trailing_space = tmpl == LLM_CHAT_TEMPLATE_MISTRAL_V7 ? " " : "";
244
474
        for (auto message : chat) {
245
474
            std::string role(message->role);
246
474
            std::string content(message->content);
247
474
            if (role == "system") {
248
79
                ss << "[SYSTEM_PROMPT]" << trailing_space << content << "[/SYSTEM_PROMPT]";
249
395
            } else if (role == "user") {
250
237
                ss << "[INST]" << trailing_space << content << "[/INST]";
251
237
            } else {
252
158
                ss << trailing_space << content << "</s>";
253
158
            }
254
474
        }
255
835
    } else if (tmpl == LLM_CHAT_TEMPLATE_MISTRAL_V1
256
717
            || tmpl == LLM_CHAT_TEMPLATE_MISTRAL_V3
257
678
            || tmpl == LLM_CHAT_TEMPLATE_MISTRAL_V3_TEKKEN) {
258
        // See: https://github.com/mistralai/cookbook/blob/main/concept-deep-dive/tokenization/chat_templates.md
259
        // See: https://github.com/mistralai/cookbook/blob/main/concept-deep-dive/tokenization/templates.md
260
163
        std::string leading_space = tmpl == LLM_CHAT_TEMPLATE_MISTRAL_V1 ? " " : "";
261
163
        std::string trailing_space = tmpl == LLM_CHAT_TEMPLATE_MISTRAL_V3_TEKKEN ? "" : " ";
262
163
        bool trim_assistant_message = tmpl == LLM_CHAT_TEMPLATE_MISTRAL_V3;
263
163
        bool is_inside_turn = false;
264
978
        for (auto message : chat) {
265
978
            if (!is_inside_turn) {
266
489
                ss << leading_space << "[INST]" << trailing_space;
267
489
                is_inside_turn = true;
268
489
            }
269
978
            std::string role(message->role);
270
978
            std::string content(message->content);
271
978
            if (role == "system") {
272
163
                ss << content << "\n\n";
273
815
            } else if (role == "user") {
274
489
                ss << content << leading_space << "[/INST]";
275
489
            } else {
276
326
                ss << trailing_space << (trim_assistant_message ? trim(content) : content) << "</s>";
277
326
                is_inside_turn = false;
278
326
            }
279
978
        }
280
672
    } else if (
281
672
            tmpl == LLM_CHAT_TEMPLATE_LLAMA_2
282
541
            || tmpl == LLM_CHAT_TEMPLATE_LLAMA_2_SYS
283
535
            || tmpl == LLM_CHAT_TEMPLATE_LLAMA_2_SYS_BOS
284
519
            || tmpl == LLM_CHAT_TEMPLATE_LLAMA_2_SYS_STRIP) {
285
        // llama2 template and its variants
286
        // [variant] support system message
287
        // See: https://huggingface.co/blog/llama2#how-to-prompt-llama-2
288
217
        bool support_system_message = tmpl != LLM_CHAT_TEMPLATE_LLAMA_2;
289
        // [variant] add BOS inside history
290
217
        bool add_bos_inside_history = tmpl == LLM_CHAT_TEMPLATE_LLAMA_2_SYS_BOS;
291
        // [variant] trim spaces from the input message
292
217
        bool strip_message = tmpl == LLM_CHAT_TEMPLATE_LLAMA_2_SYS_STRIP;
293
        // construct the prompt
294
217
        bool is_inside_turn = true; // skip BOS at the beginning
295
217
        ss << "[INST] ";
296
1.30k
        for (auto message : chat) {
297
1.30k
            std::string content = strip_message ? trim(message->content) : message->content;
298
1.30k
            std::string role(message->role);
299
1.30k
            if (!is_inside_turn) {
300
434
                is_inside_turn = true;
301
434
                ss << (add_bos_inside_history ? "<s>[INST] " : "[INST] ");
302
434
            }
303
1.30k
            if (role == "system") {
304
217
                if (support_system_message) {
305
86
                    ss << "<<SYS>>\n" << content << "\n<</SYS>>\n\n";
306
131
                } else {
307
                    // if the model does not support system message, we still include it in the first message, but without <<SYS>>
308
131
                    ss << content << "\n";
309
131
                }
310
1.08k
            } else if (role == "user") {
311
651
                ss << content << " [/INST]";
312
651
            } else {
313
434
                ss << content << "</s>";
314
434
                is_inside_turn = false;
315
434
            }
316
1.30k
        }
317
455
    } else if (tmpl == LLM_CHAT_TEMPLATE_PHI_3) {
318
        // Phi 3
319
60
        for (auto message : chat) {
320
60
            std::string role(message->role);
321
60
            ss << "<|" << role << "|>\n" << message->content << "<|end|>\n";
322
60
        }
323
10
        if (add_ass) {
324
10
            ss << "<|assistant|>\n";
325
10
        }
326
445
    } else if (tmpl == LLM_CHAT_TEMPLATE_PHI_4) {
327
        // chatml template
328
18
        for (auto message : chat) {
329
18
            ss << "<|im_start|>" << message->role << "<|im_sep|>" << message->content << "<|im_end|>";
330
18
        }
331
3
        if (add_ass) {
332
3
            ss << "<|im_start|>assistant<|im_sep|>";
333
3
        }
334
442
    } else if (tmpl == LLM_CHAT_TEMPLATE_FALCON_3) {
335
        // Falcon 3
336
18
        for (auto message : chat) {
337
18
            std::string role(message->role);
338
18
            ss << "<|" << role << "|>\n" << message->content << "\n";
339
18
        }
340
3
        if (add_ass) {
341
3
            ss << "<|assistant|>\n";
342
3
        }
343
439
    } else if (tmpl == LLM_CHAT_TEMPLATE_ZEPHYR) {
344
        // zephyr template
345
24
        for (auto message : chat) {
346
24
            ss << "<|" << message->role << "|>" << "\n" << message->content << "<|endoftext|>\n";
347
24
        }
348
4
        if (add_ass) {
349
4
            ss << "<|assistant|>\n";
350
4
        }
351
435
    } else if (tmpl == LLM_CHAT_TEMPLATE_MONARCH) {
352
        // mlabonne/AlphaMonarch-7B template (the <s> is included inside history)
353
24
        for (auto message : chat) {
354
24
            std::string bos = (message == chat.front()) ? "" : "<s>"; // skip BOS for first message
355
24
            ss << bos << message->role << "\n" << message->content << "</s>\n";
356
24
        }
357
4
        if (add_ass) {
358
4
            ss << "<s>assistant\n";
359
4
        }
360
431
    } else if (tmpl == LLM_CHAT_TEMPLATE_GEMMA) {
361
        // google/gemma-7b-it
362
190
        std::string system_prompt = "";
363
1.14k
        for (auto message : chat) {
364
1.14k
            std::string role(message->role);
365
1.14k
            if (role == "system") {
366
                // there is no system message for gemma, but we will merge it with user prompt, so nothing is broken
367
190
                system_prompt += trim(message->content);
368
190
                continue;
369
190
            }
370
            // in gemma, "assistant" is "model"
371
950
            role = role == "assistant" ? "model" : message->role;
372
950
            ss << "<start_of_turn>" << role << "\n";
373
950
            if (!system_prompt.empty() && role != "model") {
374
138
                ss << system_prompt << "\n\n";
375
138
                system_prompt = "";
376
138
            }
377
950
            ss << trim(message->content) << "<end_of_turn>\n";
378
950
        }
379
190
        if (add_ass) {
380
190
            ss << "<start_of_turn>model\n";
381
190
        }
382
241
    } else if (tmpl == LLM_CHAT_TEMPLATE_ORION) {
383
        // OrionStarAI/Orion-14B-Chat
384
13
        std::string system_prompt = "";
385
78
        for (auto message : chat) {
386
78
            std::string role(message->role);
387
78
            if (role == "system") {
388
                // there is no system message support, we will merge it with user prompt
389
13
                system_prompt += message->content;
390
13
                continue;
391
65
            } else if (role == "user") {
392
39
                ss << "Human: ";
393
39
                if (!system_prompt.empty()) {
394
5
                    ss << system_prompt << "\n\n";
395
5
                    system_prompt = "";
396
5
                }
397
39
                ss << message->content << "\n\nAssistant: </s>";
398
39
            } else {
399
26
                ss << message->content << "</s>";
400
26
            }
401
78
        }
402
228
    } else if (tmpl == LLM_CHAT_TEMPLATE_OPENCHAT) {
403
        // openchat/openchat-3.5-0106,
404
24
        for (auto message : chat) {
405
24
            std::string role(message->role);
406
24
            if (role == "system") {
407
4
                ss << message->content << "<|end_of_turn|>";
408
20
            } else {
409
20
                role[0] = toupper(role[0]);
410
20
                ss << "GPT4 Correct " << role << ": " << message->content << "<|end_of_turn|>";
411
20
            }
412
24
        }
413
4
        if (add_ass) {
414
4
            ss << "GPT4 Correct Assistant:";
415
4
        }
416
224
    } else if (tmpl == LLM_CHAT_TEMPLATE_VICUNA || tmpl == LLM_CHAT_TEMPLATE_VICUNA_ORCA) {
417
        // eachadea/vicuna-13b-1.1 (and Orca variant)
418
48
        for (auto message : chat) {
419
48
            std::string role(message->role);
420
48
            if (role == "system") {
421
                // Orca-Vicuna variant uses a system prefix
422
8
                if (tmpl == LLM_CHAT_TEMPLATE_VICUNA_ORCA) {
423
2
                    ss << "SYSTEM: " << message->content << "\n";
424
6
                } else {
425
6
                    ss << message->content << "\n\n";
426
6
                }
427
40
            } else if (role == "user") {
428
24
                ss << "USER: " << message->content << "\n";
429
24
            } else if (role == "assistant") {
430
16
                ss << "ASSISTANT: " << message->content << "</s>\n";
431
16
            }
432
48
        }
433
8
        if (add_ass) {
434
8
            ss << "ASSISTANT:";
435
8
        }
436
216
    } else if (tmpl == LLM_CHAT_TEMPLATE_DEEPSEEK) {
437
        // deepseek-ai/deepseek-coder-33b-instruct
438
12
        for (auto message : chat) {
439
12
            std::string role(message->role);
440
12
            if (role == "system") {
441
2
                ss << message->content;
442
10
            } else if (role == "user") {
443
6
                ss << "### Instruction:\n" << message->content << "\n";
444
6
            } else if (role == "assistant") {
445
4
                ss << "### Response:\n" << message->content << "\n<|EOT|>\n";
446
4
            }
447
12
        }
448
2
        if (add_ass) {
449
2
            ss << "### Response:\n";
450
2
        }
451
214
    } else if (tmpl == LLM_CHAT_TEMPLATE_COMMAND_R) {
452
        // CohereForAI/c4ai-command-r-plus
453
114
        for (auto message : chat) {
454
114
            std::string role(message->role);
455
114
            if (role == "system") {
456
19
                ss << "<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|>" << trim(message->content) << "<|END_OF_TURN_TOKEN|>";
457
95
            } else if (role == "user") {
458
57
                ss << "<|START_OF_TURN_TOKEN|><|USER_TOKEN|>" << trim(message->content) << "<|END_OF_TURN_TOKEN|>";
459
57
            } else if (role == "assistant") {
460
38
                ss << "<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>" << trim(message->content) << "<|END_OF_TURN_TOKEN|>";
461
38
            }
462
114
        }
463
19
        if (add_ass) {
464
19
            ss << "<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>";
465
19
        }
466
195
    } else if (tmpl == LLM_CHAT_TEMPLATE_LLAMA_3) {
467
        // Llama 3
468
120
        for (auto message : chat) {
469
120
            std::string role(message->role);
470
120
            ss << "<|start_header_id|>" << role << "<|end_header_id|>\n\n" << trim(message->content) << "<|eot_id|>";
471
120
        }
472
20
        if (add_ass) {
473
20
            ss << "<|start_header_id|>assistant<|end_header_id|>\n\n";
474
20
        }
475
175
    } else if (tmpl == LLM_CHAT_TEMPLATE_CHATGLM_3) {
476
        // chatglm3-6b
477
5
        ss << "[gMASK]" << "sop";
478
30
        for (auto message : chat) {
479
30
            std::string role(message->role);
480
30
            ss << "<|" << role << "|>" << "\n " << message->content;
481
30
        }
482
5
        if (add_ass) {
483
5
            ss << "<|assistant|>";
484
5
        }
485
170
    } else if (tmpl == LLM_CHAT_TEMPLATE_CHATGLM_4) {
486
13
        ss << "[gMASK]" << "<sop>";
487
78
        for (auto message : chat) {
488
78
            std::string role(message->role);
489
78
            ss << "<|" << role << "|>" << "\n" << message->content;
490
78
        }
491
13
        if (add_ass) {
492
13
            ss << "<|assistant|>\n";
493
13
        }
494
157
    } else if (tmpl == LLM_CHAT_TEMPLATE_GLMEDGE) {
495
54
        for (auto message : chat) {
496
54
            std::string role(message->role);
497
54
            ss << "<|" << role << "|>" << "\n" << message->content;
498
54
        }
499
9
        if (add_ass) {
500
9
            ss << "<|assistant|>";
501
9
        }
502
148
    } else if (tmpl == LLM_CHAT_TEMPLATE_MINICPM) {
503
        // MiniCPM-3B-OpenHermes-2.5-v2-GGUF
504
114
        for (auto message : chat) {
505
114
            std::string role(message->role);
506
114
            if (role == "user") {
507
57
                ss << LU8("<用户>");
508
57
                ss << trim(message->content);
509
57
                ss << "<AI>";
510
57
            } else {
511
57
                ss << trim(message->content);
512
57
            }
513
114
        }
514
129
    } else if (tmpl == LLM_CHAT_TEMPLATE_DEEPSEEK_2) {
515
        // DeepSeek-V2
516
36
        for (auto message : chat) {
517
36
            std::string role(message->role);
518
36
            if (role == "system") {
519
6
                ss << message->content << "\n\n";
520
30
            } else if (role == "user") {
521
18
                ss << "User: " << message->content << "\n\n";
522
18
            } else if (role == "assistant") {
523
12
                ss << "Assistant: " << message->content << LU8("<|end▁of▁sentence|>");
524
12
            }
525
36
        }
526
6
        if (add_ass) {
527
6
            ss << "Assistant:";
528
6
        }
529
123
    } else if (tmpl == LLM_CHAT_TEMPLATE_DEEPSEEK_3) {
530
        // DeepSeek-V3
531
12
        for (auto message : chat) {
532
12
            std::string role(message->role);
533
12
            if (role == "system") {
534
2
                ss << message->content << "\n\n";
535
10
            } else if (role == "user") {
536
6
                ss << LU8("<|User|>") << message->content;
537
6
            } else if (role == "assistant") {
538
4
                ss << LU8("<|Assistant|>") << message->content << LU8("<|end▁of▁sentence|>");
539
4
            }
540
12
        }
541
2
        if (add_ass) {
542
2
            ss << LU8("<|Assistant|>");
543
2
        }
544
121
    } else if (tmpl == LLM_CHAT_TEMPLATE_EXAONE_3) {
545
        // ref: https://huggingface.co/LGAI-EXAONE/EXAONE-3.0-7.8B-Instruct/discussions/8#66bae61b1893d14ee8ed85bb
546
        // EXAONE-3.0-7.8B-Instruct
547
108
        for (auto message : chat) {
548
108
            std::string role(message->role);
549
108
            if (role == "system") {
550
18
                ss << "[|system|]" << trim(message->content) << "[|endofturn|]\n";
551
90
            } else if (role == "user") {
552
54
                ss << "[|user|]" << trim(message->content) << "\n";
553
54
            } else if (role == "assistant") {
554
36
                ss << "[|assistant|]" << trim(message->content) << "[|endofturn|]\n";
555
36
            }
556
108
        }
557
18
        if (add_ass) {
558
18
            ss << "[|assistant|]";
559
18
        }
560
103
    } else if (tmpl == LLM_CHAT_TEMPLATE_EXAONE_4) {
561
48
        for (auto message : chat) {
562
48
            std::string role(message->role);
563
48
            if (role == "system") {
564
8
                ss << "[|system|]" << trim(message->content) << "[|endofturn|]\n";
565
40
            } else if (role == "user") {
566
24
                ss << "[|user|]" << trim(message->content) << "\n";
567
24
            } else if (role == "assistant") {
568
16
                ss << "[|assistant|]" << trim(message->content) << "[|endofturn|]\n";
569
16
            } else if (role == "tool") {
570
0
                ss << "[|tool|]" << trim(message->content) << "[|endofturn|]\n";
571
0
            }
572
48
        }
573
8
        if (add_ass) {
574
8
            ss << "[|assistant|]";
575
8
        }
576
95
    } else if (tmpl == LLM_CHAT_TEMPLATE_RWKV_WORLD) {
577
        // this template requires the model to have "\n\n" as EOT token
578
91
        for (size_t i = 0; i < chat.size(); i++) {
579
78
            std::string role(chat[i]->role);
580
78
            if (role == "system") {
581
13
                ss << "System: " << trim(chat[i]->content) << "\n\n";
582
65
            } else if (role == "user") {
583
39
                ss << "User: " << trim(chat[i]->content) << "\n\n";
584
39
                if (i == chat.size() - 1) {
585
13
                    ss << "Assistant:";
586
13
                }
587
39
            } else if (role == "assistant") {
588
26
                ss << "Assistant: " << trim(chat[i]->content) << "\n\n";
589
26
            }
590
78
        }
591
82
    } else if (tmpl == LLM_CHAT_TEMPLATE_GRANITE) {
592
        // IBM Granite template
593
48
        for (const auto & message : chat) {
594
48
            std::string role(message->role);
595
48
            ss << "<|start_of_role|>" << role << "<|end_of_role|>";
596
48
            if (role == "assistant_tool_call") {
597
0
                ss << "<|tool_call|>";
598
0
            }
599
48
            ss << message->content << "<|end_of_text|>\n";
600
48
        }
601
8
        if (add_ass) {
602
8
            ss << "<|start_of_role|>assistant<|end_of_role|>";
603
8
        }
604
74
    } else if (tmpl == LLM_CHAT_TEMPLATE_GIGACHAT) {
605
        // GigaChat template
606
1
        bool has_system = !chat.empty() && std::string(chat[0]->role) == "system";
607
608
        // Handle system message if present
609
1
        if (has_system) {
610
1
            ss << "<s>" << chat[0]->content << "<|message_sep|>";
611
1
        } else {
612
0
            ss << "<s>";
613
0
        }
614
615
        // Process remaining messages
616
6
        for (size_t i = has_system ? 1 : 0; i < chat.size(); i++) {
617
5
            std::string role(chat[i]->role);
618
5
            if (role == "user") {
619
3
                ss << "user<|role_sep|>" << chat[i]->content << "<|message_sep|>"
620
3
                << "available functions<|role_sep|>[]<|message_sep|>";
621
3
            } else if (role == "assistant") {
622
2
                ss << "assistant<|role_sep|>" << chat[i]->content << "<|message_sep|>";
623
2
            }
624
5
        }
625
626
        // Add generation prompt if needed
627
1
        if (add_ass) {
628
1
            ss << "assistant<|role_sep|>";
629
1
        }
630
73
    }  else if (tmpl == LLM_CHAT_TEMPLATE_MEGREZ) {
631
        // Megrez template
632
18
        for (auto message : chat) {
633
18
            std::string role(message->role);
634
18
            ss << "<|role_start|>" << role << "<|role_end|>" << message->content << "<|turn_end|>";
635
18
        }
636
637
3
        if (add_ass) {
638
3
            ss << "<|role_start|>assistant<|role_end|>";
639
3
        }
640
70
    } else if (tmpl == LLM_CHAT_TEMPLATE_YANDEX) {
641
        // Yandex template ("\n\n" is defined as EOT token)
642
643
21
        for (size_t i = 0; i < chat.size(); i++) {
644
18
            std::string role(chat[i]->role);
645
18
            if (role == "user") {
646
9
                ss << " Пользователь: " << chat[i]->content << "\n\n";
647
9
            } else if (role == "assistant") {
648
6
                ss << " Ассистент: " << chat[i]->content << "\n\n";
649
6
            }
650
18
        }
651
652
        // Add generation prompt if needed
653
3
        if (add_ass) {
654
3
            ss << " Ассистент:[SEP]";
655
3
        }
656
67
    } else if (tmpl == LLM_CHAT_TEMPLATE_BAILING || tmpl == LLM_CHAT_TEMPLATE_BAILING_THINK) {
657
        // Bailing (Ling/Ring) template
658
24
        for (auto message : chat) {
659
24
            std::string role(message->role);
660
661
24
            if (role == "user") {
662
12
                role = "HUMAN";
663
12
            } else {
664
12
                std::transform(role.begin(), role.end(), role.begin(), ::toupper);
665
12
            }
666
667
24
            ss << "<role>" << role << "</role>" << message->content;
668
24
        }
669
670
4
        if (add_ass) {
671
4
            ss << "<role>ASSISTANT</role>";
672
673
4
            if (tmpl == LLM_CHAT_TEMPLATE_BAILING_THINK) {
674
2
                ss << "<think>";
675
2
            }
676
4
        }
677
63
    } else if (tmpl == LLM_CHAT_TEMPLATE_BAILING2) {
678
        // Bailing2 (Ling 2.0) template
679
2
        bool has_system = !chat.empty() && std::string(chat[0]->role) == "system";
680
681
2
        if (!has_system) {
682
0
            ss << "<role>SYSTEM</role>detailed thinking off<|role_end|>";
683
0
        }
684
685
12
        for (auto message : chat) {
686
12
            std::string role(message->role);
687
688
12
            if (role == "user") {
689
6
                role = "HUMAN";
690
6
            } else {
691
6
                std::transform(role.begin(), role.end(), role.begin(), ::toupper);
692
6
            }
693
694
12
            ss << "<role>" << role << "</role>" << message->content << "<|role_end|>";
695
12
        }
696
697
2
        if (add_ass) {
698
2
            ss << "<role>ASSISTANT</role>";
699
2
        }
700
61
    } else if (tmpl == LLM_CHAT_TEMPLATE_LLAMA4) {
701
        // Llama 4
702
42
        for (auto message : chat) {
703
42
            std::string role(message->role);
704
42
            ss << "<|header_start|>" << role << "<|header_end|>\n\n" << trim(message->content) << "<|eot|>";
705
42
        }
706
7
        if (add_ass) {
707
7
            ss << "<|header_start|>assistant<|header_end|>\n\n";
708
7
        }
709
54
    } else if (tmpl == LLM_CHAT_TEMPLATE_SMOLVLM) {
710
        // SmolVLM
711
1
        ss << "<|im_start|>"; // uses <|im_start|> as BOS, but the actual content is NOT chatml
712
6
        for (auto message : chat) {
713
6
            std::string role(message->role);
714
6
            if (role == "system") {
715
1
                ss << message->content << "\n\n";
716
5
            } else if (role == "user") {
717
3
                ss << "User: " << message->content << "<end_of_utterance>\n";
718
3
            } else {
719
2
                ss << "Assistant: " << message->content << "<end_of_utterance>\n";
720
2
            }
721
6
        }
722
1
        if (add_ass) {
723
1
            ss << "Assistant:";
724
1
        }
725
53
    } else if (tmpl == LLM_CHAT_TEMPLATE_DOTS1) {
726
        // dots.llm1.inst (DOTS1)
727
12
        for (auto message : chat) {
728
12
            std::string role(message->role);
729
12
            if (role == "system") {
730
2
                ss << "<|system|>" << message->content << "<|endofsystem|>";
731
10
            } else if (role == "user") {
732
6
                ss << "<|userprompt|>" << message->content << "<|endofuserprompt|>";
733
6
            } else {
734
4
                ss << "<|response|>" << message->content << "<|endofresponse|>";
735
4
            }
736
12
        }
737
2
        if (add_ass) {
738
2
            ss << "<|response|>";
739
2
        }
740
51
    } else if (tmpl == LLM_CHAT_TEMPLATE_HUNYUAN_MOE) {
741
        // tencent/Hunyuan-A13B-Instruct
742
12
        for (auto message : chat) {
743
12
            std::string role(message->role);
744
12
            if (role == "system") {
745
2
                ss << "<|startoftext|>" << message->content << "<|extra_4|>";
746
10
            } else if (role == "assistant") {
747
4
                ss << message->content << "<|eos|>";
748
6
            } else {
749
6
                ss << "<|startoftext|>" << message->content << "<|extra_0|>";
750
6
            }
751
12
        }
752
49
    } else if (tmpl == LLM_CHAT_TEMPLATE_OPENAI_MOE) {
753
        // OpenAI MoE (based on Harmony chat template)
754
12
        for (auto message : chat) {
755
12
            std::string role(message->role);
756
12
            ss << "<|start|>" << role << "<|message|>" << message->content;
757
12
            ss << (role == "assistant" ? "<|return|>" : "<|end|>");
758
12
        }
759
2
        if (add_ass) {
760
2
            ss << "<|start|>assistant";
761
2
        }
762
47
    } else if (tmpl == LLM_CHAT_TEMPLATE_HUNYUAN_DENSE) {
763
        // tencent/Hunyuan-4B-Instruct
764
14
        for (size_t i = 0; i < chat.size(); i++) {
765
12
            std::string role(chat[i]->role);
766
12
            if (i == 0) {
767
2
                if (role == "system") {
768
2
                    ss << chat[i]->content << "<|hy_place▁holder▁no▁3|>";
769
2
                }
770
2
            }
771
772
12
            if (role == "assistant") {
773
4
                ss << "<|hy_Assistant|>" << chat[i]->content << "<|hy_place▁holder▁no▁2|>";
774
8
            } else if (role == "user") {
775
6
                ss << "<|hy_User|>" << chat[i]->content << "<|hy_Assistant|>";
776
6
            }
777
12
        }
778
45
    } else if (tmpl == LLM_CHAT_TEMPLATE_KIMI_K2) {
779
        // moonshotai/Kimi-K2-Instruct
780
12
        for (auto message : chat) {
781
12
            std::string role(message->role);
782
12
            if (role == "system") {
783
2
                ss << "<|im_system|>system<|im_middle|>";
784
10
            } else if (role == "user") {
785
6
                ss << "<|im_user|>user<|im_middle|>";
786
6
            } else if (role == "assistant") {
787
4
                ss << "<|im_assistant|>assistant<|im_middle|>";
788
4
            } else if (role == "tool") {
789
0
                ss << "<|im_system|>tool<|im_middle|>";
790
0
            }
791
792
12
            ss << message->content << "<|im_end|>";
793
12
        }
794
2
        if (add_ass) {
795
2
            ss << "<|im_assistant|>assistant<|im_middle|>";
796
2
        }
797
43
    } else if (tmpl == LLM_CHAT_TEMPLATE_SEED_OSS) {
798
78
        for (auto message: chat) {
799
78
            std::string role(message->role);
800
78
            ss << "<seed:bos>" << role << "\n" << (role == "assistant" ? trim(message->content) : message->content) << "<seed:eos>";
801
78
        }
802
13
        if (add_ass) {
803
13
            ss << "<seed:bos>assistant\n";
804
13
        }
805
30
    } else if (tmpl == LLM_CHAT_TEMPLATE_GROK_2) {
806
114
        for (auto message : chat) {
807
114
            std::string role(message->role);
808
114
            if (role == "system") {
809
19
                ss << "System: " << trim(message->content) << "<|separator|>\n\n";
810
95
            } else if (role == "user") {
811
57
                ss << "Human: " << trim(message->content) << "<|separator|>\n\n";
812
57
            } else if (role == "assistant") {
813
38
                ss << "Assistant: " << message->content << "<|separator|>\n\n";
814
38
            }
815
114
        }
816
19
        if (add_ass) {
817
19
            ss << "Assistant:";
818
19
        }
819
19
    }else if (tmpl == LLM_CHAT_TEMPLATE_PANGU_EMBED) {
820
        // [unused9]系统:xxx[unused10]
821
        // [unused9]用户:xxx[unused10]
822
        // [unused9]助手:xxx[unused10]
823
        // ...
824
77
        for (size_t i = 0; i < chat.size(); ++i) {
825
66
            const auto & msg = chat[i];
826
66
            const std::string & role = msg->role;
827
66
            const std::string & content = msg->content;
828
829
66
            if (i == 0 && role != "system") {
830
0
                ss << "[unused9]系统:[unused10]";
831
0
            }
832
833
66
            if (role == "system") {
834
11
                ss << "[unused9]系统:" << content << "[unused10]";
835
55
            } else if (role == "user") {
836
33
                ss << "[unused9]用户:" << content << "[unused10]";
837
33
            } else if (role == "assistant") {
838
22
                ss << "[unused9]助手:" << content << "[unused10]";
839
22
            } else if (role == "tool") {
840
0
                ss << "[unused9]工具:" << content << "[unused10]";
841
0
            } else if (role == "function") {
842
0
                ss << "[unused9]方法:" << content << "[unused10]";
843
0
            }
844
66
        }
845
11
        if (add_ass) {
846
11
            ss << "[unused9]助手:";
847
11
        }
848
11
    } else {
849
        // template not supported
850
0
        return -1;
851
0
    }
852
917
    dest = ss.str();
853
917
    return dest.size();
854
917
}
855
856
// public interface
857
858
0
// Copy the names of all built-in chat templates into `output`.
//
// Fills at most `len` entries of `output` with pointers to the template
// name strings (owned by the internal registry; do not free them), and
// returns the total number of built-in templates regardless of how many
// were written. Callers can therefore pass len == 0 to query the count,
// then call again with a large-enough buffer.
int32_t llama_chat_builtin_templates(const char ** output, size_t len) {
    size_t idx = 0;
    for (const auto & entry : LLM_CHAT_TEMPLATES) {
        if (idx >= len) {
            break; // output buffer exhausted; keep counting via map size below
        }
        output[idx] = entry.first.c_str();
        ++idx;
    }
    return (int32_t) LLM_CHAT_TEMPLATES.size();
}