/src/llama.cpp/src/llama-chat.cpp
Line | Count | Source |
#include "llama-chat.h"

#include "llama.h"

#include <algorithm>
#include <map>
#include <sstream>
#include <stdexcept>
// LU8(x): portability wrapper for u8"" (UTF-8) string literals.
// Since C++20, u8"" literals have type const char8_t[], which no longer
// converts to const char *; cast back so the literals can be passed to
// code expecting plain C strings. Pre-C++20, u8"" is already const char[].
#if __cplusplus >= 202000L
#define LU8(x) (const char*)(u8##x)
#else
#define LU8(x) u8##x
#endif
14 | | |
// Return a copy of `str` with all leading and trailing whitespace removed
// (whitespace as classified by isspace; chars are cast through unsigned
// char first to avoid UB on negative char values).
static std::string trim(const std::string & str) {
    const auto is_not_space = [](unsigned char ch) {
        return !isspace(ch);
    };
    const auto first = std::find_if(str.begin(), str.end(), is_not_space);
    const auto last  = std::find_if(str.rbegin(), str.rend(), is_not_space).base();
    // `first` passes `last` only when the string is empty or all whitespace
    return first < last ? std::string(first, last) : std::string();
}
27 | | |
28 | | static const std::map<std::string, llm_chat_template> LLM_CHAT_TEMPLATES = { |
29 | | { "chatml", LLM_CHAT_TEMPLATE_CHATML }, |
30 | | { "llama2", LLM_CHAT_TEMPLATE_LLAMA_2 }, |
31 | | { "llama2-sys", LLM_CHAT_TEMPLATE_LLAMA_2_SYS }, |
32 | | { "llama2-sys-bos", LLM_CHAT_TEMPLATE_LLAMA_2_SYS_BOS }, |
33 | | { "llama2-sys-strip", LLM_CHAT_TEMPLATE_LLAMA_2_SYS_STRIP }, |
34 | | { "mistral-v1", LLM_CHAT_TEMPLATE_MISTRAL_V1 }, |
35 | | { "mistral-v3", LLM_CHAT_TEMPLATE_MISTRAL_V3 }, |
36 | | { "mistral-v3-tekken", LLM_CHAT_TEMPLATE_MISTRAL_V3_TEKKEN }, |
37 | | { "mistral-v7", LLM_CHAT_TEMPLATE_MISTRAL_V7 }, |
38 | | { "mistral-v7-tekken", LLM_CHAT_TEMPLATE_MISTRAL_V7_TEKKEN }, |
39 | | { "phi3", LLM_CHAT_TEMPLATE_PHI_3 }, |
40 | | { "phi4", LLM_CHAT_TEMPLATE_PHI_4 }, |
41 | | { "falcon3", LLM_CHAT_TEMPLATE_FALCON_3 }, |
42 | | { "zephyr", LLM_CHAT_TEMPLATE_ZEPHYR }, |
43 | | { "monarch", LLM_CHAT_TEMPLATE_MONARCH }, |
44 | | { "gemma", LLM_CHAT_TEMPLATE_GEMMA }, |
45 | | { "orion", LLM_CHAT_TEMPLATE_ORION }, |
46 | | { "openchat", LLM_CHAT_TEMPLATE_OPENCHAT }, |
47 | | { "vicuna", LLM_CHAT_TEMPLATE_VICUNA }, |
48 | | { "vicuna-orca", LLM_CHAT_TEMPLATE_VICUNA_ORCA }, |
49 | | { "deepseek", LLM_CHAT_TEMPLATE_DEEPSEEK }, |
50 | | { "deepseek2", LLM_CHAT_TEMPLATE_DEEPSEEK_2 }, |
51 | | { "deepseek3", LLM_CHAT_TEMPLATE_DEEPSEEK_3 }, |
52 | | { "command-r", LLM_CHAT_TEMPLATE_COMMAND_R }, |
53 | | { "llama3", LLM_CHAT_TEMPLATE_LLAMA_3 }, |
54 | | { "chatglm3", LLM_CHAT_TEMPLATE_CHATGLM_3 }, |
55 | | { "chatglm4", LLM_CHAT_TEMPLATE_CHATGLM_4 }, |
56 | | { "glmedge", LLM_CHAT_TEMPLATE_GLMEDGE }, |
57 | | { "minicpm", LLM_CHAT_TEMPLATE_MINICPM }, |
58 | | { "exaone3", LLM_CHAT_TEMPLATE_EXAONE_3 }, |
59 | | { "exaone4", LLM_CHAT_TEMPLATE_EXAONE_4 }, |
60 | | { "rwkv-world", LLM_CHAT_TEMPLATE_RWKV_WORLD }, |
61 | | { "granite", LLM_CHAT_TEMPLATE_GRANITE }, |
62 | | { "gigachat", LLM_CHAT_TEMPLATE_GIGACHAT }, |
63 | | { "megrez", LLM_CHAT_TEMPLATE_MEGREZ }, |
64 | | { "yandex", LLM_CHAT_TEMPLATE_YANDEX }, |
65 | | { "bailing", LLM_CHAT_TEMPLATE_BAILING }, |
66 | | { "bailing-think", LLM_CHAT_TEMPLATE_BAILING_THINK }, |
67 | | { "bailing2", LLM_CHAT_TEMPLATE_BAILING2 }, |
68 | | { "llama4", LLM_CHAT_TEMPLATE_LLAMA4 }, |
69 | | { "smolvlm", LLM_CHAT_TEMPLATE_SMOLVLM }, |
70 | | { "hunyuan-moe", LLM_CHAT_TEMPLATE_HUNYUAN_MOE }, |
71 | | { "gpt-oss", LLM_CHAT_TEMPLATE_OPENAI_MOE }, |
72 | | { "hunyuan-dense", LLM_CHAT_TEMPLATE_HUNYUAN_DENSE }, |
73 | | { "kimi-k2", LLM_CHAT_TEMPLATE_KIMI_K2 }, |
74 | | { "seed_oss", LLM_CHAT_TEMPLATE_SEED_OSS }, |
75 | | { "grok-2", LLM_CHAT_TEMPLATE_GROK_2 }, |
76 | | { "pangu-embedded", LLM_CHAT_TEMPLATE_PANGU_EMBED }, |
77 | | }; |
78 | | |
79 | 1.51k | llm_chat_template llm_chat_template_from_str(const std::string & name) { |
80 | 1.51k | return LLM_CHAT_TEMPLATES.at(name); |
81 | 1.51k | } |
82 | | |
// Heuristically map a chat-template string to an llm_chat_template value.
// `tmpl` may be either a registered template name (exact lookup via
// llm_chat_template_from_str) or the raw Jinja template text from a model's
// metadata, in which case distinctive marker substrings are searched for.
// NOTE: the order of the checks below is significant — more specific markers
// must be tested before generic ones (e.g. phi-4 / SmolVLM before chatml).
// Returns LLM_CHAT_TEMPLATE_UNKNOWN when nothing matches.
llm_chat_template llm_chat_detect_template(const std::string & tmpl) {
    try {
        // exact match against a registered template name (e.g. "chatml")
        return llm_chat_template_from_str(tmpl);
    } catch (const std::out_of_range &) {
        // not a known name — fall through to substring heuristics
    }

    // helper: does the template text contain the given marker?
    auto tmpl_contains = [&tmpl](const char * haystack) -> bool {
        return tmpl.find(haystack) != std::string::npos;
    };
    if (tmpl_contains("<|im_start|>")) {
        return tmpl_contains("<|im_sep|>")
            ? LLM_CHAT_TEMPLATE_PHI_4
            : tmpl_contains("<end_of_utterance>")
                ? LLM_CHAT_TEMPLATE_SMOLVLM // SmolVLM uses <|im_start|> as BOS, but it is NOT chatml
                : LLM_CHAT_TEMPLATE_CHATML;
    } else if (tmpl.find("mistral") == 0 || tmpl_contains("[INST]")) {
        // mistral family and llama2 variants all use [INST]; disambiguate below
        if (tmpl_contains("[SYSTEM_PROMPT]")) {
            return LLM_CHAT_TEMPLATE_MISTRAL_V7;
        } else if (
            // catches official 'v1' template
            tmpl_contains("' [INST] ' + system_message")
            // catches official 'v3' and 'v3-tekken' templates
            || tmpl_contains("[AVAILABLE_TOOLS]")
        ) {
            // Official mistral 'v1', 'v3' and 'v3-tekken' templates
            // See: https://github.com/mistralai/cookbook/blob/main/concept-deep-dive/tokenization/chat_templates.md
            // See: https://github.com/mistralai/cookbook/blob/main/concept-deep-dive/tokenization/templates.md
            if (tmpl_contains(" [INST]")) {
                // leading space before [INST] distinguishes 'v1'
                return LLM_CHAT_TEMPLATE_MISTRAL_V1;
            } else if (tmpl_contains("\"[INST]\"")) {
                return LLM_CHAT_TEMPLATE_MISTRAL_V3_TEKKEN;
            }
            return LLM_CHAT_TEMPLATE_MISTRAL_V3;
        } else {
            // llama2 template and its variants
            // [variant] support system message
            // See: https://huggingface.co/blog/llama2#how-to-prompt-llama-2
            bool support_system_message = tmpl_contains("<<SYS>>");
            bool add_bos_inside_history = tmpl_contains("bos_token + '[INST]");
            bool strip_message = tmpl_contains("content.strip()");
            if (strip_message) {
                return LLM_CHAT_TEMPLATE_LLAMA_2_SYS_STRIP;
            } else if (add_bos_inside_history) {
                return LLM_CHAT_TEMPLATE_LLAMA_2_SYS_BOS;
            } else if (support_system_message) {
                return LLM_CHAT_TEMPLATE_LLAMA_2_SYS;
            } else {
                return LLM_CHAT_TEMPLATE_LLAMA_2;
            }
        }
    } else if (tmpl_contains("<|assistant|>") && tmpl_contains("<|end|>")) {
        return LLM_CHAT_TEMPLATE_PHI_3;
    } else if (tmpl_contains("[gMASK]<sop>")) {
        return LLM_CHAT_TEMPLATE_CHATGLM_4;
    } else if (tmpl_contains("<|assistant|>") && tmpl_contains("<|user|>")) {
        return tmpl_contains("</s>") ? LLM_CHAT_TEMPLATE_FALCON_3 : LLM_CHAT_TEMPLATE_GLMEDGE;
    } else if (tmpl_contains("<|{{ item['role'] }}|>") && tmpl_contains("<|begin_of_image|>")) {
        return LLM_CHAT_TEMPLATE_GLMEDGE;
    } else if (tmpl_contains("<|user|>") && tmpl_contains("<|endoftext|>")) {
        return LLM_CHAT_TEMPLATE_ZEPHYR;
    } else if (tmpl_contains("bos_token + message['role']")) {
        // mlabonne/AlphaMonarch-7B
        return LLM_CHAT_TEMPLATE_MONARCH;
    } else if (tmpl_contains("<start_of_turn>")) {
        // google/gemma-7b-it
        return LLM_CHAT_TEMPLATE_GEMMA;
    } else if (tmpl_contains("'\\n\\nAssistant: ' + eos_token")) {
        // OrionStarAI/Orion-14B-Chat
        return LLM_CHAT_TEMPLATE_ORION;
    } else if (tmpl_contains("GPT4 Correct ")) {
        // openchat/openchat-3.5-0106
        return LLM_CHAT_TEMPLATE_OPENCHAT;
    } else if (tmpl_contains("USER: ") && tmpl_contains("ASSISTANT: ")) {
        // eachadea/vicuna-13b-1.1 (and Orca variant)
        if (tmpl_contains("SYSTEM: ")) {
            return LLM_CHAT_TEMPLATE_VICUNA_ORCA;
        }
        return LLM_CHAT_TEMPLATE_VICUNA;
    } else if (tmpl_contains("### Instruction:") && tmpl_contains("<|EOT|>")) {
        // deepseek-ai/deepseek-coder-33b-instruct
        return LLM_CHAT_TEMPLATE_DEEPSEEK;
    } else if (tmpl_contains("<|START_OF_TURN_TOKEN|>") && tmpl_contains("<|USER_TOKEN|>")) {
        // CohereForAI/c4ai-command-r-plus
        return LLM_CHAT_TEMPLATE_COMMAND_R;
    } else if (tmpl_contains("<|start_header_id|>") && tmpl_contains("<|end_header_id|>")) {
        return LLM_CHAT_TEMPLATE_LLAMA_3;
    } else if (tmpl_contains("[gMASK]sop")) {
        // chatglm3-6b
        return LLM_CHAT_TEMPLATE_CHATGLM_3;
    } else if (tmpl_contains(LU8("<用户>"))) {
        // MiniCPM-3B-OpenHermes-2.5-v2-GGUF
        return LLM_CHAT_TEMPLATE_MINICPM;
    } else if (tmpl_contains("'Assistant: ' + message['content'] + eos_token")) {
        return LLM_CHAT_TEMPLATE_DEEPSEEK_2;
    } else if (tmpl_contains(LU8("<|Assistant|>")) && tmpl_contains(LU8("<|User|>")) && tmpl_contains(LU8("<|end▁of▁sentence|>"))) {
        return LLM_CHAT_TEMPLATE_DEEPSEEK_3;
    } else if (tmpl_contains("[|system|]") && tmpl_contains("[|assistant|]") && tmpl_contains("[|endofturn|]")) {
        // EXAONE-4 additionally supports a tool role
        if (tmpl_contains("[|tool|]")) {
            return LLM_CHAT_TEMPLATE_EXAONE_4;
        }
        // ref: https://huggingface.co/LGAI-EXAONE/EXAONE-3.0-7.8B-Instruct/discussions/8#66bae61b1893d14ee8ed85bb
        // EXAONE-3.0-7.8B-Instruct
        return LLM_CHAT_TEMPLATE_EXAONE_3;
    } else if (tmpl_contains("rwkv-world") || tmpl_contains("{{- 'User: ' + message['content']|trim + '\\n\\n' -}}")) {
        return LLM_CHAT_TEMPLATE_RWKV_WORLD;
    } else if (tmpl_contains("<|start_of_role|>")) {
        // IBM Granite
        return LLM_CHAT_TEMPLATE_GRANITE;
    } else if (tmpl_contains("message['role'] + additional_special_tokens[0] + message['content'] + additional_special_tokens[1]")) {
        return LLM_CHAT_TEMPLATE_GIGACHAT;
    } else if (tmpl_contains("<|role_start|>")) {
        return LLM_CHAT_TEMPLATE_MEGREZ;
    } else if (tmpl_contains(" Ассистент:")) {
        return LLM_CHAT_TEMPLATE_YANDEX;
    } else if (tmpl_contains("<role>ASSISTANT</role>") && tmpl_contains("'HUMAN'")) {
        // Bailing (Ling/Ring); note the quote style: single-quoted 'HUMAN' here,
        // double-quoted "HUMAN" in the -think variant below
        return LLM_CHAT_TEMPLATE_BAILING;
    } else if (tmpl_contains("<role>ASSISTANT</role>") && tmpl_contains("\"HUMAN\"") && tmpl_contains("<think>")) {
        return LLM_CHAT_TEMPLATE_BAILING_THINK;
    } else if (tmpl_contains("<role>ASSISTANT</role>") && tmpl_contains("<role>HUMAN</role>") && tmpl_contains("<|role_end|>")) {
        return LLM_CHAT_TEMPLATE_BAILING2;
    } else if (tmpl_contains("<|header_start|>") && tmpl_contains("<|header_end|>")) {
        return LLM_CHAT_TEMPLATE_LLAMA4;
    } else if (tmpl_contains("<|endofuserprompt|>")) {
        // dots.llm1.inst
        return LLM_CHAT_TEMPLATE_DOTS1;
    } else if (tmpl_contains("<|extra_0|>") && tmpl_contains("<|extra_4|>")) {
        return LLM_CHAT_TEMPLATE_HUNYUAN_MOE;
    } else if (tmpl_contains("<|start|>") && tmpl_contains("<|channel|>")) {
        // gpt-oss (harmony format)
        return LLM_CHAT_TEMPLATE_OPENAI_MOE;
    } else if (tmpl_contains("<|hy_Assistant|>") && tmpl_contains("<|hy_place▁holder▁no▁3|>")) {
        return LLM_CHAT_TEMPLATE_HUNYUAN_DENSE;
    } else if (tmpl_contains("<|im_assistant|>assistant<|im_middle|>")) {
        return LLM_CHAT_TEMPLATE_KIMI_K2;
    } else if (tmpl_contains("<seed:bos>")) {
        return LLM_CHAT_TEMPLATE_SEED_OSS;
    } else if (tmpl_contains("'Assistant: ' + message['content'] + '<|separator|>")) {
        return LLM_CHAT_TEMPLATE_GROK_2;
    } else if (tmpl_contains(LU8("[unused9]系统:[unused10]"))) {
        return LLM_CHAT_TEMPLATE_PANGU_EMBED;
    }
    // no marker matched — caller decides how to handle an unknown template
    return LLM_CHAT_TEMPLATE_UNKNOWN;
}
222 | | |
223 | | // Simple version of "llama_apply_chat_template" that only works with strings |
224 | | // This function uses heuristic checks to determine commonly used template. It is not a jinja parser. |
225 | | int32_t llm_chat_apply_template( |
226 | | llm_chat_template tmpl, |
227 | | const std::vector<const llama_chat_message *> & chat, |
228 | 917 | std::string & dest, bool add_ass) { |
229 | | // Taken from the research: https://github.com/ggerganov/llama.cpp/issues/5527 |
230 | 917 | std::stringstream ss; |
231 | 917 | if (tmpl == LLM_CHAT_TEMPLATE_CHATML) { |
232 | | // chatml template |
233 | 18 | for (auto message : chat) { |
234 | 18 | ss << "<|im_start|>" << message->role << "\n" << message->content << "<|im_end|>\n"; |
235 | 18 | } |
236 | 3 | if (add_ass) { |
237 | 3 | ss << "<|im_start|>assistant\n"; |
238 | 3 | } |
239 | 914 | } else if (tmpl == LLM_CHAT_TEMPLATE_MISTRAL_V7 || tmpl == LLM_CHAT_TEMPLATE_MISTRAL_V7_TEKKEN) { |
240 | | // Official mistral 'v7' template |
241 | | // See: https://huggingface.co/mistralai/Mistral-Large-Instruct-2411#basic-instruct-template-v7 |
242 | | // https://huggingface.co/mistralai/Mistral-Small-3.1-24B-Instruct-2503#basic-instruct-template-v7-tekken |
243 | 79 | const char * trailing_space = tmpl == LLM_CHAT_TEMPLATE_MISTRAL_V7 ? " " : ""; |
244 | 474 | for (auto message : chat) { |
245 | 474 | std::string role(message->role); |
246 | 474 | std::string content(message->content); |
247 | 474 | if (role == "system") { |
248 | 79 | ss << "[SYSTEM_PROMPT]" << trailing_space << content << "[/SYSTEM_PROMPT]"; |
249 | 395 | } else if (role == "user") { |
250 | 237 | ss << "[INST]" << trailing_space << content << "[/INST]"; |
251 | 237 | } else { |
252 | 158 | ss << trailing_space << content << "</s>"; |
253 | 158 | } |
254 | 474 | } |
255 | 835 | } else if (tmpl == LLM_CHAT_TEMPLATE_MISTRAL_V1 |
256 | 717 | || tmpl == LLM_CHAT_TEMPLATE_MISTRAL_V3 |
257 | 678 | || tmpl == LLM_CHAT_TEMPLATE_MISTRAL_V3_TEKKEN) { |
258 | | // See: https://github.com/mistralai/cookbook/blob/main/concept-deep-dive/tokenization/chat_templates.md |
259 | | // See: https://github.com/mistralai/cookbook/blob/main/concept-deep-dive/tokenization/templates.md |
260 | 163 | std::string leading_space = tmpl == LLM_CHAT_TEMPLATE_MISTRAL_V1 ? " " : ""; |
261 | 163 | std::string trailing_space = tmpl == LLM_CHAT_TEMPLATE_MISTRAL_V3_TEKKEN ? "" : " "; |
262 | 163 | bool trim_assistant_message = tmpl == LLM_CHAT_TEMPLATE_MISTRAL_V3; |
263 | 163 | bool is_inside_turn = false; |
264 | 978 | for (auto message : chat) { |
265 | 978 | if (!is_inside_turn) { |
266 | 489 | ss << leading_space << "[INST]" << trailing_space; |
267 | 489 | is_inside_turn = true; |
268 | 489 | } |
269 | 978 | std::string role(message->role); |
270 | 978 | std::string content(message->content); |
271 | 978 | if (role == "system") { |
272 | 163 | ss << content << "\n\n"; |
273 | 815 | } else if (role == "user") { |
274 | 489 | ss << content << leading_space << "[/INST]"; |
275 | 489 | } else { |
276 | 326 | ss << trailing_space << (trim_assistant_message ? trim(content) : content) << "</s>"; |
277 | 326 | is_inside_turn = false; |
278 | 326 | } |
279 | 978 | } |
280 | 672 | } else if ( |
281 | 672 | tmpl == LLM_CHAT_TEMPLATE_LLAMA_2 |
282 | 541 | || tmpl == LLM_CHAT_TEMPLATE_LLAMA_2_SYS |
283 | 535 | || tmpl == LLM_CHAT_TEMPLATE_LLAMA_2_SYS_BOS |
284 | 519 | || tmpl == LLM_CHAT_TEMPLATE_LLAMA_2_SYS_STRIP) { |
285 | | // llama2 template and its variants |
286 | | // [variant] support system message |
287 | | // See: https://huggingface.co/blog/llama2#how-to-prompt-llama-2 |
288 | 217 | bool support_system_message = tmpl != LLM_CHAT_TEMPLATE_LLAMA_2; |
289 | | // [variant] add BOS inside history |
290 | 217 | bool add_bos_inside_history = tmpl == LLM_CHAT_TEMPLATE_LLAMA_2_SYS_BOS; |
291 | | // [variant] trim spaces from the input message |
292 | 217 | bool strip_message = tmpl == LLM_CHAT_TEMPLATE_LLAMA_2_SYS_STRIP; |
293 | | // construct the prompt |
294 | 217 | bool is_inside_turn = true; // skip BOS at the beginning |
295 | 217 | ss << "[INST] "; |
296 | 1.30k | for (auto message : chat) { |
297 | 1.30k | std::string content = strip_message ? trim(message->content) : message->content; |
298 | 1.30k | std::string role(message->role); |
299 | 1.30k | if (!is_inside_turn) { |
300 | 434 | is_inside_turn = true; |
301 | 434 | ss << (add_bos_inside_history ? "<s>[INST] " : "[INST] "); |
302 | 434 | } |
303 | 1.30k | if (role == "system") { |
304 | 217 | if (support_system_message) { |
305 | 86 | ss << "<<SYS>>\n" << content << "\n<</SYS>>\n\n"; |
306 | 131 | } else { |
307 | | // if the model does not support system message, we still include it in the first message, but without <<SYS>> |
308 | 131 | ss << content << "\n"; |
309 | 131 | } |
310 | 1.08k | } else if (role == "user") { |
311 | 651 | ss << content << " [/INST]"; |
312 | 651 | } else { |
313 | 434 | ss << content << "</s>"; |
314 | 434 | is_inside_turn = false; |
315 | 434 | } |
316 | 1.30k | } |
317 | 455 | } else if (tmpl == LLM_CHAT_TEMPLATE_PHI_3) { |
318 | | // Phi 3 |
319 | 60 | for (auto message : chat) { |
320 | 60 | std::string role(message->role); |
321 | 60 | ss << "<|" << role << "|>\n" << message->content << "<|end|>\n"; |
322 | 60 | } |
323 | 10 | if (add_ass) { |
324 | 10 | ss << "<|assistant|>\n"; |
325 | 10 | } |
326 | 445 | } else if (tmpl == LLM_CHAT_TEMPLATE_PHI_4) { |
327 | | // chatml template |
328 | 18 | for (auto message : chat) { |
329 | 18 | ss << "<|im_start|>" << message->role << "<|im_sep|>" << message->content << "<|im_end|>"; |
330 | 18 | } |
331 | 3 | if (add_ass) { |
332 | 3 | ss << "<|im_start|>assistant<|im_sep|>"; |
333 | 3 | } |
334 | 442 | } else if (tmpl == LLM_CHAT_TEMPLATE_FALCON_3) { |
335 | | // Falcon 3 |
336 | 18 | for (auto message : chat) { |
337 | 18 | std::string role(message->role); |
338 | 18 | ss << "<|" << role << "|>\n" << message->content << "\n"; |
339 | 18 | } |
340 | 3 | if (add_ass) { |
341 | 3 | ss << "<|assistant|>\n"; |
342 | 3 | } |
343 | 439 | } else if (tmpl == LLM_CHAT_TEMPLATE_ZEPHYR) { |
344 | | // zephyr template |
345 | 24 | for (auto message : chat) { |
346 | 24 | ss << "<|" << message->role << "|>" << "\n" << message->content << "<|endoftext|>\n"; |
347 | 24 | } |
348 | 4 | if (add_ass) { |
349 | 4 | ss << "<|assistant|>\n"; |
350 | 4 | } |
351 | 435 | } else if (tmpl == LLM_CHAT_TEMPLATE_MONARCH) { |
352 | | // mlabonne/AlphaMonarch-7B template (the <s> is included inside history) |
353 | 24 | for (auto message : chat) { |
354 | 24 | std::string bos = (message == chat.front()) ? "" : "<s>"; // skip BOS for first message |
355 | 24 | ss << bos << message->role << "\n" << message->content << "</s>\n"; |
356 | 24 | } |
357 | 4 | if (add_ass) { |
358 | 4 | ss << "<s>assistant\n"; |
359 | 4 | } |
360 | 431 | } else if (tmpl == LLM_CHAT_TEMPLATE_GEMMA) { |
361 | | // google/gemma-7b-it |
362 | 190 | std::string system_prompt = ""; |
363 | 1.14k | for (auto message : chat) { |
364 | 1.14k | std::string role(message->role); |
365 | 1.14k | if (role == "system") { |
366 | | // there is no system message for gemma, but we will merge it with user prompt, so nothing is broken |
367 | 190 | system_prompt += trim(message->content); |
368 | 190 | continue; |
369 | 190 | } |
370 | | // in gemma, "assistant" is "model" |
371 | 950 | role = role == "assistant" ? "model" : message->role; |
372 | 950 | ss << "<start_of_turn>" << role << "\n"; |
373 | 950 | if (!system_prompt.empty() && role != "model") { |
374 | 138 | ss << system_prompt << "\n\n"; |
375 | 138 | system_prompt = ""; |
376 | 138 | } |
377 | 950 | ss << trim(message->content) << "<end_of_turn>\n"; |
378 | 950 | } |
379 | 190 | if (add_ass) { |
380 | 190 | ss << "<start_of_turn>model\n"; |
381 | 190 | } |
382 | 241 | } else if (tmpl == LLM_CHAT_TEMPLATE_ORION) { |
383 | | // OrionStarAI/Orion-14B-Chat |
384 | 13 | std::string system_prompt = ""; |
385 | 78 | for (auto message : chat) { |
386 | 78 | std::string role(message->role); |
387 | 78 | if (role == "system") { |
388 | | // there is no system message support, we will merge it with user prompt |
389 | 13 | system_prompt += message->content; |
390 | 13 | continue; |
391 | 65 | } else if (role == "user") { |
392 | 39 | ss << "Human: "; |
393 | 39 | if (!system_prompt.empty()) { |
394 | 5 | ss << system_prompt << "\n\n"; |
395 | 5 | system_prompt = ""; |
396 | 5 | } |
397 | 39 | ss << message->content << "\n\nAssistant: </s>"; |
398 | 39 | } else { |
399 | 26 | ss << message->content << "</s>"; |
400 | 26 | } |
401 | 78 | } |
402 | 228 | } else if (tmpl == LLM_CHAT_TEMPLATE_OPENCHAT) { |
403 | | // openchat/openchat-3.5-0106, |
404 | 24 | for (auto message : chat) { |
405 | 24 | std::string role(message->role); |
406 | 24 | if (role == "system") { |
407 | 4 | ss << message->content << "<|end_of_turn|>"; |
408 | 20 | } else { |
409 | 20 | role[0] = toupper(role[0]); |
410 | 20 | ss << "GPT4 Correct " << role << ": " << message->content << "<|end_of_turn|>"; |
411 | 20 | } |
412 | 24 | } |
413 | 4 | if (add_ass) { |
414 | 4 | ss << "GPT4 Correct Assistant:"; |
415 | 4 | } |
416 | 224 | } else if (tmpl == LLM_CHAT_TEMPLATE_VICUNA || tmpl == LLM_CHAT_TEMPLATE_VICUNA_ORCA) { |
417 | | // eachadea/vicuna-13b-1.1 (and Orca variant) |
418 | 48 | for (auto message : chat) { |
419 | 48 | std::string role(message->role); |
420 | 48 | if (role == "system") { |
421 | | // Orca-Vicuna variant uses a system prefix |
422 | 8 | if (tmpl == LLM_CHAT_TEMPLATE_VICUNA_ORCA) { |
423 | 2 | ss << "SYSTEM: " << message->content << "\n"; |
424 | 6 | } else { |
425 | 6 | ss << message->content << "\n\n"; |
426 | 6 | } |
427 | 40 | } else if (role == "user") { |
428 | 24 | ss << "USER: " << message->content << "\n"; |
429 | 24 | } else if (role == "assistant") { |
430 | 16 | ss << "ASSISTANT: " << message->content << "</s>\n"; |
431 | 16 | } |
432 | 48 | } |
433 | 8 | if (add_ass) { |
434 | 8 | ss << "ASSISTANT:"; |
435 | 8 | } |
436 | 216 | } else if (tmpl == LLM_CHAT_TEMPLATE_DEEPSEEK) { |
437 | | // deepseek-ai/deepseek-coder-33b-instruct |
438 | 12 | for (auto message : chat) { |
439 | 12 | std::string role(message->role); |
440 | 12 | if (role == "system") { |
441 | 2 | ss << message->content; |
442 | 10 | } else if (role == "user") { |
443 | 6 | ss << "### Instruction:\n" << message->content << "\n"; |
444 | 6 | } else if (role == "assistant") { |
445 | 4 | ss << "### Response:\n" << message->content << "\n<|EOT|>\n"; |
446 | 4 | } |
447 | 12 | } |
448 | 2 | if (add_ass) { |
449 | 2 | ss << "### Response:\n"; |
450 | 2 | } |
451 | 214 | } else if (tmpl == LLM_CHAT_TEMPLATE_COMMAND_R) { |
452 | | // CohereForAI/c4ai-command-r-plus |
453 | 114 | for (auto message : chat) { |
454 | 114 | std::string role(message->role); |
455 | 114 | if (role == "system") { |
456 | 19 | ss << "<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|>" << trim(message->content) << "<|END_OF_TURN_TOKEN|>"; |
457 | 95 | } else if (role == "user") { |
458 | 57 | ss << "<|START_OF_TURN_TOKEN|><|USER_TOKEN|>" << trim(message->content) << "<|END_OF_TURN_TOKEN|>"; |
459 | 57 | } else if (role == "assistant") { |
460 | 38 | ss << "<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>" << trim(message->content) << "<|END_OF_TURN_TOKEN|>"; |
461 | 38 | } |
462 | 114 | } |
463 | 19 | if (add_ass) { |
464 | 19 | ss << "<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>"; |
465 | 19 | } |
466 | 195 | } else if (tmpl == LLM_CHAT_TEMPLATE_LLAMA_3) { |
467 | | // Llama 3 |
468 | 120 | for (auto message : chat) { |
469 | 120 | std::string role(message->role); |
470 | 120 | ss << "<|start_header_id|>" << role << "<|end_header_id|>\n\n" << trim(message->content) << "<|eot_id|>"; |
471 | 120 | } |
472 | 20 | if (add_ass) { |
473 | 20 | ss << "<|start_header_id|>assistant<|end_header_id|>\n\n"; |
474 | 20 | } |
475 | 175 | } else if (tmpl == LLM_CHAT_TEMPLATE_CHATGLM_3) { |
476 | | // chatglm3-6b |
477 | 5 | ss << "[gMASK]" << "sop"; |
478 | 30 | for (auto message : chat) { |
479 | 30 | std::string role(message->role); |
480 | 30 | ss << "<|" << role << "|>" << "\n " << message->content; |
481 | 30 | } |
482 | 5 | if (add_ass) { |
483 | 5 | ss << "<|assistant|>"; |
484 | 5 | } |
485 | 170 | } else if (tmpl == LLM_CHAT_TEMPLATE_CHATGLM_4) { |
486 | 13 | ss << "[gMASK]" << "<sop>"; |
487 | 78 | for (auto message : chat) { |
488 | 78 | std::string role(message->role); |
489 | 78 | ss << "<|" << role << "|>" << "\n" << message->content; |
490 | 78 | } |
491 | 13 | if (add_ass) { |
492 | 13 | ss << "<|assistant|>\n"; |
493 | 13 | } |
494 | 157 | } else if (tmpl == LLM_CHAT_TEMPLATE_GLMEDGE) { |
495 | 54 | for (auto message : chat) { |
496 | 54 | std::string role(message->role); |
497 | 54 | ss << "<|" << role << "|>" << "\n" << message->content; |
498 | 54 | } |
499 | 9 | if (add_ass) { |
500 | 9 | ss << "<|assistant|>"; |
501 | 9 | } |
502 | 148 | } else if (tmpl == LLM_CHAT_TEMPLATE_MINICPM) { |
503 | | // MiniCPM-3B-OpenHermes-2.5-v2-GGUF |
504 | 114 | for (auto message : chat) { |
505 | 114 | std::string role(message->role); |
506 | 114 | if (role == "user") { |
507 | 57 | ss << LU8("<用户>"); |
508 | 57 | ss << trim(message->content); |
509 | 57 | ss << "<AI>"; |
510 | 57 | } else { |
511 | 57 | ss << trim(message->content); |
512 | 57 | } |
513 | 114 | } |
514 | 129 | } else if (tmpl == LLM_CHAT_TEMPLATE_DEEPSEEK_2) { |
515 | | // DeepSeek-V2 |
516 | 36 | for (auto message : chat) { |
517 | 36 | std::string role(message->role); |
518 | 36 | if (role == "system") { |
519 | 6 | ss << message->content << "\n\n"; |
520 | 30 | } else if (role == "user") { |
521 | 18 | ss << "User: " << message->content << "\n\n"; |
522 | 18 | } else if (role == "assistant") { |
523 | 12 | ss << "Assistant: " << message->content << LU8("<|end▁of▁sentence|>"); |
524 | 12 | } |
525 | 36 | } |
526 | 6 | if (add_ass) { |
527 | 6 | ss << "Assistant:"; |
528 | 6 | } |
529 | 123 | } else if (tmpl == LLM_CHAT_TEMPLATE_DEEPSEEK_3) { |
530 | | // DeepSeek-V3 |
531 | 12 | for (auto message : chat) { |
532 | 12 | std::string role(message->role); |
533 | 12 | if (role == "system") { |
534 | 2 | ss << message->content << "\n\n"; |
535 | 10 | } else if (role == "user") { |
536 | 6 | ss << LU8("<|User|>") << message->content; |
537 | 6 | } else if (role == "assistant") { |
538 | 4 | ss << LU8("<|Assistant|>") << message->content << LU8("<|end▁of▁sentence|>"); |
539 | 4 | } |
540 | 12 | } |
541 | 2 | if (add_ass) { |
542 | 2 | ss << LU8("<|Assistant|>"); |
543 | 2 | } |
544 | 121 | } else if (tmpl == LLM_CHAT_TEMPLATE_EXAONE_3) { |
545 | | // ref: https://huggingface.co/LGAI-EXAONE/EXAONE-3.0-7.8B-Instruct/discussions/8#66bae61b1893d14ee8ed85bb |
546 | | // EXAONE-3.0-7.8B-Instruct |
547 | 108 | for (auto message : chat) { |
548 | 108 | std::string role(message->role); |
549 | 108 | if (role == "system") { |
550 | 18 | ss << "[|system|]" << trim(message->content) << "[|endofturn|]\n"; |
551 | 90 | } else if (role == "user") { |
552 | 54 | ss << "[|user|]" << trim(message->content) << "\n"; |
553 | 54 | } else if (role == "assistant") { |
554 | 36 | ss << "[|assistant|]" << trim(message->content) << "[|endofturn|]\n"; |
555 | 36 | } |
556 | 108 | } |
557 | 18 | if (add_ass) { |
558 | 18 | ss << "[|assistant|]"; |
559 | 18 | } |
560 | 103 | } else if (tmpl == LLM_CHAT_TEMPLATE_EXAONE_4) { |
561 | 48 | for (auto message : chat) { |
562 | 48 | std::string role(message->role); |
563 | 48 | if (role == "system") { |
564 | 8 | ss << "[|system|]" << trim(message->content) << "[|endofturn|]\n"; |
565 | 40 | } else if (role == "user") { |
566 | 24 | ss << "[|user|]" << trim(message->content) << "\n"; |
567 | 24 | } else if (role == "assistant") { |
568 | 16 | ss << "[|assistant|]" << trim(message->content) << "[|endofturn|]\n"; |
569 | 16 | } else if (role == "tool") { |
570 | 0 | ss << "[|tool|]" << trim(message->content) << "[|endofturn|]\n"; |
571 | 0 | } |
572 | 48 | } |
573 | 8 | if (add_ass) { |
574 | 8 | ss << "[|assistant|]"; |
575 | 8 | } |
576 | 95 | } else if (tmpl == LLM_CHAT_TEMPLATE_RWKV_WORLD) { |
577 | | // this template requires the model to have "\n\n" as EOT token |
578 | 91 | for (size_t i = 0; i < chat.size(); i++) { |
579 | 78 | std::string role(chat[i]->role); |
580 | 78 | if (role == "system") { |
581 | 13 | ss << "System: " << trim(chat[i]->content) << "\n\n"; |
582 | 65 | } else if (role == "user") { |
583 | 39 | ss << "User: " << trim(chat[i]->content) << "\n\n"; |
584 | 39 | if (i == chat.size() - 1) { |
585 | 13 | ss << "Assistant:"; |
586 | 13 | } |
587 | 39 | } else if (role == "assistant") { |
588 | 26 | ss << "Assistant: " << trim(chat[i]->content) << "\n\n"; |
589 | 26 | } |
590 | 78 | } |
591 | 82 | } else if (tmpl == LLM_CHAT_TEMPLATE_GRANITE) { |
592 | | // IBM Granite template |
593 | 48 | for (const auto & message : chat) { |
594 | 48 | std::string role(message->role); |
595 | 48 | ss << "<|start_of_role|>" << role << "<|end_of_role|>"; |
596 | 48 | if (role == "assistant_tool_call") { |
597 | 0 | ss << "<|tool_call|>"; |
598 | 0 | } |
599 | 48 | ss << message->content << "<|end_of_text|>\n"; |
600 | 48 | } |
601 | 8 | if (add_ass) { |
602 | 8 | ss << "<|start_of_role|>assistant<|end_of_role|>"; |
603 | 8 | } |
604 | 74 | } else if (tmpl == LLM_CHAT_TEMPLATE_GIGACHAT) { |
605 | | // GigaChat template |
606 | 1 | bool has_system = !chat.empty() && std::string(chat[0]->role) == "system"; |
607 | | |
608 | | // Handle system message if present |
609 | 1 | if (has_system) { |
610 | 1 | ss << "<s>" << chat[0]->content << "<|message_sep|>"; |
611 | 1 | } else { |
612 | 0 | ss << "<s>"; |
613 | 0 | } |
614 | | |
615 | | // Process remaining messages |
616 | 6 | for (size_t i = has_system ? 1 : 0; i < chat.size(); i++) { |
617 | 5 | std::string role(chat[i]->role); |
618 | 5 | if (role == "user") { |
619 | 3 | ss << "user<|role_sep|>" << chat[i]->content << "<|message_sep|>" |
620 | 3 | << "available functions<|role_sep|>[]<|message_sep|>"; |
621 | 3 | } else if (role == "assistant") { |
622 | 2 | ss << "assistant<|role_sep|>" << chat[i]->content << "<|message_sep|>"; |
623 | 2 | } |
624 | 5 | } |
625 | | |
626 | | // Add generation prompt if needed |
627 | 1 | if (add_ass) { |
628 | 1 | ss << "assistant<|role_sep|>"; |
629 | 1 | } |
630 | 73 | } else if (tmpl == LLM_CHAT_TEMPLATE_MEGREZ) { |
631 | | // Megrez template |
632 | 18 | for (auto message : chat) { |
633 | 18 | std::string role(message->role); |
634 | 18 | ss << "<|role_start|>" << role << "<|role_end|>" << message->content << "<|turn_end|>"; |
635 | 18 | } |
636 | | |
637 | 3 | if (add_ass) { |
638 | 3 | ss << "<|role_start|>assistant<|role_end|>"; |
639 | 3 | } |
640 | 70 | } else if (tmpl == LLM_CHAT_TEMPLATE_YANDEX) { |
641 | | // Yandex template ("\n\n" is defined as EOT token) |
642 | | |
643 | 21 | for (size_t i = 0; i < chat.size(); i++) { |
644 | 18 | std::string role(chat[i]->role); |
645 | 18 | if (role == "user") { |
646 | 9 | ss << " Пользователь: " << chat[i]->content << "\n\n"; |
647 | 9 | } else if (role == "assistant") { |
648 | 6 | ss << " Ассистент: " << chat[i]->content << "\n\n"; |
649 | 6 | } |
650 | 18 | } |
651 | | |
652 | | // Add generation prompt if needed |
653 | 3 | if (add_ass) { |
654 | 3 | ss << " Ассистент:[SEP]"; |
655 | 3 | } |
656 | 67 | } else if (tmpl == LLM_CHAT_TEMPLATE_BAILING || tmpl == LLM_CHAT_TEMPLATE_BAILING_THINK) { |
657 | | // Bailing (Ling/Ring) template |
658 | 24 | for (auto message : chat) { |
659 | 24 | std::string role(message->role); |
660 | | |
661 | 24 | if (role == "user") { |
662 | 12 | role = "HUMAN"; |
663 | 12 | } else { |
664 | 12 | std::transform(role.begin(), role.end(), role.begin(), ::toupper); |
665 | 12 | } |
666 | | |
667 | 24 | ss << "<role>" << role << "</role>" << message->content; |
668 | 24 | } |
669 | | |
670 | 4 | if (add_ass) { |
671 | 4 | ss << "<role>ASSISTANT</role>"; |
672 | | |
673 | 4 | if (tmpl == LLM_CHAT_TEMPLATE_BAILING_THINK) { |
674 | 2 | ss << "<think>"; |
675 | 2 | } |
676 | 4 | } |
677 | 63 | } else if (tmpl == LLM_CHAT_TEMPLATE_BAILING2) { |
678 | | // Bailing2 (Ling 2.0) template |
679 | 2 | bool has_system = !chat.empty() && std::string(chat[0]->role) == "system"; |
680 | | |
681 | 2 | if (!has_system) { |
682 | 0 | ss << "<role>SYSTEM</role>detailed thinking off<|role_end|>"; |
683 | 0 | } |
684 | | |
685 | 12 | for (auto message : chat) { |
686 | 12 | std::string role(message->role); |
687 | | |
688 | 12 | if (role == "user") { |
689 | 6 | role = "HUMAN"; |
690 | 6 | } else { |
691 | 6 | std::transform(role.begin(), role.end(), role.begin(), ::toupper); |
692 | 6 | } |
693 | | |
694 | 12 | ss << "<role>" << role << "</role>" << message->content << "<|role_end|>"; |
695 | 12 | } |
696 | | |
697 | 2 | if (add_ass) { |
698 | 2 | ss << "<role>ASSISTANT</role>"; |
699 | 2 | } |
700 | 61 | } else if (tmpl == LLM_CHAT_TEMPLATE_LLAMA4) { |
701 | | // Llama 4 |
702 | 42 | for (auto message : chat) { |
703 | 42 | std::string role(message->role); |
704 | 42 | ss << "<|header_start|>" << role << "<|header_end|>\n\n" << trim(message->content) << "<|eot|>"; |
705 | 42 | } |
706 | 7 | if (add_ass) { |
707 | 7 | ss << "<|header_start|>assistant<|header_end|>\n\n"; |
708 | 7 | } |
709 | 54 | } else if (tmpl == LLM_CHAT_TEMPLATE_SMOLVLM) { |
710 | | // SmolVLM |
711 | 1 | ss << "<|im_start|>"; // uses <|im_start|> as BOS, but the actual content is NOT chatml |
712 | 6 | for (auto message : chat) { |
713 | 6 | std::string role(message->role); |
714 | 6 | if (role == "system") { |
715 | 1 | ss << message->content << "\n\n"; |
716 | 5 | } else if (role == "user") { |
717 | 3 | ss << "User: " << message->content << "<end_of_utterance>\n"; |
718 | 3 | } else { |
719 | 2 | ss << "Assistant: " << message->content << "<end_of_utterance>\n"; |
720 | 2 | } |
721 | 6 | } |
722 | 1 | if (add_ass) { |
723 | 1 | ss << "Assistant:"; |
724 | 1 | } |
725 | 53 | } else if (tmpl == LLM_CHAT_TEMPLATE_DOTS1) { |
726 | | // dots.llm1.inst (DOTS1) |
727 | 12 | for (auto message : chat) { |
728 | 12 | std::string role(message->role); |
729 | 12 | if (role == "system") { |
730 | 2 | ss << "<|system|>" << message->content << "<|endofsystem|>"; |
731 | 10 | } else if (role == "user") { |
732 | 6 | ss << "<|userprompt|>" << message->content << "<|endofuserprompt|>"; |
733 | 6 | } else { |
734 | 4 | ss << "<|response|>" << message->content << "<|endofresponse|>"; |
735 | 4 | } |
736 | 12 | } |
737 | 2 | if (add_ass) { |
738 | 2 | ss << "<|response|>"; |
739 | 2 | } |
740 | 51 | } else if (tmpl == LLM_CHAT_TEMPLATE_HUNYUAN_MOE) { |
741 | | // tencent/Hunyuan-A13B-Instruct |
742 | 12 | for (auto message : chat) { |
743 | 12 | std::string role(message->role); |
744 | 12 | if (role == "system") { |
745 | 2 | ss << "<|startoftext|>" << message->content << "<|extra_4|>"; |
746 | 10 | } else if (role == "assistant") { |
747 | 4 | ss << message->content << "<|eos|>"; |
748 | 6 | } else { |
749 | 6 | ss << "<|startoftext|>" << message->content << "<|extra_0|>"; |
750 | 6 | } |
751 | 12 | } |
752 | 49 | } else if (tmpl == LLM_CHAT_TEMPLATE_OPENAI_MOE) { |
753 | | // OpenAI MoE (based on Harmony chat template) |
754 | 12 | for (auto message : chat) { |
755 | 12 | std::string role(message->role); |
756 | 12 | ss << "<|start|>" << role << "<|message|>" << message->content; |
757 | 12 | ss << (role == "assistant" ? "<|return|>" : "<|end|>"); |
758 | 12 | } |
759 | 2 | if (add_ass) { |
760 | 2 | ss << "<|start|>assistant"; |
761 | 2 | } |
762 | 47 | } else if (tmpl == LLM_CHAT_TEMPLATE_HUNYUAN_DENSE) { |
763 | | // tencent/Hunyuan-4B-Instruct |
764 | 14 | for (size_t i = 0; i < chat.size(); i++) { |
765 | 12 | std::string role(chat[i]->role); |
766 | 12 | if (i == 0) { |
767 | 2 | if (role == "system") { |
768 | 2 | ss << chat[i]->content << "<|hy_place▁holder▁no▁3|>"; |
769 | 2 | } |
770 | 2 | } |
771 | | |
772 | 12 | if (role == "assistant") { |
773 | 4 | ss << "<|hy_Assistant|>" << chat[i]->content << "<|hy_place▁holder▁no▁2|>"; |
774 | 8 | } else if (role == "user") { |
775 | 6 | ss << "<|hy_User|>" << chat[i]->content << "<|hy_Assistant|>"; |
776 | 6 | } |
777 | 12 | } |
778 | 45 | } else if (tmpl == LLM_CHAT_TEMPLATE_KIMI_K2) { |
779 | | // moonshotai/Kimi-K2-Instruct |
780 | 12 | for (auto message : chat) { |
781 | 12 | std::string role(message->role); |
782 | 12 | if (role == "system") { |
783 | 2 | ss << "<|im_system|>system<|im_middle|>"; |
784 | 10 | } else if (role == "user") { |
785 | 6 | ss << "<|im_user|>user<|im_middle|>"; |
786 | 6 | } else if (role == "assistant") { |
787 | 4 | ss << "<|im_assistant|>assistant<|im_middle|>"; |
788 | 4 | } else if (role == "tool") { |
789 | 0 | ss << "<|im_system|>tool<|im_middle|>"; |
790 | 0 | } |
791 | | |
792 | 12 | ss << message->content << "<|im_end|>"; |
793 | 12 | } |
794 | 2 | if (add_ass) { |
795 | 2 | ss << "<|im_assistant|>assistant<|im_middle|>"; |
796 | 2 | } |
797 | 43 | } else if (tmpl == LLM_CHAT_TEMPLATE_SEED_OSS) { |
798 | 78 | for (auto message: chat) { |
799 | 78 | std::string role(message->role); |
800 | 78 | ss << "<seed:bos>" << role << "\n" << (role == "assistant" ? trim(message->content) : message->content) << "<seed:eos>"; |
801 | 78 | } |
802 | 13 | if (add_ass) { |
803 | 13 | ss << "<seed:bos>assistant\n"; |
804 | 13 | } |
805 | 30 | } else if (tmpl == LLM_CHAT_TEMPLATE_GROK_2) { |
806 | 114 | for (auto message : chat) { |
807 | 114 | std::string role(message->role); |
808 | 114 | if (role == "system") { |
809 | 19 | ss << "System: " << trim(message->content) << "<|separator|>\n\n"; |
810 | 95 | } else if (role == "user") { |
811 | 57 | ss << "Human: " << trim(message->content) << "<|separator|>\n\n"; |
812 | 57 | } else if (role == "assistant") { |
813 | 38 | ss << "Assistant: " << message->content << "<|separator|>\n\n"; |
814 | 38 | } |
815 | 114 | } |
816 | 19 | if (add_ass) { |
817 | 19 | ss << "Assistant:"; |
818 | 19 | } |
819 | 19 | }else if (tmpl == LLM_CHAT_TEMPLATE_PANGU_EMBED) { |
820 | | // [unused9]系统:xxx[unused10] |
821 | | // [unused9]用户:xxx[unused10] |
822 | | // [unused9]助手:xxx[unused10] |
823 | | // ... |
824 | 77 | for (size_t i = 0; i < chat.size(); ++i) { |
825 | 66 | const auto & msg = chat[i]; |
826 | 66 | const std::string & role = msg->role; |
827 | 66 | const std::string & content = msg->content; |
828 | | |
829 | 66 | if (i == 0 && role != "system") { |
830 | 0 | ss << "[unused9]系统:[unused10]"; |
831 | 0 | } |
832 | | |
833 | 66 | if (role == "system") { |
834 | 11 | ss << "[unused9]系统:" << content << "[unused10]"; |
835 | 55 | } else if (role == "user") { |
836 | 33 | ss << "[unused9]用户:" << content << "[unused10]"; |
837 | 33 | } else if (role == "assistant") { |
838 | 22 | ss << "[unused9]助手:" << content << "[unused10]"; |
839 | 22 | } else if (role == "tool") { |
840 | 0 | ss << "[unused9]工具:" << content << "[unused10]"; |
841 | 0 | } else if (role == "function") { |
842 | 0 | ss << "[unused9]方法:" << content << "[unused10]"; |
843 | 0 | } |
844 | 66 | } |
845 | 11 | if (add_ass) { |
846 | 11 | ss << "[unused9]助手:"; |
847 | 11 | } |
848 | 11 | } else { |
849 | | // template not supported |
850 | 0 | return -1; |
851 | 0 | } |
852 | 917 | dest = ss.str(); |
853 | 917 | return dest.size(); |
854 | 917 | } |
855 | | |
856 | | // public interface |
857 | | |
858 | 0 | int32_t llama_chat_builtin_templates(const char ** output, size_t len) { |
859 | 0 | auto it = LLM_CHAT_TEMPLATES.begin(); |
860 | 0 | for (size_t i = 0; i < std::min(len, LLM_CHAT_TEMPLATES.size()); i++) { |
861 | 0 | output[i] = it->first.c_str(); |
862 | 0 | std::advance(it, 1); |
863 | 0 | } |
864 | 0 | return (int32_t) LLM_CHAT_TEMPLATES.size(); |
865 | 0 | } |