/src/llama.cpp/src/llama-chat.cpp
Line | Count | Source |
1 | | #include "llama-chat.h" |
2 | | |
3 | | #include "llama.h" |
4 | | |
5 | | #include <map> |
6 | | #include <sstream> |
7 | | #include <algorithm> |
8 | | |
9 | | #if __cplusplus >= 202000L |
10 | | #define LU8(x) (const char*)(u8##x) |
11 | | #else |
12 | 2.66k | #define LU8(x) u8##x |
13 | | #endif |
14 | | |
15 | | // trim whitespace from the beginning and end of a string |
16 | 2.25k | static std::string trim(const std::string & str) { |
17 | 2.25k | size_t start = 0; |
18 | 2.25k | size_t end = str.size(); |
19 | 281k | while (start < end && isspace(static_cast<unsigned char>(str[start]))) { |
20 | 279k | start += 1; |
21 | 279k | } |
22 | 1.50M | while (end > start && isspace(static_cast<unsigned char>(str[end - 1]))) { |
23 | 1.50M | end -= 1; |
24 | 1.50M | } |
25 | 2.25k | return str.substr(start, end - start); |
26 | 2.25k | } |
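
A quick illustrative sketch of the helper's behavior (hypothetical checks; trim() is file-static, so they would only compile inside this translation unit):

    #include <cassert> // for the asserts below

    static void trim_examples() {
        assert(trim("  hello  ") == "hello"); // strips both ends
        assert(trim("\t\r\n")    == "");      // all-whitespace becomes empty
        assert(trim("")          == "");      // empty input stays empty
    }
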
27 | | |
28 | | static const std::map<std::string, llm_chat_template> LLM_CHAT_TEMPLATES = { |
29 | | { "chatml", LLM_CHAT_TEMPLATE_CHATML }, |
30 | | { "llama2", LLM_CHAT_TEMPLATE_LLAMA_2 }, |
31 | | { "llama2-sys", LLM_CHAT_TEMPLATE_LLAMA_2_SYS }, |
32 | | { "llama2-sys-bos", LLM_CHAT_TEMPLATE_LLAMA_2_SYS_BOS }, |
33 | | { "llama2-sys-strip", LLM_CHAT_TEMPLATE_LLAMA_2_SYS_STRIP }, |
34 | | { "mistral-v1", LLM_CHAT_TEMPLATE_MISTRAL_V1 }, |
35 | | { "mistral-v3", LLM_CHAT_TEMPLATE_MISTRAL_V3 }, |
36 | | { "mistral-v3-tekken", LLM_CHAT_TEMPLATE_MISTRAL_V3_TEKKEN }, |
37 | | { "mistral-v7", LLM_CHAT_TEMPLATE_MISTRAL_V7 }, |
38 | | { "mistral-v7-tekken", LLM_CHAT_TEMPLATE_MISTRAL_V7_TEKKEN }, |
39 | | { "phi3", LLM_CHAT_TEMPLATE_PHI_3 }, |
40 | | { "phi4", LLM_CHAT_TEMPLATE_PHI_4 }, |
41 | | { "falcon3", LLM_CHAT_TEMPLATE_FALCON_3 }, |
42 | | { "zephyr", LLM_CHAT_TEMPLATE_ZEPHYR }, |
43 | | { "monarch", LLM_CHAT_TEMPLATE_MONARCH }, |
44 | | { "gemma", LLM_CHAT_TEMPLATE_GEMMA }, |
45 | | { "orion", LLM_CHAT_TEMPLATE_ORION }, |
46 | | { "openchat", LLM_CHAT_TEMPLATE_OPENCHAT }, |
47 | | { "vicuna", LLM_CHAT_TEMPLATE_VICUNA }, |
48 | | { "vicuna-orca", LLM_CHAT_TEMPLATE_VICUNA_ORCA }, |
49 | | { "deepseek", LLM_CHAT_TEMPLATE_DEEPSEEK }, |
50 | | { "deepseek2", LLM_CHAT_TEMPLATE_DEEPSEEK_2 }, |
51 | | { "deepseek3", LLM_CHAT_TEMPLATE_DEEPSEEK_3 }, |
52 | | { "command-r", LLM_CHAT_TEMPLATE_COMMAND_R }, |
53 | | { "llama3", LLM_CHAT_TEMPLATE_LLAMA_3 }, |
54 | | { "chatglm3", LLM_CHAT_TEMPLATE_CHATGLM_3 }, |
55 | | { "chatglm4", LLM_CHAT_TEMPLATE_CHATGLM_4 }, |
56 | | { "glmedge", LLM_CHAT_TEMPLATE_GLMEDGE }, |
57 | | { "minicpm", LLM_CHAT_TEMPLATE_MINICPM }, |
58 | | { "exaone3", LLM_CHAT_TEMPLATE_EXAONE_3 }, |
59 | | { "exaone4", LLM_CHAT_TEMPLATE_EXAONE_4 }, |
60 | | { "exaone-moe", LLM_CHAT_TEMPLATE_EXAONE_MOE }, |
61 | | { "rwkv-world", LLM_CHAT_TEMPLATE_RWKV_WORLD }, |
62 | | { "granite", LLM_CHAT_TEMPLATE_GRANITE }, |
63 | | { "gigachat", LLM_CHAT_TEMPLATE_GIGACHAT }, |
64 | | { "megrez", LLM_CHAT_TEMPLATE_MEGREZ }, |
65 | | { "yandex", LLM_CHAT_TEMPLATE_YANDEX }, |
66 | | { "bailing", LLM_CHAT_TEMPLATE_BAILING }, |
67 | | { "bailing-think", LLM_CHAT_TEMPLATE_BAILING_THINK }, |
68 | | { "bailing2", LLM_CHAT_TEMPLATE_BAILING2 }, |
69 | | { "llama4", LLM_CHAT_TEMPLATE_LLAMA4 }, |
70 | | { "smolvlm", LLM_CHAT_TEMPLATE_SMOLVLM }, |
71 | | { "hunyuan-moe", LLM_CHAT_TEMPLATE_HUNYUAN_MOE }, |
72 | | { "gpt-oss", LLM_CHAT_TEMPLATE_OPENAI_MOE }, |
73 | | { "hunyuan-dense", LLM_CHAT_TEMPLATE_HUNYUAN_DENSE }, |
74 | | { "kimi-k2", LLM_CHAT_TEMPLATE_KIMI_K2 }, |
75 | | { "seed_oss", LLM_CHAT_TEMPLATE_SEED_OSS }, |
76 | | { "grok-2", LLM_CHAT_TEMPLATE_GROK_2 }, |
77 | | { "pangu-embedded", LLM_CHAT_TEMPLATE_PANGU_EMBED }, |
78 | | { "solar-open", LLM_CHAT_TEMPLATE_SOLAR_OPEN }, |
79 | | }; |
80 | | |
81 | 1.65k | llm_chat_template llm_chat_template_from_str(const std::string & name) { |
82 | 1.65k | return LLM_CHAT_TEMPLATES.at(name); |
83 | 1.65k | } |
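
Because the lookup uses std::map::at, an unknown name throws std::out_of_range; a caller that wants a soft failure has to catch it, exactly as llm_chat_detect_template does below. A minimal sketch (resolve_or_unknown is a hypothetical helper, not part of this file):

    static llm_chat_template resolve_or_unknown(const std::string & name) {
        try {
            return llm_chat_template_from_str(name); // e.g. "chatml", "llama3"
        } catch (const std::out_of_range &) {
            return LLM_CHAT_TEMPLATE_UNKNOWN;        // unknown name: fail softly
        }
    }
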
84 | | |
85 | 1.65k | llm_chat_template llm_chat_detect_template(const std::string & tmpl) { |
86 | 1.65k | try { |
87 | 1.65k | return llm_chat_template_from_str(tmpl); |
88 | 1.65k | } catch (const std::out_of_range &) { |
89 | | // ignore |
90 | 1.08k | } |
91 | | |
92 | 35.6k | auto tmpl_contains = [&tmpl](const char * haystack) -> bool { |
93 | 35.6k | return tmpl.find(haystack) != std::string::npos; |
94 | 35.6k | }; |
95 | 1.08k | if (tmpl_contains("<|im_start|>")) { |
96 | 3 | return tmpl_contains("<|im_sep|>") |
97 | 3 | ? LLM_CHAT_TEMPLATE_PHI_4 |
98 | 3 | : tmpl_contains("<end_of_utterance>") |
99 | 2 | ? LLM_CHAT_TEMPLATE_SMOLVLM // SmolVLM uses <|im_start|> as BOS, but it is NOT chatml |
100 | 2 | : LLM_CHAT_TEMPLATE_CHATML; |
101 | 1.08k | } else if (tmpl.find("mistral") == 0 || tmpl_contains("[INST]")) { |
102 | 118 | if (tmpl_contains("[SYSTEM_PROMPT]")) { |
103 | 4 | return LLM_CHAT_TEMPLATE_MISTRAL_V7; |
104 | 114 | } else if ( |
105 | | // catches official 'v1' template |
106 | 114 | tmpl_contains("' [INST] ' + system_message") |
107 | | // catches official 'v3' and 'v3-tekken' templates |
108 | 111 | || tmpl_contains("[AVAILABLE_TOOLS]") |
109 | 114 | ) { |
110 | | // Official mistral 'v1', 'v3' and 'v3-tekken' templates |
111 | | // See: https://github.com/mistralai/cookbook/blob/main/concept-deep-dive/tokenization/chat_templates.md |
112 | | // See: https://github.com/mistralai/cookbook/blob/main/concept-deep-dive/tokenization/templates.md |
113 | 17 | if (tmpl_contains(" [INST]")) { |
114 | 6 | return LLM_CHAT_TEMPLATE_MISTRAL_V1; |
115 | 11 | } else if (tmpl_contains("\"[INST]\"")) { |
116 | 2 | return LLM_CHAT_TEMPLATE_MISTRAL_V3_TEKKEN; |
117 | 2 | } |
118 | 9 | return LLM_CHAT_TEMPLATE_MISTRAL_V3; |
119 | 97 | } else { |
120 | | // llama2 template and its variants |
121 | | // [variant] support system message |
122 | | // See: https://huggingface.co/blog/llama2#how-to-prompt-llama-2 |
123 | 97 | bool support_system_message = tmpl_contains("<<SYS>>"); |
124 | 97 | bool add_bos_inside_history = tmpl_contains("bos_token + '[INST]"); |
125 | 97 | bool strip_message = tmpl_contains("content.strip()"); |
126 | 97 | if (strip_message) { |
127 | 13 | return LLM_CHAT_TEMPLATE_LLAMA_2_SYS_STRIP; |
128 | 84 | } else if (add_bos_inside_history) { |
129 | 4 | return LLM_CHAT_TEMPLATE_LLAMA_2_SYS_BOS; |
130 | 80 | } else if (support_system_message) { |
131 | 3 | return LLM_CHAT_TEMPLATE_LLAMA_2_SYS; |
132 | 77 | } else { |
133 | 77 | return LLM_CHAT_TEMPLATE_LLAMA_2; |
134 | 77 | } |
135 | 97 | } |
136 | 964 | } else if (tmpl_contains("<|assistant|>") && tmpl_contains("<|end|>")) { |
137 | 9 | return LLM_CHAT_TEMPLATE_PHI_3; |
138 | 955 | } else if (tmpl_contains("[gMASK]<sop>")) { |
139 | 9 | return LLM_CHAT_TEMPLATE_CHATGLM_4; |
140 | 946 | } else if (tmpl_contains("<|assistant|>") && tmpl_contains("<|user|>")) { |
141 | 35 | if (tmpl_contains("<|tool_declare|>")) { |
142 | 4 | return LLM_CHAT_TEMPLATE_EXAONE_MOE; |
143 | 4 | } |
144 | 31 | return tmpl_contains("</s>") ? LLM_CHAT_TEMPLATE_FALCON_3 : LLM_CHAT_TEMPLATE_GLMEDGE; |
145 | 911 | } else if (tmpl_contains("<|{{ item['role'] }}|>") && tmpl_contains("<|begin_of_image|>")) { |
146 | 3 | return LLM_CHAT_TEMPLATE_GLMEDGE; |
147 | 908 | } else if (tmpl_contains("<|user|>") && tmpl_contains("<|endoftext|>")) { |
148 | 1 | return LLM_CHAT_TEMPLATE_ZEPHYR; |
149 | 907 | } else if (tmpl_contains("bos_token + message['role']")) { |
150 | 1 | return LLM_CHAT_TEMPLATE_MONARCH; |
151 | 906 | } else if (tmpl_contains("<start_of_turn>")) { |
152 | 3 | return LLM_CHAT_TEMPLATE_GEMMA; |
153 | 903 | } else if (tmpl_contains("'\\n\\nAssistant: ' + eos_token")) { |
154 | | // OrionStarAI/Orion-14B-Chat |
155 | 3 | return LLM_CHAT_TEMPLATE_ORION; |
156 | 900 | } else if (tmpl_contains("GPT4 Correct ")) { |
157 | | // openchat/openchat-3.5-0106 |
158 | 1 | return LLM_CHAT_TEMPLATE_OPENCHAT; |
159 | 899 | } else if (tmpl_contains("USER: ") && tmpl_contains("ASSISTANT: ")) { |
160 | | // eachadea/vicuna-13b-1.1 (and Orca variant) |
161 | 5 | if (tmpl_contains("SYSTEM: ")) { |
162 | 3 | return LLM_CHAT_TEMPLATE_VICUNA_ORCA; |
163 | 3 | } |
164 | 2 | return LLM_CHAT_TEMPLATE_VICUNA; |
165 | 894 | } else if (tmpl_contains("### Instruction:") && tmpl_contains("<|EOT|>")) { |
166 | | // deepseek-ai/deepseek-coder-33b-instruct |
167 | 3 | return LLM_CHAT_TEMPLATE_DEEPSEEK; |
168 | 891 | } else if (tmpl_contains("<|START_OF_TURN_TOKEN|>") && tmpl_contains("<|USER_TOKEN|>")) { |
169 | | // CohereForAI/c4ai-command-r-plus |
170 | 2 | return LLM_CHAT_TEMPLATE_COMMAND_R; |
171 | 889 | } else if (tmpl_contains("<|start_header_id|>") && tmpl_contains("<|end_header_id|>")) { |
172 | 3 | return LLM_CHAT_TEMPLATE_LLAMA_3; |
173 | 886 | } else if (tmpl_contains("[gMASK]sop")) { |
174 | | // chatglm3-6b |
175 | 2 | return LLM_CHAT_TEMPLATE_CHATGLM_3; |
176 | 884 | } else if (tmpl_contains(LU8("<用户>"))) { |
177 | | // MiniCPM-3B-OpenHermes-2.5-v2-GGUF |
178 | 10 | return LLM_CHAT_TEMPLATE_MINICPM; |
179 | 874 | } else if (tmpl_contains("'Assistant: ' + message['content'] + eos_token")) { |
180 | 3 | return LLM_CHAT_TEMPLATE_DEEPSEEK_2; |
181 | 871 | } else if (tmpl_contains(LU8("<|Assistant|>")) && tmpl_contains(LU8("<|User|>")) && tmpl_contains(LU8("<|end▁of▁sentence|>"))) { |
182 | 1 | return LLM_CHAT_TEMPLATE_DEEPSEEK_3; |
183 | 870 | } else if (tmpl_contains("[|system|]") && tmpl_contains("[|assistant|]") && tmpl_contains("[|endofturn|]")) { |
184 | 5 | if (tmpl_contains("[|tool|]")) { |
185 | 2 | return LLM_CHAT_TEMPLATE_EXAONE_4; |
186 | 2 | } |
187 | | // ref: https://huggingface.co/LGAI-EXAONE/EXAONE-3.0-7.8B-Instruct/discussions/8#66bae61b1893d14ee8ed85bb |
188 | | // EXAONE-3.0-7.8B-Instruct |
189 | 3 | return LLM_CHAT_TEMPLATE_EXAONE_3; |
190 | 865 | } else if (tmpl_contains("rwkv-world") || tmpl_contains("{{- 'User: ' + message['content']|trim + '\\n\\n' -}}")) { |
191 | 8 | return LLM_CHAT_TEMPLATE_RWKV_WORLD; |
192 | 857 | } else if (tmpl_contains("<|start_of_role|>")) { |
193 | 3 | return LLM_CHAT_TEMPLATE_GRANITE; |
194 | 854 | } else if (tmpl_contains("message['role'] + additional_special_tokens[0] + message['content'] + additional_special_tokens[1]")) { |
195 | 1 | return LLM_CHAT_TEMPLATE_GIGACHAT; |
196 | 853 | } else if (tmpl_contains("<|role_start|>")) { |
197 | 6 | return LLM_CHAT_TEMPLATE_MEGREZ; |
198 | 847 | } else if (tmpl_contains(" Ассистент:")) { |
199 | 3 | return LLM_CHAT_TEMPLATE_YANDEX; |
200 | 844 | } else if (tmpl_contains("<role>ASSISTANT</role>") && tmpl_contains("'HUMAN'")) { |
201 | 2 | return LLM_CHAT_TEMPLATE_BAILING; |
202 | 842 | } else if (tmpl_contains("<role>ASSISTANT</role>") && tmpl_contains("\"HUMAN\"") && tmpl_contains("<think>")) { |
203 | 1 | return LLM_CHAT_TEMPLATE_BAILING_THINK; |
204 | 841 | } else if (tmpl_contains("<role>ASSISTANT</role>") && tmpl_contains("<role>HUMAN</role>") && tmpl_contains("<|role_end|>")) { |
205 | 2 | return LLM_CHAT_TEMPLATE_BAILING2; |
206 | 839 | } else if (tmpl_contains("<|header_start|>") && tmpl_contains("<|header_end|>")) { |
207 | 3 | return LLM_CHAT_TEMPLATE_LLAMA4; |
208 | 836 | } else if (tmpl_contains("<|endofuserprompt|>")) { |
209 | 2 | return LLM_CHAT_TEMPLATE_DOTS1; |
210 | 834 | } else if (tmpl_contains("<|extra_0|>") && tmpl_contains("<|extra_4|>")) { |
211 | 1 | return LLM_CHAT_TEMPLATE_HUNYUAN_MOE; |
212 | 833 | } else if (tmpl_contains("<|start|>") && tmpl_contains("<|channel|>")) { |
213 | 3 | return LLM_CHAT_TEMPLATE_OPENAI_MOE; |
214 | 830 | } else if (tmpl_contains("<|hy_Assistant|>") && tmpl_contains("<|hy_place▁holder▁no▁3|>")) { |
215 | 1 | return LLM_CHAT_TEMPLATE_HUNYUAN_DENSE; |
216 | 829 | } else if (tmpl_contains("<|im_assistant|>assistant<|im_middle|>")) { |
217 | 3 | return LLM_CHAT_TEMPLATE_KIMI_K2; |
218 | 826 | } else if (tmpl_contains("<seed:bos>")) { |
219 | 5 | return LLM_CHAT_TEMPLATE_SEED_OSS; |
220 | 821 | } else if (tmpl_contains("'Assistant: ' + message['content'] + '<|separator|>")) { |
221 | 6 | return LLM_CHAT_TEMPLATE_GROK_2; |
222 | 815 | } else if (tmpl_contains(LU8("[unused9]系统:[unused10]"))) { |
223 | 3 | return LLM_CHAT_TEMPLATE_PANGU_EMBED; |
224 | 812 | } else if (tmpl_contains("<|begin|>") && tmpl_contains("<|end|>") && tmpl_contains("<|content|>")) { |
225 | 2 | return LLM_CHAT_TEMPLATE_SOLAR_OPEN; |
226 | 2 | } |
227 | 810 | return LLM_CHAT_TEMPLATE_UNKNOWN; |
228 | 1.08k | } |
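
As an example of the substring heuristics above, a minimal ChatML-style Jinja template (hypothetical text, not taken from any model) resolves like this:

    const std::string jinja =
        "{% for m in messages %}"
        "<|im_start|>{{ m['role'] }}\n{{ m['content'] }}<|im_end|>\n"
        "{% endfor %}";
    // "<|im_start|>" matches, while neither "<|im_sep|>" (phi4) nor
    // "<end_of_utterance>" (smolvlm) appears, so detection yields:
    llm_chat_template t = llm_chat_detect_template(jinja); // LLM_CHAT_TEMPLATE_CHATML
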
229 | | |
230 | | // Simple version of "llama_apply_chat_template" that only works with strings |
231 | | // This function uses heuristic checks to identify commonly used templates. It is not a Jinja parser.
232 | | int32_t llm_chat_apply_template( |
233 | | llm_chat_template tmpl, |
234 | | const std::vector<const llama_chat_message *> & chat, |
235 | 845 | std::string & dest, bool add_ass) { |
236 | | // Taken from the research: https://github.com/ggerganov/llama.cpp/issues/5527 |
237 | 845 | std::stringstream ss; |
238 | 845 | if (tmpl == LLM_CHAT_TEMPLATE_CHATML) { |
239 | | // chatml template |
240 | 18 | for (auto message : chat) { |
241 | 18 | ss << "<|im_start|>" << message->role << "\n" << message->content << "<|im_end|>\n"; |
242 | 18 | } |
243 | 3 | if (add_ass) { |
244 | 3 | ss << "<|im_start|>assistant\n"; |
245 | 3 | } |
246 | 842 | } else if (tmpl == LLM_CHAT_TEMPLATE_MISTRAL_V7 || tmpl == LLM_CHAT_TEMPLATE_MISTRAL_V7_TEKKEN) { |
247 | | // Official mistral 'v7' template |
248 | | // See: https://huggingface.co/mistralai/Mistral-Large-Instruct-2411#basic-instruct-template-v7 |
249 | | // https://huggingface.co/mistralai/Mistral-Small-3.1-24B-Instruct-2503#basic-instruct-template-v7-tekken |
250 | 64 | const char * trailing_space = tmpl == LLM_CHAT_TEMPLATE_MISTRAL_V7 ? " " : ""; |
251 | 384 | for (auto message : chat) { |
252 | 384 | std::string role(message->role); |
253 | 384 | std::string content(message->content); |
254 | 384 | if (role == "system") { |
255 | 64 | ss << "[SYSTEM_PROMPT]" << trailing_space << content << "[/SYSTEM_PROMPT]"; |
256 | 320 | } else if (role == "user") { |
257 | 192 | ss << "[INST]" << trailing_space << content << "[/INST]"; |
258 | 192 | } else { |
259 | 128 | ss << trailing_space << content << "</s>"; |
260 | 128 | } |
261 | 384 | } |
262 | 778 | } else if (tmpl == LLM_CHAT_TEMPLATE_MISTRAL_V1 |
263 | 696 | || tmpl == LLM_CHAT_TEMPLATE_MISTRAL_V3 |
264 | 661 | || tmpl == LLM_CHAT_TEMPLATE_MISTRAL_V3_TEKKEN) { |
265 | | // See: https://github.com/mistralai/cookbook/blob/main/concept-deep-dive/tokenization/chat_templates.md |
266 | | // See: https://github.com/mistralai/cookbook/blob/main/concept-deep-dive/tokenization/templates.md |
267 | 120 | std::string leading_space = tmpl == LLM_CHAT_TEMPLATE_MISTRAL_V1 ? " " : ""; |
268 | 120 | std::string trailing_space = tmpl == LLM_CHAT_TEMPLATE_MISTRAL_V3_TEKKEN ? "" : " "; |
269 | 120 | bool trim_assistant_message = tmpl == LLM_CHAT_TEMPLATE_MISTRAL_V3; |
270 | 120 | bool is_inside_turn = false; |
271 | 720 | for (auto message : chat) { |
272 | 720 | if (!is_inside_turn) { |
273 | 360 | ss << leading_space << "[INST]" << trailing_space; |
274 | 360 | is_inside_turn = true; |
275 | 360 | } |
276 | 720 | std::string role(message->role); |
277 | 720 | std::string content(message->content); |
278 | 720 | if (role == "system") { |
279 | 120 | ss << content << "\n\n"; |
280 | 600 | } else if (role == "user") { |
281 | 360 | ss << content << leading_space << "[/INST]"; |
282 | 360 | } else { |
283 | 240 | ss << trailing_space << (trim_assistant_message ? trim(content) : content) << "</s>"; |
284 | 240 | is_inside_turn = false; |
285 | 240 | } |
286 | 720 | } |
287 | 658 | } else if ( |
288 | 658 | tmpl == LLM_CHAT_TEMPLATE_LLAMA_2 |
289 | 551 | || tmpl == LLM_CHAT_TEMPLATE_LLAMA_2_SYS |
290 | 546 | || tmpl == LLM_CHAT_TEMPLATE_LLAMA_2_SYS_BOS |
291 | 541 | || tmpl == LLM_CHAT_TEMPLATE_LLAMA_2_SYS_STRIP) { |
292 | | // llama2 template and its variants |
293 | | // [variant] support system message |
294 | | // See: https://huggingface.co/blog/llama2#how-to-prompt-llama-2 |
295 | 181 | bool support_system_message = tmpl != LLM_CHAT_TEMPLATE_LLAMA_2; |
296 | | // [variant] add BOS inside history |
297 | 181 | bool add_bos_inside_history = tmpl == LLM_CHAT_TEMPLATE_LLAMA_2_SYS_BOS; |
298 | | // [variant] trim spaces from the input message |
299 | 181 | bool strip_message = tmpl == LLM_CHAT_TEMPLATE_LLAMA_2_SYS_STRIP; |
300 | | // construct the prompt |
301 | 181 | bool is_inside_turn = true; // skip BOS at the beginning |
302 | 181 | ss << "[INST] "; |
303 | 1.08k | for (auto message : chat) { |
304 | 1.08k | std::string content = strip_message ? trim(message->content) : message->content; |
305 | 1.08k | std::string role(message->role); |
306 | 1.08k | if (!is_inside_turn) { |
307 | 362 | is_inside_turn = true; |
308 | 362 | ss << (add_bos_inside_history ? "<s>[INST] " : "[INST] "); |
309 | 362 | } |
310 | 1.08k | if (role == "system") { |
311 | 181 | if (support_system_message) { |
312 | 74 | ss << "<<SYS>>\n" << content << "\n<</SYS>>\n\n"; |
313 | 107 | } else { |
314 | | // if the model does not support a system message, we still include it in the first message, but without <<SYS>>
315 | 107 | ss << content << "\n"; |
316 | 107 | } |
317 | 905 | } else if (role == "user") { |
318 | 543 | ss << content << " [/INST]"; |
319 | 543 | } else { |
320 | 362 | ss << content << "</s>"; |
321 | 362 | is_inside_turn = false; |
322 | 362 | } |
323 | 1.08k | } |
324 | 477 | } else if (tmpl == LLM_CHAT_TEMPLATE_PHI_3) { |
325 | | // Phi 3 |
326 | 60 | for (auto message : chat) { |
327 | 60 | std::string role(message->role); |
328 | 60 | ss << "<|" << role << "|>\n" << message->content << "<|end|>\n"; |
329 | 60 | } |
330 | 10 | if (add_ass) { |
331 | 10 | ss << "<|assistant|>\n"; |
332 | 10 | } |
333 | 467 | } else if (tmpl == LLM_CHAT_TEMPLATE_PHI_4) { |
334 | | // chatml template |
335 | 12 | for (auto message : chat) { |
336 | 12 | ss << "<|im_start|>" << message->role << "<|im_sep|>" << message->content << "<|im_end|>"; |
337 | 12 | } |
338 | 2 | if (add_ass) { |
339 | 2 | ss << "<|im_start|>assistant<|im_sep|>"; |
340 | 2 | } |
341 | 465 | } else if (tmpl == LLM_CHAT_TEMPLATE_FALCON_3) { |
342 | | // Falcon 3 |
343 | 96 | for (auto message : chat) { |
344 | 96 | std::string role(message->role); |
345 | 96 | ss << "<|" << role << "|>\n" << message->content << "\n"; |
346 | 96 | } |
347 | 16 | if (add_ass) { |
348 | 16 | ss << "<|assistant|>\n"; |
349 | 16 | } |
350 | 449 | } else if (tmpl == LLM_CHAT_TEMPLATE_ZEPHYR) { |
351 | | // zephyr template |
352 | 18 | for (auto message : chat) { |
353 | 18 | ss << "<|" << message->role << "|>" << "\n" << message->content << "<|endoftext|>\n"; |
354 | 18 | } |
355 | 3 | if (add_ass) { |
356 | 3 | ss << "<|assistant|>\n"; |
357 | 3 | } |
358 | 446 | } else if (tmpl == LLM_CHAT_TEMPLATE_MONARCH) { |
359 | | // mlabonne/AlphaMonarch-7B template (the <s> is included inside history) |
360 | 12 | for (auto message : chat) { |
361 | 12 | std::string bos = (message == chat.front()) ? "" : "<s>"; // skip BOS for first message |
362 | 12 | ss << bos << message->role << "\n" << message->content << "</s>\n"; |
363 | 12 | } |
364 | 2 | if (add_ass) { |
365 | 2 | ss << "<s>assistant\n"; |
366 | 2 | } |
367 | 444 | } else if (tmpl == LLM_CHAT_TEMPLATE_GEMMA) { |
368 | | // google/gemma-7b-it |
369 | 165 | std::string system_prompt = ""; |
370 | 990 | for (auto message : chat) { |
371 | 990 | std::string role(message->role); |
372 | 990 | if (role == "system") { |
373 | | // there is no system message for gemma, but we will merge it with user prompt, so nothing is broken |
374 | 165 | system_prompt += trim(message->content); |
375 | 165 | continue; |
376 | 165 | } |
377 | | // in gemma, "assistant" is "model" |
378 | 825 | role = role == "assistant" ? "model" : message->role; |
379 | 825 | ss << "<start_of_turn>" << role << "\n"; |
380 | 825 | if (!system_prompt.empty() && role != "model") { |
381 | 143 | ss << system_prompt << "\n\n"; |
382 | 143 | system_prompt = ""; |
383 | 143 | } |
384 | 825 | ss << trim(message->content) << "<end_of_turn>\n"; |
385 | 825 | } |
386 | 165 | if (add_ass) { |
387 | 165 | ss << "<start_of_turn>model\n"; |
388 | 165 | } |
389 | 279 | } else if (tmpl == LLM_CHAT_TEMPLATE_ORION) { |
390 | | // OrionStarAI/Orion-14B-Chat |
391 | 19 | std::string system_prompt = ""; |
392 | 114 | for (auto message : chat) { |
393 | 114 | std::string role(message->role); |
394 | 114 | if (role == "system") { |
395 | | // there is no system message support, so we merge it with the user prompt
396 | 19 | system_prompt += message->content; |
397 | 19 | continue; |
398 | 95 | } else if (role == "user") { |
399 | 57 | ss << "Human: "; |
400 | 57 | if (!system_prompt.empty()) { |
401 | 15 | ss << system_prompt << "\n\n"; |
402 | 15 | system_prompt = ""; |
403 | 15 | } |
404 | 57 | ss << message->content << "\n\nAssistant: </s>"; |
405 | 57 | } else { |
406 | 38 | ss << message->content << "</s>"; |
407 | 38 | } |
408 | 114 | } |
409 | 260 | } else if (tmpl == LLM_CHAT_TEMPLATE_OPENCHAT) { |
410 | | // openchat/openchat-3.5-0106
411 | 12 | for (auto message : chat) { |
412 | 12 | std::string role(message->role); |
413 | 12 | if (role == "system") { |
414 | 2 | ss << message->content << "<|end_of_turn|>"; |
415 | 10 | } else { |
416 | 10 | role[0] = toupper(role[0]); |
417 | 10 | ss << "GPT4 Correct " << role << ": " << message->content << "<|end_of_turn|>"; |
418 | 10 | } |
419 | 12 | } |
420 | 2 | if (add_ass) { |
421 | 2 | ss << "GPT4 Correct Assistant:"; |
422 | 2 | } |
423 | 258 | } else if (tmpl == LLM_CHAT_TEMPLATE_VICUNA || tmpl == LLM_CHAT_TEMPLATE_VICUNA_ORCA) { |
424 | | // eachadea/vicuna-13b-1.1 (and Orca variant) |
425 | 42 | for (auto message : chat) { |
426 | 42 | std::string role(message->role); |
427 | 42 | if (role == "system") { |
428 | | // Orca-Vicuna variant uses a system prefix |
429 | 7 | if (tmpl == LLM_CHAT_TEMPLATE_VICUNA_ORCA) { |
430 | 4 | ss << "SYSTEM: " << message->content << "\n"; |
431 | 4 | } else { |
432 | 3 | ss << message->content << "\n\n"; |
433 | 3 | } |
434 | 35 | } else if (role == "user") { |
435 | 21 | ss << "USER: " << message->content << "\n"; |
436 | 21 | } else if (role == "assistant") { |
437 | 14 | ss << "ASSISTANT: " << message->content << "</s>\n"; |
438 | 14 | } |
439 | 42 | } |
440 | 7 | if (add_ass) { |
441 | 7 | ss << "ASSISTANT:"; |
442 | 7 | } |
443 | 251 | } else if (tmpl == LLM_CHAT_TEMPLATE_DEEPSEEK) { |
444 | | // deepseek-ai/deepseek-coder-33b-instruct |
445 | 24 | for (auto message : chat) { |
446 | 24 | std::string role(message->role); |
447 | 24 | if (role == "system") { |
448 | 4 | ss << message->content; |
449 | 20 | } else if (role == "user") { |
450 | 12 | ss << "### Instruction:\n" << message->content << "\n"; |
451 | 12 | } else if (role == "assistant") { |
452 | 8 | ss << "### Response:\n" << message->content << "\n<|EOT|>\n"; |
453 | 8 | } |
454 | 24 | } |
455 | 4 | if (add_ass) { |
456 | 4 | ss << "### Response:\n"; |
457 | 4 | } |
458 | 247 | } else if (tmpl == LLM_CHAT_TEMPLATE_COMMAND_R) { |
459 | | // CohereForAI/c4ai-command-r-plus |
460 | 126 | for (auto message : chat) { |
461 | 126 | std::string role(message->role); |
462 | 126 | if (role == "system") { |
463 | 21 | ss << "<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|>" << trim(message->content) << "<|END_OF_TURN_TOKEN|>"; |
464 | 105 | } else if (role == "user") { |
465 | 63 | ss << "<|START_OF_TURN_TOKEN|><|USER_TOKEN|>" << trim(message->content) << "<|END_OF_TURN_TOKEN|>"; |
466 | 63 | } else if (role == "assistant") { |
467 | 42 | ss << "<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>" << trim(message->content) << "<|END_OF_TURN_TOKEN|>"; |
468 | 42 | } |
469 | 126 | } |
470 | 21 | if (add_ass) { |
471 | 21 | ss << "<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>"; |
472 | 21 | } |
473 | 226 | } else if (tmpl == LLM_CHAT_TEMPLATE_LLAMA_3) { |
474 | | // Llama 3 |
475 | 90 | for (auto message : chat) { |
476 | 90 | std::string role(message->role); |
477 | 90 | ss << "<|start_header_id|>" << role << "<|end_header_id|>\n\n" << trim(message->content) << "<|eot_id|>"; |
478 | 90 | } |
479 | 15 | if (add_ass) { |
480 | 15 | ss << "<|start_header_id|>assistant<|end_header_id|>\n\n"; |
481 | 15 | } |
482 | 211 | } else if (tmpl == LLM_CHAT_TEMPLATE_CHATGLM_3) { |
483 | | // chatglm3-6b |
484 | 3 | ss << "[gMASK]" << "sop"; |
485 | 18 | for (auto message : chat) { |
486 | 18 | std::string role(message->role); |
487 | 18 | ss << "<|" << role << "|>" << "\n " << message->content; |
488 | 18 | } |
489 | 3 | if (add_ass) { |
490 | 3 | ss << "<|assistant|>"; |
491 | 3 | } |
492 | 208 | } else if (tmpl == LLM_CHAT_TEMPLATE_CHATGLM_4) { |
493 | 10 | ss << "[gMASK]" << "<sop>"; |
494 | 60 | for (auto message : chat) { |
495 | 60 | std::string role(message->role); |
496 | 60 | ss << "<|" << role << "|>" << "\n" << message->content; |
497 | 60 | } |
498 | 10 | if (add_ass) { |
499 | 10 | ss << "<|assistant|>\n"; |
500 | 10 | } |
501 | 198 | } else if (tmpl == LLM_CHAT_TEMPLATE_GLMEDGE) { |
502 | 120 | for (auto message : chat) { |
503 | 120 | std::string role(message->role); |
504 | 120 | ss << "<|" << role << "|>" << "\n" << message->content; |
505 | 120 | } |
506 | 20 | if (add_ass) { |
507 | 20 | ss << "<|assistant|>"; |
508 | 20 | } |
509 | 178 | } else if (tmpl == LLM_CHAT_TEMPLATE_MINICPM) { |
510 | | // MiniCPM-3B-OpenHermes-2.5-v2-GGUF |
511 | 102 | for (auto message : chat) { |
512 | 102 | std::string role(message->role); |
513 | 102 | if (role == "user") { |
514 | 51 | ss << LU8("<用户>"); |
515 | 51 | ss << trim(message->content); |
516 | 51 | ss << "<AI>"; |
517 | 51 | } else { |
518 | 51 | ss << trim(message->content); |
519 | 51 | } |
520 | 102 | } |
521 | 161 | } else if (tmpl == LLM_CHAT_TEMPLATE_DEEPSEEK_2) { |
522 | | // DeepSeek-V2 |
523 | 24 | for (auto message : chat) { |
524 | 24 | std::string role(message->role); |
525 | 24 | if (role == "system") { |
526 | 4 | ss << message->content << "\n\n"; |
527 | 20 | } else if (role == "user") { |
528 | 12 | ss << "User: " << message->content << "\n\n"; |
529 | 12 | } else if (role == "assistant") { |
530 | 8 | ss << "Assistant: " << message->content << LU8("<|end▁of▁sentence|>"); |
531 | 8 | } |
532 | 24 | } |
533 | 4 | if (add_ass) { |
534 | 4 | ss << "Assistant:"; |
535 | 4 | } |
536 | 157 | } else if (tmpl == LLM_CHAT_TEMPLATE_DEEPSEEK_3) { |
537 | | // DeepSeek-V3 |
538 | 12 | for (auto message : chat) { |
539 | 12 | std::string role(message->role); |
540 | 12 | if (role == "system") { |
541 | 2 | ss << message->content << "\n\n"; |
542 | 10 | } else if (role == "user") { |
543 | 6 | ss << LU8("<|User|>") << message->content; |
544 | 6 | } else if (role == "assistant") { |
545 | 4 | ss << LU8("<|Assistant|>") << message->content << LU8("<|end▁of▁sentence|>"); |
546 | 4 | } |
547 | 12 | } |
548 | 2 | if (add_ass) { |
549 | 2 | ss << LU8("<|Assistant|>"); |
550 | 2 | } |
551 | 155 | } else if (tmpl == LLM_CHAT_TEMPLATE_EXAONE_3) { |
552 | | // ref: https://huggingface.co/LGAI-EXAONE/EXAONE-3.0-7.8B-Instruct/discussions/8#66bae61b1893d14ee8ed85bb |
553 | | // EXAONE-3.0-7.8B-Instruct |
554 | 72 | for (auto message : chat) { |
555 | 72 | std::string role(message->role); |
556 | 72 | if (role == "system") { |
557 | 12 | ss << "[|system|]" << trim(message->content) << "[|endofturn|]\n"; |
558 | 60 | } else if (role == "user") { |
559 | 36 | ss << "[|user|]" << trim(message->content) << "\n"; |
560 | 36 | } else if (role == "assistant") { |
561 | 24 | ss << "[|assistant|]" << trim(message->content) << "[|endofturn|]\n"; |
562 | 24 | } |
563 | 72 | } |
564 | 12 | if (add_ass) { |
565 | 12 | ss << "[|assistant|]"; |
566 | 12 | } |
567 | 143 | } else if (tmpl == LLM_CHAT_TEMPLATE_EXAONE_4) { |
568 | 84 | for (auto message : chat) { |
569 | 84 | std::string role(message->role); |
570 | 84 | if (role == "system") { |
571 | 14 | ss << "[|system|]" << trim(message->content) << "[|endofturn|]\n"; |
572 | 70 | } else if (role == "user") { |
573 | 42 | ss << "[|user|]" << trim(message->content) << "\n"; |
574 | 42 | } else if (role == "assistant") { |
575 | 28 | ss << "[|assistant|]" << trim(message->content) << "[|endofturn|]\n"; |
576 | 28 | } else if (role == "tool") { |
577 | 0 | ss << "[|tool|]" << trim(message->content) << "[|endofturn|]\n"; |
578 | 0 | } |
579 | 84 | } |
580 | 14 | if (add_ass) { |
581 | 14 | ss << "[|assistant|]"; |
582 | 14 | } |
583 | 129 | } else if (tmpl == LLM_CHAT_TEMPLATE_EXAONE_MOE) { |
584 | 72 | for (auto message : chat) { |
585 | 72 | std::string role(message->role); |
586 | 72 | if (role == "system") { |
587 | 12 | ss << "<|system|>\n" << trim(message->content) << "<|endofturn|>\n"; |
588 | 60 | } else if (role == "user") { |
589 | 36 | ss << "<|user|>\n" << trim(message->content) << "<|endofturn|>\n"; |
590 | 36 | } else if (role == "assistant") { |
591 | 24 | ss << "<|assistant|>\n" << trim(message->content) << "<|endofturn|>\n"; |
592 | 24 | } else if (role == "tool") { |
593 | 0 | ss << "<|tool|>\n" << trim(message->content) << "<|endofturn|>\n"; |
594 | 0 | } |
595 | 72 | } |
596 | 12 | if (add_ass) { |
597 | 12 | ss << "<|assistant|>\n"; |
598 | 12 | } |
599 | 117 | } else if (tmpl == LLM_CHAT_TEMPLATE_RWKV_WORLD) { |
600 | | // this template requires the model to have "\n\n" as EOT token |
601 | 119 | for (size_t i = 0; i < chat.size(); i++) { |
602 | 102 | std::string role(chat[i]->role); |
603 | 102 | if (role == "system") { |
604 | 17 | ss << "System: " << trim(chat[i]->content) << "\n\n"; |
605 | 85 | } else if (role == "user") { |
606 | 51 | ss << "User: " << trim(chat[i]->content) << "\n\n"; |
607 | 51 | if (i == chat.size() - 1) { |
608 | 17 | ss << "Assistant:"; |
609 | 17 | } |
610 | 51 | } else if (role == "assistant") { |
611 | 34 | ss << "Assistant: " << trim(chat[i]->content) << "\n\n"; |
612 | 34 | } |
613 | 102 | } |
614 | 100 | } else if (tmpl == LLM_CHAT_TEMPLATE_GRANITE) { |
615 | | // IBM Granite template |
616 | 30 | for (const auto & message : chat) { |
617 | 30 | std::string role(message->role); |
618 | 30 | ss << "<|start_of_role|>" << role << "<|end_of_role|>"; |
619 | 30 | if (role == "assistant_tool_call") { |
620 | 0 | ss << "<|tool_call|>"; |
621 | 0 | } |
622 | 30 | ss << message->content << "<|end_of_text|>\n"; |
623 | 30 | } |
624 | 5 | if (add_ass) { |
625 | 5 | ss << "<|start_of_role|>assistant<|end_of_role|>"; |
626 | 5 | } |
627 | 95 | } else if (tmpl == LLM_CHAT_TEMPLATE_GIGACHAT) { |
628 | | // GigaChat template |
629 | 2 | bool has_system = !chat.empty() && std::string(chat[0]->role) == "system"; |
630 | | |
631 | | // Handle system message if present |
632 | 2 | if (has_system) { |
633 | 2 | ss << "<s>" << chat[0]->content << "<|message_sep|>"; |
634 | 2 | } else { |
635 | 0 | ss << "<s>"; |
636 | 0 | } |
637 | | |
638 | | // Process remaining messages |
639 | 12 | for (size_t i = has_system ? 1 : 0; i < chat.size(); i++) { |
640 | 10 | std::string role(chat[i]->role); |
641 | 10 | if (role == "user") { |
642 | 6 | ss << "user<|role_sep|>" << chat[i]->content << "<|message_sep|>" |
643 | 6 | << "available functions<|role_sep|>[]<|message_sep|>"; |
644 | 6 | } else if (role == "assistant") { |
645 | 4 | ss << "assistant<|role_sep|>" << chat[i]->content << "<|message_sep|>"; |
646 | 4 | } |
647 | 10 | } |
648 | | |
649 | | // Add generation prompt if needed |
650 | 2 | if (add_ass) { |
651 | 2 | ss << "assistant<|role_sep|>"; |
652 | 2 | } |
653 | 93 | } else if (tmpl == LLM_CHAT_TEMPLATE_MEGREZ) { |
654 | | // Megrez template |
655 | 42 | for (auto message : chat) { |
656 | 42 | std::string role(message->role); |
657 | 42 | ss << "<|role_start|>" << role << "<|role_end|>" << message->content << "<|turn_end|>"; |
658 | 42 | } |
659 | | |
660 | 7 | if (add_ass) { |
661 | 7 | ss << "<|role_start|>assistant<|role_end|>"; |
662 | 7 | } |
663 | 86 | } else if (tmpl == LLM_CHAT_TEMPLATE_YANDEX) { |
664 | | // Yandex template ("\n\n" is defined as EOT token) |
665 | | |
666 | 28 | for (size_t i = 0; i < chat.size(); i++) { |
667 | 24 | std::string role(chat[i]->role); |
668 | 24 | if (role == "user") { |
669 | 12 | ss << " Пользователь: " << chat[i]->content << "\n\n"; |
670 | 12 | } else if (role == "assistant") { |
671 | 8 | ss << " Ассистент: " << chat[i]->content << "\n\n"; |
672 | 8 | } |
673 | 24 | } |
674 | | |
675 | | // Add generation prompt if needed |
676 | 4 | if (add_ass) { |
677 | 4 | ss << " Ассистент:[SEP]"; |
678 | 4 | } |
679 | 82 | } else if (tmpl == LLM_CHAT_TEMPLATE_BAILING || tmpl == LLM_CHAT_TEMPLATE_BAILING_THINK) { |
680 | | // Bailing (Ling/Ring) template |
681 | 30 | for (auto message : chat) { |
682 | 30 | std::string role(message->role); |
683 | | |
684 | 30 | if (role == "user") { |
685 | 15 | role = "HUMAN"; |
686 | 15 | } else { |
687 | 15 | std::transform(role.begin(), role.end(), role.begin(), ::toupper); |
688 | 15 | } |
689 | | |
690 | 30 | ss << "<role>" << role << "</role>" << message->content; |
691 | 30 | } |
692 | | |
693 | 5 | if (add_ass) { |
694 | 5 | ss << "<role>ASSISTANT</role>"; |
695 | | |
696 | 5 | if (tmpl == LLM_CHAT_TEMPLATE_BAILING_THINK) { |
697 | 2 | ss << "<think>"; |
698 | 2 | } |
699 | 5 | } |
700 | 77 | } else if (tmpl == LLM_CHAT_TEMPLATE_BAILING2) { |
701 | | // Bailing2 (Ling 2.0) template |
702 | 4 | bool has_system = !chat.empty() && std::string(chat[0]->role) == "system"; |
703 | | |
704 | 4 | if (!has_system) { |
705 | 0 | ss << "<role>SYSTEM</role>detailed thinking off<|role_end|>"; |
706 | 0 | } |
707 | | |
708 | 24 | for (auto message : chat) { |
709 | 24 | std::string role(message->role); |
710 | | |
711 | 24 | if (role == "user") { |
712 | 12 | role = "HUMAN"; |
713 | 12 | } else { |
714 | 12 | std::transform(role.begin(), role.end(), role.begin(), ::toupper); |
715 | 12 | } |
716 | | |
717 | 24 | ss << "<role>" << role << "</role>" << message->content << "<|role_end|>"; |
718 | 24 | } |
719 | | |
720 | 4 | if (add_ass) { |
721 | 4 | ss << "<role>ASSISTANT</role>"; |
722 | 4 | } |
723 | 73 | } else if (tmpl == LLM_CHAT_TEMPLATE_LLAMA4) { |
724 | | // Llama 4 |
725 | 66 | for (auto message : chat) { |
726 | 66 | std::string role(message->role); |
727 | 66 | ss << "<|header_start|>" << role << "<|header_end|>\n\n" << trim(message->content) << "<|eot|>"; |
728 | 66 | } |
729 | 11 | if (add_ass) { |
730 | 11 | ss << "<|header_start|>assistant<|header_end|>\n\n"; |
731 | 11 | } |
732 | 62 | } else if (tmpl == LLM_CHAT_TEMPLATE_SMOLVLM) { |
733 | | // SmolVLM |
734 | 1 | ss << "<|im_start|>"; // uses <|im_start|> as BOS, but the actual content is NOT chatml |
735 | 6 | for (auto message : chat) { |
736 | 6 | std::string role(message->role); |
737 | 6 | if (role == "system") { |
738 | 1 | ss << message->content << "\n\n"; |
739 | 5 | } else if (role == "user") { |
740 | 3 | ss << "User: " << message->content << "<end_of_utterance>\n"; |
741 | 3 | } else { |
742 | 2 | ss << "Assistant: " << message->content << "<end_of_utterance>\n"; |
743 | 2 | } |
744 | 6 | } |
745 | 1 | if (add_ass) { |
746 | 1 | ss << "Assistant:"; |
747 | 1 | } |
748 | 61 | } else if (tmpl == LLM_CHAT_TEMPLATE_DOTS1) { |
749 | | // dots.llm1.inst (DOTS1) |
750 | 12 | for (auto message : chat) { |
751 | 12 | std::string role(message->role); |
752 | 12 | if (role == "system") { |
753 | 2 | ss << "<|system|>" << message->content << "<|endofsystem|>"; |
754 | 10 | } else if (role == "user") { |
755 | 6 | ss << "<|userprompt|>" << message->content << "<|endofuserprompt|>"; |
756 | 6 | } else { |
757 | 4 | ss << "<|response|>" << message->content << "<|endofresponse|>"; |
758 | 4 | } |
759 | 12 | } |
760 | 2 | if (add_ass) { |
761 | 2 | ss << "<|response|>"; |
762 | 2 | } |
763 | 59 | } else if (tmpl == LLM_CHAT_TEMPLATE_HUNYUAN_MOE) { |
764 | | // tencent/Hunyuan-A13B-Instruct |
765 | 12 | for (auto message : chat) { |
766 | 12 | std::string role(message->role); |
767 | 12 | if (role == "system") { |
768 | 2 | ss << "<|startoftext|>" << message->content << "<|extra_4|>"; |
769 | 10 | } else if (role == "assistant") { |
770 | 4 | ss << message->content << "<|eos|>"; |
771 | 6 | } else { |
772 | 6 | ss << "<|startoftext|>" << message->content << "<|extra_0|>"; |
773 | 6 | } |
774 | 12 | } |
775 | 57 | } else if (tmpl == LLM_CHAT_TEMPLATE_OPENAI_MOE) { |
776 | | // OpenAI MoE (based on Harmony chat template) |
777 | 24 | for (auto message : chat) { |
778 | 24 | std::string role(message->role); |
779 | 24 | ss << "<|start|>" << role << "<|message|>" << message->content; |
780 | 24 | ss << (role == "assistant" ? "<|return|>" : "<|end|>"); |
781 | 24 | } |
782 | 4 | if (add_ass) { |
783 | 4 | ss << "<|start|>assistant"; |
784 | 4 | } |
785 | 53 | } else if (tmpl == LLM_CHAT_TEMPLATE_HUNYUAN_DENSE) { |
786 | | // tencent/Hunyuan-4B-Instruct |
787 | 14 | for (size_t i = 0; i < chat.size(); i++) { |
788 | 12 | std::string role(chat[i]->role); |
789 | 12 | if (i == 0) { |
790 | 2 | if (role == "system") { |
791 | 2 | ss << chat[i]->content << "<|hy_place▁holder▁no▁3|>"; |
792 | 2 | } |
793 | 2 | } |
794 | | |
795 | 12 | if (role == "assistant") { |
796 | 4 | ss << "<|hy_Assistant|>" << chat[i]->content << "<|hy_place▁holder▁no▁2|>"; |
797 | 8 | } else if (role == "user") { |
798 | 6 | ss << "<|hy_User|>" << chat[i]->content << "<|hy_Assistant|>"; |
799 | 6 | } |
800 | 12 | } |
801 | 51 | } else if (tmpl == LLM_CHAT_TEMPLATE_KIMI_K2) { |
802 | | // moonshotai/Kimi-K2-Instruct |
803 | 24 | for (auto message : chat) { |
804 | 24 | std::string role(message->role); |
805 | 24 | if (role == "system") { |
806 | 4 | ss << "<|im_system|>system<|im_middle|>"; |
807 | 20 | } else if (role == "user") { |
808 | 12 | ss << "<|im_user|>user<|im_middle|>"; |
809 | 12 | } else if (role == "assistant") { |
810 | 8 | ss << "<|im_assistant|>assistant<|im_middle|>"; |
811 | 8 | } else if (role == "tool") { |
812 | 0 | ss << "<|im_system|>tool<|im_middle|>"; |
813 | 0 | } |
814 | | |
815 | 24 | ss << message->content << "<|im_end|>"; |
816 | 24 | } |
817 | 4 | if (add_ass) { |
818 | 4 | ss << "<|im_assistant|>assistant<|im_middle|>"; |
819 | 4 | } |
820 | 47 | } else if (tmpl == LLM_CHAT_TEMPLATE_SEED_OSS) { |
821 | 90 | for (auto message: chat) { |
822 | 90 | std::string role(message->role); |
823 | 90 | ss << "<seed:bos>" << role << "\n" << (role == "assistant" ? trim(message->content) : message->content) << "<seed:eos>"; |
824 | 90 | } |
825 | 15 | if (add_ass) { |
826 | 15 | ss << "<seed:bos>assistant\n"; |
827 | 15 | } |
828 | 32 | } else if (tmpl == LLM_CHAT_TEMPLATE_GROK_2) { |
829 | 96 | for (auto message : chat) { |
830 | 96 | std::string role(message->role); |
831 | 96 | if (role == "system") { |
832 | 16 | ss << "System: " << trim(message->content) << "<|separator|>\n\n"; |
833 | 80 | } else if (role == "user") { |
834 | 48 | ss << "Human: " << trim(message->content) << "<|separator|>\n\n"; |
835 | 48 | } else if (role == "assistant") { |
836 | 32 | ss << "Assistant: " << message->content << "<|separator|>\n\n"; |
837 | 32 | } |
838 | 96 | } |
839 | 16 | if (add_ass) { |
840 | 16 | ss << "Assistant:"; |
841 | 16 | } |
842 | 16 | } else if (tmpl == LLM_CHAT_TEMPLATE_PANGU_EMBED) {
843 | | // [unused9]系统:xxx[unused10] |
844 | | // [unused9]用户:xxx[unused10] |
845 | | // [unused9]助手:xxx[unused10] |
846 | | // ... |
847 | 91 | for (size_t i = 0; i < chat.size(); ++i) { |
848 | 78 | const auto & msg = chat[i]; |
849 | 78 | const std::string & role = msg->role; |
850 | 78 | const std::string & content = msg->content; |
851 | | |
852 | 78 | if (i == 0 && role != "system") { |
853 | 0 | ss << "[unused9]系统:[unused10]"; |
854 | 0 | } |
855 | | |
856 | 78 | if (role == "system") { |
857 | 13 | ss << "[unused9]系统:" << content << "[unused10]"; |
858 | 65 | } else if (role == "user") { |
859 | 39 | ss << "[unused9]用户:" << content << "[unused10]"; |
860 | 39 | } else if (role == "assistant") { |
861 | 26 | ss << "[unused9]助手:" << content << "[unused10]"; |
862 | 26 | } else if (role == "tool") { |
863 | 0 | ss << "[unused9]工具:" << content << "[unused10]"; |
864 | 0 | } else if (role == "function") { |
865 | 0 | ss << "[unused9]方法:" << content << "[unused10]"; |
866 | 0 | } |
867 | 78 | } |
868 | 13 | if (add_ass) { |
869 | 13 | ss << "[unused9]助手:"; |
870 | 13 | } |
871 | 13 | } else if (tmpl == LLM_CHAT_TEMPLATE_SOLAR_OPEN) { |
872 | 18 | for (auto message : chat) { |
873 | 18 | std::string role(message->role); |
874 | 18 | ss << "<|begin|>" << role << "<|content|>" << message->content << "<|end|>"; |
875 | 18 | } |
876 | 3 | if (add_ass) { |
877 | 3 | ss << "<|begin|>assistant"; |
878 | 3 | } |
879 | 3 | } else { |
880 | | // template not supported |
881 | 0 | return -1; |
882 | 0 | } |
883 | 845 | dest = ss.str(); |
884 | 845 | return dest.size(); |
885 | 845 | } |
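
A short usage sketch, assuming llama_chat_message is the plain { role, content } struct declared in llama.h:

    llama_chat_message msgs[] = {
        { "system", "You are helpful." },
        { "user",   "Hi!"              },
    };
    std::vector<const llama_chat_message *> chat = { &msgs[0], &msgs[1] };

    std::string prompt;
    int32_t res = llm_chat_apply_template(LLM_CHAT_TEMPLATE_CHATML, chat, prompt, /*add_ass=*/true);
    // res == (int32_t) prompt.size(); -1 would mean the template is unsupported.
    // prompt now holds:
    //   "<|im_start|>system\nYou are helpful.<|im_end|>\n"
    //   "<|im_start|>user\nHi!<|im_end|>\n"
    //   "<|im_start|>assistant\n"
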
886 | | |
887 | | // public interface |
888 | | |
889 | 0 | int32_t llama_chat_builtin_templates(const char ** output, size_t len) { |
890 | 0 | auto it = LLM_CHAT_TEMPLATES.begin(); |
891 | 0 | for (size_t i = 0; i < std::min(len, LLM_CHAT_TEMPLATES.size()); i++) { |
892 | 0 | output[i] = it->first.c_str(); |
893 | 0 | std::advance(it, 1); |
894 | 0 | } |
895 | 0 | return (int32_t) LLM_CHAT_TEMPLATES.size(); |
896 | 0 | } |
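
The return value is always the total number of built-in templates, so a caller can size its buffer with a first call and fill it with a second (a sketch; needs <vector> and <cstdio>, and std::map iteration yields the names in lexicographic order):

    int32_t n = llama_chat_builtin_templates(nullptr, 0); // query the count only
    std::vector<const char *> names(n);
    llama_chat_builtin_templates(names.data(), names.size());
    for (const char * name : names) {
        printf("%s\n", name); // "bailing", "bailing-think", ..., "zephyr"
    }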