/src/llama.cpp/src/llama-chat.cpp
Line | Count | Source |
1 | | #include "llama-chat.h" |
2 | | |
3 | | #include "llama.h" |
4 | | |
5 | | #include <map> |
6 | | #include <sstream> |
7 | | #include <algorithm> |
8 | | |
9 | | #if __cplusplus >= 202000L |
10 | | #define LU8(x) (const char*)(u8##x) |
11 | | #else |
12 | 0 | #define LU8(x) u8##x |
13 | | #endif |
14 | | |
15 | | // trim whitespace from the beginning and end of a string |
16 | 0 | static std::string trim(const std::string & str) { |
17 | 0 | size_t start = 0; |
18 | 0 | size_t end = str.size(); |
19 | 0 | while (start < end && isspace(static_cast<unsigned char>(str[start]))) { |
20 | 0 | start += 1; |
21 | 0 | } |
22 | 0 | while (end > start && isspace(static_cast<unsigned char>(str[end - 1]))) { |
23 | 0 | end -= 1; |
24 | 0 | } |
25 | 0 | return str.substr(start, end - start); |
26 | 0 | } |
27 | | |
28 | | static const std::map<std::string, llm_chat_template> LLM_CHAT_TEMPLATES = { |
29 | | { "chatml", LLM_CHAT_TEMPLATE_CHATML }, |
30 | | { "llama2", LLM_CHAT_TEMPLATE_LLAMA_2 }, |
31 | | { "llama2-sys", LLM_CHAT_TEMPLATE_LLAMA_2_SYS }, |
32 | | { "llama2-sys-bos", LLM_CHAT_TEMPLATE_LLAMA_2_SYS_BOS }, |
33 | | { "llama2-sys-strip", LLM_CHAT_TEMPLATE_LLAMA_2_SYS_STRIP }, |
34 | | { "mistral-v1", LLM_CHAT_TEMPLATE_MISTRAL_V1 }, |
35 | | { "mistral-v3", LLM_CHAT_TEMPLATE_MISTRAL_V3 }, |
36 | | { "mistral-v3-tekken", LLM_CHAT_TEMPLATE_MISTRAL_V3_TEKKEN }, |
37 | | { "mistral-v7", LLM_CHAT_TEMPLATE_MISTRAL_V7 }, |
38 | | { "mistral-v7-tekken", LLM_CHAT_TEMPLATE_MISTRAL_V7_TEKKEN }, |
39 | | { "phi3", LLM_CHAT_TEMPLATE_PHI_3 }, |
40 | | { "phi4", LLM_CHAT_TEMPLATE_PHI_4 }, |
41 | | { "falcon3", LLM_CHAT_TEMPLATE_FALCON_3 }, |
42 | | { "zephyr", LLM_CHAT_TEMPLATE_ZEPHYR }, |
43 | | { "monarch", LLM_CHAT_TEMPLATE_MONARCH }, |
44 | | { "gemma", LLM_CHAT_TEMPLATE_GEMMA }, |
45 | | { "orion", LLM_CHAT_TEMPLATE_ORION }, |
46 | | { "openchat", LLM_CHAT_TEMPLATE_OPENCHAT }, |
47 | | { "vicuna", LLM_CHAT_TEMPLATE_VICUNA }, |
48 | | { "vicuna-orca", LLM_CHAT_TEMPLATE_VICUNA_ORCA }, |
49 | | { "deepseek", LLM_CHAT_TEMPLATE_DEEPSEEK }, |
50 | | { "deepseek2", LLM_CHAT_TEMPLATE_DEEPSEEK_2 }, |
51 | | { "deepseek3", LLM_CHAT_TEMPLATE_DEEPSEEK_3 }, |
52 | | { "command-r", LLM_CHAT_TEMPLATE_COMMAND_R }, |
53 | | { "llama3", LLM_CHAT_TEMPLATE_LLAMA_3 }, |
54 | | { "chatglm3", LLM_CHAT_TEMPLATE_CHATGLM_3 }, |
55 | | { "chatglm4", LLM_CHAT_TEMPLATE_CHATGLM_4 }, |
56 | | { "glmedge", LLM_CHAT_TEMPLATE_GLMEDGE }, |
57 | | { "minicpm", LLM_CHAT_TEMPLATE_MINICPM }, |
58 | | { "exaone3", LLM_CHAT_TEMPLATE_EXAONE_3 }, |
59 | | { "exaone4", LLM_CHAT_TEMPLATE_EXAONE_4 }, |
60 | | { "rwkv-world", LLM_CHAT_TEMPLATE_RWKV_WORLD }, |
61 | | { "granite", LLM_CHAT_TEMPLATE_GRANITE }, |
62 | | { "gigachat", LLM_CHAT_TEMPLATE_GIGACHAT }, |
63 | | { "megrez", LLM_CHAT_TEMPLATE_MEGREZ }, |
64 | | { "yandex", LLM_CHAT_TEMPLATE_YANDEX }, |
65 | | { "bailing", LLM_CHAT_TEMPLATE_BAILING }, |
66 | | { "bailing-think", LLM_CHAT_TEMPLATE_BAILING_THINK }, |
67 | | { "bailing2", LLM_CHAT_TEMPLATE_BAILING2 }, |
68 | | { "llama4", LLM_CHAT_TEMPLATE_LLAMA4 }, |
69 | | { "smolvlm", LLM_CHAT_TEMPLATE_SMOLVLM }, |
70 | | { "hunyuan-moe", LLM_CHAT_TEMPLATE_HUNYUAN_MOE }, |
71 | | { "gpt-oss", LLM_CHAT_TEMPLATE_OPENAI_MOE }, |
72 | | { "hunyuan-dense", LLM_CHAT_TEMPLATE_HUNYUAN_DENSE }, |
73 | | { "kimi-k2", LLM_CHAT_TEMPLATE_KIMI_K2 }, |
74 | | { "seed_oss", LLM_CHAT_TEMPLATE_SEED_OSS }, |
75 | | { "grok-2", LLM_CHAT_TEMPLATE_GROK_2 }, |
76 | | { "pangu-embedded", LLM_CHAT_TEMPLATE_PANGU_EMBED }, |
77 | | }; |
78 | | |
79 | 0 | llm_chat_template llm_chat_template_from_str(const std::string & name) { |
80 | 0 | return LLM_CHAT_TEMPLATES.at(name); |
81 | 0 | } |
82 | | |
83 | 0 | llm_chat_template llm_chat_detect_template(const std::string & tmpl) { |
84 | 0 | try { |
85 | 0 | return llm_chat_template_from_str(tmpl); |
86 | 0 | } catch (const std::out_of_range &) { |
87 | | // ignore |
88 | 0 | } |
89 | | |
90 | 0 | auto tmpl_contains = [&tmpl](const char * haystack) -> bool { |
91 | 0 | return tmpl.find(haystack) != std::string::npos; |
92 | 0 | }; |
93 | 0 | if (tmpl_contains("<|im_start|>")) { |
94 | 0 | return tmpl_contains("<|im_sep|>") |
95 | 0 | ? LLM_CHAT_TEMPLATE_PHI_4 |
96 | 0 | : tmpl_contains("<end_of_utterance>") |
97 | 0 | ? LLM_CHAT_TEMPLATE_SMOLVLM // SmolVLM uses <|im_start|> as BOS, but it is NOT chatml |
98 | 0 | : LLM_CHAT_TEMPLATE_CHATML; |
99 | 0 | } else if (tmpl.find("mistral") == 0 || tmpl_contains("[INST]")) { |
100 | 0 | if (tmpl_contains("[SYSTEM_PROMPT]")) { |
101 | 0 | return LLM_CHAT_TEMPLATE_MISTRAL_V7; |
102 | 0 | } else if ( |
103 | | // catches official 'v1' template |
104 | 0 | tmpl_contains("' [INST] ' + system_message") |
105 | | // catches official 'v3' and 'v3-tekken' templates |
106 | 0 | || tmpl_contains("[AVAILABLE_TOOLS]") |
107 | 0 | ) { |
108 | | // Official mistral 'v1', 'v3' and 'v3-tekken' templates |
109 | | // See: https://github.com/mistralai/cookbook/blob/main/concept-deep-dive/tokenization/chat_templates.md |
110 | | // See: https://github.com/mistralai/cookbook/blob/main/concept-deep-dive/tokenization/templates.md |
111 | 0 | if (tmpl_contains(" [INST]")) { |
112 | 0 | return LLM_CHAT_TEMPLATE_MISTRAL_V1; |
113 | 0 | } else if (tmpl_contains("\"[INST]\"")) { |
114 | 0 | return LLM_CHAT_TEMPLATE_MISTRAL_V3_TEKKEN; |
115 | 0 | } |
116 | 0 | return LLM_CHAT_TEMPLATE_MISTRAL_V3; |
117 | 0 | } else { |
118 | | // llama2 template and its variants |
119 | | // [variant] support system message |
120 | | // See: https://huggingface.co/blog/llama2#how-to-prompt-llama-2 |
121 | 0 | bool support_system_message = tmpl_contains("<<SYS>>"); |
122 | 0 | bool add_bos_inside_history = tmpl_contains("bos_token + '[INST]"); |
123 | 0 | bool strip_message = tmpl_contains("content.strip()"); |
124 | 0 | if (strip_message) { |
125 | 0 | return LLM_CHAT_TEMPLATE_LLAMA_2_SYS_STRIP; |
126 | 0 | } else if (add_bos_inside_history) { |
127 | 0 | return LLM_CHAT_TEMPLATE_LLAMA_2_SYS_BOS; |
128 | 0 | } else if (support_system_message) { |
129 | 0 | return LLM_CHAT_TEMPLATE_LLAMA_2_SYS; |
130 | 0 | } else { |
131 | 0 | return LLM_CHAT_TEMPLATE_LLAMA_2; |
132 | 0 | } |
133 | 0 | } |
134 | 0 | } else if (tmpl_contains("<|assistant|>") && tmpl_contains("<|end|>")) { |
135 | 0 | return LLM_CHAT_TEMPLATE_PHI_3; |
136 | 0 | } else if (tmpl_contains("[gMASK]<sop>")) { |
137 | 0 | return LLM_CHAT_TEMPLATE_CHATGLM_4; |
138 | 0 | } else if (tmpl_contains("<|assistant|>") && tmpl_contains("<|user|>")) { |
139 | 0 | return tmpl_contains("</s>") ? LLM_CHAT_TEMPLATE_FALCON_3 : LLM_CHAT_TEMPLATE_GLMEDGE; |
140 | 0 | } else if (tmpl_contains("<|{{ item['role'] }}|>") && tmpl_contains("<|begin_of_image|>")) { |
141 | 0 | return LLM_CHAT_TEMPLATE_GLMEDGE; |
142 | 0 | } else if (tmpl_contains("<|user|>") && tmpl_contains("<|endoftext|>")) { |
143 | 0 | return LLM_CHAT_TEMPLATE_ZEPHYR; |
144 | 0 | } else if (tmpl_contains("bos_token + message['role']")) { |
145 | 0 | return LLM_CHAT_TEMPLATE_MONARCH; |
146 | 0 | } else if (tmpl_contains("<start_of_turn>")) { |
147 | 0 | return LLM_CHAT_TEMPLATE_GEMMA; |
148 | 0 | } else if (tmpl_contains("'\\n\\nAssistant: ' + eos_token")) { |
149 | | // OrionStarAI/Orion-14B-Chat |
150 | 0 | return LLM_CHAT_TEMPLATE_ORION; |
151 | 0 | } else if (tmpl_contains("GPT4 Correct ")) { |
152 | | // openchat/openchat-3.5-0106 |
153 | 0 | return LLM_CHAT_TEMPLATE_OPENCHAT; |
154 | 0 | } else if (tmpl_contains("USER: ") && tmpl_contains("ASSISTANT: ")) { |
155 | | // eachadea/vicuna-13b-1.1 (and Orca variant) |
156 | 0 | if (tmpl_contains("SYSTEM: ")) { |
157 | 0 | return LLM_CHAT_TEMPLATE_VICUNA_ORCA; |
158 | 0 | } |
159 | 0 | return LLM_CHAT_TEMPLATE_VICUNA; |
160 | 0 | } else if (tmpl_contains("### Instruction:") && tmpl_contains("<|EOT|>")) { |
161 | | // deepseek-ai/deepseek-coder-33b-instruct |
162 | 0 | return LLM_CHAT_TEMPLATE_DEEPSEEK; |
163 | 0 | } else if (tmpl_contains("<|START_OF_TURN_TOKEN|>") && tmpl_contains("<|USER_TOKEN|>")) { |
164 | | // CohereForAI/c4ai-command-r-plus |
165 | 0 | return LLM_CHAT_TEMPLATE_COMMAND_R; |
166 | 0 | } else if (tmpl_contains("<|start_header_id|>") && tmpl_contains("<|end_header_id|>")) { |
167 | 0 | return LLM_CHAT_TEMPLATE_LLAMA_3; |
168 | 0 | } else if (tmpl_contains("[gMASK]sop")) { |
169 | | // chatglm3-6b |
170 | 0 | return LLM_CHAT_TEMPLATE_CHATGLM_3; |
171 | 0 | } else if (tmpl_contains(LU8("<用户>"))) { |
172 | | // MiniCPM-3B-OpenHermes-2.5-v2-GGUF |
173 | 0 | return LLM_CHAT_TEMPLATE_MINICPM; |
174 | 0 | } else if (tmpl_contains("'Assistant: ' + message['content'] + eos_token")) { |
175 | 0 | return LLM_CHAT_TEMPLATE_DEEPSEEK_2; |
176 | 0 | } else if (tmpl_contains(LU8("<|Assistant|>")) && tmpl_contains(LU8("<|User|>")) && tmpl_contains(LU8("<|end▁of▁sentence|>"))) { |
177 | 0 | return LLM_CHAT_TEMPLATE_DEEPSEEK_3; |
178 | 0 | } else if (tmpl_contains("[|system|]") && tmpl_contains("[|assistant|]") && tmpl_contains("[|endofturn|]")) { |
179 | 0 | if (tmpl_contains("[|tool|]")) { |
180 | 0 | return LLM_CHAT_TEMPLATE_EXAONE_4; |
181 | 0 | } |
182 | | // ref: https://huggingface.co/LGAI-EXAONE/EXAONE-3.0-7.8B-Instruct/discussions/8#66bae61b1893d14ee8ed85bb |
183 | | // EXAONE-3.0-7.8B-Instruct |
184 | 0 | return LLM_CHAT_TEMPLATE_EXAONE_3; |
185 | 0 | } else if (tmpl_contains("rwkv-world") || tmpl_contains("{{- 'User: ' + message['content']|trim + '\\n\\n' -}}")) { |
186 | 0 | return LLM_CHAT_TEMPLATE_RWKV_WORLD; |
187 | 0 | } else if (tmpl_contains("<|start_of_role|>")) { |
188 | 0 | return LLM_CHAT_TEMPLATE_GRANITE; |
189 | 0 | } else if (tmpl_contains("message['role'] + additional_special_tokens[0] + message['content'] + additional_special_tokens[1]")) { |
190 | 0 | return LLM_CHAT_TEMPLATE_GIGACHAT; |
191 | 0 | } else if (tmpl_contains("<|role_start|>")) { |
192 | 0 | return LLM_CHAT_TEMPLATE_MEGREZ; |
193 | 0 | } else if (tmpl_contains(" Ассистент:")) { |
194 | 0 | return LLM_CHAT_TEMPLATE_YANDEX; |
195 | 0 | } else if (tmpl_contains("<role>ASSISTANT</role>") && tmpl_contains("'HUMAN'")) { |
196 | 0 | return LLM_CHAT_TEMPLATE_BAILING; |
197 | 0 | } else if (tmpl_contains("<role>ASSISTANT</role>") && tmpl_contains("\"HUMAN\"") && tmpl_contains("<think>")) { |
198 | 0 | return LLM_CHAT_TEMPLATE_BAILING_THINK; |
199 | 0 | } else if (tmpl_contains("<role>ASSISTANT</role>") && tmpl_contains("<role>HUMAN</role>") && tmpl_contains("<|role_end|>")) { |
200 | 0 | return LLM_CHAT_TEMPLATE_BAILING2; |
201 | 0 | } else if (tmpl_contains("<|header_start|>") && tmpl_contains("<|header_end|>")) { |
202 | 0 | return LLM_CHAT_TEMPLATE_LLAMA4; |
203 | 0 | } else if (tmpl_contains("<|endofuserprompt|>")) { |
204 | 0 | return LLM_CHAT_TEMPLATE_DOTS1; |
205 | 0 | } else if (tmpl_contains("<|extra_0|>") && tmpl_contains("<|extra_4|>")) { |
206 | 0 | return LLM_CHAT_TEMPLATE_HUNYUAN_MOE; |
207 | 0 | } else if (tmpl_contains("<|start|>") && tmpl_contains("<|channel|>")) { |
208 | 0 | return LLM_CHAT_TEMPLATE_OPENAI_MOE; |
209 | 0 | } else if (tmpl_contains("<|hy_Assistant|>") && tmpl_contains("<|hy_place▁holder▁no▁3|>")) { |
210 | 0 | return LLM_CHAT_TEMPLATE_HUNYUAN_DENSE; |
211 | 0 | } else if (tmpl_contains("<|im_assistant|>assistant<|im_middle|>")) { |
212 | 0 | return LLM_CHAT_TEMPLATE_KIMI_K2; |
213 | 0 | } else if (tmpl_contains("<seed:bos>")) { |
214 | 0 | return LLM_CHAT_TEMPLATE_SEED_OSS; |
215 | 0 | } else if (tmpl_contains("'Assistant: ' + message['content'] + '<|separator|>")) { |
216 | 0 | return LLM_CHAT_TEMPLATE_GROK_2; |
217 | 0 | } else if (tmpl_contains(LU8("[unused9]系统:[unused10]"))) { |
218 | 0 | return LLM_CHAT_TEMPLATE_PANGU_EMBED; |
219 | 0 | } |
220 | 0 | return LLM_CHAT_TEMPLATE_UNKNOWN; |
221 | 0 | } |
222 | | |
223 | | // Simple version of "llama_apply_chat_template" that only works with strings |
224 | | // This function uses heuristic checks to determine a commonly used template. It is not a Jinja parser. |
225 | | int32_t llm_chat_apply_template( |
226 | | llm_chat_template tmpl, |
227 | | const std::vector<const llama_chat_message *> & chat, |
228 | 0 | std::string & dest, bool add_ass) { |
229 | | // Taken from the research: https://github.com/ggerganov/llama.cpp/issues/5527 |
230 | 0 | std::stringstream ss; |
231 | 0 | if (tmpl == LLM_CHAT_TEMPLATE_CHATML) { |
232 | | // chatml template |
233 | 0 | for (auto message : chat) { |
234 | 0 | ss << "<|im_start|>" << message->role << "\n" << message->content << "<|im_end|>\n"; |
235 | 0 | } |
236 | 0 | if (add_ass) { |
237 | 0 | ss << "<|im_start|>assistant\n"; |
238 | 0 | } |
239 | 0 | } else if (tmpl == LLM_CHAT_TEMPLATE_MISTRAL_V7 || tmpl == LLM_CHAT_TEMPLATE_MISTRAL_V7_TEKKEN) { |
240 | | // Official mistral 'v7' template |
241 | | // See: https://huggingface.co/mistralai/Mistral-Large-Instruct-2411#basic-instruct-template-v7 |
242 | | // https://huggingface.co/mistralai/Mistral-Small-3.1-24B-Instruct-2503#basic-instruct-template-v7-tekken |
243 | 0 | const char * trailing_space = tmpl == LLM_CHAT_TEMPLATE_MISTRAL_V7 ? " " : ""; |
244 | 0 | for (auto message : chat) { |
245 | 0 | std::string role(message->role); |
246 | 0 | std::string content(message->content); |
247 | 0 | if (role == "system") { |
248 | 0 | ss << "[SYSTEM_PROMPT]" << trailing_space << content << "[/SYSTEM_PROMPT]"; |
249 | 0 | } else if (role == "user") { |
250 | 0 | ss << "[INST]" << trailing_space << content << "[/INST]"; |
251 | 0 | } else { |
252 | 0 | ss << trailing_space << content << "</s>"; |
253 | 0 | } |
254 | 0 | } |
255 | 0 | } else if (tmpl == LLM_CHAT_TEMPLATE_MISTRAL_V1 |
256 | 0 | || tmpl == LLM_CHAT_TEMPLATE_MISTRAL_V3 |
257 | 0 | || tmpl == LLM_CHAT_TEMPLATE_MISTRAL_V3_TEKKEN) { |
258 | | // See: https://github.com/mistralai/cookbook/blob/main/concept-deep-dive/tokenization/chat_templates.md |
259 | | // See: https://github.com/mistralai/cookbook/blob/main/concept-deep-dive/tokenization/templates.md |
260 | 0 | std::string leading_space = tmpl == LLM_CHAT_TEMPLATE_MISTRAL_V1 ? " " : ""; |
261 | 0 | std::string trailing_space = tmpl == LLM_CHAT_TEMPLATE_MISTRAL_V3_TEKKEN ? "" : " "; |
262 | 0 | bool trim_assistant_message = tmpl == LLM_CHAT_TEMPLATE_MISTRAL_V3; |
263 | 0 | bool is_inside_turn = false; |
264 | 0 | for (auto message : chat) { |
265 | 0 | if (!is_inside_turn) { |
266 | 0 | ss << leading_space << "[INST]" << trailing_space; |
267 | 0 | is_inside_turn = true; |
268 | 0 | } |
269 | 0 | std::string role(message->role); |
270 | 0 | std::string content(message->content); |
271 | 0 | if (role == "system") { |
272 | 0 | ss << content << "\n\n"; |
273 | 0 | } else if (role == "user") { |
274 | 0 | ss << content << leading_space << "[/INST]"; |
275 | 0 | } else { |
276 | 0 | ss << trailing_space << (trim_assistant_message ? trim(content) : content) << "</s>"; |
277 | 0 | is_inside_turn = false; |
278 | 0 | } |
279 | 0 | } |
280 | 0 | } else if ( |
281 | 0 | tmpl == LLM_CHAT_TEMPLATE_LLAMA_2 |
282 | 0 | || tmpl == LLM_CHAT_TEMPLATE_LLAMA_2_SYS |
283 | 0 | || tmpl == LLM_CHAT_TEMPLATE_LLAMA_2_SYS_BOS |
284 | 0 | || tmpl == LLM_CHAT_TEMPLATE_LLAMA_2_SYS_STRIP) { |
285 | | // llama2 template and its variants |
286 | | // [variant] support system message |
287 | | // See: https://huggingface.co/blog/llama2#how-to-prompt-llama-2 |
288 | 0 | bool support_system_message = tmpl != LLM_CHAT_TEMPLATE_LLAMA_2; |
289 | | // [variant] add BOS inside history |
290 | 0 | bool add_bos_inside_history = tmpl == LLM_CHAT_TEMPLATE_LLAMA_2_SYS_BOS; |
291 | | // [variant] trim spaces from the input message |
292 | 0 | bool strip_message = tmpl == LLM_CHAT_TEMPLATE_LLAMA_2_SYS_STRIP; |
293 | | // construct the prompt |
294 | 0 | bool is_inside_turn = true; // skip BOS at the beginning |
295 | 0 | ss << "[INST] "; |
296 | 0 | for (auto message : chat) { |
297 | 0 | std::string content = strip_message ? trim(message->content) : message->content; |
298 | 0 | std::string role(message->role); |
299 | 0 | if (!is_inside_turn) { |
300 | 0 | is_inside_turn = true; |
301 | 0 | ss << (add_bos_inside_history ? "<s>[INST] " : "[INST] "); |
302 | 0 | } |
303 | 0 | if (role == "system") { |
304 | 0 | if (support_system_message) { |
305 | 0 | ss << "<<SYS>>\n" << content << "\n<</SYS>>\n\n"; |
306 | 0 | } else { |
307 | | // if the model does not support a system message, we still include it in the first message, but without <<SYS>> |
308 | 0 | ss << content << "\n"; |
309 | 0 | } |
310 | 0 | } else if (role == "user") { |
311 | 0 | ss << content << " [/INST]"; |
312 | 0 | } else { |
313 | 0 | ss << content << "</s>"; |
314 | 0 | is_inside_turn = false; |
315 | 0 | } |
316 | 0 | } |
317 | 0 | } else if (tmpl == LLM_CHAT_TEMPLATE_PHI_3) { |
318 | | // Phi 3 |
319 | 0 | for (auto message : chat) { |
320 | 0 | std::string role(message->role); |
321 | 0 | ss << "<|" << role << "|>\n" << message->content << "<|end|>\n"; |
322 | 0 | } |
323 | 0 | if (add_ass) { |
324 | 0 | ss << "<|assistant|>\n"; |
325 | 0 | } |
326 | 0 | } else if (tmpl == LLM_CHAT_TEMPLATE_PHI_4) { |
327 | | // chatml template |
328 | 0 | for (auto message : chat) { |
329 | 0 | ss << "<|im_start|>" << message->role << "<|im_sep|>" << message->content << "<|im_end|>"; |
330 | 0 | } |
331 | 0 | if (add_ass) { |
332 | 0 | ss << "<|im_start|>assistant<|im_sep|>"; |
333 | 0 | } |
334 | 0 | } else if (tmpl == LLM_CHAT_TEMPLATE_FALCON_3) { |
335 | | // Falcon 3 |
336 | 0 | for (auto message : chat) { |
337 | 0 | std::string role(message->role); |
338 | 0 | ss << "<|" << role << "|>\n" << message->content << "\n"; |
339 | 0 | } |
340 | 0 | if (add_ass) { |
341 | 0 | ss << "<|assistant|>\n"; |
342 | 0 | } |
343 | 0 | } else if (tmpl == LLM_CHAT_TEMPLATE_ZEPHYR) { |
344 | | // zephyr template |
345 | 0 | for (auto message : chat) { |
346 | 0 | ss << "<|" << message->role << "|>" << "\n" << message->content << "<|endoftext|>\n"; |
347 | 0 | } |
348 | 0 | if (add_ass) { |
349 | 0 | ss << "<|assistant|>\n"; |
350 | 0 | } |
351 | 0 | } else if (tmpl == LLM_CHAT_TEMPLATE_MONARCH) { |
352 | | // mlabonne/AlphaMonarch-7B template (the <s> is included inside history) |
353 | 0 | for (auto message : chat) { |
354 | 0 | std::string bos = (message == chat.front()) ? "" : "<s>"; // skip BOS for first message |
355 | 0 | ss << bos << message->role << "\n" << message->content << "</s>\n"; |
356 | 0 | } |
357 | 0 | if (add_ass) { |
358 | 0 | ss << "<s>assistant\n"; |
359 | 0 | } |
360 | 0 | } else if (tmpl == LLM_CHAT_TEMPLATE_GEMMA) { |
361 | | // google/gemma-7b-it |
362 | 0 | std::string system_prompt = ""; |
363 | 0 | for (auto message : chat) { |
364 | 0 | std::string role(message->role); |
365 | 0 | if (role == "system") { |
366 | | // there is no system message for gemma, but we will merge it with the user prompt, so nothing is broken |
367 | 0 | system_prompt += trim(message->content); |
368 | 0 | continue; |
369 | 0 | } |
370 | | // in gemma, "assistant" is "model" |
371 | 0 | role = role == "assistant" ? "model" : message->role; |
372 | 0 | ss << "<start_of_turn>" << role << "\n"; |
373 | 0 | if (!system_prompt.empty() && role != "model") { |
374 | 0 | ss << system_prompt << "\n\n"; |
375 | 0 | system_prompt = ""; |
376 | 0 | } |
377 | 0 | ss << trim(message->content) << "<end_of_turn>\n"; |
378 | 0 | } |
379 | 0 | if (add_ass) { |
380 | 0 | ss << "<start_of_turn>model\n"; |
381 | 0 | } |
382 | 0 | } else if (tmpl == LLM_CHAT_TEMPLATE_ORION) { |
383 | | // OrionStarAI/Orion-14B-Chat |
384 | 0 | std::string system_prompt = ""; |
385 | 0 | for (auto message : chat) { |
386 | 0 | std::string role(message->role); |
387 | 0 | if (role == "system") { |
388 | | // there is no system message support, so we will merge it with the user prompt |
389 | 0 | system_prompt += message->content; |
390 | 0 | continue; |
391 | 0 | } else if (role == "user") { |
392 | 0 | ss << "Human: "; |
393 | 0 | if (!system_prompt.empty()) { |
394 | 0 | ss << system_prompt << "\n\n"; |
395 | 0 | system_prompt = ""; |
396 | 0 | } |
397 | 0 | ss << message->content << "\n\nAssistant: </s>"; |
398 | 0 | } else { |
399 | 0 | ss << message->content << "</s>"; |
400 | 0 | } |
401 | 0 | } |
402 | 0 | } else if (tmpl == LLM_CHAT_TEMPLATE_OPENCHAT) { |
403 | | // openchat/openchat-3.5-0106 |
404 | 0 | for (auto message : chat) { |
405 | 0 | std::string role(message->role); |
406 | 0 | if (role == "system") { |
407 | 0 | ss << message->content << "<|end_of_turn|>"; |
408 | 0 | } else { |
409 | 0 | role[0] = toupper(role[0]); |
410 | 0 | ss << "GPT4 Correct " << role << ": " << message->content << "<|end_of_turn|>"; |
411 | 0 | } |
412 | 0 | } |
413 | 0 | if (add_ass) { |
414 | 0 | ss << "GPT4 Correct Assistant:"; |
415 | 0 | } |
416 | 0 | } else if (tmpl == LLM_CHAT_TEMPLATE_VICUNA || tmpl == LLM_CHAT_TEMPLATE_VICUNA_ORCA) { |
417 | | // eachadea/vicuna-13b-1.1 (and Orca variant) |
418 | 0 | for (auto message : chat) { |
419 | 0 | std::string role(message->role); |
420 | 0 | if (role == "system") { |
421 | | // Orca-Vicuna variant uses a system prefix |
422 | 0 | if (tmpl == LLM_CHAT_TEMPLATE_VICUNA_ORCA) { |
423 | 0 | ss << "SYSTEM: " << message->content << "\n"; |
424 | 0 | } else { |
425 | 0 | ss << message->content << "\n\n"; |
426 | 0 | } |
427 | 0 | } else if (role == "user") { |
428 | 0 | ss << "USER: " << message->content << "\n"; |
429 | 0 | } else if (role == "assistant") { |
430 | 0 | ss << "ASSISTANT: " << message->content << "</s>\n"; |
431 | 0 | } |
432 | 0 | } |
433 | 0 | if (add_ass) { |
434 | 0 | ss << "ASSISTANT:"; |
435 | 0 | } |
436 | 0 | } else if (tmpl == LLM_CHAT_TEMPLATE_DEEPSEEK) { |
437 | | // deepseek-ai/deepseek-coder-33b-instruct |
438 | 0 | for (auto message : chat) { |
439 | 0 | std::string role(message->role); |
440 | 0 | if (role == "system") { |
441 | 0 | ss << message->content; |
442 | 0 | } else if (role == "user") { |
443 | 0 | ss << "### Instruction:\n" << message->content << "\n"; |
444 | 0 | } else if (role == "assistant") { |
445 | 0 | ss << "### Response:\n" << message->content << "\n<|EOT|>\n"; |
446 | 0 | } |
447 | 0 | } |
448 | 0 | if (add_ass) { |
449 | 0 | ss << "### Response:\n"; |
450 | 0 | } |
451 | 0 | } else if (tmpl == LLM_CHAT_TEMPLATE_COMMAND_R) { |
452 | | // CohereForAI/c4ai-command-r-plus |
453 | 0 | for (auto message : chat) { |
454 | 0 | std::string role(message->role); |
455 | 0 | if (role == "system") { |
456 | 0 | ss << "<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|>" << trim(message->content) << "<|END_OF_TURN_TOKEN|>"; |
457 | 0 | } else if (role == "user") { |
458 | 0 | ss << "<|START_OF_TURN_TOKEN|><|USER_TOKEN|>" << trim(message->content) << "<|END_OF_TURN_TOKEN|>"; |
459 | 0 | } else if (role == "assistant") { |
460 | 0 | ss << "<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>" << trim(message->content) << "<|END_OF_TURN_TOKEN|>"; |
461 | 0 | } |
462 | 0 | } |
463 | 0 | if (add_ass) { |
464 | 0 | ss << "<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>"; |
465 | 0 | } |
466 | 0 | } else if (tmpl == LLM_CHAT_TEMPLATE_LLAMA_3) { |
467 | | // Llama 3 |
468 | 0 | for (auto message : chat) { |
469 | 0 | std::string role(message->role); |
470 | 0 | ss << "<|start_header_id|>" << role << "<|end_header_id|>\n\n" << trim(message->content) << "<|eot_id|>"; |
471 | 0 | } |
472 | 0 | if (add_ass) { |
473 | 0 | ss << "<|start_header_id|>assistant<|end_header_id|>\n\n"; |
474 | 0 | } |
475 | 0 | } else if (tmpl == LLM_CHAT_TEMPLATE_CHATGLM_3) { |
476 | | // chatglm3-6b |
477 | 0 | ss << "[gMASK]" << "sop"; |
478 | 0 | for (auto message : chat) { |
479 | 0 | std::string role(message->role); |
480 | 0 | ss << "<|" << role << "|>" << "\n " << message->content; |
481 | 0 | } |
482 | 0 | if (add_ass) { |
483 | 0 | ss << "<|assistant|>"; |
484 | 0 | } |
485 | 0 | } else if (tmpl == LLM_CHAT_TEMPLATE_CHATGLM_4) { |
486 | 0 | ss << "[gMASK]" << "<sop>"; |
487 | 0 | for (auto message : chat) { |
488 | 0 | std::string role(message->role); |
489 | 0 | ss << "<|" << role << "|>" << "\n" << message->content; |
490 | 0 | } |
491 | 0 | if (add_ass) { |
492 | 0 | ss << "<|assistant|>\n"; |
493 | 0 | } |
494 | 0 | } else if (tmpl == LLM_CHAT_TEMPLATE_GLMEDGE) { |
495 | 0 | for (auto message : chat) { |
496 | 0 | std::string role(message->role); |
497 | 0 | ss << "<|" << role << "|>" << "\n" << message->content; |
498 | 0 | } |
499 | 0 | if (add_ass) { |
500 | 0 | ss << "<|assistant|>"; |
501 | 0 | } |
502 | 0 | } else if (tmpl == LLM_CHAT_TEMPLATE_MINICPM) { |
503 | | // MiniCPM-3B-OpenHermes-2.5-v2-GGUF |
504 | 0 | for (auto message : chat) { |
505 | 0 | std::string role(message->role); |
506 | 0 | if (role == "user") { |
507 | 0 | ss << LU8("<用户>"); |
508 | 0 | ss << trim(message->content); |
509 | 0 | ss << "<AI>"; |
510 | 0 | } else { |
511 | 0 | ss << trim(message->content); |
512 | 0 | } |
513 | 0 | } |
514 | 0 | } else if (tmpl == LLM_CHAT_TEMPLATE_DEEPSEEK_2) { |
515 | | // DeepSeek-V2 |
516 | 0 | for (auto message : chat) { |
517 | 0 | std::string role(message->role); |
518 | 0 | if (role == "system") { |
519 | 0 | ss << message->content << "\n\n"; |
520 | 0 | } else if (role == "user") { |
521 | 0 | ss << "User: " << message->content << "\n\n"; |
522 | 0 | } else if (role == "assistant") { |
523 | 0 | ss << "Assistant: " << message->content << LU8("<|end▁of▁sentence|>"); |
524 | 0 | } |
525 | 0 | } |
526 | 0 | if (add_ass) { |
527 | 0 | ss << "Assistant:"; |
528 | 0 | } |
529 | 0 | } else if (tmpl == LLM_CHAT_TEMPLATE_DEEPSEEK_3) { |
530 | | // DeepSeek-V3 |
531 | 0 | for (auto message : chat) { |
532 | 0 | std::string role(message->role); |
533 | 0 | if (role == "system") { |
534 | 0 | ss << message->content << "\n\n"; |
535 | 0 | } else if (role == "user") { |
536 | 0 | ss << LU8("<|User|>") << message->content; |
537 | 0 | } else if (role == "assistant") { |
538 | 0 | ss << LU8("<|Assistant|>") << message->content << LU8("<|end▁of▁sentence|>"); |
539 | 0 | } |
540 | 0 | } |
541 | 0 | if (add_ass) { |
542 | 0 | ss << LU8("<|Assistant|>"); |
543 | 0 | } |
544 | 0 | } else if (tmpl == LLM_CHAT_TEMPLATE_EXAONE_3) { |
545 | | // ref: https://huggingface.co/LGAI-EXAONE/EXAONE-3.0-7.8B-Instruct/discussions/8#66bae61b1893d14ee8ed85bb |
546 | | // EXAONE-3.0-7.8B-Instruct |
547 | 0 | for (auto message : chat) { |
548 | 0 | std::string role(message->role); |
549 | 0 | if (role == "system") { |
550 | 0 | ss << "[|system|]" << trim(message->content) << "[|endofturn|]\n"; |
551 | 0 | } else if (role == "user") { |
552 | 0 | ss << "[|user|]" << trim(message->content) << "\n"; |
553 | 0 | } else if (role == "assistant") { |
554 | 0 | ss << "[|assistant|]" << trim(message->content) << "[|endofturn|]\n"; |
555 | 0 | } |
556 | 0 | } |
557 | 0 | if (add_ass) { |
558 | 0 | ss << "[|assistant|]"; |
559 | 0 | } |
560 | 0 | } else if (tmpl == LLM_CHAT_TEMPLATE_EXAONE_4) { |
561 | 0 | for (auto message : chat) { |
562 | 0 | std::string role(message->role); |
563 | 0 | if (role == "system") { |
564 | 0 | ss << "[|system|]" << trim(message->content) << "[|endofturn|]\n"; |
565 | 0 | } else if (role == "user") { |
566 | 0 | ss << "[|user|]" << trim(message->content) << "\n"; |
567 | 0 | } else if (role == "assistant") { |
568 | 0 | ss << "[|assistant|]" << trim(message->content) << "[|endofturn|]\n"; |
569 | 0 | } else if (role == "tool") { |
570 | 0 | ss << "[|tool|]" << trim(message->content) << "[|endofturn|]\n"; |
571 | 0 | } |
572 | 0 | } |
573 | 0 | if (add_ass) { |
574 | 0 | ss << "[|assistant|]"; |
575 | 0 | } |
576 | 0 | } else if (tmpl == LLM_CHAT_TEMPLATE_RWKV_WORLD) { |
577 | | // this template requires the model to have "\n\n" as EOT token |
578 | 0 | for (size_t i = 0; i < chat.size(); i++) { |
579 | 0 | std::string role(chat[i]->role); |
580 | 0 | if (role == "system") { |
581 | 0 | ss << "System: " << trim(chat[i]->content) << "\n\n"; |
582 | 0 | } else if (role == "user") { |
583 | 0 | ss << "User: " << trim(chat[i]->content) << "\n\n"; |
584 | 0 | if (i == chat.size() - 1) { |
585 | 0 | ss << "Assistant:"; |
586 | 0 | } |
587 | 0 | } else if (role == "assistant") { |
588 | 0 | ss << "Assistant: " << trim(chat[i]->content) << "\n\n"; |
589 | 0 | } |
590 | 0 | } |
591 | 0 | } else if (tmpl == LLM_CHAT_TEMPLATE_GRANITE) { |
592 | | // IBM Granite template |
593 | 0 | for (const auto & message : chat) { |
594 | 0 | std::string role(message->role); |
595 | 0 | ss << "<|start_of_role|>" << role << "<|end_of_role|>"; |
596 | 0 | if (role == "assistant_tool_call") { |
597 | 0 | ss << "<|tool_call|>"; |
598 | 0 | } |
599 | 0 | ss << message->content << "<|end_of_text|>\n"; |
600 | 0 | } |
601 | 0 | if (add_ass) { |
602 | 0 | ss << "<|start_of_role|>assistant<|end_of_role|>"; |
603 | 0 | } |
604 | 0 | } else if (tmpl == LLM_CHAT_TEMPLATE_GIGACHAT) { |
605 | | // GigaChat template |
606 | 0 | bool has_system = !chat.empty() && std::string(chat[0]->role) == "system"; |
607 | | |
608 | | // Handle system message if present |
609 | 0 | if (has_system) { |
610 | 0 | ss << "<s>" << chat[0]->content << "<|message_sep|>"; |
611 | 0 | } else { |
612 | 0 | ss << "<s>"; |
613 | 0 | } |
614 | | |
615 | | // Process remaining messages |
616 | 0 | for (size_t i = has_system ? 1 : 0; i < chat.size(); i++) { |
617 | 0 | std::string role(chat[i]->role); |
618 | 0 | if (role == "user") { |
619 | 0 | ss << "user<|role_sep|>" << chat[i]->content << "<|message_sep|>" |
620 | 0 | << "available functions<|role_sep|>[]<|message_sep|>"; |
621 | 0 | } else if (role == "assistant") { |
622 | 0 | ss << "assistant<|role_sep|>" << chat[i]->content << "<|message_sep|>"; |
623 | 0 | } |
624 | 0 | } |
625 | | |
626 | | // Add generation prompt if needed |
627 | 0 | if (add_ass) { |
628 | 0 | ss << "assistant<|role_sep|>"; |
629 | 0 | } |
630 | 0 | } else if (tmpl == LLM_CHAT_TEMPLATE_MEGREZ) { |
631 | | // Megrez template |
632 | 0 | for (auto message : chat) { |
633 | 0 | std::string role(message->role); |
634 | 0 | ss << "<|role_start|>" << role << "<|role_end|>" << message->content << "<|turn_end|>"; |
635 | 0 | } |
636 | | 
637 | 0 | if (add_ass) { |
638 | 0 | ss << "<|role_start|>assistant<|role_end|>"; |
639 | 0 | } |
640 | 0 | } else if (tmpl == LLM_CHAT_TEMPLATE_YANDEX) { |
641 | | // Yandex template ("\n\n" is defined as EOT token) |
642 | | 
643 | 0 | for (size_t i = 0; i < chat.size(); i++) { |
644 | 0 | std::string role(chat[i]->role); |
645 | 0 | if (role == "user") { |
646 | 0 | ss << " Пользователь: " << chat[i]->content << "\n\n"; |
647 | 0 | } else if (role == "assistant") { |
648 | 0 | ss << " Ассистент: " << chat[i]->content << "\n\n"; |
649 | 0 | } |
650 | 0 | } |
651 | | |
652 | | // Add generation prompt if needed |
653 | 0 | if (add_ass) { |
654 | 0 | ss << " Ассистент:[SEP]"; |
655 | 0 | } |
656 | 0 | } else if (tmpl == LLM_CHAT_TEMPLATE_BAILING || tmpl == LLM_CHAT_TEMPLATE_BAILING_THINK) { |
657 | | // Bailing (Ling/Ring) template |
658 | 0 | for (auto message : chat) { |
659 | 0 | std::string role(message->role); |
660 | | 
661 | 0 | if (role == "user") { |
662 | 0 | role = "HUMAN"; |
663 | 0 | } else { |
664 | 0 | std::transform(role.begin(), role.end(), role.begin(), ::toupper); |
665 | 0 | } |
666 | | 
667 | 0 | ss << "<role>" << role << "</role>" << message->content; |
668 | 0 | } |
669 | | 
670 | 0 | if (add_ass) { |
671 | 0 | ss << "<role>ASSISTANT</role>"; |
672 | | 
673 | 0 | if (tmpl == LLM_CHAT_TEMPLATE_BAILING_THINK) { |
674 | 0 | ss << "<think>"; |
675 | 0 | } |
676 | 0 | } |
677 | 0 | } else if (tmpl == LLM_CHAT_TEMPLATE_BAILING2) { |
678 | | // Bailing2 (Ling 2.0) template |
679 | 0 | bool has_system = !chat.empty() && std::string(chat[0]->role) == "system"; |
680 | | 
681 | 0 | if (!has_system) { |
682 | 0 | ss << "<role>SYSTEM</role>detailed thinking off<|role_end|>"; |
683 | 0 | } |
684 | | 
685 | 0 | for (auto message : chat) { |
686 | 0 | std::string role(message->role); |
687 | | 
688 | 0 | if (role == "user") { |
689 | 0 | role = "HUMAN"; |
690 | 0 | } else { |
691 | 0 | std::transform(role.begin(), role.end(), role.begin(), ::toupper); |
692 | 0 | } |
693 | | 
694 | 0 | ss << "<role>" << role << "</role>" << message->content << "<|role_end|>"; |
695 | 0 | } |
696 | | 
697 | 0 | if (add_ass) { |
698 | 0 | ss << "<role>ASSISTANT</role>"; |
699 | 0 | } |
700 | 0 | } else if (tmpl == LLM_CHAT_TEMPLATE_LLAMA4) { |
701 | | // Llama 4 |
702 | 0 | for (auto message : chat) { |
703 | 0 | std::string role(message->role); |
704 | 0 | ss << "<|header_start|>" << role << "<|header_end|>\n\n" << trim(message->content) << "<|eot|>"; |
705 | 0 | } |
706 | 0 | if (add_ass) { |
707 | 0 | ss << "<|header_start|>assistant<|header_end|>\n\n"; |
708 | 0 | } |
709 | 0 | } else if (tmpl == LLM_CHAT_TEMPLATE_SMOLVLM) { |
710 | | // SmolVLM |
711 | 0 | ss << "<|im_start|>"; // uses <|im_start|> as BOS, but the actual content is NOT chatml |
712 | 0 | for (auto message : chat) { |
713 | 0 | std::string role(message->role); |
714 | 0 | if (role == "system") { |
715 | 0 | ss << message->content << "\n\n"; |
716 | 0 | } else if (role == "user") { |
717 | 0 | ss << "User: " << message->content << "<end_of_utterance>\n"; |
718 | 0 | } else { |
719 | 0 | ss << "Assistant: " << message->content << "<end_of_utterance>\n"; |
720 | 0 | } |
721 | 0 | } |
722 | 0 | if (add_ass) { |
723 | 0 | ss << "Assistant:"; |
724 | 0 | } |
725 | 0 | } else if (tmpl == LLM_CHAT_TEMPLATE_DOTS1) { |
726 | | // dots.llm1.inst (DOTS1) |
727 | 0 | for (auto message : chat) { |
728 | 0 | std::string role(message->role); |
729 | 0 | if (role == "system") { |
730 | 0 | ss << "<|system|>" << message->content << "<|endofsystem|>"; |
731 | 0 | } else if (role == "user") { |
732 | 0 | ss << "<|userprompt|>" << message->content << "<|endofuserprompt|>"; |
733 | 0 | } else { |
734 | 0 | ss << "<|response|>" << message->content << "<|endofresponse|>"; |
735 | 0 | } |
736 | 0 | } |
737 | 0 | if (add_ass) { |
738 | 0 | ss << "<|response|>"; |
739 | 0 | } |
740 | 0 | } else if (tmpl == LLM_CHAT_TEMPLATE_HUNYUAN_MOE) { |
741 | | // tencent/Hunyuan-A13B-Instruct |
742 | 0 | for (auto message : chat) { |
743 | 0 | std::string role(message->role); |
744 | 0 | if (role == "system") { |
745 | 0 | ss << "<|startoftext|>" << message->content << "<|extra_4|>"; |
746 | 0 | } else if (role == "assistant") { |
747 | 0 | ss << message->content << "<|eos|>"; |
748 | 0 | } else { |
749 | 0 | ss << "<|startoftext|>" << message->content << "<|extra_0|>"; |
750 | 0 | } |
751 | 0 | } |
752 | 0 | } else if (tmpl == LLM_CHAT_TEMPLATE_OPENAI_MOE) { |
753 | | // OpenAI MoE (based on Harmony chat template) |
754 | 0 | for (auto message : chat) { |
755 | 0 | std::string role(message->role); |
756 | 0 | ss << "<|start|>" << role << "<|message|>" << message->content; |
757 | 0 | ss << (role == "assistant" ? "<|return|>" : "<|end|>"); |
758 | 0 | } |
759 | 0 | if (add_ass) { |
760 | 0 | ss << "<|start|>assistant"; |
761 | 0 | } |
762 | 0 | } else if (tmpl == LLM_CHAT_TEMPLATE_HUNYUAN_DENSE) { |
763 | | // tencent/Hunyuan-4B-Instruct |
764 | 0 | for (size_t i = 0; i < chat.size(); i++) { |
765 | 0 | std::string role(chat[i]->role); |
766 | 0 | if (i == 0) { |
767 | 0 | if (role == "system") { |
768 | 0 | ss << chat[i]->content << "<|hy_place▁holder▁no▁3|>"; |
769 | 0 | } |
770 | 0 | } |
771 | | 
772 | 0 | if (role == "assistant") { |
773 | 0 | ss << "<|hy_Assistant|>" << chat[i]->content << "<|hy_place▁holder▁no▁2|>"; |
774 | 0 | } else if (role == "user") { |
775 | 0 | ss << "<|hy_User|>" << chat[i]->content << "<|hy_Assistant|>"; |
776 | 0 | } |
777 | 0 | } |
778 | 0 | } else if (tmpl == LLM_CHAT_TEMPLATE_KIMI_K2) { |
779 | | // moonshotai/Kimi-K2-Instruct |
780 | 0 | for (auto message : chat) { |
781 | 0 | std::string role(message->role); |
782 | 0 | if (role == "system") { |
783 | 0 | ss << "<|im_system|>system<|im_middle|>"; |
784 | 0 | } else if (role == "user") { |
785 | 0 | ss << "<|im_user|>user<|im_middle|>"; |
786 | 0 | } else if (role == "assistant") { |
787 | 0 | ss << "<|im_assistant|>assistant<|im_middle|>"; |
788 | 0 | } else if (role == "tool") { |
789 | 0 | ss << "<|im_system|>tool<|im_middle|>"; |
790 | 0 | } |
791 | | 
792 | 0 | ss << message->content << "<|im_end|>"; |
793 | 0 | } |
794 | 0 | if (add_ass) { |
795 | 0 | ss << "<|im_assistant|>assistant<|im_middle|>"; |
796 | 0 | } |
797 | 0 | } else if (tmpl == LLM_CHAT_TEMPLATE_SEED_OSS) { |
798 | 0 | for (auto message: chat) { |
799 | 0 | std::string role(message->role); |
800 | 0 | ss << "<seed:bos>" << role << "\n" << (role == "assistant" ? trim(message->content) : message->content) << "<seed:eos>"; |
801 | 0 | } |
802 | 0 | if (add_ass) { |
803 | 0 | ss << "<seed:bos>assistant\n"; |
804 | 0 | } |
805 | 0 | } else if (tmpl == LLM_CHAT_TEMPLATE_GROK_2) { |
806 | 0 | for (auto message : chat) { |
807 | 0 | std::string role(message->role); |
808 | 0 | if (role == "system") { |
809 | 0 | ss << "System: " << trim(message->content) << "<|separator|>\n\n"; |
810 | 0 | } else if (role == "user") { |
811 | 0 | ss << "Human: " << trim(message->content) << "<|separator|>\n\n"; |
812 | 0 | } else if (role == "assistant") { |
813 | 0 | ss << "Assistant: " << message->content << "<|separator|>\n\n"; |
814 | 0 | } |
815 | 0 | } |
816 | 0 | if (add_ass) { |
817 | 0 | ss << "Assistant:"; |
818 | 0 | } |
819 | 0 | } else if (tmpl == LLM_CHAT_TEMPLATE_PANGU_EMBED) {
820 | | // [unused9]系统:xxx[unused10] |
821 | | // [unused9]用户:xxx[unused10] |
822 | | // [unused9]助手:xxx[unused10] |
823 | | // ... |
824 | 0 | for (size_t i = 0; i < chat.size(); ++i) { |
825 | 0 | const auto & msg = chat[i]; |
826 | 0 | const std::string & role = msg->role; |
827 | 0 | const std::string & content = msg->content; |
828 | | 
829 | 0 | if (i == 0 && role != "system") { |
830 | 0 | ss << "[unused9]系统:[unused10]"; |
831 | 0 | } |
832 | | 
833 | 0 | if (role == "system") { |
834 | 0 | ss << "[unused9]系统:" << content << "[unused10]"; |
835 | 0 | } else if (role == "user") { |
836 | 0 | ss << "[unused9]用户:" << content << "[unused10]"; |
837 | 0 | } else if (role == "assistant") { |
838 | 0 | ss << "[unused9]助手:" << content << "[unused10]"; |
839 | 0 | } else if (role == "tool") { |
840 | 0 | ss << "[unused9]工具:" << content << "[unused10]"; |
841 | 0 | } else if (role == "function") { |
842 | 0 | ss << "[unused9]方法:" << content << "[unused10]"; |
843 | 0 | } |
844 | 0 | } |
845 | 0 | if (add_ass) { |
846 | 0 | ss << "[unused9]助手:"; |
847 | 0 | } |
848 | 0 | } else { |
849 | | // template not supported |
850 | 0 | return -1; |
851 | 0 | } |
852 | 0 | dest = ss.str(); |
853 | 0 | return dest.size(); |
854 | 0 | } |
855 | | |
856 | | // public interface |
857 | | |
858 | 0 | int32_t llama_chat_builtin_templates(const char ** output, size_t len) { |
859 | 0 | auto it = LLM_CHAT_TEMPLATES.begin(); |
860 | 0 | for (size_t i = 0; i < std::min(len, LLM_CHAT_TEMPLATES.size()); i++) { |
861 | 0 | output[i] = it->first.c_str(); |
862 | 0 | std::advance(it, 1); |
863 | 0 | } |
864 | 0 | return (int32_t) LLM_CHAT_TEMPLATES.size(); |
865 | 0 | } |