/src/llama.cpp/src/llama-chat.cpp
Line | Count | Source |
1 | | #include "llama-chat.h" |
2 | | |
3 | | #include "llama.h" |
4 | | |
5 | | #include <map> |
6 | | #include <sstream> |
7 | | #include <algorithm> |
8 | | |
9 | | #if __cplusplus >= 202000L |
10 | | #define LU8(x) (const char*)(u8##x) |
11 | | #else |
12 | 0 | #define LU8(x) u8##x |
13 | | #endif |
14 | | |
15 | | // trim whitespace from the beginning and end of a string |
16 | 0 | static std::string trim(const std::string & str) { |
17 | 0 | size_t start = 0; |
18 | 0 | size_t end = str.size(); |
19 | 0 | while (start < end && isspace(static_cast<unsigned char>(str[start]))) { |
20 | 0 | start += 1; |
21 | 0 | } |
22 | 0 | while (end > start && isspace(static_cast<unsigned char>(str[end - 1]))) { |
23 | 0 | end -= 1; |
24 | 0 | } |
25 | 0 | return str.substr(start, end - start); |
26 | 0 | } |
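// --- Hypothetical standalone check (not part of llama-chat.cpp) ---
// trim() above is file-local (static), so this sketch re-declares an identical
// copy purely to illustrate its behaviour; the test inputs are assumptions.
#include <cassert>
#include <cctype>
#include <string>

static std::string trim_copy(const std::string & str) {
    size_t start = 0;
    size_t end   = str.size();
    while (start < end && isspace(static_cast<unsigned char>(str[start]))) { start += 1; }
    while (end > start && isspace(static_cast<unsigned char>(str[end - 1]))) { end -= 1; }
    return str.substr(start, end - start);
}

int main() {
    assert(trim_copy("  hello world \n") == "hello world"); // strips both ends
    assert(trim_copy("\t \n").empty());                     // whitespace-only -> empty string
    return 0;
}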
27 | | |
28 | | static const std::map<std::string, llm_chat_template> LLM_CHAT_TEMPLATES = { |
29 | | { "chatml", LLM_CHAT_TEMPLATE_CHATML }, |
30 | | { "llama2", LLM_CHAT_TEMPLATE_LLAMA_2 }, |
31 | | { "llama2-sys", LLM_CHAT_TEMPLATE_LLAMA_2_SYS }, |
32 | | { "llama2-sys-bos", LLM_CHAT_TEMPLATE_LLAMA_2_SYS_BOS }, |
33 | | { "llama2-sys-strip", LLM_CHAT_TEMPLATE_LLAMA_2_SYS_STRIP }, |
34 | | { "mistral-v1", LLM_CHAT_TEMPLATE_MISTRAL_V1 }, |
35 | | { "mistral-v3", LLM_CHAT_TEMPLATE_MISTRAL_V3 }, |
36 | | { "mistral-v3-tekken", LLM_CHAT_TEMPLATE_MISTRAL_V3_TEKKEN }, |
37 | | { "mistral-v7", LLM_CHAT_TEMPLATE_MISTRAL_V7 }, |
38 | | { "mistral-v7-tekken", LLM_CHAT_TEMPLATE_MISTRAL_V7_TEKKEN }, |
39 | | { "phi3", LLM_CHAT_TEMPLATE_PHI_3 }, |
40 | | { "phi4", LLM_CHAT_TEMPLATE_PHI_4 }, |
41 | | { "falcon3", LLM_CHAT_TEMPLATE_FALCON_3 }, |
42 | | { "zephyr", LLM_CHAT_TEMPLATE_ZEPHYR }, |
43 | | { "monarch", LLM_CHAT_TEMPLATE_MONARCH }, |
44 | | { "gemma", LLM_CHAT_TEMPLATE_GEMMA }, |
45 | | { "orion", LLM_CHAT_TEMPLATE_ORION }, |
46 | | { "openchat", LLM_CHAT_TEMPLATE_OPENCHAT }, |
47 | | { "vicuna", LLM_CHAT_TEMPLATE_VICUNA }, |
48 | | { "vicuna-orca", LLM_CHAT_TEMPLATE_VICUNA_ORCA }, |
49 | | { "deepseek", LLM_CHAT_TEMPLATE_DEEPSEEK }, |
50 | | { "deepseek2", LLM_CHAT_TEMPLATE_DEEPSEEK_2 }, |
51 | | { "deepseek3", LLM_CHAT_TEMPLATE_DEEPSEEK_3 }, |
52 | | { "command-r", LLM_CHAT_TEMPLATE_COMMAND_R }, |
53 | | { "llama3", LLM_CHAT_TEMPLATE_LLAMA_3 }, |
54 | | { "chatglm3", LLM_CHAT_TEMPLATE_CHATGLM_3 }, |
55 | | { "chatglm4", LLM_CHAT_TEMPLATE_CHATGLM_4 }, |
56 | | { "glmedge", LLM_CHAT_TEMPLATE_GLMEDGE }, |
57 | | { "minicpm", LLM_CHAT_TEMPLATE_MINICPM }, |
58 | | { "exaone3", LLM_CHAT_TEMPLATE_EXAONE_3 }, |
59 | | { "exaone4", LLM_CHAT_TEMPLATE_EXAONE_4 }, |
60 | | { "rwkv-world", LLM_CHAT_TEMPLATE_RWKV_WORLD }, |
61 | | { "granite", LLM_CHAT_TEMPLATE_GRANITE }, |
62 | | { "gigachat", LLM_CHAT_TEMPLATE_GIGACHAT }, |
63 | | { "megrez", LLM_CHAT_TEMPLATE_MEGREZ }, |
64 | | { "yandex", LLM_CHAT_TEMPLATE_YANDEX }, |
65 | | { "bailing", LLM_CHAT_TEMPLATE_BAILING }, |
66 | | { "bailing-think", LLM_CHAT_TEMPLATE_BAILING_THINK }, |
67 | | { "bailing2", LLM_CHAT_TEMPLATE_BAILING2 }, |
68 | | { "llama4", LLM_CHAT_TEMPLATE_LLAMA4 }, |
69 | | { "smolvlm", LLM_CHAT_TEMPLATE_SMOLVLM }, |
70 | | { "hunyuan-moe", LLM_CHAT_TEMPLATE_HUNYUAN_MOE }, |
71 | | { "gpt-oss", LLM_CHAT_TEMPLATE_OPENAI_MOE }, |
72 | | { "hunyuan-dense", LLM_CHAT_TEMPLATE_HUNYUAN_DENSE }, |
73 | | { "kimi-k2", LLM_CHAT_TEMPLATE_KIMI_K2 }, |
74 | | { "seed_oss", LLM_CHAT_TEMPLATE_SEED_OSS }, |
75 | | { "grok-2", LLM_CHAT_TEMPLATE_GROK_2 }, |
76 | | { "pangu-embedded", LLM_CHAT_TEMPLATE_PANGU_EMBED }, |
77 | | { "solar-open", LLM_CHAT_TEMPLATE_SOLAR_OPEN }, |
78 | | }; |
79 | | |
80 | 0 | llm_chat_template llm_chat_template_from_str(const std::string & name) { |
81 | 0 | return LLM_CHAT_TEMPLATES.at(name); |
82 | 0 | } |
83 | | |
84 | 0 | llm_chat_template llm_chat_detect_template(const std::string & tmpl) { |
85 | 0 | try { |
86 | 0 | return llm_chat_template_from_str(tmpl); |
87 | 0 | } catch (const std::out_of_range &) { |
88 | | // ignore |
89 | 0 | } |
90 | | |
91 | 0 | auto tmpl_contains = [&tmpl](const char * haystack) -> bool { |
92 | 0 | return tmpl.find(haystack) != std::string::npos; |
93 | 0 | }; |
94 | 0 | if (tmpl_contains("<|im_start|>")) { |
95 | 0 | return tmpl_contains("<|im_sep|>") |
96 | 0 | ? LLM_CHAT_TEMPLATE_PHI_4 |
97 | 0 | : tmpl_contains("<end_of_utterance>") |
98 | 0 | ? LLM_CHAT_TEMPLATE_SMOLVLM // SmolVLM uses <|im_start|> as BOS, but it is NOT chatml |
99 | 0 | : LLM_CHAT_TEMPLATE_CHATML; |
100 | 0 | } else if (tmpl.find("mistral") == 0 || tmpl_contains("[INST]")) { |
101 | 0 | if (tmpl_contains("[SYSTEM_PROMPT]")) { |
102 | 0 | return LLM_CHAT_TEMPLATE_MISTRAL_V7; |
103 | 0 | } else if ( |
104 | | // catches official 'v1' template |
105 | 0 | tmpl_contains("' [INST] ' + system_message") |
106 | | // catches official 'v3' and 'v3-tekken' templates |
107 | 0 | || tmpl_contains("[AVAILABLE_TOOLS]") |
108 | 0 | ) { |
109 | | // Official mistral 'v1', 'v3' and 'v3-tekken' templates |
110 | | // See: https://github.com/mistralai/cookbook/blob/main/concept-deep-dive/tokenization/chat_templates.md |
111 | | // See: https://github.com/mistralai/cookbook/blob/main/concept-deep-dive/tokenization/templates.md |
112 | 0 | if (tmpl_contains(" [INST]")) { |
113 | 0 | return LLM_CHAT_TEMPLATE_MISTRAL_V1; |
114 | 0 | } else if (tmpl_contains("\"[INST]\"")) { |
115 | 0 | return LLM_CHAT_TEMPLATE_MISTRAL_V3_TEKKEN; |
116 | 0 | } |
117 | 0 | return LLM_CHAT_TEMPLATE_MISTRAL_V3; |
118 | 0 | } else { |
119 | | // llama2 template and its variants |
120 | | // [variant] support system message |
121 | | // See: https://huggingface.co/blog/llama2#how-to-prompt-llama-2 |
122 | 0 | bool support_system_message = tmpl_contains("<<SYS>>"); |
123 | 0 | bool add_bos_inside_history = tmpl_contains("bos_token + '[INST]"); |
124 | 0 | bool strip_message = tmpl_contains("content.strip()"); |
125 | 0 | if (strip_message) { |
126 | 0 | return LLM_CHAT_TEMPLATE_LLAMA_2_SYS_STRIP; |
127 | 0 | } else if (add_bos_inside_history) { |
128 | 0 | return LLM_CHAT_TEMPLATE_LLAMA_2_SYS_BOS; |
129 | 0 | } else if (support_system_message) { |
130 | 0 | return LLM_CHAT_TEMPLATE_LLAMA_2_SYS; |
131 | 0 | } else { |
132 | 0 | return LLM_CHAT_TEMPLATE_LLAMA_2; |
133 | 0 | } |
134 | 0 | } |
135 | 0 | } else if (tmpl_contains("<|assistant|>") && tmpl_contains("<|end|>")) { |
136 | 0 | return LLM_CHAT_TEMPLATE_PHI_3; |
137 | 0 | } else if (tmpl_contains("[gMASK]<sop>")) { |
138 | 0 | return LLM_CHAT_TEMPLATE_CHATGLM_4; |
139 | 0 | } else if (tmpl_contains("<|assistant|>") && tmpl_contains("<|user|>")) { |
140 | 0 | return tmpl_contains("</s>") ? LLM_CHAT_TEMPLATE_FALCON_3 : LLM_CHAT_TEMPLATE_GLMEDGE; |
141 | 0 | } else if (tmpl_contains("<|{{ item['role'] }}|>") && tmpl_contains("<|begin_of_image|>")) { |
142 | 0 | return LLM_CHAT_TEMPLATE_GLMEDGE; |
143 | 0 | } else if (tmpl_contains("<|user|>") && tmpl_contains("<|endoftext|>")) { |
144 | 0 | return LLM_CHAT_TEMPLATE_ZEPHYR; |
145 | 0 | } else if (tmpl_contains("bos_token + message['role']")) { |
146 | 0 | return LLM_CHAT_TEMPLATE_MONARCH; |
147 | 0 | } else if (tmpl_contains("<start_of_turn>")) { |
148 | 0 | return LLM_CHAT_TEMPLATE_GEMMA; |
149 | 0 | } else if (tmpl_contains("'\\n\\nAssistant: ' + eos_token")) { |
150 | | // OrionStarAI/Orion-14B-Chat |
151 | 0 | return LLM_CHAT_TEMPLATE_ORION; |
152 | 0 | } else if (tmpl_contains("GPT4 Correct ")) { |
153 | | // openchat/openchat-3.5-0106 |
154 | 0 | return LLM_CHAT_TEMPLATE_OPENCHAT; |
155 | 0 | } else if (tmpl_contains("USER: ") && tmpl_contains("ASSISTANT: ")) { |
156 | | // eachadea/vicuna-13b-1.1 (and Orca variant) |
157 | 0 | if (tmpl_contains("SYSTEM: ")) { |
158 | 0 | return LLM_CHAT_TEMPLATE_VICUNA_ORCA; |
159 | 0 | } |
160 | 0 | return LLM_CHAT_TEMPLATE_VICUNA; |
161 | 0 | } else if (tmpl_contains("### Instruction:") && tmpl_contains("<|EOT|>")) { |
162 | | // deepseek-ai/deepseek-coder-33b-instruct |
163 | 0 | return LLM_CHAT_TEMPLATE_DEEPSEEK; |
164 | 0 | } else if (tmpl_contains("<|START_OF_TURN_TOKEN|>") && tmpl_contains("<|USER_TOKEN|>")) { |
165 | | // CohereForAI/c4ai-command-r-plus |
166 | 0 | return LLM_CHAT_TEMPLATE_COMMAND_R; |
167 | 0 | } else if (tmpl_contains("<|start_header_id|>") && tmpl_contains("<|end_header_id|>")) { |
168 | 0 | return LLM_CHAT_TEMPLATE_LLAMA_3; |
169 | 0 | } else if (tmpl_contains("[gMASK]sop")) { |
170 | | // chatglm3-6b |
171 | 0 | return LLM_CHAT_TEMPLATE_CHATGLM_3; |
172 | 0 | } else if (tmpl_contains(LU8("<用户>"))) { |
173 | | // MiniCPM-3B-OpenHermes-2.5-v2-GGUF |
174 | 0 | return LLM_CHAT_TEMPLATE_MINICPM; |
175 | 0 | } else if (tmpl_contains("'Assistant: ' + message['content'] + eos_token")) { |
176 | 0 | return LLM_CHAT_TEMPLATE_DEEPSEEK_2; |
177 | 0 | } else if (tmpl_contains(LU8("<|Assistant|>")) && tmpl_contains(LU8("<|User|>")) && tmpl_contains(LU8("<|end▁of▁sentence|>"))) { |
178 | 0 | return LLM_CHAT_TEMPLATE_DEEPSEEK_3; |
179 | 0 | } else if (tmpl_contains("[|system|]") && tmpl_contains("[|assistant|]") && tmpl_contains("[|endofturn|]")) { |
180 | 0 | if (tmpl_contains("[|tool|]")) { |
181 | 0 | return LLM_CHAT_TEMPLATE_EXAONE_4; |
182 | 0 | } |
183 | | // ref: https://huggingface.co/LGAI-EXAONE/EXAONE-3.0-7.8B-Instruct/discussions/8#66bae61b1893d14ee8ed85bb |
184 | | // EXAONE-3.0-7.8B-Instruct |
185 | 0 | return LLM_CHAT_TEMPLATE_EXAONE_3; |
186 | 0 | } else if (tmpl_contains("rwkv-world") || tmpl_contains("{{- 'User: ' + message['content']|trim + '\\n\\n' -}}")) { |
187 | 0 | return LLM_CHAT_TEMPLATE_RWKV_WORLD; |
188 | 0 | } else if (tmpl_contains("<|start_of_role|>")) { |
189 | 0 | return LLM_CHAT_TEMPLATE_GRANITE; |
190 | 0 | } else if (tmpl_contains("message['role'] + additional_special_tokens[0] + message['content'] + additional_special_tokens[1]")) { |
191 | 0 | return LLM_CHAT_TEMPLATE_GIGACHAT; |
192 | 0 | } else if (tmpl_contains("<|role_start|>")) { |
193 | 0 | return LLM_CHAT_TEMPLATE_MEGREZ; |
194 | 0 | } else if (tmpl_contains(" Ассистент:")) { |
195 | 0 | return LLM_CHAT_TEMPLATE_YANDEX; |
196 | 0 | } else if (tmpl_contains("<role>ASSISTANT</role>") && tmpl_contains("'HUMAN'")) { |
197 | 0 | return LLM_CHAT_TEMPLATE_BAILING; |
198 | 0 | } else if (tmpl_contains("<role>ASSISTANT</role>") && tmpl_contains("\"HUMAN\"") && tmpl_contains("<think>")) { |
199 | 0 | return LLM_CHAT_TEMPLATE_BAILING_THINK; |
200 | 0 | } else if (tmpl_contains("<role>ASSISTANT</role>") && tmpl_contains("<role>HUMAN</role>") && tmpl_contains("<|role_end|>")) { |
201 | 0 | return LLM_CHAT_TEMPLATE_BAILING2; |
202 | 0 | } else if (tmpl_contains("<|header_start|>") && tmpl_contains("<|header_end|>")) { |
203 | 0 | return LLM_CHAT_TEMPLATE_LLAMA4; |
204 | 0 | } else if (tmpl_contains("<|endofuserprompt|>")) { |
205 | 0 | return LLM_CHAT_TEMPLATE_DOTS1; |
206 | 0 | } else if (tmpl_contains("<|extra_0|>") && tmpl_contains("<|extra_4|>")) { |
207 | 0 | return LLM_CHAT_TEMPLATE_HUNYUAN_MOE; |
208 | 0 | } else if (tmpl_contains("<|start|>") && tmpl_contains("<|channel|>")) { |
209 | 0 | return LLM_CHAT_TEMPLATE_OPENAI_MOE; |
210 | 0 | } else if (tmpl_contains("<|hy_Assistant|>") && tmpl_contains("<|hy_place▁holder▁no▁3|>")) { |
211 | 0 | return LLM_CHAT_TEMPLATE_HUNYUAN_DENSE; |
212 | 0 | } else if (tmpl_contains("<|im_assistant|>assistant<|im_middle|>")) { |
213 | 0 | return LLM_CHAT_TEMPLATE_KIMI_K2; |
214 | 0 | } else if (tmpl_contains("<seed:bos>")) { |
215 | 0 | return LLM_CHAT_TEMPLATE_SEED_OSS; |
216 | 0 | } else if (tmpl_contains("'Assistant: ' + message['content'] + '<|separator|>")) { |
217 | 0 | return LLM_CHAT_TEMPLATE_GROK_2; |
218 | 0 | } else if (tmpl_contains(LU8("[unused9]系统:[unused10]"))) { |
219 | 0 | return LLM_CHAT_TEMPLATE_PANGU_EMBED; |
220 | 0 | } else if (tmpl_contains("<|begin|>") && tmpl_contains("<|end|>") && tmpl_contains("<|content|>")) { |
221 | 0 | return LLM_CHAT_TEMPLATE_SOLAR_OPEN; |
222 | 0 | } |
223 | 0 | return LLM_CHAT_TEMPLATE_UNKNOWN; |
224 | 0 | } |
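// --- Hypothetical usage sketch (not part of llama-chat.cpp) ---
// Shows how the two lookup paths interact: llm_chat_template_from_str() only accepts
// the short names in LLM_CHAT_TEMPLATES (and throws std::out_of_range otherwise, which
// llm_chat_detect_template() catches), while the detection fallback runs substring
// heuristics on a full Jinja template. The ChatML-style Jinja snippet below is an
// illustrative assumption, not taken from any particular model.
#include <string>
#include "llama-chat.h" // internal header declaring llm_chat_template and the functions above

int main() {
    const std::string jinja =
        "{% for message in messages %}"
        "{{ '<|im_start|>' + message['role'] + '\\n' + message['content'] + '<|im_end|>\\n' }}"
        "{% endfor %}";

    // "<|im_start|>" is present and neither "<|im_sep|>" nor "<end_of_utterance>" is,
    // so the heuristic classifies this as plain ChatML.
    const bool ok = llm_chat_detect_template(jinja)      == LLM_CHAT_TEMPLATE_CHATML
                 && llm_chat_detect_template("llama3")   == LLM_CHAT_TEMPLATE_LLAMA_3   // exact short name
                 && llm_chat_detect_template("no match") == LLM_CHAT_TEMPLATE_UNKNOWN;  // nothing recognized
    return ok ? 0 : 1;
}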
225 | | |
226 | | // Simple version of "llama_apply_chat_template" that only works with strings |
227 | | // This function uses heuristic checks to determine commonly used templates. It is not a Jinja parser. |
228 | | int32_t llm_chat_apply_template( |
229 | | llm_chat_template tmpl, |
230 | | const std::vector<const llama_chat_message *> & chat, |
231 | 0 | std::string & dest, bool add_ass) { |
232 | | // Taken from the research: https://github.com/ggerganov/llama.cpp/issues/5527 |
233 | 0 | std::stringstream ss; |
234 | 0 | if (tmpl == LLM_CHAT_TEMPLATE_CHATML) { |
235 | | // chatml template |
236 | 0 | for (auto message : chat) { |
237 | 0 | ss << "<|im_start|>" << message->role << "\n" << message->content << "<|im_end|>\n"; |
238 | 0 | } |
239 | 0 | if (add_ass) { |
240 | 0 | ss << "<|im_start|>assistant\n"; |
241 | 0 | } |
242 | 0 | } else if (tmpl == LLM_CHAT_TEMPLATE_MISTRAL_V7 || tmpl == LLM_CHAT_TEMPLATE_MISTRAL_V7_TEKKEN) { |
243 | | // Official mistral 'v7' template |
244 | | // See: https://huggingface.co/mistralai/Mistral-Large-Instruct-2411#basic-instruct-template-v7 |
245 | | // https://huggingface.co/mistralai/Mistral-Small-3.1-24B-Instruct-2503#basic-instruct-template-v7-tekken |
246 | 0 | const char * trailing_space = tmpl == LLM_CHAT_TEMPLATE_MISTRAL_V7 ? " " : ""; |
247 | 0 | for (auto message : chat) { |
248 | 0 | std::string role(message->role); |
249 | 0 | std::string content(message->content); |
250 | 0 | if (role == "system") { |
251 | 0 | ss << "[SYSTEM_PROMPT]" << trailing_space << content << "[/SYSTEM_PROMPT]"; |
252 | 0 | } else if (role == "user") { |
253 | 0 | ss << "[INST]" << trailing_space << content << "[/INST]"; |
254 | 0 | } else { |
255 | 0 | ss << trailing_space << content << "</s>"; |
256 | 0 | } |
257 | 0 | } |
258 | 0 | } else if (tmpl == LLM_CHAT_TEMPLATE_MISTRAL_V1 |
259 | 0 | || tmpl == LLM_CHAT_TEMPLATE_MISTRAL_V3 |
260 | 0 | || tmpl == LLM_CHAT_TEMPLATE_MISTRAL_V3_TEKKEN) { |
261 | | // See: https://github.com/mistralai/cookbook/blob/main/concept-deep-dive/tokenization/chat_templates.md |
262 | | // See: https://github.com/mistralai/cookbook/blob/main/concept-deep-dive/tokenization/templates.md |
263 | 0 | std::string leading_space = tmpl == LLM_CHAT_TEMPLATE_MISTRAL_V1 ? " " : ""; |
264 | 0 | std::string trailing_space = tmpl == LLM_CHAT_TEMPLATE_MISTRAL_V3_TEKKEN ? "" : " "; |
265 | 0 | bool trim_assistant_message = tmpl == LLM_CHAT_TEMPLATE_MISTRAL_V3; |
266 | 0 | bool is_inside_turn = false; |
267 | 0 | for (auto message : chat) { |
268 | 0 | if (!is_inside_turn) { |
269 | 0 | ss << leading_space << "[INST]" << trailing_space; |
270 | 0 | is_inside_turn = true; |
271 | 0 | } |
272 | 0 | std::string role(message->role); |
273 | 0 | std::string content(message->content); |
274 | 0 | if (role == "system") { |
275 | 0 | ss << content << "\n\n"; |
276 | 0 | } else if (role == "user") { |
277 | 0 | ss << content << leading_space << "[/INST]"; |
278 | 0 | } else { |
279 | 0 | ss << trailing_space << (trim_assistant_message ? trim(content) : content) << "</s>"; |
280 | 0 | is_inside_turn = false; |
281 | 0 | } |
282 | 0 | } |
283 | 0 | } else if ( |
284 | 0 | tmpl == LLM_CHAT_TEMPLATE_LLAMA_2 |
285 | 0 | || tmpl == LLM_CHAT_TEMPLATE_LLAMA_2_SYS |
286 | 0 | || tmpl == LLM_CHAT_TEMPLATE_LLAMA_2_SYS_BOS |
287 | 0 | || tmpl == LLM_CHAT_TEMPLATE_LLAMA_2_SYS_STRIP) { |
288 | | // llama2 template and its variants |
289 | | // [variant] support system message |
290 | | // See: https://huggingface.co/blog/llama2#how-to-prompt-llama-2 |
291 | 0 | bool support_system_message = tmpl != LLM_CHAT_TEMPLATE_LLAMA_2; |
292 | | // [variant] add BOS inside history |
293 | 0 | bool add_bos_inside_history = tmpl == LLM_CHAT_TEMPLATE_LLAMA_2_SYS_BOS; |
294 | | // [variant] trim spaces from the input message |
295 | 0 | bool strip_message = tmpl == LLM_CHAT_TEMPLATE_LLAMA_2_SYS_STRIP; |
296 | | // construct the prompt |
297 | 0 | bool is_inside_turn = true; // skip BOS at the beginning |
298 | 0 | ss << "[INST] "; |
299 | 0 | for (auto message : chat) { |
300 | 0 | std::string content = strip_message ? trim(message->content) : message->content; |
301 | 0 | std::string role(message->role); |
302 | 0 | if (!is_inside_turn) { |
303 | 0 | is_inside_turn = true; |
304 | 0 | ss << (add_bos_inside_history ? "<s>[INST] " : "[INST] "); |
305 | 0 | } |
306 | 0 | if (role == "system") { |
307 | 0 | if (support_system_message) { |
308 | 0 | ss << "<<SYS>>\n" << content << "\n<</SYS>>\n\n"; |
309 | 0 | } else { |
310 | | // if the model does not support system message, we still include it in the first message, but without <<SYS>> |
311 | 0 | ss << content << "\n"; |
312 | 0 | } |
313 | 0 | } else if (role == "user") { |
314 | 0 | ss << content << " [/INST]"; |
315 | 0 | } else { |
316 | 0 | ss << content << "</s>"; |
317 | 0 | is_inside_turn = false; |
318 | 0 | } |
319 | 0 | } |
320 | 0 | } else if (tmpl == LLM_CHAT_TEMPLATE_PHI_3) { |
321 | | // Phi 3 |
322 | 0 | for (auto message : chat) { |
323 | 0 | std::string role(message->role); |
324 | 0 | ss << "<|" << role << "|>\n" << message->content << "<|end|>\n"; |
325 | 0 | } |
326 | 0 | if (add_ass) { |
327 | 0 | ss << "<|assistant|>\n"; |
328 | 0 | } |
329 | 0 | } else if (tmpl == LLM_CHAT_TEMPLATE_PHI_4) { |
330 | | // chatml template |
331 | 0 | for (auto message : chat) { |
332 | 0 | ss << "<|im_start|>" << message->role << "<|im_sep|>" << message->content << "<|im_end|>"; |
333 | 0 | } |
334 | 0 | if (add_ass) { |
335 | 0 | ss << "<|im_start|>assistant<|im_sep|>"; |
336 | 0 | } |
337 | 0 | } else if (tmpl == LLM_CHAT_TEMPLATE_FALCON_3) { |
338 | | // Falcon 3 |
339 | 0 | for (auto message : chat) { |
340 | 0 | std::string role(message->role); |
341 | 0 | ss << "<|" << role << "|>\n" << message->content << "\n"; |
342 | 0 | } |
343 | 0 | if (add_ass) { |
344 | 0 | ss << "<|assistant|>\n"; |
345 | 0 | } |
346 | 0 | } else if (tmpl == LLM_CHAT_TEMPLATE_ZEPHYR) { |
347 | | // zephyr template |
348 | 0 | for (auto message : chat) { |
349 | 0 | ss << "<|" << message->role << "|>" << "\n" << message->content << "<|endoftext|>\n"; |
350 | 0 | } |
351 | 0 | if (add_ass) { |
352 | 0 | ss << "<|assistant|>\n"; |
353 | 0 | } |
354 | 0 | } else if (tmpl == LLM_CHAT_TEMPLATE_MONARCH) { |
355 | | // mlabonne/AlphaMonarch-7B template (the <s> is included inside history) |
356 | 0 | for (auto message : chat) { |
357 | 0 | std::string bos = (message == chat.front()) ? "" : "<s>"; // skip BOS for first message |
358 | 0 | ss << bos << message->role << "\n" << message->content << "</s>\n"; |
359 | 0 | } |
360 | 0 | if (add_ass) { |
361 | 0 | ss << "<s>assistant\n"; |
362 | 0 | } |
363 | 0 | } else if (tmpl == LLM_CHAT_TEMPLATE_GEMMA) { |
364 | | // google/gemma-7b-it |
365 | 0 | std::string system_prompt = ""; |
366 | 0 | for (auto message : chat) { |
367 | 0 | std::string role(message->role); |
368 | 0 | if (role == "system") { |
369 | | // gemma has no system role, so the system message is merged into the next user prompt and nothing is lost |
370 | 0 | system_prompt += trim(message->content); |
371 | 0 | continue; |
372 | 0 | } |
373 | | // in gemma, "assistant" is "model" |
374 | 0 | role = role == "assistant" ? "model" : message->role; |
375 | 0 | ss << "<start_of_turn>" << role << "\n"; |
376 | 0 | if (!system_prompt.empty() && role != "model") { |
377 | 0 | ss << system_prompt << "\n\n"; |
378 | 0 | system_prompt = ""; |
379 | 0 | } |
380 | 0 | ss << trim(message->content) << "<end_of_turn>\n"; |
381 | 0 | } |
382 | 0 | if (add_ass) { |
383 | 0 | ss << "<start_of_turn>model\n"; |
384 | 0 | } |
385 | 0 | } else if (tmpl == LLM_CHAT_TEMPLATE_ORION) { |
386 | | // OrionStarAI/Orion-14B-Chat |
387 | 0 | std::string system_prompt = ""; |
388 | 0 | for (auto message : chat) { |
389 | 0 | std::string role(message->role); |
390 | 0 | if (role == "system") { |
391 | | // there is no system message support; merge it into the next user prompt |
392 | 0 | system_prompt += message->content; |
393 | 0 | continue; |
394 | 0 | } else if (role == "user") { |
395 | 0 | ss << "Human: "; |
396 | 0 | if (!system_prompt.empty()) { |
397 | 0 | ss << system_prompt << "\n\n"; |
398 | 0 | system_prompt = ""; |
399 | 0 | } |
400 | 0 | ss << message->content << "\n\nAssistant: </s>"; |
401 | 0 | } else { |
402 | 0 | ss << message->content << "</s>"; |
403 | 0 | } |
404 | 0 | } |
405 | 0 | } else if (tmpl == LLM_CHAT_TEMPLATE_OPENCHAT) { |
406 | | // openchat/openchat-3.5-0106 |
407 | 0 | for (auto message : chat) { |
408 | 0 | std::string role(message->role); |
409 | 0 | if (role == "system") { |
410 | 0 | ss << message->content << "<|end_of_turn|>"; |
411 | 0 | } else { |
412 | 0 | role[0] = toupper(role[0]); |
413 | 0 | ss << "GPT4 Correct " << role << ": " << message->content << "<|end_of_turn|>"; |
414 | 0 | } |
415 | 0 | } |
416 | 0 | if (add_ass) { |
417 | 0 | ss << "GPT4 Correct Assistant:"; |
418 | 0 | } |
419 | 0 | } else if (tmpl == LLM_CHAT_TEMPLATE_VICUNA || tmpl == LLM_CHAT_TEMPLATE_VICUNA_ORCA) { |
420 | | // eachadea/vicuna-13b-1.1 (and Orca variant) |
421 | 0 | for (auto message : chat) { |
422 | 0 | std::string role(message->role); |
423 | 0 | if (role == "system") { |
424 | | // Orca-Vicuna variant uses a system prefix |
425 | 0 | if (tmpl == LLM_CHAT_TEMPLATE_VICUNA_ORCA) { |
426 | 0 | ss << "SYSTEM: " << message->content << "\n"; |
427 | 0 | } else { |
428 | 0 | ss << message->content << "\n\n"; |
429 | 0 | } |
430 | 0 | } else if (role == "user") { |
431 | 0 | ss << "USER: " << message->content << "\n"; |
432 | 0 | } else if (role == "assistant") { |
433 | 0 | ss << "ASSISTANT: " << message->content << "</s>\n"; |
434 | 0 | } |
435 | 0 | } |
436 | 0 | if (add_ass) { |
437 | 0 | ss << "ASSISTANT:"; |
438 | 0 | } |
439 | 0 | } else if (tmpl == LLM_CHAT_TEMPLATE_DEEPSEEK) { |
440 | | // deepseek-ai/deepseek-coder-33b-instruct |
441 | 0 | for (auto message : chat) { |
442 | 0 | std::string role(message->role); |
443 | 0 | if (role == "system") { |
444 | 0 | ss << message->content; |
445 | 0 | } else if (role == "user") { |
446 | 0 | ss << "### Instruction:\n" << message->content << "\n"; |
447 | 0 | } else if (role == "assistant") { |
448 | 0 | ss << "### Response:\n" << message->content << "\n<|EOT|>\n"; |
449 | 0 | } |
450 | 0 | } |
451 | 0 | if (add_ass) { |
452 | 0 | ss << "### Response:\n"; |
453 | 0 | } |
454 | 0 | } else if (tmpl == LLM_CHAT_TEMPLATE_COMMAND_R) { |
455 | | // CohereForAI/c4ai-command-r-plus |
456 | 0 | for (auto message : chat) { |
457 | 0 | std::string role(message->role); |
458 | 0 | if (role == "system") { |
459 | 0 | ss << "<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|>" << trim(message->content) << "<|END_OF_TURN_TOKEN|>"; |
460 | 0 | } else if (role == "user") { |
461 | 0 | ss << "<|START_OF_TURN_TOKEN|><|USER_TOKEN|>" << trim(message->content) << "<|END_OF_TURN_TOKEN|>"; |
462 | 0 | } else if (role == "assistant") { |
463 | 0 | ss << "<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>" << trim(message->content) << "<|END_OF_TURN_TOKEN|>"; |
464 | 0 | } |
465 | 0 | } |
466 | 0 | if (add_ass) { |
467 | 0 | ss << "<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>"; |
468 | 0 | } |
469 | 0 | } else if (tmpl == LLM_CHAT_TEMPLATE_LLAMA_3) { |
470 | | // Llama 3 |
471 | 0 | for (auto message : chat) { |
472 | 0 | std::string role(message->role); |
473 | 0 | ss << "<|start_header_id|>" << role << "<|end_header_id|>\n\n" << trim(message->content) << "<|eot_id|>"; |
474 | 0 | } |
475 | 0 | if (add_ass) { |
476 | 0 | ss << "<|start_header_id|>assistant<|end_header_id|>\n\n"; |
477 | 0 | } |
478 | 0 | } else if (tmpl == LLM_CHAT_TEMPLATE_CHATGLM_3) { |
479 | | // chatglm3-6b |
480 | 0 | ss << "[gMASK]" << "sop"; |
481 | 0 | for (auto message : chat) { |
482 | 0 | std::string role(message->role); |
483 | 0 | ss << "<|" << role << "|>" << "\n " << message->content; |
484 | 0 | } |
485 | 0 | if (add_ass) { |
486 | 0 | ss << "<|assistant|>"; |
487 | 0 | } |
488 | 0 | } else if (tmpl == LLM_CHAT_TEMPLATE_CHATGLM_4) { |
489 | 0 | ss << "[gMASK]" << "<sop>"; |
490 | 0 | for (auto message : chat) { |
491 | 0 | std::string role(message->role); |
492 | 0 | ss << "<|" << role << "|>" << "\n" << message->content; |
493 | 0 | } |
494 | 0 | if (add_ass) { |
495 | 0 | ss << "<|assistant|>\n"; |
496 | 0 | } |
497 | 0 | } else if (tmpl == LLM_CHAT_TEMPLATE_GLMEDGE) { |
498 | 0 | for (auto message : chat) { |
499 | 0 | std::string role(message->role); |
500 | 0 | ss << "<|" << role << "|>" << "\n" << message->content; |
501 | 0 | } |
502 | 0 | if (add_ass) { |
503 | 0 | ss << "<|assistant|>"; |
504 | 0 | } |
505 | 0 | } else if (tmpl == LLM_CHAT_TEMPLATE_MINICPM) { |
506 | | // MiniCPM-3B-OpenHermes-2.5-v2-GGUF |
507 | 0 | for (auto message : chat) { |
508 | 0 | std::string role(message->role); |
509 | 0 | if (role == "user") { |
510 | 0 | ss << LU8("<用户>"); |
511 | 0 | ss << trim(message->content); |
512 | 0 | ss << "<AI>"; |
513 | 0 | } else { |
514 | 0 | ss << trim(message->content); |
515 | 0 | } |
516 | 0 | } |
517 | 0 | } else if (tmpl == LLM_CHAT_TEMPLATE_DEEPSEEK_2) { |
518 | | // DeepSeek-V2 |
519 | 0 | for (auto message : chat) { |
520 | 0 | std::string role(message->role); |
521 | 0 | if (role == "system") { |
522 | 0 | ss << message->content << "\n\n"; |
523 | 0 | } else if (role == "user") { |
524 | 0 | ss << "User: " << message->content << "\n\n"; |
525 | 0 | } else if (role == "assistant") { |
526 | 0 | ss << "Assistant: " << message->content << LU8("<|end▁of▁sentence|>"); |
527 | 0 | } |
528 | 0 | } |
529 | 0 | if (add_ass) { |
530 | 0 | ss << "Assistant:"; |
531 | 0 | } |
532 | 0 | } else if (tmpl == LLM_CHAT_TEMPLATE_DEEPSEEK_3) { |
533 | | // DeepSeek-V3 |
534 | 0 | for (auto message : chat) { |
535 | 0 | std::string role(message->role); |
536 | 0 | if (role == "system") { |
537 | 0 | ss << message->content << "\n\n"; |
538 | 0 | } else if (role == "user") { |
539 | 0 | ss << LU8("<|User|>") << message->content; |
540 | 0 | } else if (role == "assistant") { |
541 | 0 | ss << LU8("<|Assistant|>") << message->content << LU8("<|end▁of▁sentence|>"); |
542 | 0 | } |
543 | 0 | } |
544 | 0 | if (add_ass) { |
545 | 0 | ss << LU8("<|Assistant|>"); |
546 | 0 | } |
547 | 0 | } else if (tmpl == LLM_CHAT_TEMPLATE_EXAONE_3) { |
548 | | // ref: https://huggingface.co/LGAI-EXAONE/EXAONE-3.0-7.8B-Instruct/discussions/8#66bae61b1893d14ee8ed85bb |
549 | | // EXAONE-3.0-7.8B-Instruct |
550 | 0 | for (auto message : chat) { |
551 | 0 | std::string role(message->role); |
552 | 0 | if (role == "system") { |
553 | 0 | ss << "[|system|]" << trim(message->content) << "[|endofturn|]\n"; |
554 | 0 | } else if (role == "user") { |
555 | 0 | ss << "[|user|]" << trim(message->content) << "\n"; |
556 | 0 | } else if (role == "assistant") { |
557 | 0 | ss << "[|assistant|]" << trim(message->content) << "[|endofturn|]\n"; |
558 | 0 | } |
559 | 0 | } |
560 | 0 | if (add_ass) { |
561 | 0 | ss << "[|assistant|]"; |
562 | 0 | } |
563 | 0 | } else if (tmpl == LLM_CHAT_TEMPLATE_EXAONE_4) { |
564 | 0 | for (auto message : chat) { |
565 | 0 | std::string role(message->role); |
566 | 0 | if (role == "system") { |
567 | 0 | ss << "[|system|]" << trim(message->content) << "[|endofturn|]\n"; |
568 | 0 | } else if (role == "user") { |
569 | 0 | ss << "[|user|]" << trim(message->content) << "\n"; |
570 | 0 | } else if (role == "assistant") { |
571 | 0 | ss << "[|assistant|]" << trim(message->content) << "[|endofturn|]\n"; |
572 | 0 | } else if (role == "tool") { |
573 | 0 | ss << "[|tool|]" << trim(message->content) << "[|endofturn|]\n"; |
574 | 0 | } |
575 | 0 | } |
576 | 0 | if (add_ass) { |
577 | 0 | ss << "[|assistant|]"; |
578 | 0 | } |
579 | 0 | } else if (tmpl == LLM_CHAT_TEMPLATE_RWKV_WORLD) { |
580 | | // this template requires the model to have "\n\n" as EOT token |
581 | 0 | for (size_t i = 0; i < chat.size(); i++) { |
582 | 0 | std::string role(chat[i]->role); |
583 | 0 | if (role == "system") { |
584 | 0 | ss << "System: " << trim(chat[i]->content) << "\n\n"; |
585 | 0 | } else if (role == "user") { |
586 | 0 | ss << "User: " << trim(chat[i]->content) << "\n\n"; |
587 | 0 | if (i == chat.size() - 1) { |
588 | 0 | ss << "Assistant:"; |
589 | 0 | } |
590 | 0 | } else if (role == "assistant") { |
591 | 0 | ss << "Assistant: " << trim(chat[i]->content) << "\n\n"; |
592 | 0 | } |
593 | 0 | } |
594 | 0 | } else if (tmpl == LLM_CHAT_TEMPLATE_GRANITE) { |
595 | | // IBM Granite template |
596 | 0 | for (const auto & message : chat) { |
597 | 0 | std::string role(message->role); |
598 | 0 | ss << "<|start_of_role|>" << role << "<|end_of_role|>"; |
599 | 0 | if (role == "assistant_tool_call") { |
600 | 0 | ss << "<|tool_call|>"; |
601 | 0 | } |
602 | 0 | ss << message->content << "<|end_of_text|>\n"; |
603 | 0 | } |
604 | 0 | if (add_ass) { |
605 | 0 | ss << "<|start_of_role|>assistant<|end_of_role|>"; |
606 | 0 | } |
607 | 0 | } else if (tmpl == LLM_CHAT_TEMPLATE_GIGACHAT) { |
608 | | // GigaChat template |
609 | 0 | bool has_system = !chat.empty() && std::string(chat[0]->role) == "system"; |
610 | | |
611 | | // Handle system message if present |
612 | 0 | if (has_system) { |
613 | 0 | ss << "<s>" << chat[0]->content << "<|message_sep|>"; |
614 | 0 | } else { |
615 | 0 | ss << "<s>"; |
616 | 0 | } |
617 | | |
618 | | // Process remaining messages |
619 | 0 | for (size_t i = has_system ? 1 : 0; i < chat.size(); i++) { |
620 | 0 | std::string role(chat[i]->role); |
621 | 0 | if (role == "user") { |
622 | 0 | ss << "user<|role_sep|>" << chat[i]->content << "<|message_sep|>" |
623 | 0 | << "available functions<|role_sep|>[]<|message_sep|>"; |
624 | 0 | } else if (role == "assistant") { |
625 | 0 | ss << "assistant<|role_sep|>" << chat[i]->content << "<|message_sep|>"; |
626 | 0 | } |
627 | 0 | } |
628 | | |
629 | | // Add generation prompt if needed |
630 | 0 | if (add_ass) { |
631 | 0 | ss << "assistant<|role_sep|>"; |
632 | 0 | } |
633 | 0 | } else if (tmpl == LLM_CHAT_TEMPLATE_MEGREZ) { |
634 | | // Megrez template |
635 | 0 | for (auto message : chat) { |
636 | 0 | std::string role(message->role); |
637 | 0 | ss << "<|role_start|>" << role << "<|role_end|>" << message->content << "<|turn_end|>"; |
638 | 0 | } |
639 | |
640 | 0 | if (add_ass) { |
641 | 0 | ss << "<|role_start|>assistant<|role_end|>"; |
642 | 0 | } |
643 | 0 | } else if (tmpl == LLM_CHAT_TEMPLATE_YANDEX) { |
644 | | // Yandex template ("\n\n" is defined as EOT token) |
645 | |
646 | 0 | for (size_t i = 0; i < chat.size(); i++) { |
647 | 0 | std::string role(chat[i]->role); |
648 | 0 | if (role == "user") { |
649 | 0 | ss << " Пользователь: " << chat[i]->content << "\n\n"; |
650 | 0 | } else if (role == "assistant") { |
651 | 0 | ss << " Ассистент: " << chat[i]->content << "\n\n"; |
652 | 0 | } |
653 | 0 | } |
654 | | |
655 | | // Add generation prompt if needed |
656 | 0 | if (add_ass) { |
657 | 0 | ss << " Ассистент:[SEP]"; |
658 | 0 | } |
659 | 0 | } else if (tmpl == LLM_CHAT_TEMPLATE_BAILING || tmpl == LLM_CHAT_TEMPLATE_BAILING_THINK) { |
660 | | // Bailing (Ling/Ring) template |
661 | 0 | for (auto message : chat) { |
662 | 0 | std::string role(message->role); |
663 | |
664 | 0 | if (role == "user") { |
665 | 0 | role = "HUMAN"; |
666 | 0 | } else { |
667 | 0 | std::transform(role.begin(), role.end(), role.begin(), ::toupper); |
668 | 0 | } |
669 | |
670 | 0 | ss << "<role>" << role << "</role>" << message->content; |
671 | 0 | } |
672 | |
673 | 0 | if (add_ass) { |
674 | 0 | ss << "<role>ASSISTANT</role>"; |
675 | |
676 | 0 | if (tmpl == LLM_CHAT_TEMPLATE_BAILING_THINK) { |
677 | 0 | ss << "<think>"; |
678 | 0 | } |
679 | 0 | } |
680 | 0 | } else if (tmpl == LLM_CHAT_TEMPLATE_BAILING2) { |
681 | | // Bailing2 (Ling 2.0) template |
682 | 0 | bool has_system = !chat.empty() && std::string(chat[0]->role) == "system"; |
683 | |
684 | 0 | if (!has_system) { |
685 | 0 | ss << "<role>SYSTEM</role>detailed thinking off<|role_end|>"; |
686 | 0 | } |
687 | |
688 | 0 | for (auto message : chat) { |
689 | 0 | std::string role(message->role); |
690 | |
691 | 0 | if (role == "user") { |
692 | 0 | role = "HUMAN"; |
693 | 0 | } else { |
694 | 0 | std::transform(role.begin(), role.end(), role.begin(), ::toupper); |
695 | 0 | } |
696 | |
697 | 0 | ss << "<role>" << role << "</role>" << message->content << "<|role_end|>"; |
698 | 0 | } |
699 | |
700 | 0 | if (add_ass) { |
701 | 0 | ss << "<role>ASSISTANT</role>"; |
702 | 0 | } |
703 | 0 | } else if (tmpl == LLM_CHAT_TEMPLATE_LLAMA4) { |
704 | | // Llama 4 |
705 | 0 | for (auto message : chat) { |
706 | 0 | std::string role(message->role); |
707 | 0 | ss << "<|header_start|>" << role << "<|header_end|>\n\n" << trim(message->content) << "<|eot|>"; |
708 | 0 | } |
709 | 0 | if (add_ass) { |
710 | 0 | ss << "<|header_start|>assistant<|header_end|>\n\n"; |
711 | 0 | } |
712 | 0 | } else if (tmpl == LLM_CHAT_TEMPLATE_SMOLVLM) { |
713 | | // SmolVLM |
714 | 0 | ss << "<|im_start|>"; // uses <|im_start|> as BOS, but the actual content is NOT chatml |
715 | 0 | for (auto message : chat) { |
716 | 0 | std::string role(message->role); |
717 | 0 | if (role == "system") { |
718 | 0 | ss << message->content << "\n\n"; |
719 | 0 | } else if (role == "user") { |
720 | 0 | ss << "User: " << message->content << "<end_of_utterance>\n"; |
721 | 0 | } else { |
722 | 0 | ss << "Assistant: " << message->content << "<end_of_utterance>\n"; |
723 | 0 | } |
724 | 0 | } |
725 | 0 | if (add_ass) { |
726 | 0 | ss << "Assistant:"; |
727 | 0 | } |
728 | 0 | } else if (tmpl == LLM_CHAT_TEMPLATE_DOTS1) { |
729 | | // dots.llm1.inst (DOTS1) |
730 | 0 | for (auto message : chat) { |
731 | 0 | std::string role(message->role); |
732 | 0 | if (role == "system") { |
733 | 0 | ss << "<|system|>" << message->content << "<|endofsystem|>"; |
734 | 0 | } else if (role == "user") { |
735 | 0 | ss << "<|userprompt|>" << message->content << "<|endofuserprompt|>"; |
736 | 0 | } else { |
737 | 0 | ss << "<|response|>" << message->content << "<|endofresponse|>"; |
738 | 0 | } |
739 | 0 | } |
740 | 0 | if (add_ass) { |
741 | 0 | ss << "<|response|>"; |
742 | 0 | } |
743 | 0 | } else if (tmpl == LLM_CHAT_TEMPLATE_HUNYUAN_MOE) { |
744 | | // tencent/Hunyuan-A13B-Instruct |
745 | 0 | for (auto message : chat) { |
746 | 0 | std::string role(message->role); |
747 | 0 | if (role == "system") { |
748 | 0 | ss << "<|startoftext|>" << message->content << "<|extra_4|>"; |
749 | 0 | } else if (role == "assistant") { |
750 | 0 | ss << message->content << "<|eos|>"; |
751 | 0 | } else { |
752 | 0 | ss << "<|startoftext|>" << message->content << "<|extra_0|>"; |
753 | 0 | } |
754 | 0 | } |
755 | 0 | } else if (tmpl == LLM_CHAT_TEMPLATE_OPENAI_MOE) { |
756 | | // OpenAI MoE (based on Harmony chat template) |
757 | 0 | for (auto message : chat) { |
758 | 0 | std::string role(message->role); |
759 | 0 | ss << "<|start|>" << role << "<|message|>" << message->content; |
760 | 0 | ss << (role == "assistant" ? "<|return|>" : "<|end|>"); |
761 | 0 | } |
762 | 0 | if (add_ass) { |
763 | 0 | ss << "<|start|>assistant"; |
764 | 0 | } |
765 | 0 | } else if (tmpl == LLM_CHAT_TEMPLATE_HUNYUAN_DENSE) { |
766 | | // tencent/Hunyuan-4B-Instruct |
767 | 0 | for (size_t i = 0; i < chat.size(); i++) { |
768 | 0 | std::string role(chat[i]->role); |
769 | 0 | if (i == 0) { |
770 | 0 | if (role == "system") { |
771 | 0 | ss << chat[i]->content << "<|hy_place▁holder▁no▁3|>"; |
772 | 0 | } |
773 | 0 | } |
774 | |
775 | 0 | if (role == "assistant") { |
776 | 0 | ss << "<|hy_Assistant|>" << chat[i]->content << "<|hy_place▁holder▁no▁2|>"; |
777 | 0 | } else if (role == "user") { |
778 | 0 | ss << "<|hy_User|>" << chat[i]->content << "<|hy_Assistant|>"; |
779 | 0 | } |
780 | 0 | } |
781 | 0 | } else if (tmpl == LLM_CHAT_TEMPLATE_KIMI_K2) { |
782 | | // moonshotai/Kimi-K2-Instruct |
783 | 0 | for (auto message : chat) { |
784 | 0 | std::string role(message->role); |
785 | 0 | if (role == "system") { |
786 | 0 | ss << "<|im_system|>system<|im_middle|>"; |
787 | 0 | } else if (role == "user") { |
788 | 0 | ss << "<|im_user|>user<|im_middle|>"; |
789 | 0 | } else if (role == "assistant") { |
790 | 0 | ss << "<|im_assistant|>assistant<|im_middle|>"; |
791 | 0 | } else if (role == "tool") { |
792 | 0 | ss << "<|im_system|>tool<|im_middle|>"; |
793 | 0 | } |
794 | |
795 | 0 | ss << message->content << "<|im_end|>"; |
796 | 0 | } |
797 | 0 | if (add_ass) { |
798 | 0 | ss << "<|im_assistant|>assistant<|im_middle|>"; |
799 | 0 | } |
800 | 0 | } else if (tmpl == LLM_CHAT_TEMPLATE_SEED_OSS) { |
801 | 0 | for (auto message: chat) { |
802 | 0 | std::string role(message->role); |
803 | 0 | ss << "<seed:bos>" << role << "\n" << (role == "assistant" ? trim(message->content) : message->content) << "<seed:eos>"; |
804 | 0 | } |
805 | 0 | if (add_ass) { |
806 | 0 | ss << "<seed:bos>assistant\n"; |
807 | 0 | } |
808 | 0 | } else if (tmpl == LLM_CHAT_TEMPLATE_GROK_2) { |
809 | 0 | for (auto message : chat) { |
810 | 0 | std::string role(message->role); |
811 | 0 | if (role == "system") { |
812 | 0 | ss << "System: " << trim(message->content) << "<|separator|>\n\n"; |
813 | 0 | } else if (role == "user") { |
814 | 0 | ss << "Human: " << trim(message->content) << "<|separator|>\n\n"; |
815 | 0 | } else if (role == "assistant") { |
816 | 0 | ss << "Assistant: " << message->content << "<|separator|>\n\n"; |
817 | 0 | } |
818 | 0 | } |
819 | 0 | if (add_ass) { |
820 | 0 | ss << "Assistant:"; |
821 | 0 | } |
822 | 0 | } else if (tmpl == LLM_CHAT_TEMPLATE_PANGU_EMBED) {
823 | | // [unused9]系统:xxx[unused10] |
824 | | // [unused9]用户:xxx[unused10] |
825 | | // [unused9]助手:xxx[unused10] |
826 | | // ... |
827 | 0 | for (size_t i = 0; i < chat.size(); ++i) { |
828 | 0 | const auto & msg = chat[i]; |
829 | 0 | const std::string & role = msg->role; |
830 | 0 | const std::string & content = msg->content; |
831 | |
832 | 0 | if (i == 0 && role != "system") { |
833 | 0 | ss << "[unused9]系统:[unused10]"; |
834 | 0 | } |
835 | |
836 | 0 | if (role == "system") { |
837 | 0 | ss << "[unused9]系统:" << content << "[unused10]"; |
838 | 0 | } else if (role == "user") { |
839 | 0 | ss << "[unused9]用户:" << content << "[unused10]"; |
840 | 0 | } else if (role == "assistant") { |
841 | 0 | ss << "[unused9]助手:" << content << "[unused10]"; |
842 | 0 | } else if (role == "tool") { |
843 | 0 | ss << "[unused9]工具:" << content << "[unused10]"; |
844 | 0 | } else if (role == "function") { |
845 | 0 | ss << "[unused9]方法:" << content << "[unused10]"; |
846 | 0 | } |
847 | 0 | } |
848 | 0 | if (add_ass) { |
849 | 0 | ss << "[unused9]助手:"; |
850 | 0 | } |
851 | 0 | } else if (tmpl == LLM_CHAT_TEMPLATE_SOLAR_OPEN) { |
852 | 0 | for (auto message : chat) { |
853 | 0 | std::string role(message->role); |
854 | 0 | ss << "<|begin|>" << role << "<|content|>" << message->content << "<|end|>"; |
855 | 0 | } |
856 | 0 | if (add_ass) { |
857 | 0 | ss << "<|begin|>assistant"; |
858 | 0 | } |
859 | 0 | } else { |
860 | | // template not supported |
861 | 0 | return -1; |
862 | 0 | } |
863 | 0 | dest = ss.str(); |
864 | 0 | return dest.size(); |
865 | 0 | } |
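// --- Hypothetical usage sketch (not part of llama-chat.cpp) ---
// Renders a short conversation through the ChatML branch above. llama_chat_message
// comes from llama.h (a pair of C strings: role and content); the messages and the
// add_ass flag are illustrative assumptions.
#include <cstdint>
#include <cstdio>
#include <string>
#include <vector>
#include "llama.h"
#include "llama-chat.h"

int main() {
    llama_chat_message msgs[] = {
        { "system", "You are a helpful assistant." },
        { "user",   "Hello!"                       },
    };
    std::vector<const llama_chat_message *> chat = { &msgs[0], &msgs[1] };

    std::string prompt;
    const int32_t res = llm_chat_apply_template(LLM_CHAT_TEMPLATE_CHATML, chat, prompt, /*add_ass=*/true);
    if (res < 0) {
        return 1; // -1 means the template is not supported by this formatter
    }
    // With add_ass == true the prompt ends in "<|im_start|>assistant\n",
    // leaving the model to generate the assistant turn.
    printf("%s", prompt.c_str());
    return 0;
}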
866 | | |
867 | | // public interface |
868 | | |
869 | 0 | int32_t llama_chat_builtin_templates(const char ** output, size_t len) { |
870 | 0 | auto it = LLM_CHAT_TEMPLATES.begin(); |
871 | 0 | for (size_t i = 0; i < std::min(len, LLM_CHAT_TEMPLATES.size()); i++) { |
872 | 0 | output[i] = it->first.c_str(); |
873 | 0 | std::advance(it, 1); |
874 | 0 | } |
875 | 0 | return (int32_t) LLM_CHAT_TEMPLATES.size(); |
876 | 0 | } |
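// --- Hypothetical usage sketch (not part of llama-chat.cpp) ---
// llama_chat_builtin_templates() is part of the public llama.h API. Given the
// implementation above, it always returns the total number of built-in names, so a
// first call with len == 0 can size the output buffer before a second call fills it.
#include <cstdint>
#include <cstdio>
#include <vector>
#include "llama.h"

int main() {
    const int32_t n_tmpl = llama_chat_builtin_templates(nullptr, 0); // query the count only
    std::vector<const char *> names(n_tmpl);
    llama_chat_builtin_templates(names.data(), names.size());

    for (const char * name : names) {
        printf("%s\n", name); // e.g. "chatml", "llama2", ...
    }
    return 0;
}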