/src/node/src/node_task_runner.cc
Line  | Count  | Source  | 
1  |  | #include "node_task_runner.h"  | 
2  |  | #include "util-inl.h"  | 
3  |  |  | 
4  |  | #include <regex>  // NOLINT(build/c++11)  | 
5  |  |  | 
6  |  | namespace node::task_runner { | 
7  |  |  | 
// Separator placed between the entries of the PATH environment variable on
// the current platform (";" on Windows, ":" elsewhere).
static constexpr const char* env_var_separator =
#ifdef _WIN32
    ";";
#else
    ":";
#endif  // _WIN32
13  |  |  | 
14  |  | ProcessRunner::ProcessRunner(std::shared_ptr<InitializationResultImpl> result,  | 
15  |  |                              const std::filesystem::path& package_json_path,  | 
16  |  |                              std::string_view script_name,  | 
17  |  |                              std::string_view command,  | 
18  |  |                              std::string_view path_env_var,  | 
19  |  |                              const PositionalArgs& positional_args)  | 
20  | 0  |     : init_result_(std::move(result)),  | 
21  | 0  |       package_json_path_(package_json_path),  | 
22  | 0  |       script_name_(script_name),  | 
23  | 0  |       path_env_var_(path_env_var) { | 
24  | 0  |   memset(&options_, 0, sizeof(uv_process_options_t));  | 
25  |  |  | 
26  |  |   // Inherit stdin, stdout, and stderr from the parent process.  | 
27  | 0  |   options_.stdio_count = 3;  | 
28  | 0  |   child_stdio_[0].flags = UV_INHERIT_FD;  | 
29  | 0  |   child_stdio_[0].data.fd = 0;  | 
30  | 0  |   child_stdio_[1].flags = UV_INHERIT_FD;  | 
31  | 0  |   child_stdio_[1].data.fd = 1;  | 
32  | 0  |   child_stdio_[2].flags = UV_INHERIT_FD;  | 
33  | 0  |   child_stdio_[2].data.fd = 2;  | 
34  | 0  |   options_.stdio = child_stdio_;  | 
35  | 0  |   options_.exit_cb = ExitCallback;  | 
36  |  | 
  | 
37  |  | #ifdef _WIN32  | 
38  |  |   options_.flags |= UV_PROCESS_WINDOWS_VERBATIM_ARGUMENTS;  | 
39  |  | #endif  | 
40  |  |  | 
41  |  |   // Set the process handle data to this class instance.  | 
42  |  |   // This is used to access the class instance from the OnExit callback.  | 
43  |  |   // It is required because libuv doesn't allow passing lambda functions as a  | 
44  |  |   // callback.  | 
45  | 0  |   process_.data = this;  | 
46  |  | 
  | 
47  | 0  |   SetEnvironmentVariables();  | 
48  |  | 
  | 
49  | 0  |   std::string command_str(command);  | 
50  |  |  | 
51  |  |   // Use the stored reference on the instance.  | 
52  | 0  |   options_.file = file_.c_str();  | 
53  |  |  | 
54  |  |   // Add positional arguments to the command string.  | 
55  |  |   // Note that each argument needs to be escaped.  | 
56  | 0  |   if (!positional_args.empty()) { | 
57  | 0  |     for (const auto& arg : positional_args) { | 
58  | 0  |       command_str += " " + EscapeShell(arg);  | 
59  | 0  |     }  | 
60  | 0  |   }  | 
61  |  | 
  | 
62  |  | #ifdef _WIN32  | 
63  |  |   if (file_.ends_with("cmd.exe")) { | 
64  |  |     // If the file is cmd.exe, use the following command line arguments:  | 
65  |  |     // "/c" Carries out the command and exit.  | 
66  |  |     // "/d" Disables execution of AutoRun commands.  | 
67  |  |     // "/s" Strip the first and last quotes (") around the <string> but leaves | 
68  |  |     // the rest of the command unchanged.  | 
69  |  |     command_args_ = { | 
70  |  |         options_.file, "/d", "/s", "/c", "\"" + command_str + "\""};  | 
71  |  |   } else { | 
72  |  |     // If the file is not cmd.exe, and it is unclear which shell is being used,  | 
73  |  |     // so assume -c is the correct syntax (Unix-like shells use -c for this  | 
74  |  |     // purpose).  | 
75  |  |     command_args_ = {options_.file, "-c", command_str}; | 
76  |  |   }  | 
77  |  | #else  | 
78  | 0  |   command_args_ = {options_.file, "-c", command_str}; | 
79  | 0  | #endif  // _WIN32  | 
80  |  | 
  | 
81  | 0  |   auto argc = command_args_.size();  | 
82  | 0  |   CHECK_GE(argc, 1);  | 
83  | 0  |   arg_ = std::unique_ptr<char*[]>(new char*[argc + 1]);  | 
84  | 0  |   options_.args = arg_.get();  | 
85  | 0  |   for (size_t i = 0; i < argc; ++i) { | 
86  | 0  |     options_.args[i] = const_cast<char*>(command_args_[i].c_str());  | 
87  | 0  |   }  | 
88  | 0  |   options_.args[argc] = nullptr;  | 
89  | 0  | }  | 
90  |  |  | 
91  | 0  | void ProcessRunner::SetEnvironmentVariables() { | 
92  | 0  |   uv_env_item_t* env_items;  | 
93  | 0  |   int env_count;  | 
94  | 0  |   CHECK_EQ(0, uv_os_environ(&env_items, &env_count));  | 
95  |  |  | 
96  |  |   // Iterate over environment variables once to store them in the current  | 
97  |  |   // ProcessRunner instance.  | 
98  | 0  |   for (int i = 0; i < env_count; i++) { | 
99  | 0  |     std::string name = env_items[i].name;  | 
100  | 0  |     std::string value = env_items[i].value;  | 
101  |  | 
  | 
102  |  | #ifdef _WIN32  | 
103  |  |     // We use comspec environment variable to find cmd.exe path on Windows  | 
104  |  |     // Example: 'C:\\Windows\\system32\\cmd.exe'  | 
105  |  |     // If we don't find it, we fallback to 'cmd.exe' for Windows  | 
106  |  |     if (StringEqualNoCase(name.c_str(), "comspec")) { | 
107  |  |       file_ = value;  | 
108  |  |     }  | 
109  |  | #endif  // _WIN32  | 
110  |  | 
  | 
111  | 0  |     if (StringEqualNoCase(name.c_str(), "path")) { | 
112  |  |       // Add path env variable to the beginning of the PATH  | 
113  | 0  |       value = path_env_var_ + value;  | 
114  | 0  |     }  | 
115  | 0  |     env_vars_.push_back(name + "=" + value);  | 
116  | 0  |   }  | 
117  | 0  |   uv_os_free_environ(env_items, env_count);  | 
118  |  |  | 
119  |  |   // Add NODE_RUN_SCRIPT_NAME environment variable to the environment  | 
120  |  |   // to indicate which script is being run.  | 
121  | 0  |   env_vars_.push_back("NODE_RUN_SCRIPT_NAME=" + script_name_); | 
122  |  |  | 
123  |  |   // Add NODE_RUN_PACKAGE_JSON_PATH environment variable to the environment to  | 
124  |  |   // indicate which package.json is being processed.  | 
125  | 0  |   env_vars_.push_back("NODE_RUN_PACKAGE_JSON_PATH=" + | 
126  | 0  |                       package_json_path_.string());  | 
127  |  | 
  | 
128  | 0  |   env_ = std::unique_ptr<char*[]>(new char*[env_vars_.size() + 1]);  | 
129  | 0  |   options_.env = env_.get();  | 
130  | 0  |   for (size_t i = 0; i < env_vars_.size(); i++) { | 
131  | 0  |     options_.env[i] = const_cast<char*>(env_vars_[i].c_str());  | 
132  | 0  |   }  | 
133  | 0  |   options_.env[env_vars_.size()] = nullptr;  | 
134  | 0  | }  | 
135  |  |  | 
// EscapeShell quotes `input` so that the shell hands it to the command as a
// single, literal argument.
//
// - An empty input becomes an empty quoted string ('' or, on Windows, "").
// - An input without any character from the forbidden set is returned
//   unchanged.
// - Under Windows, every double quote is doubled and the result is wrapped
//   in double quotes, following
//   https://daviddeley.com/autohotkey/parameters/parameters.htm
// - Elsewhere, the input is wrapped in single quotes and every embedded
//   single quote is written as '\'' (close the quoted string, emit an
//   escaped quote, reopen the quoted string). A backslash is not an escape
//   character inside single quotes in a POSIX shell, so writing \' there
//   would terminate the quoted string early and expose the rest of the
//   argument to the shell.
// Redundant quote pairs created by the wrapping are removed afterwards.
std::string EscapeShell(const std::string_view input) {
  // If the input is an empty string, return a pair of quotes
  if (input.empty()) {
#ifdef _WIN32
    return "\"\"";
#else
    return "''";
#endif
  }

  // Characters that force quoting. This is a plain character set used with
  // find_first_of(), not a regular expression, so '[' and ']' are members of
  // the set as well.
  static constexpr std::string_view forbidden_characters =
      "[\t\n\r \"#$&'()*;<>?\\`|~]";

  // Nothing special in the input: return it as is.
  if (input.find_first_of(forbidden_characters) == std::string_view::npos) {
    return std::string(input);
  }

  // One or more '' pairs at the very start that are followed by more text.
  static const std::regex leadingQuotePairs("^(?:'')+(?!$)");

#ifdef _WIN32
  // Double every double quote and surround the string with double quotes.
  static const std::regex doubleQuote("\"");
  std::string escaped =
      std::regex_replace(std::string(input), doubleQuote, "\"\"");
  escaped = "\"" + escaped + "\"";
  // Remove excessive quote pairs and handle edge cases
  static const std::regex tripleDoubleQuote("\\\\\"\"\"");
  escaped = std::regex_replace(escaped, leadingQuotePairs, "");
  escaped = std::regex_replace(escaped, tripleDoubleQuote, "\\\"");
#else
  // Wrap in single quotes, writing each embedded single quote as '\''.
  std::string escaped;
  escaped.reserve(input.size() + 2);
  escaped += '\'';
  for (const char c : input) {
    if (c == '\'') {
      escaped += "'\\''";
    } else {
      escaped += c;
    }
  }
  escaped += '\'';
  // Remove excessive quote pairs: an empty '' at the start, and the empty ''
  // that follows an escaped quote (\''' collapses to \').
  static const std::regex tripleSingleQuote("\\\\'''");
  escaped = std::regex_replace(escaped, leadingQuotePairs, "");
  escaped = std::regex_replace(escaped, tripleSingleQuote, "\\'");
#endif  // _WIN32

  return escaped;
}
187  |  |  | 
188  |  | // ExitCallback is the callback function that is called when the process exits.  | 
189  |  | // It closes the process handle and calls the OnExit function.  | 
190  |  | // It is defined as a static function due to the limitations of libuv.  | 
191  |  | void ProcessRunner::ExitCallback(uv_process_t* handle,  | 
192  |  |                                  int64_t exit_status,  | 
193  | 0  |                                  int term_signal) { | 
194  | 0  |   const auto self = static_cast<ProcessRunner*>(handle->data);  | 
195  | 0  |   uv_close(reinterpret_cast<uv_handle_t*>(handle), nullptr);  | 
196  | 0  |   self->OnExit(exit_status, term_signal);  | 
197  | 0  | }  | 
198  |  |  | 
199  | 0  | void ProcessRunner::OnExit(int64_t exit_status, int term_signal) { | 
200  | 0  |   if (exit_status > 0) { | 
201  | 0  |     init_result_->exit_code_ = ExitCode::kGenericUserError;  | 
202  | 0  |   } else { | 
203  | 0  |     init_result_->exit_code_ = ExitCode::kNoFailure;  | 
204  | 0  |   }  | 
205  | 0  | }  | 
206  |  |  | 
207  | 0  | void ProcessRunner::Run() { | 
208  |  |   // keeps the string alive until destructor  | 
209  | 0  |   cwd_ = package_json_path_.parent_path().string();  | 
210  | 0  |   options_.cwd = cwd_.c_str();  | 
211  | 0  |   if (int r = uv_spawn(loop_, &process_, &options_)) { | 
212  | 0  |     fprintf(stderr, "Error: %s\n", uv_strerror(r));  | 
213  | 0  |   }  | 
214  |  | 
  | 
215  | 0  |   uv_run(loop_, UV_RUN_DEFAULT);  | 
216  | 0  | }  | 
217  |  |  | 
218  |  | std::optional<std::tuple<std::filesystem::path, std::string, std::string>>  | 
219  | 0  | FindPackageJson(const std::filesystem::path& cwd) { | 
220  | 0  |   auto package_json_path = cwd / "package.json";  | 
221  | 0  |   std::string raw_content;  | 
222  | 0  |   std::string path_env_var;  | 
223  | 0  |   auto root_path = cwd.root_path();  | 
224  |  | 
  | 
225  | 0  |   for (auto directory_path = cwd;  | 
226  | 0  |        !std::filesystem::equivalent(root_path, directory_path);  | 
227  | 0  |        directory_path = directory_path.parent_path()) { | 
228  |  |     // Append "path/node_modules/.bin" to the env var, if it is a directory.  | 
229  | 0  |     auto node_modules_bin = directory_path / "node_modules" / ".bin";  | 
230  | 0  |     if (std::filesystem::is_directory(node_modules_bin)) { | 
231  | 0  |       path_env_var += node_modules_bin.string() + env_var_separator;  | 
232  | 0  |     }  | 
233  |  | 
  | 
234  | 0  |     if (raw_content.empty()) { | 
235  | 0  |       package_json_path = directory_path / "package.json";  | 
236  |  |       // This is required for Windows because std::filesystem::path::c_str()  | 
237  |  |       // returns wchar_t* on Windows, and char* on other platforms.  | 
238  | 0  |       std::string contents = package_json_path.string();  | 
239  | 0  |       USE(ReadFileSync(&raw_content, contents.c_str()) > 0);  | 
240  | 0  |     }  | 
241  | 0  |   }  | 
242  |  |  | 
243  |  |   // This means that there is no package.json until the root directory.  | 
244  |  |   // In this case, we just return nullopt, which will terminate the process..  | 
245  | 0  |   if (raw_content.empty()) { | 
246  | 0  |     return std::nullopt;  | 
247  | 0  |   }  | 
248  |  |  | 
249  | 0  |   return {{package_json_path, raw_content, path_env_var}}; | 
250  | 0  | }  | 
251  |  |  | 
252  |  | void RunTask(const std::shared_ptr<InitializationResultImpl>& result,  | 
253  |  |              std::string_view command_id,  | 
254  | 0  |              const std::vector<std::string_view>& positional_args) { | 
255  | 0  |   auto cwd = std::filesystem::current_path();  | 
256  | 0  |   auto package_json = FindPackageJson(cwd);  | 
257  |  | 
  | 
258  | 0  |   if (!package_json.has_value()) { | 
259  | 0  |     fprintf(stderr,  | 
260  | 0  |             "Can't find package.json for directory %s\n",  | 
261  | 0  |             cwd.string().c_str());  | 
262  | 0  |     result->exit_code_ = ExitCode::kGenericUserError;  | 
263  | 0  |     return;  | 
264  | 0  |   }  | 
265  |  |  | 
266  |  |   // - path: Path to the package.json file.  | 
267  |  |   // - raw_json: Raw content of the package.json file.  | 
268  |  |   // - path_env_var: This represents the `PATH` environment variable.  | 
269  |  |   //   It always ends with ";" or ":" depending on the platform.  | 
270  | 0  |   auto [path, raw_json, path_env_var] = *package_json;  | 
271  |  | 
  | 
272  | 0  |   simdjson::ondemand::parser json_parser;  | 
273  | 0  |   simdjson::ondemand::document document;  | 
274  | 0  |   simdjson::ondemand::object main_object;  | 
275  |  | 
  | 
276  | 0  |   if (json_parser.iterate(raw_json).get(document)) { | 
277  | 0  |     fprintf(stderr, "Can't parse %s\n", path.string().c_str());  | 
278  | 0  |     result->exit_code_ = ExitCode::kGenericUserError;  | 
279  | 0  |     return;  | 
280  | 0  |   }  | 
281  |  |   // If document is not an object, throw an error.  | 
282  | 0  |   if (auto root_error = document.get_object().get(main_object)) { | 
283  | 0  |     if (root_error == simdjson::error_code::INCORRECT_TYPE) { | 
284  | 0  |       fprintf(stderr,  | 
285  | 0  |               "Root value unexpected not an object for %s\n\n",  | 
286  | 0  |               path.string().c_str());  | 
287  | 0  |     } else { | 
288  | 0  |       fprintf(stderr, "Can't parse %s\n", path.string().c_str());  | 
289  | 0  |     }  | 
290  | 0  |     result->exit_code_ = ExitCode::kGenericUserError;  | 
291  | 0  |     return;  | 
292  | 0  |   }  | 
293  |  |  | 
294  |  |   // If package_json object doesn't have "scripts" field, throw an error.  | 
295  | 0  |   simdjson::ondemand::object scripts_object;  | 
296  | 0  |   if (main_object["scripts"].get_object().get(scripts_object)) { | 
297  | 0  |     fprintf(  | 
298  | 0  |         stderr, "Can't find \"scripts\" field in %s\n", path.string().c_str());  | 
299  | 0  |     result->exit_code_ = ExitCode::kGenericUserError;  | 
300  | 0  |     return;  | 
301  | 0  |   }  | 
302  |  |  | 
303  |  |   // If the command_id is not found in the scripts object, throw an error.  | 
304  | 0  |   std::string_view command;  | 
305  | 0  |   if (auto command_error =  | 
306  | 0  |           scripts_object[command_id].get_string().get(command)) { | 
307  | 0  |     if (command_error == simdjson::error_code::INCORRECT_TYPE) { | 
308  | 0  |       fprintf(stderr,  | 
309  | 0  |               "Script \"%.*s\" is unexpectedly not a string for %s\n\n",  | 
310  | 0  |               static_cast<int>(command_id.size()),  | 
311  | 0  |               command_id.data(),  | 
312  | 0  |               path.string().c_str());  | 
313  | 0  |     } else { | 
314  | 0  |       fprintf(stderr,  | 
315  | 0  |               "Missing script: \"%.*s\" for %s\n\n",  | 
316  | 0  |               static_cast<int>(command_id.size()),  | 
317  | 0  |               command_id.data(),  | 
318  | 0  |               path.string().c_str());  | 
319  | 0  |       fprintf(stderr, "Available scripts are:\n");  | 
320  |  |  | 
321  |  |       // Reset the object to iterate over it again  | 
322  | 0  |       scripts_object.reset();  | 
323  | 0  |       simdjson::ondemand::value value;  | 
324  | 0  |       for (auto field : scripts_object) { | 
325  | 0  |         std::string_view key_str;  | 
326  | 0  |         std::string_view value_str;  | 
327  | 0  |         if (!field.unescaped_key().get(key_str) && !field.value().get(value) &&  | 
328  | 0  |             !value.get_string().get(value_str)) { | 
329  | 0  |           fprintf(stderr,  | 
330  | 0  |                   "  %.*s: %.*s\n",  | 
331  | 0  |                   static_cast<int>(key_str.size()),  | 
332  | 0  |                   key_str.data(),  | 
333  | 0  |                   static_cast<int>(value_str.size()),  | 
334  | 0  |                   value_str.data());  | 
335  | 0  |         }  | 
336  | 0  |       }  | 
337  | 0  |     }  | 
338  | 0  |     result->exit_code_ = ExitCode::kGenericUserError;  | 
339  | 0  |     return;  | 
340  | 0  |   }  | 
341  |  |  | 
342  | 0  |   auto runner = ProcessRunner(  | 
343  | 0  |       result, path, command_id, command, path_env_var, positional_args);  | 
344  | 0  |   runner.Run();  | 
345  | 0  | }  | 
346  |  |  | 
347  |  | // GetPositionalArgs returns the positional arguments from the command line.  | 
348  |  | // If the "--" flag is not found, it returns an empty optional.  | 
349  |  | // Otherwise, it returns the positional arguments as a single string.  | 
350  |  | // Example: "node -- script.js arg1 arg2" returns "arg1 arg2".  | 
351  | 0  | PositionalArgs GetPositionalArgs(const std::vector<std::string>& args) { | 
352  |  |   // If the "--" flag is not found, return an empty optional  | 
353  |  |   // Otherwise, return the positional arguments as a single string  | 
354  | 0  |   if (auto dash_dash = std::ranges::find(args, "--"); dash_dash != args.end()) { | 
355  | 0  |     PositionalArgs positional_args{}; | 
356  | 0  |     positional_args.reserve(args.size() - (dash_dash - args.begin()));  | 
357  | 0  |     for (auto it = dash_dash + 1; it != args.end(); ++it) { | 
358  |  |       // SAFETY: The following code is safe because the lifetime of the  | 
359  |  |       // arguments is guaranteed to be valid until the end of the task runner.  | 
360  | 0  |       positional_args.emplace_back(it->c_str(), it->size());  | 
361  | 0  |     }  | 
362  | 0  |     return positional_args;  | 
363  | 0  |   }  | 
364  |  |  | 
365  | 0  |   return {}; | 
366  | 0  | }  | 
367  |  |  | 
368  |  | }  // namespace node::task_runner  |