Improve Windows quoting robustness
[bpt/emacs.git] / nt / cmdproxy.c
1 /* Proxy shell designed for use with Emacs on Windows 95 and NT.
2 Copyright (C) 1997, 2001-2011 Free Software Foundation, Inc.
3
4 Accepts subset of Unix sh(1) command-line options, for compatibility
5 with elisp code written for Unix. When possible, executes external
6 programs directly (a common use of /bin/sh by Emacs), otherwise
7 invokes the user-specified command processor to handle built-in shell
8 commands, batch files and interactive mode.
9
10 The main function is simply to process the "-c string" option in the
11 way /bin/sh does, since the standard Windows command shells use the
12 convention that everything after "/c" (the Windows equivalent of
13 "-c") is the input string.
14
15 This file is part of GNU Emacs.
16
17 GNU Emacs is free software: you can redistribute it and/or modify
18 it under the terms of the GNU General Public License as published by
19 the Free Software Foundation, either version 3 of the License, or
20 (at your option) any later version.
21
22 GNU Emacs is distributed in the hope that it will be useful,
23 but WITHOUT ANY WARRANTY; without even the implied warranty of
24 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
25 GNU General Public License for more details.
26
27 You should have received a copy of the GNU General Public License
28 along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. */
29
30 #include <windows.h>
31
32 #include <stdarg.h> /* va_args */
33 #include <malloc.h> /* alloca */
34 #include <stdlib.h> /* getenv */
35 #include <string.h> /* strlen */
36 #include <ctype.h> /* isspace, isalpha */
37
38 /* We don't want to include stdio.h because we are already duplicating
39 lots of it here */
40 extern int _snprintf (char *buffer, size_t count, const char *format, ...);
41
42 /******* Mock C library routines *********************************/
43
44 /* These routines are used primarily to minimize the executable size. */
45
46 #define stdout GetStdHandle (STD_OUTPUT_HANDLE)
47 #define stderr GetStdHandle (STD_ERROR_HANDLE)
48
49 int
50 vfprintf (HANDLE hnd, const char * msg, va_list args)
51 {
52 DWORD bytes_written;
53 char buf[1024];
54
55 wvsprintf (buf, msg, args);
56 return WriteFile (hnd, buf, strlen (buf), &bytes_written, NULL);
57 }
58
59 int
60 fprintf (HANDLE hnd, const char * msg, ...)
61 {
62 va_list args;
63 int rc;
64
65 va_start (args, msg);
66 rc = vfprintf (hnd, msg, args);
67 va_end (args);
68
69 return rc;
70 }
71
72 int
73 printf (const char * msg, ...)
74 {
75 va_list args;
76 int rc;
77
78 va_start (args, msg);
79 rc = vfprintf (stdout, msg, args);
80 va_end (args);
81
82 return rc;
83 }
84
85 void
86 fail (const char * msg, ...)
87 {
88 va_list args;
89
90 va_start (args, msg);
91 vfprintf (stderr, msg, args);
92 va_end (args);
93
94 exit (-1);
95 }
96
97 void
98 warn (const char * msg, ...)
99 {
100 va_list args;
101
102 va_start (args, msg);
103 vfprintf (stderr, msg, args);
104 va_end (args);
105 }
106
107 /******************************************************************/
108
/* Convert FNAME to Windows form in place by replacing every forward
   slash with a backslash.  Returns FNAME.  */
char *
canon_filename (char *fname)
{
  char *cursor;

  for (cursor = fname; *cursor != '\0'; cursor++)
    {
      if (*cursor == '/')
        *cursor = '\\';
    }

  return fname;
}
123
/* Return a pointer to the first character of STR that is not
   whitespace (possibly its terminating NUL).  The character is
   converted to unsigned char before being handed to isspace, because
   passing a negative char value (any byte above 0x7f where char is
   signed) to the ctype functions is undefined.  */
const char *
skip_space (const char *str)
{
  while (isspace ((unsigned char) *str))
    str++;
  return str;
}
130
/* Return a pointer to the first whitespace character of STR, or to
   its terminating NUL if it contains none.  The character is
   converted to unsigned char before being handed to isspace, because
   passing a negative char value (any byte above 0x7f where char is
   signed) to the ctype functions is undefined.  */
const char *
skip_nonspace (const char *str)
{
  while (*str != '\0' && !isspace ((unsigned char) *str))
    str++;
  return str;
}
137
/* Character that escapes a double quote inside a quoted token.  When
   it is itself a double quote, quotes are escaped by doubling.  */
int escape_char = '\\';

/* Get next token from input, advancing pointer.

   Leading whitespace in *PSRC is skipped.  If the token starts with a
   double quote it extends to the matching closing quote (or to the end
   of the input when there is none); otherwise it extends to the next
   whitespace character.  The token, with its enclosing quotes removed
   and escaped quotes unescaped, is stored NUL-terminated in BUF, and
   *PSRC is left pointing just past the token.

   BUF is not bounds-checked; the token is never longer than the
   remaining input, so a buffer of strlen (*PSRC) + 1 bytes always
   suffices.

   Returns the length of the token (0 if there is none).  */
int
get_next_token (char * buf, const char ** pSrc)
{
  const char * p = *pSrc;
  char * o = buf;

  p = skip_space (p);
  if (*p == '"')
    {
      int escape_char_run = 0;

      /* Go through src until an ending quote is found, unescaping
         quotes along the way.  If the escape char is not quote, then do
         special handling of multiple escape chars preceding a quote
         char (ie. the reverse of what Emacs does to escape quotes).  */
      p++;
      while (1)
        {
          if (p[0] == escape_char && escape_char != '"')
            {
              /* Defer the decision: whether these are literal depends
                 on whether a quote follows the run.  */
              escape_char_run++;
              p++;
              continue;
            }
          else if (p[0] == '"')
            {
              /* Each pair of escape chars before a quote stands for one
                 literal escape char.  */
              while (escape_char_run > 1)
                {
                  *o++ = escape_char;
                  escape_char_run -= 2;
                }

              if (escape_char_run > 0)
                {
                  /* escaped quote */
                  *o++ = *p++;
                  escape_char_run = 0;
                }
              else if (p[1] == escape_char && escape_char == '"')
                {
                  /* quote escaped by doubling */
                  *o++ = *p;
                  p += 2;
                }
              else
                {
                  /* The ending quote.  */
                  *o = '\0';
                  /* Leave input pointer after token.  */
                  p++;
                  break;
                }
            }
          else
            {
              /* A run of escape chars that is not followed by a quote
                 is literal text (e.g. the backslashes of a quoted file
                 name); emit it instead of silently dropping it.  */
              while (escape_char_run > 0)
                {
                  *o++ = escape_char;
                  escape_char_run--;
                }

              if (p[0] == '\0')
                {
                  /* End of string, but no ending quote found.  We might
                     want to flag this as an error, but for now will
                     consider the end as the end of the token.  */
                  *o = '\0';
                  break;
                }

              *o++ = *p++;
            }
        }
    }
  else
    {
      /* Next token is delimited by whitespace.  */
      const char * p1 = skip_nonspace (p);
      memcpy (o, p, p1 - p);
      o += (p1 - p);
      *o = '\0';
      p = p1;
    }

  *pSrc = p;

  return o - buf;
}
222
/* Look for the file EXEC in directory DIR, trying each of the standard
   executable extensions in turn (SearchPath only appends the extension
   when EXEC has none of its own).  On success the full name is stored
   in BUFFER, which holds BUFSIZE characters, and the positive value
   returned by SearchPath is passed back; otherwise 0 is returned.  */
int
search_dir (const char *dir, const char *exec, int bufsize, char *buffer)
{
  static const char *const suffixes[] = {".bat", ".cmd", ".exe", ".com"};
  const int n_suffixes = sizeof (suffixes) / sizeof (suffixes[0]);
  char *file_part;
  int idx;

  for (idx = 0; idx < n_suffixes; idx++)
    {
      int found = SearchPath (dir, exec, suffixes[idx], bufsize, buffer,
                              &file_part);

      if (found > 0)
        return found;
    }

  return 0;
}
243
244 /* Return the absolute name of executable file PROG, including
245 any file extensions. If an absolute name for PROG cannot be found,
246 return NULL. */
247 char *
248 make_absolute (const char *prog)
249 {
250 char absname[MAX_PATH];
251 char dir[MAX_PATH];
252 char curdir[MAX_PATH];
253 char *p, *path;
254 const char *fname;
255
256 /* At least partial absolute path specified; search there. */
257 if ((isalpha (prog[0]) && prog[1] == ':') ||
258 (prog[0] == '\\'))
259 {
260 /* Split the directory from the filename. */
261 fname = strrchr (prog, '\\');
262 if (!fname)
263 /* Only a drive specifier is given. */
264 fname = prog + 2;
265 strncpy (dir, prog, fname - prog);
266 dir[fname - prog] = '\0';
267
268 /* Search the directory for the program. */
269 if (search_dir (dir, prog, MAX_PATH, absname) > 0)
270 return strdup (absname);
271 else
272 return NULL;
273 }
274
275 if (GetCurrentDirectory (MAX_PATH, curdir) <= 0)
276 return NULL;
277
278 /* Relative path; search in current dir. */
279 if (strpbrk (prog, "\\"))
280 {
281 if (search_dir (curdir, prog, MAX_PATH, absname) > 0)
282 return strdup (absname);
283 else
284 return NULL;
285 }
286
287 /* Just filename; search current directory then PATH. */
288 path = alloca (strlen (getenv ("PATH")) + strlen (curdir) + 2);
289 strcpy (path, curdir);
290 strcat (path, ";");
291 strcat (path, getenv ("PATH"));
292
293 while (*path)
294 {
295 /* Get next directory from path. */
296 p = path;
297 while (*p && *p != ';') p++;
298 strncpy (dir, path, p - path);
299 dir[p - path] = '\0';
300
301 /* Search the directory for the program. */
302 if (search_dir (dir, prog, MAX_PATH, absname) > 0)
303 return strdup (absname);
304
305 /* Move to the next directory. */
306 path = p + 1;
307 }
308
309 return NULL;
310 }
311
/* Apply cmd-style caret and quote processing to SRC.  A caret outside
   double quotes is dropped and the character after it is taken
   literally; text between double quotes (quotes included) is kept
   verbatim.  When DST is non-NULL every surviving character is stored
   there; DST may be SRC itself, because output never runs ahead of
   input.  Returns the number of surviving characters, or -1 if an
   unquoted shell metacharacter is encountered.  */
static int
dequote_pass (const char *src, char *dst)
{
  int after_caret = 0;
  int in_quote = 0;
  int kept = 0;
  char c;

  for (; (c = *src) != '\0'; src++)
    {
      if (after_caret)
        /* Character escaped by a caret: keep it, whatever it is.  */
        after_caret = 0;
      else if (in_quote)
        {
          if (c == '"')
            in_quote = 0;
        }
      else if (c == '"')
        in_quote = 1;
      else if (c == '^')
        {
          /* The caret itself is not copied.  */
          after_caret = 1;
          continue;
        }
      else if (strchr ("<>&|()%!", c) != NULL)
        /* We saw an unquoted shell metacharacter and we don't
           understand it.  Bail out.  */
        return -1;

      if (dst != NULL)
        dst[kept] = c;
      kept++;
    }

  return kept;
}

/* Try to decode the given command line the way cmd would do it.  On
   success, return 1 with cmdline dequoted.  Otherwise, when we've
   found constructs only cmd can properly interpret, return 0 and
   leave cmdline unchanged.

   Dequoting can only subtract characters, so it is done in place:
   a first pass checks that the whole line can be handled, and only
   then does a second pass rewrite the buffer.  */
int
try_dequote_cmdline (char* cmdline)
{
  int kept;

  if (dequote_pass (cmdline, NULL) < 0)
    return 0;

  kept = dequote_pass (cmdline, cmdline);
  cmdline[kept] = '\0';
  return 1;
}
379
380 /*****************************************************************/
381
#if 0
/* This section is compiled out.  It is an unfinished stub: the
   function below fetches the process command line but never parses it
   or fills in the two variables declared here.  */
char ** _argv;
int _argc;

/* Parse commandline into argv array, allowing proper quoting of args. */
void
setup_argv (void)
{
  char * cmdline = GetCommandLine ();
  int arg_bytes = 0;


}
#endif
396
397 /* Information about child proc is global, to allow for automatic
398 termination when interrupted. At the moment, only one child process
399 can be running at any one time. */
400
401 PROCESS_INFORMATION child;
402 int interactive = TRUE;
403
404 BOOL
405 console_event_handler (DWORD event)
406 {
407 switch (event)
408 {
409 case CTRL_C_EVENT:
410 case CTRL_BREAK_EVENT:
411 if (!interactive)
412 {
413 /* Both command.com and cmd.exe have the annoying behavior of
414 prompting "Terminate batch job (y/n)?" when interrupted
415 while running a batch file, even if running in
416 non-interactive (-c) mode. Try to make up for this
417 deficiency by forcibly terminating the subprocess if
418 running non-interactively. */
419 if (child.hProcess &&
420 WaitForSingleObject (child.hProcess, 500) != WAIT_OBJECT_0)
421 TerminateProcess (child.hProcess, 0);
422 exit (STATUS_CONTROL_C_EXIT);
423 }
424 break;
425
426 #if 0
427 default:
428 /* CLOSE, LOGOFF and SHUTDOWN events - actually we don't get these
429 under Windows 95. */
430 fail ("cmdproxy: received %d event\n", event);
431 if (child.hProcess)
432 TerminateProcess (child.hProcess, 0);
433 #endif
434 }
435 return TRUE;
436 }
437
438 /* Change from normal usage; return value indicates whether spawn
439 succeeded or failed - program return code is returned separately. */
440 int
441 spawn (const char *progname, char *cmdline, const char *dir, int *retcode)
442 {
443 BOOL success = FALSE;
444 SECURITY_ATTRIBUTES sec_attrs;
445 STARTUPINFO start;
446 /* In theory, passing NULL for the environment block to CreateProcess
447 is the same as passing the value of GetEnvironmentStrings, but
448 doing this explicitly seems to cure problems running DOS programs
449 in some cases. */
450 char * envblock = GetEnvironmentStrings ();
451
452 sec_attrs.nLength = sizeof (sec_attrs);
453 sec_attrs.lpSecurityDescriptor = NULL;
454 sec_attrs.bInheritHandle = FALSE;
455
456 memset (&start, 0, sizeof (start));
457 start.cb = sizeof (start);
458
459 if (CreateProcess (progname, cmdline, &sec_attrs, NULL, TRUE,
460 0, envblock, dir, &start, &child))
461 {
462 success = TRUE;
463 /* wait for completion and pass on return code */
464 WaitForSingleObject (child.hProcess, INFINITE);
465 if (retcode)
466 GetExitCodeProcess (child.hProcess, (DWORD *)retcode);
467 CloseHandle (child.hThread);
468 CloseHandle (child.hProcess);
469 child.hProcess = NULL;
470 }
471
472 FreeEnvironmentStrings (envblock);
473
474 return success;
475 }
476
477 /* Return size of current environment block. */
478 int
479 get_env_size (void)
480 {
481 char * start = GetEnvironmentStrings ();
482 char * tmp = start;
483
484 while (tmp[0] || tmp[1])
485 ++tmp;
486 FreeEnvironmentStrings (start);
487 return tmp + 2 - start;
488 }
489
490 /******* Main program ********************************************/
491
492 int
493 main (int argc, char ** argv)
494 {
495 int rc;
496 int need_shell;
497 char * cmdline;
498 char * progname;
499 int envsize;
500 char **pass_through_args;
501 int num_pass_through_args;
502 char modname[MAX_PATH];
503 char path[MAX_PATH];
504 char dir[MAX_PATH];
505
506
507 interactive = TRUE;
508
509 SetConsoleCtrlHandler ((PHANDLER_ROUTINE) console_event_handler, TRUE);
510
511 if (!GetCurrentDirectory (sizeof (dir), dir))
512 fail ("error: GetCurrentDirectory failed\n");
513
514 /* We serve double duty: we can be called either as a proxy for the
515 real shell (that is, because we are defined to be the user shell),
516 or in our role as a helper application for running DOS programs.
517 In the former case, we interpret the command line options as if we
518 were a Unix shell, but in the latter case we simply pass our
519 command line to CreateProcess. We know which case we are dealing
520 with by whether argv[0] refers to ourself or to some other program.
521 (This relies on an arcane feature of CreateProcess, where we can
522 specify cmdproxy as the module to run, but specify a different
523 program in the command line - the MSVC startup code sets argv[0]
524 from the command line.) */
525
526 if (!GetModuleFileName (NULL, modname, sizeof (modname)))
527 fail ("error: GetModuleFileName failed\n");
528
529 /* Change directory to location of .exe so startup directory can be
530 deleted. */
531 progname = strrchr (modname, '\\');
532 *progname = '\0';
533 SetCurrentDirectory (modname);
534 *progname = '\\';
535
536 /* Due to problems with interaction between API functions that use "OEM"
537 codepage vs API functions that use the "ANSI" codepage, we need to
538 make things consistent by choosing one and sticking with it. */
539 SetConsoleCP (GetACP ());
540 SetConsoleOutputCP (GetACP ());
541
542 /* Although Emacs always sets argv[0] to an absolute pathname, we
543 might get run in other ways as well, so convert argv[0] to an
544 absolute name before comparing to the module name. Don't get
545 caught out by mixed short and long names. */
546 GetShortPathName (modname, modname, sizeof (modname));
547 path[0] = '\0';
548 if (!SearchPath (NULL, argv[0], ".exe", sizeof (path), path, &progname)
549 || !GetShortPathName (path, path, sizeof (path))
550 || stricmp (modname, path) != 0)
551 {
552 /* We are being used as a helper to run a DOS app; just pass
553 command line to DOS app without change. */
554 /* TODO: fill in progname. */
555 if (spawn (NULL, GetCommandLine (), dir, &rc))
556 return rc;
557 fail ("Could not run %s\n", GetCommandLine ());
558 }
559
560 /* Process command line. If running interactively (-c or /c not
561 specified) then spawn a real command shell, passing it the command
562 line arguments.
563
564 If not running interactively, then attempt to execute the specified
565 command directly. If necessary, spawn a real shell to execute the
566 command.
567
568 */
569
570 progname = NULL;
571 cmdline = NULL;
572 /* If no args, spawn real shell for interactive use. */
573 need_shell = TRUE;
574 interactive = TRUE;
575 /* Ask command.com to create an environment block with a reasonable
576 amount of free space. */
577 envsize = get_env_size () + 300;
578 pass_through_args = (char **) alloca (argc * sizeof (char *));
579 num_pass_through_args = 0;
580
581 while (--argc > 0)
582 {
583 ++argv;
584 /* Act on switches we recognize (mostly single letter switches,
585 except for -e); all unrecognized switches and extra args are
586 passed on to real shell if used (only really of benefit for
587 interactive use, but allow for batch use as well). Accept / as
588 switch char for compatibility with cmd.exe. */
589 if (((*argv)[0] == '-' || (*argv)[0] == '/') && (*argv)[1] != '\0')
590 {
591 if (((*argv)[1] == 'c' || (*argv)[1] == 'C') && ((*argv)[2] == '\0'))
592 {
593 if (--argc == 0)
594 fail ("error: expecting arg for %s\n", *argv);
595 cmdline = *(++argv);
596 interactive = FALSE;
597 }
598 else if (((*argv)[1] == 'i' || (*argv)[1] == 'I') && ((*argv)[2] == '\0'))
599 {
600 if (cmdline)
601 warn ("warning: %s ignored because of -c\n", *argv);
602 }
603 else if (((*argv)[1] == 'e' || (*argv)[1] == 'E') && ((*argv)[2] == ':'))
604 {
605 int requested_envsize = atoi (*argv + 3);
606 /* Enforce a reasonable minimum size, as above. */
607 if (requested_envsize > envsize)
608 envsize = requested_envsize;
609 /* For sanity, enforce a reasonable maximum. */
610 if (envsize > 32768)
611 envsize = 32768;
612 }
613 else
614 {
615 /* warn ("warning: unknown option %s ignored", *argv); */
616 pass_through_args[num_pass_through_args++] = *argv;
617 }
618 }
619 else
620 break;
621 }
622
623 #if 0
624 /* I think this is probably not useful - cmd.exe ignores extra
625 (non-switch) args in interactive mode, and they cannot be passed on
626 when -c was given. */
627
628 /* Collect any remaining args after (initial) switches. */
629 while (argc-- > 0)
630 {
631 pass_through_args[num_pass_through_args++] = *argv++;
632 }
633 #else
634 /* Probably a mistake for there to be extra args; not fatal. */
635 if (argc > 0)
636 warn ("warning: extra args ignored after '%s'\n", argv[-1]);
637 #endif
638
639 pass_through_args[num_pass_through_args] = NULL;
640
641 /* If -c option, determine if we must spawn a real shell, or if we can
642 execute the command directly ourself. */
643 if (cmdline)
644 {
645 const char *args;
646
647 /* The program name is the first token of cmdline. Since
648 filenames cannot legally contain embedded quotes, the value
649 of escape_char doesn't matter. */
650 args = cmdline;
651 if (!get_next_token (path, &args))
652 fail ("error: no program name specified.\n");
653
654 canon_filename (path);
655 progname = make_absolute (path);
656
657 /* If we found the program and the rest of the command line does
658 not contain unquoted shell metacharacters, run the program
659 directly (if not found it might be an internal shell command,
660 so don't fail). */
661 if (progname != NULL && try_dequote_cmdline (cmdline))
662 need_shell = FALSE;
663 else
664 progname = NULL;
665 }
666
667 pass_to_shell:
668 if (need_shell)
669 {
670 char * p;
671 int extra_arg_space = 0;
672 int maxlen, remlen;
673 int run_command_dot_com;
674
675 progname = getenv ("COMSPEC");
676 if (!progname)
677 fail ("error: COMSPEC is not set\n");
678
679 canon_filename (progname);
680 progname = make_absolute (progname);
681
682 if (progname == NULL || strchr (progname, '\\') == NULL)
683 fail ("error: the program %s could not be found.\n", getenv ("COMSPEC"));
684
685 /* Need to set environment size when running command.com. */
686 run_command_dot_com =
687 (stricmp (strrchr (progname, '\\'), "command.com") == 0);
688
689 /* Work out how much extra space is required for
690 pass_through_args. */
691 for (argv = pass_through_args; *argv != NULL; ++argv)
692 /* We don't expect to have to quote switches. */
693 extra_arg_space += strlen (*argv) + 2;
694
695 if (cmdline)
696 {
697 char * buf;
698
699 /* Convert to syntax expected by cmd.exe/command.com for
700 running non-interactively. Always quote program name in
701 case path contains spaces (fortunately it can't contain
702 quotes, since they are illegal in path names). */
703
704 remlen = maxlen =
705 strlen (progname) + extra_arg_space + strlen (cmdline) + 16;
706 buf = p = alloca (maxlen + 1);
707
708 /* Quote progname in case it contains spaces. */
709 p += _snprintf (p, remlen, "\"%s\"", progname);
710 remlen = maxlen - (p - buf);
711
712 /* Include pass_through_args verbatim; these are just switches
713 so should not need quoting. */
714 for (argv = pass_through_args; *argv != NULL; ++argv)
715 {
716 p += _snprintf (p, remlen, " %s", *argv);
717 remlen = maxlen - (p - buf);
718 }
719
720 if (run_command_dot_com)
721 _snprintf (p, remlen, " /e:%d /c %s", envsize, cmdline);
722 else
723 _snprintf (p, remlen, " /c %s", cmdline);
724 cmdline = buf;
725 }
726 else
727 {
728 if (run_command_dot_com)
729 {
730 /* Provide dir arg expected by command.com when first
731 started interactively (the "command search path"). To
732 avoid potential problems with spaces in command dir
733 (which cannot be quoted - command.com doesn't like it),
734 we always use the 8.3 form. */
735 GetShortPathName (progname, path, sizeof (path));
736 p = strrchr (path, '\\');
737 /* Trailing slash is acceptable, so always leave it. */
738 *(++p) = '\0';
739 }
740 else
741 path[0] = '\0';
742
743 remlen = maxlen =
744 strlen (progname) + extra_arg_space + strlen (path) + 13;
745 cmdline = p = alloca (maxlen + 1);
746
747 /* Quote progname in case it contains spaces. */
748 p += _snprintf (p, remlen, "\"%s\" %s", progname, path);
749 remlen = maxlen - (p - cmdline);
750
751 /* Include pass_through_args verbatim; these are just switches
752 so should not need quoting. */
753 for (argv = pass_through_args; *argv != NULL; ++argv)
754 {
755 p += _snprintf (p, remlen, " %s", *argv);
756 remlen = maxlen - (p - cmdline);
757 }
758
759 if (run_command_dot_com)
760 _snprintf (p, remlen, " /e:%d", envsize);
761 }
762 }
763
764 if (!progname)
765 fail ("Internal error: program name not defined\n");
766
767 if (!cmdline)
768 cmdline = progname;
769
770 if (spawn (progname, cmdline, dir, &rc))
771 return rc;
772
773 if (!need_shell)
774 {
775 need_shell = TRUE;
776 goto pass_to_shell;
777 }
778
779 fail ("Could not run %s\n", progname);
780
781 return 0;
782 }
783