separate out cmdline stuff

This commit is contained in:
bumbread 2022-08-05 15:41:01 +11:00
parent c93371977a
commit 7c38794787
3 changed files with 128 additions and 125 deletions

125
src/_win/cmdline.c Normal file
View File

@ -0,0 +1,125 @@
// NOTE(review): presumably the maximum command-line length in UTF-16 code
// units, terminator included (the documented Windows limit) -- confirm.
#define CMDLINE_CMD_MAX 32767
// Number of char* elements in the buffer handed to cmdline_to_argv8().
// The first 16384 elements hold the argv pointers; everything after that is
// used as the byte buffer for the converted strings (cmdline_to_argv8 starts
// writing at argv + 16384). The tail is sized to hold at least 98298 bytes,
// expressed in whole pointer-sized elements.
// NOTE(review): 98298 == 3 * 32766, presumably the worst-case number of
// encoded bytes for a maximum-length command line -- confirm.
#define CMDLINE_ARGV_MAX (16384+(98298+(int)sizeof(char*))/(int)sizeof(char*))
// https://github.com/skeeto/scratch/blob/master/misc/cmdline.c#L27
// Split a Windows command line into a null-terminated argv[] of WTF-8/UTF-8
// strings.
//
//   cmd   NUL-terminated command line in UTF-16 code units.
//   argv  caller-provided storage used for both halves of the result: the
//         first 16384 elements receive the argument pointers, and the memory
//         starting at argv + 16384 is used as the byte buffer for the strings
//         themselves. No bounds checks are done here, so the caller must size
//         the buffer for the worst case.
//
// Returns argc, which is always >= 1 (argv[0] may be an empty string).
// argv[argc] is set to a null pointer.
//
// argv[0] is parsed with its own rules: an unquoted argv[0] ends at the first
// space/tab, a quoted one ends at the next double quote, and backslashes are
// never treated as escapes. For the remaining arguments, a run of backslashes
// followed by a double quote collapses to half as many backslashes, and an
// odd run additionally makes the quote literal.
//
// Valid surrogate pairs are combined and written as 4-byte sequences; an
// unpaired surrogate is written as-is as a 3-byte sequence.
static int cmdline_to_argv8(const wchar_t *cmd, char **argv) {
    int   argc  = 1;  // worst case: argv[0] is an empty string
    int   state = 6;  // special argv[0] state
    int   slash = 0;  // length of the current backslash run (states 3/4)
    char *buf   = (char *)(argv + 16384);  // second half: byte buffer

    argv[0] = buf;
    while (*cmd) {
        int units = 1;  // code units consumed from cmd for this character
        int c = *cmd++;
        if (c>>10 == 0x36 && *cmd>>10 == 0x37) {  // surrogates?
            c = 0x10000 + ((c - 0xd800)<<10) + (*cmd++ - 0xdc00);
            units = 2;
        }

        // Each state either "continue"s (character consumed, nothing to
        // emit) or "break"s out to the encoder below, which appends c.
        switch (state) {
        case 0: switch (c) {  // outside token
                case 0x09:
                case 0x20: continue;
                case 0x22: argv[argc++] = buf;
                           state = 2;
                           continue;
                case 0x5c: argv[argc++] = buf;
                           slash = 1;
                           state = 3;
                           break;
                default  : argv[argc++] = buf;
                           state = 1;
                } break;
        case 1: switch (c) {  // inside unquoted token
                case 0x09:
                case 0x20: *buf++ = 0;
                           state = 0;
                           continue;
                case 0x22: state = 2;
                           continue;
                case 0x5c: slash = 1;
                           state = 3;
                           break;
                } break;
        case 2: switch (c) {  // inside quoted token
                case 0x22: state = 5;
                           continue;
                case 0x5c: slash = 1;
                           state = 4;
                           break;
                } break;
        case 3:
        case 4: switch (c) {  // backslash sequence (3: unquoted, 4: quoted)
                case 0x22: // The backslashes were already emitted; drop
                           // half of them (rounded up).
                           buf -= (1 + slash) >> 1;
                           if (slash & 1) {
                               // Odd run: the quote is literal, emit it.
                               state -= 2;
                               break;
                           } // fallthrough
                default  : // Re-dispatch this character in state 1 or 2.
                           // Rewind by every unit it consumed: backing up a
                           // single unit after a surrogate pair would drop
                           // the high surrogate.
                           cmd -= units;
                           state -= 2;
                           continue;
                case 0x5c: slash++;
                } break;
        case 5: switch (c) {  // quoted token exit
                default  : // Re-dispatch in state 1; rewind the whole
                           // character (see states 3/4).
                           cmd -= units;
                           state = 1;
                           continue;
                case 0x22: state = 1;  // "" inside quotes: literal quote
                } break;
        case 6: switch (c) {  // begin argv[0]
                case 0x09:
                case 0x20: *buf++ = 0;
                           state = 0;
                           continue;
                case 0x22: state = 8;
                           continue;
                default  : state = 7;
                } break;
        case 7: switch (c) {  // unquoted argv[0]
                case 0x09:
                case 0x20: *buf++ = 0;
                           state = 0;
                           continue;
                } break;
        case 8: switch (c) {  // quoted argv[0]
                case 0x22: *buf++ = 0;
                           state = 0;
                           continue;
                } break;
        }

        // The mask keeps bits 16-20, bit 11 and bit 7, which is enough to
        // pick the encoded length: 1 byte (< 0x80), 2 bytes (< 0x800),
        // 3 bytes (< 0x10000), otherwise 4 bytes.
        switch (c & 0x1f0880) {  // WTF-8/UTF-8 encoding
        case 0x00000: *buf++ = 0x00 | ((c >>  0)     ); break;
        case 0x00080: *buf++ = 0xc0 | ((c >>  6)     );
                      *buf++ = 0x80 | ((c >>  0) & 63); break;
        case 0x00800:
        case 0x00880: *buf++ = 0xe0 | ((c >> 12)     );
                      *buf++ = 0x80 | ((c >>  6) & 63);
                      *buf++ = 0x80 | ((c >>  0) & 63); break;
        default     : *buf++ = 0xf0 | ((c >> 18)     );
                      *buf++ = 0x80 | ((c >> 12) & 63);
                      *buf++ = 0x80 | ((c >>  6) & 63);
                      *buf++ = 0x80 | ((c >>  0) & 63);
        }
    }

    *buf = 0;        // terminate the last argument
    argv[argc] = 0;  // argv[] itself is null-terminated
    return argc;
}
// Return the process's raw command line as stored in the process parameters.
// The pointer is taken straight from the PEB's data, not a copy.
static wchar_t *get_wcmdline() {
    // The TEB self-pointer is read through the gs segment; from there the
    // chain is TEB -> PEB -> process parameters -> CommandLine.
    TEB *thread_block = (TEB *)__readgsqword(offsetof(NT_TIB, Self));
    RTL_USER_PROCESS_PARAMETERS *proc_params =
        thread_block->ProcessEnvironmentBlock->ProcessParameters;
    return proc_params->CommandLine.Buffer;
}
// Build the process's argv from the raw Windows command line.
// Stores the argument count in *argc_ptr and returns the argv array. The
// array (pointers and string bytes) lives in static storage inside this
// function, so it remains valid after return and is reused by later calls.
static char **get_command_args(int *argc_ptr) {
    static char *storage[CMDLINE_ARGV_MAX];
    *argc_ptr = cmdline_to_argv8(get_wcmdline(), storage);
    return storage;
}

View File

@ -2,10 +2,8 @@
// Windows symbols because windows // Windows symbols because windows
int _fltused=0; int _fltused=0;
#define CMDLINE_CMD_MAX 32767
#define CMDLINE_ARGV_MAX (16384+(98298+(int)sizeof(char*))/(int)sizeof(char*))
extern int main(int argc, char** argv); extern int main(int argc, char** argv);
extern int wmain(int argc, wchar_t** argv, wchar_t **envp);
#pragma comment(lib, "kernel32.lib") #pragma comment(lib, "kernel32.lib")
#pragma comment(lib, "DbgHelp.lib") #pragma comment(lib, "DbgHelp.lib")
@ -20,7 +18,7 @@ static int atqexit_func_count;
static char **get_command_args(int *argc_ptr); static char **get_command_args(int *argc_ptr);
void mainCRTStartup() { _Noreturn void mainCRTStartup() {
// Set-up some platform stuff // Set-up some platform stuff
_setup_eh(); _setup_eh();
_setup_heap(); _setup_heap();
@ -41,7 +39,6 @@ void mainCRTStartup() {
exit(exit_code); exit(exit_code);
} }
_Noreturn void _Exit(int status) { _Noreturn void _Exit(int status) {
ExitProcess(status); ExitProcess(status);
#if defined(_MSC_VER) #if defined(_MSC_VER)
@ -164,123 +161,3 @@ int _wcsicmp(wchar_t const* s1, wchar_t const* s2) {
return diff; return diff;
} }
static int cmdline_to_argv8(const wchar_t *cmd, char **argv);
// Build the process's argv from the raw Windows command line.
// Stores the argument count in *argc_ptr and returns the argv array. The
// array (pointers and string bytes) lives in static storage inside this
// function, so it remains valid after return and is reused by later calls.
static char **get_command_args(int *argc_ptr) {
    static char *storage[CMDLINE_ARGV_MAX];
    // The command line is reached through the TEB self-pointer (read via the
    // gs segment): TEB -> PEB -> process parameters -> CommandLine.
    TEB *thread_block = (TEB *)__readgsqword(offsetof(NT_TIB, Self));
    RTL_USER_PROCESS_PARAMETERS *proc_params =
        thread_block->ProcessEnvironmentBlock->ProcessParameters;
    // Convert the UTF-16 command line into argv strings.
    *argc_ptr = cmdline_to_argv8(proc_params->CommandLine.Buffer, storage);
    return storage;
}
// https://github.com/skeeto/scratch/blob/master/misc/cmdline.c#L27
// Split a Windows command line into a null-terminated argv[] of WTF-8/UTF-8
// strings.
//
//   cmd   NUL-terminated command line in UTF-16 code units.
//   argv  caller-provided storage used for both halves of the result: the
//         first 16384 elements receive the argument pointers, and the memory
//         starting at argv + 16384 is used as the byte buffer for the strings
//         themselves. No bounds checks are done here, so the caller must size
//         the buffer for the worst case.
//
// Returns argc, which is always >= 1 (argv[0] may be an empty string).
// argv[argc] is set to a null pointer.
//
// argv[0] is parsed with its own rules: an unquoted argv[0] ends at the first
// space/tab, a quoted one ends at the next double quote, and backslashes are
// never treated as escapes. For the remaining arguments, a run of backslashes
// followed by a double quote collapses to half as many backslashes, and an
// odd run additionally makes the quote literal.
//
// Valid surrogate pairs are combined and written as 4-byte sequences; an
// unpaired surrogate is written as-is as a 3-byte sequence.
static int cmdline_to_argv8(const wchar_t *cmd, char **argv) {
    int   argc  = 1;  // worst case: argv[0] is an empty string
    int   state = 6;  // special argv[0] state
    int   slash = 0;  // length of the current backslash run (states 3/4)
    char *buf   = (char *)(argv + 16384);  // second half: byte buffer

    argv[0] = buf;
    while (*cmd) {
        int units = 1;  // code units consumed from cmd for this character
        int c = *cmd++;
        if (c>>10 == 0x36 && *cmd>>10 == 0x37) {  // surrogates?
            c = 0x10000 + ((c - 0xd800)<<10) + (*cmd++ - 0xdc00);
            units = 2;
        }

        // Each state either "continue"s (character consumed, nothing to
        // emit) or "break"s out to the encoder below, which appends c.
        switch (state) {
        case 0: switch (c) {  // outside token
                case 0x09:
                case 0x20: continue;
                case 0x22: argv[argc++] = buf;
                           state = 2;
                           continue;
                case 0x5c: argv[argc++] = buf;
                           slash = 1;
                           state = 3;
                           break;
                default  : argv[argc++] = buf;
                           state = 1;
                } break;
        case 1: switch (c) {  // inside unquoted token
                case 0x09:
                case 0x20: *buf++ = 0;
                           state = 0;
                           continue;
                case 0x22: state = 2;
                           continue;
                case 0x5c: slash = 1;
                           state = 3;
                           break;
                } break;
        case 2: switch (c) {  // inside quoted token
                case 0x22: state = 5;
                           continue;
                case 0x5c: slash = 1;
                           state = 4;
                           break;
                } break;
        case 3:
        case 4: switch (c) {  // backslash sequence (3: unquoted, 4: quoted)
                case 0x22: // The backslashes were already emitted; drop
                           // half of them (rounded up).
                           buf -= (1 + slash) >> 1;
                           if (slash & 1) {
                               // Odd run: the quote is literal, emit it.
                               state -= 2;
                               break;
                           } // fallthrough
                default  : // Re-dispatch this character in state 1 or 2.
                           // Rewind by every unit it consumed: backing up a
                           // single unit after a surrogate pair would drop
                           // the high surrogate.
                           cmd -= units;
                           state -= 2;
                           continue;
                case 0x5c: slash++;
                } break;
        case 5: switch (c) {  // quoted token exit
                default  : // Re-dispatch in state 1; rewind the whole
                           // character (see states 3/4).
                           cmd -= units;
                           state = 1;
                           continue;
                case 0x22: state = 1;  // "" inside quotes: literal quote
                } break;
        case 6: switch (c) {  // begin argv[0]
                case 0x09:
                case 0x20: *buf++ = 0;
                           state = 0;
                           continue;
                case 0x22: state = 8;
                           continue;
                default  : state = 7;
                } break;
        case 7: switch (c) {  // unquoted argv[0]
                case 0x09:
                case 0x20: *buf++ = 0;
                           state = 0;
                           continue;
                } break;
        case 8: switch (c) {  // quoted argv[0]
                case 0x22: *buf++ = 0;
                           state = 0;
                           continue;
                } break;
        }

        // The mask keeps bits 16-20, bit 11 and bit 7, which is enough to
        // pick the encoded length: 1 byte (< 0x80), 2 bytes (< 0x800),
        // 3 bytes (< 0x10000), otherwise 4 bytes.
        switch (c & 0x1f0880) {  // WTF-8/UTF-8 encoding
        case 0x00000: *buf++ = 0x00 | ((c >>  0)     ); break;
        case 0x00080: *buf++ = 0xc0 | ((c >>  6)     );
                      *buf++ = 0x80 | ((c >>  0) & 63); break;
        case 0x00800:
        case 0x00880: *buf++ = 0xe0 | ((c >> 12)     );
                      *buf++ = 0x80 | ((c >>  6) & 63);
                      *buf++ = 0x80 | ((c >>  0) & 63); break;
        default     : *buf++ = 0xf0 | ((c >> 18)     );
                      *buf++ = 0x80 | ((c >> 12) & 63);
                      *buf++ = 0x80 | ((c >>  6) & 63);
                      *buf++ = 0x80 | ((c >>  0) & 63);
        }
    }

    *buf = 0;        // terminate the last argument
    argv[argc] = 0;  // argv[] itself is null-terminated
    return argc;
}

View File

@ -65,6 +65,7 @@
#if defined(CIABATTA_WIN) #if defined(CIABATTA_WIN)
#include "_win/win.h" #include "_win/win.h"
#include "_win/assert.c" #include "_win/assert.c"
#include "_win/cmdline.c"
#include "_win/environment.c" #include "_win/environment.c"
#include "_win/heap.c" #include "_win/heap.c"
#include "_win/signal.c" #include "_win/signal.c"