From 603a06ec3955df869c9fb6bdf4e63d4318a3f051 Mon Sep 17 00:00:00 2001 From: TriMill Date: Fri, 17 Feb 2023 13:55:12 -0500 Subject: [PATCH] initial commit --- .gitignore | 2 + Makefile | 13 ++ commands | 333 ++++++++++++++++++++++++++++++++++++++++++++++++++ src/eval.c | 63 ++++++++++ src/eval.h | 5 + src/main.c | 34 ++++++ src/parser.c | 138 +++++++++++++++++++++ src/parser.h | 53 ++++++++ src/scanner.c | 196 +++++++++++++++++++++++++++++ src/scanner.h | 33 +++++ src/trie.h | 3 + triegen.py | 74 +++++++++++ 12 files changed, 947 insertions(+) create mode 100644 .gitignore create mode 100644 Makefile create mode 100644 commands create mode 100644 src/eval.c create mode 100644 src/eval.h create mode 100644 src/main.c create mode 100644 src/parser.c create mode 100644 src/parser.h create mode 100644 src/scanner.c create mode 100644 src/scanner.h create mode 100644 src/trie.h create mode 100755 triegen.py diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..0957a71 --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +bin +src/trie.c diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..4fc1b6c --- /dev/null +++ b/Makefile @@ -0,0 +1,13 @@ +make: trie $(wildcard src/*.c) + mkdir -p bin + gcc src/*.c -Wall -Wextra -pedantic -ggdb -o bin/sysh + +trie: triegen.py commands + python triegen.py commands src/trie.c + +clean: + rm -f src/trie.c + rm -rf bin + +run: make + ./bin/sysh diff --git a/commands b/commands new file mode 100644 index 0000000..d8b4e31 --- /dev/null +++ b/commands @@ -0,0 +1,333 @@ +read 0 +write 1 +open 2 +close 3 +stat 4 +fstat 5 +lstat 6 +poll 7 +lseek 8 +mmap 9 +mprotect 10 +munmap 11 +brk 12 +rt_sigaction 13 +rt_sigprocmask 14 +rt_sigreturn 15 +ioctl 16 +pread64 17 +pwrite64 18 +readv 19 +writev 20 +access 21 +pipe 22 +select 23 +sched_yield 24 +mremap 25 +msync 26 +mincore 27 +madvise 28 +shmget 29 +shmat 30 +shmctl 31 +dup 32 +dup2 33 +pause 34 +nanosleep 35 +getitimer 36 +alarm 37 +setitimer 38 +getpid 39 +sendfile 
40 +socket 41 +connect 42 +accept 43 +sendto 44 +recvfrom 45 +sendmsg 46 +recvmsg 47 +shutdown 48 +bind 49 +listen 50 +getsockname 51 +getpeername 52 +socketpair 53 +setsockopt 54 +getsockopt 55 +clone 56 +fork 57 +vfork 58 +execve 59 +exit 60 +wait4 61 +kill 62 +uname 63 +semget 64 +semop 65 +semctl 66 +shmdt 67 +msgget 68 +msgsnd 69 +msgrcv 70 +msgctl 71 +fcntl 72 +flock 73 +fsync 74 +fdatasync 75 +truncate 76 +ftruncate 77 +getdents 78 +getcwd 79 +chdir 80 +fchdir 81 +rename 82 +mkdir 83 +rmdir 84 +creat 85 +link 86 +unlink 87 +symlink 88 +readlink 89 +chmod 90 +fchmod 91 +chown 92 +fchown 93 +lchown 94 +umask 95 +gettimeofday 96 +getrlimit 97 +getrusage 98 +sysinfo 99 +times 100 +ptrace 101 +getuid 102 +syslog 103 +getgid 104 +setuid 105 +setgid 106 +geteuid 107 +getegid 108 +setpgid 109 +getppid 110 +getpgrp 111 +setsid 112 +setreuid 113 +setregid 114 +getgroups 115 +setgroups 116 +setresuid 117 +getresuid 118 +setresgid 119 +getresgid 120 +getpgid 121 +setfsuid 122 +setfsgid 123 +getsid 124 +capget 125 +capset 126 +rt_sigpending 127 +rt_sigtimedwait 128 +rt_sigqueueinfo 129 +rt_sigsuspend 130 +sigaltstack 131 +utime 132 +mknod 133 +uselib 134 +personality 135 +ustat 136 +statfs 137 +fstatfs 138 +sysfs 139 +getpriority 140 +setpriority 141 +sched_setparam 142 +sched_getparam 143 +sched_setscheduler 144 +sched_getscheduler 145 +sched_get_priority_max 146 +sched_get_priority_min 147 +sched_rr_get_interval 148 +mlock 149 +munlock 150 +mlockall 151 +munlockall 152 +vhangup 153 +modify_ldt 154 +pivot_root 155 +_sysctl 156 +prctl 157 +arch_prctl 158 +adjtimex 159 +setrlimit 160 +chroot 161 +sync 162 +acct 163 +settimeofday 164 +mount 165 +umount2 166 +swapon 167 +swapoff 168 +reboot 169 +sethostname 170 +setdomainname 171 +iopl 172 +ioperm 173 +create_module 174 +init_module 175 +delete_module 176 +get_kernel_syms 177 +query_module 178 +quotactl 179 +nfsservctl 180 +getpmsg 181 +putpmsg 182 +afs_syscall 183 +tuxcall 184 +security 185 +gettid 186 +readahead 187 
+setxattr 188 +lsetxattr 189 +fsetxattr 190 +getxattr 191 +lgetxattr 192 +fgetxattr 193 +listxattr 194 +llistxattr 195 +flistxattr 196 +removexattr 197 +lremovexattr 198 +fremovexattr 199 +tkill 200 +time 201 +futex 202 +sched_setaffinity 203 +sched_getaffinity 204 +set_thread_area 205 +io_setup 206 +io_destroy 207 +io_getevents 208 +io_submit 209 +io_cancel 210 +get_thread_area 211 +lookup_dcookie 212 +epoll_create 213 +epoll_ctl_old 214 +epoll_wait_old 215 +remap_file_pages 216 +getdents64 217 +set_tid_address 218 +restart_syscall 219 +semtimedop 220 +fadvise64 221 +timer_create 222 +timer_settime 223 +timer_gettime 224 +timer_getoverrun 225 +timer_delete 226 +clock_settime 227 +clock_gettime 228 +clock_getres 229 +clock_nanosleep 230 +exit_group 231 +epoll_wait 232 +epoll_ctl 233 +tgkill 234 +utimes 235 +vserver 236 +mbind 237 +set_mempolicy 238 +get_mempolicy 239 +mq_open 240 +mq_unlink 241 +mq_timedsend 242 +mq_timedreceive 243 +mq_notify 244 +mq_getsetattr 245 +kexec_load 246 +waitid 247 +add_key 248 +request_key 249 +keyctl 250 +ioprio_set 251 +ioprio_get 252 +inotify_init 253 +inotify_add_watch 254 +inotify_rm_watch 255 +migrate_pages 256 +openat 257 +mkdirat 258 +mknodat 259 +fchownat 260 +futimesat 261 +newfstatat 262 +unlinkat 263 +renameat 264 +linkat 265 +symlinkat 266 +readlinkat 267 +fchmodat 268 +faccessat 269 +pselect6 270 +ppoll 271 +unshare 272 +set_robust_list 273 +get_robust_list 274 +splice 275 +tee 276 +sync_file_range 277 +vmsplice 278 +move_pages 279 +utimensat 280 +epoll_pwait 281 +signalfd 282 +timerfd_create 283 +eventfd 284 +fallocate 285 +timerfd_settime 286 +timerfd_gettime 287 +accept4 288 +signalfd4 289 +eventfd2 290 +epoll_create1 291 +dup3 292 +pipe2 293 +inotify_init1 294 +preadv 295 +pwritev 296 +rt_tgsigqueueinfo 297 +perf_event_open 298 +recvmmsg 299 +fanotify_init 300 +fanotify_mark 301 +prlimit64 302 +name_to_handle_at 303 +open_by_handle_at 304 +clock_adjtime 305 +syncfs 306 +sendmmsg 307 +setns 308 +getcpu 309 
+process_vm_readv 310 +process_vm_writev 311 +kcmp 312 +finit_module 313 +sched_setattr 314 +sched_getattr 315 +renameat2 316 +seccomp 317 +getrandom 318 +memfd_create 319 +kexec_file_load 320 +bpf 321 +execveat 322 +userfaultfd 323 +membarrier 324 +mlock2 325 +copy_file_range 326 +preadv2 327 +pwritev2 328 +pkey_mprotect 329 +pkey_alloc 330 +pkey_free 331 +statx 332 diff --git a/src/eval.c b/src/eval.c new file mode 100644 index 0000000..4fc3e73 --- /dev/null +++ b/src/eval.c @@ -0,0 +1,63 @@ +#include +#include +#include +#include +#include + +#include "eval.h" +#include "parser.h" +#include "scanner.h" + +static long eval_syscall(Line* line) { + if(line->len > 6) { + fprintf(stderr, "sysh: too many args for syscall\n"); + return 0; + } + long args[6] = {0,0,0,0,0,0}; + bool cloned[6] = {0,0,0,0,0,0}; + for(int i = 0; i < line->len; i++) { + Argument arg = line->args[i]; + if(arg.type == ARG_NUM) { + args[i] = arg.as.num; + } else if(arg.type == ARG_STR) { + int len = strlen(arg.as.str); + char* buf = malloc(len + 1); + strcpy(buf, arg.as.str); + args[i] = (long)buf; + cloned[i] = true; + } else { + for(int j = 0; j < line->len; j++) { + if(cloned[j]) free((void*)args[j]); + } + fprintf(stderr, "sysh: invalid arg type\n"); + return 0; + } + } + long result = syscall(line->id, args[0], args[1], args[2], args[3], args[4], args[5]); + for(int i = 0; i < line->len; i++) { + if(cloned[i]) free((void*)args[i]); + } + return result; +} + +static long eval_line(Line* line) { + if(line->id >= 0) { + return eval_syscall(line); + } else { + // TODO + return 0; + } +} + + +long eval_block(Block* block) { + long result = 0; + for(int i = 0; i < block->len; i++) { + result = eval_line(&block->lines[i]); + if(errno > 0) { + fprintf(stderr, "E%d: %s\n", errno, strerror(errno)); + } + break; + } + return result; +} diff --git a/src/eval.h b/src/eval.h new file mode 100644 index 0000000..ea2bfa1 --- /dev/null +++ b/src/eval.h @@ -0,0 +1,5 @@ +#pragma once + +#include "parser.h" + 
+long eval_block(Block* block); diff --git a/src/main.c b/src/main.c new file mode 100644 index 0000000..d93a12b --- /dev/null +++ b/src/main.c @@ -0,0 +1,34 @@ +#include +#include +#include +#include + +#include "eval.h" +#include "parser.h" +#include "scanner.h" + +#define LINE_LEN 1024 +#define PROMPT "[%ld]sysh$ " +#define EPROMPT "[E]sysh$ " + +void repl() { + char buf[LINE_LEN]; + printf(PROMPT, 0L); + while(fgets(buf, LINE_LEN, stdin)) { + Scanner sc = init_scanner(buf); + BlockResult br = parse(&sc); + if(!br.is_ok) { + printf("sysh: %s\n", br.as.err); + printf(EPROMPT); + } else if(br.as.ok.len > 0) { + long result = eval_block(&br.as.ok); + printf(PROMPT, result); + block_free(&br.as.ok); + } + } +} + +int main(void) { + repl(); + return 0; +} diff --git a/src/parser.c b/src/parser.c new file mode 100644 index 0000000..88f7527 --- /dev/null +++ b/src/parser.c @@ -0,0 +1,138 @@ +#include +#include +#include + +#include "parser.h" +#include "scanner.h" +#include "trie.h" + +void block_init(Block* b) { + b->len = 0; + b->capacity = 0; + b->lines = NULL; +} + +void block_add(Block* b, Line l) { + if(b->capacity <= b->len) { + int new_capacity = (b->capacity == 0 ? 8 : 2*(b->capacity)); + b->lines = realloc(b->lines, new_capacity * sizeof(Line)); + b->capacity = new_capacity; + } + b->lines[b->len] = l; + b->len++; +} + +void block_free(Block* b) { + for(int i = 0; i < b->len; i++) { + line_free(&b->lines[i]); + } + free(b->lines); + block_init(b); +} + +void line_init(Line* l, long id) { + l->id = id; + l->len = 0; + l->capacity = 0; + l->args = NULL; +} + +void line_add(Line* l, Argument arg) { + if(l->capacity <= l->len) { + int new_capacity = (l->capacity == 0 ? 
8 : 2*(l->capacity)); + l->args = realloc(l->args, new_capacity * sizeof(Argument)); + l->capacity = new_capacity; + } + l->args[l->len] = arg; + l->len++; +} + +void line_free(Line* line) { + for(int i = 0; i < line->len; i++) { + switch(line->args[i].type) { + case ARG_BLOCK: + block_free(&line->args[i].as.block); + break; + case ARG_STR: + case ARG_CMD: + case ARG_VAR: + free((char*)line->args[i].as.str); + break; + case ARG_NUM: + break; + } + } + free(line->args); +} + + +static LineResult parse_line(Scanner* sc, int id) { + Line line; + line_init(&line, id); + while(true) { + Token tok = scanner_next(sc); + switch(tok.type) { + case TOK_EOF: + case TOK_EOL: + return OK(line, LineResult); + case TOK_ERR: + line_free(&line); + return ERR(tok.as.str, LineResult); + case TOK_INT: + line_add(&line, (Argument){.type = ARG_NUM, .as.num = tok.as.num}); + break; + case TOK_VAR: + line_add(&line, (Argument){.type = ARG_VAR, .as.str = tok.as.str}); + break; + case TOK_STR: + line_add(&line, (Argument){.type = ARG_STR, .as.str = tok.as.str}); + break; + case TOK_CMD: + line_add(&line, (Argument){.type = ARG_CMD, .as.str = tok.as.str}); + break; + default: + line_free(&line); + return ERR("unexpected token", LineResult); + } + } +} + +static BlockResult parse_block(Scanner* sc, bool braced) { + Block block; + block_init(&block); + while(true) { + Token tok = scanner_next(sc); + if((!braced && tok.type == TOK_EOF) || (braced && tok.type == TOK_RBRACE)) { + return OK(block, BlockResult); + } + switch(tok.type) { + case TOK_ERR: + block_free(&block); + return ERR(tok.as.str, BlockResult); + case TOK_EOL: + continue; + case TOK_CMD: { + long id = trie_get(tok.as.str); + token_free(&tok); + if(id == -1) { + block_free(&block); + return ERR("invalid syscall or command name", BlockResult); + } + LineResult sr = parse_line(sc, id); + if(!sr.is_ok) { + block_free(&block); + return ERR(sr.as.err, BlockResult); + } + block_add(&block, sr.as.ok); + } break; + default: + 
block_free(&block); + return ERR("unexpected token", BlockResult); + } + } + return OK(block, BlockResult); +} + +BlockResult parse(Scanner* sc) { + return parse_block(sc, false); +} diff --git a/src/parser.h b/src/parser.h new file mode 100644 index 0000000..ed4db05 --- /dev/null +++ b/src/parser.h @@ -0,0 +1,53 @@ +#pragma once + +#include +#include "scanner.h" + +#define RESULT(T, E) struct { bool is_ok; union { T ok; E err; } as; } +#define OK(val, R) (R){.is_ok = 1, .as.ok = (val) } +#define ERR(val, R) (R){.is_ok = 0, .as.err = (val) } + +typedef struct Argument_s Argument; + +typedef struct { + long id; + int len; + int capacity; + Argument* args; +} Line; + +typedef struct { + int len; + int capacity; + Line* lines; +} Block; + +typedef enum { + ARG_BLOCK, + ARG_STR, + ARG_NUM, + ARG_VAR, + ARG_CMD, +} ArgType; + +struct Argument_s { + ArgType type; + union { + Block block; + const char* str; + long num; + } as; +}; + +typedef RESULT(Block, const char*) BlockResult; +typedef RESULT(Line, const char*) LineResult; + +void block_init(Block* b); +void block_add(Block* b, Line l); +void block_free(Block* b); + +void line_init(Line* l, long id); +void line_add(Line* l, Argument a); +void line_free(Line* l); + +BlockResult parse(Scanner* sc); diff --git a/src/scanner.c b/src/scanner.c new file mode 100644 index 0000000..a1cb077 --- /dev/null +++ b/src/scanner.c @@ -0,0 +1,196 @@ +#include +#include +#include +#include "scanner.h" + +// Based heavily on the scanner implementation +// from Crafting Interpreters by Robert Nystrom + +Scanner init_scanner(char *src) { + return (Scanner){.start=src, .current=src, .eof=(*src == '\0')}; +} + +void token_free(Token* tok) { + if(tok->type == TOK_STR || tok->type == TOK_CMD || tok->type == TOK_VAR) { + free((char*)(tok->as.str)); + } +} + +static char peek(const Scanner* sc) { + return *sc->current; +} + +static char next(Scanner* sc) { + if(*sc->current == '\0') { + sc->eof = true; + return '\0'; + } + char c = 
*sc->current; + sc->current++; + return c; +} + +static bool is_alnum(char c) { + return (c >= 'a' && c <= 'z') + || (c >= 'A' && c <= 'Z') + || (c >= '0' && c <= '9') || c == '_'; +} + +static bool is_digit(char c) { + return c >= '0' && c <= '9'; +} + +static Token err_token(const char* msg) { + Token token = { + .type = TOK_ERR, + .as.str = msg, + }; + return token; +} + +static void skip_ws(Scanner* sc) { + while(true) { + char c = peek(sc); + switch(c) { + case ' ': + case '\t': + next(sc); + break; + case '#': + while(peek(sc) != '\n' && peek(sc) != '\0') { + next(sc); + } + return; + default: + return; + } + } +} + +static Token scan_string(Scanner* sc) { + while(peek(sc) != '\0' && peek(sc) != '\'') next(sc); + if(peek(sc) == '\0') return err_token("EOF while scanning raw string"); + next(sc); + + int len = sc->current - sc->start - 2; + char* buf = malloc((len + 1) * sizeof(char)); + memcpy(buf, sc->start + 1, len); + buf[len] = '\0'; + return (Token){ + .type = TOK_STR, + .as.str = buf, + }; +} + +static char* add_char(char* buf, int* len, int* capacity, char new) { + if(*len == *capacity) { + int new_capacity = (*capacity == 0 ? 
8 : 2*(*capacity)); + buf = realloc(buf, new_capacity); + *capacity = new_capacity; + } + buf[*len] = new; + (*len)++; + return buf; +} + +static Token scan_escape_string(Scanner* sc) { + char* buf = NULL; + int len = 0; + int capacity = 0; + char c; + while(true) { + c = next(sc); + if(c == '"') break; + if(c == '\0') { + free(buf); + return err_token("EOF while scanning double-quoted string"); + } + if(c == '\\') { + switch(next(sc)) { + case '\\': buf = add_char(buf, &len, &capacity, '\\'); break; + case '"': buf = add_char(buf, &len, &capacity, '"'); break; + case 'n': buf = add_char(buf, &len, &capacity, '\n'); break; + case 'r': buf = add_char(buf, &len, &capacity, '\r'); break; + case 't': buf = add_char(buf, &len, &capacity, '\t'); break; + case '0': buf = add_char(buf, &len, &capacity, '\0'); break; + default: { + free(buf); + return err_token("unknown escape sequence"); + } + } + } else { + buf = add_char(buf, &len, &capacity, c); + } + } + + buf = add_char(buf, &len, &capacity, '\0'); + buf = realloc(buf, len); + + return (Token){ + .type = TOK_STR, + .as.str = buf, + }; +} + +static Token scan_var(Scanner* sc) { + while(is_alnum(peek(sc))) next(sc); + + int len = sc->current - sc->start - 1; + char* buf = malloc((len + 1) * sizeof(char)); + memcpy(buf, sc->start + 1, len); + buf[len] = '\0'; + return (Token){ + .type = TOK_VAR, + .as.str = buf, + }; +} + +static Token scan_cmd(Scanner* sc) { + while(is_alnum(peek(sc))) next(sc); + + int len = sc->current - sc->start; + char* buf = malloc((len + 1) * sizeof(char)); + memcpy(buf, sc->start, len); + buf[len] = '\0'; + return (Token){ + .type = TOK_CMD, + .as.str = buf, + }; +} + +static Token scan_num(Scanner* sc) { + while(is_digit(peek(sc))) next(sc); + // TODO base + int len = sc->current - sc->start; + char buf[len+1]; + memcpy(buf, sc->start, len); + buf[len] = '\0'; + long num = strtol(buf, NULL, 10); + return (Token){ + .type = TOK_INT, + .as.num = num, + }; +} + +Token scanner_next(Scanner* sc) { + 
/* src/scanner.h -- token types and the scanner's public API. */
#pragma once

/* Scanner has a bool member; include its definition so that this header
 * does not depend on what the including file happened to include first. */
#include <stdbool.h>

typedef enum {
    TOK_ERR,
    TOK_EOF,
    TOK_EOL,
    TOK_CMD,
    TOK_STR,
    TOK_INT,
    TOK_VAR,
    TOK_LBRACE,
    TOK_RBRACE,
} TokenType;

// TOK_STR, TOK_CMD, TOK_VAR contain allocated data, the rest do not
typedef struct {
    TokenType type;
    union {
        const char* str;  /* TOK_STR, TOK_CMD, TOK_VAR; message for TOK_ERR */
        long num;         /* TOK_INT */
    } as;
} Token;

typedef struct {
    char* start;    /* first character of the token being scanned */
    char* current;  /* next character to be consumed */
    bool eof;       /* set once the end of the input has been reached */
} Scanner;

Scanner init_scanner(char* src);
Token scanner_next(Scanner* sc);

void token_free(Token* tok);


/* src/trie.h -- lookup generated by triegen.py (implementation in src/trie.c). */

/* Return the number registered for `key`, or -1 if the name is unknown. */
int trie_get(const char* key);
#!/usr/bin/env python
"""Generate a C trie lookup function made of nested switch statements.

Usage: triegen.py <input> <output>

<input> holds one "name number" pair per line, separated by whitespace;
blank lines are ignored. <output> receives a C file that defines
``int trie_get(const char* key)``, which returns the number registered for
``key`` or -1 when the name is unknown.
"""

import sys

# Pseudo-character stored in a trie node to mark "a key ends here".
END = '\0'

# Characters that need a backslash escape inside a C literal.
_C_ESCAPES = {
    '\0': '\\0',
    '\\': '\\\\',
    "'": "\\'",
    '"': '\\"',
    '\n': '\\n',
    '\r': '\\r',
    '\t': '\\t',
}


def _c_escape(text):
    """Return *text* escaped for use inside a C char or string literal."""
    return "".join(_C_ESCAPES.get(ch, ch) for ch in text)


def _c_char(ch):
    """Return the C character literal for the single character *ch*."""
    return "'%s'" % _c_escape(ch)


def parse_entries(text):
    """Return the ``(name, number)`` pairs listed in *text*.

    Raises ValueError for a non-blank line that has no number or whose
    number is not an integer. Fields after the second are ignored.
    """
    entries = []
    for lineno, raw in enumerate(text.split('\n'), 1):
        fields = raw.split()
        if not fields:
            continue
        if len(fields) < 2:
            raise ValueError("line %d: expected 'name number'" % lineno)
        entries.append((fields[0], int(fields[1])))
    return entries


def build_trie(entries):
    """Build a nested-dict trie from ``(name, number)`` pairs.

    Every node maps a character to a child node; the END key of a node maps
    to the number of the name that ends at that node.
    """
    trie = {}
    for name, value in entries:
        node = trie
        for ch in name:
            node = node.setdefault(ch, {})
        node[END] = value
    return trie


def matches_exact(trie, start):
    """Detect a subtrie that holds exactly one key.

    Returns ``(suffix, number)`` when *trie* is a single unbranched chain
    ending in one key, and False otherwise. With *start* true, a node that
    consists only of the END marker is rejected as well, so that the caller
    emits a plain ``case '\\0'`` for it instead of an empty comparison.
    """
    if len(trie) != 1:
        return False
    char, child = next(iter(trie.items()))
    if char == END:
        return False if start else ('', child)
    rest = matches_exact(child, False)
    if rest is False:
        return False
    return (char + rest[0], rest[1])


def write_trie(f, trie, depth):
    """Write C code to *f* that looks up ``key + depth`` in *trie*.

    A subtrie with a single remaining key becomes one comparison of the whole
    remaining suffix; anything else becomes a ``switch`` on ``key[depth]``
    with one case per child. Code nested in a parent switch ends with
    ``break`` so that a mismatch falls through to the final ``return -1``.
    """
    # At depth 0 there is no enclosing switch to break out of.
    leave = " break;" if depth != 0 else ""
    single = matches_exact(trie, True)
    if single:
        suffix, value = single
        if len(suffix) == 1:
            # The key must also end right after the character, otherwise any
            # longer key sharing this prefix would match too.
            test = "key[%d] == %s && key[%d] == '\\0'" % (
                depth, _c_char(suffix), depth + 1)
        else:
            test = 'strcmp(key + %d, "%s") == 0' % (depth, _c_escape(suffix))
        f.write("if(%s) { return %s; }%s\n" % (test, value, leave))
        return
    ws = " " * (depth + 2)
    f.write("switch(key[%s]) {\n" % depth)
    for k, v in trie.items():
        f.write("%scase %s: " % (ws, _c_char(k)))
        if k == END:
            f.write("return %s;\n" % v)
        else:
            write_trie(f, v, depth + 1)
    f.write("%s}%s\n" % (" " * (depth + 1), leave))


def main(argv=None):
    """Run the generator; return the process exit status."""
    argv = sys.argv if argv is None else argv
    if len(argv) < 3:
        print("Not enough arguments. Usage: triegen.py <input> <output>",
              file=sys.stderr)
        return 1

    input_file = argv[1]
    output_file = argv[2]

    with open(input_file, 'r') as f:
        trie = build_trie(parse_entries(f.read()))

    with open(output_file, 'w') as f:
        # strcmp() in the generated code needs <string.h>.
        f.write("#include <string.h>\n")
        f.write("#include \"trie.h\"\n\n")
        f.write("/* auto-generated by triegen.py */\n\n")
        f.write("int trie_get(const char* key) {\n ")
        write_trie(f, trie, 0)
        f.write(" return -1;\n}\n")
    return 0


if __name__ == "__main__":
    sys.exit(main())