initial commit

This commit is contained in:
TriMill 2023-02-17 13:55:12 -05:00
commit 603a06ec39
12 changed files with 947 additions and 0 deletions

2
.gitignore vendored Normal file
View file

@ -0,0 +1,2 @@
bin
src/trie.c

13
Makefile Normal file
View file

@ -0,0 +1,13 @@
# Build rules for sysh.
# None of the targets below name a real file, so declare them phony; otherwise
# a stray file called e.g. "clean" or "trie" would silently disable the rule.
.PHONY: make trie clean run

# Default target: regenerate the trie, then compile every source into bin/sysh.
make: trie $(wildcard src/*.c)
	mkdir -p bin
	gcc src/*.c -Wall -Wextra -pedantic -ggdb -o bin/sysh

# Generate src/trie.c (name -> number lookup) from the `commands` table.
trie: triegen.py commands
	python triegen.py commands src/trie.c

# Remove the generated source and all build output.
clean:
	rm -f src/trie.c
	rm -rf bin

# Build, then start the shell.
run: make
	./bin/sysh

333
commands Normal file
View file

@ -0,0 +1,333 @@
read 0
write 1
open 2
close 3
stat 4
fstat 5
lstat 6
poll 7
lseek 8
mmap 9
mprotect 10
munmap 11
brk 12
rt_sigaction 13
rt_sigprocmask 14
rt_sigreturn 15
ioctl 16
pread64 17
pwrite64 18
readv 19
writev 20
access 21
pipe 22
select 23
sched_yield 24
mremap 25
msync 26
mincore 27
madvise 28
shmget 29
shmat 30
shmctl 31
dup 32
dup2 33
pause 34
nanosleep 35
getitimer 36
alarm 37
setitimer 38
getpid 39
sendfile 40
socket 41
connect 42
accept 43
sendto 44
recvfrom 45
sendmsg 46
recvmsg 47
shutdown 48
bind 49
listen 50
getsockname 51
getpeername 52
socketpair 53
setsockopt 54
getsockopt 55
clone 56
fork 57
vfork 58
execve 59
exit 60
wait4 61
kill 62
uname 63
semget 64
semop 65
semctl 66
shmdt 67
msgget 68
msgsnd 69
msgrcv 70
msgctl 71
fcntl 72
flock 73
fsync 74
fdatasync 75
truncate 76
ftruncate 77
getdents 78
getcwd 79
chdir 80
fchdir 81
rename 82
mkdir 83
rmdir 84
creat 85
link 86
unlink 87
symlink 88
readlink 89
chmod 90
fchmod 91
chown 92
fchown 93
lchown 94
umask 95
gettimeofday 96
getrlimit 97
getrusage 98
sysinfo 99
times 100
ptrace 101
getuid 102
syslog 103
getgid 104
setuid 105
setgid 106
geteuid 107
getegid 108
setpgid 109
getppid 110
getpgrp 111
setsid 112
setreuid 113
setregid 114
getgroups 115
setgroups 116
setresuid 117
getresuid 118
setresgid 119
getresgid 120
getpgid 121
setfsuid 122
setfsgid 123
getsid 124
capget 125
capset 126
rt_sigpending 127
rt_sigtimedwait 128
rt_sigqueueinfo 129
rt_sigsuspend 130
sigaltstack 131
utime 132
mknod 133
uselib 134
personality 135
ustat 136
statfs 137
fstatfs 138
sysfs 139
getpriority 140
setpriority 141
sched_setparam 142
sched_getparam 143
sched_setscheduler 144
sched_getscheduler 145
sched_get_priority_max 146
sched_get_priority_min 147
sched_rr_get_interval 148
mlock 149
munlock 150
mlockall 151
munlockall 152
vhangup 153
modify_ldt 154
pivot_root 155
_sysctl 156
prctl 157
arch_prctl 158
adjtimex 159
setrlimit 160
chroot 161
sync 162
acct 163
settimeofday 164
mount 165
umount2 166
swapon 167
swapoff 168
reboot 169
sethostname 170
setdomainname 171
iopl 172
ioperm 173
create_module 174
init_module 175
delete_module 176
get_kernel_syms 177
query_module 178
quotactl 179
nfsservctl 180
getpmsg 181
putpmsg 182
afs_syscall 183
tuxcall 184
security 185
gettid 186
readahead 187
setxattr 188
lsetxattr 189
fsetxattr 190
getxattr 191
lgetxattr 192
fgetxattr 193
listxattr 194
llistxattr 195
flistxattr 196
removexattr 197
lremovexattr 198
fremovexattr 199
tkill 200
time 201
futex 202
sched_setaffinity 203
sched_getaffinity 204
set_thread_area 205
io_setup 206
io_destroy 207
io_getevents 208
io_submit 209
io_cancel 210
get_thread_area 211
lookup_dcookie 212
epoll_create 213
epoll_ctl_old 214
epoll_wait_old 215
remap_file_pages 216
getdents64 217
set_tid_address 218
restart_syscall 219
semtimedop 220
fadvise64 221
timer_create 222
timer_settime 223
timer_gettime 224
timer_getoverrun 225
timer_delete 226
clock_settime 227
clock_gettime 228
clock_getres 229
clock_nanosleep 230
exit_group 231
epoll_wait 232
epoll_ctl 233
tgkill 234
utimes 235
vserver 236
mbind 237
set_mempolicy 238
get_mempolicy 239
mq_open 240
mq_unlink 241
mq_timedsend 242
mq_timedreceive 243
mq_notify 244
mq_getsetattr 245
kexec_load 246
waitid 247
add_key 248
request_key 249
keyctl 250
ioprio_set 251
ioprio_get 252
inotify_init 253
inotify_add_watch 254
inotify_rm_watch 255
migrate_pages 256
openat 257
mkdirat 258
mknodat 259
fchownat 260
futimesat 261
newfstatat 262
unlinkat 263
renameat 264
linkat 265
symlinkat 266
readlinkat 267
fchmodat 268
faccessat 269
pselect6 270
ppoll 271
unshare 272
set_robust_list 273
get_robust_list 274
splice 275
tee 276
sync_file_range 277
vmsplice 278
move_pages 279
utimensat 280
epoll_pwait 281
signalfd 282
timerfd_create 283
eventfd 284
fallocate 285
timerfd_settime 286
timerfd_gettime 287
accept4 288
signalfd4 289
eventfd2 290
epoll_create1 291
dup3 292
pipe2 293
inotify_init1 294
preadv 295
pwritev 296
rt_tgsigqueueinfo 297
perf_event_open 298
recvmmsg 299
fanotify_init 300
fanotify_mark 301
prlimit64 302
name_to_handle_at 303
open_by_handle_at 304
clock_adjtime 305
syncfs 306
sendmmsg 307
setns 308
getcpu 309
process_vm_readv 310
process_vm_writev 311
kcmp 312
finit_module 313
sched_setattr 314
sched_getattr 315
renameat2 316
seccomp 317
getrandom 318
memfd_create 319
kexec_file_load 320
bpf 321
execveat 322
userfaultfd 323
membarrier 324
mlock2 325
copy_file_range 326
preadv2 327
pwritev2 328
pkey_mprotect 329
pkey_alloc 330
pkey_free 331
statx 332

63
src/eval.c Normal file
View file

@ -0,0 +1,63 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <errno.h>
#include "eval.h"
#include "parser.h"
#include "scanner.h"
/*
 * Execute the raw system call described by `line`.
 *
 * At most six arguments are forwarded. Numeric arguments are passed by value.
 * String arguments are copied into a private heap buffer whose address is
 * passed instead; every such copy is released before returning.
 *
 * Returns the value produced by syscall(), or 0 if the line cannot be run
 * (too many arguments, unsupported argument type, out of memory). In those
 * cases a diagnostic is written to stderr. errno as left by syscall() is
 * preserved across the cleanup so the caller can inspect it.
 */
static long eval_syscall(Line* line) {
    enum { MAX_SYSCALL_ARGS = 6 };
    long args[MAX_SYSCALL_ARGS] = {0};
    bool cloned[MAX_SYSCALL_ARGS] = {false};
    long result = 0;
    int saved_errno;

    if(line->len > MAX_SYSCALL_ARGS) {
        fprintf(stderr, "sysh: too many args for syscall\n");
        return 0;
    }
    for(int i = 0; i < line->len; i++) {
        Argument arg = line->args[i];
        if(arg.type == ARG_NUM) {
            args[i] = arg.as.num;
        } else if(arg.type == ARG_STR) {
            size_t size = strlen(arg.as.str) + 1;
            char* buf = malloc(size);
            if(buf == NULL) {
                fprintf(stderr, "sysh: out of memory\n");
                goto cleanup;
            }
            memcpy(buf, arg.as.str, size);
            args[i] = (long)buf;
            cloned[i] = true;
        } else {
            fprintf(stderr, "sysh: invalid arg type\n");
            goto cleanup;
        }
    }
    result = syscall(line->id, args[0], args[1], args[2], args[3], args[4], args[5]);

cleanup:
    // free() is allowed to modify errno; keep the value the syscall reported.
    saved_errno = errno;
    for(int i = 0; i < line->len; i++) {
        if(cloned[i]) free((void*)args[i]);
    }
    errno = saved_errno;
    return result;
}
/*
 * Evaluate a single parsed line.
 *
 * Lines with a non-negative id are executed as system calls; lines with a
 * negative id are not handled yet and evaluate to 0.
 */
static long eval_line(Line* line) {
    if(line->id < 0) {
        // TODO
        return 0;
    }
    return eval_syscall(line);
}
/*
 * Evaluate the lines of `block` in order.
 *
 * errno is cleared before each line so that only an error raised by that
 * line is reported. When a line leaves errno set, the error is printed to
 * stderr and evaluation stops.
 *
 * Returns the result of the last line that was evaluated, or 0 for an empty
 * block.
 */
long eval_block(Block* block) {
    long result = 0;
    for(int i = 0; i < block->len; i++) {
        errno = 0;
        result = eval_line(&block->lines[i]);
        // Capture errno immediately: fprintf/strerror below may change it.
        int err = errno;
        if(err > 0) {
            fprintf(stderr, "E%d: %s\n", err, strerror(err));
            break;
        }
    }
    return result;
}

5
src/eval.h Normal file
View file

@ -0,0 +1,5 @@
#pragma once
#include "parser.h"
/*
 * Evaluate `block` and return the result of the last line that was
 * evaluated (0 if none was). If errno is set after evaluating a line, the
 * error number and its description are printed to stderr.
 */
long eval_block(Block* block);

34
src/main.c Normal file
View file

@ -0,0 +1,34 @@
#include <stdbool.h>
#include <stdio.h>
#include <assert.h>
#include <string.h>
#include "eval.h"
#include "parser.h"
#include "scanner.h"
// Size in bytes of the buffer that holds one line of input read from stdin.
#define LINE_LEN 1024
// Normal prompt; %ld receives the result of the previous evaluation.
#define PROMPT "[%ld]sysh$ "
// Prompt shown after input that failed to parse.
#define EPROMPT "[E]sysh$ "
/*
 * Read-eval-print loop.
 *
 * Reads one line at a time from stdin until EOF, parses it and evaluates the
 * resulting block. A prompt is printed before every read: the normal prompt
 * carries the result of the most recent evaluation, the error prompt is used
 * while the most recent non-empty input failed to parse. Empty input simply
 * re-displays the current prompt.
 */
void repl(void) {
    char buf[LINE_LEN];
    long last_result = 0;
    bool parse_failed = false;

    for(;;) {
        if(parse_failed) {
            fputs(EPROMPT, stdout);
        } else {
            printf(PROMPT, last_result);
        }
        // The prompt has no trailing newline, so flush it explicitly.
        fflush(stdout);

        if(fgets(buf, sizeof buf, stdin) == NULL) {
            break;
        }

        Scanner sc = init_scanner(buf);
        BlockResult br = parse(&sc);
        if(!br.is_ok) {
            printf("sysh: %s\n", br.as.err);
            parse_failed = true;
            continue;
        }
        if(br.as.ok.len > 0) {
            last_result = eval_block(&br.as.ok);
            parse_failed = false;
        }
        block_free(&br.as.ok);
    }
}
/* Program entry point: run the interactive loop until stdin is exhausted. */
int main(void)
{
    repl();
    return 0;
}

138
src/parser.c Normal file
View file

@ -0,0 +1,138 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "parser.h"
#include "scanner.h"
#include "trie.h"
/* Reset `b` to an empty block that owns no storage. */
void block_init(Block* b) {
    *b = (Block){ .len = 0, .capacity = 0, .lines = NULL };
}
/*
 * Append `l` to `b`, taking ownership of the line's contents.
 *
 * The backing array starts at 8 entries and doubles whenever it is full.
 * Allocation failure is fatal: a message is printed and the process aborts,
 * instead of writing through a NULL pointer.
 */
void block_add(Block* b, Line l) {
    if(b->capacity <= b->len) {
        int new_capacity = (b->capacity == 0 ? 8 : 2*(b->capacity));
        // Keep the old pointer until realloc is known to have succeeded.
        Line* grown = realloc(b->lines, (size_t)new_capacity * sizeof *grown);
        if(grown == NULL) {
            fprintf(stderr, "sysh: out of memory\n");
            abort();
        }
        b->lines = grown;
        b->capacity = new_capacity;
    }
    b->lines[b->len] = l;
    b->len++;
}
/*
 * Release every line held by `b` and the line array itself, then leave `b`
 * in the empty state so it can be reused or freed again safely.
 */
void block_free(Block* b) {
    int idx = 0;
    while(idx < b->len) {
        line_free(&b->lines[idx]);
        idx++;
    }
    free(b->lines);
    b->lines = NULL;
    b->capacity = 0;
    b->len = 0;
}
/* Initialise `l` as an argument-less line carrying the given command id. */
void line_init(Line* l, long id) {
    *l = (Line){ .id = id, .len = 0, .capacity = 0, .args = NULL };
}
/*
 * Append `arg` to `l`, taking ownership of any heap data the argument holds.
 *
 * The backing array starts at 8 entries and doubles whenever it is full.
 * Allocation failure is fatal: a message is printed and the process aborts,
 * instead of writing through a NULL pointer.
 */
void line_add(Line* l, Argument arg) {
    if(l->capacity <= l->len) {
        int new_capacity = (l->capacity == 0 ? 8 : 2*(l->capacity));
        // Keep the old pointer until realloc is known to have succeeded.
        Argument* grown = realloc(l->args, (size_t)new_capacity * sizeof *grown);
        if(grown == NULL) {
            fprintf(stderr, "sysh: out of memory\n");
            abort();
        }
        l->args = grown;
        l->capacity = new_capacity;
    }
    l->args[l->len] = arg;
    l->len++;
}
/*
 * Release everything owned by `line`: nested blocks, the heap strings of
 * string/command/variable arguments, and the argument array itself.
 * Numeric arguments own nothing.
 */
void line_free(Line* line) {
    for(int i = 0; i < line->len; i++) {
        Argument* arg = &line->args[i];
        if(arg->type == ARG_BLOCK) {
            block_free(&arg->as.block);
        } else if(arg->type == ARG_STR
               || arg->type == ARG_CMD
               || arg->type == ARG_VAR) {
            free((char*)arg->as.str);
        }
    }
    free(line->args);
}
/*
 * Parse the arguments that follow a command name, up to the end of the line
 * or the end of input.
 *
 * `id` is the number the command name resolved to; it is a long to match
 * Line.id and line_init(). Ownership of each token's string moves into the
 * corresponding argument.
 *
 * Returns the finished line, or an error message (with every argument
 * collected so far released) on a scanner error or an unexpected token.
 */
static LineResult parse_line(Scanner* sc, long id) {
    Line line;
    line_init(&line, id);
    while(true) {
        Token tok = scanner_next(sc);
        switch(tok.type) {
        case TOK_EOF:
        case TOK_EOL:
            return OK(line, LineResult);
        case TOK_ERR:
            line_free(&line);
            return ERR(tok.as.str, LineResult);
        case TOK_INT:
            line_add(&line, (Argument){.type = ARG_NUM, .as.num = tok.as.num});
            break;
        case TOK_VAR:
            line_add(&line, (Argument){.type = ARG_VAR, .as.str = tok.as.str});
            break;
        case TOK_STR:
            line_add(&line, (Argument){.type = ARG_STR, .as.str = tok.as.str});
            break;
        case TOK_CMD:
            line_add(&line, (Argument){.type = ARG_CMD, .as.str = tok.as.str});
            break;
        default:
            // Only brace tokens reach this point; they carry no heap data.
            line_free(&line);
            return ERR("unexpected token", LineResult);
        }
    }
}
/*
 * Parse a sequence of lines into a block.
 *
 * When `braced` is false the block ends at end of input; when it is true the
 * block ends at a closing brace. Every line must start with a command name
 * known to trie_get(); blank lines are skipped.
 *
 * Returns the block, or an error message with everything parsed so far
 * released.
 */
static BlockResult parse_block(Scanner* sc, bool braced) {
    Block block;
    block_init(&block);
    while(true) {
        Token tok = scanner_next(sc);
        if((!braced && tok.type == TOK_EOF) || (braced && tok.type == TOK_RBRACE)) {
            return OK(block, BlockResult);
        }
        switch(tok.type) {
        case TOK_ERR:
            block_free(&block);
            return ERR(tok.as.str, BlockResult);
        case TOK_EOL:
            continue;
        case TOK_CMD: {
            long id = trie_get(tok.as.str);
            // The name is only needed for the lookup.
            token_free(&tok);
            if(id == -1) {
                block_free(&block);
                return ERR("invalid syscall or command name", BlockResult);
            }
            LineResult sr = parse_line(sc, id);
            if(!sr.is_ok) {
                block_free(&block);
                return ERR(sr.as.err, BlockResult);
            }
            block_add(&block, sr.as.ok);
        } break;
        default:
            // A string or variable token owns heap memory; release it before
            // reporting the error (token_free ignores other token kinds).
            token_free(&tok);
            block_free(&block);
            return ERR("unexpected token", BlockResult);
        }
    }
}
/* Parse all of the scanner's input as one top-level (unbraced) block. */
BlockResult parse(Scanner* sc) {
    const bool braced = false;
    return parse_block(sc, braced);
}

53
src/parser.h Normal file
View file

@ -0,0 +1,53 @@
#pragma once
#include <stdbool.h>
#include "scanner.h"
// RESULT(T, E): anonymous struct type holding either a success value of type
// T in `as.ok` (when `is_ok` is true) or an error value of type E in `as.err`.
#define RESULT(T, E) struct { bool is_ok; union { T ok; E err; } as; }
// OK(val, R): build a successful result of result type R wrapping `val`.
#define OK(val, R) (R){.is_ok = 1, .as.ok = (val) }
// ERR(val, R): build a failed result of result type R carrying error `val`.
#define ERR(val, R) (R){.is_ok = 0, .as.err = (val) }
// Forward declaration: Line stores Arguments, and an Argument may in turn
// contain a Block of Lines.
typedef struct Argument_s Argument;
// One command invocation: the number its name resolved to, plus its arguments.
typedef struct {
long id;        // value returned by trie_get() for the command name
int len;        // number of arguments in use
int capacity;   // number of arguments allocated
Argument* args; // growable array, NULL while empty
} Line;
// A growable sequence of lines.
typedef struct {
int len;        // number of lines in use
int capacity;   // number of lines allocated
Line* lines;    // growable array, NULL while empty
} Block;
// Discriminator for Argument; selects which member of `as` is valid.
typedef enum {
ARG_BLOCK, // nested block (as.block)
ARG_STR,   // string literal (as.str)
ARG_NUM,   // integer (as.num)
ARG_VAR,   // variable name (as.str)
ARG_CMD,   // bare word (as.str)
} ArgType;
// A single argument of a line. For ARG_STR, ARG_CMD and ARG_VAR the string
// is heap-allocated and released by line_free(); ARG_BLOCK contents are
// released with block_free().
struct Argument_s {
ArgType type;
union {
Block block;     // valid when type == ARG_BLOCK
const char* str; // valid when type is ARG_STR, ARG_CMD or ARG_VAR
long num;        // valid when type == ARG_NUM
} as;
};
// Parser results: the parsed value on success, or an error message string.
typedef RESULT(Block, const char*) BlockResult;
typedef RESULT(Line, const char*) LineResult;
// Set `b` to the empty block (no lines, no storage).
void block_init(Block* b);
// Append line `l` to `b`, growing the storage as needed.
void block_add(Block* b, Line l);
// Free every line in `b` and its storage, then reset `b` to empty.
void block_free(Block* b);
// Set `l` to an argument-less line with the given id.
void line_init(Line* l, long id);
// Append argument `a` to `l`, growing the storage as needed.
void line_add(Line* l, Argument a);
// Free the arguments of `l` (including owned strings and nested blocks).
void line_free(Line* l);
// Parse the scanner's entire input as a top-level block.
BlockResult parse(Scanner* sc);

196
src/scanner.c Normal file
View file

@ -0,0 +1,196 @@
#include <stdlib.h>
#include <string.h>
#include <stdbool.h>
#include "scanner.h"
// Based heavily on the scanner implementation
// from Crafting Interpreters by Robert Nystrom
Scanner init_scanner(char *src) {
return (Scanner){.start=src, .current=src, .eof=(*src == '\0')};
}
/*
 * Release the heap string owned by a string, command or variable token.
 * Tokens of any other kind own nothing and are left untouched.
 */
void token_free(Token* tok) {
    switch(tok->type) {
    case TOK_STR:
    case TOK_CMD:
    case TOK_VAR:
        free((char*)(tok->as.str));
        break;
    default:
        break;
    }
}
/* Return the next unread character without consuming it. */
static char peek(const Scanner* sc) {
    return sc->current[0];
}
/*
 * Consume and return the next character. At the terminating NUL nothing is
 * consumed: the end-of-input flag is set and '\0' is returned.
 */
static char next(Scanner* sc) {
    char c = *sc->current;
    if(c != '\0') {
        sc->current++;
    } else {
        sc->eof = true;
    }
    return c;
}
/* True for ASCII letters, decimal digits and the underscore. */
static bool is_alnum(char c) {
    if(c == '_') return true;
    if(c >= '0' && c <= '9') return true;
    if(c >= 'A' && c <= 'Z') return true;
    return c >= 'a' && c <= 'z';
}
/* True for the ASCII decimal digits '0' through '9'. */
static bool is_digit(char c) {
    return !(c < '0' || c > '9');
}
static Token err_token(const char* msg) {
Token token = {
.type = TOK_ERR,
.as.str = msg,
};
return token;
}
/*
 * Skip spaces and tabs. A '#' starts a comment that runs up to, but not
 * including, the next newline or the end of input; scanning stops there.
 */
static void skip_ws(Scanner* sc) {
    for(;;) {
        char c = peek(sc);
        if(c == ' ' || c == '\t') {
            next(sc);
            continue;
        }
        if(c == '#') {
            while(peek(sc) != '\n' && peek(sc) != '\0') {
                next(sc);
            }
        }
        return;
    }
}
/*
 * Scan a single-quoted (raw) string. The opening quote has already been
 * consumed and sc->start points at it.
 *
 * Returns a TOK_STR token owning a heap copy of the text between the quotes,
 * or an error token if input ends before the closing quote or memory runs
 * out.
 */
static Token scan_string(Scanner* sc) {
    while(peek(sc) != '\0' && peek(sc) != '\'') next(sc);
    if(peek(sc) == '\0') return err_token("EOF while scanning raw string");
    next(sc);
    // Exclude the two quote characters from the copied text.
    size_t len = (size_t)(sc->current - sc->start) - 2;
    char* buf = malloc(len + 1);
    if(buf == NULL) return err_token("out of memory");
    memcpy(buf, sc->start + 1, len);
    buf[len] = '\0';
    return (Token){
        .type = TOK_STR,
        .as.str = buf,
    };
}
/*
 * Append `new` to the growable byte buffer `buf`.
 *
 * `*len` is the number of bytes in use and `*capacity` the number allocated;
 * both are updated. The buffer starts at 8 bytes and doubles when full.
 * Returns the (possibly moved) buffer, which the caller must use in place of
 * `buf`. Allocation failure aborts the process rather than writing through a
 * NULL pointer.
 */
static char* add_char(char* buf, int* len, int* capacity, char new) {
    if(*len == *capacity) {
        int new_capacity = (*capacity == 0 ? 8 : 2*(*capacity));
        // Keep the old pointer until realloc is known to have succeeded.
        char* grown = realloc(buf, (size_t)new_capacity);
        if(grown == NULL) {
            free(buf);
            abort();
        }
        buf = grown;
        *capacity = new_capacity;
    }
    buf[*len] = new;
    (*len)++;
    return buf;
}
/*
 * Scan a double-quoted string. The opening quote has already been consumed.
 *
 * Recognised escapes: \\ \" \n \r \t \0. Returns a TOK_STR token owning a
 * NUL-terminated heap buffer with the decoded text, or an error token (with
 * the partial buffer released) on end of input or an unknown escape.
 */
static Token scan_escape_string(Scanner* sc) {
    char* buf = NULL;
    int len = 0;
    int capacity = 0;
    char c;
    while(true) {
        c = next(sc);
        if(c == '"') break;
        if(c == '\0') {
            free(buf);
            return err_token("EOF while scanning double-quoted string");
        }
        if(c == '\\') {
            switch(next(sc)) {
            case '\\': buf = add_char(buf, &len, &capacity, '\\'); break;
            case '"': buf = add_char(buf, &len, &capacity, '"'); break;
            case 'n': buf = add_char(buf, &len, &capacity, '\n'); break;
            case 'r': buf = add_char(buf, &len, &capacity, '\r'); break;
            case 't': buf = add_char(buf, &len, &capacity, '\t'); break;
            case '0': buf = add_char(buf, &len, &capacity, '\0'); break;
            default: {
                free(buf);
                return err_token("unknown escape sequence");
            }
            }
        } else {
            buf = add_char(buf, &len, &capacity, c);
        }
    }
    buf = add_char(buf, &len, &capacity, '\0');
    // Trim unused capacity. If the shrink fails the original buffer is still
    // valid, so keep it instead of replacing it with NULL.
    char* trimmed = realloc(buf, (size_t)len);
    if(trimmed != NULL) {
        buf = trimmed;
    }
    return (Token){
        .type = TOK_STR,
        .as.str = buf,
    };
}
/*
 * Scan a variable reference. The leading '$' has already been consumed and
 * sc->start points at it.
 *
 * Returns a TOK_VAR token owning a heap copy of the name (without the '$'),
 * or an error token if memory runs out.
 */
static Token scan_var(Scanner* sc) {
    while(is_alnum(peek(sc))) next(sc);
    // Exclude the '$' from the copied name.
    size_t len = (size_t)(sc->current - sc->start) - 1;
    char* buf = malloc(len + 1);
    if(buf == NULL) return err_token("out of memory");
    memcpy(buf, sc->start + 1, len);
    buf[len] = '\0';
    return (Token){
        .type = TOK_VAR,
        .as.str = buf,
    };
}
/*
 * Scan a bare word. Its first character has already been consumed and
 * sc->start points at it.
 *
 * Returns a TOK_CMD token owning a heap copy of the word, or an error token
 * if memory runs out.
 */
static Token scan_cmd(Scanner* sc) {
    while(is_alnum(peek(sc))) next(sc);
    size_t len = (size_t)(sc->current - sc->start);
    char* buf = malloc(len + 1);
    if(buf == NULL) return err_token("out of memory");
    memcpy(buf, sc->start, len);
    buf[len] = '\0';
    return (Token){
        .type = TOK_CMD,
        .as.str = buf,
    };
}
/*
 * Scan a decimal integer. Its first character (a digit or '-') has already
 * been consumed and sc->start points at it.
 *
 * Returns a TOK_INT token, or an error token when a '-' is not followed by
 * any digit (previously such input was silently read as 0).
 */
static Token scan_num(Scanner* sc) {
    while(is_digit(peek(sc))) next(sc);
    // TODO base
    // The source buffer is NUL-terminated and the character at sc->current
    // is not a digit, so strtol can parse in place and stops exactly at
    // sc->current whenever at least one digit was present.
    char* end = NULL;
    long num = strtol(sc->start, &end, 10);
    if(end != sc->current) {
        return err_token("expected digits after '-'");
    }
    return (Token){
        .type = TOK_INT,
        .as.num = num,
    };
}
/*
 * Produce the next token.
 *
 * Leading blanks and comments are skipped, sc->start is moved to the first
 * character of the token, and the token kind is chosen from that character.
 * At end of input TOK_EOF is returned (repeatedly, if called again).
 */
Token scanner_next(Scanner* sc) {
    skip_ws(sc);
    sc->start = sc->current;
    char c = next(sc);
    switch(c) {
    case '\0': return (Token){.type = TOK_EOF};
    case '\n':
    case ';':  return (Token){.type = TOK_EOL};
    case '{':  return (Token){.type = TOK_LBRACE};
    case '}':  return (Token){.type = TOK_RBRACE};
    case '$':  return scan_var(sc);
    case '\'': return scan_string(sc);
    case '\"': return scan_escape_string(sc);
    case '-':  return scan_num(sc);
    case '.':  return scan_cmd(sc);
    default:   break;
    }
    if(is_digit(c)) {
        return scan_num(sc);
    }
    if(is_alnum(c)) {
        return scan_cmd(sc);
    }
    return err_token("Unexpected character");
}

33
src/scanner.h Normal file
View file

@ -0,0 +1,33 @@
#pragma once
#include <stdbool.h>
// Kinds of token produced by scanner_next().
typedef enum {
TOK_ERR,    // scanning error; as.str holds the message
TOK_EOF,    // end of input
TOK_EOL,    // end of line: newline or ';'
TOK_CMD,    // bare word starting with '.', a letter or '_'
TOK_STR,    // single- or double-quoted string
TOK_INT,    // decimal integer, optionally starting with '-'
TOK_VAR,    // '$' followed by a name
TOK_LBRACE, // '{'
TOK_RBRACE, // '}'
} TokenType;
// A scanned token.
// TOK_STR, TOK_CMD, TOK_VAR contain allocated data, the rest do not
// (release it with token_free()). TOK_ERR also uses as.str, but for a message
// that token_free() does not free.
typedef struct {
TokenType type;
union {
const char* str; // text of TOK_STR/TOK_CMD/TOK_VAR, or the TOK_ERR message
long num;        // value of TOK_INT
} as;
} Token;
// Scanner state over a NUL-terminated source buffer.
typedef struct {
char* start;   // first character of the token currently being scanned
char* current; // next character to be read
bool eof;      // set once the terminating NUL has been reached
} Scanner;
// Create a scanner positioned at the start of the NUL-terminated buffer `src`.
Scanner init_scanner(char* src);
// Skip blanks and comments, then return the next token.
Token scanner_next(Scanner* sc);
// Free the string owned by a TOK_STR/TOK_CMD/TOK_VAR token; no-op otherwise.
void token_free(Token* tok);

3
src/trie.h Normal file
View file

@ -0,0 +1,3 @@
#pragma once
// Look up `key` in the generated name table (see triegen.py).
// Returns the number associated with the name, or -1 if it is not present.
int trie_get(const char* key);

74
triegen.py Executable file
View file

@ -0,0 +1,74 @@
#!/usr/bin/env python
# Python script to generate a trie using switch statements in C
import sys
import re
# Command line: triegen.py <input> <output>
if len(sys.argv) < 3:
    print("Not enough arguments. Usage: triegen.py <input> <output>")
    sys.exit(1)
input_file = sys.argv[1]
output_file = sys.argv[2]
# Each non-blank input line becomes a list of whitespace-separated fields
# (name, number). The pattern is a raw string: '\s' in a plain string literal
# is an invalid escape sequence and triggers a warning on current Python.
with open(input_file, 'r') as f:
    data = [re.split(r'\s+', stripped)
            for stripped in (l.strip() for l in f)
            if stripped]
# Build a nested-dict trie: every character of a name maps to a child dict,
# and the '\0' key of the final node holds the number for that name.
trie = {}
for entry in data:
    name = entry[0]
    number = int(entry[1])
    node = trie
    for ch in name:
        node = node.setdefault(ch, {})
    node['\0'] = number
def matches_exact(trie, start):
    """Detect a sub-trie that is a single unbranched chain to one value.

    Returns ``(suffix, value)`` when ``trie`` contains exactly one key path
    ending in a terminator, where ``suffix`` is the remaining characters.
    Returns ``False`` when the sub-trie branches, or when ``start`` is true
    and the sub-trie consists of the terminator alone (that case is left to
    the regular switch output).
    """
    if len(trie) != 1:
        return False
    if trie.get('\0') is not None:
        if start:
            return False
        return ('', trie.get('\0'))
    k, v = list(trie.items())[0]
    res = matches_exact(v, False)
    if res is False:
        return False
    return (k + res[0], res[1])


def write_trie(f, trie, depth):
    """Write C code to ``f`` that matches ``key`` from index ``depth`` on.

    An unbranched remainder is emitted as a single comparison; anything else
    becomes a ``switch`` on ``key[depth]`` with one case per child, recursing
    for each child. Below the top level the emitted code ends with ``break;``
    because it sits inside the parent's ``case``.
    """
    # At depth 0 there is no enclosing switch, so a trailing "break;" would
    # not compile.
    tail = " break;" if depth != 0 else ""
    if line := matches_exact(trie, True):
        if len(line[0]) == 1:
            # Also require the terminator right after the character;
            # otherwise any longer key sharing this prefix would match too.
            f.write('if(key[%s] == \'%s\' && key[%s] == \'\\0\') { return %s; }%s\n'
                    % (depth, line[0], depth + 1, line[1], tail))
        else:
            f.write('if(strcmp(key + %s, "%s") == 0) { return %s; }%s\n'
                    % (depth, line[0], line[1], tail))
        return
    ws = " " * (depth + 2)
    f.write("switch(key[%s]) {\n" % (depth))
    for k, v in trie.items():
        f.write("%scase %s: " % (ws, repr(k)))
        if k == '\0':
            f.write("return %s;\n" % (v))
        else:
            write_trie(f, v, depth + 1)
    f.write("%s}" % (" " * (depth + 1)))
    if depth != 0:
        f.write(" break;")
    f.write("\n")
# Emit the generated C file: a fixed header, the body of trie_get() produced
# from the trie, and a fallback return of -1 for names that are not present.
with open(output_file, 'w') as out:
    for header_part in (
        "#include <string.h>\n",
        "#include \"trie.h\"\n\n",
        "/* auto-generated by triegen.py */\n\n",
        "int trie_get(const char* key) {\n ",
    ):
        out.write(header_part)
    write_trie(out, trie, 0)
    out.write(" return -1;\n}\n")