xv6-sh魔改版实现详解

前言

关于xv6

抄了一下百度百科

Xv6是由麻省理工学院(MIT)为操作系统工程的课程（代号6.828）,开发的一个教学目的的操作系统。

Xv6是在x86处理器上(x即指x86)用ANSI标准C重新实现的Unix第六版(Unix V6，通常直接被称为V6)。Unix V6是1975年发布的，基于DEC PDP-11小型机，当时还没有x86系列CPU，而现在PDP的机器已经很少见了；当时使用的是标准ANSI C发布之前的旧式C语言。

与Linux或BSD系统不同，Xv6很简单，可以在一个学期讲完，全部代码只有8千行多，但仍包括了Unix的重要概念和组织结构。由于是基于较早的Unix V6，Xv6的结构与现代操作系统，如Linux，Windows的差距较大。

在MIT以外，很多其它大学也在操作系统课程中使用了Xv6或其变种，如耶鲁，清华等。

另外一个类似的教学用类Unix系统是著名的Minix。

关于xv6-sh魔改版

这是南京大学操作系统课程的jyy老师基于xv6自带的sh实现魔改的一个版本

它拥有如下优点：

能够实现基本的shell的大部分操作，包括：
- 命令执行
- 管道符
- ()括号表达式
- <>输出输入重定向
- ；分号分隔命令
- & 后台运行
库函数0依赖，不需要依赖libc，甚至不需要main函数
通过_start函数作为程序入口，通过-ffreestanding来编译，可以直接作为操作系统的init程序
可以学到一些c语言编写技巧

当然，它也有很多局限性，远远达不到现代优秀shell的功能，比如没有环境变量。

分析

先给出源码，这里删掉了许多注释，在文章的最后我会给出带注释版本的源码

需要注意的是，程序的编译需要使用-ffreestanding参数

gcc -ffreestanding -static -O2 -g -c sh-xv6.c && ld sh-xv6.o && ./a.out

#include <fcntl.h>
#include <stdarg.h>
#include <stddef.h>
#include <sys/syscall.h>
// Node kinds stored in the `type` field of every command node.
enum {
  EXEC = 1,  // struct execcmd
  REDIR,     // struct redircmd
  PIPE,      // struct pipecmd
  LIST,      // struct listcmd
  BACK       // struct backcmd
};

// Number of slots in the argv/eargv arrays of struct execcmd.
#define MAXARGS 10
#define NULL ((void *)0)

// Common header: every node type below starts with the same `int type`
// member, and the rest of the file casts between struct cmd* and the
// concrete node type after inspecting it.
struct cmd {
  int type;
};

// A simple command.  argv[i] points at the start of argument i inside the
// input line and eargv[i] at the byte just past its end.
struct execcmd {
  int type;
  char *argv[MAXARGS];
  char *eargv[MAXARGS];
};

// An I/O redirection wrapped around another command.
struct redircmd {
  int type;
  int fd;       // descriptor that gets replaced
  int mode;     // flags passed to open
  char *file;   // start of the file name inside the input line
  char *efile;  // byte just past the end of the file name
  struct cmd* cmd;
};

// left | right
struct pipecmd {
  int type;
  struct cmd *left;
  struct cmd *right;
};

// left ; right
struct listcmd {
  int type;
  struct cmd *left;
  struct cmd *right;
};

// cmd &
struct backcmd {
  int type;
  struct cmd* cmd;
};

struct cmd* parsecmd(char*);

// Minimal raw system-call wrapper built on the x86-64 `syscall` instruction.
//
// num is placed in rax and four further values, fetched from the variadic
// list as `long`, are placed in rdi, rsi, rdx and r10.  Exactly four values
// are always fetched, no matter how many the caller actually passed.
// Returns whatever is in rax after the instruction executes.
//
// NOTE(review): fetching more variadic arguments than were supplied is
// undefined behavior in ISO C; this presumably relies on how the x86-64
// calling convention lays out variadic arguments -- confirm before porting.
long syscall(int num, ...) {
  va_list ap;
  va_start(ap, num);
  // Pin each value to the register the instruction will read it from.
  register long a0 asm ("rax") = num;
  register long a1 asm ("rdi") = va_arg(ap, long);
  register long a2 asm ("rsi") = va_arg(ap, long);
  register long a3 asm ("rdx") = va_arg(ap, long);
  register long a4 asm ("r10") = va_arg(ap, long);
  va_end(ap);
  // a0 is read-write ("+r"): it carries the number in and the result out.
  // memory, rcx, r8, r9 and r11 are declared as clobbered.
  asm volatile("syscall"
    : "+r"(a0) : "r"(a1), "r"(a2), "r"(a3), "r"(a4)
    : "memory", "rcx", "r8", "r9", "r11");
  return a0;
}

// Number of bytes in the NUL-terminated string s, terminator excluded.
size_t strlen(const char *s) {
  const char *end = s;
  while (*end != '\0') {
    end++;
  }
  return (size_t)(end - s);
}

// Find the first byte equal to c in the NUL-terminated string s.
// Returns a pointer to it, or NULL when no byte before the terminator
// matches.  The terminator itself is never compared, so searching for 0
// yields NULL here.
char *strchr(const char *s, int c) {
  while (*s != '\0') {
    if (*s == c) {
      return (char *)s;
    }
    s++;
  }
  return NULL;
}

// Write each string argument to file descriptor 2, in order.
// The argument list must end with a null pointer; s itself may be null, in
// which case nothing is written.
void print(const char *s, ...) {
  va_list ap;
  va_start(ap, s);
  for (const char *piece = s; piece != NULL; piece = va_arg(ap, const char *)) {
    syscall(SYS_write, 2, piece, strlen(piece));
  }
  va_end(ap);
}


// Freestanding stand-in for assert(): when cond is false it prints
// "Assertion failed." through print() and then invokes SYS_exit with
// status 1.  cond is always evaluated (there is no NDEBUG-style switch), and
// some call sites in this file place calls with side effects inside it.
#define assert(cond) \
  do { if (!(cond)) { \
    print("Assertion failed.\n", NULL); \
    syscall(SYS_exit, 1); } \
  } while (0)

static char mem[4096], *freem = mem;
void *zalloc(size_t sz) {
  assert(freem + sz < mem + sizeof(mem));
  void *ret = freem;
  freem += sz;
  return ret;
}

// Execute the command tree rooted at cmd in the current process.  Never
// returns: every path ends in SYS_execve or SYS_exit.
//
//   EXEC  - execve(argv[0], argv, NULL); no PATH search is done here.
//   REDIR - close rcmd->fd, open rcmd->file with rcmd->mode, then run the
//           wrapped command (presumably relies on open handing back the
//           descriptor number that was just closed).
//   LIST  - run left in a forked child, wait for a child, then run right.
//   PIPE  - create a pipe, fork one child per side with its stdout/stdin
//           replaced by the matching pipe end, then wait for two children.
//   BACK  - run the wrapped command in a forked child without waiting.
//
// Exit status: 1 for a null cmd, an empty argv, a failed open or a failed
// execve; 0 otherwise.
void runcmd(struct cmd* cmd) {
  if (cmd == 0) syscall(SYS_exit, 1);

  switch (cmd->type) {
    case EXEC: {
      struct execcmd* ecmd = (struct execcmd*)cmd;
      if (ecmd->argv[0] == 0) syscall(SYS_exit, 1);
      syscall(SYS_execve, ecmd->argv[0], ecmd->argv, NULL);
      // Only reached when execve failed: report it and exit non-zero
      // instead of falling through to the success exit below.
      print("Failed to exec ", ecmd->argv[0], "\n", NULL);
      syscall(SYS_exit, 1);
      break;
    }

    case REDIR: {
      struct redircmd* rcmd = (struct redircmd*)cmd;
      syscall(SYS_close, rcmd->fd);
      if (syscall(SYS_open, rcmd->file, rcmd->mode, 0644) < 0) {
        print("Failed to open ", rcmd->file, "\n", NULL);
        syscall(SYS_exit, 1);
      }
      runcmd(rcmd->cmd);
      break;
    }

    case LIST: {
      struct listcmd* lcmd = (struct listcmd*)cmd;
      if (syscall(SYS_fork) == 0) runcmd(lcmd->left);
      syscall(SYS_wait4, -1, 0, 0, 0);
      runcmd(lcmd->right);
      break;
    }

    case PIPE: {
      struct pipecmd* pcmd = (struct pipecmd*)cmd;
      int p[2];
      assert(syscall(SYS_pipe, p) >= 0);
      if (syscall(SYS_fork) == 0) {
        // Left side: stdout becomes the write end.
        syscall(SYS_close, 1);
        syscall(SYS_dup, p[1]);
        syscall(SYS_close, p[0]);
        syscall(SYS_close, p[1]);
        runcmd(pcmd->left);
      }
      if (syscall(SYS_fork) == 0) {
        // Right side: stdin becomes the read end.
        syscall(SYS_close, 0);
        syscall(SYS_dup, p[0]);
        syscall(SYS_close, p[0]);
        syscall(SYS_close, p[1]);
        runcmd(pcmd->right);
      }
      // The parent keeps neither end open, then waits twice (pid -1 = any
      // child).
      syscall(SYS_close, p[0]);
      syscall(SYS_close, p[1]);
      syscall(SYS_wait4, -1, 0, 0, 0);
      syscall(SYS_wait4, -1, 0, 0, 0);
      break;
    }

    case BACK: {
      struct backcmd* bcmd = (struct backcmd*)cmd;
      if (syscall(SYS_fork) == 0) runcmd(bcmd->cmd);
      break;
    }

    default:
      assert(0);
  }
  syscall(SYS_exit, 0);
}

// Print the "@ " prompt, clear buf, then read one line from fd 0 a byte at a
// time.  At most nbuf - 1 bytes are stored, so buf stays NUL-terminated; the
// newline, when seen, is kept in buf.
// Returns 0 after a newline or once the buffer is full, and -1 as soon as a
// read returns 0 or a negative value.
int getcmd(char* buf, int nbuf) {
  char* cursor = buf;
  int room = nbuf;

  print("@ ", NULL);
  for (int i = 0; i < nbuf; i++) {
    buf[i] = '\0';
  }

  for (; room > 1; room--) {
    int got = syscall(SYS_read, 0, cursor, 1);
    if (got <= 0) {
      return -1;
    }
    char ch = *cursor++;
    if (ch == '\n') {
      break;
    }
  }
  return 0;
}

// Program entry point: the shell's read-eval loop.
//
// Each line read by getcmd() is either handled here as the "cd " builtin or
// parsed and run in a forked child while this process waits.  The loop ends
// when getcmd() reports failure, after which the shell exits with status 0.
void _start() {
  static char buf[100];

  // Read and run input commands.
  while (getcmd(buf, sizeof(buf)) >= 0) {
    if (buf[0] == 'c' && buf[1] == 'd' && buf[2] == ' ') {
      // Chdir must be called by the parent, not the child.
      size_t len = strlen(buf);  // at least 3 because of the "cd " prefix
      // Chop the newline only when there is one; a line that filled the
      // whole buffer has none and must keep its last character.
      if (buf[len - 1] == '\n') buf[len - 1] = 0;
      if (syscall(SYS_chdir, buf + 3) < 0) print("Can not cd to ", buf + 3, "\n", NULL);
      continue;
    }
    if (syscall(SYS_fork) == 0) runcmd(parsecmd(buf));
    syscall(SYS_wait4, -1, 0, 0, 0);
  }
  syscall(SYS_exit, 0);
}

// Constructors

// Allocate an EXEC node.  Only `type` is written here; the argument arrays
// are left as zalloc() returned them.
struct cmd* execcmd(void) {
  struct execcmd* node = zalloc(sizeof(struct execcmd));

  node->type = EXEC;
  return (struct cmd*)node;
}

// Allocate a REDIR node wrapping subcmd.  file/efile delimit the file name
// inside the input line, mode is the open() flag set and fd the descriptor
// to replace.
struct cmd* redircmd(struct cmd* subcmd, char* file, char* efile, int mode,
                     int fd) {
  struct redircmd* node = zalloc(sizeof(struct redircmd));

  node->type = REDIR;
  node->fd = fd;
  node->mode = mode;
  node->file = file;
  node->efile = efile;
  node->cmd = subcmd;
  return (struct cmd*)node;
}

// Allocate a PIPE node joining left and right.
struct cmd* pipecmd(struct cmd* left, struct cmd* right) {
  struct pipecmd* node = zalloc(sizeof(struct pipecmd));

  node->type = PIPE;
  node->left = left;
  node->right = right;
  return (struct cmd*)node;
}

// Allocate a LIST node: left runs first, then right.
struct cmd* listcmd(struct cmd* left, struct cmd* right) {
  struct listcmd* node = zalloc(sizeof(struct listcmd));

  node->type = LIST;
  node->left = left;
  node->right = right;
  return (struct cmd*)node;
}

// Allocate a BACK node wrapping subcmd.
struct cmd* backcmd(struct cmd* subcmd) {
  struct backcmd* node = zalloc(sizeof(struct backcmd));

  node->type = BACK;
  node->cmd = subcmd;
  return (struct cmd*)node;
}

// Parsing

// Bytes skipped between tokens, and bytes that end a word.
char whitespace[] = " \t\r\n\v";
char symbols[] = "<|>&;()";

// Scan one token from *ps, never reading words past es.
//
// Leading whitespace is skipped first.  When q / eq are non-null they
// receive the token's first byte and the byte just past its last.  *ps is
// left on the first non-whitespace byte after the token.
//
// Returns 0 at the end of the string, the symbol itself for | ( ) ; & < >,
// '+' for ">>", and 'a' for a word (a run of bytes that are neither
// whitespace nor symbols).
int gettoken(char** ps, char* es, char** q, char** eq) {
  char* cur = *ps;

  while (cur < es && strchr(whitespace, *cur)) cur++;
  if (q) *q = cur;

  int kind = *cur;
  if (kind == 0) {
    // End of input: nothing to consume.
  } else if (kind == '>') {
    cur++;
    if (*cur == '>') {
      kind = '+';
      cur++;
    }
  } else if (kind == '|' || kind == '(' || kind == ')' ||
             kind == ';' || kind == '&' || kind == '<') {
    cur++;
  } else {
    kind = 'a';
    while (cur < es && !(strchr(whitespace, *cur) || strchr(symbols, *cur))) {
      cur++;
    }
  }
  if (eq) *eq = cur;

  while (cur < es && strchr(whitespace, *cur)) cur++;
  *ps = cur;
  return kind;
}

// Skip whitespace at *ps (updating *ps) and report whether the next byte is
// one of the characters in toks.  Returns 1 if so, 0 otherwise, including
// when the next byte is the string terminator.
int peek(char** ps, char* es, char* toks) {
  char* cur = *ps;

  while (cur < es && strchr(whitespace, *cur)) cur++;
  *ps = cur;

  if (*cur == '\0') {
    return 0;
  }
  return strchr(toks, *cur) != 0;
}

struct cmd* parseline(char**, char*);
struct cmd* parsepipe(char**, char*);
struct cmd* parseexec(char**, char*);
struct cmd* nulterminate(struct cmd*);

// Parse the whole NUL-terminated line s into a command tree.
// Asserts that nothing but whitespace is left after parsing, then writes the
// string terminators into the line via nulterminate() and returns the tree.
struct cmd* parsecmd(char* s) {
  char* end = s + strlen(s);
  struct cmd* tree = parseline(&s, end);

  peek(&s, end, "");  // only used to skip trailing whitespace
  assert(s == end);
  nulterminate(tree);
  return tree;
}

// line := pipe ('&')* [';' line]
// Each '&' wraps what has been parsed so far in a BACK node; a ';' joins it
// with the recursively parsed remainder in a LIST node.
struct cmd* parseline(char** ps, char* es) {
  struct cmd* node = parsepipe(ps, es);

  while (peek(ps, es, "&")) {
    gettoken(ps, es, 0, 0);
    node = backcmd(node);
  }

  if (!peek(ps, es, ";")) {
    return node;
  }
  gettoken(ps, es, 0, 0);
  struct cmd* rest = parseline(ps, es);
  return listcmd(node, rest);
}

// pipe := exec ['|' pipe]
// The right-hand side is parsed recursively, so a | b | c nests to the
// right.
struct cmd* parsepipe(char** ps, char* es) {
  struct cmd* lhs = parseexec(ps, es);

  if (!peek(ps, es, "|")) {
    return lhs;
  }
  gettoken(ps, es, 0, 0);
  struct cmd* rhs = parsepipe(ps, es);
  return pipecmd(lhs, rhs);
}

// Consume any run of redirections at *ps and wrap cmd in one REDIR node per
// redirection (the last one parsed ends up outermost).
//
//   < file    open read-only on fd 0
//   > file    open write-only on fd 1, creating and truncating
//   >> file   open write-only on fd 1, creating and appending
//
// Each operator must be followed by a word; anything else trips the
// assertion.  Returns the possibly wrapped command.
struct cmd* parseredirs(struct cmd* cmd, char** ps, char* es) {
  while (peek(ps, es, "<>")) {
    char *q, *eq;
    int tok = gettoken(ps, es, 0, 0);
    // Fetch the file name outside assert() so the parse step does not hide
    // inside an assertion expression.
    int name_tok = gettoken(ps, es, &q, &eq);
    assert(name_tok == 'a');

    switch (tok) {
      case '<':
        cmd = redircmd(cmd, q, eq, O_RDONLY, 0);
        break;
      case '>':
        cmd = redircmd(cmd, q, eq, O_WRONLY | O_CREAT | O_TRUNC, 1);
        break;
      case '+':  // >>
        // O_APPEND is required: without it writes start at offset 0 and
        // overwrite the existing contents instead of appending.
        cmd = redircmd(cmd, q, eq, O_WRONLY | O_CREAT | O_APPEND, 1);
        break;
    }
  }
  return cmd;
}

// block := '(' line ')' redirs
// Asserts on a missing parenthesis.  Redirections written after the closing
// parenthesis wrap the whole parenthesized line.
struct cmd* parseblock(char** ps, char* es) {
  assert(peek(ps, es, "("));
  gettoken(ps, es, 0, 0);

  struct cmd* inner = parseline(ps, es);

  assert(peek(ps, es, ")"));
  gettoken(ps, es, 0, 0);

  return parseredirs(inner, ps, es);
}

// exec := block | (word | redir)*
// Collects words into the EXEC node's argv/eargv until one of | ) & ; or the
// end of the line is reached.  Redirections may appear before, between and
// after words; each one wraps the node returned so far.  Asserts when a
// token is not a word or when MAXARGS would be reached.
struct cmd* parseexec(char** ps, char* es) {
  if (peek(ps, es, "(")) return parseblock(ps, es);

  struct cmd* top = execcmd();
  struct execcmd* ex = (struct execcmd*)top;
  int n = 0;

  top = parseredirs(top, ps, es);
  for (;;) {
    if (peek(ps, es, "|)&;")) break;

    char *start, *stop;
    int tok = gettoken(ps, es, &start, &stop);
    if (tok == 0) break;
    assert(tok == 'a');

    ex->argv[n] = start;
    ex->eargv[n] = stop;
    n++;
    assert(n < MAXARGS);  // keeps one slot free for the terminating null

    top = parseredirs(top, ps, es);
  }
  ex->argv[n] = 0;
  ex->eargv[n] = 0;
  return top;
}

// Walk the tree and write a 0 byte at the recorded end of every argument
// (eargv[i]) and every redirection file name (efile), turning the slices of
// the input line into NUL-terminated strings.  Returns cmd; a null cmd
// yields 0.
struct cmd* nulterminate(struct cmd* cmd) {
  if (cmd == 0) return 0;

  switch (cmd->type) {
    case EXEC: {
      struct execcmd* ex = (struct execcmd*)cmd;
      int i = 0;
      while (ex->argv[i]) {
        *ex->eargv[i] = 0;
        i++;
      }
      break;
    }

    case REDIR: {
      struct redircmd* rd = (struct redircmd*)cmd;
      nulterminate(rd->cmd);
      *rd->efile = 0;
      break;
    }

    case PIPE: {
      struct pipecmd* pp = (struct pipecmd*)cmd;
      nulterminate(pp->left);
      nulterminate(pp->right);
      break;
    }

    case LIST: {
      struct listcmd* ls = (struct listcmd*)cmd;
      nulterminate(ls->left);
      nulterminate(ls->right);
      break;
    }

    case BACK: {
      struct backcmd* bg = (struct backcmd*)cmd;
      nulterminate(bg->cmd);
      break;
    }
  }
  return cmd;
}

总体流程

一个标准的shell流程来说还算是比较简单，通过阅读_start()大致可以有如下流程

接受输入
判断是否是cd命令
执行命令
- 拆分各种不同类型的命令
- 按照对应的命令类型执行并返回输出
重复以上流程

(0) `_start()`源码

// Program entry point: the shell's read-eval loop.
//
// Each line read by getcmd() is either handled here as the "cd " builtin or
// parsed and run in a forked child while this process waits.  The loop ends
// when getcmd() reports failure, after which the shell exits with status 0.
void _start() {
  static char buf[100];

  // Read and run input commands.
  while (getcmd(buf, sizeof(buf)) >= 0) {
    if (buf[0] == 'c' && buf[1] == 'd' && buf[2] == ' ') {
      // Chdir must be called by the parent, not the child.
      size_t len = strlen(buf);  // at least 3 because of the "cd " prefix
      // Chop the newline only when there is one; a line that filled the
      // whole buffer has none and must keep its last character.
      if (buf[len - 1] == '\n') buf[len - 1] = 0;
      if (syscall(SYS_chdir, buf + 3) < 0) print("Can not cd to ", buf + 3, "\n", NULL);
      continue;
    }
    if (syscall(SYS_fork) == 0) runcmd(parsecmd(buf));
    syscall(SYS_wait4, -1, 0, 0, 0);
  }
  syscall(SYS_exit, 0);
}

(1) 接受用户输入的cmdline

通过while语句以及getcmd函数来循环接收用户输入，并执行。

getcmd()

其中getcmd里面会打印我们的输入命令提示符@ ，然后通过SYS_read系统调用来读取stdin的数据。

使用man 2 read查阅SYS_read系统调用的相关文档。

#include <unistd.h>

ssize_t read(int fd, void *buf, size_t count);

read() attempts to read up to count bytes from file descriptor fd into the buffer starting at buf.

顺带说一句，stdin的软链接关系是：/dev/stdin => /proc/self/fd/0

值得一提的是因为没有libc，这里的print也是使用syscall来简单执行的。

// Print the "@ " prompt, clear buf, then read one line from fd 0 a byte at a
// time.  At most nbuf - 1 bytes are stored, so buf stays NUL-terminated; the
// newline, when seen, is kept in buf.
// Returns 0 after a newline or once the buffer is full, and -1 as soon as a
// read returns 0 or a negative value.
int getcmd(char* buf, int nbuf) {
  char* cursor = buf;
  int room = nbuf;

  print("@ ", NULL);
  for (int i = 0; i < nbuf; i++) {
    buf[i] = '\0';
  }

  for (; room > 1; room--) {
    int got = syscall(SYS_read, 0, cursor, 1);
    if (got <= 0) {
      return -1;
    }
    char ch = *cursor++;
    if (ch == '\n') {
      break;
    }
  }
  return 0;
}

print()

自制print函数里面涉及到了va_list可变函数参数，依然可以通过文档查阅，并且阅读代码很好理解在做些什么。

man va_list

va_list Include: <stdarg.h>. Alternatively, <stdio.h>, or <wchar.h>.

Used by functions with a varying number of arguments of varying types. The function must declare an object of type va_list which is used by the macros va_start(3), va_arg(3), va_copy(3), and va_end(3) to traverse the list of arguments.

Conforming to: C99 and later; POSIX.1-2001 and later.

See also: va_start(3), va_arg(3), va_copy(3), va_end(3)

其中调用了SYS_write往stderr输出内容，其实也可以往stdout输出内容，在这个程序里两者体现出来的效果是一样的，只是print函数更多是在程序出错的时候被调用，所以直接往stderr里面写了。同样为了学习系统调用，我们可以查阅文档。

man 2 write

ssize_t write(int fd, const void *buf, size_t count);

write() writes up to count bytes from the buffer starting at buf to the file referred to by the file descriptor fd.

其中的符号链接关系

/dev/stdout => /proc/self/fd/1

/dev/stderr => /proc/self/fd/2

当然，依然是没有libc的缘故，我们的strlen函数也是自己简单实现的，因为代码很简单，可以简单看一眼。

// Write each string argument to file descriptor 2, in order.
// The argument list must end with a null pointer; s itself may be null, in
// which case nothing is written.
void print(const char *s, ...) {
  va_list ap;
  va_start(ap, s);
  for (const char *piece = s; piece != NULL; piece = va_arg(ap, const char *)) {
    syscall(SYS_write, 2, piece, strlen(piece));
  }
  va_end(ap);
}

(2) 判断是否是cd命令

这里有一句很重要的判断是，判断我们的命令是否是cd命令

if (buf[0] == 'c' && buf[1] == 'd' && buf[2] == ' ') {
      // Chdir must be called by the parent, not the child.
      size_t len = strlen(buf);  // at least 3 because of the "cd " prefix
      // Chop the newline only when there is one; a line that filled the
      // whole buffer has none and must keep its last character.
      if (buf[len - 1] == '\n') buf[len - 1] = 0;
      if (syscall(SYS_chdir, buf + 3) < 0) print("Can not cd to ", buf + 3, "\n", NULL);
      continue;
}

听jyy老师说这里写判断的c语言很古老，也很具有Unix风格~

因为cd这个命令比较特殊，cd在linux里面不是一个独立的可执行程序，没法通过fork+execve执行（在子进程里切换目录不会影响父进程也就是shell自己的工作目录），而是由shell自己通过chdir这个syscall交给linux内核处理。具体linux内核怎么处理呢，猜测是将对应cd的目标文件夹链接到此程序的/proc/[PID]/cwd（大致在proc里面看是这样，具体的实现需要在文件系统实现那边具体学习）

(3) SYS_fork

SYS_fork解释起来比较麻烦，建议读者自行百度，fork()就是把你现在这个程序的状态机完完整整的复制一份，SYS_fork系统调用返回0给子进程，返回子进程的pid给父进程。fork+execve是操作系统底层创建新进程的基本逻辑。

这里fork就是为接下来执行命令准备的。

具体可以查阅fork的手册man 2 fork

fork() creates a new process by duplicating the calling process. The new process is referred to as the child process. The calling process is referred to as the parent process.

(4) 解析cmdline

解析cmdline会涉及到许多精巧的算法和技巧，本人这方面很薄弱，不多做解读。

parsecmd()

// Parse the whole NUL-terminated line s into a command tree.
// Asserts that nothing but whitespace is left after parsing, then writes the
// string terminators into the line via nulterminate() and returns the tree.
struct cmd* parsecmd(char* s) {
  char* end = s + strlen(s);
  struct cmd* tree = parseline(&s, end);

  peek(&s, end, "");  // only used to skip trailing whitespace
  assert(s == end);
  nulterminate(tree);
  return tree;
}

parsecmd()通过parseline()和nulterminate()两个函数来解析命令的类型和内容

parseline()

// line := pipe ('&')* [';' line]
// Each '&' wraps what has been parsed so far in a BACK node; a ';' joins it
// with the recursively parsed remainder in a LIST node.
struct cmd* parseline(char** ps, char* es) {
  struct cmd* node = parsepipe(ps, es);

  while (peek(ps, es, "&")) {
    gettoken(ps, es, 0, 0);
    node = backcmd(node);
  }

  if (!peek(ps, es, ";")) {
    return node;
  }
  gettoken(ps, es, 0, 0);
  struct cmd* rest = parseline(ps, es);
  return listcmd(node, rest);
}

解析&以及;以及|。并递归解析。

在解析管道符的时候调用parsepipe()，里面会递归解析，以及调用parseexec()深度解析命令

parsepipe()

// pipe := exec ['|' pipe]
// The right-hand side is parsed recursively, so a | b | c nests to the
// right.
struct cmd* parsepipe(char** ps, char* es) {
  struct cmd* lhs = parseexec(ps, es);

  if (!peek(ps, es, "|")) {
    return lhs;
  }
  gettoken(ps, es, 0, 0);
  struct cmd* rhs = parsepipe(ps, es);
  return pipecmd(lhs, rhs);
}

parseexec()

// exec := block | (word | redir)*
// Collects words into the EXEC node's argv/eargv until one of | ) & ; or the
// end of the line is reached.  Redirections may appear before, between and
// after words; each one wraps the node returned so far.  Asserts when a
// token is not a word or when MAXARGS would be reached.
struct cmd* parseexec(char** ps, char* es) {
  if (peek(ps, es, "(")) return parseblock(ps, es);

  struct cmd* top = execcmd();
  struct execcmd* ex = (struct execcmd*)top;
  int n = 0;

  top = parseredirs(top, ps, es);
  for (;;) {
    if (peek(ps, es, "|)&;")) break;

    char *start, *stop;
    int tok = gettoken(ps, es, &start, &stop);
    if (tok == 0) break;
    assert(tok == 'a');

    ex->argv[n] = start;
    ex->eargv[n] = stop;
    n++;
    assert(n < MAXARGS);  // keeps one slot free for the terminating null

    top = parseredirs(top, ps, es);
  }
  ex->argv[n] = 0;
  ex->eargv[n] = 0;
  return top;
}

当出现(时调用parseblock()，最后再调用parseredirs()函数完成对文件重定向的操作解析。

最后的最后，递归调用nulterminate()遍历整棵命令树，在每个参数和重定向文件名的末尾写入'\0'，把输入行中的各个片段截断成独立的C字符串。

(5) SYS_execve

runcmd()

runcmd()函数的实现也十分的精巧，通过不同类型的命令，在parsecmd()生成不同的命令二叉树，再通过下面系统调用的机制来执行命令二叉树。

// Execute the command tree rooted at cmd in the current process.  Never
// returns: every path ends in SYS_execve or SYS_exit.
//
//   EXEC  - execve(argv[0], argv, NULL); no PATH search is done here.
//   REDIR - close rcmd->fd, open rcmd->file with rcmd->mode, then run the
//           wrapped command (presumably relies on open handing back the
//           descriptor number that was just closed).
//   LIST  - run left in a forked child, wait for a child, then run right.
//   PIPE  - create a pipe, fork one child per side with its stdout/stdin
//           replaced by the matching pipe end, then wait for two children.
//   BACK  - run the wrapped command in a forked child without waiting.
//
// Exit status: 1 for a null cmd, an empty argv, a failed open or a failed
// execve; 0 otherwise.
void runcmd(struct cmd* cmd) {
  if (cmd == 0) syscall(SYS_exit, 1);

  switch (cmd->type) {
    case EXEC: {
      struct execcmd* ecmd = (struct execcmd*)cmd;
      if (ecmd->argv[0] == 0) syscall(SYS_exit, 1);
      syscall(SYS_execve, ecmd->argv[0], ecmd->argv, NULL);
      // Only reached when execve failed: report it and exit non-zero
      // instead of falling through to the success exit below.
      print("Failed to exec ", ecmd->argv[0], "\n", NULL);
      syscall(SYS_exit, 1);
      break;
    }

    case REDIR: {
      struct redircmd* rcmd = (struct redircmd*)cmd;
      syscall(SYS_close, rcmd->fd);
      if (syscall(SYS_open, rcmd->file, rcmd->mode, 0644) < 0) {
        print("Failed to open ", rcmd->file, "\n", NULL);
        syscall(SYS_exit, 1);
      }
      runcmd(rcmd->cmd);
      break;
    }

    case LIST: {
      struct listcmd* lcmd = (struct listcmd*)cmd;
      if (syscall(SYS_fork) == 0) runcmd(lcmd->left);
      syscall(SYS_wait4, -1, 0, 0, 0);
      runcmd(lcmd->right);
      break;
    }

    case PIPE: {
      struct pipecmd* pcmd = (struct pipecmd*)cmd;
      int p[2];
      assert(syscall(SYS_pipe, p) >= 0);
      if (syscall(SYS_fork) == 0) {
        // Left side: stdout becomes the write end.
        syscall(SYS_close, 1);
        syscall(SYS_dup, p[1]);
        syscall(SYS_close, p[0]);
        syscall(SYS_close, p[1]);
        // recursive call
        runcmd(pcmd->left);
      }
      if (syscall(SYS_fork) == 0) {
        // Right side: stdin becomes the read end.
        syscall(SYS_close, 0);
        syscall(SYS_dup, p[0]);
        syscall(SYS_close, p[0]);
        syscall(SYS_close, p[1]);
        // recursive call
        runcmd(pcmd->right);
      }
      // The parent keeps neither end open, then waits twice.
      syscall(SYS_close, p[0]);
      syscall(SYS_close, p[1]);
      // pid -1 meaning wait for any child process.
      // man 2 wait4
      syscall(SYS_wait4, -1, 0, 0, 0);
      syscall(SYS_wait4, -1, 0, 0, 0);
      break;
    }

    case BACK: {
      struct backcmd* bcmd = (struct backcmd*)cmd;
      if (syscall(SYS_fork) == 0) runcmd(bcmd->cmd);
      break;
    }

    default:
      assert(0);
  }
  syscall(SYS_exit, 0);
}

EXEC

很简单，就是execve()系统调用即可，重置当前状态机为新程序的初始化状态。

PIPE

PIPE的实现就很绝妙了，我们查看SYS_pipe的手册，man 2 pipe

pipe() creates a pipe, a unidirectional data channel that can be used for interprocess communication.

The array pipefd is used to return two file descriptors referring to the ends of the pipe.

pipefd[0] refers to the read end of the pipe. pipefd[1] refers to the write end of the pipe. Data written to the write end of the pipe is buffered by the kernel until it is read from the read end of the pipe.

For further details, see pipe(7).

EXAMPLE

The following program creates a pipe, and then fork(2)s to create a child process; the child inherits a duplicate set of file descriptors that refer to the same pipe. After the fork(2), each process closes the file descriptors that it doesn’t need for the pipe (see pipe(7)). The parent then writes the string contained in the program’s command-line argument to the pipe, and the child reads this string a byte at a time from the pipe and echoes it on standard output.

其实看手册就可以很清楚的了解到如何在一个c语言程序里面用一个管道连接两个应用程序。

SYS_pipe返回的数组的1号元素是数据写口的fd（0号元素是读口），然后通过fork，将第一个子进程的stdout连接到写口p[1]上，关闭其他无用的fd。

同理，将第二个fork后的子进程的stdin连接到数组的0号元素（读口p[0]），关闭其他无用的fd。

这样就实现了两个进程的PIPE连接。

(6) 父进程SYS_wait4

syscall(SYS_wait4, -1, 0, 0, 0);

man 2 wait4

pid -1 meaning wait for any child process.

wait子进程执行完毕之后，进入下一轮的流程。

socket编程部分

这部分是老师要求加入的功能，需要通过tcp把你执行过的命令传输到另外一台主机(127.0.0.1)然后写到log文件里面。

其实很简单，思路如下：

通过socket编程把cmdline的内容传输到server端
- 在sh执行命令的时候加上一个发送的函数即可。
server端调用open，write等syscall写文件即可
- Server端后台启动一直运行并忽略标准输出，nohup ./server > /dev/null &

下面是示例代码

server

// Standard I/O (printf, snprintf)
#include <stdio.h>
// String helpers
#include <string.h>
// Socket programming
#include <sys/types.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <netinet/ip.h>
// #include <pthread.h>
// System-call numbers (SYS_*)
#include <syscall.h>
// open() flags and permission bits
// #include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
// close(), write(), syscall()
#include <unistd.h>

/*
 * UDP log server.
 *
 * Binds a datagram socket to port 11451 on all local addresses, then for
 * every datagram received appends one line of the form
 * "[*] recived cmdline: <payload>\n" to ./cmdline_history, creating the file
 * (mode 0600) when it does not exist yet.
 *
 * Returns 1 when the socket cannot be created or bound, or when receiving
 * fails; otherwise it keeps serving and never returns.
 */
int main(void)
{
	/* Largest payload accepted from one datagram. */
	enum { MAX_CMDLINE = 512 };
	/* One spare byte so the payload can always be NUL-terminated. */
	char acBuf[MAX_CMDLINE + 1];
	/* Room for the fixed prefix, a full payload, the newline and the NUL. */
	char cmdline[MAX_CMDLINE + 64];
	struct sockaddr_in stLocalAddr = {0};
	int iSocketFD;

	/* Create the socket. */
	iSocketFD = socket(AF_INET, SOCK_DGRAM, 0);
	if (iSocketFD < 0)
	{
		printf("Create socket failed!\n");
		return 1;
	}

	/* Fill in the local address. */
	stLocalAddr.sin_family = AF_INET;
	stLocalAddr.sin_port   = htons(11451);
	stLocalAddr.sin_addr.s_addr = htonl(INADDR_ANY);

	/* Bind the address. */
	if (bind(iSocketFD, (struct sockaddr *)&stLocalAddr, sizeof(stLocalAddr)) < 0)
	{
		printf("Bind to port 11451 failed!\n");
		close(iSocketFD);
		return 1;
	}

	for (;;)
	{
		/* The sender's address is not used, so it is not requested. */
		ssize_t iRecvLen = recvfrom(iSocketFD, acBuf, MAX_CMDLINE, 0, NULL, NULL);
		if (iRecvLen < 0)
		{
			printf("Receive from socket failed!\n");
			break;
		}
		/* Terminate at the received length so bytes left over from an
		 * earlier, longer datagram are never logged again. */
		acBuf[iRecvLen] = '\0';

		int iLineLen = snprintf(cmdline, sizeof(cmdline),
		                        "[*] recived cmdline: %s\n", acBuf);
		if (iLineLen < 0 || (size_t)iLineLen >= sizeof(cmdline))
		{
			continue;
		}

		/* A single open() both creates the file when missing and appends. */
		int fd = open("./cmdline_history", O_WRONLY | O_CREAT | O_APPEND,
		              S_IWUSR | S_IRUSR);
		if (fd < 0)
		{
			printf("Open ./cmdline_history failed!\n");
			continue;
		}
		if (write(fd, cmdline, (size_t)iLineLen) < 0)
		{
			printf("Write ./cmdline_history failed!\n");
		}
		close(fd);
	}

	close(iSocketFD);
	return 1;
}

client（集成到sh里面的一个函数）

#include <stdio.h>
#include <string.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <netinet/ip.h>
#include <arpa/inet.h>

/*
 * Send cmdline as a single UDP datagram to 127.0.0.1:11451.
 *
 * Best effort: failures are reported with printf and otherwise ignored, and
 * a NULL cmdline is silently skipped.  The datagram carries strlen(cmdline)
 * bytes, i.e. no terminating NUL.
 *
 * close() is declared in <unistd.h>; the file hosting this function has to
 * include it.
 */
void sendCmdlinesToServer(const char * cmdline) {
	struct sockaddr_in stServerAddr = {0};
	int iSocketFD;

	if (cmdline == NULL)
	{
		return;
	}

	/* Create the socket. */
	iSocketFD = socket(AF_INET, SOCK_DGRAM, 0);
	if (iSocketFD < 0)
	{
		printf("Send cmdline to server error, Create socket failed \n");
		return;
	}

	stServerAddr.sin_family = AF_INET;
	stServerAddr.sin_port   = htons(11451);
	if (inet_pton(AF_INET, "127.0.0.1", &stServerAddr.sin_addr) != 1)
	{
		printf("Send cmdline to server error, bad server address \n");
		close(iSocketFD);
		return;
	}

	if (sendto(iSocketFD, cmdline, strlen(cmdline), 0,
	           (struct sockaddr *)&stServerAddr, sizeof(stServerAddr)) < 0)
	{
		printf("Send cmdline to server error, sendto failed \n");
	}
	close(iSocketFD);
}

说明

没有解决tcp粘包的问题（注意：上面的示例代码实际创建的是SOCK_DGRAM套接字，也就是UDP；UDP按数据报收发，本身不存在粘包，如果按要求改用TCP的SOCK_STREAM，才需要自己处理消息边界）
还可以进行优化，但是先把作业交了再说 :)
client集成后，多加入了很多标准库，所以需要把_start()改成main()，并且去掉-ffreestanding
注意打开文件的时候，第一次要判断文件是否存在，不存在要先用O_CREAT试探的创造文件
- 具体可以参照open的手册，man 2 open（O_CREAT等标志位都在这里说明）
注意调用SYS_close
最后效果截图

心得

理解逻辑、找对材料、用对工具，你就拥有了Linux的全世界~