100行代码实现tcp/ip协议栈

以太网协议头:

ip协议头:

udp协议头:

tcp协议头:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
#define ETH_ADDR_LEN 6 /* length of a MAC address in bytes */

/*
 * Ethernet frame header: destination MAC, source MAC, EtherType.
 * The tag is spelled `ethhdr` so that it matches its users
 * (e.g. `struct ethhdr eh;` inside struct udppkt).
 */
struct ethhdr {
    unsigned char dst_mac[ETH_ADDR_LEN];
    unsigned char src_mac[ETH_ADDR_LEN];
    unsigned short type; /* EtherType: selects IP or ARP */
};

/*
 * IPv4 header (20 bytes when no options are present).
 *
 * The first byte on the wire carries the version in its high nibble and the
 * header length in its low nibble. Bit-field allocation order depends on the
 * host byte order, so the two nibbles are declared in the order that matches
 * the wire layout on each kind of host.
 */
struct iphdr {
#if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
    unsigned char version:4,
                  hdrlen:4;
#else
    unsigned char hdrlen:4,   /* header length in 32-bit words: max 15 * 4 = 60 bytes */
                  version:4;
#endif
    unsigned char tos;        /* type of service (historically used for e.g. IP telephony) */
    unsigned short totlen;    /* total packet length, 16 bits: at most 65535 bytes; not the same thing as the MTU */
    unsigned short id;
    /*
     * NOTE(review): flags/offset share one big-endian 16-bit word on the wire;
     * on a little-endian host these two bit-fields do not line up with it, so
     * the word has to be byte-swapped and masked by hand before use.
     */
    unsigned short flag:3,
                   offset:13;
    unsigned char ttl;        /* time to live, 64 by default */
    unsigned char proto;      /* label on the outside of the box: says whether TCP or UDP is inside */
    unsigned short check;
    unsigned int sip;         /* addresses live at the IP layer */
    unsigned int dip;
};

/*
 * UDP header: four 16-bit fields, 8 bytes in total.
 * All multi-byte fields are carried in network byte order on the wire.
 */
struct udphdr {
    unsigned short sport; /* source port: ports live at the transport layer */
    unsigned short dport; /* destination port */
    unsigned short len;   /* length of UDP header plus payload, in bytes */
    unsigned short check; /* checksum */
};

struct udppkt {
struct ethhdr eh;
struct iphdr ip;
struct udphdr udp;
unsigned char payload[0]; // 柔性数组 sizeof 为0,内存池里面也有用
};

raw socket/netmap/dpdk/pf_ring/ebpf 可以从网络里面抓取完整的数据包,写完后用网络调试助手测试。

关键代码:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
/* The descriptor is readable: fetch one raw frame and dispatch it. */
if (pfd.revents & POLLIN) {
    unsigned char *stream = NULL;
    nty_nic_read(ctx, &stream); /* read the raw data */
    /*
     * stream is still NULL if nty_nic_read did not hand back a buffer;
     * nty_eth_process dereferences it unconditionally, so skip it then.
     */
    if (stream != NULL) {
        nty_eth_process(ctx, stream);
    }
}

/*
 * Dispatch one raw Ethernet frame by the protocol field of its header.
 *
 * ctx:    NIC context, passed through to the protocol handler.
 * stream: start of the frame (the Ethernet header).
 *
 * IP frames go to nty_ipv4_process and ARP frames to nty_arp_process; any
 * other protocol is ignored. Always returns 0.
 */
static int nty_eth_process(nty_nic_context *ctx, unsigned char *stream) {
    const struct ethhdr *frame = (const struct ethhdr *)stream;
    const unsigned short ether_type = ntohs(frame->h_proto);

    /* hand the frame to the handler for its protocol */
    if (ether_type == PROTO_IP) {
        nty_ipv4_process(ctx, stream);
        return 0;
    }
    if (ether_type == PROTO_ARP) {
        nty_arp_process(ctx, stream);
    }

    return 0;
}

/*
 * Handle an IPv4 packet that follows the Ethernet header in `stream`.
 *
 * Returns -1 when ip_fast_csum reports a nonzero result for the header,
 * otherwise dispatches on the IP protocol field (UDP, TCP or ICMP; anything
 * else is ignored) and returns 0.
 */
int nty_ipv4_process(nty_nic_context *ctx, unsigned char *stream) {
    struct iphdr *ip = (struct iphdr *)(stream + sizeof(struct ethhdr));

    if (ip_fast_csum(ip, ip->ihl) != 0) {
        return -1;
    }

    const unsigned char l4_proto = ip->protocol;
    if (l4_proto == PROTO_UDP) {
        nty_udp_process(ctx, stream); /* UDP handling */
        return 0;
    }
    if (l4_proto == PROTO_TCP) {
        nty_tcp_process(ctx, stream);
        return 0;
    }
    if (l4_proto == PROTO_ICMP) {
        nty_icmp_process(ctx, stream);
    }
    return 0;
}

/*
 * Handle a UDP packet: NUL-terminate its payload, build a reply with the
 * addresses and ports swapped, and send the reply back out.
 *
 * ctx:    NIC context used for sending.
 * stream: start of the frame; interpreted as a struct udppkt.
 *
 * Returns 0 on success, -1 when the UDP length field is smaller than the
 * UDP header itself (malformed packet).
 */
int nty_udp_process(nty_nic_context *ctx, unsigned char *stream) {
    enum { UDP_HEADER_LEN = 8 }; /* the length field counts header + payload */
    struct udppkt *udph = (struct udppkt *)stream;

    int udp_length = ntohs(udph->udp.len);
    /*
     * The length comes straight off the wire; anything below the header size
     * would turn the index below into a negative offset.
     */
    if (udp_length < UDP_HEADER_LEN) {
        return -1;
    }
    /* NOTE(review): no upper bound is applied here; confirm the capacity of body. */
    udph->body[udp_length - UDP_HEADER_LEN] = '\0';

    struct udppkt udph_rt;
    nty_udp_pkt(udph, &udph_rt); /* build the reply */
    nty_nic_write(ctx, &udph_rt, sizeof(struct udppkt)); /* sending only needs the struct's address and size */

    return 0;
}

/*
 * Build a reply packet in `udp_rt` from the received packet `udp`.
 *
 * The reply starts as a byte-for-byte copy of sizeof(struct udppkt) bytes;
 * then source and destination are exchanged at each layer: MAC addresses,
 * IP addresses and UDP ports.
 */
void nty_udp_pkt(struct udppkt *udp, struct udppkt *udp_rt) {
    memcpy(udp_rt, udp, sizeof(*udp_rt));

    /* Ethernet: reply goes back to the sender's MAC */
    memcpy(udp_rt->eh.h_source, udp->eh.h_dest, ETH_ALEN);
    memcpy(udp_rt->eh.h_dest, udp->eh.h_source, ETH_ALEN);

    /* IP: swap the two addresses */
    udp_rt->ip.daddr = udp->ip.saddr;
    udp_rt->ip.saddr = udp->ip.daddr;

    /* UDP: swap the two ports */
    udp_rt->udp.dest = udp->udp.source;
    udp_rt->udp.source = udp->udp.dest;
}

/*
 * Send `length` bytes starting at `stream` through the netmap descriptor
 * held in `ctx`.
 *
 * Returns:
 *    0  on success, or when length is 0 (nothing to send)
 *   -1  ctx is NULL
 *   -2  stream is NULL
 *   -3  length is negative
 *   -4  nm_inject could not queue the packet
 */
int nty_nic_write(nty_nic_context *ctx, const void *stream, int length) {
    if (ctx == NULL) return -1;
    if (stream == NULL) return -2;
    if (length == 0) return 0;
    /* a negative length would be converted to a huge unsigned size */
    if (length < 0) return -3;

    /* nm_inject returns 0 when no transmit slot could take the packet */
    if (nm_inject(ctx->nmr, stream, length) == 0) return -4;

    return 0;
}

ping的实现:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
/* Excerpt: send one ICMP echo request over a raw socket and wait for a reply. */
int fd = socket(AF_INET, SOCK_RAW, IPPROTO_ICMP); // create a raw socket
setsockopt(fd, IPPROTO_IP, IP_HDRINCL, &on, sizeof(int)); // IP_HDRINCL: the OS must not prepend its own IP header; we supply it
setsockopt(fd, SOL_SOCKET, SO_BROADCAST, &on, sizeof(int)); // SO_BROADCAST: allow sending to broadcast addresses
setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, (char *)&tv, sizeof(tv)); // set the receive timeout
// Build the packet: IP header, then ICMP header, then payload, all inside `packet`
struct iphdr *ip_hdr = (struct iphdr *)packet;
struct icmphdr *icmp_hdr = (struct icmphdr *) (packet + sizeof(struct iphdr));
int total_len = sizeof(struct iphdr) + sizeof(struct icmphdr) + data_len;
char *icmp_payload = (char*)(packet + sizeof(struct iphdr) + sizeof(struct icmphdr));
ip_hdr->ihl = 5; // 5 * 4 = 20-byte header, no options
ip_hdr->version = 4;
ip_hdr->tos = 0;
ip_hdr->tot_len = htons(total_len);
ip_hdr->id = htons(19924);
ip_hdr->frag_off = htons(0x4000); // 0x4000 = "don't fragment" flag, offset 0
ip_hdr->protocol = IPPROTO_ICMP;
ip_hdr->ttl = 64;
ip_hdr->saddr = 0; // Linux raw(7): a zero source address is filled in by the kernel
ip_hdr->daddr = ip;
ip_hdr->check = 0; // Linux raw(7): the IP checksum is always filled in by the kernel
icmp_hdr->type = ICMP_ECHO;
icmp_hdr->code = 0;
icmp_hdr->un.echo.id = htons(pid);
icmp_hdr->un.echo.sequence = htons(seq);
memset(icmp_payload, 0, data_len);
memcpy(icmp_payload, &now, sizeof(now)); // payload starts with the value of `now`
icmp_hdr->checksum = 0; // must be zero while the checksum is being computed
icmp_hdr->checksum = Checksum((unsigned short *)icmp_hdr, sizeof(struct icmphdr) + data_len);
// Send and receive
// NOTE(review): `len` is not defined in this excerpt; presumably it equals total_len -- confirm.
result = sendto(fd, packet, len, 0, (struct sockaddr *)&dst_addr, sizeof(dst_addr));
result = recvfrom(fd, packet, sizeof(packet), 0, (struct sockaddr*)&src_addr, &sock_len);

ip相关的结构体:https://man7.org/linux/man-pages/man7/ip.7.html、https://man7.org/linux/man-pages/man7/ipv6.7.html、https://man7.org/linux/man-pages/man2/bind.2.html
第一个字段都是sa_family,所以可以作为联合体。
bind的时候是调用sockaddr,初始化设置addr参数的时候是使用对应的sockaddr_in或sockaddr_in6。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
/*
 * Generic socket address: an address family followed by 14 bytes of
 * family-specific data.
 */
struct sockaddr {
    sa_family_t sa_family;
    char sa_data[14];
};

/* IPv4 socket address: family, port and address. */
struct sockaddr_in {
sa_family_t sin_family; /* address family: AF_INET */
in_port_t sin_port; /* port in network byte order */
struct in_addr sin_addr; /* internet address */
};

/* Internet (IPv4) address: a single 32-bit value. */
struct in_addr {
uint32_t s_addr; /* address in network byte order */
};

/* IPv6 socket address: family, port, flow information, address and scope ID. */
struct sockaddr_in6 {
sa_family_t sin6_family; /* AF_INET6 */
in_port_t sin6_port; /* port number */
uint32_t sin6_flowinfo; /* IPv6 flow information */
struct in6_addr sin6_addr; /* IPv6 address */
uint32_t sin6_scope_id; /* Scope ID (new in 2.4) */
};

/* IPv6 address stored as 16 raw bytes. */
struct in6_addr {
unsigned char s6_addr[16]; /* IPv6 address */
};

// 服务端监听
structsockaddr_inserver_addr; /*服务器端IP地址*/
structsockaddr_inclient_addr; /*客户端IP地址*/
sockfd=socket(AF_INET,SOCK_STREAM,0);
bzero(&server_addr, sizeof(structsockaddr_in));
server_addr.sin_family=AF_INET;
server_addr.sin_addr.s_addr=htonl(INADDR_ANY);
server_addr.sin_port=htons(80);
// ipv6服务端监听
structsockaddr_in6server_addr; /*服务器端IP地址*/
structsockaddr_in6client_addr; /*客户端IP地址*/
server_socket=socket(PF_INET6,SOCK_STREAM,0));
bzero(&server_addr,sizeof(structsockaddr_in6));
server_addr.sin6_family=PF_INET6;
server_addr.sin6_addr=in6addr_any;
server_addr.sin6_port=htons(8080);

inet_addr、inet_ntoa、inet_aton、inet_ntop和inet_pton区分

  1. 把ip地址转化为用于网络传输的二进制数值
    int inet_aton(const char *cp, struct in_addr *inp);
    inet_aton()函数用于将点分十进制IP地址转换成网络字节序IP地址,并存储在struct in_addr结构中,即第二个参数inp。函数返回非0表示cp指向的主机地址有效,返回0表示主机地址无效。
    in_addr_t inet_addr(const char *cp);
    inet_addr函数转换网络主机地址(如192.168.1.10)为网络字节序二进制值,如果参数char *cp无效,函数返回-1(INADDR_NONE),这个函数在处理地址为255.255.255.255时也返回-1,255.255.255.255是一个有效的地址,不过inet_addr无法处理;

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    /* Convert the dotted-decimal string `ip`; the result is stored in `addr` in network byte order. */
    int ret = inet_aton(ip, &addr);  // nonzero: address valid; 0: address invalid
    if (0 == ret) {
        printf("inet_aton return 0\n");
    } else {
        /* s_addr is a 32-bit unsigned value, so print it as unsigned */
        printf("inet_aton ip: %u\n", (unsigned int)addr.s_addr);
    }

    /* Zero the whole IPv4 socket address, then fill in the loopback address and the family. */
    struct sockaddr_in addr;
    memset(&addr, 0, sizeof(struct sockaddr_in));
    addr.sin_addr.s_addr = inet_addr("127.0.0.1");
    addr.sin_family = AF_INET;
  2. 将网络传输的二进制数值转化为成点分十进制的ip地址
    char *inet_ntoa(struct in_addr in);
    inet_ntoa 函数转换网络字节排序的地址为标准的ASCII以点分开的地址,该函数返回指向点分开的字符串地址(如192.168.1.10)的指针,该字符串的空间为静态分配的,这意味着在第二次调用该函数时,上一次调用将会被重写(覆盖),所以如果需要保存该串最后复制出来自己管理!

    1
    2
    3
    /*
     * inet_ntoa returns a pointer to a statically allocated string that each
     * new call overwrites, so the two %s arguments on the first line are not
     * independent results. The two single-call lines below do not share that
     * problem, because each result is printed before the next call.
     */
    printf("%s : %s\n", inet_ntoa(addr1), inet_ntoa(addr2)); // pay attention to what this line prints
    printf("%s\n", inet_ntoa(addr1));
    printf("%s\n", inet_ntoa(addr2));
  3. 新型网络地址转换函数inet_pton和inet_ntop
    这两个函数是随IPv6出现的函数,对于IPv4地址和IPv6地址都适用,函数中p和n分别代表表达(presentation)和数值(numeric)。地址的表达格式通常是ASCII字符串,数值格式则是存放到套接字地址结构的二进制值。

    1
    2
    3
    4
    5
    6
    7
    8
    #include <arpa/inet.h>

    /*
     * Convert the text form of an address (dotted decimal for IPv4) in strptr
     * into the binary form used on the network, stored at addrptr.
     * Returns 1 on success, 0 if the input is not a valid text address,
     * and -1 on error.
     */
    int inet_pton(int family, const char *strptr, void *addrptr);

    /*
     * Convert the binary address at addrptr into its text form, written into
     * the caller's buffer strptr of size len.
     * Returns a pointer to the resulting string (strptr) on success,
     * NULL on error.
     */
    const char * inet_ntop(int family, const void *addrptr, char *strptr, socklen_t len);
nephen wechat
欢迎您扫一扫上面的微信公众号,订阅我的博客!
坚持原创技术分享,您的支持将鼓励我继续创作!