深入理解Linux网络技术内幕——L4层协议与Raw IP的处理
我们简单了解下L4层协议和Raw IP是如何与IP层进行交互的。
L4层协议
L4层协议可以通过静态编译和模块配置两种方式加入内核。
比较重要的协议如TCP、UDP、ICMP通常是静态编译至内核。
一些不常用的或者比较特殊的协议,则是通过内核配置加入内核。如IGMP,SCTP,IPIP等等。
L4层协议的注册
L4层协议由net_protocol结构定义:
/* This is used to register protocols. */ struct net_protocol { int (*handler)(struct sk_buff *skb); //由协议注册的,用于处理入口封包的函数 //由ICMP协议处理函数所用的函数,当收到ICMP UNREACHABLE 时,通知L4层 void (*err_handler)(struct sk_buff *skb, u32 info); int (*gso_send_check)(struct sk_buff *skb); struct sk_buff *(*gso_segment)(struct sk_buff *skb, int features); struct sk_buff **(*gro_receive)(struct sk_buff **head, struct sk_buff *skb); int (*gro_complete)(struct sk_buff *skb); unsigned int no_policy:1, //使协议免于IPsec检查 netns_ok:1; };
/* * Add a protocol handler to the hash tables */ int inet_add_protocol(const struct net_protocol *prot, unsigned char protocol) { int hash, ret; hash = protocol & (MAX_INET_PROTOS - 1); spin_lock_bh(&inet_proto_lock); if (inet_protos[hash]) { ret = -1; } else { inet_protos[hash] = prot; ret = 0; } spin_unlock_bh(&inet_proto_lock); return ret; } /* * Remove a protocol from the hash tables. */ int inet_del_protocol(const struct net_protocol *prot, unsigned char protocol) { int hash, ret; hash = protocol & (MAX_INET_PROTOS - 1); spin_lock_bh(&inet_proto_lock); if (inet_protos[hash] == prot) { inet_protos[hash] = NULL; ret = 0; } else { ret = -1; } spin_unlock_bh(&inet_proto_lock); synchronize_net(); return ret; } EXPORT_SYMBOL(inet_add_protocol); EXPORT_SYMBOL(inet_del_protocol);
我们看下具体的例子:
下面是TCP、UDP、ICMP协议结构体的初始化,这要先完成:
static const struct net_protocol tcp_protocol = { .handler = tcp_v4_rcv, .err_handler = tcp_v4_err, .gso_send_check = tcp_v4_gso_send_check, .gso_segment = tcp_tso_segment, .gro_receive = tcp4_gro_receive, .gro_complete = tcp4_gro_complete, .no_policy = 1, .netns_ok = 1, }; static const struct net_protocol udp_protocol = { .handler = udp_rcv, .err_handler = udp_err, .gso_send_check = udp4_ufo_send_check, .gso_segment = udp4_ufo_fragment, .no_policy = 1, .netns_ok = 1, }; static const struct net_protocol icmp_protocol = { .handler = icmp_rcv, .no_policy = 1, .netns_ok = 1, };
协议结构体初始化结束后,在inet_init中,再把各个协议加入内核。
/*
 * inet_init - initialise the IPv4 protocol family.
 *
 * Registers the tcp/udp/raw protos, registers the inet socket family, adds
 * the base L4 protocol handlers (ICMP, UDP, TCP and, when
 * CONFIG_IP_MULTICAST is set, IGMP), fills the inetsw lists and then runs
 * the per-subsystem init routines in a fixed order.
 *
 * Returns 0 on success. If one of the proto_register() calls fails, its
 * error code is returned after the protos registered so far have been
 * unregistered. Failures of inet_add_protocol(), ip_mr_init() and
 * init_ipv4_mibs() are only logged; a failing icmp_init() panics.
 */
static int __init inet_init(void)
{
	struct sk_buff *dummy_skb;	/* only referenced inside sizeof() below */
	struct inet_protosw *q;
	struct list_head *r;
	int rc = -EINVAL;

	/* Compile-time check: inet_skb_parm must fit into the skb cb[] area. */
	BUILD_BUG_ON(sizeof(struct inet_skb_parm) > sizeof(dummy_skb->cb));

	rc = proto_register(&tcp_prot, 1);
	if (rc)
		goto out;

	rc = proto_register(&udp_prot, 1);
	if (rc)
		goto out_unregister_tcp_proto;

	rc = proto_register(&raw_prot, 1);
	if (rc)
		goto out_unregister_udp_proto;

	/*
	 *	Tell SOCKET that we are alive...
	 */

	/* Return value is deliberately discarded. */
	(void)sock_register(&inet_family_ops);

#ifdef CONFIG_SYSCTL
	ip_static_sysctl_init();
#endif

	/*
	 *	Add all the base protocols.
	 */

	if (inet_add_protocol(&icmp_protocol, IPPROTO_ICMP) < 0)
		printk(KERN_CRIT "inet_init: Cannot add ICMP protocol\n");
	if (inet_add_protocol(&udp_protocol, IPPROTO_UDP) < 0)
		printk(KERN_CRIT "inet_init: Cannot add UDP protocol\n");
	if (inet_add_protocol(&tcp_protocol, IPPROTO_TCP) < 0)
		printk(KERN_CRIT "inet_init: Cannot add TCP protocol\n");
#ifdef CONFIG_IP_MULTICAST
	/* IGMP is only registered when multicast support is configured. */
	if (inet_add_protocol(&igmp_protocol, IPPROTO_IGMP) < 0)
		printk(KERN_CRIT "inet_init: Cannot add IGMP protocol\n");
#endif

	/* Register the socket-side information for inet_create. */
	for (r = &inetsw[0]; r < &inetsw[SOCK_MAX]; ++r)
		INIT_LIST_HEAD(r);

	for (q = inetsw_array; q < &inetsw_array[INETSW_ARRAY_LEN]; ++q)
		inet_register_protosw(q);

	/*
	 *	Set the ARP module up
	 */

	arp_init();

	/*
	 *	Set the IP module up
	 */

	ip_init();

	tcp_v4_init();

	/* Setup TCP slab cache for open requests. */
	tcp_init();

	/* Setup UDP memory threshold */
	udp_init();

	/* Add UDP-Lite (RFC 3828) */
	udplite4_register();

	/*
	 *	Set the ICMP layer up
	 */

	if (icmp_init() < 0)
		panic("Failed to create the ICMP control socket.\n");

	/*
	 *	Initialise the multicast router
	 */
#if defined(CONFIG_IP_MROUTE)
	if (ip_mr_init())
		printk(KERN_CRIT "inet_init: Cannot init ipv4 mroute\n");
#endif

	/*
	 *	Initialise per-cpu ipv4 mibs
	 */

	if (init_ipv4_mibs())
		printk(KERN_CRIT "inet_init: Cannot init ipv4 mibs\n");

	ipv4_proc_init();

	ipfrag_init();

	/* NOTE(review): presumably hooks IPv4 receive into the device layer - confirm */
	dev_add_pack(&ip_packet_type);

	rc = 0;
out:
	return rc;

	/* Error unwinding: undo proto_register() calls in reverse order. */
out_unregister_udp_proto:
	proto_unregister(&udp_prot);
out_unregister_tcp_proto:
	proto_unregister(&tcp_prot);
	goto out;
}
从代码中可以看出,TCP、UDP、ICMP等是直接静态编译至内核的。而IGMP只有在内核配置了组播(CONFIG_IP_MULTICAST)时才会被编译进内核并注册。
L3到L4的封包传递:ip_local_deliver_finish
Raw套接字和Raw IP
我们要知道,并不是所有的L4层处理,都是在内核实现的。应用程序可以通过Raw 套接字和Raw IP跳过L4层协议,直接与IP层进行交互。
郑重声明:本站内容如果来自互联网及其他传播媒体,其版权均属原媒体及文章作者所有。转载目的在于传递更多信息及用于网络分享,并不代表本站赞同其观点和对其真实性负责,也不构成任何其他建议。