Connection Hash Load Balancer for P4 switch
In this lab, I use the source IP, source port, destination IP, destination port, and protocol (TCP or UDP) as the hash key to select an HTTP server.
[Topology]
P4-topo.yml
# Topology: one P4 switch (s1) with a client (h1) and two HTTP servers (h2, h3).
defaults:
  switch:
    bmv2: ../../bmv2
    p4c: ../../p4c-bmv2
    p4src: load_balance.p4
    dump: true
    port: 22222
    verbose: 'debug'

# Each host installs a static ARP entry for its gateway address, routes
# everything through that gateway, and turns off tx/rx offload via ethtool.
host:
  - ip: 10.0.1.1/24
    mac: 00:00:00:00:01:01
    name: h1
    command:
      - arp -s 10.0.1.254 00:00:00:01:01:01
      - ip route add default via 10.0.1.254
      - ethtool -K h1-eth0 tx off rx off
  - ip: 10.0.2.2/24
    mac: 00:00:00:00:02:02
    name: h2
    command:
      - arp -s 10.0.2.254 00:00:00:02:02:02
      - ip route add default via 10.0.2.254
      - ethtool -K h2-eth0 tx off rx off
  - ip: 10.0.3.3/24
    mac: 00:00:00:00:03:03
    name: h3
    command:
      - arp -s 10.0.3.254 00:00:00:03:03:03
      - ip route add default via 10.0.3.254
      - ethtool -K h3-eth0 tx off rx off

switch:
  - name: s1
    commands: s1-commands.txt

link:
  - source: h1
    destination: s1
  - source: s1
    destination: h2
  - source: s1
    destination: h3
s1-commands.txt
table_set_default forward nop
table_set_default ecmp_group nop
table_set_default ecmp_nhop nop
table_set_default send_frame nop
table_add forward set_nhop 10.0.1.1/32 => 00:00:00:00:01:01 1
table_add forward set_nhop 10.0.2.2/32 => 00:00:00:00:02:02 2
table_add forward set_nhop 10.0.3.3/32 => 00:00:00:00:03:03 3
table_add ecmp_group set_ecmp_select 10.0.0.1/32 => 0 2
table_add ecmp_nhop set_ecmp_nhop 1 => 00:00:00:00:02:02 10.0.2.2 2
table_add ecmp_nhop set_ecmp_nhop 2 => 00:00:00:00:03:03 10.0.3.3 3
table_add send_frame rewrite_sip 1 => 10.0.0.1
load_balance.p4
/*
 * Connection-hash load balancer (P4_14).
 *
 * Packets whose IPv4 destination matches an ecmp_group entry get a server
 * index derived from a CRC16 hash over (src IP, dst IP, protocol, TCP src
 * port, TCP dst port); ecmp_nhop then rewrites the destination IP/MAC and
 * the output port.  On egress, send_frame can rewrite the source IP per
 * egress port.
 */

/* ---------------------------------------------------------------- constants */
#define ETHERTYPE_IPV4 0x0800
#define ETHERTYPE_ARP 0x0806
#define IPPROTO_ICMP 0x01
#define IP_PROTOCOLS_TCP 6
#define IP_PROTOCOLS_UDP 17
#define ARP_HTYPE_ETHERNET 0x0001
#define ARP_PTYPE_IPV4 0x0800
#define ARP_HLEN_ETHERNET 6
#define ARP_PLEN_IPV4 4
#define ARP_OPER_REQUEST 1
#define ARP_OPER_REPLY 2
#define ICMP_ECHO_REQUEST 8
#define ICMP_ECHO_REPLY 0

/* ------------------------------------------------------------- header types */
header_type ethernet_t {
    fields {
        dstAddr : 48;
        srcAddr : 48;
        etherType : 16;
    }
}

header_type ipv4_t {
    fields {
        version : 4;
        ihl : 4;
        diffserv : 8;
        totalLen : 16;
        identification : 16;
        flags : 3;
        fragOffset : 13;
        ttl : 8;
        protocol : 8;
        hdrChecksum : 16;
        srcAddr : 32;
        dstAddr: 32;
    }
}

header_type arp_t {
    fields {
        htype : 16;
        ptype : 16;
        hlen : 8;
        plen : 8;
        opcode : 16;
        hwSrcAddr : 48;
        protoSrcAddr : 32;
        hwDstAddr : 48;
        protoDstAddr : 32;
    }
}

header_type tcp_t {
    fields {
        srcPort : 16;
        dstPort : 16;
        seqNo : 32;
        ackNo : 32;
        dataOffset : 4;
        res : 4;
        flags : 8;
        window : 16;
        checksum : 16;
        urgentPtr : 16;
    }
}

header_type udp_t {
    fields {
        srcPort : 16;
        dstPort : 16;
        length_ : 16;
        checksum : 16;
    }
}

/* ----------------------------------------------------------- metadata types */
header_type mymetadata_t {
    fields {
        ecmp_select : 14;   /* server index, key of table ecmp_nhop */
    }
}

header_type meta_t {
    fields {
        do_forward : 1;
        ipv4_sa : 32;
        ipv4_da : 32;
        tcp_sp : 16;
        tcp_dp : 16;
        nhop_ipv4 : 32;
        if_ipv4_addr : 32;
        if_mac_addr : 48;
        is_ext_if : 1;
        tcpLength : 16;     /* used as the length word of the TCP pseudo-header */
        if_index : 8;
    }
}

/* ---------------------------------------------------------------- instances */
metadata mymetadata_t mymetadata;
metadata meta_t meta;

header ethernet_t ethernet;
header ipv4_t ipv4;
header arp_t arp;
header tcp_t tcp;
header udp_t udp;

/* ------------------------------------------------------------------- parser */
parser start {
    set_metadata(meta.if_index, standard_metadata.ingress_port);
    return parse_ethernet;
}

parser parse_ethernet {
    extract(ethernet);
    return select(latest.etherType) {
        ETHERTYPE_IPV4 : parse_ipv4;
        ETHERTYPE_ARP : parse_arp;
        default : ingress;
    }
}

parser parse_ipv4 {
    extract(ipv4);
    set_metadata(meta.ipv4_sa, ipv4.srcAddr);
    set_metadata(meta.ipv4_da, ipv4.dstAddr);
    /* Subtracts a fixed 20-byte IPv4 header from the total length. */
    set_metadata(meta.tcpLength, ipv4.totalLen - 20);
    return select(latest.protocol) {
        IP_PROTOCOLS_TCP : parse_tcp;
        IP_PROTOCOLS_UDP : parse_udp;
        default: ingress;
    }
}

parser parse_arp {
    extract(arp);
    return ingress;
}

parser parse_tcp {
    extract(tcp);
    set_metadata(meta.tcp_sp, tcp.srcPort);
    set_metadata(meta.tcp_dp, tcp.dstPort);
    return ingress;
}

parser parse_udp {
    extract(udp);
    return ingress;
}

/* ---------------------------------------------------------------- checksums */
field_list ipv4_checksum_list {
    ipv4.version;
    ipv4.ihl;
    ipv4.diffserv;
    ipv4.totalLen;
    ipv4.identification;
    ipv4.flags;
    ipv4.fragOffset;
    ipv4.ttl;
    ipv4.protocol;
    ipv4.srcAddr;
    ipv4.dstAddr;
}

field_list_calculation ipv4_checksum {
    input {
        ipv4_checksum_list;
    }
    algorithm : csum16;
    output_width : 16;
}

calculated_field ipv4.hdrChecksum {
    verify ipv4_checksum;
    update ipv4_checksum;
}

/* Pseudo-header fields, then the TCP header without its checksum, then payload. */
field_list tcp_checksum_list {
    ipv4.srcAddr;
    ipv4.dstAddr;
    8'0;
    ipv4.protocol;
    meta.tcpLength;
    tcp.srcPort;
    tcp.dstPort;
    tcp.seqNo;
    tcp.ackNo;
    tcp.dataOffset;
    tcp.res;
    tcp.flags;
    tcp.window;
    tcp.urgentPtr;
    payload;
}

field_list_calculation tcp_checksum {
    input {
        tcp_checksum_list;
    }
    algorithm : csum16;
    output_width : 16;
}

calculated_field tcp.checksum {
    verify tcp_checksum;
    update tcp_checksum;
}

/* ---------------------------------------------------------- connection hash */
field_list my_hash_fields {
    ipv4.srcAddr;
    ipv4.dstAddr;
    ipv4.protocol;
    tcp.srcPort;
    tcp.dstPort;
}

field_list_calculation my_map_hash {
    input {
        my_hash_fields;
    }
    algorithm : crc16;
    output_width : 14;
}

/* ------------------------------------------------------- actions and tables */
action _drop() {
    drop();
}

action nop() {}

/* Redirect the packet to the chosen server: output port, dst IP and dst MAC. */
action set_ecmp_nhop(nhop_mac, nhop_ipv4, port) {
    modify_field(standard_metadata.egress_spec, port);
    modify_field(ipv4.dstAddr, nhop_ipv4);
    modify_field(ethernet.dstAddr, nhop_mac);
    add_to_field(ipv4.ttl, -1);
}

/* Hash the connection into mymetadata.ecmp_select, then add 1 to the result. */
action set_ecmp_select(ecmp_base, ecmp_count) {
    modify_field_with_hash_based_offset(mymetadata.ecmp_select, ecmp_base,
                                        my_map_hash, ecmp_count);
    add_to_field(mymetadata.ecmp_select, 1);
}

table ecmp_group {
    reads {
        ipv4.dstAddr: lpm;
    }
    actions {
        _drop;
        set_ecmp_select;
        nop;
    }
    size: 1024;
}

table ecmp_nhop {
    reads {
        mymetadata.ecmp_select: exact;
    }
    actions {
        _drop;
        set_ecmp_nhop;
        nop;
    }
    size: 2;
}

/* Plain forwarding: choose output port and destination MAC. */
action set_nhop(dmac, port) {
    modify_field(standard_metadata.egress_spec, port);
    modify_field(ethernet.dstAddr, dmac);
    add_to_field(ipv4.ttl, -1);
}

table forward {
    reads {
        ipv4.dstAddr: lpm;
    }
    actions {
        _drop;
        set_nhop;
        nop;
    }
    size: 1024;
}

/* Overwrite the IPv4 source address with the configured value. */
action rewrite_sip(sip) {
    modify_field(ipv4.srcAddr, sip);
}

table send_frame {
    reads {
        standard_metadata.egress_port: exact;
    }
    actions {
        _drop;
        rewrite_sip;
        nop;
    }
    size: 256;
}

/* ------------------------------------------------------------- control flow */
control ingress {
    apply(forward);
    apply(ecmp_group);
    apply(ecmp_nhop);
}

control egress {
    apply(send_frame);
}
[Execution]
Use xterm to open terminals for h1, h2, and h3
At h2 and h3, start the http server
At h1, use curl to get the web page. (The source port number is different for each request, so a different server may be chosen.)
(updated: 2020/5/21) Add Health Check
[Topology]
The controller checks the health status of the web servers. If a web server is not working, the controller informs the LB not to dispatch HTTP requests to the malfunctioning server.
[ConnectionHash.p4]
/*
 * Connection-hash load balancer with health-check support (P4_16, v1model).
 *
 * - A TCP SYN addressed to an ecmp_group entry is hashed (CRC16 over src IP,
 *   dst IP, protocol, TCP src port, TCP dst port) to a server index, and the
 *   choice is stored in the flowlet_select register.
 * - Later packets matching a `forward` entry bound to read_flowlet_select
 *   reuse the stored index.
 * - The controller marks a server as failed by modifying the single entry
 *   of set_statusN; a SYN hashed to a failed server is re-assigned to the
 *   next server and the register is updated accordingly.
 */
#include <core.p4>
#include <v1model.p4>

struct meta_t {
    bit<1>  do_forward;
    bit<32> ipv4_sa;
    bit<32> ipv4_da;
    bit<16> tcp_sp;
    bit<16> tcp_dp;
    bit<32> nhop_ipv4;
    bit<32> if_ipv4_addr;
    bit<48> if_mac_addr;
    bit<1>  is_ext_if;
    bit<16> tcpLength;      // length word of the TCP pseudo-header
    bit<8>  if_index;
}

struct mymetadata_t {
    bit<13> flowlet_map_index;  // index into the flowlet_select register
    bit<3>  ecmp_select;        // chosen server, key of table ecmp_nhop
    bit<1>  server1;            // 1 = server N reported as failed
    bit<1>  server2;
    bit<1>  server3;
    bit<1>  server4;
}

header arp_t {
    bit<16> htype;
    bit<16> ptype;
    bit<8>  hlen;
    bit<8>  plen;
    bit<16> opcode;
    bit<48> hwSrcAddr;
    bit<32> protoSrcAddr;
    bit<48> hwDstAddr;
    bit<32> protoDstAddr;
}

header ethernet_t {
    bit<48> dstAddr;
    bit<48> srcAddr;
    bit<16> etherType;
}

header ipv4_t {
    bit<4>  version;
    bit<4>  ihl;
    bit<8>  diffserv;
    bit<16> totalLen;
    bit<16> identification;
    bit<3>  flags;
    bit<13> fragOffset;
    bit<8>  ttl;
    bit<8>  protocol;
    bit<16> hdrChecksum;
    bit<32> srcAddr;
    bit<32> dstAddr;
}

header tcp_t {
    bit<16> srcPort;
    bit<16> dstPort;
    bit<32> seqNo;
    bit<32> ackNo;
    bit<4>  dataOffset;
    bit<4>  res;
    bit<8>  flags;
    bit<16> window;
    bit<16> checksum;
    bit<16> urgentPtr;
}

header udp_t {
    bit<16> srcPort;
    bit<16> dstPort;
    bit<16> length_;
    bit<16> checksum;
}

struct metadata {
    @name(".meta")
    meta_t       meta;
    @name(".mymetadata")
    mymetadata_t mymetadata;
}

struct headers {
    @name(".arp")
    arp_t      arp;
    @name(".ethernet")
    ethernet_t ethernet;
    @name(".ipv4")
    ipv4_t     ipv4;
    @name(".tcp")
    tcp_t      tcp;
    @name(".udp")
    udp_t      udp;
}

parser ParserImpl(packet_in packet,
                  out headers hdr,
                  inout metadata meta,
                  inout standard_metadata_t standard_metadata) {
    @name(".parse_arp") state parse_arp {
        packet.extract(hdr.arp);
        transition accept;
    }
    @name(".parse_ethernet") state parse_ethernet {
        packet.extract(hdr.ethernet);
        transition select(hdr.ethernet.etherType) {
            16w0x800: parse_ipv4;
            16w0x806: parse_arp;
            default: accept;
        }
    }
    @name(".parse_ipv4") state parse_ipv4 {
        packet.extract(hdr.ipv4);
        meta.meta.ipv4_sa = hdr.ipv4.srcAddr;
        meta.meta.ipv4_da = hdr.ipv4.dstAddr;
        // Subtracts a fixed 20-byte IPv4 header from the total length.
        meta.meta.tcpLength = hdr.ipv4.totalLen - 16w20;
        transition select(hdr.ipv4.protocol) {
            8w6: parse_tcp;
            8w17: parse_udp;
            default: accept;
        }
    }
    @name(".parse_tcp") state parse_tcp {
        packet.extract(hdr.tcp);
        meta.mymetadata.flowlet_map_index = meta.mymetadata.flowlet_map_index;
        meta.meta.tcp_sp = hdr.tcp.srcPort;
        meta.meta.tcp_dp = hdr.tcp.dstPort;
        transition accept;
    }
    @name(".parse_udp") state parse_udp {
        packet.extract(hdr.udp);
        transition accept;
    }
    @name(".start") state start {
        // Every server starts out as healthy; the set_statusN tables
        // overwrite these flags in ingress.
        meta.mymetadata.server1 = 0;
        meta.mymetadata.server2 = 0;
        meta.mymetadata.server3 = 0;
        meta.mymetadata.server4 = 0;
        meta.meta.if_index = (bit<8>)standard_metadata.ingress_port;
        transition parse_ethernet;
    }
}

control egress(inout headers hdr,
               inout metadata meta,
               inout standard_metadata_t standard_metadata) {
    @name("._drop") action _drop() {
        mark_to_drop(standard_metadata);
    }
    // Overwrite the IPv4 source address with the configured value.
    @name(".rewrite_sip") action rewrite_sip(bit<32> sip) {
        hdr.ipv4.srcAddr = sip;
    }
    @name(".nop") action nop() {
    }
    @name(".send_frame") table send_frame {
        actions = {
            _drop;
            rewrite_sip;
            nop;
        }
        key = {
            standard_metadata.egress_port: exact;
        }
        size = 256;
    }
    apply {
        send_frame.apply();
    }
}

// Per-connection server choice, indexed by a 13-bit hash of the 5-tuple.
register<bit<3>>(32w8192) flowlet_select;

control ingress(inout headers hdr,
                inout metadata meta,
                inout standard_metadata_t standard_metadata) {
    @name("._drop") action _drop() {
        mark_to_drop(standard_metadata);
    }

    // Health flags, written by the controller through set_status1..4.
    action _fail1(bit<1> fail) {
        meta.mymetadata.server1 = fail;
    }
    action _fail2(bit<1> fail) {
        meta.mymetadata.server2 = fail;
    }
    action _fail3(bit<1> fail) {
        meta.mymetadata.server3 = fail;
    }
    action _fail4(bit<1> fail) {
        meta.mymetadata.server4 = fail;
    }

    // New connection: hash to a server index, add 1, and remember the
    // choice in the register slot of this connection.
    @name(".set_ecmp_select") action set_ecmp_select(bit<8> ecmp_base, bit<8> ecmp_count) {
        hash(meta.mymetadata.ecmp_select,
             HashAlgorithm.crc16,
             (bit<13>)ecmp_base,
             { hdr.ipv4.srcAddr, hdr.ipv4.dstAddr, hdr.ipv4.protocol,
               hdr.tcp.srcPort, hdr.tcp.dstPort },
             (bit<26>)ecmp_count);
        meta.mymetadata.ecmp_select = meta.mymetadata.ecmp_select + 1;
        hash(meta.mymetadata.flowlet_map_index,
             HashAlgorithm.crc16,
             (bit<13>)0,
             { hdr.ipv4.srcAddr, hdr.ipv4.dstAddr, hdr.ipv4.protocol,
               hdr.tcp.srcPort, hdr.tcp.dstPort },
             (bit<26>)8192);
        flowlet_select.write((bit<32>)meta.mymetadata.flowlet_map_index,
                             meta.mymetadata.ecmp_select);
    }

    // Existing connection: look up the server stored for this 5-tuple.
    action read_flowlet_select() {
        hash(meta.mymetadata.flowlet_map_index,
             HashAlgorithm.crc16,
             (bit<13>)0,
             { hdr.ipv4.srcAddr, hdr.ipv4.dstAddr, hdr.ipv4.protocol,
               hdr.tcp.srcPort, hdr.tcp.dstPort },
             (bit<26>)8192);
        flowlet_select.read(meta.mymetadata.ecmp_select,
                            (bit<32>)meta.mymetadata.flowlet_map_index);
    }

    // Failover helper: recompute the register slot of this connection and
    // bind the connection to `server` (0 means "no server available").
    action reassign_server(bit<3> server) {
        hash(meta.mymetadata.flowlet_map_index,
             HashAlgorithm.crc16,
             (bit<13>)0,
             { hdr.ipv4.srcAddr, hdr.ipv4.dstAddr, hdr.ipv4.protocol,
               hdr.tcp.srcPort, hdr.tcp.dstPort },
             (bit<26>)8192);
        flowlet_select.write((bit<32>)meta.mymetadata.flowlet_map_index, server);
        meta.mymetadata.ecmp_select = server;
    }

    @name(".nop") action nop() {
    }

    // Redirect the packet to the chosen server: output port, dst IP, dst MAC.
    @name(".set_ecmp_nhop") action set_ecmp_nhop(bit<48> nhop_mac, bit<32> nhop_ipv4, bit<9> port) {
        standard_metadata.egress_spec = port;
        hdr.ipv4.dstAddr = nhop_ipv4;
        hdr.ethernet.dstAddr = nhop_mac;
        hdr.ipv4.ttl = hdr.ipv4.ttl - 8w1;
    }

    // Plain forwarding: choose output port and destination MAC.
    @name(".set_nhop") action set_nhop(bit<48> dmac, bit<9> port) {
        standard_metadata.egress_spec = port;
        hdr.ethernet.dstAddr = dmac;
        hdr.ipv4.ttl = hdr.ipv4.ttl - 8w1;
    }

    @name(".ecmp_group") table ecmp_group {
        actions = {
            _drop;
            set_ecmp_select;
            nop;
        }
        key = {
            hdr.ipv4.dstAddr: lpm;
        }
        size = 1024;
    }
    @name(".ecmp_nhop") table ecmp_nhop {
        actions = {
            _drop;
            set_ecmp_nhop;
            nop;
        }
        key = {
            meta.mymetadata.ecmp_select: exact;
        }
        size = 1024;
    }
    @name(".forward") table forward {
        actions = {
            _drop;
            set_nhop;
            nop;
            read_flowlet_select;
        }
        key = {
            hdr.ipv4.dstAddr: lpm;
        }
        size = 1024;
    }
    table set_status1 {
        actions = {
            _fail1;
        }
        key = {
            hdr.ipv4.dstAddr: lpm;
        }
        size = 1;
    }
    table set_status2 {
        actions = {
            _fail2;
        }
        key = {
            hdr.ipv4.dstAddr: lpm;
        }
        size = 1;
    }
    table set_status3 {
        actions = {
            _fail3;
        }
        key = {
            hdr.ipv4.dstAddr: lpm;
        }
        size = 1;
    }
    table set_status4 {
        actions = {
            _fail4;
        }
        key = {
            hdr.ipv4.dstAddr: lpm;
        }
        size = 1;
    }

    apply {
        forward.apply();

        // `!=` binds tighter than `&`, so the mask must be parenthesised.
        // The TCP header is checked for validity before its flags are read.
        bool is_tcp_syn = hdr.tcp.isValid() && (hdr.tcp.flags & 8w2) != 8w0;

        if (is_tcp_syn) {
            ecmp_group.apply();
        }

        // The status tables are applied one after another, so each block
        // below only knows the flags of the tables applied so far; a choice
        // made here may be corrected again by a later block.

        if (set_status1.apply().hit && is_tcp_syn) {
            // server1 fails and the lb chooses server1
            if (meta.mymetadata.server1 == 1 && meta.mymetadata.ecmp_select == 1) {
                if (meta.mymetadata.server2 != 1) {
                    // server2 is not marked as failed: choose server 2
                    reassign_server(3w2);
                } else if (meta.mymetadata.server3 != 1) {
                    // server3 is not marked as failed: choose server 3
                    reassign_server(3w3);
                } else if (meta.mymetadata.server4 != 1) {
                    // server4 is not marked as failed: choose server 4
                    reassign_server(3w4);
                } else {
                    // all servers fail
                    reassign_server(3w0);
                    _drop();
                }
            }
        }

        if (set_status2.apply().hit && is_tcp_syn) {
            // server2 fails and the lb chooses server2
            if (meta.mymetadata.server2 == 1 && meta.mymetadata.ecmp_select == 2) {
                if (meta.mymetadata.server3 != 1) {
                    // server3 is not marked as failed: choose server 3
                    reassign_server(3w3);
                } else if (meta.mymetadata.server4 != 1) {
                    // server4 is not marked as failed: choose server 4
                    reassign_server(3w4);
                } else if (meta.mymetadata.server1 != 1) {
                    // server1 is not marked as failed: choose server 1
                    // (the original tested server4 here by mistake)
                    reassign_server(3w1);
                } else {
                    // all servers fail
                    reassign_server(3w0);
                    _drop();
                }
            }
        }

        if (set_status3.apply().hit && is_tcp_syn) {
            // server3 fails and the lb chooses server3
            if (meta.mymetadata.server3 == 1 && meta.mymetadata.ecmp_select == 3) {
                if (meta.mymetadata.server4 != 1) {
                    // server4 is not marked as failed: choose server 4
                    reassign_server(3w4);
                } else if (meta.mymetadata.server1 != 1) {
                    // server1 is not marked as failed: choose server 1
                    reassign_server(3w1);
                } else if (meta.mymetadata.server2 != 1) {
                    // server2 is not marked as failed: choose server 2
                    reassign_server(3w2);
                } else {
                    // all servers fail
                    reassign_server(3w0);
                    _drop();
                }
            }
        }

        if (set_status4.apply().hit && is_tcp_syn) {
            // server4 fails and the lb chooses server4
            if (meta.mymetadata.server4 == 1 && meta.mymetadata.ecmp_select == 4) {
                if (meta.mymetadata.server1 != 1) {
                    // server1 is not marked as failed: choose server 1
                    reassign_server(3w1);
                } else if (meta.mymetadata.server2 != 1) {
                    // server2 is not marked as failed: choose server 2
                    reassign_server(3w2);
                } else if (meta.mymetadata.server3 != 1) {
                    // server3 is not marked as failed: choose server 3
                    // (the original tested server4 here, which is always
                    // failed in this branch, so server 3 was never chosen)
                    reassign_server(3w3);
                } else {
                    // all servers fail
                    reassign_server(3w0);
                    _drop();
                }
            }
        }

        if (hdr.ipv4.isValid()) {
            ecmp_nhop.apply();
        }
    }
}

control DeparserImpl(packet_out packet, in headers hdr) {
    apply {
        packet.emit(hdr.ethernet);
        packet.emit(hdr.arp);
        packet.emit(hdr.ipv4);
        packet.emit(hdr.udp);
        packet.emit(hdr.tcp);
    }
}

control verifyChecksum(inout headers hdr, inout metadata meta) {
    apply {
        // Only verify a checksum when the header it belongs to was parsed.
        verify_checksum(hdr.ipv4.isValid(),
            { hdr.ipv4.version, hdr.ipv4.ihl, hdr.ipv4.diffserv,
              hdr.ipv4.totalLen, hdr.ipv4.identification, hdr.ipv4.flags,
              hdr.ipv4.fragOffset, hdr.ipv4.ttl, hdr.ipv4.protocol,
              hdr.ipv4.srcAddr, hdr.ipv4.dstAddr },
            hdr.ipv4.hdrChecksum,
            HashAlgorithm.csum16);
        verify_checksum_with_payload(hdr.tcp.isValid(),
            { hdr.ipv4.srcAddr, hdr.ipv4.dstAddr, 8w0, hdr.ipv4.protocol,
              meta.meta.tcpLength,
              hdr.tcp.srcPort, hdr.tcp.dstPort,
              hdr.tcp.seqNo, hdr.tcp.ackNo,
              hdr.tcp.dataOffset, hdr.tcp.res, hdr.tcp.flags,
              hdr.tcp.window, hdr.tcp.urgentPtr },
            hdr.tcp.checksum,
            HashAlgorithm.csum16);
    }
}

control computeChecksum(inout headers hdr, inout metadata meta) {
    apply {
        // Only recompute a checksum when the header it belongs to is valid.
        update_checksum(hdr.ipv4.isValid(),
            { hdr.ipv4.version, hdr.ipv4.ihl, hdr.ipv4.diffserv,
              hdr.ipv4.totalLen, hdr.ipv4.identification, hdr.ipv4.flags,
              hdr.ipv4.fragOffset, hdr.ipv4.ttl, hdr.ipv4.protocol,
              hdr.ipv4.srcAddr, hdr.ipv4.dstAddr },
            hdr.ipv4.hdrChecksum,
            HashAlgorithm.csum16);
        update_checksum_with_payload(hdr.tcp.isValid(),
            { hdr.ipv4.srcAddr, hdr.ipv4.dstAddr, 8w0, hdr.ipv4.protocol,
              meta.meta.tcpLength,
              hdr.tcp.srcPort, hdr.tcp.dstPort,
              hdr.tcp.seqNo, hdr.tcp.ackNo,
              hdr.tcp.dataOffset, hdr.tcp.res, hdr.tcp.flags,
              hdr.tcp.window, hdr.tcp.urgentPtr },
            hdr.tcp.checksum,
            HashAlgorithm.csum16);
    }
}

V1Switch(ParserImpl(), verifyChecksum(), ingress(), egress(), computeChecksum(), DeparserImpl()) main;
cmd.txt
table_set_default forward nop
table_set_default ecmp_group nop
table_set_default ecmp_nhop nop
table_set_default send_frame nop
table_add set_status1 _fail1 10.0.0.1/32 => 0
table_add set_status2 _fail2 10.0.0.1/32 => 0
table_add set_status3 _fail3 10.0.0.1/32 => 0
table_add set_status4 _fail4 10.0.0.1/32 => 0
table_add forward set_nhop 10.0.1.1/32 => 00:00:00:00:01:01 1
table_add forward set_nhop 10.0.2.2/32 => 00:00:00:00:02:02 2
table_add forward set_nhop 10.0.3.3/32 => 00:00:00:00:03:03 3
table_add forward set_nhop 10.0.4.4/32 => 00:00:00:00:04:04 4
table_add forward set_nhop 10.0.5.5/32 => 00:00:00:00:05:05 5
table_add forward read_flowlet_select 10.0.0.1/32 =>
table_add ecmp_group set_ecmp_select 10.0.0.1/32 => 0 4
table_add ecmp_nhop set_ecmp_nhop 1 => 00:00:00:00:02:02 10.0.2.2 2
table_add ecmp_nhop set_ecmp_nhop 2 => 00:00:00:00:03:03 10.0.3.3 3
table_add ecmp_nhop set_ecmp_nhop 3 => 00:00:00:00:04:04 10.0.4.4 4
table_add ecmp_nhop set_ecmp_nhop 4 => 00:00:00:00:05:05 10.0.5.5 5
table_add send_frame rewrite_sip 1 => 10.0.0.1
[test_topo.py]
import
os from
mininet.net import Containernet from
mininet.topo import Topo from
mininet.log import setLogLevel, info from
mininet.cli import CLI from
mininet.link import TCLink from
mininet.node import RemoteController from
mininet.node import Docker from
p4_mininet import P4Switch, P4Host import
argparse from
time import sleep parser
= argparse.ArgumentParser(description='Mininet
demo') parser.add_argument('--behavioral-exe', help='Path to behavioral executable',
type=str, action="store", required=False, default='simple_switch' ) parser.add_argument('--thrift-port', help='Thrift server port for table updates', type=int,
action="store", default=9090) parser.add_argument('--num-hosts', help='Number of hosts to connect to switch',
type=int, action="store", default=2) parser.add_argument('--mode', choices=['l2', 'l3'], type=str, default='l3') parser.add_argument('--json', help='Path to JSON config file',
type=str, action="store", required=True) parser.add_argument('--pcap-dump', help='Dump packets on
interfaces to pcap files',
type=str, action="store", required=False, default=False) args
= parser.parse_args() def
main(): net = Containernet(host
= P4Host, link=TCLink, controller = None) switch1 = net.addSwitch('s1', sw_path = args.behavioral_exe, json_path
= args.json, thrift_port
= args.thrift_port, cls =
P4Switch, pcap_dump = args.pcap_dump)
host1 = net.addHost('h1', mac = '00:00:00:00:01:01', ip="10.0.1.1/24") host2 = net.addDocker('h2', mac = '00:00:00:00:02:02', ip="10.0.2.2/24", dimage="apache-php-mysql:v7",cpu_period=50000,
cpu_quota=1000) host3 = net.addDocker('h3', mac = '00:00:00:00:03:03', ip="10.0.3.3/24", dimage="apache-php-mysql:v7",cpu_period=50000,
cpu_quota=1000) host4 = net.addDocker('h4', mac = '00:00:00:00:04:04', ip="10.0.4.4/24", dimage="apache-php-mysql:v7",cpu_period=50000,
cpu_quota=1000) host5 = net.addDocker('h5', mac = '00:00:00:00:05:05', ip="10.0.5.5/24", dimage="apache-php-mysql:v7",cpu_period=50000,
cpu_quota=1000) net.addLink(host1,
switch1, port1 = 0, port2 = 1, cls=TCLink, bw=10) net.addLink(host2,
switch1, port1 = 0, port2 = 2, cls=TCLink, bw=10) net.addLink(host3,
switch1, port1 = 0, port2 = 3, cls=TCLink, bw=10) net.addLink(host4,
switch1, port1 = 0, port2 = 4, cls=TCLink, bw=10) net.addLink(host5,
switch1, port1 = 0, port2 = 5, cls=TCLink, bw=10) net.start() h1,h2,h3,h4,h5=net.get('h1','h2','h3','h4','h5') h1.cmd("arp -s 10.0.1.254 00:00:00:01:01:01") h1.cmd("ip route add default via 10.0.1.254") h2.cmd("arp -s 10.0.2.254 00:00:00:02:02:02") h2.cmd("ip route del default") h2.cmd("ip route add default via 10.0.2.254") h2.cmd("cd
/var/www/html; echo h2 > a.htm ; python -m SimpleHTTPServer
80 &") h3.cmd("arp -s 10.0.3.254 00:00:00:03:03:03") h3.cmd("ip route del default") h3.cmd("ip route add default via 10.0.3.254") h3.cmd("cd
/var/www/html; echo h3 > a.htm ; python -m SimpleHTTPServer
80 &") h4.cmd("arp -s 10.0.4.254 00:00:00:04:04:04") h4.cmd("ip route del default") h4.cmd("ip route add default via 10.0.4.254") h4.cmd("cd /var/www/html;
echo h4 > a.htm; python -m SimpleHTTPServer 80
&") h5.cmd("arp -s 10.0.5.254 00:00:00:05:05:05") h5.cmd("ip route del default") h5.cmd("ip route add default via 10.0.5.254") h5.cmd("ethtool -K h5-eth0 tx off rx off") #Assume
h5 is down #h5.cmd("cd
/var/www/html; echo h5 > a.htm; python -m SimpleHTTPServer
80 &") sleep(1) os.system('sudo
/home/vagrant/behavioral-model/targets/simple_switch/simple_switch_CLI
--thrift-port=9090 < cmd.txt') #disable
health check, enable health check: remove # #os.system("sudo
/home/p4/mytest/p4-ConnectionHash/check_server.sh &>/dev/null
&") print('\033[0;32m'), print "Gotcha!" print('\033[0m') CLI(net) try: net.stop() except:
print('\033[0;31m'),
print('Stop error! Trying sudo mn -c')
print('\033[0m') os.system('sudo mn -c') os.system("kill `cat check_server.pid`")
print('\033[0;32m'),
print ('Stop successfully!')
print('\033[0m') if
__name__ == '__main__': setLogLevel('info') main() |
[controller: check_server.sh]
#!/bin/bash
# Health-check controller.
# Once per second, probe every address listed in ip.txt over HTTP and push
# the result into the switch: the single entry (handle 0) of set_statusN is
# modified to 1 for a failed server and to 0 for a healthy one.

CLI_PATH=/home/vagrant/behavioral-model/targets/simple_switch/simple_switch_CLI

# Record our pid so the topology script can stop this loop.
echo $$ > check_server.pid

# report_status ADDRESS FLAG
# Send the status FLAG of the server with address ADDRESS to the switch.
# Addresses that are not one of the four known servers are ignored.
report_status() {
    local entry
    case "$1" in
        172.17.0.2) entry="table_modify set_status1 _fail1 0 $2" ;;
        172.17.0.3) entry="table_modify set_status2 _fail2 0 $2" ;;
        172.17.0.4) entry="table_modify set_status3 _fail3 0 $2" ;;
        172.17.0.5) entry="table_modify set_status4 _fail4 0 $2" ;;
        *) return ;;
    esac
    echo "$entry" | $CLI_PATH --thrift-port 9090 &>/dev/null
}

while true
do
    # Start every round with empty result files.
    >fail.txt >ok.txt

    # Probe all servers in parallel, one background job per address.
    for ip in $(cat ip.txt)
    do
        {
            #ping -c1 -W1 $ip &>/dev/null
            mycode=$(curl -m 1 -s -w %{http_code} http://$ip -o /dev/null)
            if [ "$mycode" -ne 200 ]; then
                echo $ip >> fail.txt
            else
                echo $ip >> ok.txt
            fi
        }&
    done
    wait

    # Failed servers first, then healthy ones.
    if [ -s fail.txt ]; then
        for ip in $(cat fail.txt)
        do
            report_status "$ip" 1
        done
    fi

    if [ -s ok.txt ]; then
        for ip in $(cat ok.txt)
        do
            report_status "$ip" 0
        done
    fi

    sleep 1
done
[mycurl.sh]
#!/bin/bash
# Send 1000 HTTP requests to http://10.0.0.1/a.htm with at most $thread
# requests in flight, then print how many succeeded and how many failed.

thread=5
ok=/tmp/ok.txt
fail=/tmp/fail.txt

# A FIFO opened on descriptor 8 holds one token (a line) per allowed job.
tmp_fifofile=/tmp/$$.fifo
mkfifo $tmp_fifofile
exec 8<> $tmp_fifofile
rm $tmp_fifofile

# Empty both result files.
>$ok >$fail

# Put the initial tokens into the FIFO.
for ((slot = 0; slot < thread; slot++))
do
    echo >&8
done

for i in {1..1000}
do
    # Take a token; this waits while $thread jobs are already running.
    read -u 8
    {
        if curl http://10.0.0.1/a.htm &>/dev/null; then
            echo "curl ok" >> $ok
        else
            echo "curl fail" >> $fail
        fi
        # Hand the token back.
        echo >&8
    }&
done
wait

exec 8>&-
echo "all finish..."

yes=$(wc -l $ok)
no=$(wc -l $fail)
echo "ok:" $yes
echo "fail:" $no
Execution:
No controller case
We use curl to send 1000 HTTP requests. Only 688 succeed and 312 fail, because the LB does not know that h5 is down.
With controller case:
#disable health check,
enable health check: remove #
os.system("sudo
/home/p4/mytest/p4-ConnectionHash/check_server.sh &>/dev/null
&")
re-run the program again.
All 1000 requests succeed, because the controller tells the LB not to dispatch requests to h5.
Dr. Chih-Heng Ke (smallko@gmail.com)
Department of Computer Science and Information
Engineering,
National Quemoy University, Kinmen, Taiwan.