Connection Hash Load Balancer for P4 switch

 

In this lab, I use the connection 5-tuple (source IP, source port, destination IP, destination port, and protocol, i.e. TCP or UDP) as the hash key that selects which HTTP server handles a connection.

 

[Topology]

P4-topo.yml

# Settings applied to every switch of the topology.
defaults:
  switch:
    bmv2: ../../bmv2
    p4c: ../../p4c-bmv2
    p4src: load_balance.p4
    dump: true
    port: 22222
    verbose: "debug"

# Hosts. Each one pins a static ARP entry for its gateway address, installs a
# default route through that gateway and turns tx/rx offload off on its
# interface.
host:
- name: h1
  ip: 10.0.1.1/24
  mac: 00:00:00:00:01:01
  command:
    - arp -s 10.0.1.254 00:00:00:01:01:01
    - ip route add default via 10.0.1.254
    - ethtool -K h1-eth0 tx off rx off
- name: h2
  ip: 10.0.2.2/24
  mac: 00:00:00:00:02:02
  command:
    - arp -s 10.0.2.254 00:00:00:02:02:02
    - ip route add default via 10.0.2.254
    - ethtool -K h2-eth0 tx off rx off
- name: h3
  ip: 10.0.3.3/24
  mac: 00:00:00:00:03:03
  command:
    - arp -s 10.0.3.254 00:00:00:03:03:03
    - ip route add default via 10.0.3.254
    - ethtool -K h3-eth0 tx off rx off

# The single switch; its table entries are loaded from s1-commands.txt.
switch:
- name: s1
  commands: s1-commands.txt

# Links, kept in their original order (h1, then h2, then h3).
link:
- {source: h1, destination: s1}
- {source: s1, destination: h2}
- {source: s1, destination: h3}

 

s1-commands.txt

table_set_default forward nop

table_set_default ecmp_group nop

table_set_default ecmp_nhop nop

table_set_default send_frame nop

table_add forward set_nhop 10.0.1.1/32 => 00:00:00:00:01:01 1

table_add forward set_nhop 10.0.2.2/32 => 00:00:00:00:02:02 2

table_add forward set_nhop 10.0.3.3/32 => 00:00:00:00:03:03 3 

table_add ecmp_group set_ecmp_select 10.0.0.1/32 => 0 2

table_add ecmp_nhop set_ecmp_nhop 1 => 00:00:00:00:02:02 10.0.2.2 2

table_add ecmp_nhop set_ecmp_nhop 2 => 00:00:00:00:03:03 10.0.3.3 3

table_add send_frame rewrite_sip 1 => 10.0.0.1

 

load_balance.p4

#define ETHERTYPE_IPV4 0x0800

#define ETHERTYPE_ARP  0x0806

#define IPPROTO_ICMP   0x01

#define IP_PROTOCOLS_TCP 6

#define IP_PROTOCOLS_UDP 17

#define ARP_HTYPE_ETHERNET  0x0001

#define ARP_PTYPE_IPV4      0x0800

#define ARP_HLEN_ETHERNET   6

#define ARP_PLEN_IPV4     4

#define ARP_OPER_REQUEST    1

#define ARP_OPER_REPLY      2

#define ICMP_ECHO_REQUEST   8

#define ICMP_ECHO_REPLY     0

 

header_type ethernet_t {

    fields {

        dstAddr : 48;

        srcAddr : 48;

        etherType : 16;

    }

}

 

header_type ipv4_t {

    fields {

        version : 4;

        ihl : 4;

        diffserv : 8;

        totalLen : 16;

        identification : 16;

        flags : 3;

        fragOffset : 13;

        ttl : 8;

        protocol : 8;

        hdrChecksum : 16;

        srcAddr : 32;

        dstAddr: 32;

    }

}

 

header_type arp_t {

    fields {

        htype : 16;

        ptype : 16;

        hlen : 8;

        plen : 8;

        opcode : 16;

        hwSrcAddr : 48;

        protoSrcAddr : 32;

        hwDstAddr : 48;

        protoDstAddr : 32;

    }

}

 

header_type tcp_t {

    fields {

        srcPort : 16;

        dstPort : 16;

        seqNo : 32;

        ackNo : 32;

        dataOffset : 4;

        res : 4;

        flags : 8;

        window : 16;

        checksum : 16;

        urgentPtr : 16;

    }

}

 

header_type udp_t {

    fields {

        srcPort : 16;

        dstPort : 16;

        length_ : 16;

       checksum : 16;

    }

}

 

header_type mymetadata_t {

    fields {

        ecmp_select : 14;

    }

}

 

metadata mymetadata_t mymetadata;

 

header ethernet_t ethernet;

 

parser start {

    set_metadata(meta.if_index, standard_metadata.ingress_port);

    return parse_ethernet;

}

 

parser parse_ethernet {

    extract(ethernet);

    return select(latest.etherType){

      ETHERTYPE_IPV4 : parse_ipv4;

      ETHERTYPE_ARP  : parse_arp;

      default : ingress;

    }

}

 

header ipv4_t ipv4;

 

parser parse_ipv4 {

    extract(ipv4);

    set_metadata(meta.ipv4_sa, ipv4.srcAddr);

    set_metadata(meta.ipv4_da, ipv4.dstAddr);

    set_metadata(meta.tcpLength, ipv4.totalLen - 20);

    return select(latest.protocol) {

        IP_PROTOCOLS_TCP : parse_tcp;

        IP_PROTOCOLS_UDP : parse_udp;

        default: ingress;

    }

}

 

header arp_t arp;

 

parser parse_arp{

    extract(arp);

    return ingress;

}

 

header tcp_t tcp;

 

parser parse_tcp {

    extract(tcp);

    set_metadata(meta.tcp_sp, tcp.srcPort);

    set_metadata(meta.tcp_dp, tcp.dstPort);

    return ingress;

}

 

header udp_t udp;

 

parser parse_udp {

    extract(udp);

    return ingress;

}

 

header_type meta_t {

    fields {

        do_forward : 1;

        ipv4_sa : 32;

        ipv4_da : 32;

        tcp_sp : 16;

        tcp_dp : 16;

        nhop_ipv4 : 32;

        if_ipv4_addr : 32;

        if_mac_addr : 48;

        is_ext_if : 1;

        tcpLength : 16;

        if_index : 8;

    }

}

 

metadata meta_t meta;

 

field_list ipv4_checksum_list{

    ipv4.version;

    ipv4.ihl;

    ipv4.diffserv;

    ipv4.totalLen;

    ipv4.identification;

    ipv4.flags;

    ipv4.fragOffset;

    ipv4.ttl;

    ipv4.protocol;

    ipv4.srcAddr;

    ipv4.dstAddr;  

}

 

field_list_calculation ipv4_checksum{

    input {

      ipv4_checksum_list;

    }

    algorithm : csum16;

    output_width : 16;

}

 

calculated_field ipv4.hdrChecksum {

    verify ipv4_checksum;

    update ipv4_checksum;

}

 

field_list tcp_checksum_list {

        ipv4.srcAddr;

        ipv4.dstAddr;

        8'0;

        ipv4.protocol;

        meta.tcpLength;

        tcp.srcPort;

        tcp.dstPort;

        tcp.seqNo;

        tcp.ackNo;

        tcp.dataOffset;

        tcp.res;

        tcp.flags;

        tcp.window;

        tcp.urgentPtr;

        payload;

}

 

field_list_calculation tcp_checksum {

    input {

        tcp_checksum_list;

    }

    algorithm : csum16;

    output_width : 16;

}

 

// TCP checksum, computed over tcp_checksum_list (IPv4 pseudo-header fields,
// the TCP header without its checksum field, and the payload).
// Both the verification and the update are restricted to packets in which a
// TCP header was actually parsed; without the guard the calculation would
// also run for UDP, ICMP and non-IP packets, whose tcp fields are not valid.
calculated_field tcp.checksum {
    verify tcp_checksum if(valid(tcp));
    update tcp_checksum if(valid(tcp));
}

 

field_list my_hash_fields {

    ipv4.srcAddr;

    ipv4.dstAddr;

    ipv4.protocol;

    tcp.srcPort;

    tcp.dstPort;

}

 

field_list_calculation my_map_hash {

    input {

        my_hash_fields;

    }

    algorithm : crc16;

    output_width : 14;

}

 

action _drop() {

    drop();

}

 

action nop() {}

 

action set_ecmp_nhop( nhop_mac, nhop_ipv4, port) {

    modify_field(standard_metadata.egress_spec, port);

    modify_field(ipv4.dstAddr, nhop_ipv4);

    modify_field(ethernet.dstAddr, nhop_mac);

    add_to_field(ipv4.ttl, -1);

}

 

// Picks the server index for the packet from a hash of its connection fields.
//   ecmp_base  - offset added to the hash result
//   ecmp_count - number of candidate servers (the hash is reduced modulo this)
// The hash input is my_map_hash, i.e. crc16 over my_hash_fields.
action set_ecmp_select(ecmp_base, ecmp_count) {
    modify_field_with_hash_based_offset(mymetadata.ecmp_select, ecmp_base,
                                        my_map_hash, ecmp_count);
    // Shift the result up by one so that, with ecmp_base 0, the selection
    // starts at 1; ecmp_nhop is keyed on this value.
    add_to_field(mymetadata.ecmp_select, 1);
}

 

table ecmp_group {

    reads {

      ipv4.dstAddr: lpm;

    }

    actions {

        _drop;

        set_ecmp_select;

        nop;

    }

    size: 1024;

}

 

table ecmp_nhop {

    reads {

      mymetadata.ecmp_select: exact;

    }

    actions {

        _drop;

        set_ecmp_nhop;

        nop;

    }

    size: 2;

}

 

action set_nhop(dmac, port) {

    modify_field(standard_metadata.egress_spec, port);

    modify_field(ethernet.dstAddr, dmac);

    add_to_field(ipv4.ttl, -1);

}

 

table forward {

    reads {

      ipv4.dstAddr: lpm;

    }

    actions {

        _drop;

        set_nhop;

        nop;

    }

    size: 1024;

}

 

action rewrite_sip(sip) {

    modify_field(ipv4.srcAddr, sip);

}

 

table send_frame {

    reads {

        standard_metadata.egress_port: exact;

    }

    actions {

        _drop;

        rewrite_sip;

        nop;

    }

    size: 256;

}

 

// Ingress pipeline.
//   forward    - plain routing by IPv4 destination (set_nhop)
//   ecmp_group - for destinations that are load-balanced, hashes the
//                connection into mymetadata.ecmp_select
//   ecmp_nhop  - turns that selection into the real server (MAC, IP, port)
// All three tables match on, or rewrite, IPv4 fields, so they are applied
// only when an IPv4 header was parsed; other packets (e.g. ARP) skip them.
control ingress {
    if (valid(ipv4)) {
        apply(forward);
        apply(ecmp_group);
        apply(ecmp_nhop);
    }
}

 

// Egress pipeline: send_frame is keyed on the egress port and can rewrite the
// IPv4 source address (rewrite_sip), drop the packet, or do nothing.
control egress {
    apply(send_frame);
}

 

[Execution]

Use xterm to open terminals for h1, h2, and h3

 

At h2 and h3, start the http server

 

At h1, use curl to fetch the web page from the load-balanced address 10.0.0.1. (The source port differs on each request, so the hash, and therefore the chosen server, may differ as well.)

(updated: 2020/5/21) Add Health Check

 

[Topology]

 

The controller periodically checks the health of the web servers. If a web server is not working, the controller tells the load balancer (LB) not to dispatch HTTP requests to the malfunctioning server.

 

[ConnectionHash.p4]

#include <core.p4>

#include <v1model.p4>

 

struct meta_t {

    bit<1>  do_forward;

    bit<32> ipv4_sa;

    bit<32> ipv4_da;

    bit<16> tcp_sp;

    bit<16> tcp_dp;

    bit<32> nhop_ipv4;

    bit<32> if_ipv4_addr;

    bit<48> if_mac_addr;

    bit<1>  is_ext_if;

    bit<16> tcpLength;

    bit<8>  if_index;   

}

 

// Per-packet metadata used by the load-balancing logic in the ingress control.
struct mymetadata_t {
    // Index into the flowlet_select register (result of a crc16 hash).
    bit<13> flowlet_map_index;
    // Selected server; used as the exact-match key of table ecmp_nhop.
    bit<3>  ecmp_select;
    // Health flags written by actions _fail1.._fail4; the ingress logic
    // treats the value 1 as "this server has failed".
    bit<1>  server1;
    bit<1>  server2;
    bit<1>  server3;
    bit<1>  server4;
}

 

header arp_t {

    bit<16> htype;

    bit<16> ptype;

    bit<8>  hlen;

    bit<8>  plen;

    bit<16> opcode;

    bit<48> hwSrcAddr;

    bit<32> protoSrcAddr;

    bit<48> hwDstAddr;

    bit<32> protoDstAddr;

}

 

header ethernet_t {

    bit<48> dstAddr;

    bit<48> srcAddr;

    bit<16> etherType;

}

 

header ipv4_t {

    bit<4>  version;

    bit<4>  ihl;

    bit<8>  diffserv;

    bit<16> totalLen;

    bit<16> identification;

    bit<3>  flags;

    bit<13> fragOffset;

    bit<8>  ttl;

    bit<8>  protocol;

    bit<16> hdrChecksum;

    bit<32> srcAddr;

    bit<32> dstAddr;

}

 

header tcp_t {

    bit<16> srcPort;

    bit<16> dstPort;

    bit<32> seqNo;

    bit<32> ackNo;

    bit<4>  dataOffset;

    bit<4>  res;

    bit<8>  flags;

    bit<16> window;

    bit<16> checksum;

    bit<16> urgentPtr;

}

 

header udp_t {

    bit<16> srcPort;

    bit<16> dstPort;

    bit<16> length_;

    bit<16> checksum;

}

 

struct metadata {

    @name(".meta")

    meta_t       meta;

    @name(".mymetadata")

    mymetadata_t mymetadata;

}

 

struct headers {

    @name(".arp")

    arp_t      arp;

    @name(".ethernet")

    ethernet_t ethernet;

    @name(".ipv4")

    ipv4_t     ipv4;

    @name(".tcp")

    tcp_t      tcp;

    @name(".udp")

    udp_t      udp;

}

 

// Parser: Ethernet, then ARP or IPv4, then TCP or UDP.
// Besides extracting headers it copies a few fields into meta.meta and resets
// the per-packet server health flags.
parser ParserImpl(packet_in packet, out headers hdr, inout metadata meta, inout standard_metadata_t standard_metadata) {
    @name(".start") state start {
        // Start every packet with all four servers marked healthy; the
        // set_statusN tables in ingress overwrite these flags on a hit.
        // (Previously only server1 and server2 were reset here.)
        meta.mymetadata.server1 = 0;
        meta.mymetadata.server2 = 0;
        meta.mymetadata.server3 = 0;
        meta.mymetadata.server4 = 0;
        meta.meta.if_index = (bit<8>)standard_metadata.ingress_port;
        transition parse_ethernet;
    }
    @name(".parse_ethernet") state parse_ethernet {
        packet.extract(hdr.ethernet);
        transition select(hdr.ethernet.etherType) {
            16w0x800: parse_ipv4;
            16w0x806: parse_arp;
            default: accept;
        }
    }
    @name(".parse_arp") state parse_arp {
        packet.extract(hdr.arp);
        transition accept;
    }
    @name(".parse_ipv4") state parse_ipv4 {
        packet.extract(hdr.ipv4);
        meta.meta.ipv4_sa = hdr.ipv4.srcAddr;
        meta.meta.ipv4_da = hdr.ipv4.dstAddr;
        // Length used in the TCP pseudo-header: total length minus a fixed
        // 20-byte IPv4 header (IPv4 options are not accounted for).
        meta.meta.tcpLength = hdr.ipv4.totalLen - 16w20;
        transition select(hdr.ipv4.protocol) {
            8w6: parse_tcp;
            8w17: parse_udp;
            default: accept;
        }
    }
    @name(".parse_tcp") state parse_tcp {
        packet.extract(hdr.tcp);
        meta.meta.tcp_sp = hdr.tcp.srcPort;
        meta.meta.tcp_dp = hdr.tcp.dstPort;
        transition accept;
    }
    @name(".parse_udp") state parse_udp {
        packet.extract(hdr.udp);
        transition accept;
    }
}

 

// Egress pipeline: one table, keyed on the egress port, that may rewrite the
// IPv4 source address of the outgoing packet.
control egress(inout headers hdr, inout metadata meta, inout standard_metadata_t standard_metadata) {
    // Leaves the packet untouched.
    @name(".nop") action nop() { }

    // Marks the packet to be dropped.
    @name("._drop") action _drop() {
        mark_to_drop(standard_metadata);
    }

    // Replaces the IPv4 source address with `sip`.
    @name(".rewrite_sip") action rewrite_sip(bit<32> sip) {
        hdr.ipv4.srcAddr = sip;
    }

    @name(".send_frame") table send_frame {
        key = {
            standard_metadata.egress_port: exact;
        }
        actions = {
            _drop;
            rewrite_sip;
            nop;
        }
        size = 256;
    }

    apply {
        send_frame.apply();
    }
}

 

register<bit<3>>(32w8192) flowlet_select;

 

// Ingress pipeline of the connection-hash load balancer with health checks.
//
// * forward      - routes by IPv4 destination, or (read_flowlet_select) looks
//                  up the server previously chosen for this connection.
// * ecmp_group   - on TCP SYN packets, hashes the connection to a server.
// * set_statusN  - one single-entry table per server; the action parameter of
//                  its entry is that server's health flag (1 = failed).
// * ecmp_nhop    - rewrites the packet towards the selected server.
//
// When the hashed server is flagged as failed, the next healthy server (in
// cyclic order) is used instead and the register entry for the connection is
// updated so that later packets of the connection follow the same choice.
control ingress(inout headers hdr, inout metadata meta, inout standard_metadata_t standard_metadata) {
    // Marks the packet to be dropped.
    @name("._drop") action _drop() {
        mark_to_drop(standard_metadata);
    }

    // _failN stores the health flag of server N in the packet metadata.
    action _fail1(bit<1> fail) {
        meta.mymetadata.server1 = fail;
    }

    action _fail2(bit<1> fail) {
        meta.mymetadata.server2 = fail;
    }

    action _fail3(bit<1> fail) {
        meta.mymetadata.server3 = fail;
    }

    action _fail4(bit<1> fail) {
        meta.mymetadata.server4 = fail;
    }

    // Chooses a server for a new connection and remembers the choice.
    // ecmp_select is derived from a crc16 hash of the connection fields,
    // bounded by ecmp_base/ecmp_count, then incremented by one; it is stored
    // in flowlet_select at an index derived from the same fields.
    @name(".set_ecmp_select") action set_ecmp_select(bit<8> ecmp_base, bit<8> ecmp_count) {
        hash(meta.mymetadata.ecmp_select, HashAlgorithm.crc16, (bit<13>)ecmp_base, { hdr.ipv4.srcAddr, hdr.ipv4.dstAddr, hdr.ipv4.protocol, hdr.tcp.srcPort, hdr.tcp.dstPort }, (bit<26>)ecmp_count);
        meta.mymetadata.ecmp_select = meta.mymetadata.ecmp_select + 1;

        hash(meta.mymetadata.flowlet_map_index, HashAlgorithm.crc16, (bit<13>)0, { hdr.ipv4.srcAddr, hdr.ipv4.dstAddr, hdr.ipv4.protocol, hdr.tcp.srcPort, hdr.tcp.dstPort }, (bit<26>)8192);
        flowlet_select.write((bit<32>)meta.mymetadata.flowlet_map_index, meta.mymetadata.ecmp_select);
    }

    // Restores the server previously stored for this connection.
    action read_flowlet_select() {
        hash(meta.mymetadata.flowlet_map_index, HashAlgorithm.crc16, (bit<13>)0, { hdr.ipv4.srcAddr, hdr.ipv4.dstAddr, hdr.ipv4.protocol, hdr.tcp.srcPort, hdr.tcp.dstPort }, (bit<26>)8192);
        flowlet_select.read(meta.mymetadata.ecmp_select, (bit<32>)meta.mymetadata.flowlet_map_index);
    }

    // Stores the current value of ecmp_select (possibly 0, meaning "no server
    // available") for this connection. Called after a fail-over decision.
    action store_selection() {
        hash(meta.mymetadata.flowlet_map_index, HashAlgorithm.crc16, (bit<13>)0, { hdr.ipv4.srcAddr, hdr.ipv4.dstAddr, hdr.ipv4.protocol, hdr.tcp.srcPort, hdr.tcp.dstPort }, (bit<26>)8192);
        flowlet_select.write((bit<32>)meta.mymetadata.flowlet_map_index, meta.mymetadata.ecmp_select);
    }

    @name(".nop") action nop() {
    }

    // Sends the packet to the selected server: output port, destination IP
    // and MAC are rewritten and the TTL is decremented.
    @name(".set_ecmp_nhop") action set_ecmp_nhop(bit<48> nhop_mac, bit<32> nhop_ipv4, bit<9> port) {
        standard_metadata.egress_spec = port;
        hdr.ipv4.dstAddr = nhop_ipv4;
        hdr.ethernet.dstAddr = nhop_mac;
        hdr.ipv4.ttl = hdr.ipv4.ttl - 8w1;
    }

    // Plain routing: output port and destination MAC, TTL decremented.
    @name(".set_nhop") action set_nhop(bit<48> dmac, bit<9> port) {
        standard_metadata.egress_spec = port;
        hdr.ethernet.dstAddr = dmac;
        hdr.ipv4.ttl = hdr.ipv4.ttl - 8w1;
    }

    @name(".ecmp_group") table ecmp_group {
        key = {
            hdr.ipv4.dstAddr: lpm;
        }
        actions = {
            _drop;
            set_ecmp_select;
            nop;
        }
        size = 1024;
    }

    @name(".ecmp_nhop") table ecmp_nhop {
        key = {
            meta.mymetadata.ecmp_select: exact;
        }
        actions = {
            _drop;
            set_ecmp_nhop;
            nop;
        }
        size = 1024;
    }

    @name(".forward") table forward {
        key = {
            hdr.ipv4.dstAddr: lpm;
        }
        actions = {
            _drop;
            set_nhop;
            nop;
            read_flowlet_select;
        }
        size = 1024;
    }

    table set_status1 {
        key = {
            hdr.ipv4.dstAddr: lpm;
        }
        actions = {
            _fail1;
        }
        size = 1;
    }

    table set_status2 {
        key = {
            hdr.ipv4.dstAddr: lpm;
        }
        actions = {
            _fail2;
        }
        size = 1;
    }

    table set_status3 {
        key = {
            hdr.ipv4.dstAddr: lpm;
        }
        actions = {
            _fail3;
        }
        size = 1;
    }

    table set_status4 {
        key = {
            hdr.ipv4.dstAddr: lpm;
        }
        actions = {
            _fail4;
        }
        size = 1;
    }

    apply {
        forward.apply();

        // Load all four health flags BEFORE any fail-over decision. The
        // original code applied set_statusN only right before handling
        // server N, so the fallback tests for the other servers always read
        // flags that had not been loaded yet. A table miss leaves the flag at
        // 0 (healthy), so packets not addressed to the balanced IP never
        // trigger a fail-over.
        meta.mymetadata.server1 = 0;
        meta.mymetadata.server2 = 0;
        meta.mymetadata.server3 = 0;
        meta.mymetadata.server4 = 0;
        set_status1.apply();
        set_status2.apply();
        set_status3.apply();
        set_status4.apply();

        // A server is (re)selected only on packets that carry the TCP SYN
        // flag (bit 0x02); tcp.flags is read only when the header is valid.
        if (hdr.tcp.isValid() && (hdr.tcp.flags & 8w2) != 8w0) {
            ecmp_group.apply();

            if (meta.mymetadata.server1 == 1 && meta.mymetadata.ecmp_select == 1) {
                // server1 failed and was chosen: try 2, 3, 4.
                if (meta.mymetadata.server2 != 1) {
                    meta.mymetadata.ecmp_select = 2;
                } else if (meta.mymetadata.server3 != 1) {
                    meta.mymetadata.ecmp_select = 3;
                } else if (meta.mymetadata.server4 != 1) {
                    meta.mymetadata.ecmp_select = 4;
                } else {
                    // all servers failed
                    meta.mymetadata.ecmp_select = 0;
                    _drop();
                }
                store_selection();
            } else if (meta.mymetadata.server2 == 1 && meta.mymetadata.ecmp_select == 2) {
                // server2 failed and was chosen: try 3, 4, 1.
                if (meta.mymetadata.server3 != 1) {
                    meta.mymetadata.ecmp_select = 3;
                } else if (meta.mymetadata.server4 != 1) {
                    meta.mymetadata.ecmp_select = 4;
                } else if (meta.mymetadata.server1 != 1) {
                    // (the original tested server4 a second time here)
                    meta.mymetadata.ecmp_select = 1;
                } else {
                    // all servers failed
                    meta.mymetadata.ecmp_select = 0;
                    _drop();
                }
                store_selection();
            } else if (meta.mymetadata.server3 == 1 && meta.mymetadata.ecmp_select == 3) {
                // server3 failed and was chosen: try 4, 1, 2.
                if (meta.mymetadata.server4 != 1) {
                    meta.mymetadata.ecmp_select = 4;
                } else if (meta.mymetadata.server1 != 1) {
                    meta.mymetadata.ecmp_select = 1;
                } else if (meta.mymetadata.server2 != 1) {
                    meta.mymetadata.ecmp_select = 2;
                } else {
                    // all servers failed
                    meta.mymetadata.ecmp_select = 0;
                    _drop();
                }
                store_selection();
            } else if (meta.mymetadata.server4 == 1 && meta.mymetadata.ecmp_select == 4) {
                // server4 failed and was chosen: try 1, 2, 3.
                if (meta.mymetadata.server1 != 1) {
                    meta.mymetadata.ecmp_select = 1;
                } else if (meta.mymetadata.server2 != 1) {
                    meta.mymetadata.ecmp_select = 2;
                } else if (meta.mymetadata.server3 != 1) {
                    // (the original tested server4 here, so server3 could
                    // never be chosen and the packet was dropped instead)
                    meta.mymetadata.ecmp_select = 3;
                } else {
                    // all servers failed
                    meta.mymetadata.ecmp_select = 0;
                    _drop();
                }
                store_selection();
            }
        }

        if (hdr.ipv4.isValid()) {
            ecmp_nhop.apply();
        }
    }
}

 

// Deparser: writes the headers back onto the wire in this order. emit() only
// outputs headers that are valid, so at most one of arp/ipv4 and at most one
// of udp/tcp appears in any given packet.
control DeparserImpl(packet_out packet, in headers hdr) {
    apply {
        packet.emit(hdr.ethernet);
        packet.emit(hdr.arp);
        packet.emit(hdr.ipv4);
        packet.emit(hdr.udp);
        packet.emit(hdr.tcp);
    }
}

 

// Verifies the IPv4 header checksum and the TCP checksum of incoming packets.
// Each verification runs only when the corresponding header was parsed;
// with the previous constant `true` condition the checks were also evaluated
// for packets without an IPv4 or TCP header.
control verifyChecksum(inout headers hdr, inout metadata meta) {
    apply {
        verify_checksum(hdr.ipv4.isValid(), { hdr.ipv4.version, hdr.ipv4.ihl, hdr.ipv4.diffserv, hdr.ipv4.totalLen, hdr.ipv4.identification, hdr.ipv4.flags, hdr.ipv4.fragOffset, hdr.ipv4.ttl, hdr.ipv4.protocol, hdr.ipv4.srcAddr, hdr.ipv4.dstAddr }, hdr.ipv4.hdrChecksum, HashAlgorithm.csum16);
        // TCP checksum: pseudo-header fields, TCP header (without the
        // checksum field itself) and the payload.
        verify_checksum_with_payload(hdr.tcp.isValid(), { hdr.ipv4.srcAddr, hdr.ipv4.dstAddr, 8w0, hdr.ipv4.protocol, meta.meta.tcpLength, hdr.tcp.srcPort, hdr.tcp.dstPort, hdr.tcp.seqNo, hdr.tcp.ackNo, hdr.tcp.dataOffset, hdr.tcp.res, hdr.tcp.flags, hdr.tcp.window, hdr.tcp.urgentPtr }, hdr.tcp.checksum, HashAlgorithm.csum16);
    }
}

 

// Recomputes the IPv4 header checksum and the TCP checksum after the
// pipeline has rewritten addresses and the TTL. Each update runs only when
// the corresponding header is valid, instead of unconditionally.
control computeChecksum(inout headers hdr, inout metadata meta) {
    apply {
        update_checksum(hdr.ipv4.isValid(), { hdr.ipv4.version, hdr.ipv4.ihl, hdr.ipv4.diffserv, hdr.ipv4.totalLen, hdr.ipv4.identification, hdr.ipv4.flags, hdr.ipv4.fragOffset, hdr.ipv4.ttl, hdr.ipv4.protocol, hdr.ipv4.srcAddr, hdr.ipv4.dstAddr }, hdr.ipv4.hdrChecksum, HashAlgorithm.csum16);
        // TCP checksum: pseudo-header fields, TCP header (without the
        // checksum field itself) and the payload.
        update_checksum_with_payload(hdr.tcp.isValid(), { hdr.ipv4.srcAddr, hdr.ipv4.dstAddr, 8w0, hdr.ipv4.protocol, meta.meta.tcpLength, hdr.tcp.srcPort, hdr.tcp.dstPort, hdr.tcp.seqNo, hdr.tcp.ackNo, hdr.tcp.dataOffset, hdr.tcp.res, hdr.tcp.flags, hdr.tcp.window, hdr.tcp.urgentPtr }, hdr.tcp.checksum, HashAlgorithm.csum16);
    }
}

 

V1Switch(ParserImpl(), verifyChecksum(), ingress(), egress(), computeChecksum(), DeparserImpl()) main;

 

cmd.txt

table_set_default forward nop

table_set_default ecmp_group nop

table_set_default ecmp_nhop nop

table_set_default send_frame nop

table_add set_status1 _fail1 10.0.0.1/32 => 0

table_add set_status2 _fail2 10.0.0.1/32 => 0

table_add set_status3 _fail3 10.0.0.1/32 => 0

table_add set_status4 _fail4 10.0.0.1/32 => 0

table_add forward set_nhop 10.0.1.1/32 => 00:00:00:00:01:01 1

table_add forward set_nhop 10.0.2.2/32 => 00:00:00:00:02:02 2

table_add forward set_nhop 10.0.3.3/32 => 00:00:00:00:03:03 3

table_add forward set_nhop 10.0.4.4/32 => 00:00:00:00:04:04 4

table_add forward set_nhop 10.0.5.5/32 => 00:00:00:00:05:05 5

table_add forward read_flowlet_select 10.0.0.1/32 =>

table_add ecmp_group set_ecmp_select 10.0.0.1/32 => 0 4

table_add ecmp_nhop set_ecmp_nhop 1 => 00:00:00:00:02:02 10.0.2.2 2

table_add ecmp_nhop set_ecmp_nhop 2 => 00:00:00:00:03:03 10.0.3.3 3

table_add ecmp_nhop set_ecmp_nhop 3 => 00:00:00:00:04:04 10.0.4.4 4

table_add ecmp_nhop set_ecmp_nhop 4 => 00:00:00:00:05:05 10.0.5.5 5

table_add send_frame rewrite_sip 1 => 10.0.0.1

 

[test_topo.py]

import os

from mininet.net import Containernet

from mininet.topo import Topo

from mininet.log import setLogLevel, info

from mininet.cli import CLI

from mininet.link import TCLink

from mininet.node import RemoteController

from mininet.node import Docker

from p4_mininet import P4Switch, P4Host

 

import argparse

from time import sleep

 

# Command-line interface of the demo. Only --json is mandatory; every other
# option falls back to the default listed in the table below.
parser = argparse.ArgumentParser(description='Mininet demo')
for _flag, _spec in (
    ('--behavioral-exe', dict(help='Path to behavioral executable',
                              type=str, default='simple_switch')),
    ('--thrift-port', dict(help='Thrift server port for table updates',
                           type=int, default=9090)),
    ('--num-hosts', dict(help='Number of hosts to connect to switch',
                         type=int, default=2)),
    ('--mode', dict(choices=['l2', 'l3'], type=str, default='l3')),
    ('--json', dict(help='Path to JSON config file',
                    type=str, required=True)),
    # Any value given on the command line is kept as a string; the default
    # when the option is absent is the boolean False.
    ('--pcap-dump', dict(help='Dump packets on interfaces to pcap files',
                         type=str, default=False)),
):
    parser.add_argument(_flag, **_spec)
del _flag, _spec

 

 

args = parser.parse_args()

       

def main():
    """Build the test network, load the switch tables and open the Mininet CLI.

    Topology: client h1 and four Docker web servers h2..h5, each attached to
    P4 switch s1 on ports 1..5. h2..h4 serve a one-line page a.htm on port 80;
    h5 deliberately runs no web server so that it plays the failed server.

    Reads the module-level ``args`` (behavioral_exe, json, thrift_port,
    pcap_dump). When the CLI exits, the network is stopped and a health
    checker started from this script (if any) is terminated.
    """
    green, red, reset = '\033[0;32m', '\033[0;31m', '\033[0m'

    net = Containernet(host=P4Host, link=TCLink, controller=None)
    switch1 = net.addSwitch('s1', sw_path=args.behavioral_exe,
                            json_path=args.json,
                            thrift_port=args.thrift_port,
                            cls=P4Switch, pcap_dump=args.pcap_dump)

    # Host n gets MAC 00:00:00:00:0n:0n, address 10.0.n.n/24 and switch port n.
    client = net.addHost('h1', mac='00:00:00:00:01:01', ip="10.0.1.1/24")
    servers = []
    for n in range(2, 6):
        servers.append(net.addDocker('h%d' % n,
                                     mac='00:00:00:00:%02d:%02d' % (n, n),
                                     ip="10.0.%d.%d/24" % (n, n),
                                     dimage="apache-php-mysql:v7",
                                     cpu_period=50000, cpu_quota=1000))
    for port, node in enumerate([client] + servers, 1):
        net.addLink(node, switch1, port1=0, port2=port, cls=TCLink, bw=10)

    net.start()

    # Static ARP entry for the gateway address plus a default route through it.
    client.cmd("arp -s 10.0.1.254 00:00:00:01:01:01")
    client.cmd("ip route add default via 10.0.1.254")
    for n, server in enumerate(servers, 2):
        server.cmd("arp -s 10.0.%d.254 00:00:00:%02d:%02d:%02d" % (n, n, n, n))
        # The containers come with their own default route; replace it.
        server.cmd("ip route del default")
        server.cmd("ip route add default via 10.0.%d.254" % n)
        if n == 5:
            # Assume h5 is down: no web server is started on it.
            server.cmd("ethtool -K h5-eth0 tx off rx off")
        else:
            server.cmd("cd /var/www/html; echo h%d > a.htm ; "
                       "python -m SimpleHTTPServer 80 &" % n)

    # Give the switch a moment to come up before loading its tables.
    sleep(1)
    # Use the same Thrift port the switch was started with (was hard-coded
    # to 9090, which broke any run with a different --thrift-port).
    os.system('sudo /home/vagrant/behavioral-model/targets/simple_switch/'
              'simple_switch_CLI --thrift-port=%d < cmd.txt' % args.thrift_port)
    #disable health check, enable health check: remove #
    #os.system("sudo /home/p4/mytest/p4-ConnectionHash/check_server.sh &>/dev/null &")

    # A single-argument print() behaves the same under Python 2 and 3.
    print(green + 'Gotcha!' + reset)

    CLI(net)
    try:
        net.stop()
    except Exception:
        print(red + 'Stop error! Trying sudo mn -c' + reset)
        os.system('sudo mn -c')
    finally:
        # The health checker writes its PID to check_server.pid. Stop it
        # whether or not net.stop() succeeded, and remove the file so that a
        # later run never kills an unrelated process through a stale PID.
        if os.path.exists('check_server.pid'):
            os.system("kill `cat check_server.pid`")
            os.remove('check_server.pid')
    print(green + 'Stop successfully!' + reset)

 

if __name__ == '__main__':

    setLogLevel('info')

    main()

 

[controller: check_server.sh]

#!/bin/bash
# Health-check controller for the load balancer.
#
# Once per second every address listed in ip.txt is probed over HTTP (all
# probes run in parallel, each with a 1 s limit). The result is pushed to the
# switch by rewriting the action parameter of entry 0 of the matching
# set_statusN table: 1 marks the server as failed, 0 as healthy.
#
# Files used in the current directory: ip.txt (input), ok.txt / fail.txt
# (scratch, rewritten every round), check_server.pid (PID of this script).

CLI_PATH=/home/vagrant/behavioral-model/targets/simple_switch/simple_switch_CLI
THRIFT_PORT=9090

echo $$ > check_server.pid

# Prints the index N of the set_statusN/_failN pair that belongs to address
# $1, or nothing when the address is not one of the four known servers.
server_index() {
  case "$1" in
    172.17.0.2) echo 1 ;;
    172.17.0.3) echo 2 ;;
    172.17.0.4) echo 3 ;;
    172.17.0.5) echo 4 ;;
  esac
}

# Sends health flag $2 (1 = failed, 0 = ok) to the switch for every address
# listed in file $1. Does nothing when the file is empty.
publish_status() {
  local list=$1 flag=$2 ip idx
  [ -s "$list" ] || return 0
  for ip in $(cat "$list")
  do
    idx=$(server_index "$ip")
    [ -n "$idx" ] || continue
    echo "table_modify set_status${idx} _fail${idx} 0 ${flag}" | $CLI_PATH --thrift-port $THRIFT_PORT &>/dev/null
  done
}

while true
do
  >fail.txt
  >ok.txt
  for ip in $(cat ip.txt)
  do
   {
     #ping -c1 -W1 $ip &>/dev/null
     mycode=$(curl -m 1 -s -w '%{http_code}' "http://$ip" -o /dev/null)
     # Compare as a string: anything other than exactly "200" (including an
     # empty result when curl cannot run) counts as a failure. The former
     # numeric test errored out on empty input and fell into the "ok" branch.
     if [ "$mycode" = "200" ]; then
        echo "$ip" >> ok.txt
     else
        echo "$ip" >> fail.txt
     fi
   }&
  done
  # Wait until every probe of this round has finished.
  wait

  publish_status fail.txt 1
  publish_status ok.txt 0

  sleep 1
done

 

[mycurl.sh]

#!/bin/bash
# Load generator: fetches http://10.0.0.1/a.htm 1000 times, running at most
# $max_jobs curl processes at once, then reports how many requests succeeded
# and how many failed.

max_jobs=5
fifo_path=/tmp/$$.fifo

# A FIFO held open on file descriptor 8 serves as a pool of tokens: a job
# may start only after taking one line from it and puts a line back when it
# is done. The path can be unlinked right away because the descriptor keeps
# the pipe alive.
mkfifo $fifo_path
exec 8<> $fifo_path
rm $fifo_path

ok_log=/tmp/ok.txt
fail_log=/tmp/fail.txt
: >$ok_log
: >$fail_log

# Fill the pool with one token per allowed concurrent job.
for ((slot = 0; slot < max_jobs; slot++))
do
        echo >&8
done

for request in $(seq 1 1000)
do
        # Blocks until a token is available.
        read -u 8
        {
        if curl http://10.0.0.1/a.htm &>/dev/null; then
                echo "curl ok" >> $ok_log
        else
                echo "curl fail" >> $fail_log
        fi
        # Hand the token back.
        echo >&8
        }&
done

wait
exec 8>&-
echo "all finish..."

# wc prints the line count followed by the file name.
ok_count=$(wc -l $ok_log)
fail_count=$(wc -l $fail_log)
echo "ok:" $ok_count
echo "fail:" $fail_count

 

Execution:

No controller case

 

We use curl to send 1000 HTTP requests. Only 688 succeed and 312 fail, because the LB does not know that h5 is down.

 

With controller case:

#disable health check, enable health check: remove #

os.system("sudo /home/p4/mytest/p4-ConnectionHash/check_server.sh &>/dev/null &")

 

Re-run the program.

 

All 1000 requests succeed, because the controller tells the LB not to dispatch requests to h5.

 

Dr. Chih-Heng Ke (smallko@gmail.com)

Department of Computer Science and Information Engineering,

National Quemoy University, Kinmen, Taiwan.