P4-utils – L3 Routing

 

Before running the examples, please refer to p4-utils. This lab also refers to Simple L3 Forwarding 1 and Simple L3 Forwarding 2.

[Topology]

 

There are two different implementations of routing: one table and two tables. In the one-table implementation, s1 uses the following table and routes packets using only the destination IP address/mask. Note that 00:00:0a:00:01:01 is the MAC address of h1, and 00:02:00:00:00:01 is (we assume) the MAC address of s2.

table_add ipv4_lpm set_nhop 10.0.1.1/32 => 00:00:0a:00:01:01 1

table_add ipv4_lpm set_nhop 10.0.4.0/24 => 00:02:00:00:00:01 2

 

In the two-table implementation, s1 uses the following tables. You can think of the first table as storing the routing policy: it maps a destination prefix to a policy (next-hop index). The second table maps that index to the next-hop (or destination) MAC address and the output port number. Suppose h1 wants to send packets to h2. There are two choices. Path 1: s1-s2-s4. Path 2: s1-s3-s4. Hence, for 10.0.4.0/24, you can choose policy 2 (Path 1) or policy 3 (Path 2). In the following table, I use policy 3 (Path 2). You can refer to Simple L3 Forwarding 1 and Simple L3 Forwarding 2 for a performance comparison.

table_set_default ipv4_lpm drop

table_add ipv4_lpm set_nhop_index 10.0.1.1/32 => 1

table_add ipv4_lpm set_nhop_index 10.0.4.0/24 => 3

table_add forward _forward 1 => 00:00:0a:00:01:01 1

table_add forward _forward 2 => 00:02:00:00:00:01 2

table_add forward _forward 3 => 00:03:00:00:00:01 3

 

One table

[ip_forward.p4]

#include <core.p4>

#include <v1model.p4>

typedef bit<48> macAddr_t;

typedef bit<9> egressSpec_t;

 

// ARP header, extracted by the parser when the EtherType is 0x806.
// Field widths correspond to ARP over Ethernet/IPv4: 48-bit hardware
// addresses and 32-bit protocol addresses (28 bytes in total).
header arp_t {

    bit<16> htype;          // hardware type

    bit<16> ptype;          // protocol type

    bit<8>  hlen;           // hardware address length

    bit<8>  plen;           // protocol address length

    bit<16> opcode;         // ARP operation code

    bit<48> hwSrcAddr;      // sender hardware (MAC) address

    bit<32> protoSrcAddr;   // sender protocol (IPv4) address

    bit<48> hwDstAddr;      // target hardware (MAC) address

    bit<32> protoDstAddr;   // target protocol (IPv4) address

}

 

// Ethernet frame header: two 48-bit MAC addresses followed by the
// 16-bit EtherType that the parser uses to pick the next header.
header ethernet_t {
    macAddr_t dstAddr;      // destination MAC address
    macAddr_t srcAddr;      // source MAC address
    bit<16>   etherType;    // payload type (0x800 = IPv4, 0x806 = ARP)
}

 

// Fixed 20-byte IPv4 header (160 bits). IPv4 options are not modelled:
// the parser always extracts exactly these fields.
header ipv4_t {

    bit<4>  version;

    bit<4>  ihl;

    bit<8>  diffserv;

    bit<16> totalLen;

    bit<16> identification;

    bit<3>  flags;

    bit<13> fragOffset;

    bit<8>  ttl;            // decremented by the forwarding action

    bit<8>  protocol;

    bit<16> hdrChecksum;    // verified and recomputed by the checksum controls

    bit<32> srcAddr;

    bit<32> dstAddr;        // LPM lookup key in the ingress control

}

 

// User-defined metadata. Empty: this one-table program defines no
// per-packet metadata fields of its own.
struct metadata {

}

 

// All headers the program can parse and deparse. The @name annotations
// give each instance an explicit name (".arp", ".ethernet", ".ipv4").
// Note that the declaration order here is not the wire order; the
// deparser emits the headers explicitly.
struct headers {

    @name(".arp")

    arp_t      arp;

    @name(".ethernet")

    ethernet_t ethernet;

    @name(".ipv4")

    ipv4_t     ipv4;

}

 

// Parser: Ethernet first, then IPv4 or ARP depending on the EtherType.
// Any other EtherType is accepted with only the Ethernet header extracted.
parser ParserImpl(packet_in packet, out headers hdr, inout metadata meta, inout standard_metadata_t standard_metadata) {
    // Entry point: every packet begins with an Ethernet header.
    @name(".start") state start {
        transition parse_ethernet;
    }

    // Extract Ethernet and branch on the EtherType.
    @name(".parse_ethernet") state parse_ethernet {
        packet.extract(hdr.ethernet);
        transition select(hdr.ethernet.etherType) {
            16w0x800: parse_ipv4;   // IPv4
            16w0x806: parse_arp;    // ARP
            default: accept;
        }
    }

    // Extract the fixed-size IPv4 header and stop parsing.
    @name(".parse_ipv4") state parse_ipv4 {
        packet.extract(hdr.ipv4);
        transition accept;
    }

    // Extract the ARP header and stop parsing.
    @name(".parse_arp") state parse_arp {
        packet.extract(hdr.arp);
        transition accept;
    }
}

 

// Egress pipeline: empty, no processing is done after ingress.
control egress(inout headers hdr, inout metadata meta, inout standard_metadata_t standard_metadata) {
    apply { }
}

 

// Ingress pipeline (one-table design): a single LPM table on the IPv4
// destination address yields the next-hop MAC address and output port.
control ingress(inout headers hdr, inout metadata meta, inout standard_metadata_t standard_metadata) {
    // Forward the packet to a next hop.
    //   dstAddr - MAC address written into the Ethernet destination field
    //   port    - output port, written to standard_metadata.egress_spec
    @name(".set_nhop") action set_nhop(macAddr_t dstAddr, egressSpec_t port) {
        // NOTE(review): the new source MAC is simply the frame's previous
        // destination MAC. A real router would use the MAC of its own
        // egress interface; this shortcut is kept as in the original lab.
        hdr.ethernet.srcAddr = hdr.ethernet.dstAddr;

        // Destination MAC comes from the matched table entry.
        hdr.ethernet.dstAddr = dstAddr;

        // Output port also comes from the matched table entry.
        standard_metadata.egress_spec = port;

        // Decrease the TTL by 1 (the IPv4 checksum is recomputed later
        // by the computeChecksum control).
        hdr.ipv4.ttl = hdr.ipv4.ttl - 1;
    }

    // Discard the packet.
    @name("._drop") action _drop() {
        mark_to_drop(standard_metadata);
    }

    // Routing table: longest-prefix match on the IPv4 destination address.
    // A lookup miss drops the packet (const default action).
    @name(".ipv4_lpm") table ipv4_lpm {
        actions = {
            set_nhop;
            _drop;
        }
        key = {
            hdr.ipv4.dstAddr: lpm;
        }
        size = 512;
        const default_action = _drop();
    }

    apply {
        // Only look up packets that actually carry an IPv4 header. Without
        // this guard, ARP and other non-IP frames would use fields of an
        // invalid header as the lookup key, and set_nhop could write the
        // TTL of an invalid header (undefined behaviour in P4-16).
        if (hdr.ipv4.isValid()) {
            ipv4_lpm.apply();
        } else {
            // This program only routes IPv4, so non-IPv4 packets are
            // dropped explicitly rather than being left unrouted.
            mark_to_drop(standard_metadata);
        }
    }
}

 

// Deparser: writes the headers back onto the wire in the order given.
// emit() on a header that is not valid is a no-op, so a packet carries
// either the ARP or the IPv4 header after Ethernet, never both.
control DeparserImpl(packet_out packet, in headers hdr) {

    apply {

        packet.emit(hdr.ethernet);

        packet.emit(hdr.arp);

        packet.emit(hdr.ipv4);

    }

}

 

// Verify the IPv4 header checksum of incoming packets.
control verifyChecksum(inout headers hdr, inout metadata meta) {
    apply {
        // Only verify when an IPv4 header was actually parsed; a constant
        // `true` condition would evaluate the checksum over the fields of
        // an invalid header for ARP and other non-IP packets.
        verify_checksum(
            hdr.ipv4.isValid(),
            { hdr.ipv4.version,
              hdr.ipv4.ihl,
              hdr.ipv4.diffserv,
              hdr.ipv4.totalLen,
              hdr.ipv4.identification,
              hdr.ipv4.flags,
              hdr.ipv4.fragOffset,
              hdr.ipv4.ttl,
              hdr.ipv4.protocol,
              hdr.ipv4.srcAddr,
              hdr.ipv4.dstAddr },
            hdr.ipv4.hdrChecksum,
            HashAlgorithm.csum16);
    }
}

 

// Recompute the IPv4 header checksum before deparsing (the forwarding
// action changes the TTL, which invalidates the received checksum).
control computeChecksum(inout headers hdr, inout metadata meta) {
    apply {
        // Only update when the packet carries a valid IPv4 header; a
        // constant `true` condition would also write the checksum field
        // of an invalid header for ARP and other non-IP packets.
        update_checksum(
            hdr.ipv4.isValid(),
            { hdr.ipv4.version,
              hdr.ipv4.ihl,
              hdr.ipv4.diffserv,
              hdr.ipv4.totalLen,
              hdr.ipv4.identification,
              hdr.ipv4.flags,
              hdr.ipv4.fragOffset,
              hdr.ipv4.ttl,
              hdr.ipv4.protocol,
              hdr.ipv4.srcAddr,
              hdr.ipv4.dstAddr },
            hdr.ipv4.hdrChecksum,
            HashAlgorithm.csum16);
    }
}

 

// Instantiate the v1model switch: parser, checksum verification, ingress,
// egress, checksum update and deparser, in that order.
V1Switch(ParserImpl(), verifyChecksum(), ingress(), egress(), computeChecksum(), DeparserImpl()) main;

 

 

[cmd_s1.txt]

table_add ipv4_lpm set_nhop 10.0.1.1/32 => 00:00:0a:00:01:01 1

table_add ipv4_lpm set_nhop 10.0.4.0/24 => 00:02:00:00:00:01 2

 

[cmd_s2.txt]

table_add ipv4_lpm set_nhop 10.0.1.0/24 => 00:01:00:00:00:01 1

table_add ipv4_lpm set_nhop 10.0.4.0/24 => 00:04:00:00:00:01 2

 

[cmd_s3.txt]

table_add ipv4_lpm set_nhop 10.0.1.0/24 => 00:01:00:00:00:01 1

table_add ipv4_lpm set_nhop 10.0.4.0/24 => 00:04:00:00:00:01 2

 

[cmd_s4.txt]

table_add ipv4_lpm set_nhop 10.0.1.0/24 => 00:02:00:00:00:01 1

table_add ipv4_lpm set_nhop 10.0.4.2/32 => 00:00:0a:00:04:02 3

 

[p4app.json]

{

  "program": "ip_forward.p4",

  "switch": "simple_switch",

  "compiler": "p4c",

  "options": "--target bmv2 --arch v1model --std p4-16",

  "switch_cli": "simple_switch_CLI",

  "cli": true,

  "pcap_dump": true,

  "enable_log": true,

  "topo_module": {

    "file_path": "",

    "module_name": "p4utils.mininetlib.apptopo",

    "object_name": "AppTopoStrategies"

  },

  "controller_module": null,

  "topodb_module": {

    "file_path": "",

    "module_name": "p4utils.utils.topology",

    "object_name": "Topology"

  },

  "mininet_module": {

    "file_path": "",

    "module_name": "p4utils.mininetlib.p4net",

    "object_name": "P4Mininet"

  },

  "topology": {

    "assignment_strategy": "mixed",

    "auto_arp_tables": "true",

    "auto_gw_arp": "true",

    "links": [["h1", "s1"], ["s1", "s2"], ["s1", "s3"], ["s2", "s4"], ["s3", "s4"], ["s4", "h2"]],

    "hosts": {

      "h1": {

      },

      "h2": {

      }

    },

    "switches": {

      "s1": {

        "cli_input": "cmd_s1.txt",

        "program": "ip_forward.p4"

      },

      "s2": {

        "cli_input": "cmd_s2.txt",

        "program": "ip_forward.p4"

      },

      "s3": {

        "cli_input": "cmd_s3.txt",

        "program": "ip_forward.p4"

      },

      "s4": {

        "cli_input": "cmd_s4.txt",

        "program": "ip_forward.p4"

      }  

    }

  }

}

 

[execution]

You can see that the packets are sent from s1 to s2.

 

Two Tables

[ip_forward.p4]

#include <core.p4>

#include <v1model.p4>

typedef bit<48> macAddr_t;

typedef bit<9> egressSpec_t;

 

// ARP header, extracted by the parser when the EtherType is 0x806.
// Field widths correspond to ARP over Ethernet/IPv4: 48-bit hardware
// addresses and 32-bit protocol addresses (28 bytes in total).
header arp_t {

    bit<16> htype;          // hardware type

    bit<16> ptype;          // protocol type

    bit<8>  hlen;           // hardware address length

    bit<8>  plen;           // protocol address length

    bit<16> opcode;         // ARP operation code

    bit<48> hwSrcAddr;      // sender hardware (MAC) address

    bit<32> protoSrcAddr;   // sender protocol (IPv4) address

    bit<48> hwDstAddr;      // target hardware (MAC) address

    bit<32> protoDstAddr;   // target protocol (IPv4) address

}

 

// Ethernet frame header: two 48-bit MAC addresses followed by the
// 16-bit EtherType that the parser uses to pick the next header.
header ethernet_t {
    macAddr_t dstAddr;      // destination MAC address
    macAddr_t srcAddr;      // source MAC address
    bit<16>   etherType;    // payload type (0x800 = IPv4, 0x806 = ARP)
}

 

// Fixed 20-byte IPv4 header (160 bits). IPv4 options are not modelled:
// the parser always extracts exactly these fields.
header ipv4_t {

    bit<4>  version;

    bit<4>  ihl;

    bit<8>  diffserv;

    bit<16> totalLen;

    bit<16> identification;

    bit<3>  flags;

    bit<13> fragOffset;

    bit<8>  ttl;            // decremented by the forwarding action

    bit<8>  protocol;

    bit<16> hdrChecksum;    // verified and recomputed by the checksum controls

    bit<32> srcAddr;

    bit<32> dstAddr;        // LPM lookup key in the ingress control

}

 

// User-defined metadata carried with the packet through the pipeline.
struct metadata {   

    // Next-hop index: written by set_nhop_index (ipv4_lpm table) and used
    // as the exact-match key of the forward table.
    bit<8> nhop_index;

}

 

// All headers the program can parse and deparse. The @name annotations
// give each instance an explicit name (".arp", ".ethernet", ".ipv4").
// Note that the declaration order here is not the wire order; the
// deparser emits the headers explicitly.
struct headers {

    @name(".arp")

    arp_t      arp;

    @name(".ethernet")

    ethernet_t ethernet;

    @name(".ipv4")

    ipv4_t     ipv4;

}

 

// Parser: Ethernet first, then IPv4 or ARP depending on the EtherType.
// Any other EtherType is accepted with only the Ethernet header extracted.
parser ParserImpl(packet_in packet, out headers hdr, inout metadata meta, inout standard_metadata_t standard_metadata) {
    // Entry point: every packet begins with an Ethernet header.
    @name(".start") state start {
        transition parse_ethernet;
    }

    // Extract Ethernet and branch on the EtherType.
    @name(".parse_ethernet") state parse_ethernet {
        packet.extract(hdr.ethernet);
        transition select(hdr.ethernet.etherType) {
            16w0x800: parse_ipv4;   // IPv4
            16w0x806: parse_arp;    // ARP
            default: accept;
        }
    }

    // Extract the fixed-size IPv4 header and stop parsing.
    @name(".parse_ipv4") state parse_ipv4 {
        packet.extract(hdr.ipv4);
        transition accept;
    }

    // Extract the ARP header and stop parsing.
    @name(".parse_arp") state parse_arp {
        packet.extract(hdr.arp);
        transition accept;
    }
}

 

// Egress pipeline: empty, no processing is done after ingress.
control egress(inout headers hdr, inout metadata meta, inout standard_metadata_t standard_metadata) {
    apply { }
}

 

// Ingress pipeline (two-table design).
//   Stage 1, ipv4_lpm: IPv4 destination prefix -> next-hop index.
//   Stage 2, forward:  next-hop index -> next-hop MAC address + output port.
control ingress(inout headers hdr, inout metadata meta, inout standard_metadata_t standard_metadata) {
    // Discard the packet.
    action drop() {
        mark_to_drop(standard_metadata);
    }

    // Record the next-hop index chosen by the routing lookup.
    action set_nhop_index(bit<8> index) {
        meta.nhop_index = index;
    }

    // Send the packet to the selected next hop.
    //   dstAddr - MAC address written into the Ethernet destination field
    //   port    - output port, written to standard_metadata.egress_spec
    action _forward(macAddr_t dstAddr, egressSpec_t port) {
        // Output port and TTL decrement.
        standard_metadata.egress_spec = port;
        hdr.ipv4.ttl = hdr.ipv4.ttl - 1;

        // MAC rewrite. The old destination MAC is copied into the source
        // field before the destination is overwritten.
        // NOTE(review): a real router would use the MAC of its own egress
        // interface as the source address.
        hdr.ethernet.srcAddr = hdr.ethernet.dstAddr;
        hdr.ethernet.dstAddr = dstAddr;
    }

    // Stage 1: longest-prefix match on the IPv4 destination address.
    // The compiled-in default is NoAction; it is not const, so it can be
    // changed at run time.
    table ipv4_lpm {
        key = {
            hdr.ipv4.dstAddr: lpm;
        }
        actions = {
            set_nhop_index;
            drop;
            NoAction;
        }
        size = 1024;
        default_action = NoAction();
    }

    // Stage 2: exact match on the next-hop index set by stage 1.
    table forward {
        key = {
            meta.nhop_index: exact;
        }
        actions = {
            _forward;
            NoAction;
        }
        size = 64;
        default_action = NoAction();
    }

    apply {
        // Only IPv4 packets are routed, and stage 2 runs only when
        // stage 1 found a matching prefix.
        if (hdr.ipv4.isValid()) {
            if (ipv4_lpm.apply().hit) {
                forward.apply();
            }
        }
    }
}

 

// Deparser: writes the headers back onto the wire in the order given.
// emit() on a header that is not valid is a no-op, so a packet carries
// either the ARP or the IPv4 header after Ethernet, never both.
control DeparserImpl(packet_out packet, in headers hdr) {

    apply {

        packet.emit(hdr.ethernet);

        packet.emit(hdr.arp);

        packet.emit(hdr.ipv4);

    }

}

 

// Verify the IPv4 header checksum of incoming packets.
control verifyChecksum(inout headers hdr, inout metadata meta) {
    apply {
        // Only verify when an IPv4 header was actually parsed; a constant
        // `true` condition would evaluate the checksum over the fields of
        // an invalid header for ARP and other non-IP packets.
        verify_checksum(
            hdr.ipv4.isValid(),
            { hdr.ipv4.version,
              hdr.ipv4.ihl,
              hdr.ipv4.diffserv,
              hdr.ipv4.totalLen,
              hdr.ipv4.identification,
              hdr.ipv4.flags,
              hdr.ipv4.fragOffset,
              hdr.ipv4.ttl,
              hdr.ipv4.protocol,
              hdr.ipv4.srcAddr,
              hdr.ipv4.dstAddr },
            hdr.ipv4.hdrChecksum,
            HashAlgorithm.csum16);
    }
}

 

// Recompute the IPv4 header checksum before deparsing (the forwarding
// action changes the TTL, which invalidates the received checksum).
control computeChecksum(inout headers hdr, inout metadata meta) {
    apply {
        // Only update when the packet carries a valid IPv4 header; a
        // constant `true` condition would also write the checksum field
        // of an invalid header for ARP and other non-IP packets.
        update_checksum(
            hdr.ipv4.isValid(),
            { hdr.ipv4.version,
              hdr.ipv4.ihl,
              hdr.ipv4.diffserv,
              hdr.ipv4.totalLen,
              hdr.ipv4.identification,
              hdr.ipv4.flags,
              hdr.ipv4.fragOffset,
              hdr.ipv4.ttl,
              hdr.ipv4.protocol,
              hdr.ipv4.srcAddr,
              hdr.ipv4.dstAddr },
            hdr.ipv4.hdrChecksum,
            HashAlgorithm.csum16);
    }
}

 

// Instantiate the v1model switch: parser, checksum verification, ingress,
// egress, checksum update and deparser, in that order.
V1Switch(ParserImpl(), verifyChecksum(), ingress(), egress(), computeChecksum(), DeparserImpl()) main;

 

 

[cmd_s1.txt]

table_set_default ipv4_lpm drop

table_add ipv4_lpm set_nhop_index 10.0.1.1/32 => 1

table_add ipv4_lpm set_nhop_index 10.0.4.0/24 => 3

table_add forward _forward 1 => 00:00:0a:00:01:01 1

table_add forward _forward 2 => 00:02:00:00:00:01 2

table_add forward _forward 3 => 00:03:00:00:00:01 3

 

[cmd_s2.txt]

table_set_default ipv4_lpm drop

table_add ipv4_lpm set_nhop_index 10.0.1.0/24 => 1

table_add ipv4_lpm set_nhop_index 10.0.4.0/24 => 2

table_add forward _forward 1 => 00:01:00:00:00:01 1

table_add forward _forward 2 => 00:04:00:00:00:01 2

 

[cmd_s3.txt]

table_set_default ipv4_lpm drop

table_add ipv4_lpm set_nhop_index 10.0.1.0/24 => 1

table_add ipv4_lpm set_nhop_index 10.0.4.0/24 => 2

table_add forward _forward 1 => 00:01:00:00:00:01 1

table_add forward _forward 2 => 00:04:00:00:00:01 2

 

[cmd_s4.txt]

table_set_default ipv4_lpm drop

table_add ipv4_lpm set_nhop_index 10.0.1.0/24 => 2

table_add ipv4_lpm set_nhop_index 10.0.4.2/32 => 3

table_add forward _forward 1 => 00:02:00:00:00:01 1

table_add forward _forward 2 => 00:03:00:00:00:01 2

table_add forward _forward 3 => 00:00:0a:00:04:02 3

 

[p4app.json]

{

  "program": "ip_forward.p4",

  "switch": "simple_switch",

  "compiler": "p4c",

  "options": "--target bmv2 --arch v1model --std p4-16",

  "switch_cli": "simple_switch_CLI",

  "cli": true,

  "pcap_dump": true,

  "enable_log": true,

  "topo_module": {

    "file_path": "",

    "module_name": "p4utils.mininetlib.apptopo",

    "object_name": "AppTopoStrategies"

  },

  "controller_module": null,

  "topodb_module": {

    "file_path": "",

    "module_name": "p4utils.utils.topology",

    "object_name": "Topology"

  },

  "mininet_module": {

    "file_path": "",

    "module_name": "p4utils.mininetlib.p4net",

    "object_name": "P4Mininet"

  },

  "topology": {

    "assignment_strategy": "mixed",

    "auto_arp_tables": "true",

    "auto_gw_arp": "true",

    "links": [["h1", "s1"], ["s1", "s2"], ["s1", "s3"], ["s2", "s4"], ["s3", "s4"], ["s4", "h2"]],

    "hosts": {

      "h1": {

      },

      "h2": {

      }

    },

    "switches": {

      "s1": {

        "cli_input": "cmd_s1.txt",

        "program": "ip_forward.p4"

      },

      "s2": {

        "cli_input": "cmd_s2.txt",

        "program": "ip_forward.p4"

      },

      "s3": {

        "cli_input": "cmd_s3.txt",

        "program": "ip_forward.p4"

      },

      "s4": {

        "cli_input": "cmd_s4.txt",

        "program": "ip_forward.p4"

      }  

    }

  }

}

 

Execution

 

You can see that the packets are sent from s1 to s3.

 

 

Changing the routing path at run time. In the Two Tables example, I bring s1-eth3 down, so h1 can no longer ping h2. At that point, I run a simple controller script that changes the rules in s1 (the script below also updates a second switch, the one listening on Thrift port 9093) so that packets go via s1-s2-s4 to h2.

 

[change_path.sh]

#!/bin/bash

# Re-point existing ipv4_lpm entries to a different next-hop index on two
# running switches, then dump the table on both to show the result.
# NOTE(review): ports 9090 and 9093 are presumably the Thrift ports of s1
# and s4, and the numbers after set_nhop_index are <entry handle> <new index>;
# confirm against the running topology.

CLI_PATH=/usr/local/bin/simple_switch_CLI

# Send a single CLI command ($2) to the switch on Thrift port $1.
run_cli() {
    echo "$2" | $CLI_PATH --thrift-port "$1"
}

run_cli 9090 "table_modify ipv4_lpm set_nhop_index 1 2"
run_cli 9093 "table_modify ipv4_lpm set_nhop_index 0 1"
run_cli 9090 "table_dump ipv4_lpm"
run_cli 9093 "table_dump ipv4_lpm"

 

Execution

h1 ping h2.

 

Open another terminal and bring s1-eth3 down. You will see the ping stop.

Run the script, and the ping starts working again.

 

Dr. Chih-Heng Ke

Department of Computer Science and Information Engineering, National Quemoy University, Kinmen, Taiwan

Email: smallko@gmail.com