The second version of the implementation of my MOST project
Please check this first.
[Topology]
H1 (10.0.1.1) --------(10.0.1.254) (Nginx Load Balancer)
(10.0.0.254) ----------(10.0.0.1) Web1
(10.0.2.254)----------(10.0.2.1) Web2
[Difficulties]
In Nginx, H1 first creates a TCP connection with the Nginx load
balancer. Nginx then creates another TCP connection with the backend web
server. So it is easy for Nginx
to parse the client request (HTTP GET request) and learn whether it is a request
for a static web page or a dynamic web page. The Nginx load balancer can then
forward the request to Web1 or Web2 according to the parsed result. In P4,
however, we cannot know the type of the user's request in
advance. In this second version, we let the user and the P4 LB finish the three-way handshake
first. After the user sends out the HTTP request, the P4 LB can tell whether
the request is for Web1 or Web2. The P4 LB then resets the connection. The
user re-initiates the connection, and this time the P4 LB forwards
the SYN to the corresponding web server. See the following figures.


[basic16.p4]
/*
 * basic16.p4 -- HTTP-aware load balancer for the v1model architecture.
 *
 * Flow of one client request, as implemented below:
 *   1. The client's SYN is answered by the switch itself with a SYN/ACK
 *      (action reply_sa), so the client can send its HTTP request.
 *   2. The parser inspects the first 16 payload bytes of a "GET " request and
 *      sets meta.flag to 1 (" HT" seen first) or 2 ("jpg" seen first).
 *   3. The ingress stores that flag in myregister[0] and resets the
 *      connection (action reply_sa2).
 *   4. When the client reconnects, every packet from the client is steered by
 *      table myforward, keyed on the stored flag.
 *   5. A FIN from the client to the virtual IP clears the stored flag.
 *
 * Only a single flag is stored, so the program tracks one client connection
 * at a time.
 */
#include <core.p4>
#include <v1model.p4>

// First four payload bytes of an HTTP GET request: "GET ".
const bit<32> TYPE_HTTP_REQ_GET = 0x47455420;
// " HT" -- the beginning of " HTTP/x.y", i.e. the end of the request target.
const bit<24> TYPE_A = 0x204854; // space + H + T
// "jpg"
const bit<24> TYPE_JPG = 0x6a7067;

// parse_a keeps the 12 payload bytes that follow "GET "; a 3-byte pattern can
// start at 10 different offsets inside those 12 bytes.
const bit<8> URI_SCAN_WINDOWS = 10;

#define TCP_FLAG_ACK 0x10
#define TCP_FLAG_RST 0x04
#define TCP_FLAG_SYN 0x02
#define TCP_FLAG_FIN 0x01

// Not referenced by the code below (left over from the commented-out hash).
register<bit<2>>(32w8192) flowlet_flag;
// Cell 0 holds the classification (1 or 2) of the current request, 0 = none.
register<bit<2>>(1) myregister;

header ethernet_t {
    bit<48> dstAddr;
    bit<48> srcAddr;
    bit<16> etherType;
}

header ipv4_t {
    bit<4>  version;
    bit<4>  ihl;
    bit<8>  diffserv;
    bit<16> totalLen;
    bit<16> identification;
    bit<3>  flags;
    bit<13> fragOffset;
    bit<8>  ttl;
    bit<8>  protocol;
    bit<16> hdrChecksum;
    bit<32> srcAddr;
    bit<32> dstAddr;
}

header tcp_t {
    bit<16> srcPort;
    bit<16> dstPort;
    bit<32> seqNo;
    bit<32> ackNo;
    bit<4>  dataOffset;
    bit<4>  res;
    bit<8>  flags;
    bit<16> window;
    bit<16> checksum;
    bit<16> urgentPtr;
}

header tcp_options_t {
    varbit<320> options;
}

struct metadata {
    bit<128> tmp;               // sliding copy of the first 16 payload bytes
    bit<2>   flag;              // 0 = unclassified, 1 = " HT" hit, 2 = "jpg" hit
    bit<13>  flowlet_map_index; // only used by the commented-out hash
    bit<48>  eth_sa;
    bit<48>  eth_da;
    bit<32>  ipv4_sa;
    bit<32>  ipv4_da;
    bit<16>  tcp_sp;
    bit<16>  tcp_dp;
    bit<16>  tcp_length;        // TCP segment length used in the pseudo header
    bit<32>  tcp_ack_no;
    bit<32>  tcp_seq_no;
    bit<8>   scan_left;         // windows parse_b may still examine
}

struct headers {
    ethernet_t    ethernet;
    ipv4_t        ipv4;
    tcp_t         tcp;
    tcp_options_t tcp_options;
}

error {
    noAppLayerData,
    TcpDataOffsetTooSmall
}

parser ParserImpl(packet_in packet,
                  out headers hdr,
                  inout metadata meta,
                  inout standard_metadata_t standard_metadata) {

    state start {
        meta.flag = 0;
        standard_metadata.mcast_grp = 0;
        transition parse_ethernet;
    }

    state parse_ethernet {
        packet.extract(hdr.ethernet);
        // Keep the original addresses so the reply actions can swap them.
        meta.eth_da = hdr.ethernet.dstAddr;
        meta.eth_sa = hdr.ethernet.srcAddr;
        transition select(hdr.ethernet.etherType) {
            16w0x800: parse_ipv4;
            default: accept;
        }
    }

    state parse_ipv4 {
        packet.extract(hdr.ipv4);
        meta.ipv4_sa = hdr.ipv4.srcAddr;
        meta.ipv4_da = hdr.ipv4.dstAddr;
        // Subtracts a fixed 20-byte IPv4 header (no IPv4 options).
        meta.tcp_length = hdr.ipv4.totalLen - 16w20;
        transition select(hdr.ipv4.protocol) {
            8w6: parse_tcp;
            default: accept;
        }
    }

    state parse_tcp {
        packet.extract(hdr.tcp);
        meta.tcp_sp = hdr.tcp.srcPort;
        meta.tcp_dp = hdr.tcp.dstPort;
        meta.tcp_seq_no = hdr.tcp.seqNo;
        meta.tcp_ack_no = hdr.tcp.ackNo;
        verify(hdr.tcp.dataOffset >= 5, error.TcpDataOffsetTooSmall);
        transition select(hdr.tcp.dataOffset) {
            // No options to extract, but the payload may still be an HTTP
            // request, so continue with the application check instead of
            // accepting immediately.
            5: parse_app;
            default: parse_tcp_options;
        }
    }

    state parse_tcp_options {
        // Option length in bits: (dataOffset - 5) 32-bit words.
        bit<10> len = ((bit<10>)(hdr.tcp.dataOffset - 5) * 4 * 8);
        packet.extract(hdr.tcp_options, (bit<32>)len);
        transition parse_app;
    }

    state parse_app {
        transition select(hdr.tcp.dstPort) {
            80: parse_http;
            default: accept;
        }
    }

    state parse_http {
        // Widen both 4-bit fields BEFORE adding them: in bit<4> arithmetic
        // ihl + dataOffset wraps around (e.g. 5 + 11 == 0).
        verify(hdr.ipv4.totalLen >
               ((bit<16>)hdr.ipv4.ihl + (bit<16>)hdr.tcp.dataOffset) * 4,
               error.noAppLayerData);
        transition select(packet.lookahead<bit<32>>()) {
            TYPE_HTTP_REQ_GET: parse_a;
            default: accept;
        }
    }

    state parse_a {
        meta.tmp = packet.lookahead<bit<128>>();
        // Drop "GET "; the 12 bytes after it are now left-aligned in meta.tmp.
        meta.tmp = (meta.tmp << 32);
        meta.scan_left = URI_SCAN_WINDOWS;
        transition parse_b;
    }

    state parse_b {
        // Compare the current 3-byte window, then slide by one byte.
        bit<24> choice = meta.tmp[127:104];
        meta.tmp = (meta.tmp << 8);
        meta.scan_left = meta.scan_left - 1;
        transition select(choice, meta.scan_left) {
            (TYPE_A, _):   parse_c;
            (TYPE_JPG, _): parse_d;
            // Every real window was examined without a hit: stop here rather
            // than looping forever over the zero padding shifted in.
            (_, 8w0):      accept;
            default:       parse_b;
        }
    }

    state parse_c {
        meta.flag = 1;
        transition accept;
    }

    state parse_d {
        meta.flag = 2;
        transition accept;
    }
}

control egress(inout headers hdr,
               inout metadata meta,
               inout standard_metadata_t standard_metadata) {
    apply { }
}

control ingress(inout headers hdr,
                inout metadata meta,
                inout standard_metadata_t standard_metadata) {

    action drop() {
        mark_to_drop(standard_metadata);
    }

    action nop() { }

    // Send the packet out of `port`, rewriting destination IP and MAC.
    action doforward(bit<9> port, bit<32> dst_ip, bit<48> dst_mac) {
        standard_metadata.egress_spec = port;
        hdr.ipv4.dstAddr = dst_ip;
        hdr.ethernet.dstAddr = dst_mac;
    }

    // Turn the received SYN into a SYN/ACK addressed back to its sender.
    action reply_sa() {
        hdr.ethernet.srcAddr = meta.eth_da;
        hdr.ethernet.dstAddr = meta.eth_sa;
        hdr.ipv4.srcAddr = meta.ipv4_da;
        hdr.ipv4.dstAddr = meta.ipv4_sa;
        hdr.tcp.srcPort = meta.tcp_dp;
        hdr.tcp.dstPort = meta.tcp_sp;
        hdr.tcp.flags = TCP_FLAG_ACK | TCP_FLAG_SYN;
        hdr.tcp.ackNo = hdr.tcp.seqNo + 1;
        hdr.tcp.seqNo = 1;
        hdr.tcp.window = 28960;
    }

    // Turn the received segment into a RST addressed back to its sender.
    action reply_sa2() {
        hdr.ethernet.srcAddr = meta.eth_da;
        hdr.ethernet.dstAddr = meta.eth_sa;
        hdr.ipv4.srcAddr = meta.ipv4_da;
        hdr.ipv4.dstAddr = meta.ipv4_sa;
        hdr.tcp.srcPort = meta.tcp_dp;
        hdr.tcp.dstPort = meta.tcp_sp;
        hdr.tcp.flags = TCP_FLAG_RST;
        hdr.tcp.seqNo = meta.tcp_ack_no;
        hdr.tcp.ackNo = 0;
        hdr.tcp.window = 28960;
    }

    table ip_forward {
        key = {
            hdr.ipv4.dstAddr: exact;
        }
        actions = {
            doforward;
            drop;
            nop;
        }
        size = 1024;
        default_action = nop();
    }

    table myforward {
        key = {
            meta.flag: exact;
        }
        actions = {
            doforward;
            drop;
        }
        size = 1024;
        default_action = drop();
    }

    apply {
        bit<2> tmp;
        //bit<2> tmp1;
        //hash(meta.flowlet_map_index, HashAlgorithm.crc16, (bit<13>)0,
        //     {hdr.ipv4.srcAddr, hdr.ipv4.dstAddr, hdr.ipv4.protocol,
        //      hdr.tcp.srcPort, hdr.tcp.dstPort}, (bit<26>)8192);

        // 0x0a000101 = 10.0.1.1 (client), 0x0a000001 = 10.0.0.1 (virtual IP).
        // A FIN from the client to the virtual IP clears the stored decision.
        if (hdr.ipv4.srcAddr == 0x0a000101 &&
            hdr.ipv4.dstAddr == 0x0a000001 &&
            (hdr.tcp.flags & TCP_FLAG_FIN) == TCP_FLAG_FIN) {
            myregister.write(0, 0);
        }

        myregister.read(tmp, 0);

        if (tmp != 0 && hdr.ipv4.srcAddr == 0x0a000101) {
            // A decision is stored: steer the client's packet to that server.
            meta.flag = tmp;
            myforward.apply();
        } else if (tmp == 0 && meta.flag != 0 && hdr.ipv4.srcAddr == 0x0a000101) {
            // got "GET" information: remember it and reset the connection.
            myregister.write(0, meta.flag);
            reply_sa2();
        }

        if ((hdr.tcp.flags & TCP_FLAG_SYN) == TCP_FLAG_SYN &&
            tmp == 0 &&
            hdr.ipv4.srcAddr == 0x0a000101) {
            //if(hdr.tcp.flags & TCP_FLAG_SYN == TCP_FLAG_SYN && tmp1==0 &&
            //   hdr.ipv4.srcAddr==0x0a000101){
            // No decision stored yet: answer the handshake from the switch.
            reply_sa();
        }

        // Traffic towards the client always appears to come from the
        // virtual IP.
        if (hdr.ipv4.srcAddr != 0x0a000101 && hdr.ipv4.dstAddr == 0x0a000101) {
            hdr.ipv4.srcAddr = 0x0a000001;
        }

        ip_forward.apply();
    }
}

control DeparserImpl(packet_out packet, in headers hdr) {
    apply {
        packet.emit(hdr.ethernet);
        packet.emit(hdr.ipv4);
        packet.emit(hdr.tcp);
        packet.emit(hdr.tcp_options);
    }
}

control verifyChecksum(inout headers hdr, inout metadata meta) {
    apply { }
}

control computeChecksum(inout headers hdr, inout metadata meta) {
    apply {
        // IPv4 header checksum; only meaningful when the header is valid.
        update_checksum(
            hdr.ipv4.isValid(),
            { hdr.ipv4.version, hdr.ipv4.ihl,
              hdr.ipv4.diffserv, hdr.ipv4.totalLen, hdr.ipv4.identification,
              hdr.ipv4.flags, hdr.ipv4.fragOffset, hdr.ipv4.ttl, hdr.ipv4.protocol,
              hdr.ipv4.srcAddr, hdr.ipv4.dstAddr },
            hdr.ipv4.hdrChecksum,
            HashAlgorithm.csum16);

        // TCP checksum, segment without options.
        update_checksum_with_payload(
            hdr.ipv4.isValid() && hdr.tcp.isValid() && !hdr.tcp_options.isValid(),
            { hdr.ipv4.srcAddr, hdr.ipv4.dstAddr, 8w0, hdr.ipv4.protocol,
              meta.tcp_length, hdr.tcp.srcPort,
              hdr.tcp.dstPort, hdr.tcp.seqNo,
              hdr.tcp.ackNo, hdr.tcp.dataOffset,
              hdr.tcp.res, hdr.tcp.flags, hdr.tcp.window,
              hdr.tcp.urgentPtr },
            hdr.tcp.checksum,
            HashAlgorithm.csum16);

        // TCP checksum, segment with options.
        update_checksum_with_payload(
            hdr.ipv4.isValid() && hdr.tcp.isValid() && hdr.tcp_options.isValid(),
            { hdr.ipv4.srcAddr, hdr.ipv4.dstAddr, 8w0, hdr.ipv4.protocol,
              meta.tcp_length, hdr.tcp.srcPort,
              hdr.tcp.dstPort, hdr.tcp.seqNo,
              hdr.tcp.ackNo, hdr.tcp.dataOffset,
              hdr.tcp.res, hdr.tcp.flags, hdr.tcp.window,
              hdr.tcp.urgentPtr, hdr.tcp_options.options },
            hdr.tcp.checksum,
            HashAlgorithm.csum16);
    }
}

V1Switch(ParserImpl(), verifyChecksum(), ingress(), egress(), computeChecksum(), DeparserImpl()) main;
[test_topo.py]
"""Containernet topology for the P4 load-balancer demo.

One P4 switch (s1) connects a plain host h1 (switch port 1) and two Docker
web servers h2 and h3 (switch ports 2 and 3).  After the network is up, static
ARP entries and default routes are installed and the web stack is started in
both containers; then the Mininet CLI is opened.
"""
import os
from mininet.net import Containernet
from mininet.topo import Topo
from mininet.log import setLogLevel, info
from mininet.cli import CLI
from mininet.node import RemoteController
from mininet.node import Docker
from p4_mininet import P4Switch, P4Host
import argparse
from time import sleep

parser = argparse.ArgumentParser(description='Mininet demo')
parser.add_argument('--behavioral-exe', help='Path to behavioral executable',
                    type=str, action="store", required=False,
                    default='simple_switch')
parser.add_argument('--thrift-port', help='Thrift server port for table updates',
                    type=int, action="store", default=9090)
parser.add_argument('--num-hosts', help='Number of hosts to connect to switch',
                    type=int, action="store", default=2)
parser.add_argument('--mode', choices=['l2', 'l3'], type=str, default='l3')
parser.add_argument('--json', help='Path to JSON config file',
                    type=str, action="store", required=True)
parser.add_argument('--pcap-dump', help='Dump packets on interfaces to pcap files',
                    type=str, action="store", required=False, default=False)
args = parser.parse_args()


def main():
    """Build the topology, configure the hosts and run the Mininet CLI."""
    net = Containernet(host=P4Host, controller=None)

    # Use the JSON file given on the command line; --json is a required
    # argument and must not be silently replaced by a fixed file name.
    switch1 = net.addSwitch('s1',
                            sw_path=args.behavioral_exe,
                            json_path=args.json,
                            thrift_port=args.thrift_port,
                            cls=P4Switch,
                            pcap_dump=args.pcap_dump)

    host1 = net.addHost('h1', mac='00:00:00:00:01:01', ip="10.0.1.1/24")
    #host2 = net.addHost('h2', mac = '00:00:00:00:02:02', ip="10.0.2.2/24")
    #host3 = net.addHost('h3', mac = '00:00:00:00:03:03', ip="10.0.3.3/24")
    host2 = net.addDocker('h2', mac='00:00:00:00:02:02', ip='10.0.2.2/24',
                          dimage="apache-php-mysql:v4")
    host3 = net.addDocker('h3', mac='00:00:00:00:03:03', ip='10.0.3.3/24',
                          dimage="apache-php-mysql:v4")

    net.addLink(host1, switch1, port1=0, port2=1)
    net.addLink(host2, switch1, port1=0, port2=2)
    net.addLink(host3, switch1, port1=0, port2=3)
    net.start()

    h1, h2, h3 = net.get('h1', 'h2', 'h3')

    # Static ARP entry for the gateway address plus a default route via it.
    h1.cmd("arp -s 10.0.1.254 00:00:00:01:01:01")
    h1.cmd("ip route add default via 10.0.1.254")
    #h1.cmd("ethtool -K eth0 tx off rx off")

    h2.cmd("arp -s 10.0.2.254 00:00:00:02:02:02")
    h2.cmd("ip route del default")
    h2.cmd("ip route add default via 10.0.2.254")
    #h2.cmd("ethtool -K eth0 tx off rx off")
    #h2.cmd("python -m SimpleHTTPServer 80 &")
    h2.cmd("/etc/init.d/php7.2-fpm start")
    h2.cmd("mysqld_safe --skip-grant-tables &")
    h2.cmd("/etc/init.d/apache2 start")

    h3.cmd("arp -s 10.0.3.254 00:00:00:03:03:03")
    h3.cmd("ip route del default")
    h3.cmd("ip route add default via 10.0.3.254")
    #h3.cmd("ethtool -K eth0 tx off rx off")
    #h3.cmd("python -m SimpleHTTPServer 80 &")
    h3.cmd("/etc/init.d/php7.2-fpm start")
    h3.cmd("mysqld_safe --skip-grant-tables &")
    h3.cmd("/etc/init.d/apache2 start")

    sleep(1)

    # Single-argument print() calls behave the same on Python 2 and 3.
    print('\033[0;32m' + "Gotcha!" + '\033[0m')

    CLI(net)
    try:
        net.stop()
    except Exception:
        # Best-effort cleanup; KeyboardInterrupt/SystemExit are not swallowed.
        print('\033[0;31m' + 'Stop error! Trying sudo mn -c' + '\033[0m')
        os.system('sudo mn -c')
        print('\033[0;32m' + 'Stop successfully!' + '\033[0m')


if __name__ == '__main__':
    setLogLevel('info')
    main()
[cmd.txt]
table_add ip_forward doforward 10.0.1.1 => 1 10.0.1.1 00:00:00:00:01:01
table_add myforward doforward 1 => 2 10.0.2.2 00:00:00:00:02:02
table_add myforward doforward 2 => 3 10.0.3.3 00:00:00:00:03:03
[cmd_add.py]
"""Load the table entries from cmd.txt into the running simple_switch."""
import os
import sys

# The CLI reads its commands from standard input, hence the shell redirection.
status = os.system('sudo /home/vagrant/behavioral-model/targets/simple_switch/simple_switch_CLI --thrift-port=9090 < cmd.txt')
if status != 0:
    # Do not report success when the table entries could not be installed.
    sys.exit(1)
[start_test_topo.py]
"""Start the demo topology with the compiled P4 program basic.json."""
import os
import sys

status = os.system("sudo python test_topo.py --behavioral-exe /home/vagrant/behavioral-model/targets/simple_switch/simple_switch --json basic.json")
if status != 0:
    # Propagate a failed start instead of exiting with status 0.
    sys.exit(1)
[p4_mininet.py]
# Copyright 2013-present Barefoot Networks, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

from mininet.net import Mininet
from mininet.node import Switch, Host
from mininet.log import setLogLevel, info, error, debug
from mininet.moduledeps import pathCheck
from sys import exit
from time import sleep
import os
import tempfile
import socket


class P4Host(Host):
    """Mininet host prepared for use behind a P4 software switch."""

    def config(self, **params):
        """Apply the parent configuration, then adjust the default interface.

        The default interface is renamed to eth0, rx/tx/sg offloading is
        switched off on it and IPv6 is disabled through sysctl.

        Returns whatever the parent class's config() returned.
        """
        r = super(P4Host, self).config(**params)

        self.defaultIntf().rename("eth0")

        for off in ["rx", "tx", "sg"]:
            cmd = "/sbin/ethtool --offload eth0 %s off" % off
            self.cmd(cmd)

        # disable IPv6
        self.cmd("sysctl -w net.ipv6.conf.all.disable_ipv6=1")
        self.cmd("sysctl -w net.ipv6.conf.default.disable_ipv6=1")
        self.cmd("sysctl -w net.ipv6.conf.lo.disable_ipv6=1")

        return r

    def describe(self):
        """Print the host name and its default interface name, IP and MAC."""
        # Single-argument print() calls behave the same on Python 2 and 3.
        print("**********")
        print(self.name)
        print("default interface: %s\t%s\t%s" % (
            self.defaultIntf().name,
            self.defaultIntf().IP(),
            self.defaultIntf().MAC()
        ))
        print("**********")


class P4Switch(Switch):
    """P4 virtual switch"""
    # Next device id handed out to a switch created without an explicit one.
    device_id = 0

    def __init__(self, name, sw_path = None, json_path = None,
                 thrift_port = None,
                 pcap_dump = False,
                 log_console = True,
                 verbose = False,
                 device_id = None,
                 enable_debugger = False,
                 **kwargs):
        """Record the switch settings; the process itself is started in start().

        sw_path and json_path are mandatory.  The process exits with status 1
        when json_path is not an existing file.
        """
        Switch.__init__(self, name, **kwargs)
        assert(sw_path)
        assert(json_path)
        # make sure that the provided sw_path is valid
        pathCheck(sw_path)
        # make sure that the provided JSON file exists
        if not os.path.isfile(json_path):
            error("Invalid JSON file.\n")
            exit(1)
        self.sw_path = sw_path
        self.json_path = json_path
        self.verbose = verbose
        logfile = "/tmp/p4s.{}.log".format(self.name)
        self.output = open(logfile, 'w')
        self.thrift_port = thrift_port
        self.pcap_dump = pcap_dump
        self.enable_debugger = enable_debugger
        self.log_console = log_console
        if device_id is not None:
            self.device_id = device_id
            P4Switch.device_id = max(P4Switch.device_id, device_id)
        else:
            self.device_id = P4Switch.device_id
            P4Switch.device_id += 1
        self.nanomsg = "ipc:///tmp/bm-{}-log.ipc".format(self.device_id)

    @classmethod
    def setup(cls):
        pass

    def check_switch_started(self, pid):
        """While the process is running (pid exists), we check if the Thrift
        server has been started. If the Thrift server is ready, we assume that
        the switch was started successfully. This is only reliable if the
        Thrift server is started at the end of the init process.

        Returns True once a TCP connection to localhost:thrift_port succeeds,
        False as soon as /proc/<pid> no longer exists.
        """
        while True:
            if not os.path.exists(os.path.join("/proc", str(pid))):
                return False
            sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
            try:
                sock.settimeout(0.5)
                result = sock.connect_ex(("localhost", self.thrift_port))
            finally:
                # One socket is created per probe; release it every time.
                sock.close()
            if result == 0:
                return True
            # A refused connection returns at once; pause briefly instead of
            # spinning at full speed until the server is listening.
            sleep(0.05)

    def start(self, controllers):
        "Start up a new P4 switch"
        info("Starting P4 switch {}.\n".format(self.name))
        args = [self.sw_path]
        # Attach every interface that has no IP address as a switch port.
        for port, intf in self.intfs.items():
            if not intf.IP():
                args.extend(['-i', str(port) + "@" + intf.name])
        #wuwzhs edit in 2017/11/10
        #args.extend(['-i 3@veth1'])
        if self.pcap_dump:
            args.append("--pcap")
            # args.append("--useFiles")
        if self.thrift_port:
            args.extend(['--thrift-port', str(self.thrift_port)])
        if self.nanomsg:
            args.extend(['--nanolog', self.nanomsg])
        args.extend(['--device-id', str(self.device_id)])
        # NOTE(review): the class counter is also advanced in __init__; this
        # second increment is kept unchanged from the original code.
        P4Switch.device_id += 1
        args.append(self.json_path)
        if self.enable_debugger:
            args.append("--debugger")
        if self.log_console:
            args.append("--log-console")
        logfile = "/tmp/p4s.{}.log".format(self.name)
        info(' '.join(args) + "\n")

        pid = None
        with tempfile.NamedTemporaryFile() as f:
            # self.cmd(' '.join(args) + ' > /dev/null 2>&1 &')
            # The shell appends the background job's PID to the temp file.
            self.cmd(' '.join(args) + ' >' + logfile + ' 2>&1 & echo $! >> ' + f.name)
            pid = int(f.read())
        debug("P4 switch {} PID is {}.\n".format(self.name, pid))
        if not self.check_switch_started(pid):
            error("P4 switch {} did not start correctly.\n".format(self.name))
            exit(1)
        info("P4 switch {} has been started.\n".format(self.name))

    def stop(self):
        "Terminate P4 switch."
        self.output.flush()
        # Release the log handle opened in __init__.
        self.output.close()
        self.cmd('kill %' + self.sw_path)
        self.cmd('wait')
        self.deleteIntfs()

    def attach(self, intf):
        "Connect a data port"
        assert(0)

    def detach(self, intf):
        "Disconnect a data port"
        assert(0)
[execution]

Open two terminals for h1: one for Wireshark and the other for Firefox. (Firefox will reconnect to the server after receiving the TCP packet with the “reset” flag.)

From the following figure, we can tell that the connection is reset. Firefox then reconnects to the server to get hi.php.

Similarly, see the following figure for a.jpg HTTP request.

Dr. Chih-Heng Ke (smallko@gmail.com)
Department of Computer Science and Information Engineering,
National Quemoy University, Kinmen, Taiwan.