CCOD-DQN (Contention Window Optimization in IEEE 802.11ax Networks with Deep Reinforcement Learning)

 

Please first read the article "Contention Window Optimization in IEEE 802.11ax Networks with Deep Reinforcement Learning", published at WCNC 2021 (preprint available on arXiv: https://arxiv.org/pdf/2003.01492). The original code for the paper can be found at https://github.com/wwydmanski/RLinWiFi. In this lab, I will show how to measure the throughput of traditional 802.11ax (CSMA/CA) and of CCOD-DQN. For CCOD-DQN, I use the PARL framework, rather than TensorFlow or Keras, for the reinforcement learning.

 

Please follow the instructions at https://github.com/tkn-tub/ns3-gym to install ns3-gym. Also, follow the instructions at https://github.com/PaddlePaddle/PARL to install PARL.
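
Note that test_dqn.py uses the legacy parl.layers / paddle.fluid API, so it needs a PARL 1.x release together with PaddlePaddle 1.x. The exact versions below are my assumption (check the PARL documentation if they do not match your setup):

pip install paddlepaddle==1.8.5 parl==1.3.1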

 

[steps]

Prepare cw.cc and test_dqn.py under scratch/myrlwifi

 

cw.cc (code adapted from https://github.com/wwydmanski/RLinWiFi/blob/master/linear-mesh/cw.cc)

#include "ns3/core-module.h"

#include "ns3/network-module.h"

#include "ns3/applications-module.h"

#include "ns3/wifi-module.h"

#include "ns3/mobility-module.h"

#include "ns3/csma-module.h"

#include "ns3/internet-module.h"

#include "ns3/flow-monitor-module.h"

#include "ns3/opengym-module.h"

#include "ns3/propagation-module.h"

#include "ns3/ipv4-flow-classifier.h"

#include "ns3/yans-wifi-channel.h"

#include <cmath>

#include <ctime>

#include <sstream>

#include <fstream>

#include <string>


#include <iomanip>

#include <deque>

#include <algorithm>

#include <csignal>

 

#define PI 3.14159265

 

using namespace ns3;

using namespace std;

 

NS_LOG_COMPONENT_DEFINE ("wifi1");

void recordHistory();

 

double SimTime = 100.0;

uint64_t lastTotalRx = 0;

uint32_t mactxno,macrxno,phyrxok,phyrxerror,phytx;

 

Ptr<FlowMonitor> monitor;

FlowMonitorHelper flowmon;

 

double envStepTime = 0.1;

double simulationTime = 10;

double current_time = 0.0;

bool verbose = false;

int end_delay = 0;

bool dry_run = false;

 

uint32_t CW = 0;

uint32_t history_length = 20;

string type = "discrete";

deque<float> history;

 

Ptr<PacketSink> sinkApp;

 

Ptr<OpenGymSpace> MyGetObservationSpace(void)

{

    current_time += envStepTime;

 

    float low = 0.0;

    float high = 10.0;

    std::vector<uint32_t> shape = {

        history_length,

    };

    std::string dtype = TypeNameGet<float>();

    Ptr<OpenGymBoxSpace> space = CreateObject<OpenGymBoxSpace>(low, high, shape, dtype);

    if (verbose)

        NS_LOG_UNCOND("MyGetObservationSpace: " << space);

    return space;

}

 

Ptr<OpenGymSpace> MyGetActionSpace(void)

{

    float low = 0.0;

    float high = 10.0;

    std::vector<uint32_t> shape = {

        1,

    };

    std::string dtype = TypeNameGet<float>();

    Ptr<OpenGymBoxSpace> space = CreateObject<OpenGymBoxSpace>(low, high, shape, dtype);

    if (verbose)

        NS_LOG_UNCOND("MyGetActionSpace: " << space);

    return space;

}

 

uint64_t g_rxPktNum = 0;

uint64_t g_txPktNum = 0;

uint64_t my_rxPktNum=0;

 

std::string MyGetExtraInfo(void)

{

    static float ticks = 0.0;

    static float lastValue = 0.0;

    //g_rxPktNum = sinkApp->GetTotalRxPkt();

    g_rxPktNum = my_rxPktNum;

    //std::cout << "in MyGetExtraInfo(), g_rxPktNum=" << g_rxPktNum << std::endl;

    float obs = g_rxPktNum - lastValue;

    lastValue = g_rxPktNum;

    ticks += envStepTime;

 

    float sentMbytes = obs * (1500 - 20 - 8 - 8) * 8.0 / 1024 / 1024;

 

    std::string myInfo = std::to_string(sentMbytes);

    myInfo = myInfo + "|" + to_string(CW);  

 

    if (verbose)

        NS_LOG_UNCOND("MyGetExtraInfo: " << myInfo);

 

    return myInfo;

}

 

bool MyExecuteActions(Ptr<OpenGymDataContainer> action)

{
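    // For the "discrete" agent type, action a in {0, 1, ..., 6} selects
    // CW = 2^(a+4), i.e. one of {16, 32, 64, 128, 256, 512, 1024}; the result
    // is clamped to [16, 1024] and written to MinCw/MaxCw of every Txop below.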

    if (verbose)

        NS_LOG_UNCOND("MyExecuteActions: " << action);

 

    Ptr<OpenGymBoxContainer<float>> box = DynamicCast<OpenGymBoxContainer<float>>(action);

    std::vector<float> actionVector = box->GetData();

 

    if (type == "discrete")

    {

        CW = pow(2, int(4 + actionVector.at(0)));

    }

    else if (type == "continuous")

    {

        CW = pow(2, actionVector.at(0) + 4);

    }

    else if (type == "direct_continuous")

    {

        CW = actionVector.at(0);

    }

    else

    {

        std::cout << "Unsupported agent type!" << endl;

        exit(0);

    }

 

    if (verbose) {

        NS_LOG_UNCOND("actionVector.at(0): " << actionVector.at(0));

    }

 

    uint32_t min_cw = 16;

    uint32_t max_cw = 1024;

 

    CW = min(max_cw, max(CW, min_cw));

 

    if (verbose) {

        NS_LOG_UNCOND("CW: " << CW);

    }

 

    if(!dry_run){

        //Config::Set("/$ns3::NodeListPriv/NodeList/*/$ns3::Node/DeviceList/*/$ns3::WifiNetDevice/Mac/$ns3::RegularWifiMac/BE_Txop/$ns3::QosTxop/MinCw", UintegerValue(CW));

        //Config::Set("/$ns3::NodeListPriv/NodeList/*/$ns3::Node/DeviceList/*/$ns3::WifiNetDevice/Mac/$ns3::RegularWifiMac/BE_Txop/$ns3::QosTxop/MaxCw", UintegerValue(CW));

        Config::Set("/NodeList/*/DeviceList/*/Mac/Txop/MinCw", UintegerValue(CW));

        Config::Set("/NodeList/*/DeviceList/*/Mac/Txop/MaxCw", UintegerValue(CW));

        //std::cout << "MinCw and MaxCw are set to " << CW << std::endl;

    }

    return true;

}

 

float MyGetReward(void)

{

    static float ticks = 0.0;

    static uint32_t last_packets = 0;

    static float last_reward = 0.0;

    ticks += envStepTime;

    g_rxPktNum = my_rxPktNum;

    //g_rxPktNum = sinkApp->GetTotalRxPkt();

    float res = g_rxPktNum - last_packets;

    //Normalize the per-step throughput by a rough upper bound (about 7.5 Mb per
    //0.1 s step with the default settings) so the reward stays roughly in [0, 1];
    //the constant is kept from the original RLinWiFi code.

    float reward = res * (1500 - 20 - 8 - 8) * 8.0 / 1024 / 1024 / (5 * 150 * envStepTime) * 10;

 

    last_packets = g_rxPktNum;

 

    if (ticks <= 2 * envStepTime)

        return 0.0;

 

    if (verbose)

        NS_LOG_UNCOND("MyGetReward: " << reward);

 

    if(reward>1.0f || reward<0.0f)

        reward = last_reward;

    last_reward = reward;

    return last_reward;

}

 

Ptr<OpenGymDataContainer> MyGetObservation()

{
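    // The observation is the last history_length (20) per-step collision ratios
    // recorded by recordHistory(), newest first, zero-padded when fewer samples exist.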

    recordHistory();

 

    std::vector<uint32_t> shape = {

        history_length,

    };

    Ptr<OpenGymBoxContainer<float>> box = CreateObject<OpenGymBoxContainer<float>>(shape);

 

    for (uint32_t i = 0; i < history.size(); i++)

    {

        if (history[i] >= -100 && history[i] <= 100)

            box->AddValue(history[i]);

        else

            box->AddValue(0);

    }

    for (uint32_t i = history.size(); i < history_length; i++)

    {

        box->AddValue(0);

    }

    if (verbose)

        NS_LOG_UNCOND("MyGetObservation: " << box);

    return box;

}

 

bool MyGetGameOver(void)

{

    // bool isGameOver = (ns3::Simulator::Now().GetSeconds() > simulationTime + end_delay + 1.0);

    /*

    if (verbose) {

        bool isGameOver = false;

        static float stepCounter = 0.0;

        stepCounter += 1;

        if (stepCounter == 200) {

           isGameOver = true;

        }

        NS_LOG_UNCOND("MyGetGameOver: " << isGameOver);

        return isGameOver;

    }

    */

    return false;

}

 

void ScheduleNextStateRead(double envStepTime, Ptr<OpenGymInterface> openGymInterface)

{
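    // Re-schedule itself every envStepTime seconds; each NotifyCurrentState() call
    // pushes the current observation/reward to the Python agent and applies the
    // action it sends back (via the callbacks registered in main()).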

    // if(ns3::Simulator::Now().GetSeconds()<simulationTime + end_delay + 1.0)

    // {

    Simulator::Schedule(Seconds(envStepTime), &ScheduleNextStateRead, envStepTime, openGymInterface);

    // }

    openGymInterface->NotifyCurrentState();

}

 

void recordHistory()

{
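    // Record the collision ratio (sent - received) / sent observed during the
    // last environment step and push it to the front of the history deque.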

    static uint32_t last_rx = 0;

    static uint32_t last_tx = 0;

    static uint32_t calls = 0;

    calls++;

    g_rxPktNum = my_rxPktNum;

    //g_rxPktNum = sinkApp->GetTotalRxPkt();

    float received = g_rxPktNum - last_rx;

    float sent = g_txPktNum - last_tx;

    float errs = sent - received;

    float ratio;

 

    ratio = errs / sent;

    history.push_front(ratio);

 

    if (history.size() > history_length)

    {

        history.pop_back();

    }

    last_rx = g_rxPktNum;

    last_tx = g_txPktNum;

}

 

void packetReceived(Ptr<const Packet> packet)

{

    //std::cout << "packetReceived() is called, pktsize=" << packet->GetSize() << "bytes" << std::endl;

    my_rxPktNum++;

}

 

 

void packetSent(Ptr<const Packet> packet)

{  

    //std::cout << "packetSent() is called, pktsize=" << packet->GetSize() << "bytes" << std::endl;

    g_txPktNum++;

}

 

void signalHandler(int signum)

{

    cout << "Interrupt signal " << signum << " received.\n";

    exit(signum);

}

 

int

main(int argc, char *argv[])

{

  uint32_t nSta = 1;

  uint32_t cwmin = 15;

  uint32_t cwmax = 1023;

  uint32_t openGymPort = 5555;

 

  double txStartTime = 0.1;

 

  int mcs = 6;

  int channelWidth = 20;

  int guardInterval = 800;

 

  signal(SIGTERM, signalHandler);

 

  CommandLine cmd;

  cmd.AddValue("openGymPort", "Specify port number. Default: 5555", openGymPort);

  cmd.AddValue("CW", "Value of Contention Window", CW);

  cmd.AddValue("historyLength", "Length of history window", history_length);

  cmd.AddValue("verbose", "Tell echo applications to log if true", verbose);

  cmd.AddValue("dryRun", "Execute scenario with BEB and no agent interaction", dry_run);

  cmd.AddValue("simTime", "Simulation time in seconds. Default: 10s", simulationTime);

  cmd.AddValue("envStepTime", "Step time in seconds. Default: 0.1s", envStepTime);

  cmd.AddValue ("nSta", "Number of wifi STA devices", nSta);

  cmd.AddValue ("cwmin", "Minimum contention window size", cwmin);

  cmd.AddValue ("cwmax", "Maximum contention window size", cwmax);

  cmd.AddValue ("agentType", "Agent Type", type);

  cmd.Parse (argc, argv);

 

  Config::SetDefault ("ns3::WifiRemoteStationManager::FragmentationThreshold", StringValue ("2200"));

  Config::SetDefault ("ns3::WifiRemoteStationManager::RtsCtsThreshold", StringValue ("2200"));

 

  NS_LOG_UNCOND("Ns3Env parameters:");

  NS_LOG_UNCOND("--nSta: " << nSta);

  NS_LOG_UNCOND("--simulationTime: " << simulationTime);

  NS_LOG_UNCOND("--openGymPort: " << openGymPort);

  NS_LOG_UNCOND("--envStepTime: " << envStepTime);

  NS_LOG_UNCOND("--agentType: " << type);

  NS_LOG_UNCOND("--dryRun: " << dry_run);

  NS_LOG_UNCOND("--verbose: " << verbose);

 

  WifiMacHelper wifiMac;

  WifiHelper wifiHelper;

  wifiHelper.SetStandard (WIFI_PHY_STANDARD_80211ax_5GHZ);

  std::ostringstream oss;

  oss << "HeMcs" << mcs;

  wifiHelper.SetRemoteStationManager("ns3::ConstantRateWifiManager", "DataMode", StringValue(oss.str()), "ControlMode", StringValue(oss.str()));

 

  Ptr<MatrixPropagationLossModel> lossModel = CreateObject<MatrixPropagationLossModel>();

  lossModel->SetDefaultLoss(50);

  YansWifiChannelHelper channel = YansWifiChannelHelper::Default ();

  Ptr<YansWifiChannel> chan = channel.Create();

  chan->SetPropagationLossModel(lossModel);

  chan->SetPropagationDelayModel(CreateObject<ConstantSpeedPropagationDelayModel>());

  YansWifiPhyHelper wifiPhy;

  wifiPhy = YansWifiPhyHelper::Default();

  wifiPhy.SetChannel(chan);

  wifiPhy.Set("GuardInterval", TimeValue(NanoSeconds(guardInterval)));

 

  NodeContainer wifiStaNodes;

  wifiStaNodes.Create (nSta);

  NodeContainer wifiApNode;

  wifiApNode.Create (uint32_t (1));

 

  Ssid ssid = Ssid ("wifi1");

  wifiMac.SetType ("ns3::ApWifiMac",

                   "Ssid", SsidValue (ssid));

 

  NetDeviceContainer apDevice;

  apDevice = wifiHelper.Install (wifiPhy, wifiMac, wifiApNode);

 

  wifiMac.SetType ("ns3::StaWifiMac",

                   "Ssid", SsidValue (ssid));

 

  NetDeviceContainer staDevices;

  staDevices = wifiHelper.Install (wifiPhy, wifiMac, wifiStaNodes);

 

  Config::Set("/NodeList/*/DeviceList/*/$ns3::WifiNetDevice/Phy/ChannelWidth", UintegerValue(channelWidth));

 

  std::cout << "----------------------------" << std::endl;

  if (!dry_run)

  {

       Config::Set("/NodeList/*/DeviceList/*/Mac/Txop/MinCw", UintegerValue(CW));

       Config::Set("/NodeList/*/DeviceList/*/Mac/Txop/MaxCw", UintegerValue(CW));

  }

  else

  {

       NS_LOG_UNCOND("Default CW");

       Config::Set("/NodeList/*/DeviceList/*/Mac/Txop/MinCw", UintegerValue(cwmin));

       Config::Set("/NodeList/*/DeviceList/*/Mac/Txop/MaxCw", UintegerValue(cwmax));

  }

 

  MobilityHelper mobility;

  Ptr<ListPositionAllocator> positionAlloc = CreateObject<ListPositionAllocator> ();

  positionAlloc->Add (Vector (0.0, 0.0, 0.0));

  float rho = 0.5;

  for (uint32_t i = 0; i < nSta; i++)

  {

    double theta = i * 2 * PI / nSta;

    positionAlloc->Add (Vector (rho * cos(theta), rho * sin(theta), 0.0));

    std::cout << "node " << i << " position:" << "(" << rho * cos(theta) << "," << rho * sin(theta) << ",0.0)" << std::endl;

  }

 

  mobility.SetPositionAllocator (positionAlloc);

  mobility.SetMobilityModel ("ns3::ConstantPositionMobilityModel");

  mobility.Install (wifiApNode);

  mobility.Install (wifiStaNodes);

 

  InternetStackHelper stack;

  stack.Install(wifiApNode);

  stack.Install(wifiStaNodes);

 

  Ipv4AddressHelper address;

  address.SetBase("10.1.1.0", "255.255.255.0");

  Ipv4InterfaceContainer ApInterface = address.Assign(apDevice);

  Ipv4InterfaceContainer StaInterface = address.Assign(staDevices);

 

  for (uint32_t i = 0; i < nSta; i++)

  {

          OnOffHelper onoff("ns3::UdpSocketFactory", Address(InetSocketAddress(ApInterface.GetAddress(0), 9)));

          onoff.SetConstantRate(DataRate ("100000kb/s"), 1500 - 20 - 8 - 8);

          ApplicationContainer temp = onoff.Install(wifiStaNodes.Get (i));

          temp.Start(Seconds(txStartTime));

          temp.Stop(Seconds(simulationTime));

  }

 

  PacketSinkHelper sink ("ns3::UdpSocketFactory", Address(InetSocketAddress(ApInterface.GetAddress(0), 9)));

  ApplicationContainer Serverapp = sink.Install(wifiApNode.Get (0));

  Serverapp.Start(Seconds (0.0));

 

  //Config::ConnectWithoutContext("/NodeList/*/DeviceList/*/$ns3::WifiNetDevice/Phy/PhyTxBegin", MakeCallback(&packetSent));

  Config::ConnectWithoutContext("/NodeList/*/DeviceList/*/Mac/MacTx", MakeCallback(&packetSent));

  Config::ConnectWithoutContext ("/NodeList/*/DeviceList/*/Mac/MacRx", MakeCallback (&packetReceived));

 

  Ipv4GlobalRoutingHelper::PopulateRoutingTables ();

 

  sinkApp = DynamicCast<PacketSink> (Serverapp.Get (0));

  FlowMonitorHelper flowmon;

  Ptr<FlowMonitor> monitor = flowmon.InstallAll ();

 

  Ptr<OpenGymInterface> openGymInterface = CreateObject<OpenGymInterface>(openGymPort);

  openGymInterface->SetGetActionSpaceCb(MakeCallback(&MyGetActionSpace));

  openGymInterface->SetGetObservationSpaceCb(MakeCallback(&MyGetObservationSpace));

  openGymInterface->SetGetGameOverCb(MakeCallback(&MyGetGameOver));

  openGymInterface->SetGetObservationCb(MakeCallback(&MyGetObservation));

  openGymInterface->SetGetRewardCb(MakeCallback(&MyGetReward));

  openGymInterface->SetGetExtraInfoCb(MakeCallback(&MyGetExtraInfo));

  openGymInterface->SetExecuteActionsCb(MakeCallback(&MyExecuteActions));

  Simulator::Schedule(Seconds(1.0), &ScheduleNextStateRead, envStepTime, openGymInterface);

 

  Simulator::Stop(Seconds(simulationTime + 1.0 + envStepTime*(history_length+1)));

  Simulator::Run();

 

  Ptr<Ipv4FlowClassifier> classifier = DynamicCast<Ipv4FlowClassifier>(flowmon.GetClassifier());

  std::map<FlowId, FlowMonitor::FlowStats> stats = monitor->GetFlowStats();

  double lastRxTime = 0;

  double firstRxTime = simulationTime + 10;

  double flowThr;

  double timediff;

  uint32_t totalRx =0;

  uint32_t totalTx =0;

  uint32_t totalRxBytes =0;

 

  for(std::map<FlowId, FlowMonitor::FlowStats>::const_iterator set = stats.begin(); set != stats.end(); set++)

  {

    if(lastRxTime < set->second.timeLastRxPacket.GetSeconds())

    {

            lastRxTime = set->second.timeLastRxPacket.GetSeconds();

    }

    if(firstRxTime > set->second.timeFirstRxPacket.GetSeconds())

    {

            firstRxTime = set->second.timeFirstRxPacket.GetSeconds();

    }

 

    totalRx +=  set->second.rxPackets;

    totalTx +=  set->second.txPackets;

    totalRxBytes += set->second.rxBytes;

 

    Ipv4FlowClassifier::FiveTuple t = classifier->FindFlow(set->first);

    timediff = set->second.timeLastRxPacket.GetSeconds() - set->second.timeFirstRxPacket.GetSeconds();

    flowThr = set->second.rxBytes * 8.0 / timediff / 1000 / 1000;

    std::cout << "Flow " << set->first << " (" << t.sourceAddress << " -> " << t.destinationAddress << ")\tThroughput: " << flowThr << " Mbps\tTime: " << set->second.timeLastRxPacket.GetSeconds() - set->second.timeFirstRxPacket.GetSeconds() << " s\tRx packets " << set->second.rxPackets << std::endl;

    //std::cout << "packetsDropped:" << set->second.packetsDropped.size() << std::endl;

  }

 

  std::cout << "totalTx:" << totalTx << " totalRx:" << totalRx << std::endl;

  std::cout << "sinkApp->GetTotalRxPkt()=" << sinkApp->GetTotalRxPkt() << std::endl;

  std::cout << "g_txPktNum=" << g_txPktNum << std::endl;

  std::cout << "my_rxPktNum=" << my_rxPktNum << std::endl;

  //std::cout << "totalRxBytes=" << totalRxBytes << std::endl;

  //std::cout << "sinkApp->GetTotalRx()=" << sinkApp->GetTotalRx() << std::endl;

 

  double totalBytes = sinkApp->GetTotalRx();

  float throughput = totalBytes * 8.0/1000/1000/(lastRxTime - firstRxTime);

  std::cout << "throughput:\t" << throughput << " Mbps" << std::endl;

  //std::cout << "cwmin: " << cwmin << ", cwmax: " << cwmax << ", nSta: " << nSta << std::endl;

  //std::cout << "firstRxTime: " << firstRxTime << "sec,\t lastRxTime: " << lastRxTime << "sec" << std::endl;

 

  openGymInterface->NotifySimulationEnd();

  Simulator::Destroy ();

}

 

test_dqn.py

#!/usr/bin/env python3

# -*- coding: utf-8 -*-

 

import argparse

from ns3gym import ns3env

 

import numpy as np

import random

import os

import parl

from parl import layers 

import copy

import paddle.fluid as fluid

import collections

 

MEMORY_SIZE = 20000 

MEMORY_WARMUP_SIZE = 100 

BATCH_SIZE = 32

LEARNING_RATE = 0.001

GAMMA = 0.9

 

class Model(parl.Model):

    def __init__(self, act_dim):

        hid1_size = 128

        hid2_size = 128

        self.fc1 = layers.fc(size=hid1_size, act='relu')

        self.fc2 = layers.fc(size=hid2_size, act='relu')

        self.fc3 = layers.fc(size=act_dim, act=None)

 

    def value(self, obs):

        h1 = self.fc1(obs)

        h2 = self.fc2(h1)

        Q = self.fc3(h2)

        return Q

 

class DQN(parl.Algorithm):
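    # Classic DQN on the PARL 1.x fluid API: learn() builds the TD target
    # reward + (1 - terminal) * gamma * max_a' Q_target(next_obs, a') and minimizes
    # the squared error against Q(obs, action); sync_target() copies the online
    # network's weights into the target network.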

    def __init__(self, model, act_dim=None, gamma=None, lr=None):

        self.model = model

        self.target_model = copy.deepcopy(model)

 

        assert isinstance(act_dim, int)

        assert isinstance(gamma, float)

        assert isinstance(lr, float)

        self.act_dim = act_dim

        self.gamma = gamma

        self.lr = lr

 

    def predict(self, obs):

        return self.model.value(obs)

 

    def learn(self, obs, action, reward, next_obs, terminal):

        next_pred_value = self.target_model.value(next_obs)

        best_v = layers.reduce_max(next_pred_value, dim=1)

        best_v.stop_gradient = True 

        terminal = layers.cast(terminal, dtype='float32')

        target = reward + (1.0 - terminal) * self.gamma * best_v

 

        pred_value = self.model.value(obs)

        action_onehot = layers.one_hot(action, self.act_dim)

        action_onehot = layers.cast(action_onehot, dtype='float32')

        pred_action_value = layers.reduce_sum(

            layers.elementwise_mul(action_onehot, pred_value), dim=1)

 

        cost = layers.square_error_cost(pred_action_value, target)

        cost = layers.reduce_mean(cost)

        optimizer = fluid.optimizer.Adam(learning_rate=self.lr)

        optimizer.minimize(cost)

        return cost

 

    def sync_target(self):

        self.model.sync_weights_to(self.target_model)

 

class Agent(parl.Agent):
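    # Epsilon-greedy DQN agent (PARL 1.x style): the parl.Agent base constructor
    # runs build_program() and creates self.fluid_executor, which predict() and
    # learn() use to execute the prediction and training programs.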

    def __init__(self,

                 algorithm,

                 obs_dim,

                 act_dim,

                 e_greed=0.1,

                 e_greed_decrement=0):

        assert isinstance(obs_dim, int)

        assert isinstance(act_dim, int)

        self.obs_dim = obs_dim

        self.act_dim = act_dim

        super(Agent, self).__init__(algorithm)

 

        self.global_step = 0

        self.update_target_steps = 200 

 

        self.e_greed = e_greed

        self.e_greed_decrement = e_greed_decrement

 

    def build_program(self):

        self.pred_program = fluid.Program()

        self.learn_program = fluid.Program()

 

        with fluid.program_guard(self.pred_program):

            obs = layers.data(

                name='obs', shape=[self.obs_dim], dtype='float32')

            self.value = self.alg.predict(obs)

 

        with fluid.program_guard(self.learn_program):

            obs = layers.data(

                name='obs', shape=[self.obs_dim], dtype='float32')

            action = layers.data(name='act', shape=[1], dtype='int32')

            reward = layers.data(name='reward', shape=[], dtype='float32')

            next_obs = layers.data(

                name='next_obs', shape=[self.obs_dim], dtype='float32')

            terminal = layers.data(name='terminal', shape=[], dtype='bool')

            self.cost = self.alg.learn(obs, action, reward, next_obs, terminal)

 

    def sample(self, obs):

        sample = np.random.rand() 

        if sample < self.e_greed:

            act = np.random.randint(self.act_dim) 

        else:

            act = self.predict(obs)

        self.e_greed = max(

            0.01, self.e_greed - self.e_greed_decrement)

        return act

 

    def predict(self, obs):

        obs = np.expand_dims(obs, axis=0)

        pred_Q = self.fluid_executor.run(

            self.pred_program,

            feed={'obs': obs.astype('float32')},

            fetch_list=[self.value])[0]

        pred_Q = np.squeeze(pred_Q, axis=0)

        act = np.argmax(pred_Q) 

        return act

 

    def learn(self, obs, act, reward, next_obs, terminal):

        if self.global_step % self.update_target_steps == 0:

            self.alg.sync_target()

        self.global_step += 1

 

        act = np.expand_dims(act, -1)

        feed = {

            'obs': obs.astype('float32'),

            'act': act.astype('int32'),

            'reward': reward,

            'next_obs': next_obs.astype('float32'),

            'terminal': terminal

        }

        cost = self.fluid_executor.run(

            self.learn_program, feed=feed, fetch_list=[self.cost])[0]

        return cost

 

class ReplayMemory(object):

    def __init__(self, max_size):

        self.buffer = collections.deque(maxlen=max_size)

 

    def append(self, exp):

        self.buffer.append(exp)

 

    def sample(self, batch_size):

        mini_batch = random.sample(self.buffer, batch_size)

        obs_batch, action_batch, reward_batch, next_obs_batch, done_batch = [], [], [], [], []

 

        for experience in mini_batch:

            s, a, r, s_p, done = experience

            obs_batch.append(s)

            action_batch.append(a)

            reward_batch.append(r)

            next_obs_batch.append(s_p)

            done_batch.append(done)

 

        return np.array(obs_batch).astype('float32'), \

            np.array(action_batch).astype('float32'), np.array(reward_batch).astype('float32'),\

            np.array(next_obs_batch).astype('float32'), np.array(done_batch).astype('float32')

 

    def __len__(self):

        return len(self.buffer)

 

 

port = 5555

seed = 1

env = ns3env.Ns3Env(port=port, simSeed=seed)

env.reset()

 

stepIdx = 0

obs_dim=20

act_dim=7
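# obs_dim matches history_length (20 collision-ratio samples) in cw.cc;
# act_dim=7 covers the discrete actions 0..6, which cw.cc maps to CW = 2^(a+4), i.e. 16..1024.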

 

model = Model(act_dim=act_dim)

algorithm = DQN(model, act_dim=act_dim, gamma=GAMMA, lr=LEARNING_RATE)

agent = Agent(

        algorithm,

        obs_dim=obs_dim,

        act_dim=act_dim,

        e_greed=0.1, 

        e_greed_decrement=1e-6)

 

rpm = ReplayMemory(MEMORY_SIZE)

 

try:
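        # Warm up the replay memory with MEMORY_WARMUP_SIZE transitions first, then
        # keep interacting with the ns-3 environment, learning from a sampled
        # mini-batch every 5 steps, until the episode ends.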

        obs = env.reset()

        print("Step: ", stepIdx)

        print("---obs:", obs)

    

        while True:

            while len(rpm) < MEMORY_WARMUP_SIZE:

              stepIdx += 1

              action = agent.sample(obs)  

              action2 = np.array([action])                    

              print("---action: ", action)

              next_obs, reward, done, info = env.step(action2)

              print("---obs, reward, done, info: ", next_obs, reward, done, info)

              rpm.append((obs, action, reward, next_obs, done))

              obs=next_obs

 

              print("Step: ", stepIdx)

 

            if stepIdx%5==0:

              print("="*20,"agent learn","="*20)

              (batch_obs, batch_action, batch_reward, batch_next_obs, batch_done) = rpm.sample(BATCH_SIZE)

              train_loss = agent.learn(batch_obs, batch_action, batch_reward, batch_next_obs, batch_done)

 

            action = agent.sample(obs)  

            action2 = np.array([action])                    

            print("---action: ", action)

 

            next_obs, reward, done, info = env.step(action2)

            print("---obs, reward, done, info: ", next_obs, reward, done, info)

            rpm.append((obs, action, reward, next_obs, done))

            obs=next_obs

            if done:

              print("done")

              break

 

            stepIdx += 1

            print("Step: ", stepIdx)

   

except KeyboardInterrupt:

    print("Ctrl-C -> Exit")

finally:

    env.close()

    print("Done")

 

[Executions] (my test OS environment: Ubuntu 18.04)

(for CSMA/CA, 30 nodes, simulation time: 10 s)
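
In the first terminal, start the ns-3 simulation with the default BEB contention window (dry run). The run target name below is my assumption; it depends on how your ns-3/waf build names programs under scratch/myrlwifi:

./waf --run "myrlwifi --dryRun=true --nSta=30 --simTime=10"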

 

Open another terminal
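
In the second terminal, start the Python agent so the simulation can step (in dry-run mode the agent's actions are ignored and standard BEB is used). A sketch, assuming test_dqn.py sits next to cw.cc:

cd scratch/myrlwifi
python3 test_dqn.py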

 

(Wait until the simulation finishes.)

In the first terminal, you can see that the throughput for CSMA/CA is 39.3 Mbps.

 

For CCOD-DQN
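
In the first terminal, start the simulation again, this time with the agent controlling the contention window (dryRun left at its default of false); the run target name is again my assumption:

./waf --run "myrlwifi --nSta=30 --simTime=10"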

 

Open another terminal
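
In the second terminal, run the Python agent as before:

python3 test_dqn.py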

 

(Wait until the simulation finishes.)

In the first terminal, you can see that the throughput for CCOD-DQN is 51.5933 Mbps (better than CSMA/CA).

 

If you are interested in improving 802.11 throughput via reinforcement learning, you can also refer to my work at https://nqucsie.myqnapcloud.com/smallko/setl-rl.htm

 


Last Modified: 2022/2/27

 

[Author]

Dr. Chih-Heng Ke

Department of Computer Science and Information Engineering, National Quemoy University, Kinmen, Taiwan

Email: smallko@gmail.com