Contention window optimization using reinforcement learning (multi-agent version)

Please refer to my paper,

C. H. Ke and L. Astuti, "Applying Deep Reinforcement Learning to Improve Throughput and Reduce Collision Rate in IEEE 802.11 Networks," KSII Transactions on Internet and Information Systems, vol. 16, no. 1, pp. 334-349, 2022. DOI: 10.3837/tiis.2022.01.019. (SCI) (code) (single-agent)

C. H. Ke and L. Astuti, “Applying Multi-Agent Deep Reinforcement Learning for Contention Window Optimization to Enhance Wireless Network Performance”, ICT Express (SCI)

For more information.

In this lab, I will use PARL to optimize the contention window to enhance CSMA/CA performance.

P.S. Single-agent version: the agent is placed at the access point. When the agent decides the contention window, the contention window size is sent to the mobile stations via a beacon packet. When a mobile station receives the beacon, it sets its contention window accordingly.

Multi-agent version: an agent is placed at each mobile station. Each mobile station sends its throughput to the AP; the AP sums up all the throughputs and sends the aggregate throughput back to the mobile stations. Each mobile station uses the aggregate throughput as the reward when choosing its contention window size. For the current Python code, we assume that every mobile station has the same conditions (the same distance from the access point, the same antenna gain, etc.). In the future, I will use NS3 to run more realistic simulations.

CSMA/CA (802.11b) (no reinforcement learning)

import numpy as np

import random

# --- Simulation parameters (IEEE 802.11b DCF) ---
_n=50 # number of contending stations
_simTime=500 # simulated duration in seconds
rate=11 # data rate in Mbps: 11, 5.5, 2 or 1
_cwmin=32 # contention window assigned after a successful transmission
_cwmax=1024 # upper bound of the contention window
# Inter-frame spaces and slot length. setNow() divides them by M before adding
# them to `now`, so they are expressed in microseconds.
SIFS=10
DIFS=50
EIFS=SIFS+DIFS+192+112 # numerically equal to SIFS + DIFS + Tack()
SLOT=20
M=1000000 # microseconds per second
_pktSize=1000 # payload size in bytes
# --- Mutable simulation state ---
stat_succ=0 # number of successful transmissions
stat_coll=0 # number of collision events
stat_pkts=np.zeros(_n) # packets delivered by each station
cw=np.zeros(_n) # current contention window of each station
bo=np.zeros(_n) # remaining backoff slots of each station
now=0.0 # simulation clock in seconds

def init_bo():
    """Reset every station to the minimum window and draw its first backoff.

    One ``random.randint(0, _cwmax)`` draw is made per station, in index
    order, and reduced modulo the station's contention window.
    """
    for station in range(_n):
        cw[station] = _cwmin
        draw = random.randint(0, _cwmax)
        bo[station] = draw % cw[station]

def Tdata():
    """Return the air time of one data frame in microseconds.

    The frame is the payload (`_pktSize` bytes) plus 28 extra bytes, sent at
    `rate` Mbps, on top of a fixed 192 us overhead (presumably the PLCP
    preamble and header - confirm).
    """
    frame_bits = (_pktSize + 28) * 8.0
    return 192 + frame_bits / rate

def Tack(ack_bytes=14, basic_rate=1, plcp_overhead=192):
    """Return the air time of an ACK frame in microseconds.

    Args:
        ack_bytes: length of the ACK frame in bytes.
        basic_rate: rate in Mbps at which the ACK is transmitted.
        plcp_overhead: fixed per-frame overhead in microseconds.

    Returns:
        ``plcp_overhead + ack_bytes * 8 / basic_rate`` as a float. With the
        defaults this is 304.0, the value the simulator has always used.

    Raises:
        ValueError: if `basic_rate` is not positive.
    """
    if basic_rate <= 0:
        raise ValueError("basic_rate must be positive")
    return plcp_overhead + (ack_bytes * 8.0) / basic_rate

def getMinBoAllStationsIndex():
    """Return the index of the station holding the smallest backoff.

    Ties are resolved in favour of the lowest station index.
    """
    best = 0
    lowest = bo[best]
    for station in range(1, _n):
        if bo[station] < lowest:
            best, lowest = station, bo[station]
    return best

def getCountMinBoAllStations(min):
    """Return how many stations currently have a backoff equal to `min`.

    A result of 1 means a successful transmission; more than 1 means the
    stations sharing that backoff collide.
    """
    return sum(1 for station in range(_n) if bo[station] == min)

def subMinBoFromAll(min,count):
    """Advance every station's backoff past the transmission that just ended.

    Args:
        min: the smallest backoff among all stations (the slots that elapsed).
        count: number of stations that held that backoff.

    Stations with a larger backoff lose ``min + 1`` slots. Stations that
    transmitted redraw their backoff: after a success (count == 1) the window
    returns to `_cwmin`; after a collision it doubles until it reaches
    `_cwmax`. Any inconsistent input prints an error and exits with status 1.
    """
    for station in range(_n):
        remaining = bo[station]
        if remaining < min:
            print("<Error> min=",min," bo=",remaining)
            exit(1)

        if remaining > min:
            bo[station] = remaining - min - 1
        elif remaining == min:
            if count == 1:
                cw[station] = _cwmin
            elif count > 1:
                # Binary exponential backoff, capped at the maximum window.
                if cw[station] < _cwmax:
                    cw[station] *= 2
                else:
                    cw[station] = _cwmax
            else:
                print("<Error> count=",count)
                exit(1)
            bo[station] = random.randint(0, _cwmax) % cw[station]

def setStats(min,index,count):
    """Record the outcome of the current contention round.

    Args:
        min: smallest backoff among all stations.
        index: station that holds it (credited when the round succeeds).
        count: number of stations sharing that backoff.

    A single transmitter counts as a success; anything else as a collision.
    Afterwards the backoff table is sanity-checked against `min`.
    """
    global stat_succ, stat_coll

    succeeded = (count == 1)
    if succeeded:
        stat_pkts[index] += 1
        stat_succ += 1
    else:
        stat_coll += 1

    for station in range(_n):
        if bo[station] < min:
            print("<Error> min=", min, " bo=", bo[station])
            exit(1)

def setNow(min,count):
    """Advance the simulation clock by the duration of the current round.

    Args:
        min: number of idle backoff slots before the transmission.
        count: number of simultaneous transmitters (1 = success, >1 = collision).

    The individual durations are in microseconds and are added one at a time,
    in a fixed order, after conversion to seconds.
    """
    global now

    if count == 1:
        # idle slots, data frame, SIFS, ACK, DIFS
        phases = (min * SLOT, Tdata(), SIFS, Tack(), DIFS)
    elif count > 1:
        # idle slots, SIFS, collided data frame, EIFS
        phases = (min * SLOT, SIFS, Tdata(), EIFS)
    else:
        print("<Error> count=", count)
        exit(1)

    for duration in phases:
        now += duration / M

def resolve():
    """Simulate one contention round: pick the winner(s), then update clock,
    statistics and backoff counters, in that order."""
    winner = getMinBoAllStationsIndex()
    smallest = bo[winner]
    transmitters = getCountMinBoAllStations(smallest)

    setNow(smallest, transmitters)
    setStats(smallest, winner, transmitters)
    subMinBoFromAll(smallest, transmitters)

def printStats():
    """Print the packet total, collision rate (%) and aggregate throughput (bit/s)."""
    print("\nGeneral Statistics\n")
    print("-"*50)

    delivered = sum(stat_pkts[station] for station in range(_n))
    attempts = stat_succ + stat_coll

    print("Total num of packets:", delivered)
    print("Collision rate:", stat_coll/attempts*100, "%")
    print("Aggregate Throughput:", delivered*(_pktSize*8.0)/now)

def main():
    """Run the DCF simulation for `_simTime` seconds and print the statistics.

    The random generator is seeded with 1 so that runs are repeatable.
    """
    global now

    random.seed(1)
    init_bo()

    # The medium must be idle for one DIFS before the first backoff starts.
    now += DIFS / M
    while now < _simTime:
        resolve()

    printStats()


main()

Execution

Collision Rate:~33%, Throughput:~4Mbps for 50 nodes. (11b)

CSMA/CA (802.11a) (no reinforcement learning)

import numpy as np

import random

import math

#reference: https://github.com/cecyliaborek/DCF-NumPy-simulation/blob/master/simulation.py

# --- Simulation parameters (IEEE 802.11a DCF) ---
_n=50 # number of contending stations
_simTime=500 # simulated duration in seconds
rate=54 # data rate in Mbps; must be a key of the table inside Tdata()
control_rate=24 # rate in Mbps used by Tdata()/Tack() for the SIGNAL field and the ACK
_cwmin=16 # contention window assigned after a successful transmission
_cwmax=1024 # upper bound of the contention window
# Inter-frame spaces and slot length. setNow() divides them by M before adding
# them to `now`, so they are expressed in microseconds.
SIFS=16
DIFS=34
SLOT=9
M=1000000 # microseconds per second
_pktSize=1000 # payload size in bytes
# --- Mutable simulation state ---
stat_succ=0 # number of successful transmissions
stat_coll=0 # number of collision events
stat_pkts=np.zeros(_n) # packets delivered by each station
cw=np.zeros(_n) # current contention window of each station
bo=np.zeros(_n) # remaining backoff slots of each station
now=0.0 # simulation clock in seconds

def init_bo():
    """Reset every station to the minimum window and draw its first backoff.

    One ``random.randint(0, _cwmax)`` draw is made per station, in index
    order, and reduced modulo the station's contention window.
    """
    for station in range(_n):
        cw[station] = _cwmin
        draw = random.randint(0, _cwmax)
        bo[station] = draw % cw[station]

def Tdata():
    """Return the air time of one OFDM data frame in microseconds.

    The frame consists of a 16 us preamble, a 24-bit SIGNAL field timed at
    `control_rate`, and the SERVICE + MAC frame + TAIL bits padded up to a
    whole number of symbols and timed at `rate`.

    Raises:
        KeyError: if `rate` is not one of the supported data rates.
    """
    # Bits per symbol for each data rate (Mbps).
    # NOTE(review): these look like coded bits per symbol rather than data
    # bits per symbol - confirm against the 802.11a TXTIME formula.
    symbol_bits = {
        6: 48, 9: 48, 12: 96, 18: 96,
        24: 192, 36: 192, 48: 288, 54: 288,
    }[rate]

    preamble_us = 16
    signal_us = 24 / (control_rate)

    mac_frame_bits = 36 * 8 + _pktSize * 8 + 4 * 8   # header + payload + FCS
    raw_bits = 16 + mac_frame_bits + 6               # SERVICE + frame + TAIL
    # Round up to a whole number of symbols (raw bits plus padding).
    padded_bits = math.ceil(raw_bits / symbol_bits) * symbol_bits

    return preamble_us + signal_us + padded_bits / (rate)

def Tack():
    """Return the air time of an OFDM ACK frame in microseconds.

    The ACK consists of a 16 us preamble, a 24-bit SIGNAL field and the
    SERVICE (16 bit) + ACK (14 bytes) + TAIL (6 bit) bits, all timed at the
    module-level `control_rate` (Mbps). No symbol padding is applied.
    """
    # `control_rate` is only read here, so no `global` declaration is needed.
    ofdm_preamble = 16  # us
    ofdm_signal = 24  # bits
    ofdm_signal_duration = ofdm_signal / control_rate  # us

    service = 16  # bits
    tail = 6  # bits
    ack = 14 * 8  # bits

    return ofdm_preamble + ofdm_signal_duration + \
        (service + ack + tail) / control_rate

# EIFS = SIFS + ACK air time + DIFS (microseconds). Evaluated once when the
# module is loaded, so it reflects the control_rate in effect at that point.
EIFS=SIFS+DIFS+Tack()

def getMinBoAllStationsIndex():
    """Return the index of the station holding the smallest backoff.

    Ties are resolved in favour of the lowest station index.
    """
    best = 0
    lowest = bo[best]
    for station in range(1, _n):
        if bo[station] < lowest:
            best, lowest = station, bo[station]
    return best

def getCountMinBoAllStations(min):
    """Return how many stations currently have a backoff equal to `min`.

    A result of 1 means a successful transmission; more than 1 means the
    stations sharing that backoff collide.
    """
    return sum(1 for station in range(_n) if bo[station] == min)

def subMinBoFromAll(min,count):
    """Advance every station's backoff past the transmission that just ended.

    Args:
        min: the smallest backoff among all stations (the slots that elapsed).
        count: number of stations that held that backoff.

    Stations with a larger backoff lose ``min + 1`` slots. Stations that
    transmitted redraw their backoff: after a success (count == 1) the window
    returns to `_cwmin`; after a collision it doubles until it reaches
    `_cwmax`. Any inconsistent input prints an error and exits with status 1.
    """
    for station in range(_n):
        remaining = bo[station]
        if remaining < min:
            print("<Error> min=",min," bo=",remaining)
            exit(1)

        if remaining > min:
            bo[station] = remaining - min - 1
        elif remaining == min:
            if count == 1:
                cw[station] = _cwmin
            elif count > 1:
                # Binary exponential backoff, capped at the maximum window.
                if cw[station] < _cwmax:
                    cw[station] *= 2
                else:
                    cw[station] = _cwmax
            else:
                print("<Error> count=",count)
                exit(1)
            bo[station] = random.randint(0, _cwmax) % cw[station]

def setStats(min,index,count):
    """Record the outcome of the current contention round.

    Args:
        min: smallest backoff among all stations.
        index: station that holds it (credited when the round succeeds).
        count: number of stations sharing that backoff.

    A single transmitter counts as a success; anything else as a collision.
    Afterwards the backoff table is sanity-checked against `min`.
    """
    global stat_succ, stat_coll

    succeeded = (count == 1)
    if succeeded:
        stat_pkts[index] += 1
        stat_succ += 1
    else:
        stat_coll += 1

    for station in range(_n):
        if bo[station] < min:
            print("<Error> min=", min, " bo=", bo[station])
            exit(1)

def setNow(min,count):
    """Advance the simulation clock by the duration of the current round.

    Args:
        min: number of idle backoff slots before the transmission.
        count: number of simultaneous transmitters (1 = success, >1 = collision).

    The individual durations are in microseconds and are added one at a time,
    in a fixed order, after conversion to seconds.
    """
    global now

    if count == 1:
        # idle slots, data frame, SIFS, ACK, DIFS
        phases = (min * SLOT, Tdata(), SIFS, Tack(), DIFS)
    elif count > 1:
        # idle slots, SIFS, collided data frame, EIFS
        phases = (min * SLOT, SIFS, Tdata(), EIFS)
    else:
        print("<Error> count=", count)
        exit(1)

    for duration in phases:
        now += duration / M

def resolve():
    """Simulate one contention round: pick the winner(s), then update clock,
    statistics and backoff counters, in that order."""
    winner = getMinBoAllStationsIndex()
    smallest = bo[winner]
    transmitters = getCountMinBoAllStations(smallest)

    setNow(smallest, transmitters)
    setStats(smallest, winner, transmitters)
    subMinBoFromAll(smallest, transmitters)

def printStats():
    """Print packet total, raw counters, collision rate (%) and aggregate
    throughput (bit/s)."""
    print("\nGeneral Statistics\n")
    print("-"*50)

    delivered = sum(stat_pkts[station] for station in range(_n))
    print("Total num of packets:", delivered)
    print("stat_coll:", stat_coll, " stat_succ:", stat_succ)

    attempts = stat_succ + stat_coll
    collision_rate = float(stat_coll) / attempts * 100
    print("Collision rate:", collision_rate, "%")

    print("Aggregate Throughput:", delivered*(_pktSize*8.0)/now)

def main():
    """Run the DCF simulation for `_simTime` seconds and print the statistics.

    The random generator is seeded with 1 so that runs are repeatable.
    """
    global now

    random.seed(1)
    init_bo()

    # The medium must be idle for one DIFS before the first backoff starts.
    now += DIFS / M
    while now < _simTime:
        resolve()

    printStats()


main()

Execution:

Collision Rate:~38%, Throughput:~19Mbps for 50 nodes. (11a)

Multiagent.py

import numpy as np

import random

import os

import parl

from parl import layers

import copy

import paddle.fluid as fluid

import collections

# --- DQN hyper-parameters ---
MEMORY_SIZE = 20000 # capacity of each station's replay memory
MEMORY_WARMUP_SIZE = 100 # exploration steps run before any training
BATCH_SIZE = 10 # transitions sampled per training update
LEARNING_RATE = 0.001 # learning rate passed to the Adam optimizer
GAMMA = 0.9 # reward discount factor
# --- Snapshots taken at the end of the previous RL step ---
pre_time=0.0
pre_stat_succ=0
pre_stat_coll=0
# --- Simulation parameters (IEEE 802.11b DCF) ---
_n=50 # number of contending stations (one agent each)
_simTime=2000 # simulated training duration in seconds
rate=11 # data rate in Mbps: 11, 5.5, 2 or 1
_cwmin=32 # initial contention window
_cwmax=1024 # upper bound of the random backoff draw
# Inter-frame spaces and slot length. setNow() divides them by M before adding
# them to `now`, so they are expressed in microseconds.
SIFS=10
DIFS=50
EIFS=SIFS+DIFS+192+112 # numerically equal to SIFS + DIFS + Tack()
SLOT=20
M=1000000 # microseconds per second
_pktSize=1000 # payload size in bytes
# --- Mutable simulation state ---
stat_succ=0 # number of successful transmissions
stat_coll=0 # number of collision events
sta_coll=np.zeros(_n) # number of failed transmissions of each station
stat_pkts=np.zeros(_n) # packets delivered by each station
cw=np.zeros(_n) # contention window currently applied by each station
bo=np.zeros(_n) # remaining backoff slots of each station
sta_cw=np.zeros(_n) # contention window selected by each station's agent
now=0.0 # simulation clock in seconds

class Model(parl.Model):
    """Fully connected Q-network: two 128-unit ReLU layers and a linear
    output with one value per action."""

    def __init__(self, act_dim):
        hidden_sizes = (128, 128)

        self.fc1 = layers.fc(size=hidden_sizes[0], act='relu')
        self.fc2 = layers.fc(size=hidden_sizes[1], act='relu')
        self.fc3 = layers.fc(size=act_dim, act=None)

    def value(self, obs):
        """Return the Q-values predicted for the observation batch `obs`."""
        hidden = self.fc2(self.fc1(obs))
        return self.fc3(hidden)

class DQN(parl.Algorithm):
    """Deep Q-learning with a separate target network."""

    def __init__(self, model, act_dim=None, gamma=None, lr=None):
        """Store the online model, clone it as the target model and keep the
        hyper-parameters (`act_dim` int, `gamma` float, `lr` float)."""
        self.model = model
        self.target_model = copy.deepcopy(model)

        assert isinstance(act_dim, int)
        assert isinstance(gamma, float)
        assert isinstance(lr, float)
        self.act_dim, self.gamma, self.lr = act_dim, gamma, lr

    def predict(self, obs):
        """Return the online network's Q-values for `obs`."""
        return self.model.value(obs)

    def learn(self, obs, action, reward, next_obs, terminal):
        """Build the one-step TD loss and its Adam update; return the loss."""
        # Bootstrap target from the frozen target network.
        next_q = self.target_model.value(next_obs)
        max_next_q = layers.reduce_max(next_q, dim=1)
        max_next_q.stop_gradient = True

        not_done = 1.0 - layers.cast(terminal, dtype='float32')
        td_target = reward + not_done * self.gamma * max_next_q

        # Q-value of the action actually taken, selected with a one-hot mask.
        q_values = self.model.value(obs)
        action_mask = layers.cast(
            layers.one_hot(action, self.act_dim), dtype='float32')
        q_taken = layers.reduce_sum(
            layers.elementwise_mul(action_mask, q_values), dim=1)

        loss = layers.reduce_mean(
            layers.square_error_cost(q_taken, td_target))
        fluid.optimizer.Adam(learning_rate=self.lr).minimize(loss)
        return loss

    def sync_target(self):
        """Copy the online network's weights into the target network."""
        self.model.sync_weights_to(self.target_model)

class Agent(parl.Agent):
    """Epsilon-greedy DQN agent that runs the algorithm's fluid programs."""

    def __init__(self, algorithm, obs_dim, act_dim, e_greed=0.1,
                 e_greed_decrement=0):
        assert isinstance(obs_dim, int)
        assert isinstance(act_dim, int)
        # Assigned before the base constructor runs because build_program()
        # reads them (presumably parl.Agent.__init__ calls it - confirm).
        self.obs_dim = obs_dim
        self.act_dim = act_dim
        super(Agent, self).__init__(algorithm)

        self.global_step = 0
        self.update_target_steps = 200  # learn() calls between target syncs

        self.e_greed = e_greed
        self.e_greed_decrement = e_greed_decrement

    def build_program(self):
        """Create the prediction program and the learning program."""
        self.pred_program = fluid.Program()
        self.learn_program = fluid.Program()

        with fluid.program_guard(self.pred_program):
            obs = layers.data(
                name='obs', shape=[self.obs_dim], dtype='float32')
            self.value = self.alg.predict(obs)

        with fluid.program_guard(self.learn_program):
            obs = layers.data(
                name='obs', shape=[self.obs_dim], dtype='float32')
            action = layers.data(name='act', shape=[1], dtype='int32')
            reward = layers.data(name='reward', shape=[], dtype='float32')
            next_obs = layers.data(
                name='next_obs', shape=[self.obs_dim], dtype='float32')
            terminal = layers.data(name='terminal', shape=[], dtype='bool')
            self.cost = self.alg.learn(obs, action, reward, next_obs, terminal)

    def sample(self, obs):
        """Return a random action with probability `e_greed`, otherwise the
        greedy one; then decay `e_greed` (never below 0.01)."""
        if np.random.rand() < self.e_greed:
            act = np.random.randint(self.act_dim)
        else:
            act = self.predict(obs)
        self.e_greed = max(0.01, self.e_greed - self.e_greed_decrement)
        return act

    def predict(self, obs):
        """Return the action with the highest predicted Q-value for `obs`."""
        batch = np.expand_dims(obs, axis=0)
        outputs = self.fluid_executor.run(
            self.pred_program,
            feed={'obs': batch.astype('float32')},
            fetch_list=[self.value])
        q_values = np.squeeze(outputs[0], axis=0)
        return np.argmax(q_values)

    def learn(self, obs, act, reward, next_obs, terminal):
        """Run one training update on a batch and return its cost.

        The target network is synchronised every `update_target_steps` calls.
        """
        if self.global_step % self.update_target_steps == 0:
            self.alg.sync_target()
        self.global_step += 1

        act = np.expand_dims(act, -1)
        feed = {
            'obs': obs.astype('float32'),
            'act': act.astype('int32'),
            'reward': reward,
            'next_obs': next_obs.astype('float32'),
            'terminal': terminal
        }
        outputs = self.fluid_executor.run(
            self.learn_program, feed=feed, fetch_list=[self.cost])
        return outputs[0]

class ReplayMemory(object):
    """Fixed-capacity FIFO store of (obs, action, reward, next_obs, done)
    transitions; the oldest entries are dropped once `max_size` is reached."""

    def __init__(self, max_size):
        self.buffer = collections.deque(maxlen=max_size)

    def append(self, exp):
        """Add one transition tuple to the memory."""
        self.buffer.append(exp)

    def sample(self, batch_size):
        """Draw `batch_size` distinct transitions at random.

        Returns:
            A 5-tuple of float32 arrays: observations, actions, rewards,
            next observations and done flags, each with `batch_size` rows.
        """
        picked = random.sample(self.buffer, batch_size)

        columns = [[], [], [], [], []]
        for obs, action, reward, next_obs, done in picked:
            for column, value in zip(columns, (obs, action, reward, next_obs, done)):
                column.append(value)

        return tuple(np.array(column).astype('float32') for column in columns)

    def __len__(self):
        return len(self.buffer)

def init_bo():
    """Give every station the minimum window and draw its first backoff.

    The agent-selected window (`sta_cw`) and the applied window (`cw`) both
    start at `_cwmin`; one ``random.randint(0, _cwmax)`` draw is made per
    station, in index order, and reduced modulo the window.
    """
    for station in range(_n):
        sta_cw[station] = _cwmin
        cw[station] = sta_cw[station]
        draw = random.randint(0, _cwmax)
        bo[station] = draw % cw[station]

def Trts(rts_bytes=20, basic_rate=1, plcp_overhead=192):
    """Return the air time of an RTS frame in microseconds.

    Args:
        rts_bytes: length of the RTS frame in bytes.
        basic_rate: rate in Mbps at which the RTS is transmitted.
        plcp_overhead: fixed per-frame overhead in microseconds.

    Returns:
        ``plcp_overhead + rts_bytes * 8 / basic_rate`` as a float (352.0 with
        the defaults).

    Raises:
        ValueError: if `basic_rate` is not positive.
    """
    if basic_rate <= 0:
        raise ValueError("basic_rate must be positive")
    return plcp_overhead + (rts_bytes * 8.0) / basic_rate

def Tcts(cts_bytes=14, basic_rate=1, plcp_overhead=192):
    """Return the air time of a CTS frame in microseconds.

    Args:
        cts_bytes: length of the CTS frame in bytes.
        basic_rate: rate in Mbps at which the CTS is transmitted.
        plcp_overhead: fixed per-frame overhead in microseconds.

    Returns:
        ``plcp_overhead + cts_bytes * 8 / basic_rate`` as a float (304.0 with
        the defaults).

    Raises:
        ValueError: if `basic_rate` is not positive.
    """
    if basic_rate <= 0:
        raise ValueError("basic_rate must be positive")
    return plcp_overhead + (cts_bytes * 8.0) / basic_rate

def Tdata():
    """Return the air time of one data frame in microseconds.

    The frame is the payload (`_pktSize` bytes) plus 28 extra bytes, sent at
    `rate` Mbps, on top of a fixed 192 us overhead (presumably the PLCP
    preamble and header - confirm).
    """
    frame_bits = (_pktSize + 28) * 8.0
    return 192 + frame_bits / rate

def Tack(ack_bytes=14, basic_rate=1, plcp_overhead=192):
    """Return the air time of an ACK frame in microseconds.

    Args:
        ack_bytes: length of the ACK frame in bytes.
        basic_rate: rate in Mbps at which the ACK is transmitted.
        plcp_overhead: fixed per-frame overhead in microseconds.

    Returns:
        ``plcp_overhead + ack_bytes * 8 / basic_rate`` as a float. With the
        defaults this is 304.0, the value the simulator has always used.

    Raises:
        ValueError: if `basic_rate` is not positive.
    """
    if basic_rate <= 0:
        raise ValueError("basic_rate must be positive")
    return plcp_overhead + (ack_bytes * 8.0) / basic_rate

def getMinBoAllStationsIndex():
    """Return the index of the station holding the smallest backoff.

    Ties are resolved in favour of the lowest station index.
    """
    best = 0
    lowest = bo[best]
    for station in range(1, _n):
        if bo[station] < lowest:
            best, lowest = station, bo[station]
    return best

def getCountMinBoAllStations(min):
    """Return how many stations have a backoff equal to `min`.

    When more than one station matches, the round is a collision and each of
    those stations gets its per-station failure counter (`sta_coll`)
    incremented.
    """
    transmitters = [station for station in range(_n) if bo[station] == min]

    if len(transmitters) > 1:
        for station in transmitters:
            sta_coll[station] += 1

    return len(transmitters)

def subMinBoFromAll(min,count):
    """Advance every station's backoff past the transmission that just ended.

    Args:
        min: the smallest backoff among all stations (the slots that elapsed).
        count: number of stations that held that backoff.

    Stations with a larger backoff lose `min` slots. Stations that transmitted
    redraw their backoff using the window chosen by their agent (`sta_cw`),
    whether the round was a success or a collision. Any inconsistent input
    prints an error and exits with status 1.
    """
    for station in range(_n):
        remaining = bo[station]
        if remaining < min:
            print("<Error> min=",min," bo=",remaining)
            exit(1)

        if remaining > min:
            bo[station] -= min
        elif remaining == min:
            if not (count == 1 or count > 1):
                print("<Error> count=",count)
                exit(1)
            # The contention window is decided only by the RL result.
            cw[station] = sta_cw[station]
            bo[station] = random.randint(0, _cwmax) % cw[station]

def setStats(min,index,count):
    """Record the outcome of the current contention round.

    Args:
        min: smallest backoff among all stations.
        index: station that holds it (credited when the round succeeds).
        count: number of stations sharing that backoff.

    A single transmitter counts as a success; anything else as a collision.
    Afterwards the backoff table is sanity-checked against `min`.
    """
    global stat_succ, stat_coll

    succeeded = (count == 1)
    if succeeded:
        stat_pkts[index] += 1
        stat_succ += 1
    else:
        stat_coll += 1

    for station in range(_n):
        if bo[station] < min:
            print("<Error> min=", min, " bo=", bo[station])
            exit(1)

def setNow(min,count):
    """Advance the simulation clock by the duration of the current round.

    Args:
        min: number of idle backoff slots before the transmission.
        count: number of simultaneous transmitters (1 = success, >1 = collision).

    The individual durations are in microseconds and are added one at a time,
    in a fixed order, after conversion to seconds.
    """
    global now

    if count == 1:
        # idle slots, data frame, SIFS, ACK, DIFS
        phases = (min * SLOT, Tdata(), SIFS, Tack(), DIFS)
    elif count > 1:
        # idle slots, SIFS, collided data frame, EIFS
        phases = (min * SLOT, SIFS, Tdata(), EIFS)
    else:
        print("<Error> count=", count)
        exit(1)

    for duration in phases:
        now += duration / M

def resolve():
    """Simulate one contention round: pick the winner(s), then update clock,
    statistics and backoff counters, in that order."""
    winner = getMinBoAllStationsIndex()
    smallest = bo[winner]
    transmitters = getCountMinBoAllStations(smallest)

    setNow(smallest, transmitters)
    setStats(smallest, winner, transmitters)
    subMinBoFromAll(smallest, transmitters)

def printStats():
    """Print the collision rate (%) and the aggregate throughput (bit/s)
    accumulated since the counters were last reset."""
    print("\nGeneral Statistics\n")
    print("-"*50)

    attempts = stat_succ + stat_coll
    print("Collision rate:", stat_coll/attempts*100, "%")

    delivered_bits = (stat_succ)*(_pktSize*8.0)
    print("Aggregate Throughput:", delivered_bits/now)

def main():
    """Train one DQN agent per station, then evaluate the learned policies.

    Every RL step works the same way: each station's agent picks an action
    ``a`` and sets its contention window to ``2 ** (5 + a)``; the DCF
    simulation then runs for a little over 0.1 simulated seconds; finally each
    station observes ``[collision rate of this step (%), collision rate of the
    previous step (%)]`` and receives the aggregate throughput of the step,
    normalised by the data rate, as its reward.

    Phases:
      1. Warm-up: `MEMORY_WARMUP_SIZE` epsilon-greedy steps that only fill
         the replay memories.
      2. Training: until the clock reaches `_simTime`, every fifth step each
         agent learns from `BATCH_SIZE` sampled transitions, followed by one
         epsilon-greedy step.
      3. Evaluation: counters and the clock are reset and the greedy policies
         are run for 5 simulated seconds.

    printStats() is called once after training and once after evaluation.
    """
    global now, pre_time, stat_succ, stat_coll, pre_stat_succ
    global sta_coll, sta_cw, stat_pkts

    obs_dim = 2
    act_dim = 6
    interval = 0.1  # simulated seconds per RL step

    random.seed(1)
    np.random.seed(1)
    print("obs_dim=", obs_dim, "act_dim=", act_dim)

    # One independent learner and replay memory per station.
    agents = []
    memories = []
    for _ in range(_n):
        algorithm = DQN(Model(act_dim=act_dim), act_dim=act_dim,
                        gamma=GAMMA, lr=LEARNING_RATE)
        agents.append(Agent(algorithm,
                            obs_dim=obs_dim,
                            act_dim=act_dim,
                            e_greed=0.1,
                            e_greed_decrement=1e-6))
        memories.append(ReplayMemory(MEMORY_SIZE))

    def fresh_tracking():
        """Return zeroed per-station bookkeeping for a new phase."""
        return {
            "state": [[0.0, 0.0] for _ in range(_n)],
            "pre_succ": np.zeros(_n),       # stat_pkts at the previous step
            "pre_coll": np.zeros(_n),       # sta_coll at the previous step
            "pre_coll_rate": np.zeros(_n),  # collision rate of the previous step
        }

    def run_step(track, explore):
        """Run one RL step for all stations and store the transitions.

        `explore` selects epsilon-greedy sampling (True) or the greedy policy
        (False).
        """
        global pre_stat_succ, pre_time

        observations = []
        actions = []
        for i, agent in enumerate(agents):
            obs = np.array(track["state"][i])
            act = agent.sample(obs) if explore else agent.predict(obs)
            sta_cw[i] = pow(2, 5 + act)
            observations.append(obs)
            actions.append(act)

        started = now
        while True:
            resolve()
            if now - started > interval:
                break

        # The reward is shared: aggregate throughput of this step over the
        # data rate. It is computed once, before the snapshot of stat_succ is
        # refreshed, so every station receives the same value.
        throughput = (stat_succ - pre_stat_succ) * (_pktSize * 8.0) / (now - pre_time)
        reward = throughput / rate / M

        for i in range(_n):
            collisions = sta_coll[i] - track["pre_coll"][i]
            attempts = stat_pkts[i] - track["pre_succ"][i] + collisions
            coll_rate = 0.0 if attempts == 0 else collisions / attempts * 100

            next_state = [coll_rate, track["pre_coll_rate"][i]]

            track["pre_succ"][i] = stat_pkts[i]
            track["pre_coll"][i] = sta_coll[i]
            track["pre_coll_rate"][i] = coll_rate

            memories[i].append(
                (observations[i], actions[i], reward, np.array(next_state), False))
            track["state"][i] = next_state

        pre_stat_succ = stat_succ
        pre_time = now

    # ---- Warm-up and training ----
    track = fresh_tracking()
    init_bo()
    now += DIFS / M

    step = 0
    warmup_left = MEMORY_WARMUP_SIZE
    while now < _simTime:
        while warmup_left > 0:
            warmup_left -= 1
            run_step(track, explore=True)

        if step % 5 == 0:
            for agent, memory in zip(agents, memories):
                agent.learn(*memory.sample(BATCH_SIZE))

        run_step(track, explore=True)
        step += 1

    printStats()

    # ---- Evaluation: start from clean counters and a clean clock ----
    now = pre_time = 0.0
    stat_succ = 0
    stat_coll = 0
    pre_stat_succ = 0
    sta_cw = np.zeros(_n)
    sta_coll = np.zeros(_n)
    stat_pkts = np.zeros(_n)
    track = fresh_tracking()

    now += DIFS / M
    while now < 5:
        print("Evaluation, time=", now)
        run_step(track, explore=False)

    printStats()


main()

Execution

If we are using multiagent, we can get around 5Mbps throughput.

Dr. Chih-Heng Ke (柯志亨)

Department of Computer Science and Information Engineering, National Quemoy University, Kinmen, Taiwan

Email: smallko@gmail.com