Applying Deep Reinforcement Learning to Improve Throughput and Reduce Collision Rate in IEEE 802.11 Networks

 

This page provides the basic simulation code for my paper. If you use this code in your work, please cite the paper:

C. Ke and L. Astuti, "Applying Deep Reinforcement Learning to Improve Throughput and Reduce Collision Rate in IEEE 802.11 Networks," KSII Transactions on Internet and Information Systems, vol. 16, no. 1, pp. 334-349, 2022. DOI: 10.3837/tiis.2022.01.019. (SCI)

 

Note that the parameters used in the code are for an IEEE 802.11b environment, and the DQN was implemented with the PARL framework. You can download the VM (accounts: user/user, root/ubuntu) and run the following code directly inside it.
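If you prefer to run the scripts outside the VM, a minimal setup sketch is given below. The file names and package versions are my assumptions rather than part of the original release; any PARL 1.x release together with a matching PaddlePaddle 1.x should provide the parl.layers / paddle.fluid API used in the code (PARL 2.x dropped parl.layers).

pip install paddlepaddle==1.8.5 parl==1.3.1   # assumed versions
python csmaca.py      # baseline CSMA/CA simulation
python ccod_dqn.py    # DQN agent that picks a fixed CW each decision interval
python setl_dqn.py    # DQN agent that tunes the CW threshold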

 

CSMACA

import numpy as np

import random

 

_n=50 # number of nodes

_simTime=2000 # sec

 

rate=11 # 11, 5.5, 2 or 1 Mbps

_cwmin=32

_cwmax=1024

rtsmode=0 #0: data->ack; 1:rts->cts->data->ack

 

SIFS=10 # timing constants below are in microseconds

DIFS=50

EIFS=SIFS+DIFS+192+112 # SIFS + DIFS + ACK time at 1 Mbps (192 us PLCP preamble/header + 112 us for 14 bytes)

SLOT=20

M=1000000 # microseconds per second; the clock "now" is kept in seconds

 

_pktSize=1000 # bytes

stat_succ=0

stat_coll=0

stat_pkts=np.zeros(_n)

cw=np.zeros(_n)

bo=np.zeros(_n)

 

now=0.0

 

def init_bo():

    for i in range(0,_n):

        cw[i]=_cwmin

        bo[i]=random.randint(0,_cwmax)%cw[i]

        #print("cw[",i,"]=",cw[i]," bo[",i,"]=",bo[i])

 

# Frame transmission times in microseconds: 192 us long PLCP preamble/header plus the frame body.
# Control frames (RTS/CTS/ACK) are sent at 1 Mbps; the data frame is sent at `rate` Mbps.
def Trts():

    time=192+(20*8)/1

    return time

 

def Tcts():

    time=192+(14*8)/1

    return time

 

def Tdata():

    global rate

    time=192+((_pktSize+28)*8.0)/rate

    return time

 

def Tack():

    time=192+(14*8.0)/1

    return time

 

def getMinBoAllStationsIndex():

    index=0

    min=bo[index]

    for i in range(0,_n):

        if bo[i]<min:

            index=i

            min=bo[index]

 

    return index

 

def getCountMinBoAllStations(min):

    count=0

    for i in range(0,_n):

        if(bo[i]==min):

            count+=1

 

    return count

 

def subMinBoFromAll(min,count):

    global _cwmin,_cwmax

    for i in range(0,_n):

        if bo[i]<min:

            print("<Error> min=",min," bo=",bo[i])

            exit(1)

 

        if(bo[i]>min):

            bo[i]-=min

        elif bo[i]==min:

            if count==1:

                cw[i]=_cwmin

                bo[i] = random.randint(0, _cwmax) % cw[i]

            elif count>1:

                if(cw[i]<_cwmax):

                    cw[i]*=2

                else:

                    cw[i]=_cwmax

                bo[i] = random.randint(0, _cwmax) % cw[i]

            else:

                print("<Error> count=",count)

                exit(1)

 

def setStats(min,index,count):

    global stat_succ,stat_coll

    if count==1:

        stat_pkts[index]+=1

        stat_succ+=1

    else:

        stat_coll+=1

        for i in range(0,_n):

            if bo[i]<min:

                print("<Error> min=", min, " bo=", bo[i])

                exit(1)

            #elif bo[i]==min:

            #    print("Collision with min=", min)

 

def setNow(min,count):

    global M, now, SIFS, DIFS, EIFS, SLOT

    if count==1:

        now+=DIFS/M

        now+=min*SLOT/M        

        if(rtsmode==1):

            now+=Trts()/M;

            now+=SIFS/M

            now+=Tcts()/M

            now+=SIFS/M

        now+=Tdata()/M

        now+=SIFS/M

        now+=Tack()/M

    elif count>1:

      now+=DIFS/M

      now+=min*SLOT/M

      if rtsmode==1:

          now+=Trts()/M;

          now+=EIFS/M

      else:

          now+=Tdata()/M

          now+=EIFS/M

    else:

          print("<Error> count=", count)

          exit(1)

 

def resolve():

    index=getMinBoAllStationsIndex()

    min=bo[index]

    count=getCountMinBoAllStations(min)

 

    setNow(min, count)

    setStats(min, index, count)

    subMinBoFromAll(min, count)

 

def printStats():

    global stat_succ, stat_coll, stat_pkts

    print("\nGeneral Statistics\n")

    print("-"*50)

 

    numPkts=0

    for i in range(0,_n):

        numPkts+=stat_pkts[i]

    print("stat_coll:", stat_coll, "stat_succ:", stat_succ)

    print("Collision rate:", stat_coll/(stat_succ+stat_coll)*100, "%")

    print("Aggregate Throughput:", numPkts*(_pktSize*8.0)/now)

 

def main():

    global now, _simTime

    random.seed(1)

 

    init_bo()

    while now < _simTime:

        resolve()

    printStats()

 

main()
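
As a quick sanity check on the timing model (a hand calculation with the constants above, not part of the simulator), one successful DATA/ACK exchange at 11 Mbps with, for example, a 15-slot backoff takes about 1.6 ms:

DIFS, SLOT, SIFS = 50, 20, 10              # microseconds
Tdata = 192 + (1000 + 28) * 8.0 / 11       # data frame at 11 Mbps: ~939.6 us
Tack = 192 + 14 * 8.0 / 1                  # ACK at 1 Mbps: 304 us
print(DIFS + 15 * SLOT + Tdata + SIFS + Tack)   # ~1603.6 us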

 

Execution

 

 

CCOD-DQN

# -*- coding: UTF-8 -*-

import numpy as np

import random

import os

import parl

from parl import layers 

import copy

import paddle.fluid as fluid

import collections

 

MEMORY_SIZE = 20000 

MEMORY_WARMUP_SIZE = 200 

BATCH_SIZE = 32 

LEARNING_RATE = 0.001 

GAMMA = 0.9

 

pre_time=0.0

pre_stat_succ=0

pre_stat_coll=0

 

_n=50

_simTime=2000

 

rate=11

_cwmin=32

_cwmax=1024

rtsmode=0

 

SIFS=10

DIFS=50

EIFS=SIFS+DIFS+192+112

SLOT=20

M=1000000

 

_pktSize=1000

stat_succ=0

stat_coll=0

stat_pkts=np.zeros(_n)

cw=np.zeros(_n)

bo=np.zeros(_n)

 

now=0.0

 

class Model(parl.Model):

    def __init__(self, act_dim):

        hid1_size = 128

        hid2_size = 128

        self.fc1 = layers.fc(size=hid1_size, act='relu')

        self.fc2 = layers.fc(size=hid2_size, act='relu')

        self.fc3 = layers.fc(size=act_dim, act=None)

 

    def value(self, obs):

        h1 = self.fc1(obs)

        h2 = self.fc2(h1)

        Q = self.fc3(h2)

        return Q

 

class DQN(parl.Algorithm):

    def __init__(self, model, act_dim=None, gamma=None, lr=None):

        self.model = model

        self.target_model = copy.deepcopy(model)

 

        assert isinstance(act_dim, int)

        assert isinstance(gamma, float)

        assert isinstance(lr, float)

        self.act_dim = act_dim

        self.gamma = gamma

        self.lr = lr

 

    def predict(self, obs):

        return self.model.value(obs)

 

    def learn(self, obs, action, reward, next_obs, terminal):

        next_pred_value = self.target_model.value(next_obs)

        best_v = layers.reduce_max(next_pred_value, dim=1)

        best_v.stop_gradient = True

        terminal = layers.cast(terminal, dtype='float32')

        target = reward + (1.0 - terminal) * self.gamma * best_v

 

        pred_value = self.model.value(obs) 

 

        action_onehot = layers.one_hot(action, self.act_dim)

        action_onehot = layers.cast(action_onehot, dtype='float32')

 

       

        pred_action_value = layers.reduce_sum(

            layers.elementwise_mul(action_onehot, pred_value), dim=1)

 

        cost = layers.square_error_cost(pred_action_value, target)

        cost = layers.reduce_mean(cost)

        optimizer = fluid.optimizer.Adam(learning_rate=self.lr)

        optimizer.minimize(cost)

        return cost

 

    def sync_target(self):

        self.model.sync_weights_to(self.target_model)

 

class Agent(parl.Agent):

    def __init__(self,

                 algorithm,

                 obs_dim,

                 act_dim,

                 e_greed=0.1,

                 e_greed_decrement=0):

        assert isinstance(obs_dim, int)

        assert isinstance(act_dim, int)

        self.obs_dim = obs_dim

        self.act_dim = act_dim

        super(Agent, self).__init__(algorithm)

 

        self.global_step = 0

        self.update_target_steps = 200

 

        self.e_greed = e_greed 

        self.e_greed_decrement = e_greed_decrement 

 

    def build_program(self):

        self.pred_program = fluid.Program()

        self.learn_program = fluid.Program()

 

        with fluid.program_guard(self.pred_program):

            obs = layers.data(

                name='obs', shape=[self.obs_dim], dtype='float32')

            self.value = self.alg.predict(obs)

 

        with fluid.program_guard(self.learn_program):

            obs = layers.data(

                name='obs', shape=[self.obs_dim], dtype='float32')

            action = layers.data(name='act', shape=[1], dtype='int32')

            reward = layers.data(name='reward', shape=[], dtype='float32')

            next_obs = layers.data(

                name='next_obs', shape=[self.obs_dim], dtype='float32')

            terminal = layers.data(name='terminal', shape=[], dtype='bool')

            self.cost = self.alg.learn(obs, action, reward, next_obs, terminal)

 

    def sample(self, obs):

        sample = np.random.rand() 

        if sample < self.e_greed:

            act = np.random.randint(self.act_dim)

        else:

            act = self.predict(obs)

        self.e_greed = max(

            0.01, self.e_greed - self.e_greed_decrement)

        return act

 

    def predict(self, obs):

        obs = np.expand_dims(obs, axis=0)

        pred_Q = self.fluid_executor.run(

            self.pred_program,

            feed={'obs': obs.astype('float32')},

            fetch_list=[self.value])[0]

        pred_Q = np.squeeze(pred_Q, axis=0)

        act = np.argmax(pred_Q)

        return act

 

    def learn(self, obs, act, reward, next_obs, terminal):

     

        if self.global_step % self.update_target_steps == 0:

            self.alg.sync_target()

        self.global_step += 1

 

        act = np.expand_dims(act, -1)

        feed = {

            'obs': obs.astype('float32'),

            'act': act.astype('int32'),

            'reward': reward,

            'next_obs': next_obs.astype('float32'),

            'terminal': terminal

        }

        cost = self.fluid_executor.run(

            self.learn_program, feed=feed, fetch_list=[self.cost])[0] 

        return cost

 

class ReplayMemory(object):

    def __init__(self, max_size):

        self.buffer = collections.deque(maxlen=max_size)

 

    def append(self, exp):

        self.buffer.append(exp)

 

    def sample(self, batch_size):

        mini_batch = random.sample(self.buffer, batch_size)

        obs_batch, action_batch, reward_batch, next_obs_batch, done_batch = [], [], [], [], []

 

        for experience in mini_batch:

            s, a, r, s_p, done = experience

            obs_batch.append(s)

            action_batch.append(a)

            reward_batch.append(r)

            next_obs_batch.append(s_p)

            done_batch.append(done)

 

        return np.array(obs_batch).astype('float32'), \

            np.array(action_batch).astype('float32'), np.array(reward_batch).astype('float32'),\

            np.array(next_obs_batch).astype('float32'), np.array(done_batch).astype('float32')

 

    def __len__(self):

        return len(self.buffer)

 

def init_bo():

    for i in range(0,_n):

        cw[i]=_cwmin

        bo[i]=random.randint(0,_cwmax)%cw[i]

       

 

def Trts():

    time=192+(20*8)/1

    return time

 

def Tcts():

    time=192+(14*8)/1

    return time

 

def Tdata():

    global rate

    time=192+((_pktSize+28)*8.0)/rate

    return time

 

def Tack():

    time=192+(14*8.0)/1

    return time

 

def getMinBoAllStationsIndex():

    index=0

    min=bo[index]

    for i in range(0,_n):

        if bo[i]<min:

            index=i

            min=bo[index]

 

    return index

 

def getCountMinBoAllStations(min):

    count=0

    for i in range(0,_n):

        if(bo[i]==min):

            count+=1

 

    return count

 

def subMinBoFromAll(min,count):

    global _cwmin,_cwmax

    for i in range(0,_n):

        if bo[i]<min:

            print("<Error> min=",min," bo=",bo[i])

            exit(1)

 

        if(bo[i]>min):

            bo[i]-=min

        elif bo[i]==min:

            if count==1:

                cw[i]=_cwmin

                bo[i] = random.randint(0, _cwmax) % cw[i]

            elif count>1:

                if(cw[i]<_cwmax):

                    cw[i]*=2

                else:

                    cw[i]=_cwmax

                bo[i] = random.randint(0, _cwmax) % cw[i]

            else:

                print("<Error> count=",count)

                exit(1)

 

def setStats(min,index,count):

    global stat_succ,stat_coll

    if count==1:

        stat_pkts[index]+=1

        stat_succ+=1

    else:

        stat_coll+=1

        for i in range(0,_n):

            if bo[i]<min:

                print("<Error> min=", min, " bo=", bo[i])

                exit(1)

           

           

 

def setNow(min,count):

    global M, now, SIFS, DIFS, EIFS, SLOT

    if count==1:

        now+=DIFS/M

        now+=min*SLOT/M        

        if(rtsmode==1):

            now+=Trts()/M;

            now+=SIFS/M

            now+=Tcts()/M

            now+=SIFS/M

        now+=Tdata()/M

        now+=SIFS/M

        now+=Tack()/M

    elif count>1:

      now+=DIFS/M

      now+=min*SLOT/M

      if rtsmode==1:

          now+=Trts()/M;

          now+=EIFS/M

      else:

          now+=Tdata()/M

          now+=EIFS/M

    else:

          print("<Error> count=", count)

          exit(1)

 

def resolve():

    index=getMinBoAllStationsIndex()

    min=bo[index]

    count=getCountMinBoAllStations(min)

 

    setNow(min, count)

    setStats(min, index, count)

    subMinBoFromAll(min, count)

 

def new_resolve(new_cw):

    global _cwmin,_cwmax

 

    _cwmin=new_cw

    _cwmax=new_cw

 

    index=getMinBoAllStationsIndex()

    min=bo[index]

    count=getCountMinBoAllStations(min)

 

    setNow(min, count)

    setStats(min, index, count)

    subMinBoFromAll(min, count)

 

def printStats():

    print("\nGeneral Statistics\n")

    print("-"*50) 

    print("stat_succ:",stat_succ,"stat_coll:",stat_coll)

    print("Collision rate:", stat_coll/(stat_succ+stat_coll)*100, "%")

    print("Aggregate Throughput:", (stat_succ)*(_pktSize*8.0)/now)

 

def main():

    global _n, now, _simTime, stat_succ, stat_coll, stat_pkts, pre_stat_succ, pre_stat_coll, _pktSize, pre_time

    pre_collision_rate=0.0

    random.seed(1)

    np.random.seed(1)

    init_bo()

 

    obs_dim=2

    act_dim=6

    print("obs_dim=",obs_dim,"act_dim=",act_dim)

 

    model = Model(act_dim=act_dim)

    algorithm = DQN(model, act_dim=act_dim, gamma=GAMMA, lr=LEARNING_RATE)

    agent = Agent(

        algorithm,

        obs_dim=obs_dim,

        act_dim=act_dim,

        e_greed=0.1,

        e_greed_decrement=1e-6) 

 

    rpm = ReplayMemory(MEMORY_SIZE)

 

    #save_path = './dnq_model.ckpt'

   

    step=0

    reward=0.0

    state = [0.0, 0.0]

    show=0

    while now < _simTime:

        while len(rpm) < MEMORY_WARMUP_SIZE:

            obs = np.array(state)

            action = agent.sample(obs)
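            # action 0-5 selects a fixed contention window of 2**(5+action) = 32, 64, 128, 256, 512 or 1024 slots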

            new_cw = pow(2, 5 + action)

 

            t1=now
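            # advance the channel simulation for roughly 0.1 s of simulated time under the chosen CW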

            while True:

             new_resolve(new_cw)

             if now - t1 > 0.1:

               break

 

            collision_rate = (stat_coll - pre_stat_coll) / (stat_succ + stat_coll - pre_stat_succ - pre_stat_coll) * 100

            thr = (stat_succ - pre_stat_succ) * (_pktSize * 8.0) / (now - pre_time)

            reward = thr / rate / M  # reward = throughput normalized by the channel capacity (rate is in Mbps, M = 1e6)

            next_state=[]

            next_state.append(pre_collision_rate)

            next_state.append(collision_rate)

           

            pre_stat_succ = stat_succ

            pre_stat_coll = stat_coll

            pre_collision_rate = collision_rate

            pre_time = now

           

            next_obs=np.array(next_state)

            done = False

            rpm.append((obs, action, reward, next_obs, done))

           

            state = next_state

           

           

 

        if step%5==0:

          (batch_obs, batch_action, batch_reward, batch_next_obs, batch_done) = rpm.sample(BATCH_SIZE)

          train_loss = agent.learn(batch_obs, batch_action, batch_reward, batch_next_obs, batch_done)

         

 

        obs = np.array(state)

        action = agent.sample(obs)

        new_cw = pow(2, 5 + action)

 

        t1=now

        while True:

          new_resolve(new_cw)

          if now - t1 > 0.1:

            break

 

        collision_rate = (stat_coll - pre_stat_coll) / (stat_succ + stat_coll - pre_stat_succ - pre_stat_coll) * 100

        thr = (stat_succ - pre_stat_succ) * (_pktSize * 8.0) / (now - pre_time)

        reward = thr / rate / M

        next_state = []

        next_state.append(pre_collision_rate)  

        next_state.append(collision_rate)

        step += 1

        pre_stat_succ = stat_succ

        pre_stat_coll = stat_coll

        pre_collision_rate = collision_rate

        pre_time = now

       

        next_obs = np.array(next_state)

        done = False

        rpm.append((obs, action, reward, next_obs, done))

        if now > show:

          print("now=", now, "obs=", obs, " action=", action, " next_obs=", next_obs, " reward=", reward)

          show+=100

        state = next_state

 

    printStats()

    #agent.save(save_path)
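    # evaluation: reset the statistics and run the trained (greedy) policy for 5 simulated seconds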

 

    now=pre_time=0.0

    state = [0.0, 0.0]

    stat_coll=pre_stat_coll=0

    stat_succ=pre_stat_succ=0

    stat_pkts=np.zeros(_n)

    while now < 5:

        obs = np.array(state)

        action = agent.predict(obs)

        new_cw = pow(2, 5 + action)

        print("new_cw=", new_cw)

 

        t1=now

        while True:

          new_resolve(new_cw)

          if now - t1 > 0.1:

            break

 

        collision_rate = (stat_coll - pre_stat_coll) / (stat_succ + stat_coll - pre_stat_succ - pre_stat_coll) * 100

        thr = (stat_succ - pre_stat_succ) * (_pktSize * 8.0) / (now - pre_time)

        next_state = []

        next_state.append(pre_collision_rate)  

        next_state.append(collision_rate)

        pre_stat_succ = stat_succ

        pre_stat_coll = stat_coll

        pre_collision_rate = collision_rate

        pre_time = now

        print("now=", now, " collison rate=",collision_rate," thr=", thr)

        state = next_state

    print("="*25, " Evaluation Result:")

    printStats()

 

main()

 

Execution

(execution output omitted)

 

SETL-DQN

import numpy as np

import random

import os

import parl

from parl import layers 

import copy

import paddle.fluid as fluid

import collections

 

MEMORY_SIZE = 20000 

MEMORY_WARMUP_SIZE = 200 

BATCH_SIZE = 32 

LEARNING_RATE = 0.001 

GAMMA = 0.9 

 

pre_time=0.0

pre_stat_succ=0

pre_stat_coll=0

 

_n=50 # number of nodes

_simTime=2000 # sec

 

rate=11 # 11, 5.5, 2 or 1 Mbps

_cwmin=32

_cwmax=1024

rtsmode=0 #0: data->ack; 1:rts->cts->data->ack

cwthreshold=512 # CW threshold separating the multiplicative and linear adjustment regions (tuned by the DQN agent)

 

SIFS=10

DIFS=50

EIFS=SIFS+DIFS+192+112

SLOT=20

M=1000000

 

_pktSize=1000 # bytes

stat_succ=0

stat_coll=0

stat_pkts=np.zeros(_n)

cw=np.zeros(_n)

bo=np.zeros(_n)

 

now=0.0

 

class Model(parl.Model):

    def __init__(self, act_dim):

        hid1_size = 128

        hid2_size = 128

 

        self.fc1 = layers.fc(size=hid1_size, act='relu')

        self.fc2 = layers.fc(size=hid2_size, act='relu')

        self.fc3 = layers.fc(size=act_dim, act=None)

 

    def value(self, obs):

        h1 = self.fc1(obs)

        h2 = self.fc2(h1)

        Q = self.fc3(h2)

        return Q

 

class DQN(parl.Algorithm):

    def __init__(self, model, act_dim=None, gamma=None, lr=None):

        self.model = model

        self.target_model = copy.deepcopy(model)

 

        assert isinstance(act_dim, int)

        assert isinstance(gamma, float)

        assert isinstance(lr, float)

        self.act_dim = act_dim

        self.gamma = gamma

        self.lr = lr

 

    def predict(self, obs):

        return self.model.value(obs)

 

    def learn(self, obs, action, reward, next_obs, terminal):

 

        next_pred_value = self.target_model.value(next_obs)

        best_v = layers.reduce_max(next_pred_value, dim=1)

        best_v.stop_gradient = True 

        terminal = layers.cast(terminal, dtype='float32')

        target = reward + (1.0 - terminal) * self.gamma * best_v

 

        pred_value = self.model.value(obs) 

 

        action_onehot = layers.one_hot(action, self.act_dim)

        action_onehot = layers.cast(action_onehot, dtype='float32')

 

        pred_action_value = layers.reduce_sum(

            layers.elementwise_mul(action_onehot, pred_value), dim=1)

 

        cost = layers.square_error_cost(pred_action_value, target)

        cost = layers.reduce_mean(cost)

        optimizer = fluid.optimizer.Adam(learning_rate=self.lr) 

        optimizer.minimize(cost)

        return cost

 

    def sync_target(self):

         self.model.sync_weights_to(self.target_model)

 

class Agent(parl.Agent):

    def __init__(self,

                 algorithm,

                 obs_dim,

                 act_dim,

                 e_greed=0.1,

                 e_greed_decrement=0):

        assert isinstance(obs_dim, int)

        assert isinstance(act_dim, int)

        self.obs_dim = obs_dim

        self.act_dim = act_dim

        super(Agent, self).__init__(algorithm)

 

        self.global_step = 0

        self.update_target_steps = 200

 

        self.e_greed = e_greed

        self.e_greed_decrement = e_greed_decrement 

 

    def build_program(self):

        self.pred_program = fluid.Program()

        self.learn_program = fluid.Program()

 

        with fluid.program_guard(self.pred_program):

            obs = layers.data(

                name='obs', shape=[self.obs_dim], dtype='float32')

            self.value = self.alg.predict(obs)

 

        with fluid.program_guard(self.learn_program): 

            obs = layers.data(

                name='obs', shape=[self.obs_dim], dtype='float32')

            action = layers.data(name='act', shape=[1], dtype='int32')

            reward = layers.data(name='reward', shape=[], dtype='float32')

            next_obs = layers.data(

                name='next_obs', shape=[self.obs_dim], dtype='float32')

            terminal = layers.data(name='terminal', shape=[], dtype='bool')

            self.cost = self.alg.learn(obs, action, reward, next_obs, terminal)

 

    def sample(self, obs):

        sample = np.random.rand() 

        if sample < self.e_greed:

            act = np.random.randint(self.act_dim)

        else:

            act = self.predict(obs) 

        self.e_greed = max(

            0.01, self.e_greed - self.e_greed_decrement) 

        return act

 

    def predict(self, obs):

        obs = np.expand_dims(obs, axis=0)

        pred_Q = self.fluid_executor.run(

            self.pred_program,

            feed={'obs': obs.astype('float32')},

            fetch_list=[self.value])[0]

        pred_Q = np.squeeze(pred_Q, axis=0)

        act = np.argmax(pred_Q)

        return act

 

    def learn(self, obs, act, reward, next_obs, terminal):

        if self.global_step % self.update_target_steps == 0:

            self.alg.sync_target()

        self.global_step += 1

 

        act = np.expand_dims(act, -1)

        feed = {

            'obs': obs.astype('float32'),

            'act': act.astype('int32'),

            'reward': reward,

            'next_obs': next_obs.astype('float32'),

            'terminal': terminal

        }

        cost = self.fluid_executor.run(

            self.learn_program, feed=feed, fetch_list=[self.cost])[0]

        return cost

 

class ReplayMemory(object):

    def __init__(self, max_size):

        self.buffer = collections.deque(maxlen=max_size)

 

    def append(self, exp):

        self.buffer.append(exp)

 

    def sample(self, batch_size):

        mini_batch = random.sample(self.buffer, batch_size)

        obs_batch, action_batch, reward_batch, next_obs_batch, done_batch = [], [], [], [], []

 

        for experience in mini_batch:

            s, a, r, s_p, done = experience

            obs_batch.append(s)

            action_batch.append(a)

            reward_batch.append(r)

            next_obs_batch.append(s_p)

            done_batch.append(done)

 

        return np.array(obs_batch).astype('float32'), \

            np.array(action_batch).astype('float32'), np.array(reward_batch).astype('float32'),\

            np.array(next_obs_batch).astype('float32'), np.array(done_batch).astype('float32')

 

    def __len__(self):

        return len(self.buffer)

 

def init_bo():

    for i in range(0,_n):

        cw[i]=_cwmin

        bo[i]=random.randint(0,_cwmax)%cw[i]

        #print("cw[",i,"]=",cw[i]," bo[",i,"]=",bo[i])

 

def Trts():

    time=192+(20*8)/1

    return time

 

def Tcts():

    time=192+(14*8)/1

    return time

 

def Tdata():

    global rate

    time=192+((_pktSize+28)*8.0)/rate

    return time

 

def Tack():

    time=192+(14*8.0)/1

    return time

 

def getMinBoAllStationsIndex():

    index=0

    min=bo[index]

    for i in range(0,_n):

        if bo[i]<min:

            index=i

            min=bo[index]

 

    return index

 

def getCountMinBoAllStations(min):

    count=0

    for i in range(0,_n):

        if(bo[i]==min):

            count+=1

 

    return count

 

def subMinBoFromAll(min,count):

    global _cwmin, _cwmax, cwthreshold

    for i in range(0,_n):

        if bo[i]<min:

            print("<Error> min=",min," bo=",bo[i])

            exit(1)

 

        if(bo[i]>min):

            bo[i]-=min

        elif bo[i]==min:
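            # SETL adjustment around cwthreshold: on success the CW shrinks by 32 while above the
            # threshold and is halved below it; on collision it doubles while below the threshold
            # and grows by 32 above it, capped at _cwmax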

            if count==1:

                if (cw[i]> cwthreshold):

                  cw[i]-=32

                elif (cw[i]>_cwmin):

                  cw[i]= cw[i]/2

                else:

                  cw[i]=_cwmin;

                bo[i] = random.randint(0, _cwmax) % cw[i]

            elif count>1:

                if (cw[i]<cwthreshold):

                  cw[i]*=2

                elif (cw[i]<_cwmax):

                  cw[i]+=32;         

                else:

                  cw[i]=_cwmax

                bo[i] = random.randint(0, _cwmax) % cw[i]

            else:

                print("<Error> count=",count)

                exit(1)

 

def setStats(min,index,count):

    global stat_succ,stat_coll

    if count==1:

        stat_pkts[index]+=1

        stat_succ+=1

    else:

        stat_coll+=1

        for i in range(0,_n):

            if bo[i]<min:

                print("<Error> min=", min, " bo=", bo[i])

                exit(1)

            #elif bo[i]==min:

            #    print("Collision with min=", min)

 

def setNow(min,count):

    global M, now, SIFS, DIFS, EIFS, SLOT

    if rtsmode==1:

        now+=Trts()/M;

 

    if count==1:

        if(rtsmode==1):

            now+=SIFS/M

            now+=Tcts()/M

            now+=SIFS/M

        now+=DIFS/M

        now+=min*SLOT/M

        now+=Tdata()/M

        now+=SIFS/M

        now+=Tack()/M

    elif count>1:

      if rtsmode==1:

          now+=EIFS/M

          now+=min*SLOT/M

      else:

          now+=EIFS/M

          now+=min*SLOT/M

          now+=Tdata()/M

    else:

          print("<Error> count=", count)

          exit(1)

 

def new_resolve(new_cwthreshold):

    global cwthreshold

    cwthreshold=new_cwthreshold

 

    index=getMinBoAllStationsIndex()

    min=bo[index]

    count=getCountMinBoAllStations(min)

 

    setNow(min, count)

    setStats(min, index, count)

    subMinBoFromAll(min, count)

 

def printStats():

    print("\nGeneral Statistics\n")

    print("-"*50)

    print("stat_succ:",stat_succ,"stat_coll:",stat_coll)

    print("Collision rate:", stat_coll/(stat_succ+stat_coll)*100, "%")

    print("Aggregate Throughput:", (stat_succ)*(_pktSize*8.0)/now)

 

def main():

    global _n, now, _simTime, stat_succ, stat_coll, stat_pkts, pre_stat_succ, pre_stat_coll, _pktSize, pre_time

    pre_collision_rate=0.0

    random.seed(1)

    np.random.seed(1)

    init_bo()

 

    obs_dim=2

    act_dim=8

    print("obs_dim=",obs_dim,"act_dim=",act_dim)

 

    model = Model(act_dim=act_dim)

    algorithm = DQN(model, act_dim=act_dim, gamma=GAMMA, lr=LEARNING_RATE)

    agent = Agent(

        algorithm,

        obs_dim=obs_dim,

        act_dim=act_dim,

        e_greed=0.1, 

        e_greed_decrement=1e-6)

 

    rpm = ReplayMemory(MEMORY_SIZE) 

 

    #save_path = './dnq_model.ckpt'

    #if os.path.isfile(save_path):

    #    agent.restore(save_path)

 

    step=0

    reward=0.0

    state = [0.0, 0.0]

    show=0

    while now < _simTime:

        while len(rpm) < MEMORY_WARMUP_SIZE:

            obs = np.array(state)

            action = agent.sample(obs)
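            # 128*(1+action) maps action 0-7 to a CW threshold of 128, 256, ..., 1024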

            new_cwthreshold = 128*(1+action)

 

            t1=now

            while True:

             new_resolve(new_cwthreshold)

             if now - t1 > 0.1:

               break

              

            collision_rate = (stat_coll - pre_stat_coll) / (stat_succ + stat_coll - pre_stat_succ - pre_stat_coll) * 100

            thr = (stat_succ - pre_stat_succ) * (_pktSize * 8.0) / (now - pre_time)

            reward = thr / rate / M

            next_state=[]

            next_state.append(collision_rate)

            next_state.append(pre_collision_rate)

            #step += 1

            pre_stat_succ = stat_succ

            pre_stat_coll = stat_coll

            pre_collision_rate = collision_rate

            pre_time = now

            #print("now=", now, " collison rate=",collision_rate," thr=", thr)

            next_obs=np.array(next_state)

            done = False

            rpm.append((obs, action, reward, next_obs, done))

            #print("len(rpm)=", len(rpm), "obs=", obs, " action=", action, " next_obs=", next_obs, " reward=", reward)

            state = next_state

            #if step>=5:

            #    exit()

 

        if step%5==0:

          (batch_obs, batch_action, batch_reward, batch_next_obs, batch_done) = rpm.sample(BATCH_SIZE)

          train_loss = agent.learn(batch_obs, batch_action, batch_reward, batch_next_obs, batch_done)

          #print("agent.learn() is called, train_loss=", train_loss)

 

        obs = np.array(state)

        action = agent.sample(obs)

        new_cwthreshold = 128*(1+action)

 

        t1=now

        while True:

          new_resolve(new_cwthreshold)

          if now - t1 > 0.1:

            break

 

        collision_rate = (stat_coll - pre_stat_coll) / (stat_succ + stat_coll - pre_stat_succ - pre_stat_coll) * 100

        thr = (stat_succ - pre_stat_succ) * (_pktSize * 8.0) / (now - pre_time)

        reward = thr / rate / M

        next_state = []

        next_state.append(collision_rate)

        next_state.append(pre_collision_rate)

        step += 1

        pre_stat_succ = stat_succ

        pre_stat_coll = stat_coll

        pre_collision_rate = collision_rate

        pre_time = now

        # print("now=", now, " collison rate=",collision_rate," thr=", thr)

        next_obs = np.array(next_state)

        done = False

        rpm.append((obs, action, reward, next_obs, done))

        if now > show:

          print("now=", now, "obs=", obs, " action=", action, " next_obs=", next_obs, " reward=", reward)

          show+=100

        state = next_state

 

    printStats()

    #agent.save(save_path)

 

    # evaluation: reset the statistics and run the trained (greedy) policy for 5 simulated seconds

    now=pre_time=0.0

    state = [0.0, 0.0]

    stat_coll=pre_stat_coll=0

    stat_succ=pre_stat_succ=0

    stat_pkts=np.zeros(_n)

    while now < 5:

        obs = np.array(state)

        action = agent.predict(obs)

        new_cwthreshold = 128*(1+action)

        print("new_cwthreshold=", new_cwthreshold)

 

        t1=now

        while True:

          new_resolve(new_cwthreshold)

          if now - t1 > 0.1:

            break

 

        collision_rate = (stat_coll - pre_stat_coll) / (stat_succ + stat_coll - pre_stat_succ - pre_stat_coll) * 100

        thr = (stat_succ - pre_stat_succ) * (_pktSize * 8.0) / (now - pre_time)

        next_state = []

        next_state.append(collision_rate)

        next_state.append(pre_collision_rate)

        pre_stat_succ = stat_succ

        pre_stat_coll = stat_coll

        pre_collision_rate = collision_rate

        pre_time = now

        print("now=", now, " collison rate=",collision_rate," thr=", thr)

        state = next_state

    print("="*25, " Evaluation Result:")

    printStats()

 

main()

 

Execution

(execution output omitted)

 

Last Modified: 2022/2/5

 

Dr. Chih-Heng Ke

Department of Computer Science and Information Engineering, National Quemoy University, Kinmen, Taiwan

Email: smallko@gmail.com