Contention window optimization using reinforcement learning (multi-agent version)
Please refer to my papers for more information:
C. H. Ke and L. Astuti, "Applying Deep Reinforcement Learning
to Improve Throughput and Reduce Collision Rate in IEEE 802.11 Networks,"
KSII Transactions on Internet and Information Systems, vol. 16, no. 1, pp.
334-349, 2022. DOI: 10.3837/tiis.2022.01.019. (SCI) (code)
(single-agent)
C. H. Ke and L. Astuti, "Applying Multi-Agent Deep Reinforcement Learning for Contention Window
Optimization to Enhance Wireless Network Performance," ICT Express. (SCI)
In this lab, I will use PARL to optimize the contention
window to enhance CSMA/CA performance.
P.S. Single-agent version: the agent is placed at the
access point. When the agent decides the contention window, the contention
window size is sent to the mobile stations via a beacon packet. When a
mobile station receives the beacon, it sets its contention window
accordingly.
Multi-agent version: an agent is placed at each
mobile station. Each mobile station sends its throughput to the AP, the AP
sums up all the throughputs and sends the aggregate throughput back to the
mobile stations. Each mobile station uses the aggregate throughput as the
reward to decide its contention window size. For the current Python code, we
assume that every mobile station has the same conditions (the same distance from
the access point, the same antenna gain, ...). In the future, I will use NS3 to run
more realistic simulations.
CSMA/CA (802.11b) (no reinforcement learning)
"""CSMA/CA (IEEE 802.11b) simulation without reinforcement learning.

All stations contend continuously: a station that has just transmitted (or
collided) immediately draws a new backoff.  Each call to ``resolve()``
processes one channel event (a successful transmission or a collision) and
advances the simulated clock ``now``.

Timing constants (SIFS, DIFS, EIFS, SLOT and the values returned by
``Tdata``/``Tack``) are in microseconds; they are divided by ``M`` (1e6)
before being added to ``now``, which is in seconds.
"""
import random
import sys

import numpy as np

_n = 50  # number of nodes
_simTime = 500  # sec
rate = 11  # 11, 5.5, 2 or 1 Mbps
_cwmin = 32
_cwmax = 1024
SIFS = 10
DIFS = 50
EIFS = SIFS + DIFS + 192 + 112
SLOT = 20
M = 1000000
_pktSize = 1000  # bytes

stat_succ = 0  # number of successful transmissions
stat_coll = 0  # number of collision events
stat_pkts = np.zeros(_n)  # packets delivered per station
cw = np.zeros(_n)  # current contention window per station
bo = np.zeros(_n)  # remaining backoff slots per station
now = 0.0  # simulated time in seconds


def init_bo():
    """Set every station to the minimum contention window and draw a backoff."""
    for i in range(_n):
        cw[i] = _cwmin
        bo[i] = random.randint(0, _cwmax) % cw[i]


def Tdata():
    """Return the duration of one data frame: 192 us plus payload+28 bytes at `rate`."""
    return 192 + ((_pktSize + 28) * 8.0) / rate


def Tack():
    """Return the duration of an ACK: 192 us plus 14 bytes at 1 Mbps."""
    return 192 + (14 * 8.0) / 1


def getMinBoAllStationsIndex():
    """Return the index of the first station holding the smallest backoff."""
    # np.argmin returns the first occurrence, like the strict `<` scan it replaces.
    return int(np.argmin(bo))


def getCountMinBoAllStations(min):
    """Return how many stations have a backoff equal to `min`."""
    return int(np.count_nonzero(bo == min))


def subMinBoFromAll(min, count):
    """Update backoff counters after the stations at `min` have transmitted.

    Stations with a larger backoff are decremented by ``min + 1``.  Stations
    that transmitted reset their window to ``_cwmin`` on success
    (``count == 1``) or double it (capped at ``_cwmax``) on collision, then
    draw a new backoff.

    Exits the process with status 1 if a backoff below `min` is found or if
    `count` is not positive while some station sits at `min`.
    """
    below = bo < min
    if below.any():
        print("<Error> min=", min, " bo=", bo[below][0])
        sys.exit(1)

    # Capture the transmitters before any counter is modified.
    senders = np.flatnonzero(bo == min)
    if senders.size and count < 1:
        print("<Error> count=", count)
        sys.exit(1)

    # NOTE(review): the multi-agent script in this file subtracts only `min`
    # here (no extra slot) - confirm which variant is intended.
    bo[bo > min] -= min + 1

    # Draw in ascending station order so the random sequence is unchanged.
    for i in senders:
        if count == 1:
            cw[i] = _cwmin
        elif cw[i] < _cwmax:
            cw[i] *= 2
        else:
            cw[i] = _cwmax
        bo[i] = random.randint(0, _cwmax) % cw[i]


def setStats(min, index, count):
    """Record a success for station `index` (``count == 1``) or a collision."""
    global stat_succ, stat_coll
    if count == 1:
        stat_pkts[index] += 1
        stat_succ += 1
    else:
        stat_coll += 1
        below = bo < min
        if below.any():
            print("<Error> min=", min, " bo=", bo[below][0])
            sys.exit(1)


def setNow(min, count):
    """Advance `now` by the time consumed by this success or collision.

    Exits the process with status 1 if `count` is not positive.
    """
    global now
    # The terms are added one at a time to keep the floating-point
    # accumulation identical to a step-by-step clock.
    if count == 1:
        now += min * SLOT / M
        now += Tdata() / M
        now += SIFS / M
        now += Tack() / M
        now += DIFS / M
    elif count > 1:
        now += min * SLOT / M
        now += SIFS / M
        now += Tdata() / M
        now += EIFS / M
    else:
        print("<Error> count=", count)
        sys.exit(1)


def resolve():
    """Process one channel event: find the transmitters, then update clock, stats and backoffs."""
    index = getMinBoAllStationsIndex()
    smallest = bo[index]
    count = getCountMinBoAllStations(smallest)
    setNow(smallest, count)
    setStats(smallest, index, count)
    subMinBoFromAll(smallest, count)


def printStats():
    """Print total packets, collision rate (%) and aggregate throughput (bits per second)."""
    print("\nGeneral Statistics\n")
    print("-" * 50)
    numPkts = stat_pkts.sum()
    print("Total num of packets:", numPkts)
    attempts = stat_succ + stat_coll
    # Guard the ratios so calling this before any event does not divide by zero.
    collision_rate = stat_coll / attempts * 100 if attempts else 0.0
    throughput = numPkts * (_pktSize * 8.0) / now if now > 0 else 0.0
    print("Collision rate:", collision_rate, "%")
    print("Aggregate Throughput:", throughput)


def main():
    """Run the simulation for `_simTime` simulated seconds and print the statistics."""
    global now
    random.seed(1)
    init_bo()
    now += DIFS / M
    while now < _simTime:
        resolve()
    printStats()


if __name__ == "__main__":
    main()
Execution
Collision Rate:~33%, Throughput:~4Mbps for 50
nodes. (11b)
CSMA/CA (802.11a) (no reinforcement learning)
"""CSMA/CA (IEEE 802.11a) simulation without reinforcement learning.

Same event-driven model as the 802.11b script, but frame durations are
computed from OFDM parameters.  Timing values are in microseconds and are
divided by ``M`` (1e6) before being added to ``now`` (seconds).

reference:
https://github.com/cecyliaborek/DCF-NumPy-simulation/blob/master/simulation.py
"""
import math
import random
import sys

import numpy as np

_n = 50  # number of nodes
_simTime = 500  # sec
rate = 54  # Mbps
control_rate = 24  # Mbps
_cwmin = 16
_cwmax = 1024
SIFS = 16
DIFS = 34
SLOT = 9
M = 1000000
_pktSize = 1000  # bytes

stat_succ = 0  # number of successful transmissions
stat_coll = 0  # number of collision events
stat_pkts = np.zeros(_n)  # packets delivered per station
cw = np.zeros(_n)  # current contention window per station
bo = np.zeros(_n)  # remaining backoff slots per station
now = 0.0  # simulated time in seconds

# dictionary: data rate (Mbps) -> bits per symbol
# NOTE(review): these values look like coded bits per OFDM symbol; padding is
# usually computed against data bits per symbol - confirm against the
# 802.11 OFDM rate-dependent parameter table before changing.
_BITS_PER_SYMBOL = {
    6: 48,
    9: 48,
    12: 96,
    18: 96,
    24: 192,
    36: 192,
    48: 288,
    54: 288,
}


def init_bo():
    """Set every station to the minimum contention window and draw a backoff."""
    for i in range(_n):
        cw[i] = _cwmin
        bo[i] = random.randint(0, _cwmax) % cw[i]


def Tdata():
    """Return the duration (us) of one data frame carrying `_pktSize` bytes at `rate`.

    Raises:
        KeyError: if `rate` is not one of the rates in the bits-per-symbol table.
    """
    symbol_bits = _BITS_PER_SYMBOL[rate]
    ofdm_preamble = 16  # us
    ofdm_signal = 24  # bits
    ofdm_signal_duration = ofdm_signal / control_rate  # us
    service = 16  # bits
    tail = 6  # bits
    mac_header = 36 * 8  # bits
    mac_tail = 4 * 8  # bits
    mac_frame = mac_header + _pktSize * 8 + mac_tail  # bits
    payload = service + mac_frame + tail  # bits
    # Pad the payload up to a whole number of symbols.
    padding = math.ceil(payload / symbol_bits) * symbol_bits - payload  # bits
    return ofdm_preamble + ofdm_signal_duration + (payload + padding) / rate  # us


def Tack():
    """Return the duration (us) of a 14-byte ACK sent at `control_rate`."""
    ofdm_preamble = 16  # us
    ofdm_signal = 24  # bits
    ofdm_signal_duration = ofdm_signal / control_rate  # us
    service = 16  # bits
    tail = 6  # bits
    ack = 14 * 8  # bits, ack frame
    return ofdm_preamble + ofdm_signal_duration + (service + ack + tail) / control_rate  # us


# EIFS=SIFS+ACK+DIFS
EIFS = SIFS + DIFS + Tack()


def getMinBoAllStationsIndex():
    """Return the index of the first station holding the smallest backoff."""
    # np.argmin returns the first occurrence, like the strict `<` scan it replaces.
    return int(np.argmin(bo))


def getCountMinBoAllStations(min):
    """Return how many stations have a backoff equal to `min`."""
    return int(np.count_nonzero(bo == min))


def subMinBoFromAll(min, count):
    """Update backoff counters after the stations at `min` have transmitted.

    Stations with a larger backoff are decremented by ``min + 1``.  Stations
    that transmitted reset their window to ``_cwmin`` on success
    (``count == 1``) or double it (capped at ``_cwmax``) on collision, then
    draw a new backoff.

    Exits the process with status 1 if a backoff below `min` is found or if
    `count` is not positive while some station sits at `min`.
    """
    below = bo < min
    if below.any():
        print("<Error> min=", min, " bo=", bo[below][0])
        sys.exit(1)

    # Capture the transmitters before any counter is modified.
    senders = np.flatnonzero(bo == min)
    if senders.size and count < 1:
        print("<Error> count=", count)
        sys.exit(1)

    bo[bo > min] -= min + 1

    # Draw in ascending station order so the random sequence is unchanged.
    for i in senders:
        if count == 1:
            cw[i] = _cwmin
        elif cw[i] < _cwmax:
            cw[i] *= 2
        else:
            cw[i] = _cwmax
        bo[i] = random.randint(0, _cwmax) % cw[i]


def setStats(min, index, count):
    """Record a success for station `index` (``count == 1``) or a collision."""
    global stat_succ, stat_coll
    if count == 1:
        stat_pkts[index] += 1
        stat_succ += 1
    else:
        stat_coll += 1
        below = bo < min
        if below.any():
            print("<Error> min=", min, " bo=", bo[below][0])
            sys.exit(1)


def setNow(min, count):
    """Advance `now` by the time consumed by this success or collision.

    Exits the process with status 1 if `count` is not positive.
    """
    global now
    # The terms are added one at a time to keep the floating-point
    # accumulation identical to a step-by-step clock.
    if count == 1:
        now += min * SLOT / M
        now += Tdata() / M
        now += SIFS / M
        now += Tack() / M
        now += DIFS / M
    elif count > 1:
        now += min * SLOT / M
        now += SIFS / M
        now += Tdata() / M
        now += EIFS / M
    else:
        print("<Error> count=", count)
        sys.exit(1)


def resolve():
    """Process one channel event: find the transmitters, then update clock, stats and backoffs."""
    index = getMinBoAllStationsIndex()
    smallest = bo[index]
    count = getCountMinBoAllStations(smallest)
    setNow(smallest, count)
    setStats(smallest, index, count)
    subMinBoFromAll(smallest, count)


def printStats():
    """Print packet/collision counters, collision rate (%) and aggregate throughput (bits per second)."""
    print("\nGeneral Statistics\n")
    print("-" * 50)
    numPkts = stat_pkts.sum()
    print("Total num of packets:", numPkts)
    print("stat_coll:", stat_coll, " stat_succ:", stat_succ)
    attempts = stat_succ + stat_coll
    # Guard the ratios so calling this before any event does not divide by zero.
    collision_rate = stat_coll * 1.0 / attempts * 100 if attempts else 0.0
    throughput = numPkts * (_pktSize * 8.0) / now if now > 0 else 0.0
    print("Collision rate:", collision_rate, "%")
    print("Aggregate Throughput:", throughput)


def main():
    """Run the simulation for `_simTime` simulated seconds and print the statistics."""
    global now
    random.seed(1)
    init_bo()
    now += DIFS / M
    while now < _simTime:
        resolve()
    printStats()


if __name__ == "__main__":
    main()
Execution:
Collision Rate:~38%, Throughput:~19Mbps for 50
nodes. (11a)
Multiagent.py
"""Multi-agent contention-window optimisation for CSMA/CA (802.11b) with PARL DQN.

One DQN agent is created per station.  Every 0.1 s of simulated time each
agent picks an action ``a`` and the station's contention window is set to
``2 ** (5 + a)``.  The observation is the station's collision rate over the
last two intervals and the reward is the aggregate throughput of the interval
divided by the channel rate.
"""
import collections
import copy
import os
import random
import sys

import numpy as np
import paddle.fluid as fluid
import parl
from parl import layers

MEMORY_SIZE = 20000
MEMORY_WARMUP_SIZE = 100
BATCH_SIZE = 10
LEARNING_RATE = 0.001
GAMMA = 0.9

pre_time = 0.0  # value of `now` at the end of the previous interval
pre_stat_succ = 0  # value of `stat_succ` at the end of the previous interval
pre_stat_coll = 0

_n = 50  # number of nodes
_simTime = 2000  # sec
rate = 11  # 11, 5.5, 2 or 1 Mbps
_cwmin = 32
_cwmax = 1024
SIFS = 10
DIFS = 50
EIFS = SIFS + DIFS + 192 + 112
SLOT = 20
M = 1000000
_pktSize = 1000  # bytes

stat_succ = 0  # number of successful transmissions
stat_coll = 0  # number of collision events
sta_coll = np.zeros(_n)  # number of failed transmission for each station
stat_pkts = np.zeros(_n)  # packets delivered per station
cw = np.zeros(_n)  # contention window currently in use per station
bo = np.zeros(_n)  # remaining backoff slots per station
sta_cw = np.zeros(_n)  # contention window chosen by each station's agent
now = 0.0  # simulated time in seconds


class Model(parl.Model):
    """Q-network: two 128-unit ReLU layers followed by a linear layer of size `act_dim`."""

    def __init__(self, act_dim):
        hid1_size = 128
        hid2_size = 128
        self.fc1 = layers.fc(size=hid1_size, act='relu')
        self.fc2 = layers.fc(size=hid2_size, act='relu')
        self.fc3 = layers.fc(size=act_dim, act=None)

    def value(self, obs):
        """Return the Q-values produced by the network for `obs`."""
        h1 = self.fc1(obs)
        h2 = self.fc2(h1)
        Q = self.fc3(h2)
        return Q


class DQN(parl.Algorithm):
    """DQN with a separate target network that is a deep copy of `model`."""

    def __init__(self, model, act_dim=None, gamma=None, lr=None):
        self.model = model
        self.target_model = copy.deepcopy(model)

        assert isinstance(act_dim, int)
        assert isinstance(gamma, float)
        assert isinstance(lr, float)
        self.act_dim = act_dim
        self.gamma = gamma
        self.lr = lr

    def predict(self, obs):
        """Return the online network's Q-values for `obs`."""
        return self.model.value(obs)

    def learn(self, obs, action, reward, next_obs, terminal):
        """Build the loss: squared error between Q(obs, action) and the bootstrapped target."""
        # Target: reward + gamma * max_a' Q_target(next_obs, a') for non-terminal steps.
        next_pred_value = self.target_model.value(next_obs)
        best_v = layers.reduce_max(next_pred_value, dim=1)
        best_v.stop_gradient = True
        terminal = layers.cast(terminal, dtype='float32')
        target = reward + (1.0 - terminal) * self.gamma * best_v

        # Select the Q-value of the action that was actually taken.
        pred_value = self.model.value(obs)
        action_onehot = layers.one_hot(action, self.act_dim)
        action_onehot = layers.cast(action_onehot, dtype='float32')
        pred_action_value = layers.reduce_sum(
            layers.elementwise_mul(action_onehot, pred_value), dim=1)

        cost = layers.square_error_cost(pred_action_value, target)
        cost = layers.reduce_mean(cost)
        optimizer = fluid.optimizer.Adam(learning_rate=self.lr)
        optimizer.minimize(cost)
        return cost

    def sync_target(self):
        """Copy the online network's weights into the target network."""
        self.model.sync_weights_to(self.target_model)


class Agent(parl.Agent):
    """Epsilon-greedy agent wrapping a DQN algorithm."""

    def __init__(self,
                 algorithm,
                 obs_dim,
                 act_dim,
                 e_greed=0.1,
                 e_greed_decrement=0):
        assert isinstance(obs_dim, int)
        assert isinstance(act_dim, int)
        self.obs_dim = obs_dim
        self.act_dim = act_dim
        super(Agent, self).__init__(algorithm)

        self.global_step = 0
        self.update_target_steps = 200  # sync the target network every 200 learn() calls
        self.e_greed = e_greed
        self.e_greed_decrement = e_greed_decrement

    def build_program(self):
        """Define the prediction and learning programs and their input placeholders."""
        self.pred_program = fluid.Program()
        self.learn_program = fluid.Program()

        with fluid.program_guard(self.pred_program):
            obs = layers.data(
                name='obs', shape=[self.obs_dim], dtype='float32')
            self.value = self.alg.predict(obs)

        with fluid.program_guard(self.learn_program):
            obs = layers.data(
                name='obs', shape=[self.obs_dim], dtype='float32')
            action = layers.data(name='act', shape=[1], dtype='int32')
            reward = layers.data(name='reward', shape=[], dtype='float32')
            next_obs = layers.data(
                name='next_obs', shape=[self.obs_dim], dtype='float32')
            terminal = layers.data(name='terminal', shape=[], dtype='bool')
            self.cost = self.alg.learn(obs, action, reward, next_obs, terminal)

    def sample(self, obs):
        """Return a random action with probability `e_greed`, else the greedy one.

        `e_greed` is decreased by `e_greed_decrement` on every call, down to 0.01.
        """
        sample = np.random.rand()
        if sample < self.e_greed:
            act = np.random.randint(self.act_dim)
        else:
            act = self.predict(obs)
        self.e_greed = max(0.01, self.e_greed - self.e_greed_decrement)
        return act

    def predict(self, obs):
        """Return the action with the highest predicted Q-value for a single observation."""
        obs = np.expand_dims(obs, axis=0)
        pred_Q = self.fluid_executor.run(
            self.pred_program,
            feed={'obs': obs.astype('float32')},
            fetch_list=[self.value])[0]
        pred_Q = np.squeeze(pred_Q, axis=0)
        act = np.argmax(pred_Q)
        return act

    def learn(self, obs, act, reward, next_obs, terminal):
        """Run one training step on a batch and return the fetched cost."""
        if self.global_step % self.update_target_steps == 0:
            self.alg.sync_target()
        self.global_step += 1

        act = np.expand_dims(act, -1)
        feed = {
            'obs': obs.astype('float32'),
            'act': act.astype('int32'),
            'reward': reward,
            'next_obs': next_obs.astype('float32'),
            'terminal': terminal
        }
        cost = self.fluid_executor.run(
            self.learn_program, feed=feed, fetch_list=[self.cost])[0]
        return cost


class ReplayMemory(object):
    """Fixed-capacity experience buffer; the oldest entries are dropped when full."""

    def __init__(self, max_size):
        self.buffer = collections.deque(maxlen=max_size)

    def append(self, exp):
        """Store one (obs, action, reward, next_obs, done) tuple."""
        self.buffer.append(exp)

    def sample(self, batch_size):
        """Return `batch_size` random experiences as five float32 arrays."""
        mini_batch = random.sample(self.buffer, batch_size)
        obs_batch, action_batch, reward_batch, next_obs_batch, done_batch = [], [], [], [], []

        for experience in mini_batch:
            s, a, r, s_p, done = experience
            obs_batch.append(s)
            action_batch.append(a)
            reward_batch.append(r)
            next_obs_batch.append(s_p)
            done_batch.append(done)

        return np.array(obs_batch).astype('float32'), \
            np.array(action_batch).astype('float32'), np.array(reward_batch).astype('float32'),\
            np.array(next_obs_batch).astype('float32'), np.array(done_batch).astype('float32')

    def __len__(self):
        return len(self.buffer)


def init_bo():
    """Set every station's window to `_cwmin` and draw an initial backoff."""
    for i in range(_n):
        sta_cw[i] = _cwmin
        cw[i] = sta_cw[i]
        bo[i] = random.randint(0, _cwmax) % cw[i]


def Trts():
    """Return 192 us plus 20 bytes at 1 Mbps."""
    return 192 + (20 * 8) / 1


def Tcts():
    """Return 192 us plus 14 bytes at 1 Mbps."""
    return 192 + (14 * 8) / 1


def Tdata():
    """Return the duration of one data frame: 192 us plus payload+28 bytes at `rate`."""
    return 192 + ((_pktSize + 28) * 8.0) / rate


def Tack():
    """Return the duration of an ACK: 192 us plus 14 bytes at 1 Mbps."""
    return 192 + (14 * 8.0) / 1


def getMinBoAllStationsIndex():
    """Return the index of the first station holding the smallest backoff."""
    # np.argmin returns the first occurrence, like the strict `<` scan it replaces.
    return int(np.argmin(bo))


def getCountMinBoAllStations(min):
    """Return how many stations sit at backoff `min`.

    Side effect: when more than one station does, each of them gets one
    failed transmission added to `sta_coll`.
    """
    contenders = bo == min
    count = int(np.count_nonzero(contenders))
    if count > 1:
        sta_coll[contenders] += 1
    return count


def subMinBoFromAll(min, count):
    """Update backoff counters after the stations at `min` have transmitted.

    Stations with a larger backoff are decremented by `min`.  Stations that
    transmitted take the window chosen by their agent (`sta_cw`) whether they
    succeeded or collided, then draw a new backoff.

    Exits the process with status 1 if a backoff below `min` is found or if
    `count` is not positive while some station sits at `min`.
    """
    below = bo < min
    if below.any():
        print("<Error> min=", min, " bo=", bo[below][0])
        sys.exit(1)

    # Capture the transmitters before any counter is modified.
    senders = np.flatnonzero(bo == min)
    if senders.size and count < 1:
        print("<Error> count=", count)
        sys.exit(1)

    bo[bo > min] -= min

    # contention window is only decided by the RL result
    for i in senders:
        cw[i] = sta_cw[i]
        bo[i] = random.randint(0, _cwmax) % cw[i]


def setStats(min, index, count):
    """Record a success for station `index` (``count == 1``) or a collision."""
    global stat_succ, stat_coll
    if count == 1:
        stat_pkts[index] += 1
        stat_succ += 1
    else:
        stat_coll += 1
        below = bo < min
        if below.any():
            print("<Error> min=", min, " bo=", bo[below][0])
            sys.exit(1)


def setNow(min, count):
    """Advance `now` by the time consumed by this success or collision.

    Exits the process with status 1 if `count` is not positive.
    """
    global now
    if count == 1:
        now += min * SLOT / M
        now += Tdata() / M
        now += SIFS / M
        now += Tack() / M
        now += DIFS / M
    elif count > 1:
        now += min * SLOT / M
        now += SIFS / M
        now += Tdata() / M
        now += EIFS / M
    else:
        print("<Error> count=", count)
        sys.exit(1)


def resolve():
    """Process one channel event: find the transmitters, then update clock, stats and backoffs."""
    index = getMinBoAllStationsIndex()
    smallest = bo[index]
    count = getCountMinBoAllStations(smallest)
    setNow(smallest, count)
    setStats(smallest, index, count)
    subMinBoFromAll(smallest, count)


def printStats():
    """Print the collision rate (%) and aggregate throughput (bits per second)."""
    print("\nGeneral Statistics\n")
    print("-" * 50)
    attempts = stat_succ + stat_coll
    # Guard the ratios so calling this before any event does not divide by zero.
    collision_rate = stat_coll / attempts * 100 if attempts else 0.0
    throughput = stat_succ * (_pktSize * 8.0) / now if now > 0 else 0.0
    print("Collision rate:", collision_rate, "%")
    print("Aggregate Throughput:", throughput)


def _choose_windows(agents, states, explore):
    """Let every agent pick an action and program the matching contention window.

    Uses `Agent.sample` when `explore` is true and `Agent.predict` otherwise.
    Returns the per-station observations and actions.
    """
    observations = []
    actions = []
    for i, agent in enumerate(agents):
        obs = np.array(states[i])
        action = agent.sample(obs) if explore else agent.predict(obs)
        sta_cw[i] = pow(2, 5 + action)
        observations.append(obs)
        actions.append(action)
    return observations, actions


def _run_interval(duration=0.1):
    """Resolve channel events until more than `duration` simulated seconds have elapsed."""
    start = now
    while True:
        resolve()
        if now - start > duration:
            break


def _record_interval(replay, states, observations, actions,
                     pre_sta_succ, pre_sta_coll, pre_sta_coll_rate):
    """Turn the interval that just ended into one experience per station.

    The reward (aggregate throughput / rate / M) is computed once and shared
    by all stations.  The interval baselines `pre_stat_succ` and `pre_time`
    are advanced only after every station has been processed; resetting them
    inside the per-station loop would give every station but the first a
    reward of zero.
    """
    global pre_stat_succ, pre_time

    thr = (stat_succ - pre_stat_succ) * (_pktSize * 8.0) / (now - pre_time)
    reward = thr / rate / M

    for i in range(_n):
        attempts = stat_pkts[i] + sta_coll[i] - pre_sta_succ[i] - pre_sta_coll[i]
        if attempts == 0:
            coll_rate = 0.0
        else:
            coll_rate = (sta_coll[i] - pre_sta_coll[i]) / attempts * 100

        # State: collision rate of this interval and of the previous one.
        next_state = [coll_rate, pre_sta_coll_rate[i]]

        pre_sta_succ[i] = stat_pkts[i]
        pre_sta_coll[i] = sta_coll[i]
        pre_sta_coll_rate[i] = coll_rate

        done = False
        replay[i].append(
            (observations[i], actions[i], reward, np.array(next_state), done))
        states[i] = next_state

    pre_stat_succ = stat_succ
    pre_time = now


def main():
    """Warm up the replay memories, train for `_simTime` simulated seconds, then evaluate for 5 s."""
    global now, stat_succ, stat_coll, pre_stat_succ, pre_time
    global sta_coll, sta_cw, stat_pkts

    random.seed(1)
    np.random.seed(1)
    init_bo()

    obs_dim = 2
    act_dim = 6
    print("obs_dim=", obs_dim, "act_dim=", act_dim)

    # One independent model / algorithm / agent / replay memory per station.
    agents = []
    replay = []
    for _ in range(_n):
        model = Model(act_dim=act_dim)
        algorithm = DQN(model, act_dim=act_dim, gamma=GAMMA, lr=LEARNING_RATE)
        agents.append(Agent(
            algorithm,
            obs_dim=obs_dim,
            act_dim=act_dim,
            e_greed=0.1,
            e_greed_decrement=1e-6))
        replay.append(ReplayMemory(MEMORY_SIZE))

    states = [[0.0, 0.0] for _ in range(_n)]
    pre_sta_succ = np.zeros(_n)
    pre_sta_coll = np.zeros(_n)
    pre_sta_coll_rate = np.zeros(_n)
    step = 0

    # Second call kept from the original flow so the random stream is unchanged.
    init_bo()
    now += DIFS / M

    # Warm-up: fill the replay memories before any learning takes place.
    for _ in range(MEMORY_WARMUP_SIZE):
        observations, actions = _choose_windows(agents, states, explore=True)
        _run_interval()
        _record_interval(replay, states, observations, actions,
                         pre_sta_succ, pre_sta_coll, pre_sta_coll_rate)

    # Training: learn every fifth interval, act epsilon-greedily in all of them.
    while now < _simTime:
        if step % 5 == 0:
            for i in range(_n):
                (batch_obs, batch_action, batch_reward, batch_next_obs,
                 batch_done) = replay[i].sample(BATCH_SIZE)
                agents[i].learn(batch_obs, batch_action, batch_reward,
                                batch_next_obs, batch_done)

        observations, actions = _choose_windows(agents, states, explore=True)
        _run_interval()
        _record_interval(replay, states, observations, actions,
                         pre_sta_succ, pre_sta_coll, pre_sta_coll_rate)
        step += 1

    printStats()

    # Reset clock, statistics and per-station history before evaluation
    # (the backoff counters `bo` and `cw` carry over from training).
    now = pre_time = 0.0
    states = [[0.0, 0.0] for _ in range(_n)]
    pre_sta_succ = np.zeros(_n)
    pre_sta_coll = np.zeros(_n)
    pre_sta_coll_rate = np.zeros(_n)
    sta_cw = np.zeros(_n)
    stat_succ = 0
    stat_coll = 0
    pre_stat_succ = 0
    sta_coll = np.zeros(_n)
    stat_pkts = np.zeros(_n)

    now += DIFS / M
    while now < 5:
        print("Evaluation, time=", now)
        observations, actions = _choose_windows(agents, states, explore=False)
        _run_interval()
        _record_interval(replay, states, observations, actions,
                         pre_sta_succ, pre_sta_coll, pre_sta_coll_rate)
        step += 1

    printStats()


if __name__ == "__main__":
    main()
Execution
With the multi-agent version, we can get a throughput of around
5 Mbps.
Dr.
Chih-Heng Ke (柯志亨)
Department
of Computer Science and Information Engineering, National Quemoy University,
Kinmen, Taiwan
Email: smallko@gmail.com