Skip to content
GitLab
Explore
Sign in
Register
Primary navigation
Search or go to…
Project
Bipedal Walker Evo
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Package registry
Container registry
Model registry
Operate
Environments
Terraform modules
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
GitLab community forum
Contribute to GitLab
Provide feedback
Terms and privacy
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
Philip Maas
Bipedal Walker Evo
Commits
caf3014d
Commit
caf3014d
authored
Jan 17, 2022
by
Philip Maas
Browse files
Options
Downloads
Patches
Plain Diff
First Upload
parent
5826010f
No related branches found
No related tags found
1 merge request
!1
Evaluations
Changes
4
Show whitespace changes
Inline
Side-by-side
Showing
4 changed files
brain.py
+40
-0
40 additions, 0 deletions
brain.py
main.py
+26
-0
26 additions, 0 deletions
main.py
population.py
+109
-0
109 additions, 0 deletions
population.py
walker.py
+42
-0
42 additions, 0 deletions
walker.py
with
217 additions
and
0 deletions
brain.py
0 → 100644
+
40
−
0
View file @
caf3014d
import
numpy
as
np
import
random
import
copy
class Brain:
    """A fixed sequence of random actions (4-dim vectors in [-1, 1]) that is
    replayed one step at a time by a Walker."""

    # default fraction of moves replaced by mutate()
    MUTATION_RATE = 0.1

    def __init__(self, size):
        """Create a brain with `size` random moves.

        :param size: initial number of moves in the sequence
        """
        self.directions = []  # list of np.ndarray of shape (4,), values in [-1, 1)
        self.step = 0  # index of the next move to replay
        self.increase_moves(size)

    def get_move(self):
        """Return the next move and advance the step counter.

        Raises IndexError once the sequence is exhausted; callers are expected
        to bound-check first (see Walker.update).
        """
        move = self.directions[self.step]
        self.step += 1
        return move

    def increase_moves(self, size):
        """Append `size` fresh uniformly random moves — we want different and
        random movements."""
        for _ in range(size):
            self.directions.append(np.random.uniform(-1, 1, 4))

    def clone(self):
        """Return a copy of this brain with the same directions and step reset to 0."""
        # Start from an empty brain instead of Brain(len(self.directions)):
        # the original generated a full set of random moves only to overwrite
        # every one of them immediately.
        clone = Brain(0)
        clone.directions = [copy.copy(direction) for direction in self.directions]
        return clone

    def mutate(self, rate=MUTATION_RATE):
        """Mutate the brain by replacing roughly `rate` of the directions
        (default: roughly 10% of the movements) with random movements.

        :param rate: per-move probability of replacement in [0, 1]
        """
        for i in range(len(self.directions)):
            if random.random() < rate:
                self.directions[i] = np.random.uniform(-1, 1, 4)
if __name__ == '__main__':  # for debugging: inspect a freshly generated brain
    debug_brain = Brain(100)
    print(debug_brain.directions)
    print(len(debug_brain.directions))
This diff is collapsed.
Click to expand it.
main.py
0 → 100644
+
26
−
0
View file @
caf3014d
import
gym
from
population
import
Population
import
time
import
numpy
as
np
import
matplotlib.pyplot
as
plt
INCREASE_BY = 5  # moves appended to every brain after each generation
GAME_CANCELLED = False  # loop flag; nothing in this file ever sets it True — stop with Ctrl+C

# NOTE(review): this module-level env appears unused — Population creates one
# env per walker itself; verify against the rest of the project before removing.
env = gym.make('BipedalWalker-v3')
if __name__ == '__main__':
    population = Population(50)
    # this is our game
    while not GAME_CANCELLED:
        if not population.all_players_finished():
            # generation still running: advance every walker one step
            population.update()
            # time.sleep(0.1)
            continue
        # this is our genetic algorithm after one generation of players
        population.natural_selection()
        population.mutate_babies()
        population.increase_moves(INCREASE_BY)
        population.reset_environments()
        print(f'Best Index: {population.best_walker_index}')
        print(f'Best Fitness: {population.fitnesses[population.best_walker_index]}')
        print(f'Max Steps: {population.max_steps}')
This diff is collapsed.
Click to expand it.
population.py
0 → 100644
+
109
−
0
View file @
caf3014d
import
numpy
as
np
from
walker
import
Walker
import
gym
import
random
import
logging
import
copy
MAX_STEPS = 10000  # initial per-walker step budget; Population lowers it once a walker reaches the goal
class Population:
    """A generation of Walkers evolved with a simple genetic algorithm."""

    def __init__(self, size):
        """Create `size` walkers, each with its own BipedalWalker environment.

        :param size: number of walkers per generation
        """
        self.size = size
        self.fitness_sum = 0.0  # sum of min-shifted fitnesses, set by calculate_fitness_sum()
        self.gen = 1  # generation counter
        self.best_walker_index = 0  # index of the best player in self.walkers
        self.best_walker_fitness = 0.0
        self.max_steps = MAX_STEPS  # step budget; tightened once a walker reaches the goal
        self.walkers = []
        self.envs = []
        self.fitnesses = None  # np.ndarray after calculate_fitness_sum()
        for i in range(self.size):
            self.envs.append(gym.make('BipedalWalker-v3'))
            self.walkers.append(Walker(self.envs[i]))
        self.reset_environments()

    def reset_environments(self):
        """Reset every gym environment to its start state."""
        for env in self.envs:
            env.reset()

    def update(self):
        """Advance every walker by one step, killing walkers over the budget."""
        for walker in self.walkers:
            # if the player has taken more steps than the best player needed
            # to reach the goal, he's dead
            if walker.brain.step > self.max_steps:
                walker.dead = True
            else:
                walker.update()

    def calculate_fitness_sum(self):
        """Collect all fitnesses, shift them so the minimum is 0, cache the sum."""
        self.fitnesses = np.array([walker.fitness for walker in self.walkers],
                                  dtype=float)
        self.fitnesses -= np.min(self.fitnesses)  # maybe offset: +1
        self.fitness_sum = np.sum(self.fitnesses)

    def all_players_finished(self):
        """Return True when every walker is either dead or has reached the goal."""
        for walker in self.walkers:
            if walker.dead is False and walker.reached_goal is False:
                return False
        return True

    def natural_selection(self):
        """Breed the next generation: the champion survives unchanged, everyone
        else is the baby of a fitness-proportionally selected parent."""
        new_walkers = [Walker(self.envs[i]) for i in range(self.size)]
        self.calculate_fitness_sum()
        self.set_best_walker()
        # the champion lives on
        new_walkers[0] = self.walkers[self.best_walker_index].get_baby()
        new_walkers[0].is_best = True
        for i in range(1, len(new_walkers)):
            parent = self.select_parent()  # select parent based on fitness
            new_walkers[i] = parent.get_baby()  # get baby from them
        self.walkers = copy.copy(new_walkers)
        self.gen += 1

    def select_parent(self):
        """Roulette-wheel selection: pick a walker with probability
        proportional to its (min-shifted) fitness.

        Draw an 'arrow' in [0, fitness_sum], then add the fitness bars
        together until the running sum passes the arrow; fitter walkers cover
        more of the range and are chosen more often.
        """
        # Degenerate case: when all walkers have equal fitness, the min-shift
        # zeroes every bar and the wheel has no area. The original code logged
        # an error and returned None here, which crashed the caller on
        # parent.get_baby() — fall back to a uniform random pick instead.
        if self.fitness_sum <= 0:
            return random.choice(self.walkers)
        arrow = random.uniform(0, self.fitness_sum)
        running_sum = 0.0  # those are the bars we add together
        for i in range(self.size):
            running_sum += self.fitnesses[i]
            if running_sum > arrow:
                return self.walkers[i]
        # float rounding can leave running_sum a hair below the arrow;
        # the last walker is the mathematically correct pick in that case
        return self.walkers[-1]

    def mutate_babies(self):
        """Mutate the brains of all babies.

        Starts at index 1 — we don't want to mutate the champion's brain.
        """
        for i in range(1, len(self.walkers)):
            self.walkers[i].brain.mutate()

    def set_best_walker(self):
        """Find the fittest walker; if it reached the goal, tighten max_steps."""
        max_index = np.argmax(self.fitnesses)
        self.best_walker_index = max_index
        self.best_walker_fitness = self.walkers[max_index].fitness
        # if this dot reached the goal then reset the minimum number of steps
        # it takes to get to the goal
        if self.walkers[max_index].reached_goal:
            self.max_steps = self.walkers[max_index].brain.step
            logging.info('Found goal?!')
            # the original passed max_steps as a stray logging argument with no
            # placeholder, which makes logging raise a formatting error
            logging.info('step: %s', self.max_steps)

    def increase_moves(self, size):
        """Grow every brain by `size` moves, but never beyond the step budget."""
        if len(self.walkers[0].brain.directions) < self.max_steps:
            for walker in self.walkers:
                walker.brain.increase_moves(size)
This diff is collapsed.
Click to expand it.
walker.py
0 → 100644
+
42
−
0
View file @
caf3014d
from
brain
import
Brain
class Walker:
    """One agent: a gym environment plus a Brain that replays scripted moves."""

    def __init__(self, env):
        # new brain with X instructions
        self.brain = Brain(50)
        self.dead = False
        self.reached_goal = False
        # true if this dot is the best dot from the previous generation
        self.is_best = False
        self.fitness = 0.0
        self.env = env
        # self.pos = copy.copy(self.map.startpoint)

    def update(self):
        """Move the dot according to the brain's directions."""
        if self.dead is True or self.reached_goal is True:
            return  # finished walkers are inert
        brain = self.brain
        if brain.step >= len(brain.directions):
            # ran out of scripted moves
            self.dead = True
            return
        observation, reward, done, info = self.env.step(brain.get_move())
        self.fitness += reward
        if reward == -100:
            self.dead = True
        elif done is True:
            self.reached_goal = True
            self.fitness += 10000000
        if self.is_best:
            self.env.render()

    """
    def get_fitness(self):
        if self.reached_goal:
            # if the dot reached the goal then the fitness is based on the amount of steps it took to get there
            self.fitness = 1 / 16 + 10000.0 / (self.brain.step ** 2)
        else:  # if the dot didn't reach the goal then the fitness is based on how close it is to the goal
            self.fitness = 1 / (self.map.get_closest_distance(self.pos[X], self.pos[Y]) ** 2)
        return self.fitness
    """

    def get_baby(self):
        """Return a new Walker in the same env carrying a clone of this brain."""
        baby = Walker(self.env)
        # babies have the same brain as their parents
        baby.brain = self.brain.clone()
        return baby
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment