Changes from all commits (32 commits):
589e76a  add logger (#41) - dasimagin, Jan 24, 2023
c54bd4b  fix logging - dasimagin, Feb 4, 2023
f8f717b  add new simulator - dasimagin, Feb 4, 2023
805eba1  add layouts - dasimagin, Feb 4, 2023
7d2743d  clean up main readme - dasimagin, Feb 4, 2023
a3d2673  init cartpole guide - dasimagin, Feb 5, 2023
674527e  fix error problem - dasimagin, Feb 5, 2023
9096800  fix log - dasimagin, Feb 5, 2023
7d4cb07  disable log tests - dasimagin, Feb 5, 2023
8e96e2d  add simulator tests - dasimagin, Feb 5, 2023
5370a5a  remove old docs - dasimagin, Feb 5, 2023
11b2b6c  update readme.md - dasimagin, Apr 29, 2023
f8c0045  fix log tests - dasimagin, May 6, 2023
71de62a  rm docs/lib_fixes.md - dasimagin, Jun 17, 2023
fe6ffd8  update dynamics (#44) - dasimagin, Jun 21, 2023
b6da5ed  render svg - dasimagin, Jun 21, 2023
3551992  fix readme - dasimagin, Jun 21, 2023
b378a1a  Documentation Site (#45) - AndBondStyle, Jun 28, 2023
6d0371b  update readme - dasimagin, Jun 28, 2023
468f267  move image at the end of readme - dasimagin, Jun 28, 2023
05d50f6  update docs - dasimagin, Jun 30, 2023
596d7e3  add docker - dasimagin, Sep 24, 2023
82615d5  parameter estimation - dasimagin, Oct 7, 2023
02271e4  container 0.2.0 - dasimagin, Oct 8, 2023
340c4ac  small dynamics refactoring - dasimagin, Oct 12, 2023
9edd31f  add lqr - dasimagin, Oct 14, 2023
02915ff  container 0.3.0 - dasimagin, Oct 14, 2023
3e920cd  Docstrings (#48) - dasimagin, Feb 18, 2024
6acd423  add missed package - dasimagin, Feb 18, 2024
305fa63  fix workflow - dasimagin, Feb 18, 2024
0f5329a  toc depth - dasimagin, Feb 18, 2024
b423192  Changed default env configs and fixed model - Aug 22, 2024
47 changes: 47 additions & 0 deletions .devcontainer/devcontainer.json
@@ -0,0 +1,47 @@
// For format details, see https://aka.ms/devcontainer.json. For config options, see the
// README at: https://github.com/devcontainers/templates/tree/main/src/docker-existing-docker-compose
{
"name": "cart-pole",

// Update the 'dockerComposeFile' list if you have more compose files or use different names.
// The .devcontainer/docker-compose.yml file contains any overrides you need/want to make.
"dockerComposeFile": ["../docker-compose.yaml"],

// The 'service' property is the name of the service for the container that VS Code should
// use. Update this value and .devcontainer/docker-compose.yml to the real service name.
"service": "cartpole",

// The optional 'workspaceFolder' property is the path VS Code should open by default when
// connected. This is typically a file mount in .devcontainer/docker-compose.yml
"workspaceFolder": "/cartpole"

// Features to add to the dev container. More info: https://containers.dev/features.
// "features": {},

// Use 'forwardPorts' to make a list of ports inside the container available locally.
// "forwardPorts": [],

// Uncomment the next line if you want start specific services in your Docker Compose config.
// "runServices": [],

// Uncomment the next line if you want to keep your containers running after VS Code shuts down.
// "shutdownAction": "none",

// Uncomment the next line to run commands after the container is created.
// "postCreateCommand": "cat /etc/os-release",

// Configure tool-specific properties.
"customizations": {
"vscode": {
"extensions": [
"ms-python.python",
"ms-python.vscode-pylance",
"charliermarsh.ruff",
"yzhang.markdown-all-in-one"
]
}
}

// Uncomment to connect as an existing user other than the container default. More info: https://aka.ms/dev-containers-non-root.
// "remoteUser": "cartpole"
}
1 change: 1 addition & 0 deletions .devcontainer/docker-compose.yml
@@ -0,0 +1 @@
version: '3.9'
33 changes: 0 additions & 33 deletions .github/workflows/alt_ci.yml

This file was deleted.

30 changes: 0 additions & 30 deletions .github/workflows/ci.yml

This file was deleted.

28 changes: 28 additions & 0 deletions .github/workflows/docs.yml
@@ -0,0 +1,28 @@
name: docs

on:
  push:
    branches:
      - master
      - cart-pole-3 # TODO: Remove after merging to master

permissions:
  contents: write

jobs:
  deploy:
    runs-on: self-hosted
    steps:
      - uses: actions/checkout@v3
        with:
          fetch-depth: 0
      - uses: actions/setup-python@v4
        with:
          python-version: 3.9
      # TODO: Add proper caching
      # TODO: Maybe use poetry-based action
      - run: pip install mkdocs-material mkdocs-material-extensions mkdocstrings[python] mike poetry
      - run: |
          git config user.name github-actions
          git config user.email github-actions@github.com
      - run: mike deploy --push --force --update-aliases $(poetry version -s) latest
31 changes: 31 additions & 0 deletions Dockerfile
@@ -0,0 +1,31 @@
FROM ubuntu:22.04

ENV DEBIAN_FRONTEND=noninteractive
ENV SHELL /bin/bash
SHELL ["/bin/bash", "-c"]

WORKDIR /tmp

### COMMON

RUN apt update -q \
    && apt install -yq --no-install-recommends \
        build-essential \
        coinor-libipopt-dev \
        git \
        python3 \
        python3-dev \
        python3-pip \
    && pip3 install --no-cache-dir -U pip \
    && rm -rf /var/lib/apt/lists/* && apt-get clean

### POETRY

RUN pip3 install --no-cache-dir -U poetry \
    && poetry completions bash >> ~/.bash_completion

### PYTHON DEPENDENCIES

COPY pyproject.toml /tmp/pyproject.toml

RUN poetry config virtualenvs.create false \
    && poetry install --no-interaction --no-ansi --no-root
34 changes: 34 additions & 0 deletions SAC/config.yaml
@@ -0,0 +1,34 @@
environment:
  state_size: 5
  action_size: 1
  max_action: 1
  delta: 0.1

trainer:
  device_name: cpu
  gamma: 0.99
  max_buffer_size: 100000  # 10**5; YAML does not evaluate arithmetic, so spell out the number
  start_timesteps: 5000
  timesteps_per_epoch: 1
  batch_size: 128
  max_grad_norm: 10
  tau: 0.005
  policy_update_freq: 1
  alpha: 0.1

actor:
  lr: 3e-4
  tau: 1
  layer_sizes:
    - 256
    - 256
    - 256

critic:
  lr_1: 3e-4
  lr_2: 3e-4
  tau: 1
  layers:
    - 256
    - 256
    - 256
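
For reference, a minimal sketch of how this config could be read with PyYAML; the pyyaml dependency, the SAC/config.yaml path, and the variable names below are assumptions for illustration, not code from this PR.

# Hypothetical loader sketch (assumes pyyaml is installed and the working directory is the repo root).
import yaml

with open("SAC/config.yaml") as f:
    config = yaml.safe_load(f)

trainer_cfg = config["trainer"]
buffer_size = int(trainer_cfg["max_buffer_size"])  # 100000
actor_lr = float(config["actor"]["lr"])            # PyYAML loads "3e-4" as a string, so cast explicitly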
87 changes: 87 additions & 0 deletions SAC/models.py
@@ -0,0 +1,87 @@
import torch
from torch import nn
from torch.nn import functional as F

import numpy as np

from torch.distributions import Normal

from replay_buffer import state_to_tensor

DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
# LOG_STD_MIN, LOG_STD_MAX = -20, 2

class Actor(nn.Module):
    def __init__(self, state_dim, action_dim):
        super().__init__()

        self.h = 2056  # hidden layer width; TODO: change
        self.action_dim = action_dim

        self.actor_model = nn.Sequential(
            nn.Linear(in_features=state_dim, out_features=self.h),
            nn.ReLU(),
            nn.Linear(in_features=self.h, out_features=self.h),
            nn.ReLU(),
            nn.Linear(in_features=self.h, out_features=self.h),
            nn.ReLU(),
            nn.Linear(in_features=self.h, out_features=self.h),
            nn.ReLU(),
            nn.Linear(in_features=self.h, out_features=action_dim * 2)
        )

    def apply(self, states):  # note: this shadows nn.Module.apply
        log_std_min, log_std_max = -20, 2
        states = states.to(DEVICE)
        output = self.actor_model(states)
        means = output[..., :self.action_dim]
        # squash the raw network output into [log_std_min, log_std_max], then exponentiate
        log_std = torch.tanh(output[..., self.action_dim:]) + 1
        log_std = 0.5 * (log_std_max - log_std_min) * log_std + log_std_min
        std = torch.exp(log_std)
        normal_distr = Normal(means, std)

        actions_raw = normal_distr.rsample()
        actions = torch.tanh(actions_raw)
        # Change of variables for the tanh squashing: with a = tanh(u),
        #   log pi(a|s) = log N(u; mean, std) - log(1 - tanh(u)^2 + eps)
        # per action dimension; this is a more numerically stable version of
        # appendix C eq. 21 in https://arxiv.org/pdf/1801.01290.pdf
        log_prob = normal_distr.log_prob(actions_raw) - torch.log(1 - actions ** 2 + 1e-6)

        return actions, log_prob

    def get_action(self, states):
        with torch.no_grad():
            states = state_to_tensor(states)
            actions, _ = self.apply(states)
            actions = actions.cpu().detach().numpy()

            assert isinstance(actions, (list, np.ndarray))
            assert actions.max() <= 1. and actions.min() >= -1, "actions must be in the range [-1, 1]"
            return actions.item()


class Critic(nn.Module):

    def __init__(self, state_dim, action_dim):
        super().__init__()
        self.h = 2056
        input_dim = state_dim + action_dim
        self.critic_model = nn.Sequential(
            nn.Linear(in_features=input_dim, out_features=self.h),
            nn.ReLU(),
            nn.Linear(in_features=self.h, out_features=self.h),
            nn.ReLU(),
            nn.Linear(in_features=self.h, out_features=self.h),
            nn.ReLU(),
            nn.Linear(in_features=self.h, out_features=1)
        )

    def get_qvalues(self, states, actions):
        batch = torch.cat([states, actions], dim=1)
        qvalues = self.critic_model(batch)

        # assert len(qvalues.shape) == 1 and qvalues.shape[0] == states.shape[0]
        return qvalues
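
For reference, a hypothetical smoke test of these modules; the 6-dimensional state matches state_to_tensor in SAC/replay_buffer.py, while the import path, batch size, and device handling are assumptions for illustration and not part of this PR (they also require the cartpole package to be installed).

# Hypothetical smoke test, assuming SAC/ is on the import path.
import torch
from models import Actor, Critic, DEVICE

actor = Actor(state_dim=6, action_dim=1).to(DEVICE)   # apply() moves inputs to DEVICE internally
critic = Critic(state_dim=6, action_dim=1)            # kept on the CPU for this sketch

states = torch.zeros((4, 6))                          # dummy batch of 4 normalized states
actions, log_prob = actor.apply(states)               # actions squashed to [-1, 1]
qvalues = critic.get_qvalues(states, actions.cpu())   # critic weights live on the CPU here
print(actions.shape, log_prob.shape, qvalues.shape)   # (4, 1) each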
62 changes: 62 additions & 0 deletions SAC/replay_buffer.py
@@ -0,0 +1,62 @@
import numpy as np
import torch
import random

from math import cos

from cartpole import State, Error



def state_to_tensor(state: State):
    return torch.tensor([
        (state.cart_position / 0.25),
        (state.cart_velocity / 5.0),
        (state.cart_acceleration / 7.5),
        cos(state.pole_angle),
        state.pole_angle / (6 * np.pi),  # important
        state.pole_angular_velocity / (6 * np.pi)  # change and normalize
    ])


def make_tensor(from_state, to_state, action, reward):
    return torch.concat([
        state_to_tensor(from_state),
        state_to_tensor(to_state),
        torch.tensor([action]),
        torch.tensor([reward]),
        torch.tensor([to_state.error != Error.NO_ERROR])
    ])


class ReplayBuffer:

    def __init__(self, state_dim: int, maxlen: int):
        self.state_dim = state_dim
        self.states = torch.zeros((maxlen, self.state_dim * 2 + 3))
        self.maxlen = maxlen
        self.ptr = 0
        self.length = 0

    def __len__(self):
        # self.states is preallocated with maxlen rows, so report the number of
        # transitions actually stored rather than the storage size
        return self.length

    def add(self, from_state: State, to_state: State, action: float, reward: float):
        self.states[self.ptr] = make_tensor(from_state, to_state, action, reward)
        self.ptr = (self.ptr + 1) % self.maxlen
        self.length = min(self.length + 1, self.maxlen)

    def sample(self, sample_size: int, device):
        sample_size = min(self.length, sample_size)
        sample = self.states[random.sample(range(self.length), sample_size)]
        return (
            sample[:, :self.state_dim].to(device),
            sample[:, self.state_dim:self.state_dim * 2].to(device),
            sample[:, self.state_dim * 2].reshape(-1, 1).to(device),
            sample[:, self.state_dim * 2 + 1].reshape(-1, 1).to(device),
            sample[:, self.state_dim * 2 + 2].reshape(-1, 1).to(device)
        )

    def get_last(self):
        if self.length == 0:
            return None
        return self.states[(self.ptr - 1) % self.maxlen]
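
For reference, a minimal sketch of the buffer mechanics; the DummyState stand-in only mimics the fields state_to_tensor reads, and the import paths assume SAC/ is importable and the cartpole package is installed. None of this is code from the PR.

# Hypothetical usage sketch; DummyState is a stand-in for cartpole.State.
from dataclasses import dataclass

from cartpole import Error
from replay_buffer import ReplayBuffer

@dataclass
class DummyState:
    cart_position: float = 0.0
    cart_velocity: float = 0.0
    cart_acceleration: float = 0.0
    pole_angle: float = 0.0
    pole_angular_velocity: float = 0.0
    error: Error = Error.NO_ERROR

buffer = ReplayBuffer(state_dim=6, maxlen=100000)
buffer.add(DummyState(), DummyState(pole_angle=0.1), action=0.5, reward=1.0)
states, next_states, actions, rewards, dones = buffer.sample(sample_size=128, device="cpu")
print(len(buffer), states.shape)  # 1 torch.Size([1, 6]); sample_size is clamped to the buffer length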