Skip to content

Commit

Permalink
Merge pull request #8 from rail-berkeley/update-config-and-to-pass-gpu
Browse files: browse the repository at this point in the history
minor config refactor and support setting gpu information
  • Loading branch information
vitchyr authored Apr 23, 2021
2 parents bbf305e + 13ac5f0 commit 12afc76
Show file tree
Hide file tree
Showing 4 changed files with 58 additions and 30 deletions.
15 changes: 13 additions & 2 deletions doodad/wrappers/easy_launch/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,13 @@
]
LOCAL_LOG_DIR = '/home/user/logs/'

# see https://docs.microsoft.com/en-us/azure/virtual-machines/ncv3-series
DEFAULT_AZURE_GPU_MODEL = 'nvidia-tesla-v100'
DEFAULT_AZURE_INSTANCE_TYPE = 'Standard_DS1_v2'
DEFAULT_AZURE_REGION = 'westus'

DEFAULT_DOCKER = 'python:3'

import os
try:
AZ_SUB_ID=os.environ['AZURE_SUBSCRIPTION_ID']
Expand All @@ -19,9 +26,13 @@
AZ_CONTAINER=os.environ['AZURE_STORAGE_CONTAINER']
AZ_CONN_STR=os.environ['AZURE_STORAGE_CONNECTION_STRING']
except:
print('azure config not set')
print('config.py: Azure environment variables not set')

try:
from doodad.wrappers.easy_launch.config_private import *
except ImportError:
print('Please set config_private.py')
print("""
Consider copying config.py to config_private.py, i.e.
cp doodad/wrappers/easy_launch/config.py doodad/wrappers/easy_launch/config_private.py
""")
4 changes: 2 additions & 2 deletions doodad/wrappers/easy_launch/metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
'DoodadConfig',
[
('use_gpu', bool),
('gpu_id', Union[int, str]),
('num_gpu', int),
('git_infos', List[GitInfo]),
('script_name', str),
('output_directory', str),
Expand Down Expand Up @@ -75,7 +75,7 @@ def save_doodad_config(doodad_config: DoodadConfig):
)
f.write('\n')
f.write('use_gpu={}\n'.format(doodad_config.use_gpu))
f.write('gpu_id={}\n'.format(doodad_config.gpu_id))
f.write('num_gpu={}\n'.format(doodad_config.num_gpu))


def save_script_name(script_name: str, log_dir: str):
Expand Down
53 changes: 31 additions & 22 deletions doodad/wrappers/easy_launch/sweep_function.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@

import doodad
from doodad.wrappers.easy_launch import run_experiment, metadata
from doodad.wrappers.easy_launch.config_private import AZ_SUB_ID, AZ_CLIENT_ID, AZ_TENANT_ID, AZ_SECRET, AZ_CONN_STR, AZ_CONTAINER, CODE_DIRS_TO_MOUNT, NON_CODE_DIRS_TO_MOUNT, LOCAL_LOG_DIR
from doodad.wrappers.easy_launch import config
from doodad.wrappers.easy_launch.metadata import save_doodad_config
from doodad.wrappers.sweeper import DoodadSweeper
from doodad.wrappers.sweeper.hyper_sweep import Sweeper
Expand All @@ -22,10 +22,11 @@ def sweep_function(
add_date_to_logname=True,
mode='azure',
use_gpu=False,
gpu_id=0,
num_gpu=1,
name_runs_by_id=True,
add_time_to_run_id=True,
start_run_id=0
start_run_id=0,
docker_img=config.DEFAULT_DOCKER,
):
"""
Usage:
Expand Down Expand Up @@ -81,12 +82,12 @@ def function(doodad_config, variant):
datestamp = time.strftime("%y-%m-%d")
log_path = '%s_%s' % (datestamp, log_path)
target = osp.join(REPO_DIR, 'doodad/wrappers/easy_launch/run_experiment.py')
sweeper, output_mount = _create_sweeper_and_output_mount(mode, log_path)
sweeper, output_mount = _create_sweeper_and_output_mount(mode, log_path, docker_img)
git_infos = metadata.generate_git_infos()

doodad_config = metadata.DoodadConfig(
use_gpu=use_gpu,
gpu_id=gpu_id,
num_gpu=num_gpu,
git_infos=git_infos,
script_name=' '.join(sys.argv),
output_directory=output_mount.mount_point,
Expand Down Expand Up @@ -147,6 +148,11 @@ def _run_sweep():
log_path=log_path,
add_date_to_logname=False,
postprocess_config_and_run_mode=postprocess_config_and_run_mode,
instance_type=config.DEFAULT_AZURE_INSTANCE_TYPE,
gpu_model=config.DEFAULT_AZURE_GPU_MODEL,
use_gpu=use_gpu,
num_gpu=num_gpu,
region=config.DEFAULT_AZURE_REGION,
)
elif mode == 'gcp':
sweeper.run_sweep_gcp(
Expand All @@ -156,6 +162,8 @@ def _run_sweep():
log_prefix=log_path,
add_date_to_logname=False,
postprocess_config_and_run_mode=postprocess_config_and_run_mode,
num_gpu=num_gpu,
use_gpu=use_gpu,
)
elif mode == 'local':
sweeper.run_sweep_local(
Expand All @@ -175,47 +183,47 @@ def _run_method_here_no_doodad(
create_final_log_path
):
sweeper = Sweeper(params, default_params)
for xid, config in enumerate(sweeper):
for xid, param in enumerate(sweeper):
new_log_path = create_final_log_path(log_path, xid)
doodad_config = doodad_config._replace(
output_directory=osp.join(LOCAL_LOG_DIR, new_log_path),
output_directory=osp.join(config.LOCAL_LOG_DIR, new_log_path),
)
save_doodad_config(doodad_config)
method_call(doodad_config, config)
method_call(doodad_config, param)


def _create_mounts():
NON_CODE_MOUNTS = [
doodad.MountLocal(**non_code_mapping)
for non_code_mapping in NON_CODE_DIRS_TO_MOUNT
for non_code_mapping in config.NON_CODE_DIRS_TO_MOUNT
]
if REPO_DIR not in CODE_DIRS_TO_MOUNT:
CODE_DIRS_TO_MOUNT.append(REPO_DIR)
if REPO_DIR not in config.CODE_DIRS_TO_MOUNT:
config.CODE_DIRS_TO_MOUNT.append(REPO_DIR)
CODE_MOUNTS = [
doodad.MountLocal(local_dir=code_dir, pythonpath=True)
for code_dir in CODE_DIRS_TO_MOUNT
for code_dir in config.CODE_DIRS_TO_MOUNT
]
mounts = CODE_MOUNTS + NON_CODE_MOUNTS
return mounts


def _create_sweeper_and_output_mount(mode, log_path):
def _create_sweeper_and_output_mount(mode, log_path, docker_img):
mounts = _create_mounts()
az_mount = doodad.MountAzure(
'',
mount_point='/output',
)
sweeper = DoodadSweeper(
mounts=mounts,
docker_img='vitchyr/railrl_v12_cuda10-1_mj2-0-2-2_torch1-1-0_gym0-12-5_py3-6-5:latest',
azure_subscription_id=AZ_SUB_ID,
azure_storage_connection_str=AZ_CONN_STR,
azure_client_id=AZ_CLIENT_ID,
azure_authentication_key=AZ_SECRET,
azure_tenant_id=AZ_TENANT_ID,
azure_storage_container=AZ_CONTAINER,
docker_img=docker_img,
azure_subscription_id=config.AZ_SUB_ID,
azure_storage_connection_str=config.AZ_CONN_STR,
azure_client_id=config.AZ_CLIENT_ID,
azure_authentication_key=config.AZ_SECRET,
azure_tenant_id=config.AZ_TENANT_ID,
azure_storage_container=config.AZ_CONTAINER,
mount_out_azure=az_mount,
local_output_dir=osp.join(LOCAL_LOG_DIR, log_path), # TODO: how to make this vary in local mode?
local_output_dir=osp.join(config.LOCAL_LOG_DIR, log_path), # TODO: how to make this vary in local mode?
)
# TODO: the sweeper should probably only have one output mount that is
# set rather than read based on the mode
Expand Down Expand Up @@ -258,6 +266,7 @@ def example_function(doodad_config, variant):
example_function,
params_to_sweep,
default_params=default_params,
log_path='test_easy_launch_{}_mode'.format(mode),
log_path='test_gpu_easy_launch_{}'.format(mode),
mode=mode,
use_gpu=True,
)
16 changes: 12 additions & 4 deletions doodad/wrappers/sweeper/launcher.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,9 @@
import getpass
import os
from datetime import datetime

import doodad
import doodad.mode
import doodad.mount as mount
from doodad.utils import REPO_DIR
from doodad.wrappers.sweeper import hyper_sweep


Expand Down Expand Up @@ -85,6 +83,7 @@ def run_sweep_local(self, target, params, extra_mounts=None, num_chunks=-1, **kw
def run_sweep_gcp(self, target, params,
log_prefix=None, add_date_to_logname=True,
region='us-west1-a', instance_type='n1-standard-4', args=None,
num_gpu=1,
extra_mounts=None, num_chunks=-1, **kwargs):
"""
Run a grid search on GCP
Expand All @@ -104,7 +103,8 @@ def run_sweep_gcp(self, target, params,
zone=region,
instance_type=instance_type,
gcp_image=self.gcp_image,
gcp_image_project=self.gcp_project
gcp_image_project=self.gcp_project,
num_gpu=num_gpu,
)
if num_chunks > 0:
hyper_sweep.run_sweep_doodad_chunked(target, params,
Expand All @@ -125,7 +125,12 @@ def run_sweep_azure(self, target, params,
region='westus',
instance_type='Standard_DS1_v2',
tags=None,
extra_mounts=None, num_chunks=-1, **kwargs):
extra_mounts=None,
num_chunks=-1,
use_gpu=False,
num_gpu=1,
gpu_model='nvidia-tesla-k80',
**kwargs):
"""
Run a grid search on Azure
"""
Expand All @@ -148,6 +153,9 @@ def run_sweep_azure(self, target, params,
region=region,
instance_type=instance_type,
tags=tags,
use_gpu=use_gpu,
gpu_model=gpu_model,
num_gpu=num_gpu,
)
if num_chunks > 0:
hyper_sweep.run_sweep_doodad_chunked(target, params,
Expand Down

0 comments on commit 12afc76

Please sign in to comment.