# Source code for sequifier.make
import os
# Template text written verbatim to <project>/configs/preprocess.yaml by make().
# "PLEASE FILL" and EXAMPLE_* values are placeholders that end up in the file.
_preprocess_template_lines = (
    "project_path: .",
    "data_path: PLEASE FILL",
    "read_format: csv",
    "write_format: parquet",
    "selected_columns: [EXAMPLE_INPUT_COLUMN_NAME] # should include all target column, can include additional columns",
    "group_proportions:",
    "- 0.8",
    "- 0.1",
    "- 0.1",
    "seq_length: 48",
    "seq_step_sizes:",
    "- 1",
    "- 1",
    "- 1",
    "max_rows: null",
)
# Every line, including the last one, is newline-terminated.
preprocess_config_string = "\n".join(_preprocess_template_lines) + "\n"
# Template text written verbatim to <project>/configs/train.yaml by make().
# "PLEASE FILL" and EXAMPLE_* values are placeholders that end up in the file.
# The nested sections are indented so that the YAML parses into the intended
# structure (without indentation, `name` and EXAMPLE_TARGET_COLUMN_NAME would
# be duplicated top-level keys and the section headers would be null).
# NOTE(review): the nesting below is reconstructed from the key names and the
# in-template comments -- confirm against the training config loader.
train_config_string = """project_path: .
model_name: default
read_format: parquet
ddconfig_path: PLEASE FILL
selected_columns: [EXAMPLE_INPUT_COLUMN_NAME] # should include all target column, can include additional columns
target_columns: [EXAMPLE_TARGET_COLUMN_NAME]
target_column_types: # 'criterion' in training_spec must also be adapted
  EXAMPLE_TARGET_COLUMN_NAME: real
seq_length: 48
inference_batch_size: 10
export_generative_model: PLEASE FILL # true or false
export_embedding_model: PLEASE FILL # true or false
export_onnx: true
model_spec:
  d_model: 128
  d_model_by_column: # the size of the embedding of individual variables, must sum to d_model
    EXAMPLE_INPUT_COLUMN_NAME: # can be left out if either all input variables are real or all are categorical
  nhead: 16
  d_hid: 128
  nlayers: 3
training_spec:
  device: cuda
  epochs: 1000
  iter_save: 10
  batch_size: 100
  log_interval: 10
  lr: 0.0001
  accumulation_steps: 1
  dropout: 0.2
  criterion:
    EXAMPLE_TARGET_COLUMN_NAME: MSELoss
  optimizer:
    name: AdamW
  scheduler:
    name: CosineAnnealingLR
    T_max: 111
    eta_min: 0.00001
  continue_training: true
"""
# Template text written verbatim to <project>/configs/infer.yaml by make().
# "PLEASE FILL" / PLEASE_FILL and EXAMPLE_* values are placeholders that end up
# in the file. The entry under `target_column_types` is indented so that it
# parses as a child of that mapping rather than as a stray top-level key.
infer_config_string = """project_path: .
ddconfig_path: PLEASE FILL
model_type: PLEASE_FILL # generative or embedding
model_path: PLEASE FILL
data_path: PLEASE FILL
selected_columns: [EXAMPLE_INPUT_COLUMN_NAME] # should include all target column, can include additional columns
target_columns: [EXAMPLE_TARGET_COLUMN_NAME]
target_column_types:
  EXAMPLE_TARGET_COLUMN_NAME: real
output_probabilities: false
map_to_id: false
device: cpu
seq_length: 48
inference_batch_size: 10
autoregression: true
"""
# Contents of the .gitignore that make() writes into a new project:
# one ignore pattern per line, with no trailing newline after the last one.
_gitignore_patterns = (
    "models/",
    "logs/",
    "checkpoints/",
    "outputs/",
    "data/",
    ".DS_Store",
)
gitignore_string = "\n".join(_gitignore_patterns)
# [docs]
def make(args):
    """Create a new sequifier project skeleton on disk.

    Creates ``<project_name>/configs`` and writes ``<project_name>/.gitignore``
    plus the ``preprocess.yaml``, ``train.yaml`` and ``infer.yaml`` templates
    into the ``configs`` directory.

    Args:
        args: The command-line arguments; only ``args.project_name`` is read.

    Raises:
        ValueError: If ``project_name`` is ``None`` or empty.
        FileExistsError: If ``<project_name>/configs`` already exists, so an
            existing project's files are never overwritten.
    """
    project_name = args.project_name
    # Explicit raise instead of `assert`: asserts are stripped under `python -O`,
    # which would let an empty name fall through to the filesystem calls.
    if project_name is None or len(project_name) == 0:
        raise ValueError(f"project_name '{project_name}' is not admissible")

    config_dir = os.path.join(project_name, "configs")
    # No exist_ok: creating the directory doubles as the "project is new" check.
    os.makedirs(config_dir)

    # Target path -> template text, in the order the files are written.
    templates = {
        os.path.join(project_name, ".gitignore"): gitignore_string,
        os.path.join(config_dir, "preprocess.yaml"): preprocess_config_string,
        os.path.join(config_dir, "train.yaml"): train_config_string,
        os.path.join(config_dir, "infer.yaml"): infer_config_string,
    }
    for path, content in templates.items():
        with open(path, "w", encoding="utf-8") as f:
            f.write(content)