Source code for sequifier.make
import os
preprocess_config_string = """project_root: .
data_path: PLEASE FILL
read_format: csv
write_format: parquet
selected_columns: [EXAMPLE_INPUT_COLUMN_NAME] # should include all target column, can include additional columns
split_ratios:
- 0.8
- 0.1
- 0.1
seq_length: 48
stride_by_split:
- 1
- 1
- 1
max_rows: null
"""
# Starter content for a new project's configs/train.yaml (written by make()).
# "PLEASE FILL" and "EXAMPLE_*" values are placeholders.
# YAML nesting is expressed through indentation: the children of
# target_column_types, model_spec, feature_embedding_dims, training_spec,
# criterion, optimizer and scheduler must be indented below their parent.
# Without that, those parents parse as null and their children become
# (partly duplicated) top-level keys.
train_config_string = """project_root: .
model_name: PLEASE FILL
read_format: parquet
metadata_config_path: PLEASE FILL
input_columns: [EXAMPLE_INPUT_COLUMN_NAME] # should include all target column, can include additional columns
target_columns: [EXAMPLE_TARGET_COLUMN_NAME]
target_column_types: # 'criterion' in training_spec must also be adapted
  EXAMPLE_TARGET_COLUMN_NAME: real
seq_length: 48
inference_batch_size: 10
export_generative_model: PLEASE FILL # true or false
export_embedding_model: PLEASE FILL # true or false
export_onnx: true
model_spec:
  initial_embedding_dim: 128
  feature_embedding_dims: # the size of the embedding of individual variables, must sum to dim_model
    EXAMPLE_INPUT_COLUMN_NAME: # can be left out if either all input variables are real or all are categorical
  joint_embedding_dim: null
  dim_model: 128
  n_head: 16
  dim_feedforward: 128
  num_layers: 3
  prediction_length: 1
training_spec:
  device: cuda
  epochs: 1000
  save_interval_epochs: 10
  batch_size: 100
  log_interval: 10
  learning_rate: 0.0001
  accumulation_steps: 1
  dropout: 0.2
  criterion:
    EXAMPLE_TARGET_COLUMN_NAME: MSELoss
  optimizer:
    name: AdamW
  scheduler:
    name: CosineAnnealingLR
    T_max: 111
    eta_min: 0.00001
  continue_training: true
"""
# Starter content for a new project's configs/infer.yaml (written by make()).
# "PLEASE FILL"/"PLEASE_FILL" and "EXAMPLE_*" values are placeholders.
# The entry under target_column_types is indented so that it is parsed as a
# member of that mapping rather than as a separate top-level key.
infer_config_string = """project_root: .
metadata_config_path: PLEASE FILL
model_type: PLEASE_FILL # generative or embedding
model_path: PLEASE FILL
data_path: PLEASE FILL
input_columns: [EXAMPLE_INPUT_COLUMN_NAME] # should include all target column, can include additional columns
target_columns: [EXAMPLE_TARGET_COLUMN_NAME]
target_column_types:
  EXAMPLE_TARGET_COLUMN_NAME: real
output_probabilities: false
map_to_id: false
device: cpu
seq_length: 48
inference_batch_size: 10
autoregression: true
"""
# Contents of the .gitignore placed in a new project by make().
# The text ends with a newline, like the YAML templates, so that entries
# appended later (e.g. `echo foo >> .gitignore`) start on their own line
# instead of being glued onto ".DS_Store".
gitignore_string = """models/
logs/
checkpoints/
outputs/
data/
.DS_Store
"""
[docs]
def make(args):
    """Scaffold the directory layout and starter files of a new project.

    Creates ``<project_name>/configs`` and then writes ``.gitignore``,
    ``configs/preprocess.yaml``, ``configs/train.yaml`` and
    ``configs/infer.yaml`` from the module-level template strings.

    Args:
        args: The command-line arguments; only ``args.project_name`` is read.

    Raises:
        ValueError: If ``project_name`` is empty or otherwise falsy.
        FileExistsError: If ``<project_name>/configs`` already exists
            (``os.makedirs`` is called without ``exist_ok``).
    """
    name = args.project_name
    if not name or len(name) <= 0:
        raise ValueError(f"project_name '{name}' is not admissible")

    # Also creates the project directory itself as the parent of configs/.
    os.makedirs(f"{name}/configs")

    # (path relative to the project root, file contents), in write order.
    scaffold = (
        (".gitignore", gitignore_string),
        ("configs/preprocess.yaml", preprocess_config_string),
        ("configs/train.yaml", train_config_string),
        ("configs/infer.yaml", infer_config_string),
    )
    for relative_path, contents in scaffold:
        with open(f"{name}/{relative_path}", "w") as handle:
            handle.write(contents)