50 lines
		
	
	
		
			1.3 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			50 lines
		
	
	
		
			1.3 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
| import time
 | |
| from contextlib import nullcontext
 | |
| 
 | |
| import torch
 | |
| from torch.profiler import (
 | |
|     ProfilerActivity,
 | |
|     profile,
 | |
|     schedule,
 | |
|     tensorboard_trace_handler,
 | |
| )
 | |
| 
 | |
| 
 | |
class DummyProfiler:
    """Stand-in profiler object that only counts how often ``step`` is called.

    It exposes a ``step()`` method and a ``step_number`` attribute and does
    no profiling work of its own.
    """

    def __init__(self):
        """Start the step counter at zero."""
        # Number of times ``step`` has been invoked on this instance.
        self.step_number = 0

    def step(self):
        """Advance the step counter by one."""
        self.step_number = self.step_number + 1
 | |
| 
 | |
| 
 | |
| # Randomly Generated Data
 | |
def get_data(batch_size, seq_len, vocab_size):
    """Build a random batch of token ids and an all-ones attention mask.

    Args:
        batch_size: Number of rows in the generated batch.
        seq_len: Number of token ids per row.
        vocab_size: Exclusive upper bound for the sampled token ids; ids are
            drawn from ``[0, vocab_size)``.

    Returns:
        A tuple ``(input_ids, attention_mask)``. Both tensors have shape
        ``(batch_size, seq_len)``; ``attention_mask`` is filled with ones and
        shares dtype and device with ``input_ids``.
    """
    # Use the current CUDA device when one exists; otherwise fall back to the
    # CPU so the helper does not raise on hosts without a GPU.
    if torch.cuda.is_available():
        device = torch.cuda.current_device()
    else:
        device = torch.device("cpu")

    input_ids = torch.randint(0, vocab_size, (batch_size, seq_len), device=device)
    attention_mask = torch.ones_like(input_ids)
    return input_ids, attention_mask
 | |
| 
 | |
| 
 | |
def get_tflops(model_numel, batch_size, seq_len, step_time):
    """Return a throughput figure scaled by 1e12 for one step.

    The value is ``model_numel * batch_size * seq_len * 8`` divided by 1e12
    and then by the step time.

    Args:
        model_numel: Element count of the model (presumably its number of
            parameters -- confirm against the caller).
        batch_size: Batch size used for the step.
        seq_len: Sequence length used for the step.
        step_time: Duration of the step (presumably in seconds -- confirm
            against the caller).

    Returns:
        The computed throughput as a float.
    """
    # NOTE(review): the factor 8 is an operations-per-element-per-token
    # constant whose derivation is not shown here; kept exactly as is.
    work = model_numel * batch_size * seq_len * 8
    # A tiny epsilon keeps the division defined when step_time is zero.
    guarded_time = step_time + 1e-12
    return work / 1e12 / guarded_time
 | |
| 
 | |
| 
 | |
def get_profile_context(enable_flag, warmup_steps, active_steps, save_dir):
    """Return a context manager that yields a profiler-like object.

    Args:
        enable_flag: When truthy, a real ``torch.profiler.profile`` is built;
            otherwise a no-op context wrapping a ``DummyProfiler`` is used.
        warmup_steps: Value passed as ``warmup`` to the profiler schedule.
        active_steps: Value passed as ``active`` to the profiler schedule.
        save_dir: Directory handed to ``tensorboard_trace_handler``.

    Returns:
        Either a ``torch.profiler.profile`` instance or a ``nullcontext``
        whose ``with`` target is a fresh ``DummyProfiler``.
    """
    if not enable_flag:
        # Profiling disabled: hand back an object that still offers ``step()``.
        return nullcontext(DummyProfiler())

    # No wait phase: warm-up begins on the first step.
    profiler_schedule = schedule(wait=0, warmup=warmup_steps, active=active_steps)
    trace_handler = tensorboard_trace_handler(save_dir)
    return profile(
        activities=[ProfilerActivity.CPU, ProfilerActivity.CUDA],
        schedule=profiler_schedule,
        on_trace_ready=trace_handler,
        record_shapes=True,
        profile_memory=True,
    )
 | |
| 
 | |
| 
 | |
def get_time_stamp():
    """Return the current local time formatted as ``"%d-%H:%M"``.

    The result is day-of-month, a dash, then hour and minute separated by a
    colon, each zero-padded to two digits (for example ``"07-14:05"``).
    """
    # With no explicit time tuple, strftime formats the current local time.
    return time.strftime("%d-%H:%M")
 | 
