Browse source

[EffDet/PyT] Invoking CUDA synchronize() before Timing

Ao Tang, 3 years ago
parent
commit
f613b7c0a8

+ 4 - 0
PyTorch/Detection/Efficientdet/train.py

@@ -521,12 +521,14 @@ def train_epoch(
 
     model.train()
 
+    torch.cuda.synchronize()
     end = time.time()
     last_idx = steps_per_epoch - 1
     num_updates = epoch * steps_per_epoch
     for batch_idx in range(steps_per_epoch):
         input, target = next(loader_iter)
         last_batch = batch_idx == last_idx
+        torch.cuda.synchronize()
         data_time_m.update(time.time() - end)
 
         with torch.cuda.amp.autocast(enabled=use_amp):
@@ -575,6 +577,7 @@ def train_epoch(
         if lr_scheduler is not None:
             lr_scheduler.step_update(num_updates=num_updates, metric=losses_m.avg)
 
+        torch.cuda.synchronize()
         end = time.time()
         if args.benchmark:
             if batch_idx >= args.benchmark_steps:
@@ -597,6 +600,7 @@ def validate(model, loader, args, evaluator=None, epoch=0, log_suffix=''):
 
     model.eval()
 
+    torch.cuda.synchronize()
     end = time.time()
     last_idx = len(loader) - 1
     with torch.no_grad():

+ 4 - 0
PyTorch/Detection/Efficientdet/validate.py

@@ -208,12 +208,14 @@ def validate(args):
     bench.eval()
     batch_time = AverageMeter()
     throughput = AverageMeter()
+    torch.cuda.synchronize()
     end = time.time()
     total_time_start = time.time()
     with torch.no_grad():
         for i, (input, target) in enumerate(loader):
             with torch.cuda.amp.autocast(enabled=args.amp):
                 output = bench(input, target['img_scale'], target['img_size'])
+            torch.cuda.synchronize()
             batch_time.update(time.time() - end)
             throughput.update(input.size(0) / batch_time.val)
             evaluator.add_predictions(output, target)
@@ -235,6 +237,7 @@ def validate(args):
                 )
             end = time.time()
 
+    torch.cuda.synchronize()
     dllogger_metric['total_inference_time'] = time.time() - total_time_start
     dllogger_metric['inference_throughput'] = throughput.avg
     dllogger_metric['inference_time'] = 1000 / throughput.avg
@@ -245,6 +248,7 @@ def validate(args):
             mean_ap = evaluator.evaluate()
         else:
             evaluator.save_predictions(args.results)
+        torch.cuda.synchronize()
         dllogger_metric['map'] = mean_ap
         dllogger_metric['total_eval_time'] = time.time() - total_time_start
     else: