# preprocess.py (1.5 KB)
import os
import sys
import time
from argparse import ArgumentDefaultsHelpFormatter, ArgumentParser
from subprocess import call

from data_preprocessing.convert2tfrec import Converter
from data_preprocessing.preprocessor import Preprocessor
from utils.utils import get_task_code
  8. parser = ArgumentParser(formatter_class=ArgumentDefaultsHelpFormatter)
  9. parser.add_argument("--data", type=str, default="/data", help="Path to data directory")
  10. parser.add_argument("--results", type=str, default="/data", help="Path for saving results directory")
  11. parser.add_argument(
  12. "--exec_mode",
  13. type=str,
  14. default="training",
  15. choices=["training", "test"],
  16. help="Mode for data preprocessing",
  17. )
  18. parser.add_argument("--task", type=str, help="Number of task to be run. MSD uses numbers 01-10")
  19. parser.add_argument("--dim", type=int, default=3, choices=[2, 3], help="Data dimension to prepare")
  20. parser.add_argument("--n_jobs", type=int, default=-1, help="Number of parallel jobs for data preprocessing")
  21. parser.add_argument("--vpf", type=int, default=1, help="Volumes per tfrecord")
  22. if __name__ == "__main__":
  23. args = parser.parse_args()
  24. start = time.time()
  25. Preprocessor(args).run()
  26. Converter(args).run()
  27. task_code = get_task_code(args)
  28. path = os.path.join(args.data, task_code)
  29. if args.exec_mode == "test":
  30. path = os.path.join(path, "test")
  31. call(f'find {path} -name "*.npy" -print0 | xargs -0 rm', shell=True)
  32. end = time.time()
  33. print(f"Preprocessing time: {(end - start):.2f}")