| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114 |
- #!/usr/bin/env python
- # -*- coding: utf-8 -*-
- ##############################################################################
- # Copyright (c) Jonathan Dekhtiar - [email protected]
- # All Rights Reserved.
- #
- # This source code is licensed under the MIT license found in the
- # LICENSE file in the root directory of this source tree.
- ##############################################################################
- import os
- import glob
- import ntpath
- import argparse
- from collections import defaultdict
# Command-line interface of the script: a single mandatory `--data_dir` option
# holding the location of the dataset on disk.
parser = argparse.ArgumentParser(description="DAGM2007_preprocessing")
parser.add_argument(
    "--data_dir",
    type=str,
    required=True,
    help="Path to DAGM 2007 private dataset",
)
# Running tally of defective images, addressed as
# DEFECTIVE_COUNT[<split name>][<class id>]; entries never touched read as 0.
DEFECTIVE_COUNT = defaultdict(lambda: defaultdict(int))

# Number of defective images each split is expected to contain, keyed by split
# name and then by class id (1 to 10). The tuples below are ordered by class id.
EXPECTED_DEFECTIVE_SAMPLES_PER_CLASS = {
    "Train": dict(enumerate((79, 66, 66, 82, 70, 83, 150, 150, 150, 150), start=1)),
    "Test": dict(enumerate((71, 84, 84, 68, 80, 67, 150, 150, 150, 150), start=1)),
}
def write_set_list(challenge_folder_path, data_set):
    """Index one split of one class folder into a CSV file.

    Writes `<challenge_folder_path>/<data_set lower-cased>_list.csv` with a
    header line followed by one row per `*.PNG` file found directly inside
    `<challenge_folder_path>/<data_set>`. An image is flagged as defective when
    a file named `<image name>_label.PNG` exists in the `Label` sub-folder of
    the split; for the other images the label column is left empty.

    Args:
        challenge_folder_path: Path of the class folder (e.g. `.../Class1`).
        data_set: Name of the split sub-folder (`"Train"` or `"Test"` here).

    Returns:
        The number of images flagged as defective.

    Raises:
        ValueError: If the split sub-folder does not exist.
    """
    challenge_set_folder_path = os.path.join(challenge_folder_path, data_set)

    if not os.path.exists(challenge_set_folder_path):
        raise ValueError('The folder `%s` does not exists' % challenge_set_folder_path)

    label_folder_path = os.path.join(challenge_set_folder_path, "Label")
    list_filepath = os.path.join(challenge_folder_path, "%s_list.csv" % data_set.lower())
    defective_count = 0

    with open(list_filepath, 'w') as data_list_file:
        data_list_file.write('image_filepath,lbl_image_filepath,is_defective\n')

        # glob returns matches in arbitrary order; sorting keeps the CSV
        # identical from one run (and one machine) to the next.
        for image_path in sorted(glob.glob(os.path.join(challenge_set_folder_path, "*.PNG"))):
            fullname = os.path.basename(image_path)
            filename, _ = os.path.splitext(fullname)

            lbl_filename = "%s_label.PNG" % filename
            defective = os.path.exists(os.path.join(label_folder_path, lbl_filename))

            if defective:
                defective_count += 1
            else:
                lbl_filename = ""

            # `%d` renders the boolean flag as 1 / 0.
            data_list_file.write('%s,%s,%d\n' % (fullname, lbl_filename, defective))

    return defective_count


def check_defective_count(data_set, class_id, found, expected):
    """Verify the number of defective images found for one class and split.

    Args:
        data_set: Name of the split, only used in the error message.
        class_id: Class number, only used in the error message.
        found: Number of defective images actually found.
        expected: Number of defective images that should have been found.

    Raises:
        RuntimeError: If `found` differs from `expected`.
    """
    if found != expected:
        # The expected count goes first: "There should be <expected> ... instead of <found>".
        raise RuntimeError(
            "There should be `%d` defective samples instead of `%d` in challenge (%s): %d" % (
                expected, found, data_set, class_id
            )
        )


if __name__ == "__main__":
    FLAGS, unknown_args = parser.parse_known_args()

    # Report every unrecognised argument before aborting.
    if unknown_args:
        for bad_arg in unknown_args:
            print("ERROR: Unknown command line arg: %s" % bad_arg)
        raise ValueError("Invalid command line arg(s)")

    if not os.path.exists(FLAGS.data_dir):
        raise ValueError('The dataset directory received `%s` does not exists' % FLAGS.data_dir)

    # Class folders are named Class1 ... Class10.
    for class_id in range(1, 11):
        challenge_folder_path = os.path.join(FLAGS.data_dir, "Class%d" % class_id)

        print("[DAGM Preprocessing] Parsing Class ID: %02d ..." % class_id)

        if not os.path.exists(challenge_folder_path):
            raise ValueError('The folder `%s` does not exists' % challenge_folder_path)

        for data_set in ["Train", "Test"]:
            DEFECTIVE_COUNT[data_set][class_id] += write_set_list(challenge_folder_path, data_set)

            check_defective_count(
                data_set,
                class_id,
                found=DEFECTIVE_COUNT[data_set][class_id],
                expected=EXPECTED_DEFECTIVE_SAMPLES_PER_CLASS[data_set][class_id],
            )
|