Ver código fonte

[DLRM/TF2] Fix numpy bool API change

Tomasz Grel 2 anos atrás
pai
commit
e36f9d9bf3

+ 3 - 3
TensorFlow2/Recommendation/DLRM_and_DCNv2/dataloading/feature_spec.py

@@ -163,7 +163,7 @@ class FeatureSpec:
                     assert len(contained_features) == 1
 
                     # check label dtype
-                    assert np.dtype(self.feature_spec[first_feature][DTYPE_SELECTOR]) == np.bool
+                    assert np.dtype(self.feature_spec[first_feature][DTYPE_SELECTOR]) == bool
 
                 else:
                     assert False, "Feature of unknown type"
@@ -237,7 +237,7 @@ class FeatureSpec:
                         zip(categorical_feature_names, cat_feature_types, categorical_feature_cardinalities)}
         for f_name in numerical_feature_names:
             feature_dict[f_name] = {DTYPE_SELECTOR: str(np.dtype(np.float16))}
-        feature_dict[label_feature_name] = {DTYPE_SELECTOR: str(np.dtype(np.bool))}
+        feature_dict[label_feature_name] = {DTYPE_SELECTOR: str(np.dtype(bool))}
 
         channel_spec = {CATEGORICAL_CHANNEL: categorical_feature_names,
                         NUMERICAL_CHANNEL: numerical_feature_names,
@@ -297,4 +297,4 @@ def get_categorical_feature_type(size: int):
         if size < np.iinfo(numpy_type).max:
             return numpy_type
 
-    raise RuntimeError(f"Categorical feature of size {size} is too big for defined types")
+    raise RuntimeError(f"Categorical feature of size {size} is too big for defined types")

+ 2 - 2
TensorFlow2/Recommendation/DLRM_and_DCNv2/dataloading/raw_binary_dataset.py

@@ -137,7 +137,7 @@ class TfRawBinaryDataset:
             elif first_feature in set_of_label_features:
                 # Load label
                 # We verified earlier that there is only one label feature
-                label_bytes_per_batch = np.dtype(np.bool).itemsize * self._batch_size
+                label_bytes_per_batch = np.dtype(bool).itemsize * self._batch_size
                 self._label, batches = create_reader(path_to_open, label_bytes_per_batch)
             else:
                 raise ValueError("Unknown chunk type")
@@ -231,7 +231,7 @@ class TfRawBinaryDataset:
                     raw_data = feature.numpy().astype(ftype).tobytes()
                     stream.write(raw_data)
 
-                label_f.write(label.numpy().astype(np.bool).tobytes())
+                label_f.write(label.numpy().astype(bool).tobytes())
                 numerical_f.write(numerical_features.numpy().astype(np.float16).tobytes())
 
             for stream in chain(*categorical_fs, [label_f, numerical_f]):

+ 1 - 1
TensorFlow2/Recommendation/DLRM_and_DCNv2/dataloading/transcode.py

@@ -116,7 +116,7 @@ def main():
 
             # Append them to the binary files
             numerical_f.write(numerical_df.values.astype(np.float16).tobytes())
-            label_f.write(label_df.values.astype(np.bool).tobytes())
+            label_f.write(label_df.values.astype(bool).tobytes())
 
             categorical_arr = categorical_df.values
             for cat_idx, cat_feature_type in enumerate(categorical_feature_types):

+ 1 - 1
TensorFlow2/Recommendation/DLRM_and_DCNv2/preproc/split_dataset.py

@@ -69,7 +69,7 @@ def split_binary_file(
             numerical_f.write(numerical_features.astype(np.float16).tobytes())
 
             label = batch_data[:, 0]
-            label_f.write(label.astype(np.bool).tobytes())
+            label_f.write(label.astype(bool).tobytes())
 
             cat_offset = num_numerical_features + 1
             for cat_idx, cat_feature_type in enumerate(cat_feature_types):