filter_dedup.sh 332 B

12345678910111213
  1. #!/bin/usr/env sh
  2. # Copyright (c) 2018-present, Facebook, Inc.
  3. # All rights reserved.
  4. #
  5. # This source code is licensed under the MIT license found in the
  6. # LICENSE file in the root directory of this source tree.
  7. set -e
  8. LG=$(basename --suffix=".txt" "${1}")
  9. ./filter_utf8 < "shard/${LG}.txt" \
  10. | ./dedup > "shard/${LG}.dedup"