فهرست منبع

[ELECTRA/TF2] Fixed build issue of tokenizer on arm

Bobby Chen 3 سال پیش
والد
کامیت
cbeb6b9bc2
1 فایل تغییر یافته به همراه 1 افزوده شده و 37 حذف شده
  1. 1 37
      TensorFlow2/LanguageModeling/ELECTRA/Dockerfile

+ 1 - 37
TensorFlow2/LanguageModeling/ELECTRA/Dockerfile

@@ -13,45 +13,9 @@
 # limitations under the License.
 
 ARG FROM_IMAGE_NAME=nvcr.io/nvidia/tensorflow:20.07-tf2-py3
-
-######
-# Tokenizers is only available pre-built on x86
-#
-FROM ${FROM_IMAGE_NAME} AS tokenizers_amd64
-WORKDIR /wheelhouse
-RUN pip download tokenizers==0.7.0
-
-FROM quay.io/pypa/manylinux2014_aarch64 as tokenizers_arm64
-ARG PYVER=38
-RUN yum install -y openssl-devel
-RUN curl https://sh.rustup.rs -sSf | sh -s -- --default-toolchain nightly-2019-11-01 -y
-ENV PATH="/root/.cargo/bin:$PATH"
-ENV PYBIN=/opt/python/cp${PYVER}-cp${PYVER}/bin
-ENV PYTHON_SYS_EXECUTABLE="$PYBIN/python"
-RUN git clone -b python-v0.8.0 https://github.com/huggingface/tokenizers.git /opt/tokenizers
-WORKDIR /opt/tokenizers/bindings/python
-RUN "${PYBIN}/pip" install setuptools-rust \
- && "${PYBIN}/python" setup.py bdist_wheel \
- && rm -rf build/* \
- && for whl in dist/*.whl; do \
-        auditwheel repair "$whl" -w dist/; \
-    done \
- && rm dist/*-linux_* \
- && mkdir -p /wheelhouse \
- && mv dist/*.whl /wheelhouse
-
-ARG TARGETARCH
-FROM tokenizers_${TARGETARCH} AS tokenizers
-#
-#####
-
-
 FROM ${FROM_IMAGE_NAME}
 RUN apt-get update && apt-get install -y pbzip2 pv bzip2 cabextract
 
-RUN --mount=from=tokenizers,source=/wheelhouse,target=/tmp/wheelhouse \
-    pip install --no-cache-dir /tmp/wheelhouse/tokenizers*.whl
-
 ENV DATA_PREP_WORKING_DIR /workspace/electra/data
 WORKDIR /workspace
 RUN git clone https://github.com/attardi/wikiextractor.git && cd wikiextractor && git checkout 6408a430fc504a38b04d37ce5e7fc740191dee16 && cd ..
@@ -61,7 +25,7 @@ WORKDIR /workspace/electra
 
 RUN pip install --no-cache-dir tqdm boto3 requests six ipdb h5py nltk progressbar filelock  \
  git+https://github.com/NVIDIA/dllogger \
- nvidia-ml-py3==7.352.0
+ nvidia-ml-py3==7.352.0 tokenizers==0.11.0
 
 RUN apt-get install -y iputils-ping
 COPY . .