Dockerfile 2.0 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647
  1. # Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  # Base image for the build; override it with
  # `docker build --build-arg FROM_IMAGE_NAME=<image> ...`.
  ARG FROM_IMAGE_NAME="nvcr.io/nvidia/tensorflow:20.10-tf1-py3"
  FROM $FROM_IMAGE_NAME
  # The build steps that follow (apt-get, writes under /usr/local) run as root.
  USER root
  # Spark dependencies
  # Versions of the Spark distribution that is downloaded further down.
  # Declared with ENV (not ARG), so they remain set in the running container.
  # Neither value references the other, so one ENV instruction is safe.
  ENV APACHE_SPARK_VERSION=2.3.1 \
      HADOOP_VERSION=2.7
  # Headless Java 8 runtime for Spark. Index update, install and cleanup share
  # one layer so the apt package lists are never stored in the image.
  RUN apt-get update && \
      apt-get install -y --no-install-recommends \
          ca-certificates-java \
          openjdk-8-jre-headless && \
      apt-get clean && \
      rm -rf /var/lib/apt/lists/*
  # Download the Spark binary distribution, verify it, unpack it under
  # /usr/local and expose it at the version-independent path /usr/local/spark.
  # - The archive is fetched over HTTPS and checked against a pinned SHA-512;
  #   the checksum must be updated whenever APACHE_SPARK_VERSION or
  #   HADOOP_VERSION changes.
  # - `cd` is used instead of WORKDIR on purpose: WORKDIR would also change the
  #   directory that later relative COPY instructions resolve against.
  # - Download, extraction, cleanup and symlink live in one layer, so the
  #   tarball never persists in the image.
  RUN SPARK_DIST="spark-${APACHE_SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}" && \
      cd /tmp && \
      wget -q "https://archive.apache.org/dist/spark/spark-${APACHE_SPARK_VERSION}/${SPARK_DIST}.tgz" && \
      echo "DC3A97F3D99791D363E4F70A622B84D6E313BD852F6FDBC777D31EAB44CBC112CEEAA20F7BF835492FB654F48AE57E9969F93D3B0E6EC92076D1C5E1B40B4696 *${SPARK_DIST}.tgz" | sha512sum -c - && \
      tar xzf "${SPARK_DIST}.tgz" -C /usr/local --owner root --group root --no-same-owner && \
      rm "${SPARK_DIST}.tgz" && \
      ln -s "${SPARK_DIST}" /usr/local/spark
  # Spark config
  # SPARK_HOME is the version-independent /usr/local/spark path.
  ENV SPARK_HOME=/usr/local/spark
  # Kept as a separate ENV instruction: variable substitution uses the values
  # in effect *before* the instruction, so $SPARK_HOME must already be set.
  # NOTE(review): the py4j version in the zip name presumably has to match the
  # one bundled with the installed Spark release - confirm when bumping Spark.
  ENV PYTHONPATH=$SPARK_HOME/python:$SPARK_HOME/python/lib/py4j-0.10.7-src.zip
  # Driver JVM options: 1 GiB initial heap, 4 GiB max heap, log4j level "info".
  # Quoted because the value contains spaces.
  ENV SPARK_OPTS="--driver-java-options=-Xms1024M --driver-java-options=-Xmx4096M --driver-java-options=-Dlog4j.logLevel=info"
  # Python dependencies. Only the requirements files are copied at this point,
  # so editing other source files does not invalidate the pip layers below.
  COPY requirements* ./
  RUN pip install --no-cache-dir --requirement requirements.txt
  # These packages are installed as listed, without pulling in their dependencies.
  RUN pip install --no-cache-dir --no-deps --requirement requirements-no-deps.txt
  # Application sources go to /wd, which is also the working directory from here on.
  WORKDIR /wd
  COPY . ./