Dockerfile_spark 1.9 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344
  1. # Copyright (c) 2020 NVIDIA CORPORATION. All rights reserved.
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  # Base image for every following instruction. Defaults to the CUDA 10.2 /
  # cuDNN 8 runtime image on Ubuntu 18.04; override it at build time with
  # --build-arg FROM_IMAGE_NAME=<image>.
  ARG FROM_IMAGE_NAME=nvcr.io/nvidia/cuda:10.2-cudnn8-runtime-ubuntu18.04
  FROM $FROM_IMAGE_NAME
  # Install the OS-level tooling (curl, git, OpenJDK 8), then download Spark
  # 3.0.1 (Hadoop 3.2 build) to /opt/spark and the cuDF 0.14 and
  # rapids-4-spark 0.1.0 jars to /opt.
  #
  # - apt-get is used instead of apt, whose command-line interface is not
  #   intended for scripts.
  # - All packages are installed by one apt-get call directly after the update.
  # - curl runs with -f so an HTTP error aborts the build instead of saving the
  #   server's error page as the artifact, and with -L so redirects are followed.
  # - The apt package lists are deliberately left in place: a later RUN in this
  #   file installs further packages.
  RUN apt-get update && \
      DEBIAN_FRONTEND=noninteractive apt-get install -y \
          curl \
          git \
          openjdk-8-jdk && \
      curl -fSL https://archive.apache.org/dist/spark/spark-3.0.1/spark-3.0.1-bin-hadoop3.2.tgz -o /opt/spark.tgz && \
      tar -xzf /opt/spark.tgz -C /opt/ && \
      mv /opt/spark-3.0.1-bin-hadoop3.2 /opt/spark && \
      rm /opt/spark.tgz && \
      curl -fSL https://repo1.maven.org/maven2/ai/rapids/cudf/0.14/cudf-0.14-cuda10-2.jar -o /opt/cudf.jar && \
      curl -fSL https://repo1.maven.org/maven2/com/nvidia/rapids-4-spark_2.12/0.1.0/rapids-4-spark_2.12-0.1.0.jar -o /opt/rapids-4-spark.jar
  # Install pip and the Python packages listed in requirements.txt.
  #
  # - COPY replaces ADD: requirements.txt is a plain local file, so ADD's
  #   archive-extraction and URL handling are not wanted here.
  # - apt-get update runs in the same layer as the install, so the step does not
  #   depend on package lists left behind by an earlier (possibly stale, cached)
  #   layer; the lists it downloads are removed again in the same layer.
  #   NOTE(review): no later instruction visible in this file uses apt; if one is
  #   added it must run its own apt-get update.
  # - Recommended packages are still installed, because requirements.txt may
  #   contain packages that have to be compiled - TODO confirm before trimming.
  # - pip is always invoked as "python3 -m pip" so the freshly upgraded pip is
  #   the one that installs the requirements, and --no-cache-dir keeps pip's
  #   download cache out of the image.
  COPY requirements.txt .
  RUN apt-get update && \
      DEBIAN_FRONTEND=noninteractive apt-get install -y python3-pip && \
      rm -rf /var/lib/apt/lists/* && \
      python3 -m pip install --no-cache-dir --upgrade pip && \
      python3 -m pip install --no-cache-dir -r requirements.txt
  # Switch to /workspace/dlrm (created if it does not exist) and copy the whole
  # build context into it.
  WORKDIR /workspace/dlrm
  COPY . /workspace/dlrm/
  # Wire the downloaded jars and the GPU configuration files into the Spark
  # installation under /opt/spark:
  #   - both jars from /opt go to /opt/spark/jars/
  #   - get_gpu_resources.sh and spark-defaults.conf from preproc/gpu go to
  #     /opt/spark/conf/, and the script is made executable
  #   - the now-empty source directory preproc/gpu is removed
  # Everything happens in one RUN so the chmod does not add a layer of its own.
  # Each destination ends with "/" so mv fails loudly if the directory is
  # missing instead of renaming a file to that path.
  RUN mv /opt/cudf.jar /opt/rapids-4-spark.jar /opt/spark/jars/ && \
      mv /workspace/dlrm/preproc/gpu/get_gpu_resources.sh \
         /workspace/dlrm/preproc/gpu/spark-defaults.conf \
         /opt/spark/conf/ && \
      chmod +x /opt/spark/conf/get_gpu_resources.sh && \
      rm -fr /workspace/dlrm/preproc/gpu
  # Make python3 the interpreter used by PySpark and by the plain "python"
  # command.
  #
  # ENV exports PYSPARK_PYTHON to every process started in the container.
  # The export appended to /etc/bash.bashrc is kept for compatibility, but that
  # file is only read by interactive bash shells, so on its own it does not
  # reach commands started non-interactively.
  # The commands are chained with && so a failure of either one fails the build.
  ENV PYSPARK_PYTHON=/usr/bin/python3
  RUN echo 'export PYSPARK_PYTHON=/usr/bin/python3' >> /etc/bash.bashrc && \
      update-alternatives --install /usr/bin/python python /usr/bin/python3 10