awslabs · e-davidson · Jul 26, 2024 · Jul 26, 2024 · Jul 26, 2024 · Jul 26, 2024
@@ -3,10 +3,11 @@ FROM alpine AS tgi
 RUN mkdir -p /tgi
 ADD https://github.com/huggingface/text-generation-inference/archive/refs/tags/v2.0.2.tar.gz /tgi/sources.tar.gz
 RUN tar -C /tgi -xf /tgi/sources.tar.gz --strip-components=1
+RUN sed -i 's/1\.75\.0/1.79.0/g' /tgi/rust-toolchain.toml
 
 # Build cargo components (adapted from TGI original Dockerfile)
 # Note that the build image is aligned on the same Linux version as the base image (Debian bookworm/ Ubuntu 22.04)
-FROM lukemathwalker/cargo-chef:latest-rust-1.75-bookworm AS chef
+FROM lukemathwalker/cargo-chef:latest-rust-1.79-bookworm AS chef
 WORKDIR /usr/src
 
 ARG CARGO_REGISTRIES_CRATES_IO_PROTOCOL=sparse
@@ -37,6 +38,9 @@ COPY --from=tgi /tgi/proto proto
 COPY --from=tgi /tgi/benchmark benchmark
 COPY --from=tgi /tgi/router router
 COPY --from=tgi /tgi/launcher launcher
+RUN rustc --version
+RUN rustup update
+RUN rustc --version
 RUN cargo build --release --workspace --exclude benchmark
 
 # Fetch optimum-neuron sources
@@ -161,4 +165,4 @@ RUN echo "N.B.: Although this image is released under the Apache-2.0 License, th
 
 LABEL dlc_major_version="1"
 LABEL com.amazonaws.ml.engines.sagemaker.dlc.framework.huggingface.tgi="true"
-LABEL com.amazonaws.sagemaker.capabilities.accept-bind-to-port="true"
+LABEL com.amazonaws.sagemaker.capabilities.accept-bind-to-port="true"
@@ -48,7 +48,7 @@
             {
                 "device": "inf2",
                 "min_version": "0.0.22",
-                "max_version": "0.0.23",
+                "max_version": "0.0.22",
                 "os_version": "ubuntu22.04",
                 "python_version": "py310",
                 "pytorch_version": "2.1.2"
@@ -79,12 +79,11 @@
     "releases": [
         {
             "framework": "TGI",
-            "device": "gpu",
-            "version": "2.2.0",
+            "device": "inf2",
+            "version": "0.0.22",
             "os_version": "ubuntu22.04",
-            "cuda_version": "cu121",
             "python_version": "py310",
-            "pytorch_version": "2.3.0"
+            "pytorch_version": "2.1.2"
         }
     ]
 }
@@ -81,7 +81,7 @@ def get_models_for_image(image_type, device_type):
                 ("google/flan-t5-xxl", None, "ml.g5.12xlarge"),
             ]
         elif device_type == "inf2":
-            return [ ("princeton-nlp/Sheared-LLaMA-1.3B", None, "ml.inf2.xlarge") ]
+            return [ ("HuggingFaceTB/cosmo-1b", None, "ml.inf2.8xlarge") ]
         else:
             raise ValueError(f"No testing models found for {image_type} on instance {device_type}. "
                              f"please check whether the image_type and instance_type are supported.")