diff --git "a/frameworks/vllm/0.16.0/0.16.0GPU\346\265\213\350\257\225.png" "b/frameworks/vllm/0.16.0/0.16.0GPU\346\265\213\350\257\225.png" new file mode 100644 index 0000000000000000000000000000000000000000..0f765458daa78bf43be16e54577a4fe322cef6ea Binary files /dev/null and "b/frameworks/vllm/0.16.0/0.16.0GPU\346\265\213\350\257\225.png" differ diff --git "a/frameworks/vllm/0.16.0/0.16.0\346\227\240GPU\346\265\213\350\257\225.png" "b/frameworks/vllm/0.16.0/0.16.0\346\227\240GPU\346\265\213\350\257\225.png" new file mode 100644 index 0000000000000000000000000000000000000000..f2b79c32f93e6fa035a9c011a93e7db4317c7a21 Binary files /dev/null and "b/frameworks/vllm/0.16.0/0.16.0\346\227\240GPU\346\265\213\350\257\225.png" differ diff --git "a/frameworks/vllm/0.16.0/0.16.0\351\225\234\345\203\217\346\236\204\345\273\272.png" "b/frameworks/vllm/0.16.0/0.16.0\351\225\234\345\203\217\346\236\204\345\273\272.png" new file mode 100644 index 0000000000000000000000000000000000000000..c380adfa473b314bda89b561dc75049522874221 Binary files /dev/null and "b/frameworks/vllm/0.16.0/0.16.0\351\225\234\345\203\217\346\236\204\345\273\272.png" differ diff --git a/frameworks/vllm/0.16.0/Dockerfile b/frameworks/vllm/0.16.0/Dockerfile new file mode 100644 index 0000000000000000000000000000000000000000..d18ba8d3967ea164aab9cec18fc161356a617985 --- /dev/null +++ b/frameworks/vllm/0.16.0/Dockerfile @@ -0,0 +1,37 @@ +# Base image: OpenCloudOS 9 + CUDA 12.8 devel variant, for frameworks that need GPU acceleration +FROM opencloudos/opencloudos9-cuda-devel:12.8 + +# Image metadata +LABEL maintainer="pangxb666" +LABEL org.opencontainers.image.source="https://gitee.com/OpenCloudOS/ai-agent-container" +LABEL org.opencontainers.image.description="vLLM 0.16.0 (GPU) on OpenCloudOS 9" + +# Install Python 3.11 and pip, then clean the dnf cache (plus the legacy yum path) in the same layer to keep the image small +RUN dnf install -y \ + python3 \ + python3-pip \ + python3-devel \ + gcc \ + gcc-c++ \ + && dnf clean all \ + && rm -rf /var/cache/dnf/* /var/cache/yum/* + +# Install vLLM from the Tsinghua PyPI mirror; --extra-index-url adds the PyTorch cu128 index so the CUDA 12.8 torch wheel can be resolved +RUN pip3 install 
--no-cache-dir vllm==0.16.0 \ + -i https://pypi.tuna.tsinghua.edu.cn/simple \ + --trusted-host pypi.tuna.tsinghua.edu.cn \ + --extra-index-url https://download.pytorch.org/whl/cu128 + +# Expose all host GPUs to the container +ENV NVIDIA_VISIBLE_DEVICES=all +# OC9 installs pip packages under lib64; register every nvidia shared-library directory with ldconfig so libraries such as libnvshmem_host.so.3 can be found +RUN find /usr/local/lib/python3.11/site-packages/nvidia \ + /usr/local/lib64/python3.11/site-packages/nvidia \ + -maxdepth 3 -name "*.so*" -exec dirname {} \; 2>/dev/null | sort -u \ + > /etc/ld.so.conf.d/nvidia-python.conf && ldconfig + +# Record the image build time for traceability (date writes the file directly; no echo/command-substitution round trip) +RUN date +"%Y-%m-%dT%H:%M:%S%z" > /opencloudos_build_date.txt + +# Default to an interactive Python 3.11 shell +CMD ["python3"] diff --git a/frameworks/vllm/0.16.0/README.md b/frameworks/vllm/0.16.0/README.md new file mode 100644 index 0000000000000000000000000000000000000000..2c7a9f54efead1cac03f112e6a2044fa5538608d --- /dev/null +++ b/frameworks/vllm/0.16.0/README.md @@ -0,0 +1,41 @@ +# vLLM 0.16.0 on OpenCloudOS 9 + +## 基本信息 + +- **框架版本**:0.16.0 +- **基础镜像**:opencloudos/opencloudos9-cuda-devel:12.8 +- **Python 版本**:3.11 +- **CUDA 版本**:12.8 +- **开源地址**:https://github.com/vllm-project/vllm + +## 简介 + +vLLM 是高吞吐量、低延迟的大语言模型推理与服务引擎,支持 PagedAttention、连续批处理、OpenAI 兼容 API 等特性。 + +## 构建 + +```bash +docker build -t oc9-vllm:0.16.0 . 
+``` + +## 使用示例 + +```bash +# 验证版本 +docker run --rm oc9-vllm:0.16.0 \ + python3.11 -c "import importlib.metadata; print(importlib.metadata.version('vllm'))" + +# 运行基础测试(在宿主机上执行;未检测到 nvidia-smi 时自动跳过 GPU 阶段) +bash test.sh oc9-vllm:0.16.0 + +# 启动 OpenAI 兼容推理服务(需要 GPU + 模型) +docker run --rm --gpus all -p 8000:8000 oc9-vllm:0.16.0 \ + python3.11 -m vllm.entrypoints.openai.api_server \ + --model <model> \ + --port 8000 +``` + +## 已知问题 + +- 安装依赖 torch+CUDA wheel,如网络受限需配置代理或使用离线 wheel +- 完整推理测试需在 GPU 实例上进行,并提前准备好模型文件 diff --git a/frameworks/vllm/0.16.0/build.conf b/frameworks/vllm/0.16.0/build.conf new file mode 100644 index 0000000000000000000000000000000000000000..cc8fbefad0c8357fc73145cd9d892865242eab59 --- /dev/null +++ b/frameworks/vllm/0.16.0/build.conf @@ -0,0 +1,4 @@ +# vllm 0.16.0 on OpenCloudOS 9 (GPU) +IMAGE_NAME=oc9-vllm +IMAGE_TAG=0.16.0 +GPU_TEST=true diff --git a/frameworks/vllm/0.16.0/test.sh b/frameworks/vllm/0.16.0/test.sh new file mode 100644 index 0000000000000000000000000000000000000000..3e2eeaac9cf6affc6fe6beb72cea02ae54f5f0f5 --- /dev/null +++ b/frameworks/vllm/0.16.0/test.sh @@ -0,0 +1,101 @@ +#!/bin/bash +set -e + +IMAGE="${1:?ERROR: 缺少镜像参数。用法: bash test.sh <image>}" + +echo "=== vLLM 0.16.0 基础功能测试 ===" +echo "运行时间:$(date '+%Y-%m-%d %H:%M:%S')" +echo "" + +# ============================================================ +# Phase 1: CPU-safe checks (no GPU needed; can run right after the image is built) +# ============================================================ +echo "--- 阶段一:安装验证 ---" + +echo -n "检查 vllm 版本... " +sudo docker run --rm "$IMAGE" python3 -c " +import importlib.metadata +version = importlib.metadata.version('vllm') +print(version) +assert version == '0.16.0', f'期望 0.16.0,实际 {version}' +" && echo "✓ 通过" || { echo "✗ 失败"; exit 1; } + +echo -n "检查 LLM / SamplingParams 导入... " +sudo docker run --rm "$IMAGE" python3 -c " +from vllm import LLM, SamplingParams +print('ok') +" && echo "✓ 通过" || { echo "✗ 失败"; exit 1; } + +echo -n "检查 AsyncLLMEngine 导入... 
" +sudo docker run --rm "$IMAGE" python3 -c " +from vllm import AsyncLLMEngine +print('ok') +" && echo "✓ 通过" || { echo "✗ 失败"; exit 1; } + +echo -n "检查 EngineArgs 导入... " +sudo docker run --rm "$IMAGE" python3 -c " +from vllm.engine.arg_utils import EngineArgs +print('ok') +" && echo "✓ 通过" || { echo "✗ 失败"; exit 1; } + +echo -n "检查 OpenAI 兼容入口导入... " +sudo docker run --rm "$IMAGE" python3 -c " +from vllm.entrypoints.openai.chat_completion.serving import OpenAIServingChat +print('ok') +" && echo "✓ 通过" || { echo "✗ 失败"; exit 1; } + +echo -n "检查 SamplingParams 参数构造... " +sudo docker run --rm "$IMAGE" python3 -c " +from vllm import SamplingParams +params = SamplingParams(temperature=0.8, top_p=0.95, max_tokens=128) +assert params.temperature == 0.8 and params.max_tokens == 128 +print(f'temperature={params.temperature}, max_tokens={params.max_tokens}') +" && echo "✓ 通过" || { echo "✗ 失败"; exit 1; } + +echo -n "检查 torch CUDA 构建... " +sudo docker run --rm "$IMAGE" python3 -c " +import torch +assert torch.version.cuda is not None, 'torch 未使用 CUDA 构建' +print(f'torch={torch.__version__}, cuda={torch.version.cuda}') +" && echo "✓ 通过" || { echo "✗ 失败"; exit 1; } + +echo "" + +# ============================================================ +# Phase 2: GPU runtime checks (requires the NVIDIA GPU driver on the host) +# ============================================================ +echo "--- 阶段二:GPU 运行时检查 ---" + +if ! command -v nvidia-smi &>/dev/null; then + echo "⚠ 未检测到 nvidia-smi,跳过 GPU 测试(请在 GPU 实例上运行)" + echo "" + echo "=== 阶段一全部通过,阶段二已跳过(无 GPU 环境) ===" + exit 0 +fi + +echo -n "检查 nvidia-smi... " +sudo docker run --rm --gpus all "$IMAGE" nvidia-smi --query-gpu=name,driver_version,memory.total --format=csv,noheader \ + && echo "✓ 通过" || { echo "✗ 失败"; exit 1; } + +echo -n "检查 torch.cuda.is_available()... 
" +sudo docker run --rm --gpus all "$IMAGE" python3 -c " +import torch +assert torch.cuda.is_available(), 'torch 无法访问 GPU' +print(f'GPU 数量: {torch.cuda.device_count()}') +for i in range(torch.cuda.device_count()): + print(f' GPU {i}: {torch.cuda.get_device_name(i)}') +" && echo "✓ 通过" || { echo "✗ 失败"; exit 1; } + +echo -n "检查 vllm 平台探测... " +sudo docker run --rm --gpus all "$IMAGE" python3 -c " +from vllm.platforms import current_platform +print(f'平台: {type(current_platform).__name__}') +" && echo "✓ 通过" || { echo "✗ 失败"; exit 1; } + +echo "" +echo "--- 阶段三:完整推理测试 ---" +echo " 请使用项目根目录的 cuda_test.sh:" +echo " bash cuda_test.sh --vllm-version 0.16.0 --vllm-model <model>" +echo "" + +echo "=== 所有测试通过 ===" diff --git "a/frameworks/vllm/0.17.0/0.17.0-vLLM \346\234\215\345\212\241\345\220\257\345\212\250\346\210\220\345\212\237\346\227\245\345\277\227.png" "b/frameworks/vllm/0.17.0/0.17.0-vLLM \346\234\215\345\212\241\345\220\257\345\212\250\346\210\220\345\212\237\346\227\245\345\277\227.png" new file mode 100644 index 0000000000000000000000000000000000000000..fcd172cb0206f673157294d9868efa666e487265 Binary files /dev/null and "b/frameworks/vllm/0.17.0/0.17.0-vLLM \346\234\215\345\212\241\345\220\257\345\212\250\346\210\220\345\212\237\346\227\245\345\277\227.png" differ diff --git "a/frameworks/vllm/0.17.0/0.17.0-\346\250\241\345\236\213API \345\257\271\350\257\235\346\265\213\350\257\225.png" "b/frameworks/vllm/0.17.0/0.17.0-\346\250\241\345\236\213API \345\257\271\350\257\235\346\265\213\350\257\225.png" new file mode 100644 index 0000000000000000000000000000000000000000..9f9f5567e9b731d9f89fdcd583bc6b7e54aa58a5 Binary files /dev/null and "b/frameworks/vllm/0.17.0/0.17.0-\346\250\241\345\236\213API \345\257\271\350\257\235\346\265\213\350\257\225.png" differ diff --git "a/frameworks/vllm/0.17.0/0.17.0GPU\346\265\213\350\257\225.png" "b/frameworks/vllm/0.17.0/0.17.0GPU\346\265\213\350\257\225.png" new file mode 100644 index 
0000000000000000000000000000000000000000..d5a308b11be2f2b914457ffb026abae06d83a0a4 Binary files /dev/null and "b/frameworks/vllm/0.17.0/0.17.0GPU\346\265\213\350\257\225.png" differ diff --git "a/frameworks/vllm/0.17.0/0.17.0\346\227\240GPU\346\265\213\350\257\225.png" "b/frameworks/vllm/0.17.0/0.17.0\346\227\240GPU\346\265\213\350\257\225.png" new file mode 100644 index 0000000000000000000000000000000000000000..4424b6481b1cd39cf9ca743843e0628f2c28d2f0 Binary files /dev/null and "b/frameworks/vllm/0.17.0/0.17.0\346\227\240GPU\346\265\213\350\257\225.png" differ diff --git "a/frameworks/vllm/0.17.0/0.17.0\351\225\234\345\203\217\346\236\204\345\273\272.png" "b/frameworks/vllm/0.17.0/0.17.0\351\225\234\345\203\217\346\236\204\345\273\272.png" new file mode 100644 index 0000000000000000000000000000000000000000..3029d7964bb1ac9b884f0b949c637847c1498eb1 Binary files /dev/null and "b/frameworks/vllm/0.17.0/0.17.0\351\225\234\345\203\217\346\236\204\345\273\272.png" differ diff --git a/frameworks/vllm/0.17.0/Dockerfile b/frameworks/vllm/0.17.0/Dockerfile new file mode 100644 index 0000000000000000000000000000000000000000..1d6fd87fff520c511acba77476a212f429790bfe --- /dev/null +++ b/frameworks/vllm/0.17.0/Dockerfile @@ -0,0 +1,32 @@ +# Base image: OpenCloudOS 9 + CUDA 12.8 devel variant, for frameworks that need GPU acceleration +FROM opencloudos/opencloudos9-cuda-devel:12.8 + +# Image metadata +LABEL maintainer="pangxb666" +LABEL org.opencontainers.image.source="https://gitee.com/OpenCloudOS/ai-agent-container" +LABEL org.opencontainers.image.description="vLLM 0.17.0 (GPU) on OpenCloudOS 9" + +# Install Python 3.11 and pip, then clean the dnf cache (plus the legacy yum path) in the same layer to keep the image small +RUN dnf install -y \ + python3 \ + python3-pip \ + python3-devel \ + gcc \ + gcc-c++ \ + && dnf clean all \ + && rm -rf /var/cache/dnf/* /var/cache/yum/* + +# Install vLLM from the Tsinghua PyPI mirror; --extra-index-url adds the PyTorch cu128 index so the CUDA 12.8 torch wheel can be resolved +RUN pip3 install --no-cache-dir vllm==0.17.0 \ + -i https://pypi.tuna.tsinghua.edu.cn/simple \ + --trusted-host pypi.tuna.tsinghua.edu.cn \ + --extra-index-url 
https://download.pytorch.org/whl/cu128 + +# Expose all host GPUs to the container +ENV NVIDIA_VISIBLE_DEVICES=all + +# Record the image build time for traceability (date writes the file directly; no echo/command-substitution round trip) +RUN date +"%Y-%m-%dT%H:%M:%S%z" > /opencloudos_build_date.txt + +# Default to an interactive Python 3.11 shell +CMD ["python3"] diff --git a/frameworks/vllm/0.17.0/README.md b/frameworks/vllm/0.17.0/README.md new file mode 100644 index 0000000000000000000000000000000000000000..a48d1eb03894610189138dfa0746b797e278b0b8 --- /dev/null +++ b/frameworks/vllm/0.17.0/README.md @@ -0,0 +1,41 @@ +# vLLM 0.17.0 on OpenCloudOS 9 + +## 基本信息 + +- **框架版本**:0.17.0 +- **基础镜像**:opencloudos/opencloudos9-cuda-devel:12.8 +- **Python 版本**:3.11 +- **CUDA 版本**:12.8 +- **开源地址**:https://github.com/vllm-project/vllm + +## 简介 + +vLLM 是高吞吐量、低延迟的大语言模型推理与服务引擎,支持 PagedAttention、连续批处理、OpenAI 兼容 API 等特性。 + +## 构建 + +```bash +docker build -t oc9-vllm:0.17.0 . +``` + +## 使用示例 + +```bash +# 验证版本 +docker run --rm oc9-vllm:0.17.0 \ + python3.11 -c "import importlib.metadata; print(importlib.metadata.version('vllm'))" + +# 运行基础测试(在宿主机上执行;未检测到 nvidia-smi 时自动跳过 GPU 阶段) +bash test.sh oc9-vllm:0.17.0 + +# 启动 OpenAI 兼容推理服务(需要 GPU + 模型) +docker run --rm --gpus all -p 8000:8000 oc9-vllm:0.17.0 \ + python3.11 -m vllm.entrypoints.openai.api_server \ + --model <model> \ + --port 8000 +``` + +## 已知问题 + +- 安装依赖 torch+CUDA wheel,如网络受限需配置代理或使用离线 wheel +- 完整推理测试需在 GPU 实例上进行,并提前准备好模型文件 diff --git a/frameworks/vllm/0.17.0/build.conf b/frameworks/vllm/0.17.0/build.conf new file mode 100644 index 0000000000000000000000000000000000000000..edd551dda36d82e7ed940707a611398ddd263e0f --- /dev/null +++ b/frameworks/vllm/0.17.0/build.conf @@ -0,0 +1,4 @@ +# vllm 0.17.0 on OpenCloudOS 9 (GPU) +IMAGE_NAME=oc9-vllm +IMAGE_TAG=0.17.0 +GPU_TEST=true diff --git a/frameworks/vllm/0.17.0/test.sh b/frameworks/vllm/0.17.0/test.sh new file mode 100644 index 0000000000000000000000000000000000000000..0f488aac55a6d5ca68191c631f1e01f32f499a2f --- /dev/null +++ b/frameworks/vllm/0.17.0/test.sh @@ -0,0 +1,101 @@ +#!/bin/bash +set -e + +IMAGE="${1:?ERROR: 
缺少镜像参数。用法: bash test.sh <image>}" + +echo "=== vLLM 0.17.0 基础功能测试 ===" +echo "运行时间:$(date '+%Y-%m-%d %H:%M:%S')" +echo "" + +# ============================================================ +# Phase 1: CPU-safe checks (no GPU needed; can run right after the image is built) +# ============================================================ +echo "--- 阶段一:安装验证 ---" + +echo -n "检查 vllm 版本... " +sudo docker run --rm "$IMAGE" python3 -c " +import importlib.metadata +version = importlib.metadata.version('vllm') +print(version) +assert version == '0.17.0', f'期望 0.17.0,实际 {version}' +" && echo "✓ 通过" || { echo "✗ 失败"; exit 1; } + +echo -n "检查 LLM / SamplingParams 导入... " +sudo docker run --rm "$IMAGE" python3 -c " +from vllm import LLM, SamplingParams +print('ok') +" && echo "✓ 通过" || { echo "✗ 失败"; exit 1; } + +echo -n "检查 AsyncLLMEngine 导入... " +sudo docker run --rm "$IMAGE" python3 -c " +from vllm import AsyncLLMEngine +print('ok') +" && echo "✓ 通过" || { echo "✗ 失败"; exit 1; } + +echo -n "检查 EngineArgs 导入... " +sudo docker run --rm "$IMAGE" python3 -c " +from vllm.engine.arg_utils import EngineArgs +print('ok') +" && echo "✓ 通过" || { echo "✗ 失败"; exit 1; } + +echo -n "检查 OpenAI 兼容入口导入... " +sudo docker run --rm "$IMAGE" python3 -c " +from vllm.entrypoints.openai.chat_completion.serving import OpenAIServingChat +print('ok') +" && echo "✓ 通过" || { echo "✗ 失败"; exit 1; } + +echo -n "检查 SamplingParams 参数构造... " +sudo docker run --rm "$IMAGE" python3 -c " +from vllm import SamplingParams +params = SamplingParams(temperature=0.8, top_p=0.95, max_tokens=128) +assert params.temperature == 0.8 and params.max_tokens == 128 +print(f'temperature={params.temperature}, max_tokens={params.max_tokens}') +" && echo "✓ 通过" || { echo "✗ 失败"; exit 1; } + +echo -n "检查 torch CUDA 构建... 
" +sudo docker run --rm "$IMAGE" python3 -c " +import torch +assert torch.version.cuda is not None, 'torch 未使用 CUDA 构建' +print(f'torch={torch.__version__}, cuda={torch.version.cuda}') +" && echo "✓ 通过" || { echo "✗ 失败"; exit 1; } + +echo "" + +# ============================================================ +# Phase 2: GPU runtime checks (requires the NVIDIA GPU driver on the host) +# ============================================================ +echo "--- 阶段二:GPU 运行时检查 ---" + +if ! command -v nvidia-smi &>/dev/null; then + echo "⚠ 未检测到 nvidia-smi,跳过 GPU 测试(请在 GPU 实例上运行)" + echo "" + echo "=== 阶段一全部通过,阶段二已跳过(无 GPU 环境) ===" + exit 0 +fi + +echo -n "检查 nvidia-smi... " +sudo docker run --rm --gpus all "$IMAGE" nvidia-smi --query-gpu=name,driver_version,memory.total --format=csv,noheader \ + && echo "✓ 通过" || { echo "✗ 失败"; exit 1; } + +echo -n "检查 torch.cuda.is_available()... " +sudo docker run --rm --gpus all "$IMAGE" python3 -c " +import torch +assert torch.cuda.is_available(), 'torch 无法访问 GPU' +print(f'GPU 数量: {torch.cuda.device_count()}') +for i in range(torch.cuda.device_count()): + print(f' GPU {i}: {torch.cuda.get_device_name(i)}') +" && echo "✓ 通过" || { echo "✗ 失败"; exit 1; } + +echo -n "检查 vllm 平台探测... 
" +sudo docker run --rm --gpus all "$IMAGE" python3 -c " +from vllm.platforms import current_platform +print(f'平台: {type(current_platform).__name__}') +" && echo "✓ 通过" || { echo "✗ 失败"; exit 1; } + +echo "" +echo "--- 阶段三:完整推理测试 ---" +echo " 请使用项目根目录的 cuda_test.sh:" +echo " bash cuda_test.sh --vllm-version 0.17.0 --vllm-model <model>" +echo "" + +echo "=== 所有测试通过 ===" diff --git "a/frameworks/vllm/0.17.1/0.17.1GPU\346\265\213\350\257\225.png" "b/frameworks/vllm/0.17.1/0.17.1GPU\346\265\213\350\257\225.png" new file mode 100644 index 0000000000000000000000000000000000000000..acb2b66eea236b08b0eaea96615420470d449986 Binary files /dev/null and "b/frameworks/vllm/0.17.1/0.17.1GPU\346\265\213\350\257\225.png" differ diff --git "a/frameworks/vllm/0.17.1/0.17.1\346\227\240GPU\346\265\213\350\257\225.png" "b/frameworks/vllm/0.17.1/0.17.1\346\227\240GPU\346\265\213\350\257\225.png" new file mode 100644 index 0000000000000000000000000000000000000000..610089807c442ef8e94f0d73513ee07fe48b24ba Binary files /dev/null and "b/frameworks/vllm/0.17.1/0.17.1\346\227\240GPU\346\265\213\350\257\225.png" differ diff --git "a/frameworks/vllm/0.17.1/0.17.1\351\225\234\345\203\217\346\236\204\345\273\272.png" "b/frameworks/vllm/0.17.1/0.17.1\351\225\234\345\203\217\346\236\204\345\273\272.png" new file mode 100644 index 0000000000000000000000000000000000000000..d2723cac295474c75eae42648c8674fd231ca01e Binary files /dev/null and "b/frameworks/vllm/0.17.1/0.17.1\351\225\234\345\203\217\346\236\204\345\273\272.png" differ diff --git a/frameworks/vllm/0.17.1/Dockerfile b/frameworks/vllm/0.17.1/Dockerfile new file mode 100644 index 0000000000000000000000000000000000000000..0898ab69536ffa7a9cf7e87fd5f630817d6436d8 --- /dev/null +++ b/frameworks/vllm/0.17.1/Dockerfile @@ -0,0 +1,32 @@ +# Base image: OpenCloudOS 9 + CUDA 12.8 devel variant, for frameworks that need GPU acceleration +FROM opencloudos/opencloudos9-cuda-devel:12.8 + +# Image metadata +LABEL maintainer="pangxb666" +LABEL 
org.opencontainers.image.source="https://gitee.com/OpenCloudOS/ai-agent-container" +LABEL org.opencontainers.image.description="vLLM 0.17.1 (GPU) on OpenCloudOS 9" + +# Install Python 3.11 and pip, then clean the dnf cache (plus the legacy yum path) in the same layer to keep the image small +RUN dnf install -y \ + python3 \ + python3-pip \ + python3-devel \ + gcc \ + gcc-c++ \ + && dnf clean all \ + && rm -rf /var/cache/dnf/* /var/cache/yum/* + +# Install vLLM from the Tsinghua PyPI mirror; --extra-index-url adds the PyTorch cu128 index so the CUDA 12.8 torch wheel can be resolved +RUN pip3 install --no-cache-dir vllm==0.17.1 \ + -i https://pypi.tuna.tsinghua.edu.cn/simple \ + --trusted-host pypi.tuna.tsinghua.edu.cn \ + --extra-index-url https://download.pytorch.org/whl/cu128 + +# Expose all host GPUs to the container +ENV NVIDIA_VISIBLE_DEVICES=all + +# Record the image build time for traceability (date writes the file directly; no echo/command-substitution round trip) +RUN date +"%Y-%m-%dT%H:%M:%S%z" > /opencloudos_build_date.txt + +# Default to an interactive Python 3.11 shell +CMD ["python3"] diff --git a/frameworks/vllm/0.17.1/README.md b/frameworks/vllm/0.17.1/README.md new file mode 100644 index 0000000000000000000000000000000000000000..41574244ccb7bec0bce73c92a65530a4b06235a3 --- /dev/null +++ b/frameworks/vllm/0.17.1/README.md @@ -0,0 +1,41 @@ +# vLLM 0.17.1 on OpenCloudOS 9 + +## 基本信息 + +- **框架版本**:0.17.1 +- **基础镜像**:opencloudos/opencloudos9-cuda-devel:12.8 +- **Python 版本**:3.11 +- **CUDA 版本**:12.8 +- **开源地址**:https://github.com/vllm-project/vllm + +## 简介 + +vLLM 是高吞吐量、低延迟的大语言模型推理与服务引擎,支持 PagedAttention、连续批处理、OpenAI 兼容 API 等特性。 + +## 构建 + +```bash +docker build -t oc9-vllm:0.17.1 . 
+``` + +## 使用示例 + +```bash +# 验证版本 +docker run --rm oc9-vllm:0.17.1 \ + python3.11 -c "import importlib.metadata; print(importlib.metadata.version('vllm'))" + +# 运行基础测试(在宿主机上执行;未检测到 nvidia-smi 时自动跳过 GPU 阶段) +bash test.sh oc9-vllm:0.17.1 + +# 启动 OpenAI 兼容推理服务(需要 GPU + 模型) +docker run --rm --gpus all -p 8000:8000 oc9-vllm:0.17.1 \ + python3.11 -m vllm.entrypoints.openai.api_server \ + --model <model> \ + --port 8000 +``` + +## 已知问题 + +- 安装依赖 torch+CUDA wheel,如网络受限需配置代理或使用离线 wheel +- 完整推理测试需在 GPU 实例上进行,并提前准备好模型文件 diff --git a/frameworks/vllm/0.17.1/build.conf b/frameworks/vllm/0.17.1/build.conf new file mode 100644 index 0000000000000000000000000000000000000000..3d14035ea2b73b8ee0687cf599b3d1a355094cf2 --- /dev/null +++ b/frameworks/vllm/0.17.1/build.conf @@ -0,0 +1,4 @@ +# vllm 0.17.1 on OpenCloudOS 9 (GPU) +IMAGE_NAME=oc9-vllm +IMAGE_TAG=0.17.1 +GPU_TEST=true diff --git a/frameworks/vllm/0.17.1/test.sh b/frameworks/vllm/0.17.1/test.sh new file mode 100644 index 0000000000000000000000000000000000000000..23abad8136b08e0a8d1a3eec5a445c939d3a926a --- /dev/null +++ b/frameworks/vllm/0.17.1/test.sh @@ -0,0 +1,101 @@ +#!/bin/bash +set -e + +IMAGE="${1:?ERROR: 缺少镜像参数。用法: bash test.sh <image>}" + +echo "=== vLLM 0.17.1 基础功能测试 ===" +echo "运行时间:$(date '+%Y-%m-%d %H:%M:%S')" +echo "" + +# ============================================================ +# Phase 1: CPU-safe checks (no GPU needed; can run right after the image is built) +# ============================================================ +echo "--- 阶段一:安装验证 ---" + +echo -n "检查 vllm 版本... " +sudo docker run --rm "$IMAGE" python3 -c " +import importlib.metadata +version = importlib.metadata.version('vllm') +print(version) +assert version == '0.17.1', f'期望 0.17.1,实际 {version}' +" && echo "✓ 通过" || { echo "✗ 失败"; exit 1; } + +echo -n "检查 LLM / SamplingParams 导入... " +sudo docker run --rm "$IMAGE" python3 -c " +from vllm import LLM, SamplingParams +print('ok') +" && echo "✓ 通过" || { echo "✗ 失败"; exit 1; } + +echo -n "检查 AsyncLLMEngine 导入... 
" +sudo docker run --rm "$IMAGE" python3 -c " +from vllm import AsyncLLMEngine +print('ok') +" && echo "✓ 通过" || { echo "✗ 失败"; exit 1; } + +echo -n "检查 EngineArgs 导入... " +sudo docker run --rm "$IMAGE" python3 -c " +from vllm.engine.arg_utils import EngineArgs +print('ok') +" && echo "✓ 通过" || { echo "✗ 失败"; exit 1; } + +echo -n "检查 OpenAI 兼容入口导入... " +sudo docker run --rm "$IMAGE" python3 -c " +from vllm.entrypoints.openai.chat_completion.serving import OpenAIServingChat +print('ok') +" && echo "✓ 通过" || { echo "✗ 失败"; exit 1; } + +echo -n "检查 SamplingParams 参数构造... " +sudo docker run --rm "$IMAGE" python3 -c " +from vllm import SamplingParams +params = SamplingParams(temperature=0.8, top_p=0.95, max_tokens=128) +assert params.temperature == 0.8 and params.max_tokens == 128 +print(f'temperature={params.temperature}, max_tokens={params.max_tokens}') +" && echo "✓ 通过" || { echo "✗ 失败"; exit 1; } + +echo -n "检查 torch CUDA 构建... " +sudo docker run --rm "$IMAGE" python3 -c " +import torch +assert torch.version.cuda is not None, 'torch 未使用 CUDA 构建' +print(f'torch={torch.__version__}, cuda={torch.version.cuda}') +" && echo "✓ 通过" || { echo "✗ 失败"; exit 1; } + +echo "" + +# ============================================================ +# Phase 2: GPU runtime checks (requires the NVIDIA GPU driver on the host) +# ============================================================ +echo "--- 阶段二:GPU 运行时检查 ---" + +if ! command -v nvidia-smi &>/dev/null; then + echo "⚠ 未检测到 nvidia-smi,跳过 GPU 测试(请在 GPU 实例上运行)" + echo "" + echo "=== 阶段一全部通过,阶段二已跳过(无 GPU 环境) ===" + exit 0 +fi + +echo -n "检查 nvidia-smi... " +sudo docker run --rm --gpus all "$IMAGE" nvidia-smi --query-gpu=name,driver_version,memory.total --format=csv,noheader \ + && echo "✓ 通过" || { echo "✗ 失败"; exit 1; } + +echo -n "检查 torch.cuda.is_available()... 
" +sudo docker run --rm --gpus all "$IMAGE" python3 -c " +import torch +assert torch.cuda.is_available(), 'torch 无法访问 GPU' +print(f'GPU 数量: {torch.cuda.device_count()}') +for i in range(torch.cuda.device_count()): + print(f' GPU {i}: {torch.cuda.get_device_name(i)}') +" && echo "✓ 通过" || { echo "✗ 失败"; exit 1; } + +echo -n "检查 vllm 平台探测... " +sudo docker run --rm --gpus all "$IMAGE" python3 -c " +from vllm.platforms import current_platform +print(f'平台: {type(current_platform).__name__}') +" && echo "✓ 通过" || { echo "✗ 失败"; exit 1; } + +echo "" +echo "--- 阶段三:完整推理测试 ---" +echo " 请使用项目根目录的 cuda_test.sh:" +echo " bash cuda_test.sh --vllm-version 0.17.1 --vllm-model <model>" +echo "" + +echo "=== 所有测试通过 ==="