diff --git a/frameworks/llamafactory/0.9.2/Dockerfile b/frameworks/llamafactory/0.9.2/Dockerfile
new file mode 100644
index 0000000000000000000000000000000000000000..d133396378ca1068d21f8da481c981f6646cd441
--- /dev/null
+++ b/frameworks/llamafactory/0.9.2/Dockerfile
@@ -0,0 +1,95 @@
+# syntax=docker/dockerfile:1.6
+#
+# Dockerfile for LLaMA-Factory 0.9.2
+# --------------------------------------------------------------------
+# Base image: OpenCloudOS 9 (RHEL 9 compatible) + CUDA 12.8 devel
+# Python    : 3.11 (pre-installed in the base image)
+# PyTorch   : TORCH_VERSION from the TORCH_CUDA wheel index
+#             (defaults below: 2.11.0 + cu128)
+# --------------------------------------------------------------------
+
+ARG CUDA_IMAGE=opencloudos/opencloudos9-cuda-devel:12.8
+FROM ${CUDA_IMAGE} AS base
+
+LABEL maintainer="harrywu0913" \
+      org.opencontainers.image.source="https://gitee.com/OpenCloudOS/ai-agent-container" \
+      org.opencontainers.image.description="LLaMA-Factory (GPU) on OpenCloudOS 9"
+
+# ---- Build-time arguments (override with --build-arg) --------------
+# TORCH_VERSION and TORCH_CUDA go together: torch is installed only from
+# the wheel index named by TORCH_CUDA, so that index must publish the
+# requested version.
+ARG LLAMA_FACTORY_VERSION=0.9.2
+ARG TORCH_VERSION=2.11.0
+ARG TORCH_CUDA=cu128
+ARG PYTHON_VERSION=3.11
+ARG APP_USER=llama
+ARG APP_UID=1000
+ARG APP_GID=1000
+
+# ---- Runtime environment ------------------------------------------
+# Hugging Face caches are rooted at HF_HOME. TRANSFORMERS_CACHE is
+# deliberately not set: transformers deprecates it in favour of HF_HOME
+# and emits a FutureWarning when it is present.
+ENV LANG=C.UTF-8 \
+    LC_ALL=C.UTF-8 \
+    PYTHONDONTWRITEBYTECODE=1 \
+    PYTHONUNBUFFERED=1 \
+    PIP_NO_CACHE_DIR=1 \
+    PIP_DISABLE_PIP_VERSION_CHECK=1 \
+    HF_HOME=/workspace/.cache/huggingface \
+    TORCH_HOME=/workspace/.cache/torch \
+    NVIDIA_VISIBLE_DEVICES=all \
+    NVIDIA_DRIVER_CAPABILITIES=compute,utility
+
+# ---- System packages ----------------------------------------------
+# The base image (OpenCloudOS 9) already ships Python 3.11 and pip3,
+# so only CA certificates, curl, git and the tini init helper are added.
+# NOTE(review): no extra repository (e.g. epel-release) is enabled here,
+# so `tini` has to be provided by the repos configured in the base image.
+RUN dnf install -y --setopt=install_weak_deps=False \
+        ca-certificates \
+        curl \
+        git \
+        tini \
+    && ln -sf /usr/bin/python${PYTHON_VERSION} /usr/bin/python \
+    && dnf clean all \
+    && rm -rf /var/cache/dnf
+
+# ---- Non-root user -------------------------------------------------
+# /workspace also holds the cache directories named in ENV above, so it
+# is handed over to the runtime user.
+RUN groupadd --gid ${APP_GID} ${APP_USER} \
+    && useradd --uid ${APP_UID} --gid ${APP_GID} --create-home --shell /bin/bash ${APP_USER} \
+    && mkdir -p /workspace /workspace/.cache \
+    && chown -R ${APP_USER}:${APP_USER} /workspace
+
+WORKDIR /workspace
+
+# ---- Python packages -----------------------------------------------
+# PyTorch first, from the CUDA-specific wheel index.
+RUN python -m pip install --no-cache-dir \
+        --index-url https://download.pytorch.org/whl/${TORCH_CUDA} \
+        "torch==${TORCH_VERSION}"
+
+# Install LLaMA-Factory with the common extras:
+#   torch   - NOTE(review): presumably just declares the torch dependency,
+#             which the wheel installed above should already satisfy
+#   metrics - rouge/bleu/etc.
+# The requirement is quoted so the shell never globs the [extras] brackets.
+RUN python -m pip install --no-cache-dir \
+        pydantic==2.10.6 \
+        "llamafactory[torch,metrics]==${LLAMA_FACTORY_VERSION}"
+
+# ---- Final wiring --------------------------------------------------
+USER ${APP_USER}
+ENV PATH="/home/${APP_USER}/.local/bin:${PATH}"
+
+# Healthcheck: import the library; fails fast if the install broke.
+HEALTHCHECK --interval=30s --timeout=10s --start-period=20s --retries=3 \
+    CMD python -c "import llamafactory; print(llamafactory.__version__)" || exit 1
+
+# tini as PID 1 to reap zombies and forward signals cleanly.
+ENTRYPOINT ["/usr/bin/tini", "--"]
+CMD ["llamafactory-cli", "help"]
+
diff --git a/frameworks/llamafactory/0.9.2/README.md b/frameworks/llamafactory/0.9.2/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..23a11a07161129f247efc2ae3444f9acd6c0be2a
--- /dev/null
+++ b/frameworks/llamafactory/0.9.2/README.md
@@ -0,0 +1,43 @@
+# LLaMA-Factory on OpenCloudOS 9
+
+## 基本信息
+- **框架版本**：v0.9.2
+- **基础镜像**：opencloudos/opencloudos9-cuda-devel:12.8
+- **Python 版本**：3.11
+- **CUDA 版本**：12.8
+
+## 构建
+
+```bash
+docker build -t oc9-llamafactory:0.9.2 .
+```
+
+## 使用示例
+
+```bash
+docker run --rm oc9-llamafactory:0.9.2 \
+    python -c "import llamafactory; print(llamafactory.__version__)"
+```
+
+启动 WebUI（需要 GPU）：
+
+```bash
+docker run --rm --gpus all -p 7860:7860 \
+    -v "$PWD/data:/workspace/data" \
+    -v "$PWD/output:/workspace/output" \
+    -e GRADIO_SERVER_NAME=0.0.0.0 \
+    -e HF_TOKEN="${HF_TOKEN}" \
+    oc9-llamafactory:0.9.2 \
+    llamafactory-cli webui
+```
+
+构建后验证：
+
+```bash
+chmod +x test.sh
+./test.sh "oc9-llamafactory:0.9.2"
+```
+
+成功运行截图见 `test-success.png`。
+
+
diff --git a/frameworks/llamafactory/0.9.2/build.conf b/frameworks/llamafactory/0.9.2/build.conf
new file mode 100644
index 0000000000000000000000000000000000000000..0ed8a0da45d6581570d7e1efeded6153bac9ae95
--- /dev/null
+++ b/frameworks/llamafactory/0.9.2/build.conf
@@ -0,0 +1,4 @@
+# Llama-Factory 0.9.2 on OpenCloudOS 9 (GPU)
+IMAGE_NAME=oc9-llamafactory
+IMAGE_TAG=0.9.2
+GPU_TEST=true
\ No newline at end of file
diff --git a/frameworks/llamafactory/0.9.2/test-success.png b/frameworks/llamafactory/0.9.2/test-success.png
new file mode 100644
index 0000000000000000000000000000000000000000..461b8f057cb245ac14885912ed9714d2468df645
Binary files /dev/null and b/frameworks/llamafactory/0.9.2/test-success.png differ
diff --git a/frameworks/llamafactory/0.9.2/test.sh b/frameworks/llamafactory/0.9.2/test.sh
new file mode 100755
index 0000000000000000000000000000000000000000..dd7dee1829e0a1ad6b90cff1da24c8cf6989f2ac
--- /dev/null
+++ b/frameworks/llamafactory/0.9.2/test.sh
@@ -0,0 +1,90 @@
+#!/bin/bash
+# ---------------------------------------------------------------
+# test.sh - Post-build verification for oc9-llamafactory:0.9.2
+#
+# Usage:
+#   ./test.sh [IMAGE_TAG]
+#
+# Exit code: 0 on success, non-zero on any failed check.
+# --------------------------------------------------------------- +set -euo pipefail + +IMAGE="${1:-oc9-llamafactory:0.9.2}" +EXPECTED_LF_VERSION="0.9.2" +EXPECTED_TORCH_MAJOR="2" + +pass() { printf " \033[32m[✓ PASS]\033[0m %s\n" "$1"; } +fail() { printf " \033[31m[✗ FAIL]\033[0m %s\n" "$1"; exit 1; } +info() { printf "\033[34m==>\033[0m %s\n" "$1"; } + +# --- 0. Prerequisites ------------------------------------------ +info "Checking prerequisites" +command -v docker >/dev/null 2>&1 || fail "docker is not installed" +pass "docker binary found: $(docker --version)" + +# --- 1. Image exists ------------------------------------------- +info "Checking image '${IMAGE}' exists" +if ! docker image inspect "${IMAGE}" >/dev/null 2>&1; then + fail "image '${IMAGE}' not found. Build it first: docker build -t ${IMAGE} ." +fi +pass "image is present locally" + +# Detect whether the host exposes a GPU to Docker. +GPU_FLAG="" +if docker run --rm --gpus all "${IMAGE}" true >/dev/null 2>&1; then + GPU_FLAG="--gpus all" + info "GPU detected - running full test matrix" +else + info "No GPU available - running CPU-only subset" +fi + +# --- 2. Python & framework versions ---------------------------- +info "Verifying Python / PyTorch / LLaMA-Factory versions" +VERS_OUTPUT="$(docker run --rm -i ${GPU_FLAG} "${IMAGE}" python - <<'PY' +import sys, torch, llamafactory +print(f"python={sys.version.split()[0]}") +print(f"torch={torch.__version__}") +print(f"llamafactory={llamafactory.__version__}") +print(f"cuda_available={torch.cuda.is_available()}") +PY +)" +echo "${VERS_OUTPUT}" | sed 's/^/ /' + +echo "${VERS_OUTPUT}" | grep -q "^python=3\.11" \ + && pass "python 3.11 OK" \ + || fail "unexpected python version" + +echo "${VERS_OUTPUT}" | grep -q "^torch=${EXPECTED_TORCH_MAJOR}\." 
\ + && pass "torch ${EXPECTED_TORCH_MAJOR}.x OK" \ + || fail "unexpected torch version" + +echo "${VERS_OUTPUT}" | grep -q "^llamafactory=${EXPECTED_LF_VERSION}" \ + && pass "llamafactory ${EXPECTED_LF_VERSION} OK" \ + || fail "unexpected llamafactory version" + +# --- 3. CUDA visibility (GPU hosts only) ----------------------- +if [[ -n "${GPU_FLAG}" ]]; then + info "Verifying CUDA is visible to PyTorch" + echo "${VERS_OUTPUT}" | grep -q "^cuda_available=True" \ + && pass "torch.cuda.is_available() == True" \ + || fail "CUDA not visible inside container" +fi + +# --- 4. CLI smoke test ----------------------------------------- +info "Running 'llamafactory-cli help'" +if docker run --rm ${GPU_FLAG} "${IMAGE}" llamafactory-cli help >/dev/null 2>&1; then + pass "llamafactory-cli help exited 0" +else + fail "llamafactory-cli help failed" +fi + +# --- 5. Non-root user ------------------------------------------ +info "Verifying the container runs as non-root" +WHOAMI="$(docker run --rm ${GPU_FLAG} "${IMAGE}" whoami)" +[[ "${WHOAMI}" == "llama" ]] \ + && pass "runtime user is '${WHOAMI}'" \ + || fail "expected 'llama', got '${WHOAMI}'" + + +printf "\n\033[32mAll checks passed for %s\033[0m\n" "${IMAGE}" +