diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..1408b1c --- /dev/null +++ b/.gitignore @@ -0,0 +1,3 @@ +.env +.venv + diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..1408b1c --- /dev/null +++ b/.gitignore @@ -0,0 +1,3 @@ +.env +.venv + diff --git a/README.md b/README.md new file mode 100644 index 0000000..0da2acd --- /dev/null +++ b/README.md @@ -0,0 +1,24 @@ +# Docker For Kotoba Whisper v2.2 + +## build +1. HF_TOKEN を設定する。 +``` +echo "HF_TOKEN=" > .env + +Ex) +echo "HF_TOKEN=hf_xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx" > .env +``` + +2. ビルドする。 +``` +docker-compose build +``` + +## run +``` +docker-compose up -d +``` + +## Access +curl -X POST http://127.0.0.1:50022/transcribe -F "file=@sample.wav" + diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..1408b1c --- /dev/null +++ b/.gitignore @@ -0,0 +1,3 @@ +.env +.venv + diff --git a/README.md b/README.md new file mode 100644 index 0000000..0da2acd --- /dev/null +++ b/README.md @@ -0,0 +1,24 @@ +# Docker For Kotoba Whisper v2.2 + +## build +1. HF_TOKEN を設定する。 +``` +echo "HF_TOKEN=" > .env + +Ex) +echo "HF_TOKEN=hf_xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx" > .env +``` + +2. ビルドする。 +``` +docker-compose build +``` + +## run +``` +docker-compose up -d +``` + +## Access +curl -X POST http://127.0.0.1:50022/transcribe -F "file=@sample.wav" + diff --git a/docker-compose.yml b/docker-compose.yml new file mode 100644 index 0000000..c7609ba --- /dev/null +++ b/docker-compose.yml @@ -0,0 +1,19 @@ +version: '3' +services: + kotoba-whisper: + build: + context: ./kotoba-whisper + dockerfile: Dockerfile + args: + HF_TOKEN: ${HF_TOKEN} + image: kotoba-whisper:nvidia-latest + ports: + - 0.0.0.0:50022:50022 + tty: true + deploy: + resources: + reservations: + devices: + - driver: nvidia + count: 1 + capabilities: [gpu] diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..1408b1c --- /dev/null +++ b/.gitignore @@ -0,0 +1,3 @@ +.env +.venv + diff --git a/README.md b/README.md new file mode 100644 index 0000000..0da2acd --- /dev/null +++ b/README.md @@ -0,0 +1,24 @@ +# Docker For Kotoba Whisper v2.2 + +## build +1. HF_TOKEN を設定する。 +``` +echo "HF_TOKEN=" > .env + +Ex) +echo "HF_TOKEN=hf_xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx" > .env +``` + +2. ビルドする。 +``` +docker-compose build +``` + +## run +``` +docker-compose up -d +``` + +## Access +curl -X POST http://127.0.0.1:50022/transcribe -F "file=@sample.wav" + diff --git a/docker-compose.yml b/docker-compose.yml new file mode 100644 index 0000000..c7609ba --- /dev/null +++ b/docker-compose.yml @@ -0,0 +1,19 @@ +version: '3' +services: + kotoba-whisper: + build: + context: ./kotoba-whisper + dockerfile: Dockerfile + args: + HF_TOKEN: ${HF_TOKEN} + image: kotoba-whisper:nvidia-latest + ports: + - 0.0.0.0:50022:50022 + tty: true + deploy: + resources: + reservations: + devices: + - driver: nvidia + count: 1 + capabilities: [gpu] diff --git a/kotoba-whisper/Dockerfile b/kotoba-whisper/Dockerfile new file mode 100644 index 0000000..b8ead8e --- /dev/null +++ b/kotoba-whisper/Dockerfile @@ -0,0 +1,69 @@ +FROM nvidia/cuda:12.6.3-cudnn-runtime-ubuntu24.04 + + +ENV DEBIAN_FRONTEND noninteractive + +# 必要パッケージインストール +RUN \ + apt-get update && \ + apt-get -y upgrade && \ + apt-get install -y \ + sudo \ + wget \ + git \ + curl \ + python3 \ + python3-dev \ + python3-pip \ + ffmpeg \ + build-essential libbz2-dev libdb-dev \ + libreadline-dev libffi-dev libgdbm-dev liblzma-dev \ + libncursesw5-dev libsqlite3-dev libssl-dev \ + zlib1g-dev uuid-dev tk-dev && \ + rm -rf /var/lib/apt/lists/* + +# ユーザー追加 +RUN \ + useradd -m whisper && \ + echo "whisper ALL=(ALL) NOPASSWD:ALL" > /etc/sudoers.d/whisper +USER whisper +WORKDIR /home/whisper + +# pyenv インストール +RUN curl https://pyenv.run | bash + +# pyenv 環境変数 +ENV PYENV_ROOT /home/whisper/.pyenv +ENV PATH $PYENV_ROOT/bin:$PYENV_ROOT/shims:$PATH + +# pyenv 初期化、python ビルド +RUN \ + eval "$(pyenv init --path)" && \ + pyenv update && \ + pyenv install 3.10 && \ + pyenv global 3.10 + +# スクリプトファイルコピー、実行権限付与 +COPY kotoba-whisper-server.py /home/whisper/ +RUN sudo chown whisper:whisper kotoba-whisper-server.py && \ + chmod 755 kotoba-whisper-server.py + +# venv に pip install +ARG HF_TOKEN +RUN python -m venv ~/.venv && \ + . /home/whisper/.venv/bin/activate && \ + pip install --upgrade pip && \ + pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu126 && \ + pip install python-multipart && \ + pip install --upgrade transformers accelerate torchaudio && \ + pip install "punctuators==0.0.5" && \ + pip install "pyannote.audio" && \ + pip install fastapi uvicorn && \ + pip install git+https://github.com/huggingface/diarizers.git && \ + pip cache purge && \ + git config --global credential.helper store && \ + huggingface-cli login --token "$HF_TOKEN" --add-to-git-credential && \ + /home/whisper/.venv/bin/python kotoba-whisper-server.py + +CMD ["/home/whisper/.venv/bin/python", "-m", "uvicorn", "kotoba-whisper-server:app", "--host", "0.0.0.0", "--port", "50022"] + diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..1408b1c --- /dev/null +++ b/.gitignore @@ -0,0 +1,3 @@ +.env +.venv + diff --git a/README.md b/README.md new file mode 100644 index 0000000..0da2acd --- /dev/null +++ b/README.md @@ -0,0 +1,24 @@ +# Docker For Kotoba Whisper v2.2 + +## build +1. HF_TOKEN を設定する。 +``` +echo "HF_TOKEN=" > .env + +Ex) +echo "HF_TOKEN=hf_xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx" > .env +``` + +2. ビルドする。 +``` +docker-compose build +``` + +## run +``` +docker-compose up -d +``` + +## Access +curl -X POST http://127.0.0.1:50022/transcribe -F "file=@sample.wav" + diff --git a/docker-compose.yml b/docker-compose.yml new file mode 100644 index 0000000..c7609ba --- /dev/null +++ b/docker-compose.yml @@ -0,0 +1,19 @@ +version: '3' +services: + kotoba-whisper: + build: + context: ./kotoba-whisper + dockerfile: Dockerfile + args: + HF_TOKEN: ${HF_TOKEN} + image: kotoba-whisper:nvidia-latest + ports: + - 0.0.0.0:50022:50022 + tty: true + deploy: + resources: + reservations: + devices: + - driver: nvidia + count: 1 + capabilities: [gpu] diff --git a/kotoba-whisper/Dockerfile b/kotoba-whisper/Dockerfile new file mode 100644 index 0000000..b8ead8e --- /dev/null +++ b/kotoba-whisper/Dockerfile @@ -0,0 +1,69 @@ +FROM nvidia/cuda:12.6.3-cudnn-runtime-ubuntu24.04 + + +ENV DEBIAN_FRONTEND noninteractive + +# 必要パッケージインストール +RUN \ + apt-get update && \ + apt-get -y upgrade && \ + apt-get install -y \ + sudo \ + wget \ + git \ + curl \ + python3 \ + python3-dev \ + python3-pip \ + ffmpeg \ + build-essential libbz2-dev libdb-dev \ + libreadline-dev libffi-dev libgdbm-dev liblzma-dev \ + libncursesw5-dev libsqlite3-dev libssl-dev \ + zlib1g-dev uuid-dev tk-dev && \ + rm -rf /var/lib/apt/lists/* + +# ユーザー追加 +RUN \ + useradd -m whisper && \ + echo "whisper ALL=(ALL) NOPASSWD:ALL" > /etc/sudoers.d/whisper +USER whisper +WORKDIR /home/whisper + +# pyenv インストール +RUN curl https://pyenv.run | bash + +# pyenv 環境変数 +ENV PYENV_ROOT /home/whisper/.pyenv +ENV PATH $PYENV_ROOT/bin:$PYENV_ROOT/shims:$PATH + +# pyenv 初期化、python ビルド +RUN \ + eval "$(pyenv init --path)" && \ + pyenv update && \ + pyenv install 3.10 && \ + pyenv global 3.10 + +# スクリプトファイルコピー、実行権限付与 +COPY kotoba-whisper-server.py /home/whisper/ +RUN sudo chown whisper:whisper kotoba-whisper-server.py && \ + chmod 755 kotoba-whisper-server.py + +# venv に pip install +ARG HF_TOKEN +RUN python -m venv ~/.venv && \ + . /home/whisper/.venv/bin/activate && \ + pip install --upgrade pip && \ + pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu126 && \ + pip install python-multipart && \ + pip install --upgrade transformers accelerate torchaudio && \ + pip install "punctuators==0.0.5" && \ + pip install "pyannote.audio" && \ + pip install fastapi uvicorn && \ + pip install git+https://github.com/huggingface/diarizers.git && \ + pip cache purge && \ + git config --global credential.helper store && \ + huggingface-cli login --token "$HF_TOKEN" --add-to-git-credential && \ + /home/whisper/.venv/bin/python kotoba-whisper-server.py + +CMD ["/home/whisper/.venv/bin/python", "-m", "uvicorn", "kotoba-whisper-server:app", "--host", "0.0.0.0", "--port", "50022"] + diff --git a/kotoba-whisper/kotoba-whisper-server.py b/kotoba-whisper/kotoba-whisper-server.py new file mode 100755 index 0000000..b9271eb --- /dev/null +++ b/kotoba-whisper/kotoba-whisper-server.py @@ -0,0 +1,45 @@ +#!/usr/bin/env python + +from fastapi import FastAPI, File, UploadFile +from fastapi.responses import JSONResponse +from io import BytesIO +from transformers import pipeline + +import tempfile +import torch + +app = FastAPI() + +# モデルロード +model_id = "kotoba-tech/kotoba-whisper-v2.2" +torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32 +device = "cuda:0" if torch.cuda.is_available() else "cpu" +model_kwargs = {"attn_implementation": "sdpa"} if torch.cuda.is_available() else {} + +pipe = pipeline( + model=model_id, + torch_dtype=torch_dtype, + device=device, + model_kwargs=model_kwargs, + batch_size=8, + trust_remote_code=True, + ) + +@app.post("/transcribe") +async def transcribe_audio(file: UploadFile = File(...)): + if not file.filename.endswith((".mp3", ".wav", ".m4a")): + return JSONResponse(content={"error": "Invalid file format"}, status_code=400) + + audio_bytes = await file.read() + + # 一時ファイルとして保存 + with tempfile.NamedTemporaryFile(suffix=file.filename[-4:], delete=True) as tmp: + tmp.write(audio_bytes) + tmp.flush() + + try: + result = pipe(tmp.name, chunk_length_s=15) + return result + except Exception as e: + return JSONResponse(content={"error": str(e)}, status_code=500) +