From 13acda81ec42eac6eff2af06ae13e87bdc6fd185 Mon Sep 17 00:00:00 2001 From: amjad Date: Wed, 23 Jul 2025 10:20:02 +0200 Subject: [PATCH 1/5] Update README.md --- README.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/README.md b/README.md index 49e76c44..633a4ea8 100644 --- a/README.md +++ b/README.md @@ -527,6 +527,9 @@ If you need higher resolution, you could apply super resolution models such as [ - Jitter: There exists some jitter as the current pipeline adopts single-frame generation. +## Replicate +//TODO + # Citation ```bib @article{musetalk, @@ -542,3 +545,4 @@ If you need higher resolution, you could apply super resolution models such as [ 1. `other opensource model`: Other open-source models used must comply with their license, such as `whisper`, `ft-mse-vae`, `dwpose`, `S3FD`, etc.. 1. The testdata are collected from internet, which are available for non-commercial research purposes only. 1. `AIGC`: This project strives to impact the domain of AI-driven video generation positively. Users are granted the freedom to create videos using this tool, but they are expected to comply with local laws and utilize it responsibly. The developers do not assume any responsibility for potential misuse by users. 
+ From c8f51bb5635eb964f85b3aaf96d3356b10aa02f9 Mon Sep 17 00:00:00 2001 From: amjad Date: Wed, 23 Jul 2025 10:20:42 +0200 Subject: [PATCH 2/5] Create training-data.md --- training-data.md | 1 + 1 file changed, 1 insertion(+) create mode 100644 training-data.md diff --git a/training-data.md b/training-data.md new file mode 100644 index 00000000..75dc95d1 --- /dev/null +++ b/training-data.md @@ -0,0 +1 @@ +### Training Data From c8babae5893fbb0fde3dd6f1cb6dfbe0e1377470 Mon Sep 17 00:00:00 2001 From: hunzai Date: Wed, 23 Jul 2025 10:54:17 +0200 Subject: [PATCH 3/5] Add cog.yaml from feat/optimize-gpu branch --- cog.yaml | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) create mode 100644 cog.yaml diff --git a/cog.yaml b/cog.yaml new file mode 100644 index 00000000..4f0ee988 --- /dev/null +++ b/cog.yaml @@ -0,0 +1,23 @@ +build: + gpu: true + cog_runtime: true + python_version: "3.10" + system_packages: + - libgl1-mesa-glx + - libglib2.0-0 + - ninja-build + - gcc + - g++ + - ffmpeg + + python_requirements: requirements.txt + run: + - pip install --no-cache-dir -U openmim + - mim install mmengine + - mim install "mmcv==2.0.1" + - mim install "mmdet==3.1.0" + - mim install "mmpose==1.1.0" + +predict: "predict.py:Predictor" +# Target of a bare `cog push` (no image argument); name assumed from the workflow input hint - confirm. +image: "r8.im/hunzai/musetalk" From a4b2f1812b24d84c6b92667b63790d94a62b4cee Mon Sep 17 00:00:00 2001 From: hunzai Date: Wed, 23 Jul 2025 10:54:40 +0200 Subject: [PATCH 4/5] Add workflows from feat/optimize-gpu branch --- .github/workflows/push.yaml | 58 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 58 insertions(+) create mode 100644 .github/workflows/push.yaml diff --git a/.github/workflows/push.yaml b/.github/workflows/push.yaml new file mode 100644 index 00000000..5d20efd6 --- /dev/null +++ b/.github/workflows/push.yaml @@ -0,0 +1,58 @@ +name: Push to Replicate + +on: + # Workflow dispatch allows you to manually trigger the workflow from GitHub.com + # Go to your repo, click "Actions", click "Push to Replicate", click "Run workflow" + workflow_dispatch: + inputs: + model_name: + description: 
'hunzai/musetalk' + + # # Uncomment these lines to trigger the workflow on every push to the main branch + # push: + # branches: + # - main + +jobs: + push_to_replicate: + name: Push to Replicate + + # If your model is large, the default GitHub Actions runner may not + # have enough disk space. If you need more space you can set up a + # bigger runner on GitHub. + runs-on: ubuntu-latest + + steps: + # This action cleans up disk space to make more room for your + # model code, weights, etc. + - name: Free disk space + uses: jlumbroso/free-disk-space@v1.3.1 + with: + tool-cache: false + docker-images: false + + - name: Checkout + uses: actions/checkout@v4 + + # This action installs Docker buildx and Cog (and optionally CUDA) + - name: Setup Cog + uses: replicate/setup-cog@v2 + with: + # If you add a CI auth token to your GitHub repository secrets, + # the action will authenticate with Replicate automatically so you + # can push your model without needing to pass in a token. + # + # To generate a CLI auth token, run `cog login` or visit this page + # in your browser: https://replicate.com/account/api-token + token: ${{ secrets.REPLICATE_CLI_AUTH_TOKEN }} + + # If you trigger the workflow manually, you can specify the model name. + # If you leave it blank (or if the workflow is triggered by a push), the + # model name will be derived from the `image` value in cog.yaml. 
+ - name: Push to Replicate + env: + # Hand the input to the script through the environment: splicing the + # expression into the script text would let its value run as shell code. + MODEL_NAME: ${{ inputs.model_name }} + run: | + if [ -n "$MODEL_NAME" ]; then cog push "r8.im/$MODEL_NAME"; else cog push; fi From 7b670388d0367bb0fd85ef43f5b32e618db7050f Mon Sep 17 00:00:00 2001 From: hunzai Date: Wed, 23 Jul 2025 10:58:21 +0200 Subject: [PATCH 5/5] fix: requirements --- requirements.txt | 40 +++++++++++++++++++++++++++------------- 1 file changed, 27 insertions(+), 13 deletions(-) diff --git a/requirements.txt b/requirements.txt index e87aa41d..c39f71e7 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,20 +1,34 @@ -diffusers==0.30.2 -accelerate==0.28.0 +# PyTorch (CUDA 11.8 compatible) +torch==2.0.1+cu118 +torchvision==0.15.2+cu118 +torchaudio==2.0.2+cu118 +--extra-index-url https://download.pytorch.org/whl/cu118 + +# Core tools numpy==1.23.5 -tensorflow==2.12.0 -tensorboard==2.12.0 -opencv-python==4.9.0.80 +opencv-python-headless==4.9.0.80 +imageio[ffmpeg] +ffmpeg-python soundfile==0.12.1 +einops==0.8.1 +PyYAML +omegaconf +gdown +requests + +# Transformers and diffusion transformers==4.39.2 huggingface_hub==0.30.2 +diffusers==0.30.2 +accelerate==0.28.0 librosa==0.11.0 -einops==0.8.1 -gradio==5.24.0 -gdown -requests -imageio[ffmpeg] +# TensorFlow for face model (used in MuseTalk) +tensorflow==2.12.0 +tensorboard==2.12.0 -omegaconf -ffmpeg-python -moviepy +# Gradio for UI mode +gradio==5.24.0 + +# OpenMIM installer (used in cog.yaml to install mmcv/mmpose/etc.) +openmim==0.3.9