From 7689c3282b712d4a273ad264c99331e3832a9c3f Mon Sep 17 00:00:00 2001
From: smy <18813151339@163.com>
Date: Tue, 2 Jun 2026 16:41:41 +0800
Subject: [PATCH 1/2] =?UTF-8?q?=E8=A7=A3=E5=86=B3=E5=AF=B9Blackwell?=
 =?UTF-8?q?=E6=9E=B6=E6=9E=84=E6=98=BE=E5=8D=A1=E4=B8=8D=E6=94=AF=E6=8C=81?=
 =?UTF-8?q?=E9=97=AE=E9=A2=98?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 README.md                                     | 118 +++++++++++++++++-
 create_symlinks.sh                            |  69 ++++++++++
 download_weights-bak.sh                       |  51 ++++++++
 download_weights.sh                           |   0
 entrypoint.sh                                 |   0
 inference.sh                                  |   0
 .../detection/sfd/sfd_detector.py             |   5 +-
 musetalk/utils/preprocessing.py               |  11 ++
 scripts/inference.py                          |  10 +-
 train.sh                                      |   0
 10 files changed, 256 insertions(+), 8 deletions(-)
 create mode 100755 create_symlinks.sh
 create mode 100755 download_weights-bak.sh
 mode change 100644 => 100755 download_weights.sh
 mode change 100644 => 100755 entrypoint.sh
 mode change 100644 => 100755 inference.sh
 mode change 100644 => 100755 train.sh

diff --git a/README.md b/README.md
index 49e76c44..4dec18c3 100644
--- a/README.md
+++ b/README.md
@@ -222,25 +222,28 @@ You can also download the weights manually from the following links:
 1. Download our trained [weights](https://huggingface.co/TMElyralab/MuseTalk/tree/main)
 2. Download the weights of other components:
    - [sd-vae-ft-mse](https://huggingface.co/stabilityai/sd-vae-ft-mse/tree/main)
-   - [whisper](https://huggingface.co/openai/whisper-tiny/tree/main)
+   - [whisper](https://huggingface.co/openai/whisper-tiny/tree/main) (make sure to include `preprocessor_config.json`)
    - [dwpose](https://huggingface.co/yzd-v/DWPose/tree/main)
    - [syncnet](https://huggingface.co/ByteDance/LatentSync/tree/main)
    - [face-parse-bisent](https://drive.google.com/file/d/154JgKpzCPW82qINcVieuPH3fZ2e0P812/view?pli=1)
    - [resnet18](https://download.pytorch.org/models/resnet18-5c106cde.pth)
+   - [s3fd](https://www.adrianbulat.com/downloads/python-fan/s3fd-619a316812.pth) (rename to `s3fd.pth` and place in `models/s3fd/`)
 
 Finally, these weights should be organized in `models` as follows:
 ```
 ./models/
 ├── musetalk
-│   └── musetalk.json
+│   ├── musetalk.json
 │   └── pytorch_model.bin
 ├── musetalkV15
-│   └── musetalk.json
+│   ├── musetalk.json
 │   └── unet.pth
 ├── syncnet
 │   └── latentsync_syncnet.pt
 ├── dwpose
 │   └── dw-ll_ucoco_384.pth
+├── s3fd
+│   └── s3fd.pth
 ├── face-parse-bisent
 │   ├── 79999_iter.pth
 │   └── resnet18-5c106cde.pth
@@ -251,8 +254,13 @@ Finally, these weights should be organized in `models` as follows:
     ├── config.json
     ├── pytorch_model.bin
     └── preprocessor_config.json
-    
+
 ```
+
+> **Note:**
+> - `s3fd/s3fd.pth` is the SFD face detection model weight; download it from [s3fd-619a316812.pth](https://www.adrianbulat.com/downloads/python-fan/s3fd-619a316812.pth) and rename it to `s3fd.pth`.
+> - `whisper/preprocessor_config.json` is required by `AutoFeatureExtractor`. If it is missing from the downloaded whisper weights, you can download it from [openai/whisper-tiny](https://huggingface.co/openai/whisper-tiny/blob/main/preprocessor_config.json).
+> - If you encounter `ImportError: cannot import name 'split_torch_state_dict_into_shards' from 'huggingface_hub'`, upgrade `huggingface_hub` to fix the version incompatibility: `pip install --upgrade huggingface_hub`.
 ## Quickstart
 
 ### Inference
@@ -512,6 +520,108 @@ python -m scripts.inference --inference_config configs/inference/test.yaml --bbo
 
 As a complete solution to virtual human generation, you are suggested to first apply [MuseV](https://github.com/TMElyralab/MuseV) to generate a video (text-to-video, image-to-video or pose-to-video) by referring [this](https://github.com/TMElyralab/MuseV?tab=readme-ov-file#text2video). Frame interpolation is suggested to increase frame rate. Then, you can use `MuseTalk` to generate a lip-sync video by referring [this](https://github.com/TMElyralab/MuseTalk?tab=readme-ov-file#inference).
 
+# Troubleshooting
+
+## New GPU Architecture (Blackwell sm_120)
+
+If you're using an **NVIDIA RTX PRO 6000 Blackwell** or another new GPU with the `sm_120` architecture, you'll encounter:
+
+```
+CUDA error: no kernel image is available for execution on the device
+The current PyTorch install supports CUDA capabilities sm_37 sm_50 ... sm_90.
+```
+
+This is because the default PyTorch 2.0.1 does not support the Blackwell architecture. Follow these steps:
+
+### Step 1: Upgrade PyTorch
+
+Install **PyTorch 2.6+ with CUDA 12.6/12.8** which supports `sm_120`:
+
+```bash
+# For CUDA 12.8 (tested with PyTorch 2.9.0)
+pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu128
+
+# Or for CUDA 12.6
+pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu126
+```
+
+### Step 2: Reinstall mmcv for the New CUDA Version
+
+After upgrading PyTorch, the existing mmcv (compiled for older CUDA) will fail with:
+
+```
+ImportError: libcudart.so.11.0: cannot open shared object file: No such file or directory
+```
+
+**Important version constraint:** `mmdet 3.1.0` and `mmpose 1.1.0` require **mmcv 2.0.1 ~ 2.1.x**. Do **not** use mmcv 2.2.0+: `mmdet` and `mmpose` perform strict runtime version checks on the installed mmcv and will raise `AssertionError`.
+
+If `mim install mmcv` finds a pre-built wheel for your CUDA/PyTorch combo:
+```bash
+pip uninstall mmcv -y
+mim install "mmcv==2.1.0"
+```
+
+If there is **no pre-built wheel** (common for new CUDA/PyTorch combinations), install from source:
+```bash
+pip install setuptools
+pip install mmcv==2.1.0 --no-binary mmcv --no-build-isolation
+```
+
+The `--no-build-isolation` flag is required so that pip uses the `setuptools` already in your environment instead of creating an isolated build environment (which may lack `pkg_resources`).
+
+### Step 3: Verify Installation
+
+```bash
+python -c "import mmcv; import mmdet; import mmpose; print('mmcv:', mmcv.__version__); print('mmdet:', mmdet.__version__); print('mmpose:', mmpose.__version__)"
+```
+
+Expected output:
+```
+mmcv: 2.1.0
+mmdet: 3.1.0
+mmpose: 1.1.0
+```
+
+## PyTorch 2.6+ Checkpoint Loading Error
+
+If you see `_pickle.UnpicklingError: Weights only load failed` when loading the dwpose checkpoint:
+
+```
+WeightsUnpickler error: Unsupported global: GLOBAL numpy.core.multiarray._reconstruct
+```
+
+This is because PyTorch 2.6+ changed the default of `torch.load` to `weights_only=True`. The code has been patched to handle this automatically. If you still encounter this issue, make sure you have the latest version of `musetalk/utils/preprocessing.py`.
+
+## FFmpeg Missing PNG Encoder
+
+```
+[vost#0:0] Automatic encoder selection failed Default encoder for format image2 (codec png) is probably disabled.
+Error opening output files: Encoder not found
+```
+
+This usually happens when FFmpeg is compiled without certain encoders (e.g., Windows static builds with `--disable-x86asm`).
+
+**Solution 1**: Use system FFmpeg:
+```bash
+sudo apt-get install ffmpeg  # Ubuntu/Debian
+```
+
+**Solution 2**: Specify FFmpeg path when running inference:
+```bash
+python -m scripts.inference --ffmpeg_path /usr/bin ...
+```
+
+**Solution 3**: Add system FFmpeg to PATH with higher priority:
+```bash
+export PATH=/usr/bin:$PATH
+```
+
+**Verify** your FFmpeg has PNG encoder:
+```bash
+ffmpeg -encoders | grep png
+# Should show: VF...D png  PNG (Portable Network Graphics) image
+```
+
 # Acknowledgement
 1. We thank open-source components like [whisper](https://github.com/openai/whisper), [dwpose](https://github.com/IDEA-Research/DWPose), [face-alignment](https://github.com/1adrianb/face-alignment), [face-parsing](https://github.com/zllrunning/face-parsing.PyTorch), [S3FD](https://github.com/yxlijun/S3FD.pytorch) and [LatentSync](https://huggingface.co/ByteDance/LatentSync/tree/main). 
 1. MuseTalk has referred much to [diffusers](https://github.com/huggingface/diffusers) and [isaacOnline/whisper](https://github.com/isaacOnline/whisper/tree/extract-embeddings).
diff --git a/create_symlinks.sh b/create_symlinks.sh
new file mode 100755
index 00000000..211ec138
--- /dev/null
+++ b/create_symlinks.sh
@@ -0,0 +1,69 @@
+#!/bin/bash
+# Usage: ./create_symlinks.sh [SOURCE_DIR] [TARGET_DIR]
+
+# Directory holding the downloaded model folders; argument 1 overrides the default.
+SOURCE_DIR="${1:-/mnt/f/Models_Download/MuseTalk}"
+
+# The project's models directory that receives the links; argument 2 overrides the default.
+TARGET_DIR="${2:-/home/smy/LM_projects/agentscope/MuseTalk/models}"
+
+echo "========================================="
+echo "创建 MuseTalk 模型软链接"
+echo "========================================="
+echo "源目录: $SOURCE_DIR"
+echo "目标目录: $TARGET_DIR"
+echo "========================================="
+
+# Abort early when the source directory does not exist.
+if [ ! -d "$SOURCE_DIR" ]; then
+    echo "错误: 源目录不存在: $SOURCE_DIR"
+    exit 1
+fi
+
+# Create the target directory when it is missing; stop if that fails.
+if [ ! -d "$TARGET_DIR" ]; then
+    echo "创建目标目录: $TARGET_DIR"
+    mkdir -p "$TARGET_DIR" || exit 1
+fi
+
+# Link each sub-directory of the source, counting successes and failures separately.
+count=0 failed=0
+for folder in "$SOURCE_DIR"/*/; do
+    # An unmatched glob stays literal, so skip anything that is not a real directory.
+    [ -d "$folder" ] || continue
+    folder_name=$(basename "$folder")
+
+    # Skip hidden folders (names starting with a dot).
+    if [[ "$folder_name" == .* ]]; then
+        echo "跳过隐藏文件夹: $folder_name"
+        continue
+    fi
+
+    # Never overwrite an existing entry; -L also catches dangling symlinks that -e misses.
+    if [ -e "$TARGET_DIR/$folder_name" ] || [ -L "$TARGET_DIR/$folder_name" ]; then
+        echo "警告: 目标已存在，跳过: $folder_name"
+        continue
+    fi
+
+    # Resolve the absolute path that the link will point to.
+    source_absolute_path=$(cd "$folder" && pwd)
+
+    # Test ln directly instead of inspecting $? on a separate line.
+    if ln -s "$source_absolute_path" "$TARGET_DIR/$folder_name"; then
+        echo "✓ 创建软链接: $folder_name -> $source_absolute_path"
+        count=$((count + 1))
+    else
+        echo "✗ 创建软链接失败: $folder_name"
+        failed=$((failed + 1))
+    fi
+done
+
+echo "========================================="
+echo "完成！共创建了 $count 个软链接"
+echo "========================================="
+
+# List the target directory so the result can be inspected.
+echo ""
+echo "创建的软链接列表:"
+ls -la "$TARGET_DIR"
+[ "$failed" -eq 0 ]  # exit non-zero when at least one link could not be created
diff --git a/download_weights-bak.sh b/download_weights-bak.sh
new file mode 100755
index 00000000..affa5a9d
--- /dev/null
+++ b/download_weights-bak.sh
@@ -0,0 +1,51 @@
+#!/bin/bash
+
+# Set the checkpoints directory
+CheckpointsDir="models"
+
+# Create necessary directories
+mkdir -p models/musetalk models/musetalkV15 models/syncnet models/dwpose models/face-parse-bisent models/sd-vae models/whisper
+
+# Install required packages
+pip install -U "huggingface_hub[cli]"
+pip install gdown
+
+# Set HuggingFace mirror endpoint
+export HF_ENDPOINT=https://hf-mirror.com
+
+# Download MuseTalk V1.0 weights
+huggingface-cli download TMElyralab/MuseTalk \
+  --local-dir $CheckpointsDir \
+  --include "musetalk/musetalk.json" "musetalk/pytorch_model.bin"
+
+# Download MuseTalk V1.5 weights (unet.pth)
+huggingface-cli download TMElyralab/MuseTalk \
+  --local-dir $CheckpointsDir \
+  --include "musetalkV15/musetalk.json" "musetalkV15/unet.pth"
+
+# Download SD VAE weights
+huggingface-cli download stabilityai/sd-vae-ft-mse \
+  --local-dir $CheckpointsDir/sd-vae \
+  --include "config.json" "diffusion_pytorch_model.bin"
+
+# Download Whisper weights
+huggingface-cli download openai/whisper-tiny \
+  --local-dir $CheckpointsDir/whisper \
+  --include "config.json" "pytorch_model.bin" "preprocessor_config.json"
+
+# Download DWPose weights
+huggingface-cli download yzd-v/DWPose \
+  --local-dir $CheckpointsDir/dwpose \
+  --include "dw-ll_ucoco_384.pth"
+
+# Download SyncNet weights
+huggingface-cli download ByteDance/LatentSync \
+  --local-dir $CheckpointsDir/syncnet \
+  --include "latentsync_syncnet.pt"
+
+# Download Face Parse Bisent weights
+gdown --id 154JgKpzCPW82qINcVieuPH3fZ2e0P812 -O $CheckpointsDir/face-parse-bisent/79999_iter.pth
+curl -L https://download.pytorch.org/models/resnet18-5c106cde.pth \
+  -o $CheckpointsDir/face-parse-bisent/resnet18-5c106cde.pth
+
+echo "✅ All weights have been downloaded successfully!" 
diff --git a/download_weights.sh b/download_weights.sh
old mode 100644
new mode 100755
diff --git a/entrypoint.sh b/entrypoint.sh
old mode 100644
new mode 100755
diff --git a/inference.sh b/inference.sh
old mode 100644
new mode 100755
diff --git a/musetalk/utils/face_detection/detection/sfd/sfd_detector.py b/musetalk/utils/face_detection/detection/sfd/sfd_detector.py
index 8fbce152..f7fd7c1f 100755
--- a/musetalk/utils/face_detection/detection/sfd/sfd_detector.py
+++ b/musetalk/utils/face_detection/detection/sfd/sfd_detector.py
@@ -8,13 +8,16 @@
 from .bbox import *
 from .detect import *
 
+_PROJECT_ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', '..', '..', '..', '..'))
+_SFD_MODEL_PATH = os.path.join(_PROJECT_ROOT, 'models', 's3fd', 's3fd.pth')
+
 models_urls = {
     's3fd': 'https://www.adrianbulat.com/downloads/python-fan/s3fd-619a316812.pth',
 }
 
 
 class SFDDetector(FaceDetector):
-    def __init__(self, device, path_to_detector=os.path.join(os.path.dirname(os.path.abspath(__file__)), 's3fd.pth'), verbose=False):
+    def __init__(self, device, path_to_detector=_SFD_MODEL_PATH, verbose=False):
         super(SFDDetector, self).__init__(device, verbose)
 
         # Initialise the face detector
diff --git a/musetalk/utils/preprocessing.py b/musetalk/utils/preprocessing.py
index 978480c6..9e321938 100755
--- a/musetalk/utils/preprocessing.py
+++ b/musetalk/utils/preprocessing.py
@@ -12,6 +12,17 @@
 import torch
 from tqdm import tqdm
 
+# Wrap torch.load so weights_only defaults to False when the caller omits it (PyTorch 2.6+ defaults it to True)
+import mmengine.runner.checkpoint as _ckpt_module
+_orig_torch_load = torch.load
+def _patched_torch_load(*args, **kwargs):
+    if 'weights_only' not in kwargs:
+        kwargs['weights_only'] = False
+    return _orig_torch_load(*args, **kwargs)
+# NOTE: the first assignment rebinds torch.load itself, so every caller in this process is affected, not only mmengine
+torch.load = _patched_torch_load
+_ckpt_module.torch.load = _patched_torch_load
+
 # initialize the mmpose model
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 config_file = './musetalk/utils/dwpose/rtmpose-l_8xb32-270e_coco-ubody-wholebody-384x288.py'
diff --git a/scripts/inference.py b/scripts/inference.py
index 428afb99..20f0c640 100644
--- a/scripts/inference.py
+++ b/scripts/inference.py
@@ -29,8 +29,12 @@ def fast_check_ffmpeg():
 
 @torch.no_grad()
 def main(args):
-    # Configure ffmpeg path
-    if not fast_check_ffmpeg():
+    # Configure ffmpeg path - always use provided ffmpeg_path if specified
+    if args.ffmpeg_path:
+        print(f"Using ffmpeg from: {args.ffmpeg_path}")
+        path_separator = ';' if sys.platform == 'win32' else ':'
+        os.environ["PATH"] = f"{args.ffmpeg_path}{path_separator}{os.environ['PATH']}"
+    elif not fast_check_ffmpeg():
         print("Adding ffmpeg to PATH")
         # Choose path separator based on operating system
         path_separator = ';' if sys.platform == 'win32' else ':'
@@ -250,7 +254,7 @@ def main(args):
 
 if __name__ == "__main__":
     parser = argparse.ArgumentParser()
-    parser.add_argument("--ffmpeg_path", type=str, default="./ffmpeg-4.4-amd64-static/", help="Path to ffmpeg executable")
+    parser.add_argument("--ffmpeg_path", type=str, default="/usr/bin", help="Path to ffmpeg executable")
     parser.add_argument("--gpu_id", type=int, default=0, help="GPU ID to use")
     parser.add_argument("--vae_type", type=str, default="sd-vae", help="Type of VAE model")
     parser.add_argument("--unet_config", type=str, default="./models/musetalk/config.json", help="Path to UNet configuration file")
diff --git a/train.sh b/train.sh
old mode 100644
new mode 100755

From 32d3b6c5f1e5da77e99db69bf1e3ef2b55b56839 Mon Sep 17 00:00:00 2001
From: smy <18813151339@163.com>
Date: Tue, 2 Jun 2026 16:50:41 +0800
Subject: [PATCH 2/2] =?UTF-8?q?=E5=9C=A8readme=E4=B8=AD=E8=A1=A5=E5=85=85?=
 =?UTF-8?q?=E4=BA=86=E6=8E=A8=E7=90=86=E6=97=B6=E7=BC=BA=E5=B0=91=E7=9A=84?=
 =?UTF-8?q?s3fd=E6=A8=A1=E5=9E=8B=E7=9A=84=E4=BF=A1=E6=81=AF=EF=BC=8C?=
 =?UTF-8?q?=E5=B9=B6=E4=BF=AE=E6=94=B9=E4=BA=86sfd=5Fdetector.py=E8=84=9A?=
 =?UTF-8?q?=E6=9C=AC=E4=B8=AD=E8=AF=A5=E6=A8=A1=E5=9E=8B=E9=BB=98=E8=AE=A4?=
 =?UTF-8?q?=E8=B7=AF=E5=BE=84?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .gitignore              |  3 ++-
 download_weights-bak.sh | 51 -----------------------------------------
 2 files changed, 2 insertions(+), 52 deletions(-)
 delete mode 100755 download_weights-bak.sh

diff --git a/.gitignore b/.gitignore
index c83750f5..0f53551c 100644
--- a/.gitignore
+++ b/.gitignore
@@ -15,4 +15,5 @@ ffmprobe*
 ffplay*
 debug
 exp_out
-.gradio
\ No newline at end of file
+.gradio
+.claude/
\ No newline at end of file
diff --git a/download_weights-bak.sh b/download_weights-bak.sh
deleted file mode 100755
index affa5a9d..00000000
--- a/download_weights-bak.sh
+++ /dev/null
@@ -1,51 +0,0 @@
-#!/bin/bash
-
-# Set the checkpoints directory
-CheckpointsDir="models"
-
-# Create necessary directories
-mkdir -p models/musetalk models/musetalkV15 models/syncnet models/dwpose models/face-parse-bisent models/sd-vae models/whisper
-
-# Install required packages
-pip install -U "huggingface_hub[cli]"
-pip install gdown
-
-# Set HuggingFace mirror endpoint
-export HF_ENDPOINT=https://hf-mirror.com
-
-# Download MuseTalk V1.0 weights
-huggingface-cli download TMElyralab/MuseTalk \
-  --local-dir $CheckpointsDir \
-  --include "musetalk/musetalk.json" "musetalk/pytorch_model.bin"
-
-# Download MuseTalk V1.5 weights (unet.pth)
-huggingface-cli download TMElyralab/MuseTalk \
-  --local-dir $CheckpointsDir \
-  --include "musetalkV15/musetalk.json" "musetalkV15/unet.pth"
-
-# Download SD VAE weights
-huggingface-cli download stabilityai/sd-vae-ft-mse \
-  --local-dir $CheckpointsDir/sd-vae \
-  --include "config.json" "diffusion_pytorch_model.bin"
-
-# Download Whisper weights
-huggingface-cli download openai/whisper-tiny \
-  --local-dir $CheckpointsDir/whisper \
-  --include "config.json" "pytorch_model.bin" "preprocessor_config.json"
-
-# Download DWPose weights
-huggingface-cli download yzd-v/DWPose \
-  --local-dir $CheckpointsDir/dwpose \
-  --include "dw-ll_ucoco_384.pth"
-
-# Download SyncNet weights
-huggingface-cli download ByteDance/LatentSync \
-  --local-dir $CheckpointsDir/syncnet \
-  --include "latentsync_syncnet.pt"
-
-# Download Face Parse Bisent weights
-gdown --id 154JgKpzCPW82qINcVieuPH3fZ2e0P812 -O $CheckpointsDir/face-parse-bisent/79999_iter.pth
-curl -L https://download.pytorch.org/models/resnet18-5c106cde.pth \
-  -o $CheckpointsDir/face-parse-bisent/resnet18-5c106cde.pth
-
-echo "✅ All weights have been downloaded successfully!"