From 080920ee66b118bb98d24cfde99a754baeadf9cf Mon Sep 17 00:00:00 2001
From: jhandsome <jhandsome06@gmail.com>
Date: Wed, 11 Mar 2026 03:10:45 +0800
Subject: [PATCH 1/9] feat(minimax): add minimax package with type definitions

---
 pkg/backend/minimax/speech.go | 64 +++++++++++++++++++++++++++++++++++
 1 file changed, 64 insertions(+)
 create mode 100644 pkg/backend/minimax/speech.go

diff --git a/pkg/backend/minimax/speech.go b/pkg/backend/minimax/speech.go
new file mode 100644
index 0000000..566182f
--- /dev/null
+++ b/pkg/backend/minimax/speech.go
@@ -0,0 +1,64 @@
+// Package minimax provides MiniMax TTS integration
+package minimax
+
+// VoiceSetting MiniMax 音色设置
+type VoiceSetting struct {
+	VoiceID           string  `json:"voice_id"`
+	Speed             float64 `json:"speed,omitempty"`
+	Vol               float64 `json:"vol,omitempty"`
+	Pitch             float64 `json:"pitch,omitempty"`
+	Emotion           string  `json:"emotion,omitempty"`
+	TextNormalization string  `json:"text_normalization,omitempty"`
+	LatexRead         string  `json:"latex_read,omitempty"`
+}
+
+// AudioSetting MiniMax 音频设置
+type AudioSetting struct {
+	SampleRate int    `json:"sample_rate,omitempty"`
+	Bitrate    int    `json:"bitrate,omitempty"`
+	Format     string `json:"format,omitempty"`
+	Channel    int    `json:"channel,omitempty"`
+}
+
+// TTSRequest MiniMax TTS 请求
+type TTSRequest struct {
+	Model        string        `json:"model"`
+	Text         string        `json:"text"`
+	Stream       bool          `json:"stream,omitempty"`
+	VoiceSetting *VoiceSetting `json:"voice_setting,omitempty"`
+	AudioSetting *AudioSetting `json:"audio_setting,omitempty"`
+	OutputFormat string        `json:"output_format,omitempty"`
+}
+
+// TTSResponseData MiniMax TTS 响应数据
+type TTSResponseData struct {
+	Audio        string `json:"audio"`
+	SubtitleFile string `json:"subtitle_file,omitempty"`
+	Status       int    `json:"status"`
+}
+
+// TTSResponseExtraInfo MiniMax TTS 响应额外信息
+type TTSResponseExtraInfo struct {
+	AudioLength     int    `json:"audio_length"`
+	AudioSampleRate int    `json:"audio_sample_rate"`
+	AudioSize       int    `json:"audio_size"`
+	Bitrate         int    `json:"bitrate"`
+	AudioFormat     string `json:"audio_format"`
+	AudioChannel    int    `json:"audio_channel"`
+	WordCount       int    `json:"word_count"`
+	UsageCharacters int    `json:"usage_characters"`
+}
+
+// TTSResponseBaseResp MiniMax TTS 响应基础信息
+type TTSResponseBaseResp struct {
+	StatusCode int    `json:"status_code"`
+	StatusMsg  string `json:"status_msg"`
+}
+
+// TTSResponse MiniMax TTS 响应
+type TTSResponse struct {
+	Data      TTSResponseData      `json:"data"`
+	TraceID   string              `json:"trace_id"`
+	ExtraInfo TTSResponseExtraInfo `json:"extra_info"`
+	BaseResp  TTSResponseBaseResp  `json:"base_resp"`
+}

From 51e53eaf34237695ed1942253d67b13e01d6d46f Mon Sep 17 00:00:00 2001
From: jhandsome <jhandsome06@gmail.com>
Date: Wed, 11 Mar 2026 03:13:00 +0800
Subject: [PATCH 2/9] feat(minimax): implement HandleSpeech with non-streaming
 and streaming TTS

---
 pkg/backend/minimax/speech.go | 347 +++++++++++++++++++++++++++++++++-
 1 file changed, 346 insertions(+), 1 deletion(-)

diff --git a/pkg/backend/minimax/speech.go b/pkg/backend/minimax/speech.go
index 566182f..d328549 100644
--- a/pkg/backend/minimax/speech.go
+++ b/pkg/backend/minimax/speech.go
@@ -1,6 +1,21 @@
-// Package minimax provides MiniMax TTS integration
 package minimax
 
+import (
+	"bytes"
+	"encoding/hex"
+	"encoding/json"
+	"log/slog"
+	"net/http"
+	"strings"
+
+	"github.com/labstack/echo/v4"
+	"github.com/moeru-ai/unspeech/pkg/apierrors"
+	"github.com/moeru-ai/unspeech/pkg/backend/types"
+	"github.com/moeru-ai/unspeech/pkg/utils"
+	"github.com/samber/lo"
+	"github.com/samber/mo"
+)
+
 // VoiceSetting MiniMax 音色设置
 type VoiceSetting struct {
 	VoiceID           string  `json:"voice_id"`
@@ -62,3 +77,333 @@ type TTSResponse struct {
 	ExtraInfo TTSResponseExtraInfo `json:"extra_info"`
 	BaseResp  TTSResponseBaseResp  `json:"base_resp"`
 }
+
+// HandleSpeech 处理 MiniMax TTS 请求
+func HandleSpeech(c echo.Context, options mo.Option[types.SpeechRequestOptions]) mo.Result[any] {
+	opts := options.MustGet()
+
+	// 获取 token
+	token := strings.TrimPrefix(c.Request().Header.Get("Authorization"), "Bearer ")
+
+	// 从 ExtraBody 获取 stream 参数
+	stream := utils.GetByJSONPath[bool](opts.ExtraBody, "{ .stream }")
+
+	// 如果是流式请求，使用流式处理
+	if stream {
+		return handleStreamingSpeech(c, token, opts)
+	}
+
+	// 构建 MiniMax 请求
+	reqBody := TTSRequest{
+		Model:        opts.Model,
+		Text:         opts.Input,
+		Stream:       false,
+		OutputFormat: "hex",
+	}
+
+	// 设置 voice_id（从用户传入的 voice 字段）
+	voiceID := opts.Voice
+	if voiceID != "" {
+		reqBody.VoiceSetting = &VoiceSetting{
+			VoiceID: voiceID,
+		}
+	}
+
+	// 从 ExtraBody 获取其他参数
+	if speed := utils.GetByJSONPath[*float64](opts.ExtraBody, "{ .speed }"); speed != nil {
+		if reqBody.VoiceSetting == nil {
+			reqBody.VoiceSetting = &VoiceSetting{}
+		}
+		reqBody.VoiceSetting.Speed = *speed
+	}
+
+	if vol := utils.GetByJSONPath[*float64](opts.ExtraBody, "{ .vol }"); vol != nil {
+		if reqBody.VoiceSetting == nil {
+			reqBody.VoiceSetting = &VoiceSetting{}
+		}
+		reqBody.VoiceSetting.Vol = *vol
+	}
+
+	if pitch := utils.GetByJSONPath[*float64](opts.ExtraBody, "{ .pitch }"); pitch != nil {
+		if reqBody.VoiceSetting == nil {
+			reqBody.VoiceSetting = &VoiceSetting{}
+		}
+		reqBody.VoiceSetting.Pitch = *pitch
+	}
+
+	if emotion := utils.GetByJSONPath[*string](opts.ExtraBody, "{ .emotion }"); emotion != nil {
+		if reqBody.VoiceSetting == nil {
+			reqBody.VoiceSetting = &VoiceSetting{}
+		}
+		reqBody.VoiceSetting.Emotion = *emotion
+	}
+
+	// 音频设置
+	if sampleRate := utils.GetByJSONPath[*int](opts.ExtraBody, "{ .sample_rate }"); sampleRate != nil {
+		reqBody.AudioSetting = &AudioSetting{}
+		reqBody.AudioSetting.SampleRate = *sampleRate
+	}
+
+	if bitrate := utils.GetByJSONPath[*int](opts.ExtraBody, "{ .bitrate }"); bitrate != nil {
+		if reqBody.AudioSetting == nil {
+			reqBody.AudioSetting = &AudioSetting{}
+		}
+		reqBody.AudioSetting.Bitrate = *bitrate
+	}
+
+	if format := utils.GetByJSONPath[*string](opts.ExtraBody, "{ .format }"); format != nil {
+		if reqBody.AudioSetting == nil {
+			reqBody.AudioSetting = &AudioSetting{}
+		}
+		reqBody.AudioSetting.Format = *format
+	}
+
+	// 序列化请求体
+	jsonBytes, err := json.Marshal(reqBody)
+	if err != nil {
+		return mo.Err[any](apierrors.NewErrInternal().WithDetail(err.Error()).WithCaller())
+	}
+
+	// 创建请求
+	req, err := http.NewRequestWithContext(
+		c.Request().Context(),
+		http.MethodPost,
+		"https://api.minimaxi.com/v1/t2a_v2",
+		bytes.NewBuffer(jsonBytes),
+	)
+	if err != nil {
+		return mo.Err[any](apierrors.NewErrInternal().WithDetail(err.Error()).WithCaller())
+	}
+
+	// 设置请求头
+	req.Header.Set("Authorization", "Bearer "+token)
+	req.Header.Set("Content-Type", "application/json")
+
+	// 发送请求
+	resp, err := http.DefaultClient.Do(req)
+	if err != nil {
+		return mo.Err[any](apierrors.NewErrBadGateway().WithDetail(err.Error()).WithError(err).WithCaller())
+	}
+
+	defer func() { _ = resp.Body.Close() }()
+
+	// 检查 HTTP 状态码
+	if resp.StatusCode >= 400 && resp.StatusCode < 600 {
+		switch {
+		case strings.HasPrefix(resp.Header.Get("Content-Type"), "application/json"):
+			return mo.Err[any](apierrors.
+				NewUpstreamError(resp.StatusCode).
+				WithDetail(utils.NewJSONResponseError(resp.StatusCode, resp.Body).OrEmpty().Error()))
+		case strings.HasPrefix(resp.Header.Get("Content-Type"), "text/"):
+			return mo.Err[any](apierrors.
+				NewUpstreamError(resp.StatusCode).
+				WithDetail(utils.NewTextResponseError(resp.StatusCode, resp.Body).OrEmpty().Error()))
+		default:
+			slog.Warn("unknown upstream error with unknown Content-Type",
+				slog.Int("status", resp.StatusCode),
+				slog.String("content_type", resp.Header.Get("Content-Type")),
+			)
+		}
+	}
+
+	// 解析响应
+	var ttsResp TTSResponse
+	err = json.NewDecoder(resp.Body).Decode(&ttsResp)
+	if err != nil {
+		return mo.Err[any](apierrors.NewErrBadGateway().WithDetail(err.Error()).WithError(err).WithCaller())
+	}
+
+	// 检查业务状态码
+	if ttsResp.BaseResp.StatusCode != 0 {
+		return mo.Err[any](handleMinimaxError(ttsResp.BaseResp.StatusCode, ttsResp.BaseResp.StatusMsg))
+	}
+
+	// 解码 hex 音频
+	audioBytes, err := hex.DecodeString(ttsResp.Data.Audio)
+	if err != nil {
+		return mo.Err[any](apierrors.NewErrInternal().WithDetail("failed to decode hex audio: "+err.Error()).WithError(err).WithCaller())
+	}
+
+	// 确定 Content-Type
+	contentType := "audio/mp3"
+	if reqBody.AudioSetting != nil && reqBody.AudioSetting.Format != "" {
+		contentType = lo.Ternary(reqBody.AudioSetting.Format == "pcm", "audio/pcm",
+			lo.Ternary(reqBody.AudioSetting.Format == "wav", "audio/wav",
+				lo.Ternary(reqBody.AudioSetting.Format == "flac", "audio/flac", "audio/mp3")))
+	}
+
+	return mo.Ok[any](c.Blob(http.StatusOK, contentType, audioBytes))
+}
+
+// handleMinimaxError 处理 MiniMax 错误码
+func handleMinimaxError(code int, msg string) *apierrors.Error {
+	var httpStatus int
+	switch code {
+	case 1004: // 鉴权失败
+		httpStatus = http.StatusUnauthorized
+	case 1002, 1039: // 限流
+		httpStatus = http.StatusTooManyRequests
+	case 1042, 2013: // 参数错误
+		httpStatus = http.StatusBadRequest
+	case 1001: // 超时
+		httpStatus = http.StatusGatewayTimeout
+	default:
+		httpStatus = http.StatusBadGateway
+	}
+	return apierrors.NewUpstreamError(httpStatus).WithDetailf("minimax error: %d - %s", code, msg)
+}
+
+// handleStreamingSpeech 处理流式 TTS 请求
+func handleStreamingSpeech(c echo.Context, token string, opts types.SpeechRequestOptions) mo.Result[any] {
+	// 构建 MiniMax 请求
+	reqBody := TTSRequest{
+		Model:        opts.Model,
+		Text:         opts.Input,
+		Stream:       true,
+		OutputFormat: "hex",
+	}
+
+	// 设置 voice_id
+	voiceID := opts.Voice
+	if voiceID != "" {
+		reqBody.VoiceSetting = &VoiceSetting{
+			VoiceID: voiceID,
+		}
+	}
+
+	// 从 ExtraBody 获取其他参数
+	if speed := utils.GetByJSONPath[*float64](opts.ExtraBody, "{ .speed }"); speed != nil {
+		if reqBody.VoiceSetting == nil {
+			reqBody.VoiceSetting = &VoiceSetting{}
+		}
+		reqBody.VoiceSetting.Speed = *speed
+	}
+
+	if vol := utils.GetByJSONPath[*float64](opts.ExtraBody, "{ .vol }"); vol != nil {
+		if reqBody.VoiceSetting == nil {
+			reqBody.VoiceSetting = &VoiceSetting{}
+		}
+		reqBody.VoiceSetting.Vol = *vol
+	}
+
+	if pitch := utils.GetByJSONPath[*float64](opts.ExtraBody, "{ .pitch }"); pitch != nil {
+		if reqBody.VoiceSetting == nil {
+			reqBody.VoiceSetting = &VoiceSetting{}
+		}
+		reqBody.VoiceSetting.Pitch = *pitch
+	}
+
+	if emotion := utils.GetByJSONPath[*string](opts.ExtraBody, "{ .emotion }"); emotion != nil {
+		if reqBody.VoiceSetting == nil {
+			reqBody.VoiceSetting = &VoiceSetting{}
+		}
+		reqBody.VoiceSetting.Emotion = *emotion
+	}
+
+	// 音频设置
+	if sampleRate := utils.GetByJSONPath[*int](opts.ExtraBody, "{ .sample_rate }"); sampleRate != nil {
+		reqBody.AudioSetting = &AudioSetting{}
+		reqBody.AudioSetting.SampleRate = *sampleRate
+	}
+
+	if bitrate := utils.GetByJSONPath[*int](opts.ExtraBody, "{ .bitrate }"); bitrate != nil {
+		if reqBody.AudioSetting == nil {
+			reqBody.AudioSetting = &AudioSetting{}
+		}
+		reqBody.AudioSetting.Bitrate = *bitrate
+	}
+
+	if format := utils.GetByJSONPath[*string](opts.ExtraBody, "{ .format }"); format != nil {
+		if reqBody.AudioSetting == nil {
+			reqBody.AudioSetting = &AudioSetting{}
+		}
+		reqBody.AudioSetting.Format = *format
+	}
+
+	// 序列化请求体
+	jsonBytes, err := json.Marshal(reqBody)
+	if err != nil {
+		return mo.Err[any](apierrors.NewErrInternal().WithDetail(err.Error()).WithCaller())
+	}
+
+	// 创建请求
+	req, err := http.NewRequestWithContext(
+		c.Request().Context(),
+		http.MethodPost,
+		"https://api.minimaxi.com/v1/t2a_v2",
+		bytes.NewBuffer(jsonBytes),
+	)
+	if err != nil {
+		return mo.Err[any](apierrors.NewErrInternal().WithDetail(err.Error()).WithCaller())
+	}
+
+	// 设置请求头
+	req.Header.Set("Authorization", "Bearer "+token)
+	req.Header.Set("Content-Type", "application/json")
+
+	// 发送请求
+	resp, err := http.DefaultClient.Do(req)
+	if err != nil {
+		return mo.Err[any](apierrors.NewErrBadGateway().WithDetail(err.Error()).WithError(err).WithCaller())
+	}
+
+	defer func() { _ = resp.Body.Close() }()
+
+	// 检查 HTTP 状态码
+	if resp.StatusCode >= 400 && resp.StatusCode < 600 {
+		switch {
+		case strings.HasPrefix(resp.Header.Get("Content-Type"), "application/json"):
+			return mo.Err[any](apierrors.
+				NewUpstreamError(resp.StatusCode).
+				WithDetail(utils.NewJSONResponseError(resp.StatusCode, resp.Body).OrEmpty().Error()))
+		default:
+			slog.Warn("unknown upstream error with unknown Content-Type",
+				slog.Int("status", resp.StatusCode),
+				slog.String("content_type", resp.Header.Get("Content-Type")),
+			)
+		}
+	}
+
+	// 流式处理：持续读取响应直到 status == 2
+	decoder := json.NewDecoder(resp.Body)
+	audioHex := new(strings.Builder)
+
+	for {
+		var ttsResp TTSResponse
+		if err := decoder.Decode(&ttsResp); err != nil {
+			if err.Error() == "EOF" {
+				break
+			}
+			return mo.Err[any](apierrors.NewErrBadGateway().WithDetail(err.Error()).WithError(err).WithCaller())
+		}
+
+		// 检查业务状态码
+		if ttsResp.BaseResp.StatusCode != 0 {
+			return mo.Err[any](handleMinimaxError(ttsResp.BaseResp.StatusCode, ttsResp.BaseResp.StatusMsg))
+		}
+
+		// 追加音频数据
+		audioHex.WriteString(ttsResp.Data.Audio)
+
+		// status == 2 表示合成结束
+		if ttsResp.Data.Status == 2 {
+			break
+		}
+	}
+
+	// 解码 hex 音频
+	audioBytes, err := hex.DecodeString(audioHex.String())
+	if err != nil {
+		return mo.Err[any](apierrors.NewErrInternal().WithDetail("failed to decode hex audio: "+err.Error()).WithError(err).WithCaller())
+	}
+
+	// 确定 Content-Type
+	contentType := "audio/mp3"
+	if reqBody.AudioSetting != nil && reqBody.AudioSetting.Format != "" {
+		contentType = lo.Ternary(reqBody.AudioSetting.Format == "pcm", "audio/pcm",
+			lo.Ternary(reqBody.AudioSetting.Format == "wav", "audio/wav",
+				lo.Ternary(reqBody.AudioSetting.Format == "flac", "audio/flac", "audio/mp3")))
+	}
+
+	return mo.Ok[any](c.Blob(http.StatusOK, contentType, audioBytes))
+}

From b24e974ef360cb890649ff783c61b11e30762dbc Mon Sep 17 00:00:00 2001
From: jhandsome <jhandsome06@gmail.com>
Date: Wed, 11 Mar 2026 03:14:11 +0800
Subject: [PATCH 3/9] feat(minimax): implement HandleVoices for voice list

---
 pkg/backend/minimax/voices.go | 190 ++++++++++++++++++++++++++++++++++
 1 file changed, 190 insertions(+)
 create mode 100644 pkg/backend/minimax/voices.go

diff --git a/pkg/backend/minimax/voices.go b/pkg/backend/minimax/voices.go
new file mode 100644
index 0000000..9603c72
--- /dev/null
+++ b/pkg/backend/minimax/voices.go
@@ -0,0 +1,190 @@
+package minimax
+
+import (
+	"encoding/json"
+	"log/slog"
+	"net/http"
+	"strings"
+
+	"github.com/labstack/echo/v4"
+	"github.com/moeru-ai/unspeech/pkg/apierrors"
+	"github.com/moeru-ai/unspeech/pkg/backend/types"
+	"github.com/moeru-ai/unspeech/pkg/utils"
+	"github.com/samber/mo"
+)
+
+// GetVoiceReq 获取音色列表请求
+type GetVoiceReq struct {
+	VoiceType string `json:"voice_type"`
+}
+
+// SystemVoice 系统音色
+type SystemVoice struct {
+	VoiceID     string   `json:"voice_id"`
+	VoiceName   string   `json:"voice_name"`
+	Description []string `json:"description"`
+}
+
+// VoiceCloning 快速复刻音色
+type VoiceCloning struct {
+	VoiceID     string   `json:"voice_id"`
+	Description []string `json:"description"`
+	CreatedTime string   `json:"created_time"`
+}
+
+// VoiceGeneration 文生音色
+type VoiceGeneration struct {
+	VoiceID     string   `json:"voice_id"`
+	Description []string `json:"description"`
+	CreatedTime string   `json:"created_time"`
+}
+
+// BaseResp 基础响应
+type BaseResp struct {
+	StatusCode int    `json:"status_code"`
+	StatusMsg  string `json:"status_msg"`
+}
+
+// GetVoiceResp 获取音色列表响应
+type GetVoiceResp struct {
+	SystemVoice      []SystemVoice     `json:"system_voice"`
+	VoiceCloning    []VoiceCloning   `json:"voice_cloning"`
+	VoiceGeneration []VoiceGeneration `json:"voice_generation"`
+	BaseResp        BaseResp          `json:"base_resp"`
+}
+
+var (
+	// 支持的音频格式
+	formats = []types.VoiceFormat{
+		{Name: "MP3", Extension: ".mp3", MimeType: "audio/mpeg"},
+		{Name: "PCM", Extension: ".pcm", MimeType: "audio/pcm"},
+		{Name: "FLAC", Extension: ".flac", MimeType: "audio/flac"},
+		{Name: "WAV", Extension: ".wav", MimeType: "audio/wav"},
+	}
+)
+
+// HandleVoices 处理获取音色列表请求
+func HandleVoices(c echo.Context, options mo.Option[types.VoicesRequestOptions]) mo.Result[any] {
+	// 获取 token
+	token := strings.TrimPrefix(c.Request().Header.Get("Authorization"), "Bearer ")
+
+	// 构建请求
+	reqBody := GetVoiceReq{
+		VoiceType: "all",
+	}
+
+	jsonBytes, err := json.Marshal(reqBody)
+	if err != nil {
+		return mo.Err[any](apierrors.NewErrInternal().WithDetail(err.Error()).WithCaller())
+	}
+
+	// 创建请求
+	req, err := http.NewRequestWithContext(
+		c.Request().Context(),
+		http.MethodPost,
+		"https://api.minimaxi.com/v1/get_voice",
+		strings.NewReader(string(jsonBytes)),
+	)
+	if err != nil {
+		return mo.Err[any](apierrors.NewErrInternal().WithDetail(err.Error()).WithCaller())
+	}
+
+	// 设置请求头
+	req.Header.Set("Authorization", "Bearer "+token)
+	req.Header.Set("Content-Type", "application/json")
+
+	// 发送请求
+	resp, err := http.DefaultClient.Do(req)
+	if err != nil {
+		return mo.Err[any](apierrors.NewErrBadGateway().WithDetail(err.Error()).WithError(err).WithCaller())
+	}
+
+	defer func() { _ = resp.Body.Close() }()
+
+	// 检查 HTTP 状态码
+	if resp.StatusCode >= 400 && resp.StatusCode < 600 {
+		switch {
+		case strings.HasPrefix(resp.Header.Get("Content-Type"), "application/json"):
+			return mo.Err[any](apierrors.
+				NewUpstreamError(resp.StatusCode).
+				WithDetail(utils.NewJSONResponseError(resp.StatusCode, resp.Body).OrEmpty().Error()))
+		default:
+			slog.Warn("unknown upstream error with unknown Content-Type",
+				slog.Int("status", resp.StatusCode),
+				slog.String("content_type", resp.Header.Get("Content-Type")),
+			)
+		}
+	}
+
+	// 解析响应
+	var voiceResp GetVoiceResp
+	err = json.NewDecoder(resp.Body).Decode(&voiceResp)
+	if err != nil {
+		return mo.Err[any](apierrors.NewErrBadGateway().WithDetail(err.Error()).WithError(err).WithCaller())
+	}
+
+	// 检查业务状态码
+	if voiceResp.BaseResp.StatusCode != 0 {
+		return mo.Err[any](handleMinimaxError(voiceResp.BaseResp.StatusCode, voiceResp.BaseResp.StatusMsg))
+	}
+
+	// 转换音色列表
+	voices := make([]types.Voice, 0, len(voiceResp.SystemVoice)+len(voiceResp.VoiceCloning)+len(voiceResp.VoiceGeneration))
+
+	// 添加系统音色
+	for _, v := range voiceResp.SystemVoice {
+		voices = append(voices, types.Voice{
+			ID:          v.VoiceID,
+			Name:        v.VoiceName,
+			Description: strings.Join(v.Description, ", "),
+			Labels: map[string]any{
+				"type": "system",
+			},
+			Tags:              []string{"system"},
+			Languages:         []types.VoiceLanguage{{Title: "Auto", Code: "auto"}},
+			Formats:           formats,
+			CompatibleModels:  []string{"speech-2.8-turbo", "speech-2.8-hd", "speech-2.6-turbo", "speech-2.6-hd"},
+			PredefinedOptions: map[string]any{},
+		})
+	}
+
+	// 添加快速复刻音色
+	for _, v := range voiceResp.VoiceCloning {
+		voices = append(voices, types.Voice{
+			ID:          v.VoiceID,
+			Name:        v.VoiceID,
+			Description: strings.Join(v.Description, ", "),
+			Labels: map[string]any{
+				"type":        "voice_cloning",
+				"createdTime": v.CreatedTime,
+			},
+			Tags:              []string{"voice_cloning"},
+			Languages:         []types.VoiceLanguage{{Title: "Auto", Code: "auto"}},
+			Formats:           formats,
+			CompatibleModels:  []string{"speech-2.8-turbo", "speech-2.8-hd"},
+			PredefinedOptions: map[string]any{},
+		})
+	}
+
+	// 添加文生音色
+	for _, v := range voiceResp.VoiceGeneration {
+		voices = append(voices, types.Voice{
+			ID:          v.VoiceID,
+			Name:        v.VoiceID,
+			Description: strings.Join(v.Description, ", "),
+			Labels: map[string]any{
+				"type":        "voice_generation",
+				"createdTime": v.CreatedTime,
+			},
+			Tags:              []string{"voice_generation"},
+			Languages:         []types.VoiceLanguage{{Title: "Auto", Code: "auto"}},
+			Formats:           formats,
+			CompatibleModels:  []string{"speech-2.8-turbo", "speech-2.8-hd"},
+			PredefinedOptions: map[string]any{},
+		})
+	}
+
+	return mo.Ok[any](types.ListVoicesResponse{
+		Voices: voices,
+	})
+}

From 9bca8fcca9bd40690332c2db3c8a196270a35020 Mon Sep 17 00:00:00 2001
From: jhandsome <jhandsome06@gmail.com>
Date: Wed, 11 Mar 2026 03:15:20 +0800
Subject: [PATCH 4/9] feat(backend): add minimax routing

---
 pkg/backend/backend.go | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/pkg/backend/backend.go b/pkg/backend/backend.go
index bd93b1d..355e291 100644
--- a/pkg/backend/backend.go
+++ b/pkg/backend/backend.go
@@ -10,6 +10,7 @@ import (
 	"github.com/moeru-ai/unspeech/pkg/backend/elevenlabs"
 	"github.com/moeru-ai/unspeech/pkg/backend/koemotion"
 	"github.com/moeru-ai/unspeech/pkg/backend/microsoft"
+	"github.com/moeru-ai/unspeech/pkg/backend/minimax"
 	"github.com/moeru-ai/unspeech/pkg/backend/openai"
 	"github.com/moeru-ai/unspeech/pkg/backend/types"
 	"github.com/moeru-ai/unspeech/pkg/backend/volcengine"
@@ -37,6 +38,8 @@ func Speech(c echo.Context) mo.Result[any] {
 		return volcengine.HandleSpeech(c, utils.ResultToOption(options))
 	case "ali", "aliyun", "alibaba", "bailian", "alibaba-model-studio":
 		return alibaba.HandleSpeech(c, utils.ResultToOption(options))
+	case "minimax", "minimax-tts":
+		return minimax.HandleSpeech(c, utils.ResultToOption(options))
 	default:
 		return mo.Err[any](apierrors.NewErrBadRequest().WithDetail("unsupported backend"))
 	}
@@ -63,6 +66,8 @@ func Voices(c echo.Context) mo.Result[any] {
 		return volcengine.HandleVoices(c, utils.ResultToOption(options))
 	case "ali", "aliyun", "alibaba", "bailian", "alibaba-model-studio":
 		return alibaba.HandleVoices(c, utils.ResultToOption(options))
+	case "minimax", "minimax-tts":
+		return minimax.HandleVoices(c, utils.ResultToOption(options))
 	default:
 		return mo.Err[any](apierrors.NewErrBadRequest().WithDetail("unsupported backend"))
 	}

From 6994a581b94b3dcc324754ed49d7e9ed4e1823c3 Mon Sep 17 00:00:00 2001
From: jhandsome <jhandsome06@gmail.com>
Date: Wed, 11 Mar 2026 03:48:05 +0800
Subject: [PATCH 5/9] feat(sdk): add MiniMax TypeScript SDK support

---
 sdk/typescript/README.md              |   2 +
 sdk/typescript/src/backend/index.ts   |   4 +-
 sdk/typescript/src/backend/minimax.ts | 145 ++++++++++++++++++++++++++
 3 files changed, 150 insertions(+), 1 deletion(-)
 create mode 100644 sdk/typescript/src/backend/minimax.ts

diff --git a/sdk/typescript/README.md b/sdk/typescript/README.md
index 0892294..e76c998 100644
--- a/sdk/typescript/README.md
+++ b/sdk/typescript/README.md
@@ -51,6 +51,7 @@ import {
   createUnAlibabaCloud,
   createUnElevenLabs,
   createUnMicrosoft,
+  createUnMinimax,
   createUnSpeech,
   createUnVolcengine,
 } from 'unspeech'
@@ -62,6 +63,7 @@ When using
 - [Alibaba Cloud Model Studio / 阿里云百炼 / CosyVoice](https://www.alibabacloud.com/en/product/modelstudio)
 - [Volcano Engine / 火山引擎语音技术](https://www.volcengine.com/product/voice-tech)
 - [ElevenLabs](https://elevenlabs.io/docs/api-reference/text-to-speech/convert)
+- [MiniMax](https://platform.minimaxi.com/docs/guides/speech-t2a-http)
 
 providers, [SSML](https://learn.microsoft.com/en-us/azure/ai-services/speech-service/speech-synthesis-markup) is supported to control in fine grain level for pitch, volume, rate, etc.
 
diff --git a/sdk/typescript/src/backend/index.ts b/sdk/typescript/src/backend/index.ts
index 1ad2965..58b3abe 100644
--- a/sdk/typescript/src/backend/index.ts
+++ b/sdk/typescript/src/backend/index.ts
@@ -7,6 +7,7 @@ export * from './alibabacloud'
 export * from './deepgram'
 export * from './elevenlabs'
 export * from './microsoft'
+export * from './minimax'
 export * from './volcengine'
 
 /** @see {@link https://github.com/moeru-ai/unspeech} */
@@ -26,7 +27,7 @@ export function createUnSpeech(apiKey: string, baseURL = 'http://localhost:5933/
         | 'ali'
         | 'alibaba'
         | 'alibaba-model-studio'
-        | 'aliyun' | 'bailian' | 'deepgram' | 'elevenlabs' | 'koemotion' | 'openai'
+        | 'aliyun' | 'bailian' | 'deepgram' | 'elevenlabs' | 'koemotion' | 'minimax' | 'openai'
     }
   > = {
     voice: (options) => {
@@ -60,6 +61,7 @@ export function createUnSpeech(apiKey: string, baseURL = 'http://localhost:5933/
       | `deepgram/${string}`
       | `elevenlabs/${string}`
       | `koemotion/${string}`
+      | `minimax/${string}`
       | `openai/${string}`
       | `volcano/${string}`
       | `volcengine/${string}`,
diff --git a/sdk/typescript/src/backend/minimax.ts b/sdk/typescript/src/backend/minimax.ts
new file mode 100644
index 0000000..7246dbf
--- /dev/null
+++ b/sdk/typescript/src/backend/minimax.ts
@@ -0,0 +1,145 @@
+import type { SpeechProviderWithExtraOptions } from '@xsai-ext/providers/utils'
+
+import type { UnSpeechOptions, VoiceProviderWithExtraOptions } from '../types'
+
+import { merge } from '@xsai-ext/providers/utils'
+import { objCamelToSnake } from '@xsai/shared'
+
+/**
+ * MiniMax TTS API options
+ * @see https://platform.minimaxi.com/docs/guides/speech-t2a-http
+ */
+export interface UnMinimaxOptions {
+  /**
+   * Speech speed. Range: 0.5-2.0
+   * @default 1.0
+   */
+  speed?: number
+  /**
+   * Volume. Range: (0, 10] — 0 is not a valid value
+   * @default 1.0
+   */
+  vol?: number
+  /**
+   * Pitch adjustment. Range: -12 to 12
+   * @default 0
+   */
+  pitch?: number
+  /**
+   * Emotion setting
+   * @see https://platform.minimaxi.com/docs/guides/speech-t2a-http#emotion
+   * @example "happy" | "sad" | "angry" | "fearful" | "disgusted" | "surprised" | "calm" | "fluent" | "whisper"
+   */
+  emotion?: string
+  /**
+   * Enable streaming output
+   * @default false
+   */
+  stream?: boolean
+  /**
+   * Sample rate for audio output
+   * @example 8000 | 16000 | 22050 | 24000 | 32000 | 44100
+   */
+  sampleRate?: number
+  /**
+   * Audio bitrate
+   * @example 32000 | 64000 | 128000 | 256000
+   */
+  bitrate?: number
+  /**
+   * Audio format
+   * @example "mp3" | "pcm" | "flac" | "wav"
+   * @default "mp3"
+   */
+  format?: 'mp3' | 'pcm' | 'flac' | 'wav'
+  /**
+   * Audio channel
+   * @example 1 | 2
+   * @default 1
+   */
+  channel?: number
+}
+
+/**
+ * MiniMax TTS models
+ * @see https://platform.minimaxi.com/docs/guides/speech-t2a-http#model
+ */
+export type MinimaxModel =
+  | 'speech-2.8-hd'
+  | 'speech-2.8-turbo'
+  | 'speech-2.6-hd'
+  | 'speech-2.6-turbo'
+  | 'speech-02-hd'
+  | 'speech-02-turbo'
+  | 'speech-01-hd'
+  | 'speech-01-turbo'
+
+/**
+ * [MiniMax](https://platform.minimaxi.com/) provider for [UnSpeech](https://github.com/moeru-ai/unspeech)
+ *
+ * @param apiKey - MiniMax API Key
+ * @param baseURL - UnSpeech Instance URL
+ * @returns SpeechProviderWithExtraOptions & VoiceProviderWithExtraOptions
+ */
+export function createUnMinimax(apiKey: string, baseURL = 'http://localhost:5933/v1/') {
+  const toUnSpeechOptions = ({
+    speed,
+    vol,
+    pitch,
+    emotion,
+    stream,
+    sampleRate,
+    bitrate,
+    format,
+    channel,
+  }: UnMinimaxOptions): UnSpeechOptions => ({
+    extraBody: objCamelToSnake({
+      speed,
+      vol,
+      pitch,
+      emotion,
+      stream,
+      sampleRate,
+      bitrate,
+      format,
+      channel,
+    }),
+  })
+
+  const speechProvider: SpeechProviderWithExtraOptions<
+    `minimax/${MinimaxModel}`,
+    UnMinimaxOptions
+  > = {
+    speech: (model, options) => ({
+      ...(options ? toUnSpeechOptions(options) : {}),
+      apiKey,
+      baseURL,
+      model: `minimax/${model}`,
+    }),
+  }
+
+  const voiceProvider: VoiceProviderWithExtraOptions<
+    UnMinimaxOptions
+  > = {
+    voice: (options) => {
+      if (baseURL.endsWith('v1/')) {
+        baseURL = baseURL.slice(0, -3)
+      }
+      else if (baseURL.endsWith('v1')) {
+        baseURL = baseURL.slice(0, -2)
+      }
+
+      return {
+        query: 'provider=minimax',
+        ...(options ? toUnSpeechOptions(options) : {}),
+        apiKey,
+        baseURL,
+      }
+    },
+  }
+
+  return merge(
+    speechProvider,
+    voiceProvider,
+  )
+}

From 118fc313c466f29c26da1e680d0c200be822e15c Mon Sep 17 00:00:00 2001
From: jhandsome <jhandsome06@gmail.com>
Date: Wed, 11 Mar 2026 03:50:56 +0800
Subject: [PATCH 6/9] docs: add MiniMax to supported providers list

---
 README.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/README.md b/README.md
index c3c570a..3ee26d3 100644
--- a/README.md
+++ b/README.md
@@ -12,6 +12,7 @@ unSpeech lets you use various online TTS with OpenAI-compatible API.
 - [Alibaba Cloud Model Studio / 阿里云百炼 / CosyVoice](https://www.alibabacloud.com/en/product/modelstudio)
 - [Volcano Engine / 火山引擎语音技术](https://www.volcengine.com/product/voice-tech)
 - [ElevenLabs](https://elevenlabs.io/docs/api-reference/text-to-speech/convert)
+- [MiniMax](https://platform.minimaxi.com/docs/guides/speech-t2a-http)
 - [Koemotion (by Rinna)](https://koemotion.rinna.co.jp/)
 
 ## Getting Started

From 2cdca2e5fe3e3c3d420dae70b947b57cddfc61ae Mon Sep 17 00:00:00 2001
From: jhandsome <jhandsome06@gmail.com>
Date: Wed, 11 Mar 2026 04:01:28 +0800
Subject: [PATCH 7/9] fix: change Speed field type from int to float64

The OpenAI API accepts speed values from 0.25 to 4.0, which requires
a float type. Changed the Speed field to float64 to match the API
specification and allow decimal values.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 pkg/backend/types/types.go | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pkg/backend/types/types.go b/pkg/backend/types/types.go
index db2bcb3..f5e0f20 100644
--- a/pkg/backend/types/types.go
+++ b/pkg/backend/types/types.go
@@ -27,7 +27,7 @@ type OpenAISpeechRequestOptions struct {
 	// The speed of the generated audio.
 	// Select a value from 0.25 to 4.0.
 	// 1.0 is the default.
-	Speed int `json:"speed,omitempty"`
+	Speed float64 `json:"speed,omitempty"`
 
 	// Extension: allows you to add custom content to body.
 	ExtraBody map[string]any `json:"extra_body,omitempty"`

From ce021af4410e44567fad79bed67fd84e959efc75 Mon Sep 17 00:00:00 2001
From: jhandsome <jhandsome06@gmail.com>
Date: Wed, 11 Mar 2026 04:38:44 +0800
Subject: [PATCH 8/9] fix: address code review comments

- Translate Chinese comments to English
- Extract common functions (buildVoiceSettings, buildAudioSettings, getContentType, handleHTTPError)
- Fix TypeScript baseURL side effect using local variable
- Replace string() conversion with bytes.NewReader
---
 pkg/backend/minimax/speech.go         | 304 ++++++++++++--------------
 pkg/backend/minimax/voices.go         |  43 ++--
 sdk/typescript/src/backend/minimax.ts |  11 +-
 3 files changed, 164 insertions(+), 194 deletions(-)

diff --git a/pkg/backend/minimax/speech.go b/pkg/backend/minimax/speech.go
index d328549..d3ce5ad 100644
--- a/pkg/backend/minimax/speech.go
+++ b/pkg/backend/minimax/speech.go
@@ -4,6 +4,7 @@ import (
 	"bytes"
 	"encoding/hex"
 	"encoding/json"
+	"io"
 	"log/slog"
 	"net/http"
 	"strings"
@@ -12,11 +13,10 @@ import (
 	"github.com/moeru-ai/unspeech/pkg/apierrors"
 	"github.com/moeru-ai/unspeech/pkg/backend/types"
 	"github.com/moeru-ai/unspeech/pkg/utils"
-	"github.com/samber/lo"
 	"github.com/samber/mo"
 )
 
-// VoiceSetting MiniMax 音色设置
+// VoiceSetting MiniMax voice settings
 type VoiceSetting struct {
 	VoiceID           string  `json:"voice_id"`
 	Speed             float64 `json:"speed,omitempty"`
@@ -27,7 +27,7 @@ type VoiceSetting struct {
 	LatexRead         string  `json:"latex_read,omitempty"`
 }
 
-// AudioSetting MiniMax 音频设置
+// AudioSetting MiniMax audio settings
 type AudioSetting struct {
 	SampleRate int    `json:"sample_rate,omitempty"`
 	Bitrate    int    `json:"bitrate,omitempty"`
@@ -35,7 +35,7 @@ type AudioSetting struct {
 	Channel    int    `json:"channel,omitempty"`
 }
 
-// TTSRequest MiniMax TTS 请求
+// TTSRequest MiniMax TTS request
 type TTSRequest struct {
 	Model        string        `json:"model"`
 	Text         string        `json:"text"`
@@ -45,14 +45,14 @@ type TTSRequest struct {
 	OutputFormat string        `json:"output_format,omitempty"`
 }
 
-// TTSResponseData MiniMax TTS 响应数据
+// TTSResponseData is the data payload of a MiniMax TTS response.
 type TTSResponseData struct {
 	Audio        string `json:"audio"`
 	SubtitleFile string `json:"subtitle_file,omitempty"`
 	Status       int    `json:"status"`
 }
 
-// TTSResponseExtraInfo MiniMax TTS 响应额外信息
+// TTSResponseExtraInfo holds the extra info of a MiniMax TTS response.
 type TTSResponseExtraInfo struct {
 	AudioLength     int    `json:"audio_length"`
 	AudioSampleRate int    `json:"audio_sample_rate"`
@@ -64,13 +64,13 @@ type TTSResponseExtraInfo struct {
 	UsageCharacters int    `json:"usage_characters"`
 }
 
-// TTSResponseBaseResp MiniMax TTS 响应基础信息
+// TTSResponseBaseResp holds the status code and message of a MiniMax TTS response.
 type TTSResponseBaseResp struct {
 	StatusCode int    `json:"status_code"`
 	StatusMsg  string `json:"status_msg"`
 }
 
-// TTSResponse MiniMax TTS 响应
+// TTSResponse is the MiniMax TTS response.
 type TTSResponse struct {
 	Data      TTSResponseData      `json:"data"`
 	TraceID   string              `json:"trace_id"`
@@ -78,22 +78,22 @@ type TTSResponse struct {
 	BaseResp  TTSResponseBaseResp  `json:"base_resp"`
 }
 
-// HandleSpeech 处理 MiniMax TTS 请求
+// HandleSpeech handles MiniMax TTS requests
 func HandleSpeech(c echo.Context, options mo.Option[types.SpeechRequestOptions]) mo.Result[any] {
 	opts := options.MustGet()
 
-	// 获取 token
+	// Get token
 	token := strings.TrimPrefix(c.Request().Header.Get("Authorization"), "Bearer ")
 
-	// 从 ExtraBody 获取 stream 参数
+	// Get stream parameter from ExtraBody
 	stream := utils.GetByJSONPath[bool](opts.ExtraBody, "{ .stream }")
 
-	// 如果是流式请求，使用流式处理
+	// If streaming, use streaming handler
 	if stream {
 		return handleStreamingSpeech(c, token, opts)
 	}
 
-	// 构建 MiniMax 请求
+	// Build MiniMax request
 	reqBody := TTSRequest{
 		Model:        opts.Model,
 		Text:         opts.Input,
@@ -101,70 +101,26 @@ func HandleSpeech(c echo.Context, options mo.Option[types.SpeechRequestOptions])
 		OutputFormat: "hex",
 	}
 
-	// 设置 voice_id（从用户传入的 voice 字段）
-	voiceID := opts.Voice
-	if voiceID != "" {
+	// Set voice_id from user input
+	if opts.Voice != "" {
 		reqBody.VoiceSetting = &VoiceSetting{
-			VoiceID: voiceID,
+			VoiceID: opts.Voice,
 		}
 	}
 
-	// 从 ExtraBody 获取其他参数
-	if speed := utils.GetByJSONPath[*float64](opts.ExtraBody, "{ .speed }"); speed != nil {
-		if reqBody.VoiceSetting == nil {
-			reqBody.VoiceSetting = &VoiceSetting{}
-		}
-		reqBody.VoiceSetting.Speed = *speed
-	}
+	// Build voice settings from ExtraBody
+	buildVoiceSettings(opts.ExtraBody, &reqBody)
 
-	if vol := utils.GetByJSONPath[*float64](opts.ExtraBody, "{ .vol }"); vol != nil {
-		if reqBody.VoiceSetting == nil {
-			reqBody.VoiceSetting = &VoiceSetting{}
-		}
-		reqBody.VoiceSetting.Vol = *vol
-	}
+	// Build audio settings from ExtraBody
+	buildAudioSettings(opts.ExtraBody, &reqBody)
 
-	if pitch := utils.GetByJSONPath[*float64](opts.ExtraBody, "{ .pitch }"); pitch != nil {
-		if reqBody.VoiceSetting == nil {
-			reqBody.VoiceSetting = &VoiceSetting{}
-		}
-		reqBody.VoiceSetting.Pitch = *pitch
-	}
-
-	if emotion := utils.GetByJSONPath[*string](opts.ExtraBody, "{ .emotion }"); emotion != nil {
-		if reqBody.VoiceSetting == nil {
-			reqBody.VoiceSetting = &VoiceSetting{}
-		}
-		reqBody.VoiceSetting.Emotion = *emotion
-	}
-
-	// 音频设置
-	if sampleRate := utils.GetByJSONPath[*int](opts.ExtraBody, "{ .sample_rate }"); sampleRate != nil {
-		reqBody.AudioSetting = &AudioSetting{}
-		reqBody.AudioSetting.SampleRate = *sampleRate
-	}
-
-	if bitrate := utils.GetByJSONPath[*int](opts.ExtraBody, "{ .bitrate }"); bitrate != nil {
-		if reqBody.AudioSetting == nil {
-			reqBody.AudioSetting = &AudioSetting{}
-		}
-		reqBody.AudioSetting.Bitrate = *bitrate
-	}
-
-	if format := utils.GetByJSONPath[*string](opts.ExtraBody, "{ .format }"); format != nil {
-		if reqBody.AudioSetting == nil {
-			reqBody.AudioSetting = &AudioSetting{}
-		}
-		reqBody.AudioSetting.Format = *format
-	}
-
-	// 序列化请求体
+	// Serialize request body
 	jsonBytes, err := json.Marshal(reqBody)
 	if err != nil {
 		return mo.Err[any](apierrors.NewErrInternal().WithDetail(err.Error()).WithCaller())
 	}
 
-	// 创建请求
+	// Create request
 	req, err := http.NewRequestWithContext(
 		c.Request().Context(),
 		http.MethodPost,
@@ -175,11 +131,11 @@ func HandleSpeech(c echo.Context, options mo.Option[types.SpeechRequestOptions])
 		return mo.Err[any](apierrors.NewErrInternal().WithDetail(err.Error()).WithCaller())
 	}
 
-	// 设置请求头
+	// Set request headers
 	req.Header.Set("Authorization", "Bearer "+token)
 	req.Header.Set("Content-Type", "application/json")
 
-	// 发送请求
+	// Send request
 	resp, err := http.DefaultClient.Do(req)
 	if err != nil {
 		return mo.Err[any](apierrors.NewErrBadGateway().WithDetail(err.Error()).WithError(err).WithCaller())
@@ -187,146 +143,173 @@ func HandleSpeech(c echo.Context, options mo.Option[types.SpeechRequestOptions])
 
 	defer func() { _ = resp.Body.Close() }()
 
-	// 检查 HTTP 状态码
+	// Check HTTP status code
 	if resp.StatusCode >= 400 && resp.StatusCode < 600 {
-		switch {
-		case strings.HasPrefix(resp.Header.Get("Content-Type"), "application/json"):
-			return mo.Err[any](apierrors.
-				NewUpstreamError(resp.StatusCode).
-				WithDetail(utils.NewJSONResponseError(resp.StatusCode, resp.Body).OrEmpty().Error()))
-		case strings.HasPrefix(resp.Header.Get("Content-Type"), "text/"):
-			return mo.Err[any](apierrors.
-				NewUpstreamError(resp.StatusCode).
-				WithDetail(utils.NewTextResponseError(resp.StatusCode, resp.Body).OrEmpty().Error()))
-		default:
-			slog.Warn("unknown upstream error with unknown Content-Type",
-				slog.Int("status", resp.StatusCode),
-				slog.String("content_type", resp.Header.Get("Content-Type")),
-			)
-		}
+		return handleHTTPError(resp)
 	}
 
-	// 解析响应
+	// Parse response
 	var ttsResp TTSResponse
 	err = json.NewDecoder(resp.Body).Decode(&ttsResp)
 	if err != nil {
 		return mo.Err[any](apierrors.NewErrBadGateway().WithDetail(err.Error()).WithError(err).WithCaller())
 	}
 
-	// 检查业务状态码
+	// Check business status code
 	if ttsResp.BaseResp.StatusCode != 0 {
 		return mo.Err[any](handleMinimaxError(ttsResp.BaseResp.StatusCode, ttsResp.BaseResp.StatusMsg))
 	}
 
-	// 解码 hex 音频
+	// Decode hex audio
 	audioBytes, err := hex.DecodeString(ttsResp.Data.Audio)
 	if err != nil {
 		return mo.Err[any](apierrors.NewErrInternal().WithDetail("failed to decode hex audio: "+err.Error()).WithError(err).WithCaller())
 	}
 
-	// 确定 Content-Type
-	contentType := "audio/mp3"
-	if reqBody.AudioSetting != nil && reqBody.AudioSetting.Format != "" {
-		contentType = lo.Ternary(reqBody.AudioSetting.Format == "pcm", "audio/pcm",
-			lo.Ternary(reqBody.AudioSetting.Format == "wav", "audio/wav",
-				lo.Ternary(reqBody.AudioSetting.Format == "flac", "audio/flac", "audio/mp3")))
-	}
+	// Determine content type
+	contentType := getContentType(reqBody.AudioSetting)
 
 	return mo.Ok[any](c.Blob(http.StatusOK, contentType, audioBytes))
 }
 
-// handleMinimaxError 处理 MiniMax 错误码
-func handleMinimaxError(code int, msg string) *apierrors.Error {
-	var httpStatus int
-	switch code {
-	case 1004: // 鉴权失败
-		httpStatus = http.StatusUnauthorized
-	case 1002, 1039: // 限流
-		httpStatus = http.StatusTooManyRequests
-	case 1042, 2013: // 参数错误
-		httpStatus = http.StatusBadRequest
-	case 1001: // 超时
-		httpStatus = http.StatusGatewayTimeout
-	default:
-		httpStatus = http.StatusBadGateway
-	}
-	return apierrors.NewUpstreamError(httpStatus).WithDetailf("minimax error: %d - %s", code, msg)
-}
-
-// handleStreamingSpeech 处理流式 TTS 请求
-func handleStreamingSpeech(c echo.Context, token string, opts types.SpeechRequestOptions) mo.Result[any] {
-	// 构建 MiniMax 请求
-	reqBody := TTSRequest{
-		Model:        opts.Model,
-		Text:         opts.Input,
-		Stream:       true,
-		OutputFormat: "hex",
-	}
-
-	// 设置 voice_id
-	voiceID := opts.Voice
-	if voiceID != "" {
-		reqBody.VoiceSetting = &VoiceSetting{
-			VoiceID: voiceID,
-		}
-	}
-
-	// 从 ExtraBody 获取其他参数
-	if speed := utils.GetByJSONPath[*float64](opts.ExtraBody, "{ .speed }"); speed != nil {
+// buildVoiceSettings builds VoiceSetting from ExtraBody
+func buildVoiceSettings(extraBody map[string]any, reqBody *TTSRequest) {
+	if speed := utils.GetByJSONPath[*float64](extraBody, "{ .speed }"); speed != nil {
 		if reqBody.VoiceSetting == nil {
 			reqBody.VoiceSetting = &VoiceSetting{}
 		}
 		reqBody.VoiceSetting.Speed = *speed
 	}
 
-	if vol := utils.GetByJSONPath[*float64](opts.ExtraBody, "{ .vol }"); vol != nil {
+	if vol := utils.GetByJSONPath[*float64](extraBody, "{ .vol }"); vol != nil {
 		if reqBody.VoiceSetting == nil {
 			reqBody.VoiceSetting = &VoiceSetting{}
 		}
 		reqBody.VoiceSetting.Vol = *vol
 	}
 
-	if pitch := utils.GetByJSONPath[*float64](opts.ExtraBody, "{ .pitch }"); pitch != nil {
+	if pitch := utils.GetByJSONPath[*float64](extraBody, "{ .pitch }"); pitch != nil {
 		if reqBody.VoiceSetting == nil {
 			reqBody.VoiceSetting = &VoiceSetting{}
 		}
 		reqBody.VoiceSetting.Pitch = *pitch
 	}
 
-	if emotion := utils.GetByJSONPath[*string](opts.ExtraBody, "{ .emotion }"); emotion != nil {
+	if emotion := utils.GetByJSONPath[*string](extraBody, "{ .emotion }"); emotion != nil {
 		if reqBody.VoiceSetting == nil {
 			reqBody.VoiceSetting = &VoiceSetting{}
 		}
 		reqBody.VoiceSetting.Emotion = *emotion
 	}
+}
 
-	// 音频设置
-	if sampleRate := utils.GetByJSONPath[*int](opts.ExtraBody, "{ .sample_rate }"); sampleRate != nil {
+// buildAudioSettings builds AudioSetting from ExtraBody
+func buildAudioSettings(extraBody map[string]any, reqBody *TTSRequest) {
+	if sampleRate := utils.GetByJSONPath[*int](extraBody, "{ .sample_rate }"); sampleRate != nil {
 		reqBody.AudioSetting = &AudioSetting{}
 		reqBody.AudioSetting.SampleRate = *sampleRate
 	}
 
-	if bitrate := utils.GetByJSONPath[*int](opts.ExtraBody, "{ .bitrate }"); bitrate != nil {
+	if bitrate := utils.GetByJSONPath[*int](extraBody, "{ .bitrate }"); bitrate != nil {
 		if reqBody.AudioSetting == nil {
 			reqBody.AudioSetting = &AudioSetting{}
 		}
 		reqBody.AudioSetting.Bitrate = *bitrate
 	}
 
-	if format := utils.GetByJSONPath[*string](opts.ExtraBody, "{ .format }"); format != nil {
+	if format := utils.GetByJSONPath[*string](extraBody, "{ .format }"); format != nil {
 		if reqBody.AudioSetting == nil {
 			reqBody.AudioSetting = &AudioSetting{}
 		}
 		reqBody.AudioSetting.Format = *format
 	}
+}
 
-	// 序列化请求体
+// getContentType returns MIME type based on audio format
+func getContentType(audioSetting *AudioSetting) string {
+	if audioSetting == nil || audioSetting.Format == "" {
+		return "audio/mp3"
+	}
+
+	contentTypes := map[string]string{
+		"pcm":  "audio/pcm",
+		"wav":  "audio/wav",
+		"flac": "audio/flac",
+		"mp3":  "audio/mp3",
+	}
+
+	if ct, ok := contentTypes[audioSetting.Format]; ok {
+		return ct
+	}
+	return "audio/mp3"
+}
+
+// handleHTTPError handles HTTP errors from upstream
+func handleHTTPError(resp *http.Response) mo.Result[any] {
+	switch {
+	case strings.HasPrefix(resp.Header.Get("Content-Type"), "application/json"):
+		return mo.Err[any](apierrors.
+			NewUpstreamError(resp.StatusCode).
+			WithDetail(utils.NewJSONResponseError(resp.StatusCode, resp.Body).OrEmpty().Error()))
+	case strings.HasPrefix(resp.Header.Get("Content-Type"), "text/"):
+		return mo.Err[any](apierrors.
+			NewUpstreamError(resp.StatusCode).
+			WithDetail(utils.NewTextResponseError(resp.StatusCode, resp.Body).OrEmpty().Error()))
+	default:
+		slog.Warn("unknown upstream error with unknown Content-Type",
+			slog.Int("status", resp.StatusCode),
+			slog.String("content_type", resp.Header.Get("Content-Type")),
+		)
+	}
+	return mo.Err[any](apierrors.NewUpstreamError(resp.StatusCode))
+}
+
+// handleMinimaxError handles MiniMax error codes
+func handleMinimaxError(code int, msg string) *apierrors.Error {
+	var httpStatus int
+	switch code {
+	case 1004: // Auth failed
+		httpStatus = http.StatusUnauthorized
+	case 1002, 1039: // Rate limit
+		httpStatus = http.StatusTooManyRequests
+	case 1042, 2013: // Invalid parameter
+		httpStatus = http.StatusBadRequest
+	case 1001: // Timeout
+		httpStatus = http.StatusGatewayTimeout
+	default:
+		httpStatus = http.StatusBadGateway
+	}
+	return apierrors.NewUpstreamError(httpStatus).WithDetailf("minimax error: %d - %s", code, msg)
+}
+
+// handleStreamingSpeech handles streaming TTS requests
+func handleStreamingSpeech(c echo.Context, token string, opts types.SpeechRequestOptions) mo.Result[any] {
+	// Build MiniMax request
+	reqBody := TTSRequest{
+		Model:        opts.Model,
+		Text:         opts.Input,
+		Stream:       true,
+		OutputFormat: "hex",
+	}
+
+	// Set voice_id
+	if opts.Voice != "" {
+		reqBody.VoiceSetting = &VoiceSetting{
+			VoiceID: opts.Voice,
+		}
+	}
+
+	// Build settings from ExtraBody
+	buildVoiceSettings(opts.ExtraBody, &reqBody)
+	buildAudioSettings(opts.ExtraBody, &reqBody)
+
+	// Serialize request body
 	jsonBytes, err := json.Marshal(reqBody)
 	if err != nil {
 		return mo.Err[any](apierrors.NewErrInternal().WithDetail(err.Error()).WithCaller())
 	}
 
-	// 创建请求
+	// Create request
 	req, err := http.NewRequestWithContext(
 		c.Request().Context(),
 		http.MethodPost,
@@ -337,11 +320,11 @@ func handleStreamingSpeech(c echo.Context, token string, opts types.SpeechReques
 		return mo.Err[any](apierrors.NewErrInternal().WithDetail(err.Error()).WithCaller())
 	}
 
-	// 设置请求头
+	// Set request headers
 	req.Header.Set("Authorization", "Bearer "+token)
 	req.Header.Set("Content-Type", "application/json")
 
-	// 发送请求
+	// Send request
 	resp, err := http.DefaultClient.Do(req)
 	if err != nil {
 		return mo.Err[any](apierrors.NewErrBadGateway().WithDetail(err.Error()).WithError(err).WithCaller())
@@ -349,61 +332,46 @@ func handleStreamingSpeech(c echo.Context, token string, opts types.SpeechReques
 
 	defer func() { _ = resp.Body.Close() }()
 
-	// 检查 HTTP 状态码
+	// Check HTTP status code
 	if resp.StatusCode >= 400 && resp.StatusCode < 600 {
-		switch {
-		case strings.HasPrefix(resp.Header.Get("Content-Type"), "application/json"):
-			return mo.Err[any](apierrors.
-				NewUpstreamError(resp.StatusCode).
-				WithDetail(utils.NewJSONResponseError(resp.StatusCode, resp.Body).OrEmpty().Error()))
-		default:
-			slog.Warn("unknown upstream error with unknown Content-Type",
-				slog.Int("status", resp.StatusCode),
-				slog.String("content_type", resp.Header.Get("Content-Type")),
-			)
-		}
+		return handleHTTPError(resp)
 	}
 
-	// 流式处理：持续读取响应直到 status == 2
+	// Streaming: read response until status == 2
 	decoder := json.NewDecoder(resp.Body)
 	audioHex := new(strings.Builder)
 
 	for {
 		var ttsResp TTSResponse
 		if err := decoder.Decode(&ttsResp); err != nil {
-			if err.Error() == "EOF" {
+			if err == io.EOF {
 				break
 			}
 			return mo.Err[any](apierrors.NewErrBadGateway().WithDetail(err.Error()).WithError(err).WithCaller())
 		}
 
-		// 检查业务状态码
+		// Check business status code
 		if ttsResp.BaseResp.StatusCode != 0 {
 			return mo.Err[any](handleMinimaxError(ttsResp.BaseResp.StatusCode, ttsResp.BaseResp.StatusMsg))
 		}
 
-		// 追加音频数据
+		// Append audio data
 		audioHex.WriteString(ttsResp.Data.Audio)
 
-		// status == 2 表示合成结束
+		// status == 2 means synthesis complete
 		if ttsResp.Data.Status == 2 {
 			break
 		}
 	}
 
-	// 解码 hex 音频
+	// Decode hex audio
 	audioBytes, err := hex.DecodeString(audioHex.String())
 	if err != nil {
 		return mo.Err[any](apierrors.NewErrInternal().WithDetail("failed to decode hex audio: "+err.Error()).WithError(err).WithCaller())
 	}
 
-	// 确定 Content-Type
-	contentType := "audio/mp3"
-	if reqBody.AudioSetting != nil && reqBody.AudioSetting.Format != "" {
-		contentType = lo.Ternary(reqBody.AudioSetting.Format == "pcm", "audio/pcm",
-			lo.Ternary(reqBody.AudioSetting.Format == "wav", "audio/wav",
-				lo.Ternary(reqBody.AudioSetting.Format == "flac", "audio/flac", "audio/mp3")))
-	}
+	// Determine content type
+	contentType := getContentType(reqBody.AudioSetting)
 
 	return mo.Ok[any](c.Blob(http.StatusOK, contentType, audioBytes))
 }
diff --git a/pkg/backend/minimax/voices.go b/pkg/backend/minimax/voices.go
index 9603c72..6e37afd 100644
--- a/pkg/backend/minimax/voices.go
+++ b/pkg/backend/minimax/voices.go
@@ -1,6 +1,7 @@
 package minimax
 
 import (
+	"bytes"
 	"encoding/json"
 	"log/slog"
 	"net/http"
@@ -13,39 +14,39 @@ import (
 	"github.com/samber/mo"
 )
 
-// GetVoiceReq 获取音色列表请求
+// GetVoiceReq is the request body for getting the voice list.
 type GetVoiceReq struct {
 	VoiceType string `json:"voice_type"`
 }
 
-// SystemVoice 系统音色
+// SystemVoice describes a system voice.
 type SystemVoice struct {
 	VoiceID     string   `json:"voice_id"`
 	VoiceName   string   `json:"voice_name"`
 	Description []string `json:"description"`
 }
 
-// VoiceCloning 快速复刻音色
+// VoiceCloning describes a quick-cloned voice.
 type VoiceCloning struct {
 	VoiceID     string   `json:"voice_id"`
 	Description []string `json:"description"`
 	CreatedTime string   `json:"created_time"`
 }
 
-// VoiceGeneration 文生音色
+// VoiceGeneration describes a text-generated voice.
 type VoiceGeneration struct {
 	VoiceID     string   `json:"voice_id"`
 	Description []string `json:"description"`
 	CreatedTime string   `json:"created_time"`
 }
 
-// BaseResp 基础响应
+// BaseResp holds the response status code and message.
 type BaseResp struct {
 	StatusCode int    `json:"status_code"`
 	StatusMsg  string `json:"status_msg"`
 }
 
-// GetVoiceResp 获取音色列表响应
+// GetVoiceResp is the response for getting the voice list.
 type GetVoiceResp struct {
 	SystemVoice      []SystemVoice     `json:"system_voice"`
 	VoiceCloning    []VoiceCloning   `json:"voice_cloning"`
@@ -54,7 +55,7 @@ type GetVoiceResp struct {
 }
 
 var (
-	// 支持的音频格式
+	// Supported audio formats
 	formats = []types.VoiceFormat{
 		{Name: "MP3", Extension: ".mp3", MimeType: "audio/mpeg"},
 		{Name: "PCM", Extension: ".pcm", MimeType: "audio/pcm"},
@@ -63,12 +64,12 @@ var (
 	}
 )
 
-// HandleVoices 处理获取音色列表请求
+// HandleVoices handles requests for the voice list.
 func HandleVoices(c echo.Context, options mo.Option[types.VoicesRequestOptions]) mo.Result[any] {
-	// 获取 token
+	// Get token
 	token := strings.TrimPrefix(c.Request().Header.Get("Authorization"), "Bearer ")
 
-	// 构建请求
+	// Build request
 	reqBody := GetVoiceReq{
 		VoiceType: "all",
 	}
@@ -78,22 +79,22 @@ func HandleVoices(c echo.Context, options mo.Option[types.VoicesRequestOptions])
 		return mo.Err[any](apierrors.NewErrInternal().WithDetail(err.Error()).WithCaller())
 	}
 
-	// 创建请求
+	// Create request
 	req, err := http.NewRequestWithContext(
 		c.Request().Context(),
 		http.MethodPost,
 		"https://api.minimaxi.com/v1/get_voice",
-		strings.NewReader(string(jsonBytes)),
+		bytes.NewReader(jsonBytes),
 	)
 	if err != nil {
 		return mo.Err[any](apierrors.NewErrInternal().WithDetail(err.Error()).WithCaller())
 	}
 
-	// 设置请求头
+	// Set request headers
 	req.Header.Set("Authorization", "Bearer "+token)
 	req.Header.Set("Content-Type", "application/json")
 
-	// 发送请求
+	// Send request
 	resp, err := http.DefaultClient.Do(req)
 	if err != nil {
 		return mo.Err[any](apierrors.NewErrBadGateway().WithDetail(err.Error()).WithError(err).WithCaller())
@@ -101,7 +102,7 @@ func HandleVoices(c echo.Context, options mo.Option[types.VoicesRequestOptions])
 
 	defer func() { _ = resp.Body.Close() }()
 
-	// 检查 HTTP 状态码
+	// Check HTTP status code
 	if resp.StatusCode >= 400 && resp.StatusCode < 600 {
 		switch {
 		case strings.HasPrefix(resp.Header.Get("Content-Type"), "application/json"):
@@ -116,22 +117,22 @@ func HandleVoices(c echo.Context, options mo.Option[types.VoicesRequestOptions])
 		}
 	}
 
-	// 解析响应
+	// Parse response
 	var voiceResp GetVoiceResp
 	err = json.NewDecoder(resp.Body).Decode(&voiceResp)
 	if err != nil {
 		return mo.Err[any](apierrors.NewErrBadGateway().WithDetail(err.Error()).WithError(err).WithCaller())
 	}
 
-	// 检查业务状态码
+	// Check business status code
 	if voiceResp.BaseResp.StatusCode != 0 {
 		return mo.Err[any](handleMinimaxError(voiceResp.BaseResp.StatusCode, voiceResp.BaseResp.StatusMsg))
 	}
 
-	// 转换音色列表
+	// Convert voice list
 	voices := make([]types.Voice, 0, len(voiceResp.SystemVoice)+len(voiceResp.VoiceCloning)+len(voiceResp.VoiceGeneration))
 
-	// 添加系统音色
+	// Add system voices
 	for _, v := range voiceResp.SystemVoice {
 		voices = append(voices, types.Voice{
 			ID:          v.VoiceID,
@@ -148,7 +149,7 @@ func HandleVoices(c echo.Context, options mo.Option[types.VoicesRequestOptions])
 		})
 	}
 
-	// 添加快速复刻音色
+	// Add quick-cloned voices
 	for _, v := range voiceResp.VoiceCloning {
 		voices = append(voices, types.Voice{
 			ID:          v.VoiceID,
@@ -166,7 +167,7 @@ func HandleVoices(c echo.Context, options mo.Option[types.VoicesRequestOptions])
 		})
 	}
 
-	// 添加文生音色
+	// Add text-generated voices
 	for _, v := range voiceResp.VoiceGeneration {
 		voices = append(voices, types.Voice{
 			ID:          v.VoiceID,
diff --git a/sdk/typescript/src/backend/minimax.ts b/sdk/typescript/src/backend/minimax.ts
index 7246dbf..482553e 100644
--- a/sdk/typescript/src/backend/minimax.ts
+++ b/sdk/typescript/src/backend/minimax.ts
@@ -122,18 +122,19 @@ export function createUnMinimax(apiKey: string, baseURL = 'http://localhost:5933
     UnMinimaxOptions
   > = {
     voice: (options) => {
-      if (baseURL.endsWith('v1/')) {
-        baseURL = baseURL.slice(0, -3)
+      let adjustedBaseURL = baseURL
+      if (adjustedBaseURL.endsWith('v1/')) {
+        adjustedBaseURL = adjustedBaseURL.slice(0, -3)
       }
-      else if (baseURL.endsWith('v1')) {
-        baseURL = baseURL.slice(0, -2)
+      else if (adjustedBaseURL.endsWith('v1')) {
+        adjustedBaseURL = adjustedBaseURL.slice(0, -2)
       }
 
       return {
         query: 'provider=minimax',
         ...(options ? toUnSpeechOptions(options) : {}),
         apiKey,
-        baseURL,
+        baseURL: adjustedBaseURL,
       }
     },
   }

From 21b9a2792921e75dff8f66b38f244ea0f0d30213 Mon Sep 17 00:00:00 2001
From: jhandsome <jhandsome06@gmail.com>
Date: Wed, 11 Mar 2026 20:41:33 +0800
Subject: [PATCH 9/9] feat(sdk): export createUnMinimax from TypeScript SDK

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 sdk/typescript/src/index.ts | 1 +
 1 file changed, 1 insertion(+)

diff --git a/sdk/typescript/src/index.ts b/sdk/typescript/src/index.ts
index 0a43b7b..3e41b5f 100644
--- a/sdk/typescript/src/index.ts
+++ b/sdk/typescript/src/index.ts
@@ -4,6 +4,7 @@ export {
   createUnDeepgram,
   createUnElevenLabs,
   createUnMicrosoft,
+  createUnMinimax,
   createUnSpeech,
   createUnVolcengine,
 } from './backend'