diff --git a/.gitignore b/.gitignore
index cd18f3a6..d14f5668 100644
--- a/.gitignore
+++ b/.gitignore
@@ -9,3 +9,4 @@ dist
 vhs
 .idea/
 .vscode/
+vhs-dev
diff --git a/command.go b/command.go
index 4e5b0d06..def4fe71 100644
--- a/command.go
+++ b/command.go
@@ -70,6 +70,7 @@ var CommandFuncs = map[parser.CommandType]CommandFunc{
 	token.PASTE:    ExecutePaste,
 	token.ENV:      ExecuteEnv,
 	token.WAIT:     ExecuteWait,
+	token.SUBTITLE: ExecuteSubtitle,
 }
 
 // ExecuteNoop is a no-op command that does nothing.
@@ -476,9 +477,16 @@ var Settings = map[string]CommandFunc{
 	"WindowBar":     ExecuteSetWindowBar,
 	"WindowBarSize": ExecuteSetWindowBarSize,
 	"BorderRadius":  ExecuteSetBorderRadius,
-	"WaitPattern":   ExecuteSetWaitPattern,
-	"WaitTimeout":   ExecuteSetWaitTimeout,
-	"CursorBlink":   ExecuteSetCursorBlink,
+	"WaitPattern":          ExecuteSetWaitPattern,
+	"WaitTimeout":          ExecuteSetWaitTimeout,
+	"CursorBlink":          ExecuteSetCursorBlink,
+	"SubtitleFontSize":     ExecuteSetSubtitleFontSize,
+	"SubtitleFontFamily":   ExecuteSetSubtitleFontFamily,
+	"SubtitleColor":        ExecuteSetSubtitleColor,
+	"SubtitleBackground":   ExecuteSetSubtitleBackground,
+	"SubtitlePosition":     ExecuteSetSubtitlePosition,
+	"SubtitlePadding":      ExecuteSetSubtitlePadding,
+	"SubtitleBorderRadius": ExecuteSetSubtitleBorderRadius,
 }
 
 // ExecuteSet applies the settings on the running vhs specified by the
@@ -773,3 +781,87 @@ func getJSONTheme(s string) (Theme, error) {
 	}
 	return t, nil
 }
+
+// ExecuteSubtitle records the subtitle text to show from this point of the
+// tape onwards and marks the recording as using an overlay; an empty argument
+// hides the subtitle. Nothing is drawn here, only the state is stored.
+func ExecuteSubtitle(c parser.Command, v *VHS) error {
+	v.mu.Lock()
+	defer v.mu.Unlock()
+	v.subtitleText = c.Args
+	v.hasOverlay = true
+	return nil
+}
+
+// parseSubtitleInt converts the argument of the named subtitle setting to an
+// integer and rejects values below lowest.
+func parseSubtitleInt(setting, raw string, lowest int) (int, error) {
+	n, err := strconv.Atoi(raw)
+	if err != nil {
+		return 0, fmt.Errorf("invalid %s: %w", setting, err)
+	}
+	if n < lowest {
+		return 0, fmt.Errorf("invalid %s: %d (expected at least %d)", setting, n, lowest)
+	}
+	return n, nil
+}
+
+// ExecuteSetSubtitleFontSize applies the subtitle font size, a positive integer.
+func ExecuteSetSubtitleFontSize(c parser.Command, v *VHS) error {
+	fontSize, err := parseSubtitleInt("SubtitleFontSize", c.Args, 1)
+	if err != nil {
+		return err
+	}
+	v.Options.Subtitle.FontSize = fontSize
+	return nil
+}
+
+// ExecuteSetSubtitleFontFamily applies the subtitle font family.
+func ExecuteSetSubtitleFontFamily(c parser.Command, v *VHS) error {
+	v.Options.Subtitle.FontFamily = c.Args
+	return nil
+}
+
+// ExecuteSetSubtitleColor applies the subtitle text color.
+func ExecuteSetSubtitleColor(c parser.Command, v *VHS) error {
+	v.Options.Subtitle.Color = c.Args
+	return nil
+}
+
+// ExecuteSetSubtitleBackground applies the subtitle background color.
+func ExecuteSetSubtitleBackground(c parser.Command, v *VHS) error {
+	v.Options.Subtitle.Background = c.Args
+	return nil
+}
+
+// ExecuteSetSubtitlePosition applies the subtitle position, which must be
+// top, center, or bottom (matched case-insensitively, stored in lower case).
+func ExecuteSetSubtitlePosition(c parser.Command, v *VHS) error {
+	switch pos := strings.ToLower(c.Args); pos {
+	case "top", "center", "bottom":
+		v.Options.Subtitle.Position = pos
+		return nil
+	default:
+		return fmt.Errorf("invalid SubtitlePosition: %s (expected top, center, or bottom)", c.Args)
+	}
+}
+
+// ExecuteSetSubtitlePadding applies the subtitle padding, a non-negative integer.
+func ExecuteSetSubtitlePadding(c parser.Command, v *VHS) error {
+	padding, err := parseSubtitleInt("SubtitlePadding", c.Args, 0)
+	if err != nil {
+		return err
+	}
+	v.Options.Subtitle.Padding = padding
+	return nil
+}
+
+// ExecuteSetSubtitleBorderRadius applies the subtitle border radius, a non-negative integer.
+func ExecuteSetSubtitleBorderRadius(c parser.Command, v *VHS) error {
+	radius, err := parseSubtitleInt("SubtitleBorderRadius", c.Args, 0)
+	if err != nil {
+		return err
+	}
+	v.Options.Subtitle.BorderRadius = radius
+	return nil
+}
diff --git a/command_test.go b/command_test.go
index 19ddc98a..0c811afd 100644
--- a/command_test.go
+++ b/command_test.go
@@ -8,12 +8,12 @@ import (
 )
 
 func TestCommand(t *testing.T) {
-	const numberOfCommands = 31
+	const numberOfCommands = 32
 	if len(parser.CommandTypes) != numberOfCommands {
 		t.Errorf("Expected %d commands, got %d", numberOfCommands, len(parser.CommandTypes))
 	}
 
-	const numberOfCommandFuncs = 31
+	const numberOfCommandFuncs = 32
 	if len(CommandFuncs) != numberOfCommandFuncs {
 		t.Errorf("Expected %d commands, got %d", numberOfCommandFuncs, len(CommandFuncs))
 	}
diff --git a/evaluator.go b/evaluator.go
index 5768b854..08c2b71d 100644
--- a/evaluator.go
+++ b/evaluator.go
@@ -110,6 +110,14 @@ func Evaluate(ctx context.Context, tape string, out io.Writer, opts ...Evaluator
 		}
 	}
 
+	// Check if any Subtitle commands exist — if so, enable overlay stream
+	for _, cmd := range cmds {
+		if cmd.Type == token.SUBTITLE {
+			v.hasOverlay = true
+			break
+		}
+	}
+
 	// Begin recording frames as we are now in a recording state.
 	ctx, cancel := context.WithCancel(ctx) //nolint:gosec
 	ch := v.Record(ctx)
diff --git a/ffmpeg.go b/ffmpeg.go
index d35823af..c707b7dc 100644
--- a/ffmpeg.go
+++ b/ffmpeg.go
@@ -168,6 +168,22 @@ func (fb *FilterComplexBuilder) WithMarginFill(marginStream int) *FilterComplexB
 	return fb
 }
 
+// WithOverlay adds the overlay stream on top of the styled terminal frame.
+// The overlay canvas is pre-sized to match the full output dimensions.
+func (fb *FilterComplexBuilder) WithOverlay(overlayStream int) *FilterComplexBuilder {
+	fb.filterComplex.WriteString(";")
+	_, _ = fmt.Fprintf(
+		fb.filterComplex,
+		`
+		[%s][%d]overlay=0:0[withoverlay]
+		`,
+		fb.prevStageName,
+		overlayStream,
+	)
+	fb.prevStageName = "withoverlay"
+	return fb
+}
+
 // WithGIF adds gif options to ffmepg filter_complex.
 func (fb *FilterComplexBuilder) WithGIF() *FilterComplexBuilder {
 	fb.filterComplex.WriteString(";")
@@ -200,9 +216,10 @@ type StreamBuilder struct {
 	termWidth    int
 	termHeight   int
 	input        string
-	barStream    int
-	cornerStream int
-	marginStream int
+	barStream     int
+	cornerStream  int
+	marginStream  int
+	overlayStream int
 }
 
 // NewStreamBuilder returns instance of StreamBuilder.
diff --git a/parser/parser.go b/parser/parser.go
index 39a4dcb4..cea65b65 100644
--- a/parser/parser.go
+++ b/parser/parser.go
@@ -58,6 +58,7 @@ var CommandTypes = []CommandType{
 	token.COPY,
 	token.PASTE,
 	token.ENV,
+	token.SUBTITLE,
 }
 
 // String returns the string representation of the command.
@@ -185,6 +186,8 @@ func (p *Parser) parseCommand() []Command {
 		return []Command{p.parsePaste()}
 	case token.ENV:
 		return []Command{p.parseEnv()}
+	case token.SUBTITLE:
+		return []Command{p.parseSubtitle()}
 	default:
 		p.errors = append(p.errors, NewError(p.cur, "Invalid command: "+p.cur.Literal))
 		return []Command{{Type: token.ILLEGAL}}
@@ -665,6 +668,32 @@ func (p *Parser) parseEnv() Command {
 	return cmd
 }
 
+// parseSubtitle parses a Subtitle command.
+//
+// Subtitle takes one or more string literals; each literal is appended to
+// the text to display, preceded by a space once the text is non-empty.
+// An empty text hides the subtitle.
+//
+//	Subtitle "text to display"
+//	Subtitle ""
+func (p *Parser) parseSubtitle() Command {
+	if p.peek.Type != token.STRING {
+		p.errors = append(p.errors, NewError(p.peek, "Subtitle expects string"))
+		return Command{Type: token.SUBTITLE}
+	}
+
+	var text string
+	for p.peek.Type == token.STRING {
+		p.nextToken()
+		if text != "" {
+			text += " "
+		}
+		text += p.cur.Literal
+	}
+
+	return Command{Type: token.SUBTITLE, Args: text}
+}
+
 // parseSource parses source command.
 // Source command takes a tape path to include in current tape.
// diff --git a/parser/parser_test.go b/parser/parser_test.go index fd3a4467..9001653b 100644 --- a/parser/parser_test.go +++ b/parser/parser_test.go @@ -481,3 +481,53 @@ func TestParseScreeenshot(t *testing.T) { test.run(t) }) } + +func TestParseSubtitle(t *testing.T) { + t.Run("subtitle with text", func(t *testing.T) { + l := lexer.New(`Subtitle "Hello World"`) + p := New(l) + cmds := p.Parse() + + if len(p.errors) > 0 { + t.Fatalf("unexpected errors: %v", p.errors) + } + if len(cmds) != 1 { + t.Fatalf("expected 1 command, got %d", len(cmds)) + } + if cmds[0].Type != token.SUBTITLE { + t.Errorf("expected SUBTITLE, got %s", cmds[0].Type) + } + if cmds[0].Args != "Hello World" { + t.Errorf("expected 'Hello World', got '%s'", cmds[0].Args) + } + }) + + t.Run("subtitle with empty string hides", func(t *testing.T) { + l := lexer.New(`Subtitle ""`) + p := New(l) + cmds := p.Parse() + + if len(p.errors) > 0 { + t.Fatalf("unexpected errors: %v", p.errors) + } + if len(cmds) != 1 { + t.Fatalf("expected 1 command, got %d", len(cmds)) + } + if cmds[0].Args != "" { + t.Errorf("expected empty string, got '%s'", cmds[0].Args) + } + }) + + t.Run("subtitle with multiple words", func(t *testing.T) { + l := lexer.New(`Subtitle "This is a longer subtitle message"`) + p := New(l) + cmds := p.Parse() + + if len(p.errors) > 0 { + t.Fatalf("unexpected errors: %v", p.errors) + } + if cmds[0].Args != "This is a longer subtitle message" { + t.Errorf("expected full message, got '%s'", cmds[0].Args) + } + }) +} diff --git a/token/token.go b/token/token.go index d0d98a1a..ba3cf1f3 100644 --- a/token/token.go +++ b/token/token.go @@ -105,6 +105,15 @@ const ( WAIT_TIMEOUT = "WAIT_TIMEOUT" WAIT_PATTERN = "WAIT_PATTERN" CURSOR_BLINK = "CURSOR_BLINK" + + SUBTITLE = "SUBTITLE" + SUBTITLE_FONT_SIZE = "SUBTITLE_FONT_SIZE" + SUBTITLE_COLOR = "SUBTITLE_COLOR" + SUBTITLE_BACKGROUND = "SUBTITLE_BACKGROUND" + SUBTITLE_POSITION = "SUBTITLE_POSITION" + SUBTITLE_PADDING = "SUBTITLE_PADDING" + 
SUBTITLE_BORDER_RADIUS = "SUBTITLE_BORDER_RADIUS" + SUBTITLE_FONT_FAMILY = "SUBTITLE_FONT_FAMILY" ) // Keywords maps keyword strings to tokens. @@ -168,7 +177,15 @@ var Keywords = map[string]Type{ "Screenshot": SCREENSHOT, "Copy": COPY, "Paste": PASTE, - "Env": ENV, + "Env": ENV, + "Subtitle": SUBTITLE, + "SubtitleFontSize": SUBTITLE_FONT_SIZE, + "SubtitleColor": SUBTITLE_COLOR, + "SubtitleBackground": SUBTITLE_BACKGROUND, + "SubtitlePosition": SUBTITLE_POSITION, + "SubtitlePadding": SUBTITLE_PADDING, + "SubtitleBorderRadius": SUBTITLE_BORDER_RADIUS, + "SubtitleFontFamily": SUBTITLE_FONT_FAMILY, } // IsSetting returns whether a token is a setting. @@ -177,7 +194,10 @@ func IsSetting(t Type) bool { case SHELL, FONT_FAMILY, FONT_SIZE, LETTER_SPACING, LINE_HEIGHT, FRAMERATE, TYPING_SPEED, THEME, PLAYBACK_SPEED, HEIGHT, WIDTH, PADDING, LOOP_OFFSET, MARGIN_FILL, MARGIN, WINDOW_BAR, - WINDOW_BAR_SIZE, BORDER_RADIUS, CURSOR_BLINK, WAIT_TIMEOUT, WAIT_PATTERN: + WINDOW_BAR_SIZE, BORDER_RADIUS, CURSOR_BLINK, WAIT_TIMEOUT, WAIT_PATTERN, + SUBTITLE_FONT_SIZE, SUBTITLE_COLOR, SUBTITLE_BACKGROUND, + SUBTITLE_POSITION, SUBTITLE_PADDING, SUBTITLE_BORDER_RADIUS, + SUBTITLE_FONT_FAMILY: return true default: return false @@ -190,7 +210,8 @@ func IsCommand(t Type) bool { case TYPE, SLEEP, UP, DOWN, RIGHT, LEFT, PAGE_UP, PAGE_DOWN, SCROLL_UP, SCROLL_DOWN, ENTER, BACKSPACE, DELETE, TAB, - ESCAPE, HOME, INSERT, END, CTRL, SOURCE, SCREENSHOT, COPY, PASTE, WAIT: + ESCAPE, HOME, INSERT, END, CTRL, SOURCE, SCREENSHOT, COPY, PASTE, WAIT, + SUBTITLE: return true default: return false diff --git a/vhs.go b/vhs.go index 235e7b42..3a30b0bf 100644 --- a/vhs.go +++ b/vhs.go @@ -21,21 +21,49 @@ import ( // VHS is the object that controls the setup. 
type VHS struct { - Options *Options - Errors []error - Page *rod.Page - browser *rod.Browser - TextCanvas *rod.Element - CursorCanvas *rod.Element - mutex *sync.Mutex - started bool - recording bool - tty *exec.Cmd - totalFrames int - close func() error + Options *Options + Errors []error + Page *rod.Page + browser *rod.Browser + TextCanvas *rod.Element + CursorCanvas *rod.Element + OverlayCanvas *rod.Element + mutex *sync.Mutex + mu sync.Mutex // protects subtitle state + started bool + recording bool + tty *exec.Cmd + totalFrames int + close func() error + subtitleText string // current subtitle text (empty = hidden) + hasOverlay bool // true if any overlay was used during recording } // Options is the set of options for the setup. +// SubtitleOptions holds configuration for subtitle overlays. +type SubtitleOptions struct { + FontSize int + FontFamily string + Color string + Background string + Position string // "top", "center", "bottom" + Padding int + BorderRadius int +} + +// DefaultSubtitleOptions returns sane defaults for subtitles. +func DefaultSubtitleOptions() SubtitleOptions { + return SubtitleOptions{ + FontSize: 24, + FontFamily: "system-ui, -apple-system, sans-serif", + Color: "#ffffff", + Background: "rgba(0,0,0,0.75)", + Position: "bottom", + Padding: 12, + BorderRadius: 8, + } +} + type Options struct { Shell Shell FontFamily string @@ -52,6 +80,7 @@ type Options struct { CursorBlink bool Screenshot ScreenshotOptions Style StyleOptions + Subtitle SubtitleOptions } const ( @@ -107,6 +136,7 @@ func DefaultVHSOptions() Options { Screenshot: screenshot, WaitTimeout: defaultWaitTimeout, WaitPattern: defaultWaitPattern, + Subtitle: DefaultSubtitleOptions(), } } @@ -177,6 +207,20 @@ func (vhs *VHS) Setup() { vhs.TextCanvas, _ = vhs.Page.Element("canvas.xterm-text-layer") vhs.CursorCanvas, _ = vhs.Page.Element("canvas.xterm-cursor-layer") + // Create an overlay canvas sized to the full output dimensions (including padding/margins). 
+	// This ensures subtitle positions are relative to the final styled frame.
+	outWidth := vhs.Options.Video.Style.Width
+	outHeight := vhs.Options.Video.Style.Height
+	vhs.Page.MustEval(fmt.Sprintf(`() => {
+		const overlay = document.createElement('canvas');
+		overlay.id = 'vhs-overlay';
+		overlay.width = %d;
+		overlay.height = %d;
+		overlay.style.cssText = 'position:absolute; left:-9999px; top:-9999px;';
+		document.body.appendChild(overlay);
+	}`, outWidth, outHeight))
+	vhs.OverlayCanvas, _ = vhs.Page.Element("#vhs-overlay")
+
 	// Apply options to the terminal
 	// By this point the setting commands have been executed, so the `opts` struct is up to date.
 	vhs.Page.MustEval(fmt.Sprintf("() => { term.options = { fontSize: %d, fontFamily: '%s', letterSpacing: %f, lineHeight: %f, theme: %s, cursorBlink: %t } }",
@@ -208,6 +252,76 @@ func (vhs *VHS) terminate() error {
 	return vhs.tty.Process.Kill()
 }
 
+// renderOverlay repaints the overlay canvas so that it shows only text as a
+// subtitle; an empty text leaves the canvas cleared.
+//
+// The text and the subtitle options are handed to the page as evaluation
+// arguments instead of being spliced into the script, so quotes, backslashes,
+// or line breaks in tape-provided values cannot terminate a string literal
+// and break (or inject code into) the drawing script. Failures are logged
+// rather than returned or panicked on.
+func (vhs *VHS) renderOverlay(text string) {
+	opts := vhs.Options.Subtitle
+
+	// Distance kept between the subtitle box and the top or bottom edge.
+	edgeOffset := opts.Padding + 20
+
+	_, err := vhs.Page.Eval(`(text, fontSize, fontFamily, padding, borderRadius, position, edgeOffset, background, color) => {
+		const c = document.getElementById('vhs-overlay');
+		if (!c) return;
+		const ctx = c.getContext('2d');
+		ctx.clearRect(0, 0, c.width, c.height);
+		if (text === '') return;
+
+		ctx.font = fontSize + 'px ' + fontFamily;
+		ctx.textAlign = 'center';
+		ctx.textBaseline = 'middle';
+
+		const metrics = ctx.measureText(text);
+		const textWidth = metrics.width;
+		const boxWidth = textWidth + padding * 4;
+		const boxHeight = fontSize * 1.4 + padding * 2;
+		const x = c.width / 2;
+
+		// Vertical centre of the box; anything but top/center means bottom.
+		let y = c.height - edgeOffset - boxHeight / 2;
+		if (position === 'top') {
+			y = edgeOffset + boxHeight / 2;
+		} else if (position === 'center') {
+			y = c.height / 2;
+		}
+
+		// Draw background pill (with roundRect fallback)
+		const bx = x - boxWidth / 2;
+		const by = y - boxHeight / 2;
+		ctx.fillStyle = background;
+		ctx.beginPath();
+		if (ctx.roundRect) {
+			ctx.roundRect(bx, by, boxWidth, boxHeight, borderRadius);
+		} else {
+			ctx.rect(bx, by, boxWidth, boxHeight);
+		}
+		ctx.fill();
+
+		// Draw text
+		ctx.fillStyle = color;
+		ctx.fillText(text, x, y);
+	}`,
+		text,
+		opts.FontSize,
+		opts.FontFamily,
+		opts.Padding,
+		opts.BorderRadius,
+		opts.Position,
+		edgeOffset,
+		opts.Background,
+		opts.Color,
+	)
+	if err != nil {
+		log.Printf("renderOverlay JS error: %v", err)
+	}
+}
+
 // Cleanup individual frames.
 //
 //nolint:wrapcheck
@@ -226,6 +340,9 @@ func (vhs *VHS) Render() error {
 		return err
 	}
 
+	// Pass overlay flag to video options
+	vhs.Options.Video.HasOverlay = vhs.hasOverlay
+
 	// Generate the video(s) with the frames.
var cmds []*exec.Cmd //nolint:prealloc cmds = append(cmds, MakeGIF(vhs.Options.Video)) @@ -358,6 +487,16 @@ func (vhs *VHS) Record(ctx context.Context) <-chan error { continue } + // Render subtitle onto overlay canvas if active + vhs.mu.Lock() + subtitleText := vhs.subtitleText + hasOverlay := vhs.hasOverlay + vhs.mu.Unlock() + + if hasOverlay { + vhs.renderOverlay(subtitleText) + } + counter++ if err := os.WriteFile( filepath.Join(vhs.Options.Video.Input, fmt.Sprintf(cursorFrameFormat, counter)), @@ -376,6 +515,23 @@ func (vhs *VHS) Record(ctx context.Context) <-chan error { continue } + // Capture overlay frame + if hasOverlay { + overlay, overlayErr := vhs.OverlayCanvas.CanvasToImage("image/png", quality) + if overlayErr != nil { + ch <- fmt.Errorf("error capturing overlay frame: %w", overlayErr) + continue + } + if err := os.WriteFile( + filepath.Join(vhs.Options.Video.Input, fmt.Sprintf(overlayFrameFormat, counter)), + overlay, + 0o600, + ); err != nil { + ch <- fmt.Errorf("error writing overlay frame: %w", err) + continue + } + } + // Capture current frame and disable frame capturing if vhs.Options.Screenshot.frameCapture { vhs.Options.Screenshot.makeScreenshot(counter) diff --git a/video.go b/video.go index 5548a527..9b00e60b 100644 --- a/video.go +++ b/video.go @@ -18,8 +18,9 @@ import ( ) const ( - textFrameFormat = "frame-text-%05d.png" - cursorFrameFormat = "frame-cursor-%05d.png" + textFrameFormat = "frame-text-%05d.png" + cursorFrameFormat = "frame-cursor-%05d.png" + overlayFrameFormat = "frame-overlay-%05d.png" ) const ( @@ -56,6 +57,7 @@ type VideoOptions struct { Output VideoOutputs StartingFrame int Style *StyleOptions + HasOverlay bool } const ( @@ -109,12 +111,16 @@ func ensureDir(output string) { func buildFFopts(opts VideoOptions, targetFile string) []string { var args []string //nolint:prealloc streamCounter := 2 + if opts.HasOverlay { + streamCounter = 3 + } streamBuilder := NewStreamBuilder(streamCounter, opts.Input, opts.Style) // Input 
frame options, used no matter what // Stream 0: text frames // Stream 1: cursor frames + // Stream 2 (optional): overlay frames streamBuilder.args = append(streamBuilder.args, "-y", "-r", fmt.Sprint(opts.Framerate), @@ -125,6 +131,18 @@ func buildFFopts(opts VideoOptions, targetFile string) []string { "-i", filepath.Join(opts.Input, cursorFrameFormat), ) + if opts.HasOverlay { + streamBuilder.args = append(streamBuilder.args, + "-r", fmt.Sprint(opts.Framerate), + "-start_number", fmt.Sprint(opts.StartingFrame), + "-i", filepath.Join(opts.Input, overlayFrameFormat), + ) + } + + if opts.HasOverlay { + streamBuilder.overlayStream = 2 // always stream 2: text=0, cursor=1, overlay=2 + } + streamBuilder = streamBuilder. WithMargin(). WithBar(). @@ -135,6 +153,10 @@ func buildFFopts(opts VideoOptions, targetFile string) []string { WithBorderRadius(streamBuilder.cornerStream). WithMarginFill(streamBuilder.marginStream) + if opts.HasOverlay { + filterBuilder = filterBuilder.WithOverlay(streamBuilder.overlayStream) + } + // Format-specific options switch filepath.Ext(targetFile) { case gif: