diff --git a/manim/scene/scene_file_writer.py b/manim/scene/scene_file_writer.py index 21425af759..397f596526 100644 --- a/manim/scene/scene_file_writer.py +++ b/manim/scene/scene_file_writer.py @@ -400,6 +400,50 @@ def add_sound( new_segment = new_segment.apply_gain(gain) self.add_audio_segment(new_segment, time, **kwargs) + def _build_audio_segment_for_partial_movie( + self, start_time: float, end_time: float + ) -> AudioSegment: + start_ms = int(round(start_time * 1000)) + end_ms = int(round(end_time * 1000)) + duration_ms = max(end_ms - start_ms, 0) + if duration_ms == 0: + return AudioSegment.silent() + + segment = self.audio_segment[start_ms:end_ms] + if len(segment) < duration_ms: + segment += AudioSegment.silent( + duration=duration_ms - len(segment), + frame_rate=segment.frame_rate, + ) + + return segment + + def _write_audio_for_partial_movie(self) -> None: + if self.audio_stream is None: + return + + segment = self._build_audio_segment_for_partial_movie( + self.partial_movie_start_time, + self.renderer.time, + ) + if len(segment) == 0: + return + + samples = np.frombuffer(segment.raw_data, dtype=np.int16) + if segment.channels == 2: + samples = samples.reshape((-1, 2)).T + layout = "stereo" + else: + samples = samples.reshape((1, -1)) + layout = "mono" + + samples = np.ascontiguousarray(samples) + frame = av.AudioFrame.from_ndarray(samples, format="s16p", layout=layout) + frame.sample_rate = segment.frame_rate + + for packet in self.audio_stream.encode(frame): + self.video_container.mux(packet) + # Writers def begin_animation( self, allow_write: bool = False, file_path: StrPath | None = None @@ -552,10 +596,12 @@ def open_partial_movie_stream(self, file_path: StrPath | None = None) -> None: partial_movie_file_codec = "libx264" partial_movie_file_pix_fmt = "yuv420p" av_options = { - "an": "1", # ffmpeg: -an, no audio "crf": "23", # ffmpeg: -crf, constant rate factor (improved bitrate) } + if not self.includes_sound: + av_options["an"] = "1" + if config.movie_file_extension == ".webm": partial_movie_file_codec = "libvpx-vp9" av_options["-auto-alt-ref"] = "1" @@ -579,6 +625,15 @@ def open_partial_movie_stream(self, file_path: StrPath | None = None) -> None: self.video_container: OutputContainer = video_container self.video_stream: Stream = stream + self.partial_movie_start_time = self.renderer.time + self.audio_stream: Stream | None = None + if not is_gif_format(): + audio_codec = ( + "libvorbis" if config.movie_file_extension == ".webm" else "aac" + ) + if self.includes_sound: + self.audio_stream = self.video_container.add_stream(audio_codec) + self.queue: Queue[tuple[int, PixelArray | None]] = Queue() self.writer_thread = Thread(target=self.listen_and_write, args=()) self.writer_thread.start() @@ -593,9 +648,16 @@ def close_partial_movie_stream(self) -> None: self.queue.put((-1, None)) self.writer_thread.join() + if self.audio_stream is not None: + self._write_audio_for_partial_movie() + for packet in self.video_stream.encode(): self.video_container.mux(packet) + if self.audio_stream is not None: + for packet in self.audio_stream.encode(): + self.video_container.mux(packet) + self.video_container.close() logger.info( @@ -622,7 +684,9 @@ def is_already_cached(self, hash_invocation: str) -> bool: self.partial_movie_directory / f"{hash_invocation}{config['movie_file_extension']}" ) - return path.exists() + return ( + path.exists() + ) # TODO: hash will not changed if the audio changes, is it a problem? def combine_files( self, @@ -652,7 +716,12 @@ def combine_files( partial_movies_input = av.open( str(file_list), options=av_options, format="concat" ) - partial_movies_stream = partial_movies_input.streams.video[0] + partial_movies_video_stream = partial_movies_input.streams.video[0] + partial_movies_audio_stream = ( + partial_movies_input.streams.audio[0] + if includes_sound and not create_gif and partial_movies_input.streams.audio + else None + ) output_container = av.open(str(output_file), mode="w") output_container.metadata["comment"] = ( f"Rendered with Manim Community v{__version__}" @@ -663,17 +732,17 @@ def combine_files( and the following code https://github.com/imageio/imageio/blob/65d79140018bb7c64c0692ea72cb4093e8d632a0/imageio/plugins/pyav.py#L927-L996. """ - output_stream = output_container.add_stream( + output_video_stream = output_container.add_stream( codec_name="gif", ) - output_stream.pix_fmt = "rgb8" + output_video_stream.pix_fmt = "rgb8" if config.transparent: - output_stream.pix_fmt = "pal8" - output_stream.width = config.pixel_width - output_stream.height = config.pixel_height - output_stream.rate = to_av_frame_rate(config.frame_rate) + output_video_stream.pix_fmt = "pal8" + output_video_stream.width = config.pixel_width + output_video_stream.height = config.pixel_height + output_video_stream.rate = to_av_frame_rate(config.frame_rate) graph = av.filter.Graph() - input_buffer = graph.add_buffer(template=partial_movies_stream) + input_buffer = graph.add_buffer(template=partial_movies_video_stream) split = graph.add("split") palettegen = graph.add("palettegen", "stats_mode=diff") paletteuse = graph.add( @@ -698,33 +767,51 @@ def combine_files( while True: try: frame = graph.pull() - if output_stream.codec_context.time_base is not None: - frame.time_base = output_stream.codec_context.time_base + if output_video_stream.codec_context.time_base is not None: + frame.time_base = output_video_stream.codec_context.time_base frame.pts = frames_written frames_written += 1 - output_container.mux(output_stream.encode(frame)) + output_container.mux(output_video_stream.encode(frame)) except av.error.EOFError: break - for packet in output_stream.encode(): + for packet in output_video_stream.encode(): output_container.mux(packet) else: - output_stream = output_container.add_stream_from_template( - template=partial_movies_stream, + output_video_stream = output_container.add_stream_from_template( + template=partial_movies_video_stream, ) + output_audio_stream: Stream | None = None + if includes_sound and partial_movies_audio_stream is not None: + output_audio_stream = output_container.add_stream_from_template( + template=partial_movies_audio_stream, + ) if config.transparent and config.movie_file_extension == ".webm": - output_stream.pix_fmt = "yuva420p" - for packet in partial_movies_input.demux(partial_movies_stream): + output_video_stream.pix_fmt = "yuva420p" + if partial_movies_audio_stream is None: + packets = partial_movies_input.demux(partial_movies_video_stream) + else: + packets = partial_movies_input.demux( + partial_movies_video_stream, + partial_movies_audio_stream, + ) + for packet in packets: # We need to skip the "flushing" packets that `demux` generates. if packet.dts is None: continue + packet_type = packet.stream.type packet.dts = None # This seems to be needed, as dts from consecutive # files may not be monotically increasing, so we let libav compute it. # We need to assign the packet to the new stream. - packet.stream = output_stream + if packet_type == "video": + packet.stream = output_video_stream + elif packet_type == "audio" and output_audio_stream is not None: + packet.stream = output_audio_stream + else: + continue output_container.mux(packet) partial_movies_input.close() @@ -755,7 +842,7 @@ def combine_to_movie(self) -> None: partial_movie_files, movie_file_path, is_gif_format(), - self.includes_sound, + includes_sound=False, # We will handle sound separately, as merging multiple audio tracks can cause issues (when the audio tracks is not cut properly between the different partial movie files) ) # handle sound @@ -849,6 +936,7 @@ def combine_to_section_videos(self) -> None: self.combine_files( section.get_clean_partial_movie_files(), self.sections_output_dir / section.video, + includes_sound=self.includes_sound, # TODO: maybe we should handle sound separately for sections as well, as merging multiple audio tracks can cause issues (when the audio tracks is not cut properly between the different partial movie files ) sections_index.append(section.get_dict(self.sections_output_dir)) with (self.sections_output_dir / f"{self.output_name}.json").open("w") as file: diff --git a/tests/test_scene_rendering/test_file_writer.py b/tests/test_scene_rendering/test_file_writer.py index 0065547709..3c55048575 100644 --- a/tests/test_scene_rendering/test_file_writer.py +++ b/tests/test_scene_rendering/test_file_writer.py @@ -158,6 +158,55 @@ def construct(self): assert "click.mp3 to .wav" in manim_caplog.text +@pytest.mark.slow +def test_partial_movie_files_include_audio(config, tmp_path): + output_filename = "partial_audio" + + class AudioPartialScene(Scene): + def construct(self): + click_path = ( + Path(__file__).parent.parent.parent + / "docs" + / "source" + / "_static" + / "click.wav" + ) + self.add_sound(click_path) + self.play(Create(Circle())) + self.wait(0.5) + + with tempconfig( + { + "media_dir": tmp_path, + "quality": "low_quality", + "format": "mp4", + "output_file": output_filename, + } + ): + scene = AudioPartialScene() + scene.render() + + partial_files = [ + Path(path) + for path in scene.renderer.file_writer.partial_movie_files + if path is not None + ] + assert partial_files + + has_nonzero_audio = False + for partial_file in partial_files: + with av.open(partial_file) as container: + assert container.streams.audio, "Partial movie missing audio stream" + for frame in container.decode(audio=0): + if np.any(frame.to_ndarray()): + has_nonzero_audio = True + break + if has_nonzero_audio: + break + + assert has_nonzero_audio, "All partial audio samples are zero" + + @pytest.mark.slow def test_unicode_partial_movie(config, tmpdir, simple_scenes_path): # Characters that failed for a user on Windows