From c7a9403e3b9a445ec5ebf2a140132daf4f63326e Mon Sep 17 00:00:00 2001
From: Jaro Habiger <jarohabiger@googlemail.com>
Date: Thu, 8 Jan 2026 00:50:09 +0100
Subject: [PATCH 1/2] =?UTF-8?q?=F0=9F=90=A3=20add=20tools=20for=20reflowin?=
 =?UTF-8?q?g=20the=20transcript?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

into one paragraph per sentence / speaker
---
 .../src/editor/automerge_websocket_editor.ts  |  24 +++-
 frontend/src/editor/text_tools.tsx            | 112 ++++++++++++++++++
 frontend/src/pages/document.tsx               |   2 +
 3 files changed, 135 insertions(+), 3 deletions(-)
 create mode 100644 frontend/src/editor/text_tools.tsx
diff --git a/frontend/src/editor/automerge_websocket_editor.ts b/frontend/src/editor/automerge_websocket_editor.ts
index f06b66c8..6a963da9 100644
--- a/frontend/src/editor/automerge_websocket_editor.ts
+++ b/frontend/src/editor/automerge_websocket_editor.ts
@@ -16,14 +16,18 @@ enum MessageSyncType {
   FullDoc = 3,
 }
 
+export type EditorWithWebsocket = Editor & {
+  update: (changeFn: (doc: Document) => void) => void;
+};
+
 export function useAutomergeWebsocketEditor(
   url: string,
   { onInitialSyncComplete }: { onInitialSyncComplete: (editor?: Editor) => void },
-): [Editor?, Paragraph[]?] {
+): [EditorWithWebsocket?, Paragraph[]?] {
   const debug = useDebugMode();
   const sentChanges = useRef<Set<string>>(new Set());
   const [editorAndInitialValue, setEditorAndInitialValue] = useState<null | {
-    editor: Editor;
+    editor: EditorWithWebsocket;
     initialValue: Paragraph[];
   }>(null);
   const editorRef = useRef<undefined | Editor>();
@@ -54,7 +58,9 @@ export function useAutomergeWebsocketEditor(
     const createNewEditor = (doc: Automerge.Doc<Document>) => {
       const baseEditor = createEditor();
       const editorWithReact = withReact(baseEditor);
-      const editor = withHistory(withAutomergeDoc(editorWithReact, Automerge.init()));
+      const editor = withHistory(
+        withAutomergeDoc(editorWithReact, Automerge.init()),
+      ) as EditorWithWebsocket;
       editor.addDocChangeListener(sendDocChange);
 
       const migratedDoc = migrateDocument(doc as Automerge.Doc<Document>);
@@ -68,6 +74,18 @@ export function useAutomergeWebsocketEditor(
           migratedDoc.children !== undefined
             ? JSON.parse(JSON.stringify(migratedDoc.children))
             : [];
+
+        editor.update = (changeFn: (doc: Document) => void) => {
+          console.time('changeFn');
+          const changed = Automerge.change(editor.doc, changeFn);
+          console.timeEnd('changeFn');
+          console.time('setDoc');
+          editor.setDoc(changed);
+          console.timeEnd('setDoc');
+          console.time('sendDocChange');
+          sendDocChange(changed);
+          console.timeEnd('sendDocChange');
+        };
         return { editor: editor, initialValue: initialValue };
       });
     };
diff --git a/frontend/src/editor/text_tools.tsx b/frontend/src/editor/text_tools.tsx
new file mode 100644
index 00000000..3db54dd1
--- /dev/null
+++ b/frontend/src/editor/text_tools.tsx
@@ -0,0 +1,112 @@
+import { TbHammer } from 'react-icons/tb';
+import { IconButton } from '../components/button';
+import { EditorWithWebsocket } from './automerge_websocket_editor';
+import { Document, Paragraph } from '../editor/types';
+import { Popup } from '../components/popup';
+import { primitiveWithClassname } from '../styled';
+
+export const MenuItemButton = primitiveWithClassname('button', [
+  'hover:bg-gray-200 dark:hover:bg-neutral-700',
+  'rounded-md',
+  'w-full',
+  'text-left',
+  'px-2',
+  'py-1',
+  'block',
+]);
+
+export function TextTools({ editor }: { editor: EditorWithWebsocket }) {
+  return (
+    <Popup
+      button={<IconButton icon={TbHammer} label={'text tools'} />}
+      onClick={(e) => {
+        e.preventDefault();
+      }}
+    >
+      <MenuItemButton
+        onClick={() => {
+          const mergePoints: number[] = [];
+          for (let i = 0; i < editor.doc.children.length - 1; i++) {
+            const paragraph = editor.doc.children[i];
+            const nextParagraph = editor.doc.children[i + 1];
+            if (paragraph.speaker == nextParagraph.speaker) {
+              mergePoints.push(i);
+            }
+          }
+          editor.update((doc: Document) => {
+            let removed = 0;
+            mergePoints.forEach((index) => {
+              const i = index - removed;
+              doc.children[i].children.push(
+                ...JSON.parse(JSON.stringify(doc.children[i + 1].children)),
+              );
+              doc.children.splice(i + 1, 1);
+              removed++;
+            });
+          });
+        }}
+      >
+        Reflow to One Paragraph per Speaker
+      </MenuItemButton>
+
+      <MenuItemButton
+        onClick={() => {
+          const punctuations = ['.', '?', '!'];
+          const non_punctuations = ['...'];
+          const contains_punctuation = (text: string) =>
+            punctuations.some((punct) => text.includes(punct)) &&
+            !non_punctuations.some((np) => text.includes(np));
+
+          // stategy: we first merge everything that could possibly be merged...
+          const mergePoints: number[] = [];
+          for (let i = 0; i < editor.doc.children.length - 1; i++) {
+            const paragraph = editor.doc.children[i];
+            const nextParagraph = editor.doc.children[i + 1];
+            if (
+              !contains_punctuation(paragraph.children[paragraph.children.length - 1].text) &&
+              paragraph.speaker == nextParagraph.speaker
+            ) {
+              mergePoints.push(i);
+            }
+          }
+          editor.update((doc: Document) => {
+            let removed = 0;
+            mergePoints.forEach((index) => {
+              const i = index - removed;
+              doc.children[i].children.push(
+                ...JSON.parse(JSON.stringify(doc.children[i + 1].children)),
+              );
+              doc.children.splice(i + 1, 1);
+              removed++;
+            });
+
+            // ...and only then break up
+            const newChildren: Paragraph[] = [];
+            doc.children.forEach((paragraph) => {
+              let currentParagraph = {
+                ...paragraph,
+                children: [] as { text: string }[],
+              };
+              paragraph.children.forEach((token) => {
+                currentParagraph.children.push(JSON.parse(JSON.stringify(token)));
+                if (contains_punctuation(token.text)) {
+                  newChildren.push(currentParagraph);
+                  currentParagraph = {
+                    ...paragraph,
+                    children: [],
+                  };
+                }
+              });
+              if (currentParagraph.children.length > 0) {
+                newChildren.push(currentParagraph);
+              }
+            });
+            doc.children = newChildren;
+          });
+        }}
+      >
+        Reflow to One Paragraph per Sentence
+      </MenuItemButton>
+    </Popup>
+  );
+}
diff --git a/frontend/src/pages/document.tsx b/frontend/src/pages/document.tsx
index 6252f207..0d8e8119 100644
--- a/frontend/src/pages/document.tsx
+++ b/frontend/src/pages/document.tsx
@@ -18,6 +18,7 @@ import { Helmet } from 'react-helmet';
 import { ShareModal } from '../editor/share';
 import { getDocumentWsUrl, useAuthData } from '../utils/auth';
 import { ExportModal } from '../editor/export';
+import { TextTools } from '../editor/text_tools';
 
 const LazyDebugPanel = lazy(() =>
   import('../editor/debug_panel').then((module) => ({ default: module.DebugPanel })),
@@ -163,6 +164,7 @@ export function DocumentPage({
           )}
         </TopBarPart>
         <TopBarPart>
+          {editor && <TextTools editor={editor} />}
           {data?.has_full_access && (
             <IconButton
               icon={TbShare3}

From 6fad5ae6c91b26ef24e1cfce42005d75c42ceeac Mon Sep 17 00:00:00 2001
From: Jaro Habiger <jarohabiger@googlemail.com>
Date: Thu, 8 Jan 2026 01:42:56 +0100
Subject: [PATCH 2/2] =?UTF-8?q?=E2=9C=A8=20add=20smart=20reflow?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 frontend/src/editor/text_tools.tsx | 166 ++++++++++++++++++++---------
 1 file changed, 118 insertions(+), 48 deletions(-)

diff --git a/frontend/src/editor/text_tools.tsx b/frontend/src/editor/text_tools.tsx
index 3db54dd1..8ec83a91 100644
--- a/frontend/src/editor/text_tools.tsx
+++ b/frontend/src/editor/text_tools.tsx
@@ -15,6 +15,33 @@ export const MenuItemButton = primitiveWithClassname('button', [
   'block',
 ]);
 
+/**
+ * Folds each run of adjacent same-speaker paragraphs into the run's first paragraph, in place.
+ */
+function mergeSameSpeakerParagraphs(doc: Document) {
+  let cursor = 0;
+  while (cursor < doc.children.length - 1) {
+    const current = doc.children[cursor];
+    const following = doc.children[cursor + 1];
+    if (current.speaker == following.speaker) {
+      // the cursor stays put, so a third paragraph of the same run is folded in next
+      current.children.push(...JSON.parse(JSON.stringify(following.children)));
+      doc.children.splice(cursor + 1, 1);
+    } else {
+      cursor++;
+    }
+  }
+}
+
+const punctuations = ['.', '?', '!'];
+const non_punctuations = ['...'];
+/** Whether `text` holds a '.', '?' or '!' while holding no '...'. */
+function containsSentenceEnd(text: string) {
+  const hasEllipsis = non_punctuations.some((marker) => text.includes(marker));
+  const hasTerminator = punctuations.some((mark) => text.includes(mark));
+  return hasTerminator && !hasEllipsis;
+}
+
 export function TextTools({ editor }: { editor: EditorWithWebsocket }) {
   return (
     <Popup
@@ -25,25 +52,7 @@ export function TextTools({ editor }: { editor: EditorWithWebsocket }) {
     >
       <MenuItemButton
         onClick={() => {
-          const mergePoints: number[] = [];
-          for (let i = 0; i < editor.doc.children.length - 1; i++) {
-            const paragraph = editor.doc.children[i];
-            const nextParagraph = editor.doc.children[i + 1];
-            if (paragraph.speaker == nextParagraph.speaker) {
-              mergePoints.push(i);
-            }
-          }
-          editor.update((doc: Document) => {
-            let removed = 0;
-            mergePoints.forEach((index) => {
-              const i = index - removed;
-              doc.children[i].children.push(
-                ...JSON.parse(JSON.stringify(doc.children[i + 1].children)),
-              );
-              doc.children.splice(i + 1, 1);
-              removed++;
-            });
-          });
+          editor.update(mergeSameSpeakerParagraphs);
         }}
       >
         Reflow to One Paragraph per Speaker
@@ -51,36 +60,11 @@ export function TextTools({ editor }: { editor: EditorWithWebsocket }) {
 
       <MenuItemButton
         onClick={() => {
-          const punctuations = ['.', '?', '!'];
-          const non_punctuations = ['...'];
-          const contains_punctuation = (text: string) =>
-            punctuations.some((punct) => text.includes(punct)) &&
-            !non_punctuations.some((np) => text.includes(np));
-
-          // stategy: we first merge everything that could possibly be merged...
-          const mergePoints: number[] = [];
-          for (let i = 0; i < editor.doc.children.length - 1; i++) {
-            const paragraph = editor.doc.children[i];
-            const nextParagraph = editor.doc.children[i + 1];
-            if (
-              !contains_punctuation(paragraph.children[paragraph.children.length - 1].text) &&
-              paragraph.speaker == nextParagraph.speaker
-            ) {
-              mergePoints.push(i);
-            }
-          }
           editor.update((doc: Document) => {
-            let removed = 0;
-            mergePoints.forEach((index) => {
-              const i = index - removed;
-              doc.children[i].children.push(
-                ...JSON.parse(JSON.stringify(doc.children[i + 1].children)),
-              );
-              doc.children.splice(i + 1, 1);
-              removed++;
-            });
+            // strategy: we first merge everything that could possibly be merged...
+            mergeSameSpeakerParagraphs(doc);
 
-            // ...and only then break up
+            // ...and only then break up on sentence boundaries
             const newChildren: Paragraph[] = [];
             doc.children.forEach((paragraph) => {
               let currentParagraph = {
@@ -89,7 +73,7 @@ export function TextTools({ editor }: { editor: EditorWithWebsocket }) {
               };
               paragraph.children.forEach((token) => {
                 currentParagraph.children.push(JSON.parse(JSON.stringify(token)));
-                if (contains_punctuation(token.text)) {
+                if (containsSentenceEnd(token.text)) {
                   newChildren.push(currentParagraph);
                   currentParagraph = {
                     ...paragraph,
@@ -107,6 +91,92 @@ export function TextTools({ editor }: { editor: EditorWithWebsocket }) {
       >
         Reflow to One Paragraph per Sentence
       </MenuItemButton>
+
+      <MenuItemButton
+        onClick={() => {
+          // this strategy tries to split paragraphs at sentence boundaries, but only if there is a pause between the sentences
+          // or the paragraphs would become too long.
+          const initial = 2;
+          const decay = 0.95;
+
+          // Silence after token `i`: the start of token `i + 1` minus the end of token `i`.
+          // Falls back to 0 when there is no such token or one of the two timestamps is undefined.
+          const getPause = (i: number, paragraph: Paragraph) => {
+            const ownEnd = paragraph.children[i]?.end;
+            const nextStart = paragraph.children[i + 1]?.start;
+            if (ownEnd === undefined || nextStart === undefined) return 0;
+            return nextStart - ownEnd;
+          };
+
+          editor.update((doc: Document) => {
+            mergeSameSpeakerParagraphs(doc);
+            const newChildren: Paragraph[] = [];
+            // Pushes `paragraph` onto `newChildren`. A paragraph of more than 100 tokens is first
+            // broken up after the comma tokens that are followed by the longest pauses.
+            const addNewChild = (paragraph: Paragraph) => {
+              if (paragraph.children.length <= 100) {
+                newChildren.push(paragraph);
+                return;
+              }
+              // sort numerically: the default comparator orders numbers as strings ('10' < '9')
+              const silences = paragraph.children
+                .map((token, i) => ({ text: token.text, pause: getPause(i, paragraph) }))
+                .filter((token) => token.text.includes(','))
+                .map((token) => token.pause)
+                .sort((a, b) => a - b);
+              // allow one more split for every 100 tokens of paragraph length
+              const thresholdIndex = Math.floor(paragraph.children.length / 100);
+              // clamp the index so a paragraph with too few commas is still split at those it has
+              const threshold = silences[Math.max(0, silences.length - 1 - thresholdIndex)];
+              // every chunk inherits the paragraph's other fields through the spread
+              let currentParagraph = { ...paragraph, children: [] as { text: string }[] };
+              paragraph.children.forEach((token, i) => {
+                currentParagraph.children.push(JSON.parse(JSON.stringify(token)));
+                // split after a qualifying comma, but only once the chunk holds more than 3 tokens
+                if (
+                  getPause(i, paragraph) >= threshold &&
+                  token.text.includes(',') &&
+                  currentParagraph.children.length > 3
+                ) {
+                  newChildren.push(currentParagraph);
+                  currentParagraph = { ...paragraph, children: [] };
+                }
+              });
+              if (currentParagraph.children.length > 0) {
+                newChildren.push(currentParagraph);
+              }
+            };
+            doc.children.forEach((paragraph) => {
+              let minPauseBetweenSentences = initial; // this gets reduced with every additional token
+              let currentParagraph = {
+                ...paragraph,
+                children: [] as { text: string }[],
+              };
+              paragraph.children.forEach((token, i) => {
+                currentParagraph.children.push(JSON.parse(JSON.stringify(token)));
+                minPauseBetweenSentences *= decay;
+                if (
+                  getPause(i, paragraph) >= minPauseBetweenSentences &&
+                  containsSentenceEnd(token.text)
+                ) {
+                  addNewChild(currentParagraph);
+                  minPauseBetweenSentences = initial;
+                  currentParagraph = {
+                    ...paragraph,
+                    children: [],
+                  };
+                }
+              });
+              if (currentParagraph.children.length > 0) {
+                addNewChild(currentParagraph);
+              }
+            });
+            doc.children = newChildren;
+          });
+        }}
+      >
+        Smart Reflow ✨
+      </MenuItemButton>
     </Popup>
   );
 }