bugbakery · anuejn · Jan 7, 2026 · Jan 8, 2026 · rroohhh · Jan 9, 2026
diff --git a/frontend/src/editor/automerge_websocket_editor.ts b/frontend/src/editor/automerge_websocket_editor.ts
@@ -16,14 +16,18 @@ enum MessageSyncType {
   FullDoc = 3,
 }
 
+/**
+ * An `Editor` that additionally carries an `update` helper for applying a
+ * change to the whole document in one step.
+ */
+export type EditorWithWebsocket = Editor & {
+  /** Runs `changeFn` against the document; `changeFn` receives the `Document` and returns nothing. */
+  update: (changeFn: (doc: Document) => void) => void;
+};
+
 export function useAutomergeWebsocketEditor(
   url: string,
   { onInitialSyncComplete }: { onInitialSyncComplete: (editor?: Editor) => void },
-): [Editor?, Paragraph[]?] {
+): [EditorWithWebsocket?, Paragraph[]?] {
   const debug = useDebugMode();
   const sentChanges = useRef<Set<string>>(new Set());
   const [editorAndInitialValue, setEditorAndInitialValue] = useState<null | {
-    editor: Editor;
+    editor: EditorWithWebsocket;
     initialValue: Paragraph[];
   }>(null);
   const editorRef = useRef<undefined | Editor>();
@@ -54,7 +58,9 @@ export function useAutomergeWebsocketEditor(
     const createNewEditor = (doc: Automerge.Doc<Document>) => {
       const baseEditor = createEditor();
       const editorWithReact = withReact(baseEditor);
-      const editor = withHistory(withAutomergeDoc(editorWithReact, Automerge.init()));
+      const editor = withHistory(
+        withAutomergeDoc(editorWithReact, Automerge.init()),
+      ) as EditorWithWebsocket;
       editor.addDocChangeListener(sendDocChange);
 
       const migratedDoc = migrateDocument(doc as Automerge.Doc<Document>);
@@ -68,6 +74,18 @@ export function useAutomergeWebsocketEditor(
           migratedDoc.children !== undefined
             ? JSON.parse(JSON.stringify(migratedDoc.children))
             : [];
+
+        // Attach the `update` helper to the editor: it runs `changeFn` through
+        // `Automerge.change` on the editor's current doc, installs the resulting doc via
+        // `setDoc` and then hands it to `sendDocChange`.
+        // NOTE(review): the console.time/timeEnd pairs below run on every call, unconditionally —
+        // confirm this timing instrumentation is meant to ship.
+        // NOTE(review): `sendDocChange` is also registered through `addDocChangeListener` when the
+        // editor is created; confirm `setDoc` does not trigger that listener, otherwise the change
+        // would be passed to `sendDocChange` twice.
+        editor.update = (changeFn: (doc: Document) => void) => {
+          console.time('changeFn');
+          const changed = Automerge.change(editor.doc, changeFn);
+          console.timeEnd('changeFn');
+          console.time('setDoc');
+          editor.setDoc(changed);
+          console.timeEnd('setDoc');
+          console.time('sendDocChange');
+          sendDocChange(changed);
+          console.timeEnd('sendDocChange');
+        };
         return { editor: editor, initialValue: initialValue };
       });
     };

diff --git a/frontend/src/editor/text_tools.tsx b/frontend/src/editor/text_tools.tsx
@@ -0,0 +1,182 @@
+import { TbHammer } from 'react-icons/tb';
+import { IconButton } from '../components/button';
+import { EditorWithWebsocket } from './automerge_websocket_editor';
+import { Document, Paragraph } from '../editor/types';
+import { Popup } from '../components/popup';
+import { primitiveWithClassname } from '../styled';
+
+// Button used for the individual entries of the text tools popup menu.
+// Built by `primitiveWithClassname` from a 'button' and the class names listed below
+// (presumably Tailwind utility classes — confirm against the styling setup).
+export const MenuItemButton = primitiveWithClassname('button', [
+  'hover:bg-gray-200 dark:hover:bg-neutral-700',
+  'rounded-md',
+  'w-full',
+  'text-left',
+  'px-2',
+  'py-1',
+  'block',
+]);
+
+/**
+ * Merges every run of adjacent paragraphs that share a speaker into the first
+ * paragraph of that run. Mutates `doc` in place.
+ *
+ * Speakers are compared with loose equality, so `null` and `undefined` count
+ * as the same speaker.
+ *
+ * @param doc document whose `children` (paragraphs) are merged
+ */
+function mergeSameSpeakerParagraphs(doc: Document) {
+  // Pass 1: remember every position whose successor has the same speaker.
+  // The decisions are taken up front, before the list is modified.
+  const absorbAt: number[] = [];
+  const lastIndex = doc.children.length - 1;
+  let position = 0;
+  while (position < lastIndex) {
+    if (doc.children[position].speaker == doc.children[position + 1].speaker) {
+      absorbAt.push(position);
+    }
+    position += 1;
+  }
+
+  // Pass 2: apply the merges front to back. Each merge removes one paragraph,
+  // so the recorded positions are shifted by the number of removals so far.
+  let alreadyRemoved = 0;
+  for (const originalIndex of absorbAt) {
+    const target = originalIndex - alreadyRemoved;
+    // The tokens are deep-copied through JSON before being appended.
+    const absorbedTokens = JSON.parse(JSON.stringify(doc.children[target + 1].children));
+    doc.children[target].children.push(...absorbedTokens);
+    doc.children.splice(target + 1, 1);
+    alreadyRemoved += 1;
+  }
+}
+
+const punctuations = ['.', '?', '!'];
+const non_punctuations = ['...'];
+/**
+ * Tells whether `text` marks the end of a sentence.
+ *
+ * @param text token text to inspect
+ * @returns `true` if `text` contains one of `punctuations` and none of
+ *   `non_punctuations`; `false` otherwise
+ */
+function containsSentenceEnd(text: string) {
+  // An ellipsis vetoes the match no matter what else the text contains.
+  for (const ellipsis of non_punctuations) {
+    if (text.includes(ellipsis)) {
+      return false;
+    }
+  }
+  for (const mark of punctuations) {
+    if (text.includes(mark)) {
+      return true;
+    }
+  }
+  return false;
+}
+
+/**
+ * Toolbar popup offering bulk "reflow" operations that re-partition the
+ * document's paragraphs. Each operation runs inside a single `editor.update`
+ * call:
+ *  - per speaker:  adjacent paragraphs of the same speaker are merged
+ *  - per sentence: after that merge, every token for which
+ *                  `containsSentenceEnd` holds closes a paragraph
+ *  - smart:        after that merge, a sentence end only closes a paragraph if
+ *                  the pause following it is long enough; results that are still
+ *                  very long are additionally split at commas
+ *
+ * @param editor editor whose `update` method is used to apply the changes
+ */
+export function TextTools({ editor }: { editor: EditorWithWebsocket }) {
+  return (
+    <Popup
+      button={<IconButton icon={TbHammer} label={'text tools'} />}
+      onClick={(e) => {
+        e.preventDefault();
+      }}
+    >
+      <MenuItemButton
+        onClick={() => {
+          editor.update(mergeSameSpeakerParagraphs);
+        }}
+      >
+        Reflow to One Paragraph per Speaker
+      </MenuItemButton>
+
+      <MenuItemButton
+        onClick={() => {
+          editor.update((doc: Document) => {
+            // strategy: we first merge everything that could possibly be merged...
+            mergeSameSpeakerParagraphs(doc);
+
+            // ...and only then break up on sentence boundaries
+            const newChildren: Paragraph[] = [];
+            doc.children.forEach((paragraph) => {
+              // every new paragraph inherits the properties of the paragraph it is cut from
+              let currentParagraph = {
+                ...paragraph,
+                children: [] as { text: string }[],
+              };
+              paragraph.children.forEach((token) => {
+                // tokens are deep-copied through JSON before they are put into the new paragraph
+                currentParagraph.children.push(JSON.parse(JSON.stringify(token)));
+                if (containsSentenceEnd(token.text)) {
+                  newChildren.push(currentParagraph);
+                  currentParagraph = {
+                    ...paragraph,
+                    children: [],
+                  };
+                }
+              });
+              // keep the trailing tokens that are not terminated by a sentence end
+              if (currentParagraph.children.length > 0) {
+                newChildren.push(currentParagraph);
+              }
+            });
+            doc.children = newChildren;
+          });
+        }}
+      >
+        Reflow to One Paragraph per Sentence
+      </MenuItemButton>
+
+      <MenuItemButton
+        onClick={() => {
+          // this strategy tries to split paragraphs at sentence boundaries, but only if there is a pause between the sentences
+          // or the paragraphs would become too long.
+          // `initial` is the pause required directly after a split, `decay` the factor it is multiplied with per token.
+          const initial = 2;
+          const decay = 0.95;
+
+          // pause between token `i` and its successor; 0 if either timestamp (or the successor) is missing
+          const getPause = (i: number, paragraph: Paragraph) => {
+            const token = paragraph.children[i];
+            const nextToken = paragraph.children[i + 1];
+            if (nextToken?.start !== undefined && token?.end !== undefined) {
+              return nextToken.start - token.end;
+            }
+            return 0;
+          };
+
+          editor.update((doc: Document) => {
+            mergeSameSpeakerParagraphs(doc);
+            const newChildren: Paragraph[] = [];
+            const addNewChild = (paragraph: Paragraph) => {
+              // if the paragraph is very long and does not contain any sentence ends, we still want to break it up
+              if (paragraph.children.length <= 100) {
+                newChildren.push(paragraph);
+              } else {
+                // pauses following the tokens that contain a comma: these are the candidate split points
+                const silences = paragraph.children
+                  .map((x, i) => ({ ...x, pause: getPause(i, paragraph) }))
+                  .filter((token) => token.text.includes(','))
+                  .map((token) => token.pause);
+                // sort() without a comparator orders elements by their string representation,
+                // which misorders numbers (e.g. 10 before 2) — sort numerically instead
+                silences.sort((a, b) => a - b);
+                // use the (thresholdIndex + 1)-th largest comma pause as cut-off, i.e. roughly one split per 100 tokens
+                const thresholdIndex = Math.floor(paragraph.children.length / 100);
+                // clamp to the smallest pause if there are fewer commas than that, so the paragraph is still broken up;
+                // without any comma `threshold` is undefined, the comparison below is never true and nothing is split
+                const threshold = silences[Math.max(0, silences.length - 1 - thresholdIndex)];
+                let currentParagraph = {
+                  ...paragraph,
+                  children: [] as { text: string }[],
+                };
+                paragraph.children.forEach((token, i) => {
+                  currentParagraph.children.push(JSON.parse(JSON.stringify(token)));
+                  // split after a comma with a long enough pause, but never produce paragraphs of 3 tokens or fewer
+                  if (
+                    getPause(i, paragraph) >= threshold &&
+                    token.text.includes(',') &&
+                    currentParagraph.children.length > 3
+                  ) {
+                    newChildren.push(currentParagraph);
+                    currentParagraph = {
+                      ...paragraph,
+                      children: [],
+                    };
+                  }
+                });
+                if (currentParagraph.children.length > 0) {
+                  newChildren.push(currentParagraph);
+                }
+              }
+            };
+            doc.children.forEach((paragraph) => {
+              let minPauseBetweenSentences = initial; // this gets reduced with every additional token
+              let currentParagraph = {
+                ...paragraph,
+                children: [] as { text: string }[],
+              };
+              paragraph.children.forEach((token, i) => {
+                currentParagraph.children.push(JSON.parse(JSON.stringify(token)));
+                minPauseBetweenSentences *= decay;
+                // the longer the current paragraph already is, the shorter the pause that suffices to end it
+                if (
+                  getPause(i, paragraph) >= minPauseBetweenSentences &&
+                  containsSentenceEnd(token.text)
+                ) {
+                  addNewChild(currentParagraph);
+                  minPauseBetweenSentences = initial;
+                  currentParagraph = {
+                    ...paragraph,
+                    children: [],
+                  };
+                }
+              });
+              if (currentParagraph.children.length > 0) {
+                addNewChild(currentParagraph);
+              }
+            });
+            doc.children = newChildren;
+          });
+        }}
+      >
+        Smart Reflow ✨
+      </MenuItemButton>
+    </Popup>
+  );
+}
diff --git a/frontend/src/pages/document.tsx b/frontend/src/pages/document.tsx
@@ -18,6 +18,7 @@ import { Helmet } from 'react-helmet';
 import { ShareModal } from '../editor/share';
 import { getDocumentWsUrl, useAuthData } from '../utils/auth';
 import { ExportModal } from '../editor/export';
+import { TextTools } from '../editor/text_tools';
 
 const LazyDebugPanel = lazy(() =>
   import('../editor/debug_panel').then((module) => ({ default: module.DebugPanel })),
@@ -163,6 +164,7 @@ export function DocumentPage({
           )}
         </TopBarPart>
         <TopBarPart>
+          {editor && <TextTools editor={editor} />}
           {data?.has_full_access && (
             <IconButton
               icon={TbShare3}