Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 21 additions & 3 deletions frontend/src/editor/automerge_websocket_editor.ts
Original file line number Diff line number Diff line change
Expand Up @@ -16,14 +16,18 @@ enum MessageSyncType {
FullDoc = 3,
}

/**
 * An `Editor` that additionally exposes an `update` method.
 *
 * `update` takes a callback that receives the `Document` and returns nothing;
 * `update` itself returns nothing as well.
 */
export type EditorWithWebsocket = Editor & {
update: (changeFn: (doc: Document) => void) => void;
};

export function useAutomergeWebsocketEditor(
url: string,
{ onInitialSyncComplete }: { onInitialSyncComplete: (editor?: Editor) => void },
): [Editor?, Paragraph[]?] {
): [EditorWithWebsocket?, Paragraph[]?] {
const debug = useDebugMode();
const sentChanges = useRef<Set<string>>(new Set());
const [editorAndInitialValue, setEditorAndInitialValue] = useState<null | {
editor: Editor;
editor: EditorWithWebsocket;
initialValue: Paragraph[];
}>(null);
const editorRef = useRef<undefined | Editor>();
Expand Down Expand Up @@ -54,7 +58,9 @@ export function useAutomergeWebsocketEditor(
const createNewEditor = (doc: Automerge.Doc<Document>) => {
const baseEditor = createEditor();
const editorWithReact = withReact(baseEditor);
const editor = withHistory(withAutomergeDoc(editorWithReact, Automerge.init()));
const editor = withHistory(
withAutomergeDoc(editorWithReact, Automerge.init()),
) as EditorWithWebsocket;
Comment thread
anuejn marked this conversation as resolved.
editor.addDocChangeListener(sendDocChange);

const migratedDoc = migrateDocument(doc as Automerge.Doc<Document>);
Expand All @@ -68,6 +74,18 @@ export function useAutomergeWebsocketEditor(
migratedDoc.children !== undefined
? JSON.parse(JSON.stringify(migratedDoc.children))
: [];

// Applies `changeFn` to the editor's current Automerge document, installs the
// resulting document on the editor and hands it to `sendDocChange`.
// Each of the three steps is wrapped in a console.time/console.timeEnd pair,
// so its duration is logged to the console on every call.
// NOTE(review): the timers look like profiling leftovers — confirm they are meant to ship.
// NOTE(review): `sendDocChange` is also registered through `addDocChangeListener`;
// if `setDoc` notifies those listeners the change may be sent twice — confirm.
editor.update = (changeFn: (doc: Document) => void) => {
console.time('changeFn');
// Produce the changed document from the current one.
const changed = Automerge.change(editor.doc, changeFn);
console.timeEnd('changeFn');
console.time('setDoc');
// Make the changed document the editor's document.
editor.setDoc(changed);
console.timeEnd('setDoc');
console.time('sendDocChange');
// Pass the changed document on to `sendDocChange`.
sendDocChange(changed);
console.timeEnd('sendDocChange');
};
return { editor: editor, initialValue: initialValue };
});
};
Expand Down
182 changes: 182 additions & 0 deletions frontend/src/editor/text_tools.tsx
Original file line number Diff line number Diff line change
@@ -0,0 +1,182 @@
import { TbHammer } from 'react-icons/tb';
Copy link
Copy Markdown
Member

@rroohhh rroohhh Jan 9, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we have tests for the transformations in this file? :)

import { IconButton } from '../components/button';
import { EditorWithWebsocket } from './automerge_websocket_editor';
import { Document, Paragraph } from '../editor/types';
import { Popup } from '../components/popup';
import { primitiveWithClassname } from '../styled';

/**
 * Button used for the entries of the text tools popup.
 *
 * Built with `primitiveWithClassname` from a `'button'` element and the
 * utility class names listed below (hover background for light and dark mode,
 * rounded corners, full width, left-aligned text, padding, block display).
 */
export const MenuItemButton = primitiveWithClassname('button', [
'hover:bg-gray-200 dark:hover:bg-neutral-700',
'rounded-md',
'w-full',
'text-left',
'px-2',
'py-1',
'block',
]);

/**
 * Collapses every run of consecutive paragraphs that share a speaker into the
 * first paragraph of that run.
 *
 * The document is modified in place: the tokens of each absorbed paragraph are
 * appended to the paragraph before it, and the absorbed paragraph is removed.
 *
 * @param doc - The document whose `children` (paragraphs) are merged.
 */
function mergeSameSpeakerParagraphs(doc: Document) {
  let position = 0;
  while (position < doc.children.length - 1) {
    const keeper = doc.children[position];
    const follower = doc.children[position + 1];

    // Loose equality on purpose of preserving behavior: a `null` speaker
    // matches an `undefined` one.
    if (keeper.speaker == follower.speaker) {
      // Append a JSON round-tripped (deep) copy of the tokens, not the token
      // objects themselves.
      // NOTE(review): presumably needed because the originals still belong to
      // the paragraph that is removed right after — confirm.
      const copiedTokens = JSON.parse(JSON.stringify(follower.children));
      keeper.children.push(...copiedTokens);
      doc.children.splice(position + 1, 1);
      // Stay on the same paragraph: its new neighbor may share the speaker too.
    } else {
      position++;
    }
  }
}

// Characters that mark the end of a sentence.
const punctuations = ['.', '?', '!'];
// Sequences that contain such a character but do not end a sentence.
const non_punctuations = ['...'];

/**
 * Tells whether `text` contains a sentence-ending character.
 *
 * A text containing any entry of `non_punctuations` never counts, regardless
 * of what else it contains.
 *
 * @param text - The token text to inspect.
 * @returns `true` if `text` contains at least one entry of `punctuations` and
 *   no entry of `non_punctuations`, `false` otherwise.
 */
function containsSentenceEnd(text: string) {
  for (const excluded of non_punctuations) {
    if (text.includes(excluded)) {
      return false;
    }
  }
  for (const mark of punctuations) {
    if (text.includes(mark)) {
      return true;
    }
  }
  return false;
}

export function TextTools({ editor }: { editor: EditorWithWebsocket }) {
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should we add a warning to these tools when they are applied to a document in a non-Latin-style language?

return (
<Popup
button={<IconButton icon={TbHammer} label={'text tools'} />}
onClick={(e) => {
e.preventDefault();
}}
>
<MenuItemButton
onClick={() => {
// Merge consecutive paragraphs of the same speaker in one document update.
editor.update(mergeSameSpeakerParagraphs);
}}
>
Reflow to One Paragraph per Speaker
</MenuItemButton>

<MenuItemButton
onClick={() => {
// The whole reflow runs inside a single `editor.update` call.
editor.update((doc: Document) => {
// strategy: we first merge everything that could possibly be merged...
mergeSameSpeakerParagraphs(doc);

// ...and only then break up on sentence boundaries
const newChildren: Paragraph[] = [];
doc.children.forEach((paragraph) => {
// Start a new paragraph that copies the source paragraph's other fields
// but begins with an empty token list.
let currentParagraph = {
...paragraph,
children: [] as { text: string }[],
};
paragraph.children.forEach((token) => {
// Append a JSON round-tripped (deep) copy of the token.
currentParagraph.children.push(JSON.parse(JSON.stringify(token)));
// A token containing a sentence end closes the current paragraph;
// the next token starts a fresh one with the same fields.
if (containsSentenceEnd(token.text)) {
newChildren.push(currentParagraph);
currentParagraph = {
...paragraph,
children: [],
};
}
});
// Keep trailing tokens that were not followed by a sentence end.
if (currentParagraph.children.length > 0) {
newChildren.push(currentParagraph);
}
});
// Replace the document's paragraph list wholesale.
// NOTE(review): swapping the entire list (instead of editing it in place) may
// interact badly with concurrent edits by other users — confirm.
doc.children = newChildren;
});
}}
>
Reflow to One Paragraph per Sentence
</MenuItemButton>

<MenuItemButton
onClick={() => {
// this strategy tries to split paragraphs at sentence boundaries, but only if there is a pause between the sentences
// or the paragraphs would become too long.
const initial = 2;
const decay = 0.95;

/**
 * Computes the gap between token `i` of `paragraph` and the token after it.
 *
 * @param i - Index of the token whose following gap is wanted.
 * @param paragraph - The paragraph containing the tokens.
 * @returns The next token's `start` minus this token's `end`, or `0` when
 *   either value is `undefined` (for example for the last token, which has no
 *   successor).
 */
const getPause = (i: number, paragraph: Paragraph) => {
  const followingStart = paragraph.children[i + 1]?.start;
  const currentEnd = paragraph.children[i]?.end;
  return followingStart === undefined || currentEnd === undefined
    ? 0
    : followingStart - currentEnd;
};

editor.update((doc: Document) => {
mergeSameSpeakerParagraphs(doc);
const newChildren: Paragraph[] = [];
const addNewChild = (paragraph: Paragraph) => {
// if the paragraph is very long and does not contain any sentence ends, we still want to break it up
if (paragraph.children.length <= 100) {
newChildren.push(paragraph);
} else {
// Collect the pause that follows every token whose text contains a comma.
const silences = paragraph.children
  .map((x, i) => ({ ...x, pause: getPause(i, paragraph) }))
  .filter((token) => token.text.includes(','))
  .map((token) => token.pause);
// Sort numerically, ascending. Without a comparator, `sort()` compares the
// string forms of the numbers (so 10 would be ordered before 2) and the
// threshold picked from the end of this array would not be one of the
// longest pauses.
silences.sort((a, b) => a - b);
// One rank per full 100 tokens: how far to step down from the longest pause
// when the threshold is picked, so longer paragraphs get more split points.
const thresholdIndex = Math.floor(paragraph.children.length / 100);
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This says ~50 tokens but divides by 100, this seems contradictory, or am I missing something?

Also the magic paragraph length could probably be a constant that is used here and for the <= 100 further up

// Pause value `thresholdIndex` positions before the end of `silences`.
// This is `undefined` when that position does not exist (for example when no
// token contains a comma); every `>=` comparison against it is then false and
// the paragraph is kept in one piece.
const threshold = silences[silences.length - 1 - thresholdIndex];
// Start a new piece that copies the paragraph's other fields but begins with
// an empty token list.
let currentParagraph = {
...paragraph,
children: [] as { text: string }[],
};
paragraph.children.forEach((token, i) => {
// Append a JSON round-tripped (deep) copy of the token.
currentParagraph.children.push(JSON.parse(JSON.stringify(token)));
// Cut after a comma token whose following pause reaches the threshold, but
// only once the current piece holds more than 3 tokens.
if (
getPause(i, paragraph) >= threshold &&
token.text.includes(',') &&
currentParagraph.children.length > 3
) {
newChildren.push(currentParagraph);
currentParagraph = {
...paragraph,
children: [],
};
}
});
// Keep whatever tokens remain after the last cut.
if (currentParagraph.children.length > 0) {
newChildren.push(currentParagraph);
}
}
};
doc.children.forEach((paragraph) => {
let minPauseBetweenSentences = initial; // this gets reduced with every additional token
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why does it get reduced with every additional token?

let currentParagraph = {
...paragraph,
children: [] as { text: string }[],
};
paragraph.children.forEach((token, i) => {
currentParagraph.children.push(JSON.parse(JSON.stringify(token)));
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why the JSON dance?

minPauseBetweenSentences *= decay;
if (
getPause(i, paragraph) >= minPauseBetweenSentences &&
containsSentenceEnd(token.text)
) {
addNewChild(currentParagraph);
minPauseBetweenSentences = initial;
currentParagraph = {
...paragraph,
children: [],
};
}
});
if (currentParagraph.children.length > 0) {
addNewChild(currentParagraph);
}
});
doc.children = newChildren;
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think doing it this way totally fucks up collaborative editing...

});
}}
>
Smart Reflow ✨
</MenuItemButton>
</Popup>
);
}
2 changes: 2 additions & 0 deletions frontend/src/pages/document.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ import { Helmet } from 'react-helmet';
import { ShareModal } from '../editor/share';
import { getDocumentWsUrl, useAuthData } from '../utils/auth';
import { ExportModal } from '../editor/export';
import { TextTools } from '../editor/text_tools';

const LazyDebugPanel = lazy(() =>
import('../editor/debug_panel').then((module) => ({ default: module.DebugPanel })),
Expand Down Expand Up @@ -163,6 +164,7 @@ export function DocumentPage({
)}
</TopBarPart>
<TopBarPart>
{editor && <TextTools editor={editor} />}
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This should be gated on data?.can_write, no?

{data?.has_full_access && (
<IconButton
icon={TbShare3}
Expand Down