diff --git a/frontend/src/editor/automerge_websocket_editor.ts b/frontend/src/editor/automerge_websocket_editor.ts index f06b66c8..6a963da9 100644 --- a/frontend/src/editor/automerge_websocket_editor.ts +++ b/frontend/src/editor/automerge_websocket_editor.ts @@ -16,14 +16,18 @@ enum MessageSyncType { FullDoc = 3, } +export type EditorWithWebsocket = Editor & { + update: (changeFn: (doc: Document) => void) => void; +}; + export function useAutomergeWebsocketEditor( url: string, { onInitialSyncComplete }: { onInitialSyncComplete: (editor?: Editor) => void }, -): [Editor?, Paragraph[]?] { +): [EditorWithWebsocket?, Paragraph[]?] { const debug = useDebugMode(); const sentChanges = useRef>(new Set()); const [editorAndInitialValue, setEditorAndInitialValue] = useState(null); const editorRef = useRef(); @@ -54,7 +58,9 @@ export function useAutomergeWebsocketEditor( const createNewEditor = (doc: Automerge.Doc) => { const baseEditor = createEditor(); const editorWithReact = withReact(baseEditor); - const editor = withHistory(withAutomergeDoc(editorWithReact, Automerge.init())); + const editor = withHistory( + withAutomergeDoc(editorWithReact, Automerge.init()), + ) as EditorWithWebsocket; editor.addDocChangeListener(sendDocChange); const migratedDoc = migrateDocument(doc as Automerge.Doc); @@ -68,6 +74,18 @@ export function useAutomergeWebsocketEditor( migratedDoc.children !== undefined ? 
JSON.parse(JSON.stringify(migratedDoc.children)) : []; + + /* Applies changeFn to editor.doc through Automerge.change, installs the result with editor.setDoc and hands it to sendDocChange; each of the three steps is timed with console.time/console.timeEnd. NOTE(review): the timers look like leftover profiling output - confirm they are meant to ship. */ editor.update = (changeFn: (doc: Document) => void) => { + console.time('changeFn'); + const changed = Automerge.change(editor.doc, changeFn); + console.timeEnd('changeFn'); + console.time('setDoc'); + editor.setDoc(changed); + console.timeEnd('setDoc'); + console.time('sendDocChange'); + sendDocChange(changed); + console.timeEnd('sendDocChange'); + }; return { editor: editor, initialValue: initialValue }; }); }; diff --git a/frontend/src/editor/text_tools.tsx b/frontend/src/editor/text_tools.tsx new file mode 100644 index 00000000..8ec83a91 --- /dev/null +++ b/frontend/src/editor/text_tools.tsx @@ -0,0 +1,182 @@ +import { TbHammer } from 'react-icons/tb'; +import { IconButton } from '../components/button'; +import { EditorWithWebsocket } from './automerge_websocket_editor'; +import { Document, Paragraph } from '../editor/types'; +import { Popup } from '../components/popup'; +import { primitiveWithClassname } from '../styled'; + +export const MenuItemButton = primitiveWithClassname('button', [ + 'hover:bg-gray-200 dark:hover:bg-neutral-700', + 'rounded-md', + 'w-full', + 'text-left', + 'px-2', + 'py-1', + 'block', +]); + +/** Merges every run of consecutive paragraphs whose `speaker` values compare equal (loose `==`) into the first paragraph of the run, mutating `doc.children` in place. Merge positions are collected first; `removed` then shifts each recorded index left by the number of paragraphs already spliced out. Tokens are copied via a JSON round-trip before being appended (presumably because document objects cannot be re-inserted by reference - confirm). */ function mergeSameSpeakerParagraphs(doc: Document) { + const mergePoints: number[] = []; + for (let i = 0; i < doc.children.length - 1; i++) { + const paragraph = doc.children[i]; + const nextParagraph = doc.children[i + 1]; + if (paragraph.speaker == nextParagraph.speaker) { + mergePoints.push(i); + } + } + let removed = 0; + mergePoints.forEach((index) => { + const i = index - removed; + doc.children[i].children.push(...JSON.parse(JSON.stringify(doc.children[i + 1].children))); + doc.children.splice(i + 1, 1); + removed++; + }); +} + +const punctuations = ['.', '?', '!']; +const non_punctuations = ['...']; +/** Returns true when `text` contains '.', '?' or '!' and does not contain '...'. */ function containsSentenceEnd(text: string) { + return ( + punctuations.some((punct) => text.includes(punct)) && + !non_punctuations.some((np) => text.includes(np)) + ); +} + +export 
function TextTools({ editor }: { editor: EditorWithWebsocket }) { + return ( + } + onClick={(e) => { + e.preventDefault(); + }} + > + { + editor.update(mergeSameSpeakerParagraphs); + }} + > + Reflow to One Paragraph per Speaker + + + { + editor.update((doc: Document) => { + // strategy: we first merge everything that could possibly be merged... + mergeSameSpeakerParagraphs(doc); + + // ...and only then break up on sentence boundaries + const newChildren: Paragraph[] = []; + doc.children.forEach((paragraph) => { + let currentParagraph = { + ...paragraph, + children: [] as { text: string }[], + }; + paragraph.children.forEach((token) => { + currentParagraph.children.push(JSON.parse(JSON.stringify(token))); + if (containsSentenceEnd(token.text)) { + newChildren.push(currentParagraph); + currentParagraph = { + ...paragraph, + children: [], + }; + } + }); + if (currentParagraph.children.length > 0) { + newChildren.push(currentParagraph); + } + }); + doc.children = newChildren; + }); + }} + > + Reflow to One Paragraph per Sentence + + + { + // this strategy tries to split paragraphs at sentence boundaries, but only if there is a pause between the sentences + // or the paragraphs would become too long. 
+ const initial = 2; + const decay = 0.95; + + /* Pause between the end of token i and the start of the following token of the same paragraph; 0 when either timestamp is missing (including the last token). */ const getPause = (i: number, paragraph: Paragraph) => { + const token = paragraph.children[i]; + const nextToken = paragraph.children[i + 1]; + if (nextToken?.start !== undefined && token?.end !== undefined) { + return nextToken.start - token.end; + } + return 0; + }; + + editor.update((doc: Document) => { + mergeSameSpeakerParagraphs(doc); + const newChildren: Paragraph[] = []; + const addNewChild = (paragraph: Paragraph) => { + // if the paragraph is very long and does not contain any sentence ends, we still want to break it up + if (paragraph.children.length <= 100) { + newChildren.push(paragraph); + } else { + const silences = paragraph.children + .map((x, i) => ({ ...x, pause: getPause(i, paragraph) })) + .filter((token) => token.text.includes(',')) + .map((token) => token.pause); + silences.sort((a, b) => a - b); /* numeric ascending: without a comparator Array.prototype.sort orders the pauses by their string form (10.3 before 2.1), which picks the wrong threshold below */ + const thresholdIndex = Math.floor(paragraph.children.length / 100); // aim for paragraphs of max ~50 tokens + const threshold = silences[silences.length - 1 - thresholdIndex]; + let currentParagraph = { + ...paragraph, + children: [] as { text: string }[], + }; + paragraph.children.forEach((token, i) => { + currentParagraph.children.push(JSON.parse(JSON.stringify(token))); + if ( + getPause(i, paragraph) >= threshold && + token.text.includes(',') && + currentParagraph.children.length > 3 + ) { + newChildren.push(currentParagraph); + currentParagraph = { + ...paragraph, + children: [], + }; + } + }); + if (currentParagraph.children.length > 0) { + newChildren.push(currentParagraph); + } + } + }; + doc.children.forEach((paragraph) => { + let minPauseBetweenSentences = initial; // this gets reduced with every additional token + let currentParagraph = { + ...paragraph, + children: [] as { text: string }[], + }; + paragraph.children.forEach((token, i) => { + currentParagraph.children.push(JSON.parse(JSON.stringify(token))); + minPauseBetweenSentences *= decay; + if ( + getPause(i, paragraph) >= minPauseBetweenSentences && 
+ containsSentenceEnd(token.text) + ) { + addNewChild(currentParagraph); + minPauseBetweenSentences = initial; + currentParagraph = { + ...paragraph, + children: [], + }; + } + }); + if (currentParagraph.children.length > 0) { + addNewChild(currentParagraph); + } + }); + doc.children = newChildren; + }); + }} + > + Smart Reflow ✨ + + + ); +} diff --git a/frontend/src/pages/document.tsx b/frontend/src/pages/document.tsx index 6252f207..0d8e8119 100644 --- a/frontend/src/pages/document.tsx +++ b/frontend/src/pages/document.tsx @@ -18,6 +18,7 @@ import { Helmet } from 'react-helmet'; import { ShareModal } from '../editor/share'; import { getDocumentWsUrl, useAuthData } from '../utils/auth'; import { ExportModal } from '../editor/export'; +import { TextTools } from '../editor/text_tools'; const LazyDebugPanel = lazy(() => import('../editor/debug_panel').then((module) => ({ default: module.DebugPanel })), @@ -163,6 +164,7 @@ export function DocumentPage({ )} + {editor && } {data?.has_full_access && (