Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
164 changes: 66 additions & 98 deletions shared-libs/infodoc/src/infodoc.js
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,7 @@ const findInfoDocs = (database, ids) => {
};

//
// Given a set of changes, find all the infoDocs or create them as necessary. Also takes care of
// migrating legacy infodocs from the medic db, and legacy transition information from records.
// Given a set of changes, find all the infoDocs or create them as necessary.
//
// @param {Array} changes an array of PouchDB changes objects, each containing at least {id, doc}
// @return {Array} array of infodocs. NB: will not necessarily be in the same order as the
Expand All @@ -35,9 +34,7 @@ const resolveInfoDocs = (changes, writeDirtyInfoDocs) => {
return results.reduce((acc, row) => {
if (!row.doc) {
acc.missing.push({ _id: row.key });
} else if (!row.doc.transitions) {
// No transitions may mean that API created this infodoc on write but sentinel hasn't seen
// it yet. It's possible that there is a legacy infodoc with transition information.
} else if (!row.doc.transitions && !row.doc.transitions_started) {
acc.missingTransitions.push(row.doc);
} else {
acc.valid.push(row.doc);
Expand All @@ -48,95 +45,35 @@ const resolveInfoDocs = (changes, writeDirtyInfoDocs) => {
}, { valid: [], missing: [], missingTransitions: [] });
};

const changeForInfoDoc = infoDocId => changes.find(change => getInfoDocId(change.id) === infoDocId);

const infoDocIds = changes.map(change => getInfoDocId(change.id));

// First attempt, directly from sentinel where they should live
return findInfoDocs(db.sentinel, infoDocIds)
.then(results => {
const { valid, missing, missingTransitions: missingTransitionsSentinel } = splitInfoDocRows(results);

const lookForInMedic = missing.concat(missingTransitionsSentinel).map(r => r._id);

if (!lookForInMedic.length) {
return valid;
}

// the infodocs missing transitions are still valid, we just need to look for their transitions!
const infoDocs = valid.concat(missingTransitionsSentinel);

// Missing infodocs or missing transitions may be either
return findInfoDocs(db.medic, lookForInMedic)
.then(results => {
const migratedInfoDocs = [];
const { valid, missing, missingTransitions: missingTransitionsMedic } = splitInfoDocRows(results);

// Back when infodocs were in the medic db, transitions were still stored against the
// actual document. We'll deal with this below
valid.push(...missingTransitionsMedic);

// Convert valid MedicDB infodocs into Sentinel ones
valid.forEach(medicInfoDoc => {
const sentinelInfoDoc = missingTransitionsSentinel.find(d => d._id === medicInfoDoc._id);

const change = changes.find(change => change.id === medicInfoDoc.doc_id);

if (sentinelInfoDoc) {
// Merge information from the medic infodoc into sentinel's
Object.keys(medicInfoDoc).forEach(k => {
if (sentinelInfoDoc[k] === undefined) {
sentinelInfoDoc[k] = medicInfoDoc[k];
}
});

// Explicitly take the older (and so more correct) initial_replication_date. These would
// be different if a new write occurred on an old infodoc-unmigrated document, as api
// creates a new sentinel infodoc with an initial and latest replication date
sentinelInfoDoc.initial_replication_date = medicInfoDoc.initial_replication_date;

// Source transitions from the document if they don't exist
sentinelInfoDoc.transitions = sentinelInfoDoc.transitions || (change.doc && change.doc.transitions);

migratedInfoDocs.push(sentinelInfoDoc);
} else {
const infoDoc = Object.assign({}, medicInfoDoc);
delete infoDoc._rev;
infoDoc.transitions = change.doc && change.doc.transitions;
infoDocs.push(infoDoc);
migratedInfoDocs.push(infoDoc);
}

medicInfoDoc._deleted = true;
});

// Intentionally not waiting on the promise for performance
if (valid.length) {
db.medic.bulkDocs(valid);
}

// Infodocs that aren't in the Medic DB. This could mean there isn't one at all, or it
// could be that there was one without transition data back in sentinel
missing.forEach(missingDoc => {
const docId = getDocId(missingDoc._id);
const { valid, missing, missingTransitions } = splitInfoDocRows(results);

const collectedInfoDoc = infoDocs.find(i => i._id === missingDoc._id);
const change = changes.find(change => change.id === docId);
const infoDoc = collectedInfoDoc || blankInfoDoc(docId, !change.doc._rev && Date.now());
const infoDocs = valid.concat(missingTransitions);
const dirtyInfoDocs = [];

infoDoc.transitions = change.doc && change.doc.transitions;

if (!collectedInfoDoc) {
infoDocs.push(infoDoc);
}
missingTransitions.forEach(infoDoc => {
const change = changeForInfoDoc(infoDoc._id);
infoDoc.transitions = infoDoc.transitions || change.doc?.transitions;

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The history of the transitions changes was:

  1. 2.x - transitions were saved on the document itself (doc.transitions)
  2. 3.x - migrated from storing transitions on document to storing transitions on infodoc, and stored in medic database
  3. 3.x - infodoc transition from medic database to medic-sentinel database

So here, where you're deleting the migration of infodoc from medic to medic-sentinel but keeping the possibility of having transitions saved on the doc itself is like removing the middle migration and keeping the first one. If we assume that we are past infodocs in medic database, that we can absolutely assume that transitions are no longer properties of medic documents.

We either delete all or none.

dirtyInfoDocs.push(infoDoc);
});

migratedInfoDocs.push(infoDoc);
});
missing.forEach(missingDoc => {
const change = changeForInfoDoc(missingDoc._id);
const infoDoc = blankInfoDoc(change.id, !change.doc._rev && Date.now());
infoDoc.transitions = change.doc && change.doc.transitions;
infoDocs.push(infoDoc);
dirtyInfoDocs.push(infoDoc);
});

// Store any infoDocs that have been migrated.
if (writeDirtyInfoDocs && migratedInfoDocs.length) {
return bulkUpdate(migratedInfoDocs);
}
return infoDocs;
});
if (writeDirtyInfoDocs && dirtyInfoDocs.length) {
return bulkUpdate(dirtyInfoDocs).then(() => infoDocs);
}
return infoDocs;
});
};

Expand All @@ -151,33 +88,62 @@ const updateTransition = (change, transition, ok) => {
};
};

const saveTransitions = change => {
return saveProperty(change.id, change.info, 'transitions', {});
const saveTransitions = (change, clearStarted = false) => {
const modify = infoDoc => {
infoDoc.transitions = change.info?.transitions || {};
// Clear the in-progress marker in the same write that commits the transitions (API branch only)
if (clearStarted) {

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If nobody triggers this (so somehow API messes up and crashes or some other thing) and the transitions_started remains on the document, this will mean that it will permanently be skipped by Sentinel.

Can we add a fallback where sentinel checks the time when transitions_started was set, and if it's over some prescribed interval, it will delete it itself and resume transition processing for this doc.

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It feels a bit redundant that we have a clearStarted parameter but also a clearTransitionsStarted function, both deleting the same property. is it possible for this function to call clearTransitionsStarted instead of this annex to the modify function?

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I saw that just inlining the next call generates an extra round trip to the database. I asked claude about it and:

The cleaner option — drop the param without reusing clearTransitionsStarted and without an extra write: have saveTransitions always delete the marker.

  This is safe at all three callers: the two non-API callers have no marker, so delete is a harmless no-op; the API caller gets exactly today's behavior. It's strictly better than both alternatives — one write, no param, and it enforces a clean invariant ("transitions committed ⇒ no in-progress marker"). It even opportunistically clears a stale
  marker if one ever reaches this path, which slightly mitigates the crash-poisoning gap from finding #2.

  The one tradeoff: saveTransitions now always touches transitions_started, so it's coupled to the marker concept even on the sentinel path. That coupling is mild and arguably correct — but if witash wants saveTransitions to stay marker-agnostic, then keeping the explicit clearStarted param is the more honest expression of intent, and "redundant"
  isn't quite the right framing since the param is what folds two mutations into a single write. Worth raising as a suggestion either way; I'd lead with the unconditional-delete version.

Maybe worth considering.

delete infoDoc.transitions_started;
}
};
return modifyInfoDoc(change.id, modify, change.info);
};

const saveCompletedTasks = (id, infodoc, completedTasks = []) => {
return saveProperty(id, infodoc, 'completed_tasks', completedTasks);
Comment thread
witash marked this conversation as resolved.
return modifyInfoDoc(id, infoDoc => {
infoDoc.completed_tasks = infodoc?.completed_tasks || completedTasks;
}, infodoc);
};

const markTransitionsStarted = (id) => {
const modify = infoDoc => {
infoDoc.transitions_started = new Date().toISOString();
};
return modifyInfoDoc(id, modify, blankInfoDoc(id));
};

const saveProperty = async (id, infodoc, property, defaultValue = {}) => {
let updatedInfoDoc;
const clearTransitionsStarted = (id) => {
const modify = infoDoc => {
delete infoDoc.transitions_started;
};
return modifyInfoDoc(id, modify, blankInfoDoc(id));
};

// Fetch the infodoc. If it is missing, return `fallback` (to be created) when provided;
const fetchInfoDoc = async (id, fallback) => {
try {
updatedInfoDoc = await db.sentinel.get(getInfoDocId(id));
updatedInfoDoc[property] = (infodoc && infodoc[property]) || defaultValue;
return await db.sentinel.get(getInfoDocId(id));
} catch (err) {
if (err.status !== 404) {
throw err;
if (err.status === 404 && fallback) {
return fallback;
}
updatedInfoDoc = infodoc;
throw err;
}
};

// Fetch the infodoc, apply `modify`, and save, retrying on conflict
const modifyInfoDoc = async (id, modify, fallback) => {
const infoDoc = await fetchInfoDoc(id, fallback);

modify(infoDoc);

try {
return await db.sentinel.put(updatedInfoDoc);
return await db.sentinel.put(infoDoc);
} catch (err) {
if (err.status !== 409) {
throw err;
}
return saveProperty(id, infodoc, property, defaultValue);
return modifyInfoDoc(id, modify, fallback);
}
};

Expand Down Expand Up @@ -283,6 +249,8 @@ module.exports = {
bulkUpdate: bulkUpdate,
saveTransitions: saveTransitions,
saveCompletedTasks: saveCompletedTasks,
markTransitionsStarted: markTransitionsStarted,
clearTransitionsStarted: clearTransitionsStarted,

// Used to update infodoc metadata that occurs at write time. A delete does not count as a write
// in this instance, as deletes resolve as infodoc cleanups once sentinel's background-cleanup
Expand Down
Loading