From e392d523a7df4c22575b412b4ec0c632fe4ef3a4 Mon Sep 17 00:00:00 2001 From: Gianni Ceccarelli Date: Tue, 28 Nov 2023 09:45:51 +0000 Subject: [PATCH] prepare to import more notes `recreateChain` converts a list of notes into a forest of notes, using notes that are not replies as roots, and replies as child nodes, recursively. Previously, notes that are replies to notes not included in the export, and their children, were never put in the forest, and therefore weren't imported. This can be fine when importing from Twitter, since we can't really link a note to a tweet. And, for the moment, it's acceptable when importing from *key, because the export doesn't contain the instance URL, so we can't resolve ids to remote notes. It's less fine when importing from Mastodon / Pleroma / Akkoma, because in those cases we _can_ link to the remote note that the user was replying to. This commit makes `recreateChain` optionally return "orphaned" note trees, so in the (near) future we can use it to properly thread imported notes from those services. 
--- .../processors/ImportNotesProcessorService.ts | 26 ++++++++++++------- 1 file changed, 17 insertions(+), 9 deletions(-) diff --git a/packages/backend/src/queue/processors/ImportNotesProcessorService.ts b/packages/backend/src/queue/processors/ImportNotesProcessorService.ts index cbe1d41e3..5b167e46b 100644 --- a/packages/backend/src/queue/processors/ImportNotesProcessorService.ts +++ b/packages/backend/src/queue/processors/ImportNotesProcessorService.ts @@ -74,7 +74,7 @@ export class ImportNotesProcessorService { // Function was taken from Firefish and modified for our needs @bindThis - private async recreateChain(idField: string, replyField: string, arr: any[]): Promise { + private async recreateChain(idField: string, replyField: string, arr: any[], includeOrphans: boolean): Promise { type NotesMap = { [id: string]: any; }; @@ -83,28 +83,36 @@ export class ImportNotesProcessorService { const notesWaitingForParent: NotesMap = {}; for await (const note of arr) { - noteById[note[idField]] = note; + const noteId = note[idField]; + + noteById[noteId] = note; note.childNotes = []; - const children = notesWaitingForParent[note[idField]]; + const children = notesWaitingForParent[noteId]; if (children) { note.childNotes.push(...children); + delete notesWaitingForParent[noteId]; } - if (note[replyField] == null) { + const noteReplyId = note[replyField]; + if (noteReplyId == null) { notesTree.push(note); continue; } - const parent = noteById[note[replyField]]; + const parent = noteById[noteReplyId]; if (parent) { parent.childNotes.push(note); } else { - notesWaitingForParent[note[replyField]] ||= []; - notesWaitingForParent[note[replyField]].push(note); + notesWaitingForParent[noteReplyId] ||= []; + notesWaitingForParent[noteReplyId].push(note); } } + if (includeOrphans) { + notesTree.push(...Object.values(notesWaitingForParent).flat(1)); + } + return notesTree; } @@ -176,7 +184,7 @@ export class ImportNotesProcessorService { const tweets = 
Object.keys(fakeWindow.window.YTD.tweets.part0).reduce((m, key, i, obj) => { return m.concat(fakeWindow.window.YTD.tweets.part0[key].tweet); }, []); - const processedTweets = await this.recreateChain('id_str', 'in_reply_to_status_id_str', tweets); + const processedTweets = await this.recreateChain('id_str', 'in_reply_to_status_id_str', tweets, false); this.queueService.createImportTweetsToDbJob(job.data.user, processedTweets, null); } finally { cleanup(); @@ -289,7 +297,7 @@ export class ImportNotesProcessorService { const notesJson = fs.readFileSync(path, 'utf-8'); const notes = JSON.parse(notesJson); - const processedNotes = await this.recreateChain('id', 'replyId', notes); + const processedNotes = await this.recreateChain('id', 'replyId', notes, false); this.queueService.createImportKeyNotesToDbJob(job.data.user, processedNotes, null); cleanup(); }