// Mirror of https://github.com/vrtmrz/obsidian-livesync.git
// Synced 2026-05-20 22:31:44 +00:00
// 198 lines · 8.9 KiB · TypeScript
import { glob } from "glob";
|
|
import { resolve } from "node:path";
|
|
import { fileURLToPath } from "node:url";
|
|
import { promises as fs } from "node:fs";
|
|
import { isPlainText, shouldSplitAsPlainText } from "../../src/lib/src/string_and_binary/path";
|
|
import { splitPiecesRabinKarp } from "../../src/lib/src/string_and_binary/chunks";
|
|
import {
|
|
PREFERRED_BASE,
|
|
PREFERRED_JOURNAL_SYNC,
|
|
PREFERRED_SETTING_CLOUDANT,
|
|
PREFERRED_SETTING_SELF_HOSTED,
|
|
} from "../../src/lib/src/common/models/setting.const.preferred";
|
|
import { type ObsidianLiveSyncSettings, DEFAULT_SETTINGS, MAX_DOC_SIZE_BIN } from "../../src/lib/src/common/types";
|
|
|
|
/**
 * Wraps a string in a Blob tagged as plain text.
 *
 * @param content - Text to store in the blob.
 * @returns A promise resolving to a Blob whose MIME type is `text/plain`.
 */
async function blobFromString(content: string): Promise<Blob> {
    const textBlob = new Blob([content], { type: "text/plain" });
    return textBlob;
}
|
|
|
|
// Local aliases for the imported preferred-setting presets.
// Base preset; only referenced by the commented-out "Base" profile entry.
const preferred = PREFERRED_BASE;
// Preset used for the "Journal Sync" profile.
const preferredJournal = PREFERRED_JOURNAL_SYNC;
// Preset used for the "CouchDB" (self-hosted) profile.
const preferredCouchDB = PREFERRED_SETTING_SELF_HOSTED;
// Preset used for the "IBM Cloudant" profile.
const preferredIBM = PREFERRED_SETTING_CLOUDANT;
|
|
|
|
/**
 * Derives the chunking parameters for a settings overlay.
 *
 * The overlay is layered on top of `DEFAULT_SETTINGS`; the piece size is
 * `MAX_DOC_SIZE_BIN` scaled by `customChunkSize + 1` (a falsy
 * `customChunkSize` counts as 0) and rounded down.
 *
 * @param overlay - Partial settings that override the defaults.
 * @returns The piece size and the minimum chunk size taken from the merged settings.
 */
function computeChunkSize(overlay: Partial<ObsidianLiveSyncSettings>) {
    const merged = { ...DEFAULT_SETTINGS, ...overlay };
    // `* 1` keeps the original numeric coercion of the configured value.
    const scale = (merged.customChunkSize || 0) * 1 + 1;
    return {
        pieceSize: Math.floor(MAX_DOC_SIZE_BIN * scale),
        minimumChunkSize: merged.minimumChunkSize,
    };
}
|
|
|
|
/**
 * Splits `content` with the supplied splitter and summarises the resulting chunks.
 *
 * The blob is treated as plain text when its MIME type is exactly `text/plain`.
 * Chunk sizes are measured as string lengths of the yielded chunks.
 *
 * @param splitPiecesRabinKarpFn - Splitter implementation to exercise.
 * @param content - Blob to split.
 * @param settingsOverlay - Settings layered over the defaults to derive chunk sizes.
 * @returns Statistics about the chunks plus the chunks themselves. When the
 *          splitter yields no chunks, average, maximum and minimum sizes are 0.
 */
async function testSplit(
    splitPiecesRabinKarpFn: typeof splitPiecesRabinKarp,
    content: Blob,
    settingsOverlay: Partial<ObsidianLiveSyncSettings>
) {
    const { pieceSize, minimumChunkSize } = computeChunkSize(settingsOverlay);
    const isPlain = content.type === "text/plain";
    const chunkGenerator = await splitPiecesRabinKarpFn(content, pieceSize, isPlain, minimumChunkSize);
    const chunks: string[] = [];
    for await (const chunk of chunkGenerator()) {
        chunks.push(chunk);
    }

    const chunkLengths = chunks.map((c) => c.length);
    const sum = (values: number[]) => values.reduce((acc, value) => acc + value, 0);

    // The last chunk is whatever remained and may be much smaller than the others,
    // so it is left out of the average/minimum unless it is the only chunk.
    const representativeLengths = chunkLengths.length > 1 ? chunkLengths.slice(0, -1) : chunkLengths;

    // Guard the empty case (e.g. an empty file): avoid NaN and a crash on a missing last chunk.
    const averageChunkSize =
        representativeLengths.length > 0 ? sum(representativeLengths) / representativeLengths.length : 0;

    // Reductions instead of Math.max(...)/Math.min(...) so huge chunk lists
    // cannot exceed the engine's argument-count limit.
    const maxChunkSize = chunkLengths.reduce((max, length) => Math.max(max, length), 0);
    const minChunkSize =
        representativeLengths.length > 0
            ? representativeLengths.reduce((min, length) => Math.min(min, length), Infinity)
            : 0;

    return {
        isPlain,
        originalSize: content.size,
        chunkCount: chunks.length,
        totalLength: sum(chunkLengths),
        averageChunkSize,
        maxChunkSize,
        minChunkSize,
        uniqueChunks: new Set(chunks).size,
        chunks,
    };
}
|
|
// Rebuild the CommonJS-style path helpers from this module's URL.
const __filename = fileURLToPath(import.meta.url);
// Resolving ".." against the file path yields the directory containing this script.
const __dirname = resolve(__filename, "..");
|
|
/**
 * Reads a file from disk into a Blob.
 *
 * Paths accepted by `shouldSplitAsPlainText` are read as UTF-8 and wrapped via
 * `blobFromString` (MIME type `text/plain`); every other file is read as raw
 * bytes into a Blob without a MIME type.
 *
 * @param filePath - Path of the file to load.
 * @returns A promise resolving to the file contents as a Blob.
 */
async function loadFileAsBlob(filePath: string): Promise<Blob> {
    if (!shouldSplitAsPlainText(filePath)) {
        const bytes = await fs.readFile(filePath);
        return new Blob([bytes]);
    }
    const text = await fs.readFile(filePath, "utf-8");
    return blobFromString(text);
}
|
|
// Settings profiles exercised by the benchmark; each entry pairs a display
// name (written to the "Profile" column) with the settings overlay to test.
const testProfiles = [
    { name: "CouchDB", settings: preferredCouchDB },
    { name: "IBM Cloudant", settings: preferredIBM },
    { name: "Journal Sync", settings: preferredJournal },
    // { name: "Base", settings: preferred },
];
|
|
/**
 * Returns a copy of `blob` with `insertText` inserted at a byte offset.
 *
 * @param blob - Source blob; it is not modified.
 * @param position - Byte offset at which the text is inserted (passed to `Blob.slice`).
 * @param insertText - Text to insert.
 * @returns A new Blob with the same MIME type as `blob`.
 */
function modifyBlob(blob: Blob, position: number, insertText: string): Blob {
    const { type } = blob;
    const head = blob.slice(0, position);
    const tail = blob.slice(position);
    const inserted = new Blob([insertText], { type });
    return new Blob([head, inserted, tail], { type });
}
|
|
/**
 * Quotes a CSV field when it contains a comma, double quote or line break,
 * doubling any embedded double quotes; other fields are returned unchanged.
 */
function escapeCsvField(field: string): string {
    return /[",\r\n]/.test(field) ? `"${field.replace(/"/g, '""')}"` : field;
}

/**
 * Runs the chunk-splitting benchmark and writes `splitResults.csv` next to this script.
 *
 * For every profile in `testProfiles` and every matching file found two
 * directories above this script, the file is split once as-is ("original") and
 * once per single-character edit (head, 5%, middle, 95%, tail). Each run adds
 * one CSV row; edit rows also report how many chunks are shared with the
 * original split and how much newly created chunk data the edit caused.
 */
async function main() {
    const results: string[][] = [];
    console.log("directory:", __dirname);
    const findPath = resolve(__dirname, "../../");
    console.warn("CWD:", findPath);

    const targetExtensions = new Set(["md", "txt", "json", "csv", "png"]);
    const matchedPaths = await glob("**/*.*", {
        cwd: findPath,
        maxDepth: 20,
        // Directories with a dot in their name match the pattern too; reading them would fail.
        nodir: true,
        ignore: ["**/node_modules/**", "**/.obsidian/**", "**/dist/**", "**/build/**", "**/out/**"],
    });
    const testFiles = matchedPaths.filter((file) => {
        const ext = file.split(".").pop()?.toLowerCase() || "";
        return targetExtensions.has(ext);
    });

    const header = [
        "Profile",
        "Implementation",
        "Edition",
        "File",
        "Mode",
        "Original Size (bytes)",
        "Chunk Count",
        "Average Chunk Size",
        "Max Chunk Size",
        "Min Chunk Size",
        "Unique Chunks",
        "Shared Chunks",
        "Savings",
        "Newly added (count)",
        "Newly consumed (bytes)",
    ];

    for (const profile of testProfiles) {
        console.log(`Testing profile: ${profile.name}`);
        for (const fn of [splitPiecesRabinKarp]) {
            const funcProfile = fn !== splitPiecesRabinKarp ? "Old" : "New";
            console.log(`Testing function: ${funcProfile}`);
            for (const file of testFiles) {
                const filePath = resolve(findPath, file);
                const mode = shouldSplitAsPlainText(filePath) ? "plain" : "binary";
                const content = await loadFileAsBlob(filePath);
                console.log(`Testing file: ${file} (size: ${content.size} bytes)`);
                const result = await testSplit(fn, content, profile.settings);
                // NOTE(review): originalSize is a byte count while chunk sizes are string
                // lengths, so "Savings" may mix units — confirm this is intended.
                const savings = result.originalSize - result.totalLength;
                results.push([
                    `${profile.name}`,
                    funcProfile,
                    "original",
                    file,
                    mode,
                    content.size.toString(),
                    result.chunkCount.toString(),
                    result.averageChunkSize.toFixed(2),
                    result.maxChunkSize.toString(),
                    result.minChunkSize.toString(),
                    result.uniqueChunks.toString(),
                    "",
                    savings.toString(),
                    "",
                    "",
                ]);

                // Insert "*" at the head, 5%, middle, 95% and tail to see how an edit
                // at each position affects the chunking.
                const editions = [
                    { name: "head", content: modifyBlob(content, 0, "*") },
                    { name: "5%", content: modifyBlob(content, Math.floor(content.size * 0.05), "*") },
                    { name: "middle", content: modifyBlob(content, Math.floor(content.size * 0.5), "*") },
                    { name: "95%", content: modifyBlob(content, Math.floor(content.size * 0.95), "*") },
                    { name: "tail", content: modifyBlob(content, content.size, "*") },
                ];
                // Set lookup keeps the shared/new comparison linear in the number of chunks.
                const baseChunkSet = new Set(result.chunks);
                for (const edition of editions) {
                    console.log(`Testing edition: ${edition.name}`);
                    const editionResult = await testSplit(fn, edition.content, profile.settings);
                    const newChunks = editionResult.chunks.filter((chunk) => !baseChunkSet.has(chunk));
                    const sharedChunks = editionResult.chunks.length - newChunks.length;
                    const editionSavings = editionResult.originalSize - editionResult.totalLength;
                    // Total size of the chunks that did not exist in the original split.
                    const newChunksSize = newChunks.reduce((acc, chunk) => acc + chunk.length, 0);
                    results.push([
                        `${profile.name}`,
                        funcProfile,
                        `${edition.name}`,
                        file,
                        mode,
                        edition.content.size.toString(),
                        editionResult.chunkCount.toString(),
                        editionResult.averageChunkSize.toFixed(2),
                        editionResult.maxChunkSize.toString(),
                        editionResult.minChunkSize.toString(),
                        editionResult.uniqueChunks.toString(),
                        sharedChunks.toString(),
                        editionSavings.toString(),
                        newChunks.length.toString(),
                        newChunksSize.toString(),
                    ]);
                }
            }
        }
    }

    // Escape each field so file paths containing commas or quotes cannot shift columns.
    const csv = [header, ...results].map((row) => row.map(escapeCsvField).join(",")).join("\n");
    await fs.writeFile(resolve(__dirname, "splitResults.csv"), csv);
}
|
|
// Run the benchmark; report any failure and mark the process as failed
// instead of leaving the returned promise unhandled.
main().catch((error: unknown) => {
    console.error(error);
    process.exitCode = 1;
});
|