feat: Version 1.1.4

This commit is contained in:
2025-02-21 10:16:02 +07:00
parent cf2b14b37a
commit 4fa634f813
2 changed files with 94 additions and 80 deletions

View File

@@ -1,3 +1,3 @@
{ {
"latest": "1.1.3" "latest": "1.1.4"
} }

View File

@@ -1,7 +1,7 @@
// ==UserScript== // ==UserScript==
// @name Crowdin Localization Tools // @name Crowdin Localization Tools
// @namespace https://yuzu.kirameki.cafe/ // @namespace https://yuzu.kirameki.cafe/
// @version 1.1.3 // @version 1.1.4
// @description A tool for translating Crowdin projects using a CSV file // @description A tool for translating Crowdin projects using a CSV file
// @author Yuzu (YuzuZensai) // @author Yuzu (YuzuZensai)
// @match https://crowdin.com/editor/* // @match https://crowdin.com/editor/*
@@ -45,7 +45,7 @@ const CONFIG = {
fuzzyThreshold: 0.7, fuzzyThreshold: 0.7,
metadata: { metadata: {
version: "1.1.3", version: "1.1.4",
repository: "https://github.com/YuzuZensai/Crowdin-Localization-Tools", repository: "https://github.com/YuzuZensai/Crowdin-Localization-Tools",
authorGithub: "https://github.com/YuzuZensai", authorGithub: "https://github.com/YuzuZensai",
}, },
@@ -863,9 +863,14 @@ function TranslatorTool() {
function setupEventListeners() { function setupEventListeners() {
log("info", "Setting up event listeners"); log("info", "Setting up event listeners");
searchInput.addEventListener("input", function () { // Debounce the search with 300ms delay
log("info", "Search input detected"); const debouncedSearch = debounce(() => {
searchTranslations(); searchTranslations();
}, 300);
searchInput.addEventListener("input", function () {
log("info", "Search input detected - debounced");
debouncedSearch();
}); });
} }
@@ -1280,20 +1285,30 @@ function TranslatorTool() {
} }
} }
function findMatches(text) { // Debounce function
if (!text || !translationData.length) return; function debounce(func, wait) {
let timeout;
return function executedFunction(...args) {
const later = () => {
clearTimeout(timeout);
func(...args);
};
clearTimeout(timeout);
timeout = setTimeout(later, wait);
};
}
log("debug", "Finding matches for text:", { // Cache for word combinations
text: text, const combinationsCache = new Map();
wordCount: text.split(/\s+/).filter((w) => w.length > 0).length, const similarityCache = new Map();
});
var words = text.split(/\s+/).filter((word) => word.length > 0); function getCachedCombinations(text) {
var matches = []; if (combinationsCache.has(text)) {
var seenCombinations = new Set(); return combinationsCache.get(text);
}
// Generate word combinations (phrases of decreasing length) const words = text.split(/\s+/).filter((word) => word.length > 0);
var combinations = []; const combinations = [];
// Add full phrase first // Add full phrase first
const fullPhrase = words.join(" "); const fullPhrase = words.join(" ");
@@ -1324,11 +1339,50 @@ function TranslatorTool() {
} }
}); });
combinationsCache.set(text, combinations);
return combinations;
}
/**
 * Memoized wrapper around `similarity(str1, str2)`.
 *
 * Looks the pair up in `similarityCache`; on a miss it calls `similarity`,
 * stores the result under the pair's key, and returns it.
 *
 * @param {string} str1 - First string of the pair.
 * @param {string} str2 - Second string of the pair.
 * @returns {*} Whatever `similarity` returns for this pair (used as a numeric
 *   score by callers in this file).
 */
function getCachedSimilarity(str1, str2) {
  // Length-prefix the first string so the key is unambiguous even when the
  // inputs themselves contain "|". A plain `${str1}|${str2}` key maps both
  // ("a|b", "c") and ("a", "b|c") to "a|b|c", which returns the wrong score.
  const key = `${str1.length}:${str1}|${str2}`;
  if (similarityCache.has(key)) {
    return similarityCache.get(key);
  }

  const score = similarity(str1, str2);
  similarityCache.set(key, score);
  return score;
}
function findMatches(text) {
if (!text || !translationData.length) return;
log("debug", "Finding matches for text:", {
text: text,
wordCount: text.split(/\s+/).filter((w) => w.length > 0).length,
});
const matches = [];
const seenCombinations = new Set();
const combinations = getCachedCombinations(text);
log("debug", "Generated combinations:", combinations); log("debug", "Generated combinations:", combinations);
// Pre-calculate source combinations for each entry
const entryCombinations = new Map();
translationData.forEach((entry) => {
entryCombinations.set(entry, getCachedCombinations(entry.source));
});
combinations.forEach(function (combination) { combinations.forEach(function (combination) {
if (!combination) return; if (!combination) return;
const combinationLower = combination.toLowerCase();
// Early exit if we already have enough high-quality matches
if (matches.length > 20 && matches[19].score > 0.9) {
return;
}
translationData.forEach(function (entry) { translationData.forEach(function (entry) {
const uniqueKey = `${entry.source.toLowerCase()}_${ const uniqueKey = `${entry.source.toLowerCase()}_${
entry.category || "default" entry.category || "default"
@@ -1336,16 +1390,10 @@ function TranslatorTool() {
if (seenCombinations.has(uniqueKey)) return; if (seenCombinations.has(uniqueKey)) return;
const entryLower = entry.source.toLowerCase(); const entryLower = entry.source.toLowerCase();
const combinationLower = combination.toLowerCase();
// For exact matches (case-insensitive) // For exact matches (case-insensitive)
if (entryLower === combinationLower) { if (entryLower === combinationLower) {
seenCombinations.add(uniqueKey); seenCombinations.add(uniqueKey);
log("debug", "Exact match found:", {
source: entry.source,
combination: combination,
score: 1,
});
matches.push({ matches.push({
entry: entry, entry: entry,
score: 1, score: 1,
@@ -1354,53 +1402,27 @@ function TranslatorTool() {
return; return;
} }
// Split source into words and combinations
const sourceWords = entry.source
.split(/\s+/)
.filter((word) => word.length > 0);
// Only proceed if the source is significant // Only proceed if the source is significant
if (!isSignificantPhrase(entry.source)) { if (!isSignificantPhrase(entry.source)) {
return; return;
} }
// Generate source combinations similar to input combinations // Get cached source combinations
const sourceCombinations = []; const sourceCombinations = entryCombinations.get(entry);
sourceCombinations.push(sourceWords.join(" ")); // Full phrase
// 3-word combinations from source
for (let i = 0; i < sourceWords.length - 2; i++) {
const threeWordPhrase = sourceWords.slice(i, i + 3).join(" ");
if (isSignificantPhrase(threeWordPhrase)) {
sourceCombinations.push(threeWordPhrase);
}
}
// 2-word combinations from source
for (let i = 0; i < sourceWords.length - 1; i++) {
const twoWordPhrase = sourceWords.slice(i, i + 2).join(" ");
if (isSignificantPhrase(twoWordPhrase)) {
sourceCombinations.push(twoWordPhrase);
}
}
// Individual significant words from source
sourceWords.forEach((word) => {
if (isSignificantPhrase(word)) {
sourceCombinations.push(word);
}
});
// Find best matching combination // Find best matching combination
let bestScore = 0; let bestScore = 0;
let bestMatch = ""; let bestMatch = "";
let bestSourceCombo = ""; let bestSourceCombo = "";
sourceCombinations.forEach((sourceCombo) => { for (const sourceCombo of sourceCombinations) {
const score = similarity(sourceCombo.toLowerCase(), combinationLower); const score = getCachedSimilarity(
sourceCombo.toLowerCase(),
combinationLower
);
// Only consider high-quality matches // Early exit if score is too low
if (score < 0.8) return; if (score < 0.8) continue;
const sourceWordCount = sourceCombo.split(/\s+/).length; const sourceWordCount = sourceCombo.split(/\s+/).length;
const combinationWordCount = combination.split(/\s+/).length; const combinationWordCount = combination.split(/\s+/).length;
@@ -1409,11 +1431,11 @@ function TranslatorTool() {
// Heavy penalties for mismatches // Heavy penalties for mismatches
if (Math.abs(sourceWordCount - combinationWordCount) > 0) { if (Math.abs(sourceWordCount - combinationWordCount) > 0) {
adjustedScore *= 0.4; // 60% penalty for word count mismatch adjustedScore *= 0.4;
} }
if (combinationWordCount === 1 && sourceWords.length > 1) { if (combinationWordCount === 1 && sourceWordCount > 1) {
adjustedScore *= 0.3; // 70% penalty for single word matches adjustedScore *= 0.3;
} }
// Exact word boundary match bonus // Exact word boundary match bonus
@@ -1421,7 +1443,7 @@ function TranslatorTool() {
sourceCombo.toLowerCase() sourceCombo.toLowerCase()
); );
if (isExactMatch) { if (isExactMatch) {
adjustedScore *= 1.3; // 30% bonus for exact word boundary matches adjustedScore *= 1.3;
} }
if (adjustedScore > bestScore) { if (adjustedScore > bestScore) {
@@ -1429,28 +1451,17 @@ function TranslatorTool() {
bestMatch = combination; bestMatch = combination;
bestSourceCombo = sourceCombo; bestSourceCombo = sourceCombo;
} }
}); }
// Stricter thresholds // Stricter thresholds
let threshold = CONFIG.fuzzyThreshold * 1.2; // Base threshold increased by 20% let threshold = CONFIG.fuzzyThreshold * 1.2;
if (combination.split(/\s+/).length === 1) { if (combination.split(/\s+/).length === 1) {
threshold *= 1.4; // Even higher threshold for single words threshold *= 1.4;
} }
if (bestScore >= threshold && !seenCombinations.has(uniqueKey)) { if (bestScore >= threshold && !seenCombinations.has(uniqueKey)) {
seenCombinations.add(uniqueKey); seenCombinations.add(uniqueKey);
log("debug", "Fuzzy match found:", {
source: entry.source,
combination: combination,
matchedPart: bestSourceCombo,
originalScore: similarity(
bestSourceCombo.toLowerCase(),
combinationLower
),
adjustedScore: bestScore,
threshold: threshold,
});
matches.push({ matches.push({
entry: entry, entry: entry,
score: bestScore, score: bestScore,
@@ -1460,28 +1471,31 @@ function TranslatorTool() {
}); });
}); });
// Clear caches if they get too large
if (similarityCache.size > 10000) {
similarityCache.clear();
}
if (combinationsCache.size > 1000) {
combinationsCache.clear();
}
// Sort matches by score first, then by category // Sort matches by score first, then by category
matches.sort(function (a, b) { matches.sort(function (a, b) {
const aWordCount = a.matchedWord.split(/\s+/).length; const aWordCount = a.matchedWord.split(/\s+/).length;
const bWordCount = b.matchedWord.split(/\s+/).length; const bWordCount = b.matchedWord.split(/\s+/).length;
if (Math.abs(b.score - a.score) < 0.05) { if (Math.abs(b.score - a.score) < 0.05) {
// Reduced tolerance for "close" scores
// Prioritize multi-word matches
if (aWordCount !== bWordCount) { if (aWordCount !== bWordCount) {
return bWordCount - aWordCount; return bWordCount - aWordCount;
} }
// If word counts are equal, sort by category presence
if (!!a.entry.category !== !!b.entry.category) { if (!!a.entry.category !== !!b.entry.category) {
return a.entry.category ? -1 : 1; return a.entry.category ? -1 : 1;
} }
// If both have or don't have categories, sort by matched word length
return b.matchedWord.length - a.matchedWord.length; return b.matchedWord.length - a.matchedWord.length;
} }
return b.score - a.score; return b.score - a.score;
}); });
// Log final sorted matches
log( log(
"info", "info",
"Final matches:", "Final matches:",