", "source": "", "target": "", "corrected": "" } ], "errors": [ { "segment_id": , "category": "", "subcategory": "", "severity": "minor" | "major" | "critical", "source_span": "", "target_span": "", "description": "", "suggestion": "" } ], "verdict": "<2-4 sentences in English: overall quality, strengths, main risks>", "corrected_full_text": "" } RULES: 1. The JSON must be syntactically valid. Escape backslashes, quotes and newlines inside strings. 2. Be specific. Generic comments like "could be improved" are not acceptable — every error must point to a concrete span. 3. Do not invent errors. If the translation is correct, do not flag it. 4. Critical errors are rare. Reserve them for actual meaning-altering mistakes. 5. Segment EVERY sentence in the document. Do not skip, cluster, or sample. Every source sentence must appear as its own segment with its paired translation. For long documents this may mean 200+ segments — that is expected and required. 6. Output JSON only. No prose before or after.`; } function buildMqmUserPrompt(sourceText, targetText, glossary) { let out = ''; if (glossary && glossary.trim()) { out += `GLOSSARY / TRANSLATION MEMORY (preferred terms):\n${glossary.trim()}\n\n---\n\n`; } out += `SOURCE DOCUMENT:\n${sourceText}\n\n---\n\nTRANSLATION TO EVALUATE:\n${targetText}\n\n---\n\nReturn the MQM assessment as the single JSON object specified in the system prompt.`; return out; } // Extract JSON from a possibly chatty response. 
/**
 * Parse the JSON object embedded in a (possibly chatty) model response.
 *
 * First tries to parse the whole trimmed text. If that fails, scans for a
 * brace-balanced `{ ... }` span (ignoring braces inside JSON strings) and
 * parses that. When a balanced span is not valid JSON (e.g. braces used in
 * prose before the real payload), the next top-level span is tried.
 *
 * @param {string} text - Raw model output.
 * @returns {*} The parsed JSON value.
 * @throws {Error} 'No JSON object found in response' when there is no `{`
 *   (or `text` is not a string); 'Unterminated JSON in response' when the
 *   braces never balance; otherwise the first JSON.parse error encountered.
 */
function extractJson(text) {
  if (typeof text !== 'string') {
    // A missing/non-string payload would otherwise surface as an opaque TypeError.
    throw new Error('No JSON object found in response');
  }
  const trimmed = text.trim();
  if (trimmed.startsWith('{')) {
    try {
      return JSON.parse(trimmed);
    } catch (e) {
      /* fall through to the brace-balancing scan */
    }
  }

  // Returns the index of the `}` that closes the object opened at `from`,
  // or -1 if the text ends first. Braces inside string literals are skipped.
  const findObjectEnd = (from) => {
    let depth = 0;
    let inStr = false;
    let esc = false;
    for (let i = from; i < trimmed.length; i++) {
      const c = trimmed[i];
      if (esc) {
        esc = false;
        continue;
      }
      if (c === '\\' && inStr) {
        esc = true;
        continue;
      }
      if (c === '"') {
        inStr = !inStr;
        continue;
      }
      if (inStr) continue;
      if (c === '{') {
        depth++;
      } else if (c === '}') {
        depth--;
        if (depth === 0) return i;
      }
    }
    return -1;
  };

  let firstError = null;
  let start = trimmed.indexOf('{');
  while (start >= 0) {
    const end = findObjectEnd(start);
    if (end < 0) {
      // Never descend into a truncated object: returning one of its nested
      // children would silently drop most of the response.
      throw firstError || new Error('Unterminated JSON in response');
    }
    try {
      return JSON.parse(trimmed.slice(start, end + 1));
    } catch (err) {
      if (!firstError) firstError = err;
    }
    // Only look for further candidates AFTER the rejected span.
    start = trimmed.indexOf('{', end + 1);
  }
  throw firstError || new Error('No JSON object found in response');
}

/**
 * Run the assessment. Returns the parsed MQM object plus computed metrics.
 *
 * Builds the prompts, obtains the model response (via POST /api/assess when
 * `window.__APE_USE_SERVER` is truthy, otherwise via `window.claude.complete`),
 * parses it, then attaches `metrics` to the result and `lev_distance` /
 * `lev_norm` to every segment.
 *
 * @param {Object} args
 * @param {string} args.sourceText - Source document.
 * @param {string} args.targetText - Translation to evaluate.
 * @param {string} [args.glossary] - Optional glossary passed to the prompt builder.
 * @param {function(string): void} [args.onLog] - Optional progress callback.
 * @returns {Promise<Object>} The parsed response object, mutated in place.
 * @throws {Error} On a non-OK server response or an unparsable model response.
 */
async function runMqmAssessment({ sourceText, targetText, glossary, onLog }) {
  // Logging is best-effort: a throwing callback must not abort the assessment.
  const log = (m) => {
    try {
      onLog && onLog(m);
    } catch (e) {
      /* ignore logger failures */
    }
  };
  // Own-property test, so model-supplied keys such as "constructor" or
  // "toString" are never mistaken for known severities/categories.
  const hasOwn = (obj, key) => Object.prototype.hasOwnProperty.call(obj, key);
  const isRecord = (v) => v !== null && typeof v === 'object';

  log('Building MQM prompt');
  const system = buildMqmSystemPrompt();
  const user = buildMqmUserPrompt(sourceText, targetText, glossary);

  log('Calling Claude (claude-opus-4-7)');
  // In production this hits POST /api/assess on the proxy server. In the
  // artifact prototype we use window.claude.complete (Haiku 4.5 under the
  // hood) — same prompt, same JSON shape.
  let raw;
  if (window.__APE_USE_SERVER) {
    const res = await fetch('/api/assess', {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      credentials: 'include',
      body: JSON.stringify({ system, user, model: 'claude-opus-4-7' }),
    });
    if (!res.ok) {
      // The error body may not be JSON; fall back to the status code.
      const body = await res.json().catch(() => ({}));
      throw new Error(body.error || 'Server error: ' + res.status);
    }
    const j = await res.json();
    if (j.stop_reason === 'max_tokens') {
      log('Warning: response was truncated — results may be incomplete');
    }
    raw = j.text;
  } else {
    raw = await window.claude.complete({
      messages: [
        { role: 'user', content: system + '\n\n---\n\n' + user },
      ],
    });
  }

  log('Parsing JSON response');
  const data = extractJson(raw);

  log('Computing metrics');
  // Defensive defaults; non-object entries (e.g. null) are dropped so the
  // loops below cannot throw on them.
  data.segments = Array.isArray(data.segments) ? data.segments.filter(isRecord) : [];
  data.errors = Array.isArray(data.errors) ? data.errors.filter(isRecord) : [];

  // Re-number segments to be safe, remembering the ids the model used so the
  // errors' segment_id references can be moved to the new numbering.
  // NOTE(review): assumes errors[].segment_id refers to segments[].id — confirm
  // against the full system prompt.
  const renumbered = new Map();
  data.segments.forEach((s, i) => {
    const newId = i + 1;
    if (s.id != null && !renumbered.has(String(s.id))) {
      renumbered.set(String(s.id), newId);
    }
    s.id = newId;
  });
  for (const e of data.errors) {
    if (e.segment_id != null && renumbered.has(String(e.segment_id))) {
      e.segment_id = renumbered.get(String(e.segment_id));
    }
  }

  // Compute severity counts and penalty. Unknown severities are ignored.
  const counts = { critical: 0, major: 0, minor: 0 };
  let penalty = 0;
  for (const e of data.errors) {
    const sev = String(e.severity || 'minor').trim().toLowerCase();
    if (!hasOwn(counts, sev)) continue;
    counts[sev]++;
    penalty += SEVERITY_WEIGHTS[sev];
  }

  // Per-dimension counts. Categories that are not a known dimension label are ignored.
  const dimCounts = {};
  MQM_DIMENSIONS.forEach((d) => { dimCounts[d.label] = 0; });
  for (const e of data.errors) {
    const cat = e.category;
    if (hasOwn(dimCounts, cat)) dimCounts[cat]++;
  }

  // Trust the model's word count only when it is a positive finite number;
  // otherwise count locally, and never divide by zero.
  const reportedWc = Number(data.source_word_count);
  const wc =
    (Number.isFinite(reportedWc) && reportedWc > 0 ? reportedWc : 0) ||
    window.wordCount(sourceText) ||
    1;
  const qualityScore = Math.max(0, (1 - penalty / wc) * 100);

  data.metrics = {
    counts,
    penalty,
    quality_score: qualityScore,
    source_word_count: wc,
    dimension_counts: dimCounts,
  };

  // Compute Levenshtein distance for each segment with a different corrected vs target
  for (const seg of data.segments) {
    const a = seg.target || '';
    const b = seg.corrected || '';
    if (a && b && a !== b) {
      seg.lev_distance = levenshtein(a, b);
      seg.lev_norm = seg.lev_distance / Math.max(a.length, b.length);
    } else {
      seg.lev_distance = 0;
      seg.lev_norm = 0;
    }
  }

  log('Done');
  return data;
}

/**
 * Iterative Levenshtein, O(n·m) time, O(min(n,m)) space.
 *
 * Compares by string index, i.e. UTF-16 code units.
 *
 * @param {string} a
 * @param {string} b
 * @returns {number} Minimum number of single-unit insertions, deletions and
 *   substitutions turning `a` into `b`.
 */
function levenshtein(a, b) {
  if (a === b) return 0;
  if (a.length === 0) return b.length;
  if (b.length === 0) return a.length;
  // Keep the shorter string in `b` so the two rolling rows stay small.
  if (a.length < b.length) {
    const t = a;
    a = b;
    b = t;
  }
  let prev = new Array(b.length + 1);
  let curr = new Array(b.length + 1);
  for (let j = 0; j <= b.length; j++) prev[j] = j;
  for (let i = 1; i <= a.length; i++) {
    curr[0] = i;
    for (let j = 1; j <= b.length; j++) {
      const cost = a[i - 1] === b[j - 1] ? 0 : 1;
      curr[j] = Math.min(curr[j - 1] + 1, prev[j] + 1, prev[j - 1] + cost);
    }
    // Swap rows instead of allocating a new one per iteration.
    const t = prev;
    prev = curr;
    curr = t;
  }
  return prev[b.length];
}

/**
 * Word-level diff for the diff view.
 *
 * Both inputs are split into word and whitespace tokens, aligned with an LCS
 * table, and adjacent tokens with the same operation are merged.
 *
 * @param {?string} a - Original text (null/undefined is treated as '').
 * @param {?string} b - Revised text (null/undefined is treated as '').
 * @returns {Array<{op: 'eq'|'ins'|'del', text: string}>} Never contains an
 *   entry with empty text; two empty inputs yield [].
 */
function wordDiff(a, b) {
  // split() with a capturing group yields '' tokens for empty input and for
  // leading/trailing whitespace; drop them so no empty ops are emitted.
  const tokenize = (s) => (s || '').split(/(\s+)/).filter((tok) => tok !== '');
  const at = tokenize(a);
  const bt = tokenize(b);
  const m = at.length;
  const n = bt.length;

  // LCS DP: dp[i][j] = LCS length of at[i..] and bt[j..].
  const dp = Array.from({ length: m + 1 }, () => new Int32Array(n + 1));
  for (let i = m - 1; i >= 0; i--) {
    for (let j = n - 1; j >= 0; j--) {
      dp[i][j] = at[i] === bt[j]
        ? dp[i + 1][j + 1] + 1
        : Math.max(dp[i + 1][j], dp[i][j + 1]);
    }
  }

  const out = [];
  let i = 0;
  let j = 0;
  while (i < m && j < n) {
    if (at[i] === bt[j]) {
      out.push({ op: 'eq', text: at[i] });
      i++;
      j++;
    } else if (dp[i + 1][j] >= dp[i][j + 1]) {
      // Ties favour deletion, so a replacement reads as del-then-ins.
      out.push({ op: 'del', text: at[i] });
      i++;
    } else {
      out.push({ op: 'ins', text: bt[j] });
      j++;
    }
  }
  while (i < m) out.push({ op: 'del', text: at[i++] });
  while (j < n) out.push({ op: 'ins', text: bt[j++] });

  // Merge runs
  const merged = [];
  for (const tok of out) {
    const last = merged[merged.length - 1];
    if (last && last.op === tok.op) last.text += tok.text;
    else merged.push({ ...tok });
  }
  return merged;
}

// Expose the public API on the page global. Guarded so the file can also be
// loaded where `window` does not exist (e.g. unit tests under Node).
if (typeof window !== 'undefined') {
  Object.assign(window, {
    MQM_DIMENSIONS,
    SEVERITY_WEIGHTS,
    runMqmAssessment,
    levenshtein,
    wordDiff,
  });
}