// mqm.jsx — MQM 2.0 assessment workflow.
//
// In the production server, this prompt is sent to the real Anthropic API
// (model: claude-opus-4-7) via /api/assess. In the in-artifact prototype,
// we route through window.claude.complete so you can run the full flow
// right here without a server.

// MQM 2.0 taxonomy — the high-level dimensions Claude is asked to use.
// These match the user's "All of the above" pick from the question set.
// Each entry is written as a compact [id, label, subs] tuple and expanded into
// the { id, label, subs } record shape the rest of the file reads.
const MQM_DIMENSIONS = [
  ['accuracy', 'Accuracy', ['Mistranslation', 'Omission', 'Addition', 'Untranslated', 'Ambiguity']],
  ['fluency', 'Fluency', ['Grammar', 'Spelling', 'Punctuation', 'Register', 'Awkward syntax']],
  ['terminology', 'Terminology', ['Inconsistent', 'Wrong term', 'Term not in glossary']],
  ['style', 'Style', ['Awkward', 'Inconsistent', 'Unidiomatic']],
  ['locale', 'Locale conventions', ['Date/time', 'Number/currency', 'Measurement']],
  ['audience', 'Audience appropriateness', ['Register mismatch', 'Cultural']],
  ['design', 'Design / Markup', ['Formatting', 'Markup', 'Truncation', 'List structure']],
].map(([id, label, subs]) => ({ id, label, subs }));

// Penalty points charged per error, keyed by the lowercase severity label.
const SEVERITY_WEIGHTS = {
  minor: 1,
  major: 5,
  critical: 25,
};

/**
 * Build the system prompt that instructs the model to perform the MQM
 * assessment and to answer with a single JSON object.
 *
 * The dimension list, the dimension count and the severity weights are
 * rendered from MQM_DIMENSIONS and SEVERITY_WEIGHTS rather than typed out a
 * second time, so the instructions given to the model cannot drift away from
 * the values the caller later scores with.
 *
 * @returns {string} The complete system prompt.
 */
function buildMqmSystemPrompt() {
  // Only the first bullet spells out "subcategories:"; the remaining bullets
  // use the bare parenthesised list.
  const dimensionLines = MQM_DIMENSIONS
    .map((d, i) => `- ${d.label} (${i === 0 ? 'subcategories: ' : ''}${d.subs.join(', ')})`)
    .join('\n');

  return `You are an expert translation quality evaluator performing a Multidimensional Quality Metrics (MQM 2.0) assessment.

Your task: compare a source document against its translation, identify every quality issue, segment-by-segment, and propose a clean post-edited version of the translation.

MQM DIMENSIONS (use exactly these top-level categories):
${dimensionLines}

SEVERITY (use exactly these labels):
- minor    — does not disrupt comprehension; preference or polish (weight ${SEVERITY_WEIGHTS.minor})
- major    — disrupts comprehension or changes meaning non-critically (weight ${SEVERITY_WEIGHTS.major})
- critical — alters meaning materially, causes confusion, or breaks usability (weight ${SEVERITY_WEIGHTS.critical})

QUALITY SCORE formula (you do NOT need to compute it — caller does):
  score = (1 \u2212 total_penalty / source_word_count) \u00d7 100%

SEGMENTATION:
- Split the source into sentence-level segments.
- Pair each source sentence with its translation sentence(s).
- Label each segment with its section/heading when discernible (e.g., "Abstract", "\u00a71.1", "Title", "Table 1 caption").
- For each segment, supply a clean corrected translation if changes are warranted; if no change, repeat the original target.

OUTPUT — return ONE JSON object, no preamble, no Markdown fence:
{
  "source_language": "<full name, e.g. 'Russian'>",
  "source_language_code": "<ISO 639-1, e.g. 'ru'>",
  "target_language": "<full name>",
  "target_language_code": "<ISO 639-1>",
  "domain": "<short description, e.g. 'ML research paper'>",
  "source_word_count": <integer>,
  "segments": [
    {
      "id": 1,
      "section": "<section label>",
      "source": "<source sentence>",
      "target": "<original target sentence>",
      "corrected": "<post-edited sentence (same as target if no change)>"
    }
  ],
  "errors": [
    {
      "segment_id": <int, matches segments[].id>,
      "category": "<one of the ${MQM_DIMENSIONS.length} top-level dimensions above>",
      "subcategory": "<one of the subcategories listed under that dimension>",
      "severity": "minor" | "major" | "critical",
      "source_span": "<exact substring from the source>",
      "target_span": "<exact substring from the original target>",
      "description": "<one-sentence rationale in English>",
      "suggestion": "<the suggested replacement text>"
    }
  ],
  "verdict": "<2-4 sentences in English: overall quality, strengths, main risks>",
  "corrected_full_text": "<the entire post-edited target document as continuous text, preserving paragraph breaks with \\n>"
}

RULES:
1. The JSON must be syntactically valid. Escape backslashes, quotes and newlines inside strings.
2. Be specific. Generic comments like "could be improved" are not acceptable — every error must point to a concrete span.
3. Do not invent errors. If the translation is correct, do not flag it.
4. Critical errors are rare. Reserve them for actual meaning-altering mistakes.
5. Segment EVERY sentence in the document. Do not skip, cluster, or sample. Every source sentence must appear as its own segment with its paired translation. For long documents this may mean 200+ segments — that is expected and required.
6. Output JSON only. No prose before or after.`;
}

/**
 * Assemble the user message: an optional glossary section, the source
 * document, the translation under review and a closing instruction, each
 * separated by a "---" divider.
 *
 * @param {string} sourceText - Source document text.
 * @param {string} targetText - Translation to evaluate.
 * @param {string} [glossary] - Preferred terms; omitted from the prompt when
 *   missing or whitespace-only.
 * @returns {string} The user prompt.
 */
function buildMqmUserPrompt(sourceText, targetText, glossary) {
  const divider = '\n\n---\n\n';
  const sections = [];

  const terms = glossary ? glossary.trim() : '';
  if (terms) {
    sections.push('GLOSSARY / TRANSLATION MEMORY (preferred terms):\n' + terms);
  }

  sections.push(`SOURCE DOCUMENT:\n${sourceText}`);
  sections.push(`TRANSLATION TO EVALUATE:\n${targetText}`);
  sections.push('Return the MQM assessment as the single JSON object specified in the system prompt.');

  return sections.join(divider);
}

// Extract JSON from a possibly chatty response.
/**
 * Pull the JSON object out of a model reply that may carry extra prose or a
 * Markdown fence around it.
 *
 * Strategy: parse the whole trimmed reply if it starts with "{"; otherwise (or
 * if that fails) scan for a brace-balanced "{...}" span and parse it. When a
 * balanced span is not valid JSON (e.g. prose such as "the {MQM} scheme"), the
 * scan resumes after that span, so a brace pair in the preamble no longer
 * hides the real object.
 *
 * A span that never closes is reported as unterminated instead of falling
 * back to objects nested inside it, so a truncated reply cannot be mistaken
 * for a complete one.
 *
 * @param {string} text - Raw model output.
 * @returns {*} The parsed JSON value.
 * @throws {Error} 'No JSON object found in response' when text is not a string
 *   or contains no "{".
 * @throws {Error} 'Unterminated JSON in response' when the first candidate is
 *   never closed.
 * @throws {SyntaxError} The parse error of the first balanced candidate when
 *   no candidate parses.
 */
function extractJson(text) {
  if (typeof text !== 'string') {
    throw new Error('No JSON object found in response');
  }
  const trimmed = text.trim();
  if (trimmed.startsWith('{')) {
    try { return JSON.parse(trimmed); } catch (e) { /* fall through to the brace scan */ }
  }

  // Index of the "}" that closes the "{" at `from`, or -1 if it never closes.
  // Braces inside string literals are ignored; a backslash inside a string
  // escapes the next character.
  const findObjectEnd = (from) => {
    let depth = 0;
    let inStr = false;
    let esc = false;
    for (let i = from; i < trimmed.length; i++) {
      const c = trimmed[i];
      if (esc) { esc = false; continue; }
      if (c === '\\' && inStr) { esc = true; continue; }
      if (c === '"') { inStr = !inStr; continue; }
      if (inStr) continue;
      if (c === '{') depth++;
      else if (c === '}') {
        depth--;
        if (depth === 0) return i;
      }
    }
    return -1;
  };

  let start = trimmed.indexOf('{');
  if (start < 0) throw new Error('No JSON object found in response');

  // The first failure is the one reported, so the error a caller sees for a
  // hopeless reply is the same one the single-candidate scan would raise.
  let firstError = null;
  while (start >= 0) {
    const end = findObjectEnd(start);
    if (end < 0) {
      throw firstError || new Error('Unterminated JSON in response');
    }
    try {
      return JSON.parse(trimmed.slice(start, end + 1));
    } catch (e) {
      if (!firstError) firstError = e;
    }
    // Resume after the rejected span; never descend into it.
    start = trimmed.indexOf('{', end + 1);
  }
  throw firstError;
}

// Run the assessment. Returns the parsed MQM object plus computed metrics.
/**
 * Run one MQM assessment end to end: build the prompts, call the model, parse
 * the JSON reply and attach computed metrics.
 *
 * Post-processing applied to the parsed reply:
 *  - `segments` / `errors` default to [] and non-object entries are dropped;
 *  - segments are renumbered 1..n and every `errors[].segment_id` that referred
 *    to an old segment id is rewritten to the new id, so errors stay attached
 *    to their segment;
 *  - `metrics` is added (severity counts, penalty, quality score, word count,
 *    per-dimension counts);
 *  - each segment gets `lev_distance` / `lev_norm` between `target` and
 *    `corrected`.
 *
 * @param {Object} args
 * @param {string} args.sourceText - Source document.
 * @param {string} args.targetText - Translation to evaluate.
 * @param {string} [args.glossary] - Optional glossary text.
 * @param {function(string): void} [args.onLog] - Progress callback; exceptions
 *   it throws are ignored.
 * @returns {Promise<Object>} The parsed assessment with the fields above.
 * @throws {Error} On a non-OK server response, a reply without text, or a
 *   reply that extractJson cannot parse.
 */
async function runMqmAssessment({ sourceText, targetText, glossary, onLog }) {
  // Logging is best-effort: a faulty callback must not abort the assessment.
  const log = (m) => { try { onLog && onLog(m); } catch (e) {} };

  log('Building MQM prompt');
  const system = buildMqmSystemPrompt();
  const user = buildMqmUserPrompt(sourceText, targetText, glossary);

  log('Calling Claude (claude-opus-4-7)');
  // In production this hits POST /api/assess on the proxy server. In the
  // artifact prototype we use window.claude.complete (Haiku 4.5 under the
  // hood) — same prompt, same JSON shape.
  let raw;
  if (window.__APE_USE_SERVER) {
    const res = await fetch('/api/assess', {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      credentials: 'include',
      body: JSON.stringify({ system, user, model: 'claude-opus-4-7' }),
    });
    if (!res.ok) {
      // The error body is optional; fall back to the HTTP status.
      const body = await res.json().catch(() => ({}));
      throw new Error(body.error || 'Server error: ' + res.status);
    }
    const j = await res.json();
    if (j.stop_reason === 'max_tokens') {
      log('Warning: response was truncated — results may be incomplete');
    }
    raw = j.text;
  } else {
    raw = await window.claude.complete({
      messages: [
        { role: 'user', content: system + '\n\n---\n\n' + user },
      ],
    });
  }
  if (typeof raw !== 'string') {
    throw new Error('Model response contained no text');
  }

  log('Parsing JSON response');
  const data = extractJson(raw);

  log('Computing metrics');
  // Defensive defaults; entries that are not objects cannot be annotated or
  // scored, so they are dropped.
  const isRecord = (v) => v !== null && typeof v === 'object';
  data.segments = Array.isArray(data.segments) ? data.segments.filter(isRecord) : [];
  data.errors = Array.isArray(data.errors) ? data.errors.filter(isRecord) : [];

  // Re-number segments to be safe, remembering old id -> new id so the errors
  // can follow. Keys are stringified so "3" and 3 refer to the same segment;
  // for duplicated ids the first segment wins.
  const newIdByOldId = new Map();
  data.segments.forEach((s, i) => {
    if (s.id !== undefined && s.id !== null) {
      const key = String(s.id);
      if (!newIdByOldId.has(key)) newIdByOldId.set(key, i + 1);
    }
    s.id = i + 1;
  });
  for (const e of data.errors) {
    const key = String(e.segment_id);
    if (newIdByOldId.has(key)) e.segment_id = newIdByOldId.get(key);
  }

  // Compute severity counts and penalty. Missing severity counts as minor;
  // unknown labels are skipped. The own-property check keeps inherited keys
  // such as "constructor" from being treated as a severity.
  const counts = { critical: 0, major: 0, minor: 0 };
  let penalty = 0;
  for (const e of data.errors) {
    const sev = String(e.severity || 'minor').trim().toLowerCase();
    if (!Object.prototype.hasOwnProperty.call(counts, sev)) continue;
    counts[sev]++;
    penalty += SEVERITY_WEIGHTS[sev];
  }

  // Per-dimension counts, keyed by dimension label. A category matches a
  // dimension by its label or its id, ignoring case and surrounding spaces.
  const dimCounts = {};
  const labelByKey = new Map();
  MQM_DIMENSIONS.forEach((d) => {
    dimCounts[d.label] = 0;
    labelByKey.set(d.label.toLowerCase(), d.label);
    labelByKey.set(d.id.toLowerCase(), d.label);
  });
  for (const e of data.errors) {
    const label = labelByKey.get(String(e.category).trim().toLowerCase());
    if (label !== undefined) dimCounts[label]++;
  }

  // Trust the reported word count only if it is a positive finite number;
  // anything else would push the score above 100 or to NaN.
  const reported = Number(data.source_word_count);
  const wc = (Number.isFinite(reported) && reported > 0)
    ? reported
    : (window.wordCount(sourceText) || 1);
  const qualityScore = Math.max(0, (1 - penalty / wc) * 100);

  data.metrics = {
    counts,
    penalty,
    quality_score: qualityScore,
    source_word_count: wc,
    dimension_counts: dimCounts,
  };

  // Compute Levenshtein distance for each segment with a different corrected vs target.
  // A segment with an empty target or corrected text is reported as unchanged.
  for (const seg of data.segments) {
    const a = seg.target || '';
    const b = seg.corrected || '';
    if (a && b && a !== b) {
      seg.lev_distance = levenshtein(a, b);
      seg.lev_norm = seg.lev_distance / Math.max(a.length, b.length);
    } else {
      seg.lev_distance = 0;
      seg.lev_norm = 0;
    }
  }

  log('Done');
  return data;
}

// Iterative Levenshtein, O(n·m) time, O(min(n,m)) space.
/**
 * Edit distance (insert / delete / substitute, each costing 1) between two
 * strings, compared one UTF-16 code unit at a time.
 *
 * Only two rows of the DP table are kept, and the shorter string is laid out
 * along the row so the rows are as short as possible.
 *
 * @param {string} a
 * @param {string} b
 * @returns {number} Number of single-unit edits turning one string into the other.
 */
function levenshtein(a, b) {
  if (a === b) return 0;
  if (a.length === 0) return b.length;
  if (b.length === 0) return a.length;

  const [longer, shorter] = a.length < b.length ? [b, a] : [a, b];
  const width = shorter.length;

  // Row 0: distance from the empty prefix is the column index.
  let above = Array.from({ length: width + 1 }, (_, col) => col);
  let row = new Array(width + 1);

  for (let r = 1; r <= longer.length; r += 1) {
    const unit = longer[r - 1];
    row[0] = r;
    for (let col = 1; col <= width; col += 1) {
      const insertion = row[col - 1] + 1;
      const deletion = above[col] + 1;
      const substitution = above[col - 1] + (unit === shorter[col - 1] ? 0 : 1);
      row[col] = Math.min(insertion, deletion, substitution);
    }
    // The row just filled becomes the reference row for the next pass.
    [above, row] = [row, above];
  }
  return above[width];
}

// Word-level diff for the diff view. Returns array of {op: 'eq'|'ins'|'del', text}.
/**
 * Word-level diff of two strings, built from a longest-common-subsequence
 * table over word and whitespace tokens.
 *
 * Splitting on /(\s+)/ yields an empty token for an empty string and at any
 * leading or trailing whitespace; those tokens are discarded so the result
 * never contains an op with empty text (e.g. diffing '' against 'x' is a
 * single insertion, and '' against '' is an empty diff).
 *
 * @param {?string} a - Original text; null/undefined is treated as ''.
 * @param {?string} b - Revised text; null/undefined is treated as ''.
 * @returns {Array<{op: ('eq'|'ins'|'del'), text: string}>} Consecutive ops of
 *   the same kind are merged into one entry.
 */
function wordDiff(a, b) {
  // The capturing group keeps whitespace runs as tokens of their own.
  const tokenize = (s) => (s || '').split(/(\s+)/).filter((tok) => tok !== '');
  const at = tokenize(a);
  const bt = tokenize(b);
  const m = at.length, n = bt.length;
  // LCS DP: dp[i][j] = LCS length of at[i..] and bt[j..]; the extra row and
  // column stay 0.
  const dp = Array.from({ length: m + 1 }, () => new Int32Array(n + 1));
  for (let i = m - 1; i >= 0; i--)
    for (let j = n - 1; j >= 0; j--)
      dp[i][j] = at[i] === bt[j] ? dp[i+1][j+1] + 1 : Math.max(dp[i+1][j], dp[i][j+1]);
  // Walk the table from the start; on a tie a deletion is emitted before an
  // insertion.
  const out = [];
  let i = 0, j = 0;
  while (i < m && j < n) {
    if (at[i] === bt[j]) { out.push({ op: 'eq', text: at[i] }); i++; j++; }
    else if (dp[i+1][j] >= dp[i][j+1]) { out.push({ op: 'del', text: at[i] }); i++; }
    else { out.push({ op: 'ins', text: bt[j] }); j++; }
  }
  while (i < m) out.push({ op: 'del', text: at[i++] });
  while (j < n) out.push({ op: 'ins', text: bt[j++] });
  // Merge runs
  const merged = [];
  for (const tok of out) {
    const last = merged[merged.length - 1];
    if (last && last.op === tok.op) last.text += tok.text;
    else merged.push({ ...tok });
  }
  return merged;
}

// Publish this file's public API as globals on window.
window.MQM_DIMENSIONS = MQM_DIMENSIONS;
window.SEVERITY_WEIGHTS = SEVERITY_WEIGHTS;
window.runMqmAssessment = runMqmAssessment;
window.levenshtein = levenshtein;
window.wordDiff = wordDiff;
