import { readFileSync } from "node:fs";

// Load the whole song dump into memory. The JSON is treated as an object
// keyed by song id; the per-song shape is not validated here.
// eslint-disable-next-line @typescript-eslint/no-explicit-any
const allSongs: Record<string, any> = JSON.parse(
  readFileSync("./output.json", "utf-8"),
);

// Every top-level key of the dump is used as a song id by the cells below.
const ids = Object.keys(allSongs);

console.log("Number of songs to process", ids.length.toLocaleString("en-US"));

Number of songs to process 368,895

import vl from "npm:vega-lite-api";

// Number of songs per value of each song's `key` field.
const countByKey: Record<string, number> = {};
for (const songId of ids) {
  const { key } = allSongs[songId];
  countByKey[key] = (countByKey[key] ?? 0) + 1;
}

// Bar chart with one bar per key; the height is the number of songs in that key.
const keyDistroChart = vl
  .markBar()
  .data(
    // One { key, count } row per tally entry.
    Array.from(Object.entries(countByKey), ([key, count]) => ({ key, count }))
  )
  .encode(
    // sort("-y") asks Vega-Lite to order the x categories by descending y.
    vl.x().fieldN("key").title("Key").sort("-y"),
    vl.y().fieldQ("count").title("Count")
  )
  .width(300)
  .height(400)
  .title("Distribution of Keys in Verses");

// Single-chart horizontal concat, kept so this cell has the same shape as the others.
const combined = vl.hconcat(keyDistroChart);

await Deno.jupyter.display(combined);

import { get as getRomanNumeral, RomanNumeral } from "@tonaljs/roman-numeral";

// Tally of parsed chords keyed by the label that getChordType() produces
// (e.g. "maj", "min/7"). Filled by the per-song loop further down.
const countByChordType: Record<string, number> = {};

/**
 * Rewrites a chord symbol from the dataset's spelling into the shorter form
 * that is handed to the roman-numeral parser.
 *
 * - Anything containing "min" has the first "min" removed and the whole
 *   symbol lower-cased.
 * - Otherwise the first occurrence of each long quality name is abbreviated,
 *   in order: "dim" -> "d", "aug" -> "a", "sus" -> "s", and "maj" is dropped.
 *
 * @param c Chord symbol as stored in the song data.
 * @returns The rewritten chord symbol.
 */
function fixChord(c: string) {
  const isMinor = c.includes("min");
  if (isMinor) {
    const withoutQuality = c.replace("min", "");
    return withoutQuality.toLowerCase();
  }

  // Applied sequentially, so the order matters for symbols with several names.
  const abbreviations = [
    ["dim", "d"],
    ["aug", "a"],
    ["sus", "s"],
    ["maj", ""],
  ] as const;

  let shortened = c;
  for (const [longName, shortName] of abbreviations) {
    shortened = shortened.replace(longName, shortName);
  }
  return shortened;
}

/**
 * Builds the label used to bucket a parsed chord: "maj" or "min" from
 * `rn.major`, followed by "/<chordType>" when the numeral carries a chord
 * type that does not itself start with "/".
 *
 * @param rn Parsed roman numeral.
 * @returns Label such as "maj", "min" or "min/7".
 */
function getChordType(rn: RomanNumeral) {
  const quality = rn.major ? "maj" : "min";
  const extension = rn.chordType;

  // An empty chord type, or one that already begins with "/", adds nothing.
  if (!extension || extension.startsWith("/")) {
    return quality;
  }
  return `${quality}/${extension}`;
}

// Number of chords that were parsed and tallied (denominator for percentages).
let chordTypeTotal = 0;

// One entry per tallied chord: its chord-type label and the numeral's `step`.
const allChords: { type: string; step: number }[] = [];

ids.forEach((songId) => {
  const { chords } = allSongs[songId];
  for (const rawChord of chords) {
    const rn = getRomanNumeral(fixChord(rawChord));

    // Only chords the parser did not flag as empty are counted.
    if (!rn.empty) {
      const type = getChordType(rn);
      countByChordType[type] = (countByChordType[type] ?? 0) + 1;
      chordTypeTotal += 1;
      allChords.push({ type, step: rn.step });
    }
  }
});

import vl from "npm:vega-lite-api";

// Bar chart of chord-type share, as a percentage of all tallied chords.
const chordTypeDistroChart = vl
  .markBar()
  .data(
    Object.entries(countByChordType).flatMap(([type, count]) => {
      const percentage = (count / chordTypeTotal) * 100;
      // Drop the long tail: only types above 0.1% are charted.
      return percentage > 0.1 ? [{ type, percentage }] : [];
    })
  )
  .encode(
    // sort("-y") asks Vega-Lite to order the x categories by descending y.
    vl.x().fieldN("type").title("Chord Type").sort("-y"),
    vl.y().fieldQ("percentage").title("%")
  )
  .width(300)
  .height(400)
  .title("Distribution of Chord Types in Verses");

const combined = vl.hconcat(chordTypeDistroChart);

await Deno.jupyter.display(combined);

// Cadence name -> the two-chord ending that identifies it, written as
// "<chord>,<next chord>" in roman numerals.
const cadenceSequences = {
  authentic: "V,I",
  plagal: "IV,I",
  deceptive: "V,vi",
};

/**
 * Reduces a raw chord symbol to the `roman` part of its parsed roman numeral.
 *
 * @param c Chord symbol as stored in the song data.
 * @returns `rn.roman` for the parsed chord.
 *   NOTE(review): presumably an empty string for symbols the parser rejects; confirm.
 */
function normalizeChord(c: string) {
  const fixed = fixChord(c);
  const rn = getRomanNumeral(fixed);
  return rn.roman;
}

// counts[cadence][phrase] = how many times that "-"-joined phrase was seen
// ending in the given cadence.
const counts: {
  [key: string]: Record<string, number>;
} = {
  authentic: {},
  plagal: {},
  deceptive: {},
};

/**
 * Scans one song's chord sequence for cadences and tallies, per cadence type,
 * the phrase that leads up to each one.
 *
 * A phrase runs from the end of the previous cadence of the same type (or the
 * start of the song) through the cadence's final chord. Only phrases whose
 * FIRST CHORD is the tonic "I" are recorded. The comparison is made on the
 * chord itself; comparing the first character of the joined string would also
 * accept phrases beginning with "II", "III" or "IV".
 *
 * Results are accumulated into the module-level `counts`.
 *
 * @param chords Normalized roman-numeral chords of a single song, in order.
 */
function processPhrases(chords: string[]) {
  // Per cadence type: index at which the next phrase of that type begins.
  const lastPhraseBegin: Record<string, number> = Object.fromEntries(
    Object.keys(cadenceSequences).map((name) => [name, 0])
  );

  // Start at 1: a cadence on the first two chords has no phrase before it.
  for (let i = 1; i < chords.length - 1; i++) {
    const pair = `${chords[i]},${chords[i + 1]}`;

    for (const [name, sequence] of Object.entries(cadenceSequences)) {
      if (sequence !== pair) {
        continue;
      }

      const phraseEnd = i + 1;
      const phraseChords = chords.slice(lastPhraseBegin[name], phraseEnd + 1);

      // Only saving phrases that start with tonic
      if (phraseChords[0] === "I") {
        const phrase = phraseChords.join("-");
        const bucket = counts[name] ?? (counts[name] = {});
        bucket[phrase] = (bucket[phrase] ?? 0) + 1;
      }

      // The next phrase of this type starts on the cadence's final chord,
      // whether or not this phrase was recorded.
      lastPhraseBegin[name] = phraseEnd;
    }
  }
}

// Feed every song's chords, reduced to roman numerals, through the cadence scan.
ids.forEach((songId) => {
  const song = allSongs[songId];
  processPhrases(song.chords.map(normalizeChord));
});

import vl from "npm:vega-lite-api";

// One bar chart per cadence type, showing its ten most frequent phrases.
const charts = Object.entries(counts).map(([seq, progressionCounts]) => {
  const topProgressions = Object.entries(progressionCounts)
    .map(([progression, count]) => ({ progression, count }))
    .sort((left, right) => right.count - left.count)
    .slice(0, 10);

  return vl
    .markBar()
    .data(topProgressions)
    .encode(
      // sort("-y") asks Vega-Lite to order the x categories by descending y.
      vl.x().fieldN("progression").title("Progression").sort("-y"),
      vl.y().fieldQ("count").title("Frequency")
    )
    .width(300)
    .height(400)
    .title(`Distribution of Progressions in ${seq}`);
});

const combined = vl.hconcat(charts);

await Deno.jupyter.display(combined);

import vl from "npm:vega-lite-api";

// const allChordTypes = Object.keys(countByChordType);

/**
 * Maps a zero-based scale step (0..6) to its upper-case roman numeral.
 * Any value that is not one of those indices yields "?".
 */
const toRoman = (n: number) => {
  const numerals = ["I", "II", "III", "IV", "V", "VI", "VII"];
  const numeral = numerals[n];
  return numeral ?? "?";
};

// Occurrence count per (step, chord type) pair, keyed as "<roman>|<type>".
const freqMap = new Map<string, number>();

allChords.forEach(({ type, step }) => {
  const key = `${toRoman(step)}|${type}`;
  freqMap.set(key, (freqMap.get(key) ?? 0) + 1);
});

// Build flat array for Vega
const distroData = Array.from(freqMap, ([key, freq]) => {
  // Undo the composite key; the first field is the roman step, the second the type.
  const [step, type] = key.split("|");
  return { step, type, freq };
});

// Heatmap of chord usage: one rectangle per (step, chord type) row of
// distroData, coloured by how often that combination occurs.
// NOTE(review): this reuses the name of the bar chart defined in an earlier
// cell; that works cell-by-cell in the notebook but is a redeclaration if the
// cells are ever merged into a single module.
const chordTypeDistroChart = vl
  .markRect()
  .data(distroData)
  .encode(
    vl.x().fieldN("step").title("Chord Step"),
    vl.y().fieldN("type").title("Chord Type"),
    vl.color().fieldQ("freq").title("Frequency"),
    // Tooltip lists the chord type, the step and the raw frequency.
    vl.tooltip([vl.fieldN("type"), vl.fieldN("step"), vl.fieldQ("freq")])
  )
  .title("Chord Density Heatmap");

await Deno.jupyter.display(chordTypeDistroChart);