Evaluation d’un modèle
Connexion à l’API et ajout des données
Distribution des indices de confiance
grid = await html`<div style="
background: #fff;
margin: 0;
border: none ;
display: grid;
width: ${screen.width};
grid-template-areas:
'a b c'
'd d d'
'e e e'
;
grid-gap: 10px;
">
<div name="a" style="grid-area: a; position: relative;">${viewof quantile}</div>
<div name="b" style="grid-area: b; position: relative;"><div class="textstyled">Accuracy: ${Accuracy}</div></div>
<div name="c" style="grid-area: c; position: relative;"><div class="textstyled">Seuil: ${Seuil}</div></div>
<div name="d" style="grid-area: d; position: relative;">${distrib}</div>
<div name="e" style="grid-area: e; position: relative;">${distrib_adjusted}</div>
</div>`
Analyse des erreurs
Impact du seuil de reprise sur la performance et le taux de classification automatique
// Second dashboard layout: a single-column CSS grid stacking the three
// threshold-analysis plots (codif_seuil, acc_seuil, acc_codif).
// The width carries an explicit `px` unit: a unitless non-zero length is not
// valid CSS, so without it the browser discards the whole declaration.
grid2 = await html`<div style="
background: #fff;
margin: 0;
border: none ;
display: grid;
width: ${screen.width}px;
grid-template-areas:
'a'
'b'
'c'
;
grid-gap: 10px;
">
<div name="a" style="grid-area: a; position: relative;">${codif_seuil}</div>
<div name="b" style="grid-area: b; position: relative;">${acc_seuil}</div>
<div name="c" style="grid-area: c; position: relative;">${acc_codif}</div>
</div>`
viewof quantile = Inputs.range([0, 100], {label: "Taux de classification automatique", step: 1, value: 80})
distrib = Plot.plot({
round: true,
width: screen.width * 0.7,
color: { scheme: "BuRd", legend: true },
marks: [
Plot.rectY(
processed_response,
Plot.binX(
{ y: "count" },
{ x: "IC", fill: "Result", mixBlendMode: "multiply", tip: true }
)
),
Plot.ruleY([0]),
Plot.ruleX([Seuil], { strokeWidth: 2, tip: true })
]
})
distrib_adjusted = Plot.plot({
width: screen.width * 0.7,
y: { percent: true },
color: { scheme: "BuRd", legend: true },
marks: [
Plot.rectY(
processed_response,
Plot.normalizeY(
"sum", // normalize each series by the sum per series
Plot.binX(
{ y2: "count" }, // disable implicit stack transform
{ x: "IC", fill: "Result", mixBlendMode: "multiply", tip: true }
)
)
),
Plot.ruleY([0]),
Plot.ruleX([Seuil], { strokeWidth: 2, tip: true })
]
})
codif_seuil = Plot.plot({
grid: true,
y: {
label: "↑ Taux de classification automatique (%)",
percent: true
},
x: {
label: "→ Seuil choisi"
},
marks: [
Plot.line(ic_tx_classif, {
x: "seuils",
y: "codif_auto",
stroke: "black",
strokeWidth: 2
}),
Plot.tip(
ic_tx_classif,
Plot.pointer({
x: "seuils",
y: "codif_auto",
title: (d) => `Accuracy : ${round(d.accuracies, 2)}\nSeuil : ${round(
d.seuils,
2
)}\nTaux classification automatique:
${round(d.codif_auto * 100, 2)} %`
})
),
Plot.tip([`Seuil à 0.6`], {
x: ic_tx_classif[60].seuils,
y: ic_tx_classif[60].codif_auto,
anchor: "bottom"
})
],
color: { legend: true }
})
acc_seuil = Plot.plot({
grid: true,
y: {
label: "↑ Accuracy (%)",
percent: true
},
x: {
label: "→ Seuil choisi"
},
marks: [
Plot.line(ic_tx_classif, {
x: "seuils",
y: "accuracies",
stroke: "black",
strokeWidth: 2
}),
Plot.tip(
ic_tx_classif,
Plot.pointer({
x: "seuils",
y: "accuracies",
title: (d) => `Accuracy : ${round(d.accuracies, 2)}\nSeuil : ${round(
d.seuils,
2
)}\nTaux classification automatique:
${round(d.codif_auto * 100, 2)} %`
})
),
Plot.tip([`Seuil à 0.6`], {
x: ic_tx_classif[60].seuils,
y: ic_tx_classif[60].accuracies,
anchor: "bottom"
})
],
color: { legend: true }
})
acc_codif = Plot.plot({
grid: true,
y: {
label: "↑ Accuracy (%)",
percent: true
},
x: {
label: "→ Taux de classification automatique (%)",
percent: true
},
marks: [
Plot.line(ic_tx_classif, {
x: "codif_auto",
y: "accuracies",
stroke: "black",
strokeWidth: 2
}),
Plot.tip(
ic_tx_classif,
Plot.pointer({
x: "codif_auto",
y: "accuracies",
title: (d) => `Accuracy : ${round(d.accuracies, 2)}\nSeuil : ${round(
d.seuils,
2
)}\nTaux classification automatique:
${round(d.codif_auto * 100, 2)} %`
})
),
Plot.tip([`Seuil à 0.6`], {
x: ic_tx_classif[60].codif_auto,
y: ic_tx_classif[60].accuracies,
anchor: "bottom"
})
],
color: { legend: true }
})
ic_tx_classif = {
const newArray = [];
for (let i = 0; i <= 1; i += 0.01) {
let seuil = round(i, 2);
newArray.push({
seuils: seuil,
codif_auto:
processed_response.filter((d) => d.IC > seuil).length /
processed_response.length,
accuracies: get_accuracies_from_threshold(processed_response, i, {
round_int: 15
})
});
}
return newArray;
}
// Pivot the row-oriented `data` array into a column-oriented object:
// [{ a: 1, b: 2 }, { a: 3, b: 4 }]  ->  { a: [1, 3], b: [2, 4] }.
transformedData = data.reduce((columns, row) => {
  for (const [key, value] of Object.entries(row)) {
    // hasOwnProperty is borrowed from Object.prototype so that a field that is
    // itself named "hasOwnProperty" cannot shadow the method on `columns`.
    if (!Object.prototype.hasOwnProperty.call(columns, key)) {
      columns[key] = [];
    }
    columns[key].push(value);
  }
  return columns;
}, {})
processed_response = Object.keys(response.IC).map((key) => ({
IC: response.IC[key] == 1 ? response.IC[key] - 1e-16 : response.IC[key],
Probability: response.Probability[key],
Prediction: response.Prediction[key],
Code: response.Code[key],
Result: response.Result[key],
Lib: response.Lib[key]
}))
seuils = quantiles.map((quantile) =>
d3.quantile(
processed_response.map((d) => d.IC),
1 - quantile
)
)
/**
 * POST a batch of records to the evaluation endpoint using HTTP Basic auth.
 *
 * The row-oriented `data` array is pivoted into a column-oriented object
 * (`{ field: [value of row 0, value of row 1, ...] }`) and sent as JSON.
 *
 * @param {string} username - User name for the Basic `Authorization` header.
 * @param {string} password - Password for the Basic `Authorization` header.
 * @param {Array<Object>} data - Records to send; each own enumerable field
 *   becomes one column of the request body.
 * @returns {Promise<*>} The parsed JSON body when the response is OK;
 *   `undefined` on HTTP 400 (the body's `detail` field is logged); the raw
 *   `Response` object for any other status (a generic message is logged).
 */
async function queryBatchApi(username, password, data) {
  const url = "https://codification-ape.lab.sspcloud.fr/evaluation";

  // Create the request body by pivoting rows into columns.
  const request_body = data.reduce((columns, row) => {
    for (const [key, value] of Object.entries(row)) {
      // hasOwnProperty is borrowed from Object.prototype so that a field that
      // is itself named "hasOwnProperty" cannot shadow the method.
      if (!Object.prototype.hasOwnProperty.call(columns, key)) {
        columns[key] = [];
      }
      columns[key].push(value);
    }
    return columns;
  }, {});

  const response = await fetch(url, {
    method: "POST",
    headers: {
      "Content-Type": "application/json",
      Authorization: `Basic ${btoa(`${username}:${password}`)}`
    },
    body: JSON.stringify(request_body)
  });

  if (response.ok) {
    return response.json();
  }
  if (response.status === 400) {
    // The 400 body is read as JSON and only its `detail` field is reported.
    console.log((await response.json()).detail);
    return undefined;
  }
  console.log("Error occurred while querying the API.");
  return response;
}
function get_accuracies_from_threshold(
data,
threshold,
{ round_int = 2 } = {}
) {
let acc = d3.mean(data.filter((d) => d.IC > threshold).map((d) => d.Result));
acc = acc === undefined ? 1 : round(acc, round_int);
return acc;
}
/**
 * Count how often each value occurs in `array`.
 *
 * Values are used as object keys, so they are compared by their string form
 * and reported as strings in the result.
 *
 * @param {Array} array - Values to count.
 * @param {Object} [options]
 * @param {boolean} [options.freq=false] - When true, report each count divided
 *   by `array.length` instead of the raw count.
 * @returns {Array<{value: string, frequency: number}>} One entry per distinct
 *   value, sorted by `frequency` in descending order.
 */
function valueCounts(array, { freq = false } = {}) {
  // A null-prototype dictionary has no inherited members, so values such as
  // "toString", "constructor" or "__proto__" are counted like any other key.
  const counts = Object.create(null);
  const total = array.length;
  for (let i = 0; i < total; i++) {
    const value = array[i];
    counts[value] = (counts[value] || 0) + 1;
  }

  const result = Object.keys(counts).map((value) => ({
    value: value,
    frequency: freq ? counts[value] / total : counts[value]
  }));
  result.sort((a, b) => b.frequency - a.frequency); // Sort by frequency in descending order
  return result;
}