FWIW, this is what I came up with.
It is basically the same code (only in TS), but I’ve added the statistics calculation and a tooltip, only for the rectangle (the box) and the dots (the outliers):
It is also from my Vue project, and I did not want to add an additional dependency on D3, so I have added my own implementations of the functions that would otherwise be imported with:
import {max, min, quantile} from "d3";
On the other hand, I’m not really proficient with this lib and I feel this could have been done better.
import { dot, map, marks } from '@observablehq/plot';
import { identity } from '@observablehq/plot';
import { groupX, groupZ } from '@observablehq/plot';
import { barY } from '@observablehq/plot';
import { ruleX } from '@observablehq/plot';
import { tickY } from '@observablehq/plot';
import type { ChannelTransform, CompoundMark, Data } from '@observablehq/plot';
import type { BarXOptions, BarYOptions } from '@observablehq/plot';
import type { DotOptions } from '@observablehq/plot';
import type { RuleXOptions, RuleYOptions } from '@observablehq/plot';
import type { TickXOptions, TickYOptions } from '@observablehq/plot';
/**
 * Options for the boxX mark.
 *
 * Defined as the intersection of the option types of the dot, barX, tickX and
 * ruleX marks, so any option accepted by one of those marks is accepted here.
 */
export type BoxXOptions = DotOptions &
BarXOptions &
TickXOptions &
RuleXOptions;
/**
 * Options for the boxY mark.
 *
 * Defined as the intersection of the option types of the dot, barY, tickY and
 * ruleY marks, so any option accepted by one of those marks is accepted here.
 */
export type BoxYOptions = DotOptions &
BarYOptions &
TickYOptions &
RuleYOptions;
/** Quartile summary shown in the tooltip of one box. */
interface BoxSummary {
  q1: number;
  q3: number;
  median: number;
}

// Reads the value of a channel for a single datum. Supports the channel forms
// the tooltip needs: an accessor function, the `identity` transform (the datum
// itself) and a field name. Any other form (including null) yields undefined.
function readChannel(channel: unknown, datum: unknown, index: number): unknown {
  if (typeof channel === 'function') {
    return (channel as (d: unknown, i: number) => unknown)(datum, index);
  }
  if (channel === identity) return datum;
  if (typeof channel === 'string') {
    return datum == null
      ? undefined
      : (datum as Record<string, unknown>)[channel];
  }
  return undefined;
}

// Normalizes a group key so that equal-valued objects (e.g. two Date instances
// for the same instant) land in the same box.
function boxKey(value: unknown): unknown {
  return value !== null && typeof value === 'object'
    ? (value as object).valueOf()
    : value;
}

// Buckets the numeric y values by x in a single pass over the rows, then
// computes Q1, median and Q3 for every non-empty bucket.
function summarizeBoxes(
  rows: readonly unknown[],
  x: unknown,
  y: unknown
): Map<unknown, BoxSummary> {
  const valuesByKey = new Map<unknown, number[]>();
  rows.forEach((datum, index) => {
    const raw = readChannel(y, datum, index);
    if (raw == null) return; // missing value: Number(null) would become 0
    const value = Number(raw);
    if (Number.isNaN(value)) return;
    const key = boxKey(readChannel(x, datum, index));
    const bucket = valuesByKey.get(key);
    if (bucket) bucket.push(value);
    else valuesByKey.set(key, [value]);
  });

  const summaries = new Map<unknown, BoxSummary>();
  valuesByKey.forEach((values, key) => {
    const q1 = quartile1(values);
    const q3 = quartile3(values);
    const median = quantile(values, 0.5);
    if (q1 === undefined || q3 === undefined || median === undefined) return;
    summaries.set(key, { q1, q3, median });
  });
  return summaries;
}

/**
 * Returns a composite mark for producing a vertical box plot, applying the
 * necessary statistical transforms. The boxes are grouped by x, if present.
 *
 * The composite consists of a rule (whiskers), a bar (Q1–Q3 box), a tick
 * (median) and dots (outliers). The bar carries a `title` with the Q1, median
 * and Q3 of its group; the dots have `tip: true`; the rule and the tick have
 * their tips and titles disabled.
 *
 * Tooltip statistics are resolved for `x`/`y` channels given as accessor
 * functions, field names, or the default `identity`; for other channel forms
 * the bar title falls back to "No data".
 *
 * @param data - The data to plot.
 * @param options - Box options; `y` defaults to `identity`, `x` to null,
 *   `fill` to '#ccc', `stroke` to 'currentColor' and `strokeWidth` to 2.
 * @returns The compound mark made of the rule, bar, tick and dot marks.
 */
export function boxY(
  data: Data,
  {
    y = identity,
    x = null,
    r,
    fill = '#ccc',
    fillOpacity,
    stroke = 'currentColor',
    strokeOpacity,
    strokeWidth = 2,
    sort,
    ...options
  }: BoxYOptions = {}
): CompoundMark {
  const group = x != null ? groupX : groupZ;
  // Precompute the quartiles per distinct x once, so the title accessor below
  // is a cheap lookup.
  const summaries = summarizeBoxes(Array.from(data), x, y);
  return marks(
    ruleX(
      data,
      group(
        { y1: loqr1, y2: hiqr2 },
        { x, y, stroke, strokeOpacity, ...options, tip: false, title: '' }
      )
    ),
    barY(
      data,
      group(
        { y1: 'p25', y2: 'p75' },
        {
          x,
          y,
          fill,
          fillOpacity,
          ...options,
          title: (d: unknown, i: number): string => {
            const stats = summaries.get(boxKey(readChannel(x, d, i)));
            return stats
              ? `Q1: ${stats.q1}, Median: ${stats.median}, Q3: ${stats.q3}`
              : 'No data';
          },
        }
      )
    ),
    tickY(
      data,
      group(
        { y: 'p50' },
        {
          x,
          y,
          stroke,
          strokeOpacity,
          strokeWidth,
          sort,
          ...options,
          tip: false,
          title: '',
        }
      )
    ),
    dot(
      data,
      map(
        { y: oqr as any },
        {
          x,
          y,
          z: x as unknown as ChannelTransform,
          r,
          stroke,
          strokeOpacity,
          ...options,
          tip: true,
        }
      )
    )
  );
}
// Map function for the outlier dots: keeps a value only when it lies outside
// the whisker range [loqr1, hiqr2] and replaces every other value with NaN.
// When the lower whisker is undefined, nothing is reported as an outlier.
function oqr(values: number[]): number[] {
  const lowerWhisker = loqr1(values);
  const upperWhisker = hiqr2(values);
  return values.map((v: number) => {
    if (lowerWhisker === undefined) return NaN;
    const isBelow = v < lowerWhisker;
    const isAbove = upperWhisker !== undefined && v > upperWhisker;
    return isBelow || isAbove ? v : NaN;
  });
}
// Lower whisker end: the smallest value that is not below the lower fence
// q1 * 2.5 - q3 * 1.5 (algebraically q1 - 1.5 * (q3 - q1)).
// Returns NaN when the quartiles cannot be computed.
function loqr1(values: any) {
  const lowerQuartile = quartile1(values);
  const upperQuartile = quartile3(values);
  if (lowerQuartile === undefined || upperQuartile === undefined) return NaN;
  const fence = lowerQuartile * 2.5 - upperQuartile * 1.5;
  // Values below the fence are mapped to NaN, which `min` never selects.
  return min(values, (d) => {
    const candidate = d as number;
    return candidate >= fence ? candidate : NaN;
  });
}
// Upper whisker end: the largest value that is not above the upper fence
// q3 * 2.5 - q1 * 1.5 (algebraically q3 + 1.5 * (q3 - q1)).
// Returns NaN when the quartiles cannot be computed.
function hiqr2(values: any) {
  const lowerQuartile = quartile1(values);
  const upperQuartile = quartile3(values);
  if (lowerQuartile === undefined || upperQuartile === undefined) return NaN;
  const fence = upperQuartile * 2.5 - lowerQuartile * 1.5;
  // Values above the fence are mapped to -Infinity, which `max` never selects.
  return max(values, (d) => {
    const candidate = d as number;
    return candidate <= fence ? candidate : -Infinity;
  });
}
// First quartile (Q1): the 0.25-quantile of the values.
function quartile1(values: any) {
  const firstQuartile = quantile(values, 0.25);
  return firstQuartile;
}
// Third quartile (Q3): the 0.75-quantile of the values.
function quartile3(values: any) {
  const thirdQuartile = quantile(values, 0.75);
  return thirdQuartile;
}
/**
 * Returns the largest number produced by `accessor` over `values`.
 *
 * Null, undefined and NaN results are ignored. A result of -Infinity is never
 * selected either, so callers can use -Infinity (or NaN) as an "exclude this
 * value" sentinel.
 *
 * @param values - The items to scan.
 * @param accessor - Maps an item to the number to compare; defaults to the
 *   item itself.
 * @returns The maximum, or undefined when no value qualifies (including when
 *   `values` is empty).
 */
function max<T>(
  values: T[],
  accessor: (d: T) => number = (d) => d as unknown as number
): number | undefined {
  let largest = -Infinity;
  for (const item of values) {
    const candidate = accessor(item);
    // `null > -Infinity` is true because null coerces to 0, so missing values
    // must be skipped explicitly or they would be returned as the maximum.
    if (candidate == null) continue;
    if (candidate > largest) largest = candidate;
  }
  return largest === -Infinity ? undefined : largest;
}
/**
 * Returns the smallest number produced by `accessor` over `values`.
 *
 * Null, undefined and NaN results are ignored. A result of Infinity is never
 * selected either, so callers can use Infinity (or NaN) as an "exclude this
 * value" sentinel.
 *
 * @param values - The items to scan.
 * @param accessor - Maps an item to the number to compare; defaults to the
 *   item itself.
 * @returns The minimum, or undefined when no value qualifies (including when
 *   `values` is empty).
 */
function min<T>(
  values: T[],
  accessor: (d: T) => number = (d) => d as unknown as number
): number | undefined {
  let smallest = Infinity;
  for (const item of values) {
    const candidate = accessor(item);
    // `null < Infinity` is true because null coerces to 0, so missing values
    // must be skipped explicitly or they would be returned as the minimum.
    if (candidate == null) continue;
    if (candidate < smallest) smallest = candidate;
  }
  return smallest === Infinity ? undefined : smallest;
}
/**
 * Computes the p-quantile of `values` by linear interpolation between the two
 * closest ranks at position (n - 1) * p of the ascending-sorted values.
 *
 * Null, undefined and NaN entries are ignored, since a NaN would make the sort
 * comparator inconsistent and corrupt the result. The input array is not
 * mutated.
 *
 * @param values - The sample.
 * @param p - The quantile to compute, in [0, 1].
 * @returns The quantile, or undefined when `p` is NaN or outside [0, 1], or
 *   when `values` contains no usable number.
 */
function quantile(values: number[], p: number): number | undefined {
  if (Number.isNaN(p) || p < 0 || p > 1) return undefined;

  const sorted: number[] = [];
  for (const raw of values) {
    if (raw == null) continue; // Number(null) would silently become 0
    const value = Number(raw);
    if (!Number.isNaN(value)) sorted.push(value);
  }
  if (sorted.length === 0) return undefined;
  sorted.sort((a, b) => a - b);

  const index = (sorted.length - 1) * p;
  const lower = Math.floor(index);
  const upper = Math.ceil(index);
  const below = sorted[lower];
  const above = sorted[upper];
  if (below === undefined || above === undefined) return undefined;
  if (lower === upper) return below;
  const weight = index - lower;
  return below * (1 - weight) + above * weight;
}
Here it is in action: