--- layout: layout.njk permalink: "{{ page.filePathStem }}.html" --- {% include "toc.njk" %}
In the previous section, we showed how to visualize data in an imperative way.
Smile also supports data visualization with a declarative approach.
With the smile.plot.vega
package (in the artifact
smile-scala
), we can create a specification
that describes visualizations as mappings from data to properties
of graphical marks (e.g., points or bars). The specification is
based on Vega-Lite.
The Vega-Lite compiler automatically produces visualization components
including axes, legends, and scales. It then determines properties of
these components based on a set of carefully designed rules.
This approach allows specifications to be succinct and expressive while still providing user control. As Vega-Lite is designed for analysis, it supports data transformations such as aggregation, binning, filtering, and sorting, as well as visual transformations including stacking and faceting. Moreover, Vega-Lite specifications can be composed into layered and multi-view displays, and made interactive with selections.
The Vega-Lite website provides detailed documentation on the specification. Below, we show how to create a variety of charts through examples.
// A simple bar chart over embedded data: one bar per ordinal category "a",
// sized by the quantitative field "b".
// The x-axis option uses the Vega-Lite axis property "labelAngle" (set to 0).
val bar = VegaLite.view().
  mark("bar").
  heightStep(17).
  x(field = "a", `type` = "ordinal", axis = JsObject("labelAngle" -> JsInt(0))).
  y(field = "b", `type` = "quantitative").
  data(jsan"""
[
{"a": "A", "b": 28}, {"a": "B", "b": 55}, {"a": "C", "b": 43},
{"a": "D", "b": 91}, {"a": "E", "b": 81}, {"a": "F", "b": 53},
{"a": "G", "b": 19}, {"a": "H", "b": 87}, {"a": "I", "b": 52}
]"""
  ).
  description("A simple bar chart with embedded data.")
// Aggregate bar chart: summed "people" on x per ordinal "age" on y,
// restricted to rows where datum.year == 2000.
val aggregateBar = {
  val populationUrl = "https://vega.github.io/vega-lite/examples/data/population.json"
  val onlyYear2000 = json"""{"filter": "datum.year == 2000"}"""
  VegaLite(populationUrl)
    .mark("bar")
    .heightStep(17)
    .x(field = "people", `type` = "quantitative", aggregate = "sum", title = "population")
    .y(field = "age", `type` = "ordinal")
    .transform(onlyYear2000)
    .description("A bar chart showing the US population distribution of age groups in 2000.")
}
// Bar chart of summed "people" per "age" for year 2000, with the y categories
// ordered through sort = "-x"; the finished chart is then passed to show(...).
val sortedAggregateBar = {
  val populationUrl = "https://vega.github.io/vega-lite/examples/data/population.json"
  val onlyYear2000 = json"""{"filter": "datum.year == 2000"}"""
  VegaLite(populationUrl)
    .mark("bar")
    .heightStep(17)
    .x(field = "people", `type` = "quantitative", aggregate = "sum", title = "population")
    .y(field = "age", `type` = "ordinal", sort = Some("-x"))
    .transform(onlyYear2000)
    .description("A bar chart that sorts the y-values by the x-values.")
}
show(sortedAggregateBar)
// Grouped bar chart: faceted into columns by "age"; inside each column one bar
// per derived "gender" showing summed "people" for year 2000.
val groupedBar = {
  val populationUrl = "https://vega.github.io/vega-lite/examples/data/population.json"
  val onlyYear2000 = json"""{"filter": "datum.year == 2000"}"""
  // Derives the "gender" label from the numeric "sex" field (2 => Female, otherwise Male).
  val deriveGender = json"""{"calculate": "datum.sex == 2 ? 'Female' : 'Male'", "as": "gender"}"""
  VegaLite.facet(populationUrl)
    .column(field = "age", `type` = "ordinal", spacing = Some(10))
    .mark("bar")
    .widthStep(12)
    .x(field = "gender", `type` = "nominal", title = null)
    .y(field = "people", `type` = "quantitative", aggregate = "sum", axis = json"""{"title": "population", "grid": false}""")
    .color(field = "gender", `type` = "nominal", scale = json"""{"range": ["#675193", "#ca8861"]}""")
    .transform(onlyYear2000, deriveGender)
    .config(json"""{"view": {"stroke": "transparent"}, "axis": {"domainWidth": 1}}""")
}
// Stacked bar chart: record count per month of "date", colored by "weather"
// using an explicit domain-to-color scale and a titled legend.
val stackedBar = {
  val weatherUrl = "https://vega.github.io/vega-lite/examples/data/seattle-weather.csv"
  val weatherPalette = json"""{
"domain": ["sun", "fog", "drizzle", "rain", "snow"],
"range": ["#e7ba52", "#c7c7c7", "#aec7e8", "#1f77b4", "#9467bd"]
}"""
  VegaLite(weatherUrl)
    .mark("bar")
    .x(field = "date", `type` = "ordinal", timeUnit = "month", title = "Month of the year")
    .y(field = null, aggregate = "count", `type` = "quantitative")
    .color(
      field = "weather",
      `type` = "nominal",
      scale = weatherPalette,
      legend = JsObject("title" -> JsString("Weather type"))
    )
}
// Stacked bar chart of record counts per month, colored by "weather"; the bar
// mark sets a corner radius of 3 on its top-left and top-right corners.
val stackedRoundedBar = {
  val weatherUrl = "https://vega.github.io/vega-lite/examples/data/seattle-weather.csv"
  val roundedBar = JsObject("type" -> "bar", "cornerRadiusTopLeft" -> 3, "cornerRadiusTopRight" -> 3)
  VegaLite(weatherUrl)
    .mark(roundedBar)
    .x(field = "date", `type` = "ordinal", timeUnit = "month")
    .y(field = null, aggregate = "count", `type` = "quantitative")
    .color(field = "weather", `type` = "nominal")
}
// Horizontal stacked bar chart: summed "yield" on x per "variety" on y,
// with bars colored by "site".
val horizontalStackedBar = {
  val barleyUrl = "https://vega.github.io/vega-lite/examples/data/barley.json"
  VegaLite(barleyUrl)
    .mark("bar")
    .x(field = "yield", `type` = "quantitative", aggregate = "sum")
    .y(field = "variety", `type` = "nominal")
    .color(field = "site", `type` = "nominal")
}
// Layered (non-stacked) bar chart: summed "people" per "age" for year 2000,
// colored by derived "gender"; stack is set to JsNull and opacity to 0.7.
val layeredBar = {
  val populationUrl = "https://vega.github.io/vega-lite/examples/data/population.json"
  val onlyYear2000 = json"""{"filter": "datum.year == 2000"}"""
  // Derives the "gender" label from the numeric "sex" field (2 => Female, otherwise Male).
  val deriveGender = json"""{"calculate": "datum.sex == 2 ? 'Female' : 'Male'", "as": "gender"}"""
  VegaLite(populationUrl)
    .mark("bar")
    .widthStep(17)
    .x(field = "age", `type` = "ordinal")
    .y(field = "people", `type` = "quantitative", aggregate = "sum", title = "population", stack = JsNull)
    .color(field = "gender", `type` = "nominal", scale = json"""{"range": ["#675193", "#ca8861"]}""")
    .opacity(0.7)
    .transform(onlyYear2000, deriveGender)
}
// Normalized stacked bar chart: summed "people" per "age" for year 2000,
// colored by derived "gender", with stack = "normalize" on the y channel.
val normalizedStackedBar = {
  val populationUrl = "https://vega.github.io/vega-lite/examples/data/population.json"
  val onlyYear2000 = json"""{"filter": "datum.year == 2000"}"""
  // Derives the "gender" label from the numeric "sex" field (2 => Female, otherwise Male).
  val deriveGender = json"""{"calculate": "datum.sex == 2 ? 'Female' : 'Male'", "as": "gender"}"""
  VegaLite(populationUrl)
    .mark("bar")
    .widthStep(17)
    .x(field = "age", `type` = "ordinal")
    .y(field = "people", `type` = "quantitative", aggregate = "sum", title = "population", stack = "normalize")
    .color(field = "gender", `type` = "nominal", scale = json"""{"range": ["#675193", "#ca8861"]}""")
    .transform(onlyYear2000, deriveGender)
}
// Gantt-style chart over embedded rows: one bar per "task" on y, spanning
// from "start" (x) to "end" (x2).
val gantt = {
  val tasks = jsan"""
[
{"task": "A", "start": 1, "end": 3},
{"task": "B", "start": 3, "end": 8},
{"task": "C", "start": 8, "end": 10}
]"""
  VegaLite.view()
    .mark("bar")
    .y(field = "task", `type` = "ordinal")
    .x(field = "start", `type` = "quantitative")
    .x2(field = "end")
    .data(tasks)
}
// Bar chart over embedded rows whose "color" field is used both as the x
// category and as the color channel; the color scale is set to JsNull.
val colorBar = {
  val swatches = jsan"""
[
{"color": "red", "b": 28},
{"color": "green", "b": 55},
{"color": "blue", "b": 43}
]"""
  VegaLite.view()
    .mark("bar")
    .x(field = "color", `type` = "nominal")
    .y(field = "b", `type` = "quantitative")
    .color(field = "color", `type` = "nominal", scale = JsNull)
    .data(swatches)
}
// Histogram: "IMDB_Rating" binned on x (bin = Left(true)), record count on y.
val histogram = {
  val moviesUrl = "https://vega.github.io/vega-lite/examples/data/movies.json"
  VegaLite(moviesUrl)
    .mark("bar")
    .x(field = "IMDB_Rating", `type` = "quantitative", bin = Left(true))
    .y(field = null, `type` = "quantitative", aggregate = "count")
}
// Relative-frequency histogram of "Horsepower". The transform pipeline bins
// the field, counts rows per bin, sums those counts into "TotalCount", and
// divides to get "PercentOfTotal"; x/x2 then read the pre-binned start and
// end fields (bin = {"binned": true}).
// The derived bin field names must be spelled identically in the transforms
// and in the x/x2 encodings.
val freqHistogram = {
  val carsUrl = "https://vega.github.io/vega-lite/examples/data/cars.json"
  val relativeFrequency = jsan"""[
{
"bin": true, "field": "Horsepower", "as": "bin_Horsepwoer"
},
{
"aggregate": [{"op": "count", "as": "Count"}],
"groupby": ["bin_Horsepwoer", "bin_Horsepwoer_end"]
},
{
"joinaggregate": [{"op": "sum", "field": "Count", "as": "TotalCount"}]
},
{
"calculate": "datum.Count/datum.TotalCount", "as": "PercentOfTotal"
}
]"""
  VegaLite(carsUrl)
    .mark(JsObject("type" -> "bar", "tooltip" -> true))
    .x(field = "bin_Horsepwoer", `type` = "quantitative", bin = Right(json"""{"binned":true}"""), title = "Horsepower")
    .x2(field = "bin_Horsepwoer_end")
    .y(field = "PercentOfTotal", `type` = "quantitative", title = "Relative Frequency", axis = json"""{"format": ".1~%"}""")
    .transform(relativeFrequency)
}
// 2D histogram heatmap: "IMDB_Rating" (maxbins 60) against
// "Rotten_Tomatoes_Rating" (maxbins 40), rect color = record count.
// Rows are kept only when both rating fields pass the "valid" filter.
val histHeatmap = {
  val moviesUrl = "https://vega.github.io/vega-lite/examples/data/movies.json"
  val bothRatingsValid = json"""{
"filter": {
"and": [
{"field": "IMDB_Rating", "valid": true},
{"field": "Rotten_Tomatoes_Rating", "valid": true}
]}}"""
  VegaLite(moviesUrl)
    .mark("rect")
    .x(field = "IMDB_Rating", `type` = "quantitative", bin = Right(json"""{"maxbins":60}"""), title = "IMDB Rating")
    .y(field = "Rotten_Tomatoes_Rating", `type` = "quantitative", bin = Right(json"""{"maxbins":40}"""))
    .color(field = null, `type` = "quantitative", aggregate = "count")
    .width(300)
    .height(200)
    .transform(bothRatingsValid)
    .config(JsObject("view" -> json"""{"stroke": "transparent"}"""))
}
// Density plot: a density transform over "IMDB_Rating" (bandwidth 0.3),
// drawn as an area of "density" against "value" in a 400x100 view.
val density = {
  val moviesUrl = "https://vega.github.io/vega-lite/examples/data/movies.json"
  val ratingDensity = json"""
{
"density": "IMDB_Rating",
"bandwidth": 0.3
}"""
  VegaLite(moviesUrl)
    .mark("area")
    .x(field = "value", `type` = "quantitative", title = "IMDB Rating")
    .y(field = "density", `type` = "quantitative")
    .width(400)
    .height(100)
    .transform(ratingDensity)
}
// Cumulative frequency plot: a window transform sorted by "IMDB_Rating" with
// frame [null, 0] writes a running count to "cumulative_count", drawn as an area.
val cdf = {
  val moviesUrl = "https://vega.github.io/vega-lite/examples/data/movies.json"
  val runningCount = json"""
{
"sort": [{"field": "IMDB_Rating"}],
"window": [{"op": "count", "field": "count", "as": "cumulative_count"}],
"frame": [null, 0]
}"""
  VegaLite(moviesUrl)
    .mark("area")
    .x(field = "IMDB_Rating", `type` = "quantitative")
    .y(field = "cumulative_count", `type` = "quantitative")
    .transform(runningCount)
}
// Scatter plot: "Horsepower" against "Miles_per_Gallon", with both point
// color and point shape keyed on "Origin".
val scatter = {
  val carsUrl = "https://vega.github.io/vega-lite/examples/data/cars.json"
  VegaLite(carsUrl)
    .mark("point")
    .x(field = "Horsepower", `type` = "quantitative")
    .y(field = "Miles_per_Gallon", `type` = "quantitative")
    .color(field = "Origin", `type` = "nominal")
    .shape(field = "Origin", `type` = "nominal")
}
// Bubble plot: "Horsepower" against "Miles_per_Gallon", with point size
// driven by "Acceleration".
val bubble = {
  val carsUrl = "https://vega.github.io/vega-lite/examples/data/cars.json"
  VegaLite(carsUrl)
    .mark("point")
    .x(field = "Horsepower", `type` = "quantitative")
    .y(field = "Miles_per_Gallon", `type` = "quantitative")
    .size(field = "Acceleration", `type` = "quantitative")
}
// Bubble chart: one circle per ("Year", "Entity") pair, sized by "Deaths"
// and colored by "Entity" (color legend set to JsNull). Rows whose Entity
// is 'All natural disasters' are filtered out.
val disaster = {
  val disastersUrl = "https://vega.github.io/vega-lite/examples/data/disasters.csv"
  val outlinedCircle = JsObject(
    "type" -> "circle",
    "opacity" -> 0.8,
    "stroke" -> "black",
    "strokeWidth" -> 1
  )
  VegaLite(disastersUrl)
    .mark(outlinedCircle)
    .width(600)
    .height(400)
    .x(field = "Year", `type` = "ordinal", axis = json"""{"labelAngle": 0}""")
    .y(field = "Entity", `type` = "nominal", title = null)
    .size(
      field = "Deaths",
      `type` = "quantitative",
      legend = json"""{"title": "Annual Global Deaths", "clipHeight": 30}""",
      scale = json"""{"range": [0, 5000]}"""
    )
    .color(field = "Entity", `type` = "nominal", legend = JsNull)
    .transform(json"""{"filter": "datum.Entity !== 'All natural disasters'"}""")
}
// Text scatter plot: each car is drawn as its "Brand" label at
// ("Horsepower", "Miles_per_Gallon"), colored by "Brand". "Brand" is computed
// as the first space-separated token of "Name".
val textPlot = {
  val carsUrl = "https://vega.github.io/vega-lite/examples/data/cars.json"
  val deriveBrand = json"""
{
"calculate": "split(datum.Name, ' ')[0]",
"as": "Brand"
}"""
  VegaLite(carsUrl)
    .mark("text")
    .x(field = "Horsepower", `type` = "quantitative")
    .y(field = "Miles_per_Gallon", `type` = "quantitative")
    .color(field = "Brand", `type` = "nominal")
    .text(field = "Brand", `type` = "nominal")
    .transform(deriveBrand)
}
// Line chart of "price" over "date", restricted to rows whose symbol is 'GOOG'.
val line = {
  val stocksUrl = "https://vega.github.io/vega-lite/examples/data/stocks.csv"
  val onlyGoog = json"""{"filter": "datum.symbol==='GOOG'"}"""
  VegaLite(stocksUrl)
    .mark("line")
    .x(field = "date", `type` = "temporal")
    .y(field = "price", `type` = "quantitative")
    .transform(onlyGoog)
}
// Line chart with point markers ("point" -> true): mean "price" per year of
// "date", one colored series per "symbol".
val pointLine = {
  val stocksUrl = "https://vega.github.io/vega-lite/examples/data/stocks.csv"
  val lineWithPoints = JsObject("type" -> "line", "point" -> true)
  VegaLite(stocksUrl)
    .mark(lineWithPoints)
    .x(field = "date", `type` = "temporal", timeUnit = "year")
    .y(field = "price", `type` = "quantitative", aggregate = "mean")
    .color(field = "symbol", `type` = "nominal")
}
// Two-layer chart sharing an x encoding of "Year" (yearly time unit):
// an errorband layer (extent "ci") over "Miles_per_Gallon", and a line layer
// of its mean.
val confidenceInterval = {
  val ciBand = VegaLite.view()
    .mark(JsObject("type" -> "errorband", "extent" -> "ci"))
    .y(field = "Miles_per_Gallon", `type` = "quantitative", title = "Mean of Miles per Gallon (95% CIs)")
  val meanLine = VegaLite.view()
    .mark("line")
    .y(field = "Miles_per_Gallon", `type` = "quantitative", aggregate = "mean")
  VegaLite.layer(
    "https://vega.github.io/vega-lite/examples/data/cars.json",
    "json",
    ciBand,
    meanLine
  )
    .x(field = "Year", `type` = "temporal", timeUnit = "year")
}
// Two-layer chart in a 400x300 view: faint points (opacity 0.3) of "temp_max"
// over "date", and a red line of "rolling_mean". "rolling_mean" is produced by
// a window transform taking the mean of "temp_max" over the frame [-15, 15].
val rollingAverages = {
  val rawPoints = VegaLite.view()
    .mark(JsObject("type" -> "point", "opacity" -> 0.3))
    .x(field = "date", `type` = "temporal", title = "Date")
    .y(field = "temp_max", `type` = "quantitative", title = "Max Temperature")
  val smoothedLine = VegaLite.view()
    .mark(JsObject("color" -> "red", "size" -> 3, "type" -> "line"))
    .x(field = "date", `type` = "temporal")
    .y(field = "rolling_mean", `type` = "quantitative")
  val rollingMean = json"""
{
"frame": [-15, 15],
"window": [
{
"field": "temp_max",
"op": "mean",
"as": "rolling_mean"
}
]
}"""
  VegaLite.layer(
    "https://vega.github.io/vega-lite/examples/data/seattle-weather.csv",
    "csv",
    rawPoints,
    smoothedLine
  )
    .width(400)
    .height(300)
    .transform(rollingMean)
}
// Area chart of "price" over "date" for rows whose symbol is 'GOOG'; the area
// mark also enables its "line" and "point" options.
val area = {
  val stocksUrl = "https://vega.github.io/vega-lite/examples/data/stocks.csv"
  val areaWithOverlay = JsObject("type" -> "area", "line" -> true, "point" -> true)
  val onlyGoog = json"""{"filter": "datum.symbol==='GOOG'"}"""
  VegaLite(stocksUrl)
    .mark(areaWithOverlay)
    .x(field = "date", `type` = "temporal")
    .y(field = "price", `type` = "quantitative")
    .transform(onlyGoog)
}
// Calendar-style heatmap: day of month ("date" time unit) on x, month on y,
// rect color = max "temp". The config sets the view stroke width to 0, the
// view step to 13, and turns the axis domain off.
val heatmap = {
  val tempsUrl = "https://vega.github.io/vega-lite/examples/data/seattle-temps.csv"
  val compactGrid = json"""
{
"view": {
"strokeWidth": 0,
"step": 13
},
"axis": {
"domain": false
}
}"""
  VegaLite(tempsUrl)
    .mark("rect")
    .x(field = "date", `type` = "ordinal", timeUnit = "date", title = "Day", axis = json"""{"labelAngle": 0, "format": "%e"}""")
    .y(field = "date", `type` = "ordinal", timeUnit = "month", title = "Month")
    .color(field = "temp", `type` = "quantitative", aggregate = "max", legend = json"""{"title": null}""")
    .config(compactGrid)
    .title("2010 Daily Max Temperature (F) in Seattle, WA")
}
// Donut chart over embedded rows: arc mark with innerRadius 50, arc angle
// (theta) from "value", color from "category"; the view stroke is JsNull.
val donut = {
  val slices = jsan"""
[
{"category": 1, "value": 4},
{"category": 2, "value": 6},
{"category": 3, "value": 10},
{"category": 4, "value": 3},
{"category": 5, "value": 7},
{"category": 6, "value": 8}
]"""
  VegaLite(slices)
    .mark(JsObject("type" -> "arc", "innerRadius" -> 50))
    .view(JsObject("stroke" -> JsNull))
    .theta(field = "value", `type` = "quantitative")
    .color(field = "category", `type` = "nominal")
}
// Radial plot over six embedded numbers. Two layers share the top-level
// theta/radius/color encodings on the "data" field: an arc layer and a text
// layer that labels each arc with its value.
// The composed spec sets no mark of its own: in Vega-Lite a mark belongs to a
// single view, and each layer here already declares one.
val radial = VegaLite.layer(
  jsan"""[12, 23, 47, 6, 52, 19]""",
  VegaLite.view().
    mark(JsObject("type" -> "arc", "innerRadius" -> 20, "stroke" -> "#fff")),
  VegaLite.view().mark(JsObject("type" -> "text", "radiusOffset" -> 10)).
    text(field = "data", `type` = "quantitative")
).
  view(JsObject("stroke" -> JsNull)).
  theta(field = "data", `type` = "quantitative", stack = true).
  radius(field = "data", `type` = "quantitative", scale = json"""{"type": "sqrt", "zero": true, "range": [20, 100]}""").
  color(field = "data", `type` = "nominal", legend = JsNull)
// Box plot of "people" per ordinal "age", using the boxplot mark with
// extent "min-max"; the view stroke is JsNull.
val boxplot = {
  val populationUrl = "https://vega.github.io/vega-lite/examples/data/population.json"
  val minMaxBox = JsObject("type" -> "boxplot", "extent" -> "min-max")
  VegaLite(populationUrl)
    .mark(minMaxBox)
    .view(JsObject("stroke" -> JsNull))
    .x(field = "age", `type` = "ordinal")
    .y(field = "people", `type` = "quantitative", title = "population")
}
// Combine three of the bar charts with VegaLite.vconcat (vertical
// concatenation) and pass the composed chart to show(...).
show(
  VegaLite.vconcat(bar, aggregateBar, groupedBar)
)
// Read the iris ARFF file, then pass VegaLite.splom(iris, "class") to show(...).
// NOTE(review): "class" is presumably the column used to color the scatter
// plot matrix - confirm against the splom API.
val iris = read.arff("data/weka/iris.arff")
show(
  VegaLite.splom(iris, "class")
)
// Choropleth map: geoshape marks from the "counties" feature of a TopoJSON
// file, drawn with the albersUsa projection in a 500x300 view and colored by
// "rate". "rate" is attached by a lookup transform that matches "id" against
// the "id" key of unemployment.tsv.
val geo = {
  val countiesFormat = JsObject("type" -> "topojson", "feature" -> "counties")
  val unemploymentLookup = json"""
{
"lookup": "id",
"from": {
"data": {
"url": "https://vega.github.io/vega-lite/examples/data/unemployment.tsv"
},
"key": "id",
"fields": ["rate"]
}
}"""
  VegaLite(
    "https://vega.github.io/vega-lite/examples/data/us-10m.json",
    countiesFormat
  )
    .mark("geoshape")
    .color(field = "rate", `type` = "quantitative")
    .projection(json"""{"type": "albersUsa"}""")
    .width(500)
    .height(300)
    .transform(unemploymentLookup)
}