Hearing is Believing: Generating Realistic Speech with Deep Learning

Page 1 of 124

Generate slides with `npx @marp-team/marp-cli src/talks/_tts.md -o src/talks/tts.html`

{ "$schema": "https://vega.github.io/schema/vega-lite/v5.json", "data": { "values": [ {"mos_error": 0.096, "mos_center": 3.492, "model": "Parametric"}, {"mos_error": 0.091, "mos_center": 4.166, "model": "Concatenative"}, {"mos_error": 0.066, "mos_center": 4.526, "model": "Tacotron 2"}, {"mos_error": 0.053, "mos_center": 4.582, "model": "Ground truth"} ] }, "layer": [ { "mark": "errorbar", "encoding": { "y": { "field": "mos_center", "type": "quantitative", "scale": { "domainMin": 3, "domainMax": 5 }, "title": "MOS" }, "yError": {"field": "mos_error"}, "x": { "field": "model", "type": "ordinal", "title": "Model", "sort": {"field": "mos_center"}, "scale": { "padding": 8 }, "axis": { "labelAngle": -45 } }, "color": {"field": "model", "type": "nominal", "legend": null} } }, { "mark": {"type": "point", "filled": true, "size": 50}, "encoding": { "y": {"field": "mos_center", "type": "quantitative"}, "x": {"field": "model", "type": "ordinal","sort": {"field": "mos_center"}}, "color": {"field": "model", "type": "nominal", "legend": null} } } ] }

https://miro.medium.com/max/1400/1*baPJcGNY6mpRkio3zEi6gw.png

https://users.aalto.fi/~ljuvela/interspeech19/