{"id":2,"date":"2021-09-22T05:08:25","date_gmt":"2021-09-22T05:08:25","guid":{"rendered":"http:\/\/www.yukimitsufuji.com\/?page_id=2"},"modified":"2026-05-04T20:51:09","modified_gmt":"2026-05-04T11:51:09","slug":"sample-page","status":"publish","type":"page","link":"https:\/\/www.yukimitsufuji.com\/","title":{"rendered":"Introduction"},"content":{"rendered":"\n<div class=\"wp-block-columns is-layout-flex wp-container-core-columns-is-layout-28f84493 wp-block-columns-is-layout-flex\">\n<div class=\"wp-block-column is-layout-flow wp-block-column-is-layout-flow\" style=\"flex-basis:50%\">\n<figure class=\"wp-block-image size-full is-resized\"><img loading=\"lazy\" decoding=\"async\" width=\"480\" height=\"480\" src=\"https:\/\/www.yukimitsufuji.com\/wp-content\/uploads\/2022\/03\/DSC00679_circle.jpg\" alt=\"\" class=\"wp-image-301\" style=\"width:402px;height:402px\" srcset=\"https:\/\/www.yukimitsufuji.com\/wp-content\/uploads\/2022\/03\/DSC00679_circle.jpg 480w, https:\/\/www.yukimitsufuji.com\/wp-content\/uploads\/2022\/03\/DSC00679_circle-300x300.jpg 300w, https:\/\/www.yukimitsufuji.com\/wp-content\/uploads\/2022\/03\/DSC00679_circle-150x150.jpg 150w\" sizes=\"auto, (max-width: 480px) 100vw, 480px\" \/><\/figure>\n\n\n\n<div class=\"wp-block-columns is-layout-flex wp-container-core-columns-is-layout-28f84493 wp-block-columns-is-layout-flex\">\n<div class=\"wp-block-column is-layout-flow wp-block-column-is-layout-flow\" style=\"flex-basis:100%\">\n<div style=\"display:flex; gap:12px; align-items:center; margin:20px 0; justify-content:center;\">\n  <!-- X -->\n  <a href=\"https:\/\/x.com\/mittu1204\" target=\"_blank\" rel=\"noopener\" style=\"width:40px;height:40px;background:#000;border-radius:50%;display:flex;align-items:center;justify-content:center;\">\n    <svg width=\"26\" height=\"26\" viewBox=\"0 0 24 24\" fill=\"white\" xmlns=\"http:\/\/www.w3.org\/2000\/svg\">\n      <path d=\"M18.244 2.25h3.308l-7.227 8.26 8.502 11.24H16.17l-5.214-6.817L4.99 21.75H1.68l7.73-8.835L1.254 2.25H8.08l4.713 6.231zm-1.161 17.52h1.833L7.084 4.126H5.394z\"\/>\n    <\/svg>\n  <\/a>\n\n  <!-- LinkedIn -->\n  <a href=\"https:\/\/www.linkedin.com\/in\/mittu1204\" target=\"_blank\" rel=\"noopener\" style=\"width:40px;height:40px;background:#0A66C2;border-radius:50%;display:flex;align-items:center;justify-content:center;\">\n    <svg width=\"26\" height=\"26\" viewBox=\"0 0 24 24\" fill=\"white\" xmlns=\"http:\/\/www.w3.org\/2000\/svg\">\n      <path d=\"M20.5 2h-17C2.7 2 2 2.7 2 3.5v17c0 .8.7 1.5 1.5 1.5h17c.8 0 1.5-.7 1.5-1.5v-17c0-.8-.7-1.5-1.5-1.5zM8.3 20H5V9.1h3.3V20zm-1.6-12.6c-1.1 0-1.9-.9-1.9-1.9s.8-1.9 1.9-1.9c1.1 0 1.9.9 1.9 1.9s-.8 1.9-1.9 1.9zm13.3 12.6h-3.3v-5.7c0-1.4 0-3.2-1.9-3.2-1.7 0-2 .9-2 3.1v5.8h-3.3V9.1h3.2v1.5h.1c.4-.8 1.5-1.7 3.1-1.7 3.3 0 3.9 2.2 3.9 5v6.1z\"\/>\n    <\/svg>\n  <\/a>\n\n  <!-- Google Scholar -->\n  <a href=\"https:\/\/scholar.google.co.jp\/citations?user=GMytI10AAAAJ\" target=\"_blank\" rel=\"noopener\" style=\"width:40px;height:40px;border-radius:50%;overflow:hidden;display:flex;align-items:center;justify-content:center;\">\n    <img loading=\"lazy\" decoding=\"async\" src=\"https:\/\/upload.wikimedia.org\/wikipedia\/commons\/thumb\/c\/c7\/Google_Scholar_logo.svg\/2048px-Google_Scholar_logo.svg.png\" width=\"38\" height=\"38\" alt=\"Google Scholar\">\n  <\/a>\n<\/div>\n<\/div>\n<\/div>\n<\/div>\n\n\n\n<div class=\"wp-block-column is-layout-flow wp-block-column-is-layout-flow\" style=\"flex-basis:50%\">\n<h2 class=\"wp-block-heading\" 
id=\"expertise\"><strong>Expertise<\/strong><\/h2>\n\n\n\n<ul class=\"wp-block-list\">\n<li><mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">PhD <\/mark>in Information Science &amp; Technology Conferred by the University of Tokyo [<a href=\"https:\/\/www.i.u-tokyo.ac.jp\/edu\/course\/ipc\/doctor.shtml#2020\">URL<\/a>][<a href=\"https:\/\/www.yukimitsufuji.com\/wp-content\/uploads\/2023\/02\/PhD_Certification_EN.pdf\" target=\"_blank\" rel=\"noreferrer noopener\">certificate<\/a>]<\/li>\n\n\n\n<li>Selected as a Stanford\/Elsevier <mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">World&#8217;s Top 2% Scientist<\/mark> [<a href=\"https:\/\/www.yukimitsufuji.com\/wp-content\/uploads\/2026\/01\/Top2_certificate-1.pdf\" target=\"_blank\" rel=\"noreferrer noopener\">certificate<\/a>]<\/li>\n\n\n\n<li>Having Papers Published at Top Venues, e.g., <mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">CVPR, ICLR<\/mark>, <mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">NeurIPS<\/mark>, <mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">ICML<\/mark> (See &#8220;<a href=\"#selected_papers\">Selected Papers<\/a>&#8220;)<\/li>\n\n\n\n<li>Fluent in Japanese (Native), English (<a rel=\"noreferrer noopener\" href=\"https:\/\/www.yukimitsufuji.com\/wp-content\/uploads\/2021\/10\/Eiken.pdf\" target=\"_blank\">\u82f1\u691c1\u7d1a<\/a>), French (<a rel=\"noreferrer noopener\" href=\"https:\/\/www.yukimitsufuji.com\/wp-content\/uploads\/2021\/10\/Futsuken.pdf\" target=\"_blank\">\u4ecf\u691c\u6e961\u7d1a)<\/a><\/li>\n<\/ul>\n\n\n\n<h2 class=\"wp-block-heading\" id=\"experience\"><strong>Experience<\/strong><\/h2>\n\n\n\n<ul id=\"block-059258e3-4a81-45ae-9e25-b17a548de748\" class=\"wp-block-list\">\n<li><mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">Distinguished Engineer<\/mark>, Sony Group Corporation [<a href=\"https:\/\/www.sony.com\/en\/SonyInfo\/technology\/distinguished_engineer\/YukiMitsufuji.html\" target=\"_blank\" rel=\"noreferrer noopener\">profile<\/a>]\n<ul class=\"wp-block-list\">\n<li>Building Technologies to Expand the Future of Sound for Creators [<a href=\"https:\/\/www.sony.com\/en\/SonyInfo\/technology\/stories\/entries\/interview_de_mitsufuji\/\" target=\"_blank\" rel=\"noreferrer noopener\">URL<\/a>]<\/li>\n<\/ul>\n<\/li>\n\n\n\n<li><mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">Lead Research Scientist<\/mark> \/ <mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">VP of AI Research<\/mark>, Sony AI [<a href=\"https:\/\/www.ai.sony\/people\/Yuki-Mitsufuji\/\" target=\"_blank\" rel=\"noreferrer noopener\">profile<\/a>]\n<ul class=\"wp-block-list\">\n<li>Sights on AI: Yuki Mitsufuji Shares Inspiration for AI Research into Music and Sound [<a href=\"https:\/\/www.ai.sony\/blog\/Sights-on-AI-Yuki-Mitsufuji\/\" target=\"_blank\" rel=\"noreferrer noopener\">URL<\/a>]<\/li>\n\n\n\n<li>Interview with Yuki Mitsufuji: Improving AI Image Generation [<a href=\"https:\/\/aihub.org\/2025\/01\/23\/interview-with-yuki-mitsufuji-improving-ai-image-generation\/\" target=\"_blank\" rel=\"noreferrer noopener\">URL<\/a>]<\/li>\n<\/ul>\n<\/li>\n\n\n\n<li><mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color 
has-pale-pink-color\">Head of Creative AI Lab<\/mark>, Sony R&amp;D [<a href=\"https:\/\/www.sony.com\/en\/SonyInfo\/technology\/activities\/STEF2022\/exhibition_0203\/\" target=\"_blank\" rel=\"noreferrer noopener\">URL<\/a>][<a href=\"https:\/\/sony.github.io\/creativeai\/\" target=\"_blank\" rel=\"noreferrer noopener\">demo<\/a>]\n<ul class=\"wp-block-list\">\n<li>Music Restoration of a Canadian Pianist Glen Gould [<a rel=\"noreferrer noopener\" href=\"https:\/\/www.youtube.com\/watch?v=EWYxJGmw0Ng&amp;ab_channel=Sony\" target=\"_blank\">YouTube<\/a>]<\/li>\n\n\n\n<li>Soundtrack Restoration of a Classic Movie Lawrence of Arabia [<a rel=\"noreferrer noopener\" href=\"https:\/\/www.youtube.com\/watch?v=jcWINJxnw70&amp;ab_channel=Sony\" target=\"_blank\">YouTube<\/a>]<\/li>\n<\/ul>\n<\/li>\n\n\n\n<li><mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">Visiting Research Professor<\/mark> at New York University 2025\u2013present<\/li>\n\n\n\n<li>Former Specially Appointed <strong><mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">Associate Professor<\/mark><\/strong> at Tokyo Institute of Technology 2022\u20132024 [<a href=\"http:\/\/www.ocw.titech.ac.jp\/index.php?module=General&amp;action=T0300&amp;GakubuCD=2&amp;GakkaCD=321717&amp;KeiCD=17&amp;course=17&amp;KougiCD=202234374&amp;Nendo=2022&amp;lang=EN&amp;vid=03\" target=\"_blank\" rel=\"noreferrer noopener\">2022<\/a>][<a href=\"http:\/\/www.ocw.titech.ac.jp\/index.php?module=General&amp;action=T0300&amp;GakubuCD=2&amp;GakkaCD=321717&amp;KeiCD=17&amp;course=17&amp;KougiCD=202334374&amp;Nendo=2023&amp;vid=03&amp;lang=EN\" target=\"_blank\" rel=\"noreferrer noopener\">2023<\/a>]<\/li>\n\n\n\n<li><mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">IEEE Senior Member<\/mark> [<a href=\"https:\/\/www.yukimitsufuji.com\/wp-content\/uploads\/2024\/08\/IEEE_Senior_Member.jpg\" target=\"_blank\" rel=\"noreferrer noopener\">certificate<\/a>][<a href=\"https:\/\/www.ieee-jp.org\/section\/tokyo\/adm\/info\/newsm\/NewSM_2023.pdf\" target=\"_blank\" rel=\"noreferrer noopener\">URL<\/a>]<\/li>\n\n\n\n<li><mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">Associate Editor<\/mark> of the SPS Open Journal of Signal Processing (OJSP) [<a href=\"https:\/\/signalprocessingsociety.org\/publications-resources\/ieee-open-journal-signal-processing\/advisoryeditorial-board\">URL<\/a>]<\/li>\n\n\n\n<li><mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">Area Chair<\/mark> of NeurIPS, ICLR, ICML, ACL, ICASSP<\/li>\n\n\n\n<li>Invited Researcher at IRCAM 2011\u20132012 [<a rel=\"noreferrer noopener\" href=\"http:\/\/anasynth.ircam.fr\/home\/english\/node\/1516\" target=\"_blank\">URL<\/a>]\n<ul class=\"wp-block-list\">\n<li>Contributed to the 3DTV Content Search Project Sponsored by European Project FP7 [<a href=\"http:\/\/anasynth.ircam.fr\/home\/projects\/3dtvs\" target=\"_blank\" rel=\"noreferrer noopener\">URL<\/a>]<\/li>\n<\/ul>\n<\/li>\n<\/ul>\n<\/div>\n<\/div>\n\n\n\n<div style=\"height:50px\" aria-hidden=\"true\" class=\"wp-block-spacer\"><\/div>\n\n\n\n<h2 class=\"wp-block-heading\" id=\"publications\"><strong>News<\/strong><\/h2>\n\n\n\n<ul class=\"wp-block-list\">\n<li>3 papers were accepted at <mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">ICML 2026<\/mark><\/li>\n\n\n\n<li>My 
- My interview on Protective AI was published in **Nikkei Journal** on Mar. 14, 2026 [[URL](https://www.nikkei.com/article/DGXZQOUC1125A0R10C26A3000000/)]
- 1 blog post was accepted at **ICLR 2026** [[blog](https://the-principles-of-diffusion-models.github.io/#/blog)]
- 4 papers were accepted at **CVPR 2026** (PAVAS as Oral)
- My interview on Protective AI was published in **Nikkei Digital Governance** on Feb. 19, 2026 [[URL](https://www.nikkei.com/prime/digital-governance/article/DGXZQOGN104TB0Q6A210C2000000)]
- Our project was featured on the top page of **Nikkei Journal** on Feb. 16, 2026 [[URL](https://www.nikkei.com/article/DGXZQOGN070LT0X00C26A2000000/)]
- Our diffusion tutorial was accepted at **CVPR 2026** [[tutorial](https://sites.google.com/view/cvpr26-principles-of-diffusion/home)] [[URL](https://cvpr.thecvf.com/virtual/2026/tutorial/36147)]
- 8 papers were accepted at **ICLR 2026**
- 9 papers were accepted at **ICASSP 2026**
- 5 papers were accepted at **NeurIPS 2025**

## Publications

### Selected Papers

1. Naoki Murata, Yuhta Takida, Chieh-Hsin Lai, Toshimitsu Uesaka, Bac Nguyen, Stefano Ermon, **Yuki Mitsufuji**, "GUDA: Counterfactual Group-wise Training Data Attribution for Diffusion Models via Unlearning," accepted at International Conference on Machine Learning (ICML), 2026 [[arXiv](https://arxiv.org/abs/2601.22651)]
2. Silin Gao, Hao Zhao, Zeming Chen, Sepideh Mamooler, Antara Raaghavi Bhattacharya, Qiyu Wu, Hiromi Wakaki, **Yuki Mitsufuji**, Li Mi, Syrielle Montariol, Antoine Bosselut, "Schema-Guided World Modeling for Understanding Hierarchical Visual Dynamics," accepted at International Conference on Machine Learning (ICML), 2026
3. Geyang Guo, Hiromi Wakaki, **Yuki Mitsufuji**, Alan Ritter, Wei Xu, "Learning to Route Languages for Multilingual Preference Optimization," accepted at International Conference on Machine Learning (ICML), 2026
4. Satoshi Hayakawa, Yuhta Takida, Masaaki Imaizumi, Hiromi Wakaki, **Yuki Mitsufuji**, "Demystifying MaskGIT Sampler and Beyond: Adaptive Order Selection in Masked Diffusion," Transactions on Machine Learning Research (TMLR), 2026 [[OpenReview](https://openreview.net/forum?id=mKlW68i2Ig)] [[arXiv](https://arxiv.org/abs/2510.04525)] – **Featured Certification**
5. Zheyuan Hu, Chieh-Hsin Lai, Ge Wu, **Yuki Mitsufuji**, Stefano Ermon, "MeanFlow Transformers with Representation Autoencoders," accepted at IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR), 2026 [[arXiv](https://arxiv.org/abs/2511.13019)] [[code](https://github.com/sony/mf-rae)]
6. Oh Hyun-Bin, Yuhta Takida, Toshimitsu Uesaka, Tae-Hyun Oh, **Yuki Mitsufuji**, "PAVAS: Physics-Aware Video-to-Audio Synthesis," accepted at IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR), 2026 [[arXiv](https://arxiv.org/abs/2512.08282)] [[demo](https://physics-aware-video-to-audio-synthesis.github.io/)] – **Oral**
7. Christian Simon, Masato Ishii, Wei-Yao Wang, Koichi Saito, Akio Hayakawa, Dongseok Shim, Zhi Zhong, Shuyang Cui, Takashi Shibuya, Shusuke Takahashi, **Yuki Mitsufuji**, "Echoes Over Time: Unlocking Length Generalization in Video-to-Audio Generation Models," accepted at IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR), 2026 [[arXiv](https://arxiv.org/abs/2602.20981)] [[demo](https://echoesovertime.github.io/)]
noopener\">arXiv<\/a>][<a href=\"https:\/\/echoesovertime.github.io\/\" target=\"_blank\" rel=\"noreferrer noopener\">demo<\/a>]<\/li>\n\n\n\n<li>Honggyu An, Jaewoo Jung, Mungyeom Kim, Chaehyun Kim, Minkyeong Jeon, Jisang Han, Kazumi Fukuda, Takuya Narihira, Hyunah Ko, Junsu Kim, Sunghwan Hong, <span style=\"text-decoration: underline;\">Yuki Mitsufuji<\/span>, Seungryong Kim, \u201cLearning Concept 3D Representations from Feed-Forward Novel View Synthesis,\u201d accepted at IEEE\/CVF Conference on Computer Vision and Pattern Recognition (<mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">CVPR<\/mark>), 2026 [<a href=\"https:\/\/arxiv.org\/abs\/2512.04021\" target=\"_blank\" rel=\"noreferrer noopener\">arXiv<\/a>][<a href=\"https:\/\/github.com\/cvlab-kaist\/C3G\" target=\"_blank\" rel=\"noreferrer noopener\">code<\/a>][<a href=\"https:\/\/cvlab-kaist.github.io\/C3G\/\" target=\"_blank\" rel=\"noreferrer noopener\">demo<\/a>]<\/li>\n<\/ol>\n\n\n\n<div style=\"height:24px\" aria-hidden=\"true\" class=\"wp-block-spacer\"><\/div>\n\n\n<div class=\"su-divider su-divider-style-default\" style=\"margin:16px 0;border-width:1px;border-color:#000000\"><a href=\"#\" style=\"color:#000000\">Go to top<\/a><\/div>\n\n\n\n<h3 class=\"wp-block-heading\" id=\"books\">Books<\/h3>\n\n\n\n<ol class=\"wp-block-list\">\n<li>Chieh-Hsin Lai, Yang Song, Dongjun Kim, <span style=\"text-decoration: underline;\">Yuki Mitsufuji<\/span>, Stefano Ermon, \u201cThe Principles of Diffusion Models,\u201d 2025 [<a href=\"https:\/\/arxiv.org\/abs\/2510.21890\" target=\"_blank\" rel=\"noreferrer noopener\">arX<\/a><a href=\"https:\/\/arxiv.org\/abs\/2510.15543\" target=\"_blank\" rel=\"noreferrer noopener\">i<\/a><a href=\"https:\/\/arxiv.org\/abs\/2510.15508\" target=\"_blank\" rel=\"noreferrer noopener\">v<\/a>][<a href=\"https:\/\/the-principles-of-diffusion-models.github.io\/\" target=\"_blank\" rel=\"noreferrer noopener\">project<\/a>][<a href=\"https:\/\/the-principles-of-diffusion-models.github.io\/#\/blog\" target=\"_blank\" rel=\"noreferrer noopener\">blog<\/a>]<\/li>\n<\/ol>\n\n\n<div class=\"wp-block-image\">\n<figure class=\"aligncenter size-large\"><img loading=\"lazy\" decoding=\"async\" width=\"1024\" height=\"527\" src=\"https:\/\/www.yukimitsufuji.com\/wp-content\/uploads\/2025\/12\/Diffusion_Monograph-1024x527.png\" alt=\"\" class=\"wp-image-1861\" srcset=\"https:\/\/www.yukimitsufuji.com\/wp-content\/uploads\/2025\/12\/Diffusion_Monograph-1024x527.png 1024w, https:\/\/www.yukimitsufuji.com\/wp-content\/uploads\/2025\/12\/Diffusion_Monograph-300x154.png 300w, https:\/\/www.yukimitsufuji.com\/wp-content\/uploads\/2025\/12\/Diffusion_Monograph-768x396.png 768w, https:\/\/www.yukimitsufuji.com\/wp-content\/uploads\/2025\/12\/Diffusion_Monograph-1536x791.png 1536w, https:\/\/www.yukimitsufuji.com\/wp-content\/uploads\/2025\/12\/Diffusion_Monograph.png 1736w\" sizes=\"auto, (max-width: 1024px) 100vw, 1024px\" \/><\/figure>\n<\/div>\n\n<div class=\"su-divider su-divider-style-default\" style=\"margin:16px 0;border-width:1px;border-color:#000000\"><a href=\"#\" style=\"color:#000000\">Go to top<\/a><\/div>\n\n\n\n<h3 class=\"wp-block-heading\" id=\"journal_papers\">Journal Papers<\/h3>\n\n\n\n<ol class=\"wp-block-list\">\n<li>Satoshi Hayakawa, Yuhta Takida, Masaaki Imaizumi, Hiromi Wakaki, <span style=\"text-decoration: underline;\">Yuki Mitsufuji<\/span>, \u201cDemystifying MaskGIT Sampler and Beyond: Adaptive Order Selection in Masked Diffusion,\u201d Transactions on 
2. Masato Hirano, Kazuki Shimada, Yuichiro Koyama, Shusuke Takahashi, **Yuki Mitsufuji**, "Diffusion-based Signal Refiner for Speech Enhancement and Separation," IEEE Transactions on Audio, Speech, and Language Processing (Trans. ASLP), pp. 1–16, 2026 [[IEEE](https://ieeexplore.ieee.org/document/11393528)] [[arXiv](https://arxiv.org/abs/2305.05857)]
3. Fabio Morreale, Marco Martinez-Ramirez, Raul Masu, WeiHsiang Liao, **Yuki Mitsufuji**, "Reductive, Exclusionary, Normalising: The Limits of Generative AI," Transactions of the International Society for Music Information Retrieval (Trans. ISMIR), vol. 8, no. 1, pp. 300–312, 2025 [[TISMIR](https://transactions.ismir.net/articles/10.5334/tismir.256)]
4. Naoki Murata, Chieh-Hsin Lai, Yuhta Takida, Toshimitsu Uesaka, Bac Nguyen, Stefano Ermon, **Yuki Mitsufuji**, "G2D2: Gradient-Guided Discrete Diffusion for Image Inverse Problem Solving," Transactions on Machine Learning Research (TMLR), 2025 [[OpenReview](https://openreview.net/forum?id=fj23qnVifX)] [[arXiv](https://arxiv.org/abs/2410.14710)] [[code](https://github.com/sony/g2d2)] – **Journal-to-Conference Certification**
5. M. Jehanzeb Mirza, Mengjie Zhao, Zhuoyuan Mao, Sivan Doveh, Wei Lin, Paul Gavrikov, Michael Dorkenwald, Shiqi Yang, Saurav Jha, Hiromi Wakaki, **Yuki Mitsufuji**, Horst Possegger, Rogerio Feris, Leonid Karlinsky, James Glass, "GLOV: Guided Large Language Models as Implicit Optimizers for Vision Language Models," Transactions on Machine Learning Research (TMLR), 2025 [[OpenReview](https://openreview.net/forum?id=kZLANTp6Vw)] [[arXiv](https://arxiv.org/abs/2410.06154)] [[code](https://github.com/jmiemirza/GLOV)]
6. Yutong He, Alexander Robey, Naoki Murata, Yiding Jiang, Joshua Williams, George J. Pappas, Hamed Hassani, **Yuki Mitsufuji**, Ruslan Salakhutdinov, J. Zico Kolter, "Automated Black-box Prompt Engineering for Personalized Text-to-Image Generation," Transactions on Machine Learning Research (TMLR), 2025 [[OpenReview](https://openreview.net/forum?id=IVYVDN6pJ6)] [[arXiv](https://arxiv.org/abs/2403.19103)] [[code](https://github.com/KellyYutongHe/prism_demo)] [[demo](https://kellyyutonghe.github.io/prism/)]
7. Kazuki Shimada, Kengo Uchida, Yuichiro Koyama, Takashi Shibuya, Shusuke Takahashi, **Yuki Mitsufuji**, Tatsuya Kawahara, "Open-Vocabulary Sound Event Localization and Detection with Joint Learning of CLAP Embedding and Activity-Coupled Cartesian DOA Vector," IEEE Transactions on Audio, Speech, and Language Processing (Trans. ASLP), vol. 33, pp. 2946–2960, 2025 [[IEEE](https://ieeexplore.ieee.org/document/11074724)]
8. Sungho Lee, Marco Martínez-Ramírez, Weihsiang Liao, Stefan Uhlich, Giorgio Fabbro, Kyogu Lee, **Yuki Mitsufuji**, "Reverse Engineering of Music Mixing Graphs with Differentiable Processors and Iterative Pruning," Journal of the Audio Engineering Society (JAES), vol. 73, issue 6, pp. 344–365, 2025 [[AES](https://aes2.org/publications/elibrary-page/?id=22917)] [[arXiv](https://www.arxiv.org/abs/2509.15948)] [[code](https://github.com/sh-lee97/grafx)]
9. WeiHsiang Liao, Yuhta Takida, Yukara Ikemiya, Zhi Zhong, Chieh-Hsin Lai, Giorgio Fabbro, Kazuki Shimada, Keisuke Toyama, Kinwai Cheuk, Marco A. Martínez-Ramírez, Shusuke Takahashi, Stefan Uhlich, Taketo Akama, Woosung Choi, Yuichiro Koyama, **Yuki Mitsufuji**, "Music Foundation Model as Generic Booster for Music Downstream Tasks," Transactions on Machine Learning Research (TMLR), 2025 [[OpenReview](https://openreview.net/forum?id=kHl4JzyNzF)] [[arXiv](https://arxiv.org/abs/2411.01135)]
10. Ryosuke Sawata, Naoya Takahashi, Stefan Uhlich, Shusuke Takahashi, **Yuki Mitsufuji**, "The Whole Is Greater than the Sum of Its Parts: Improving Music Source Separation by Bridging Networks," EURASIP Journal on Audio, Speech, and Music Processing (EURASIP J. ASMP), vol. 2024, issue 1, pp. 39–58, 2024 [[EURASIP](https://link.springer.com/article/10.1186/s13636-024-00354-6)] [[arXiv](https://arxiv.org/abs/2305.07855)]
11. Yuhta Takida, Yukara Ikemiya, Takashi Shibuya, Kazuki Shimada, Woosung Choi, Chieh-Hsin Lai, Naoki Murata, Toshimitsu Uesaka, Kengo Uchida, Wei-Hsiang Liao, **Yuki Mitsufuji**, "HQ-VAE: Hierarchical Discrete Representation Learning with Variational Bayes," Transactions on Machine Learning Research (TMLR), 2024 [[OpenReview](https://openreview.net/forum?id=xqAVkqrLjx)] [[arXiv](https://arxiv.org/abs/2401.00365)]
12. Stefan Uhlich, Giorgio Fabbro, Masato Hirano, Shusuke Takahashi, Gordon Wichern, Jonathan Le Roux, Dipam Chakraborty, Sharada Mohanty, Kai Li, Yi Luo, Jianwei Yu, Rongzhi Gu, Roman Solovyev, Alexander Stempkovskiy, Tatiana Habruseva, Mikhail Sukhovei, **Yuki Mitsufuji**, "The Sound Demixing Challenge 2023 – Cinematic Demixing Track," Transactions of the International Society for Music Information Retrieval (Trans. ISMIR), vol. 7, issue 1, pp. 44–62, 2024 [[TISMIR](https://transactions.ismir.net/articles/10.5334/tismir.172)] [[arXiv](https://arxiv.org/abs/2308.06981)] [[challenge](https://www.aicrowd.com/challenges/sound-demixing-challenge-2023/problems/cinematic-sound-demixing-track-cdx-23)]
13. Giorgio Fabbro, Stefan Uhlich, Chieh-Hsin Lai, Woosung Choi, Marco Martínez-Ramírez, Weihsiang Liao, Igor Gadelha, Geraldo Ramos, Eddie Hsu, Hugo Rodrigues, Fabian-Robert Stöter, Alexandre Défossez, Yi Luo, Jianwei Yu, Dipam Chakraborty, Sharada Mohanty, Roman Solovyev, Alexander Stempkovskiy, Tatiana Habruseva, Nabarun Goswami, Tatsuya Harada, Minseok Kim, Jun Hyung Lee, Yuanliang Dong, Xinran Zhang, Jiafeng Liu, **Yuki Mitsufuji**, "The Sound Demixing Challenge 2023 – Music Demixing Track," Transactions of the International Society for Music Information Retrieval (Trans. ISMIR), vol. 7, issue 1, pp. 63–84, 2024 [[TISMIR](https://transactions.ismir.net/articles/10.5334/tismir.171)] [[arXiv](https://arxiv.org/abs/2308.06979)] [[dataset](https://music.ai/research/#datasets)] [[challenge](https://www.aicrowd.com/challenges/sound-demixing-challenge-2023/problems/music-demixing-track-mdx-23)]
14. Yuhta Takida, Wei-Hsiang Liao, Toshimitsu Uesaka, Shusuke Takahashi, **Yuki Mitsufuji**, "Preventing Oversmoothing in VAE via Generalized Variance Parameterization," Neurocomputing, vol. 509, pp. 137–156, 2022 [[Elsevier](https://www.sciencedirect.com/science/article/pii/S0925231222010591)] [[arXiv](https://arxiv.org/abs/2102.08663)]
15. **Yuki Mitsufuji**, Giorgio Fabbro, Stefan Uhlich, Fabian-Robert Stöter, Alexandre Défossez, Minseok Kim, Woosung Choi, Chin-Yun Yu, Kin-Wai Cheuk, "Music Demixing Challenge 2021," Frontiers in Signal Processing (Front. Signal Process.), vol. 1, 2022 [[Frontiers](https://www.frontiersin.org/articles/10.3389/frsip.2021.808395/abstract)] [[arXiv](https://arxiv.org/abs/2108.13559)] [[challenge](https://www.aicrowd.com/challenges/music-demixing-challenge-ismir-2021)] [[bibtex](https://www.frontiersin.org/articles/10.3389/frsip.2021.808395/bibTex)]
16. Jihui Aimee Zhang, Naoki Murata, Yu Maeno, Prasanga N. Samarasinghe, Thushara D. Abhayapala, **Yuki Mitsufuji**, "Coherence-Based Performance Analysis on Noise Reduction in Multichannel Active Noise Control Systems," Journal of the Acoustical Society of America (JASA), vol. 148, issue 3, 2020 [[ASA](https://asa.scitation.org/doi/10.1121/10.0001938)]
17. **Yuki Mitsufuji**, Norihiro Takamune, Shoichi Koyama, Hiroshi Saruwatari, "Multichannel Blind Source Separation Based on Evanescent-Region-Aware Non-Negative Tensor Factorization in Spherical Harmonic Domain," IEEE/ACM Transactions on Audio, Speech, and Language Processing (Trans. ASLP), vol. 29, pp. 607–617, 2020 [[IEEE](https://ieeexplore.ieee.org/document/9300203)] [[bibtex](https://dblp.org/rec/journals/taslp/MitsufujiTKS21.html?view=bibtex)]
18. Tetsu Magariyachi, **Yuki Mitsufuji**, "Analytic Error Control Methods for Efficient Rotation in Dynamic Binaural Rendering of Ambisonics," Journal of the Acoustical Society of America (JASA), vol. 147, issue 1, 2020 [[ASA](https://asa.scitation.org/doi/10.1121/10.0000569)]
19. Yu Maeno, **Yuki Mitsufuji**, Prasanga N. Samarasinghe, Naoki Murata, Thushara D. Abhayapala, "Spherical-Harmonic-Domain Feedforward Active Noise Control Using Sparse Decomposition of Reference Signals from Distributed Sensor Arrays," IEEE/ACM Transactions on Audio, Speech, and Language Processing (Trans. ASLP), vol. 28, pp. 656–670, 2019 [[IEEE](https://ieeexplore.ieee.org/abstract/document/8944020)] [[bibtex](https://dblp.org/rec/journals/taslp/MaenoMSMA20.html?view=bibtex)]
20. **Yuki Mitsufuji**, Stefan Uhlich, Norihiro Takamune, Daichi Kitamura, Shoichi Koyama, Hiroshi Saruwatari, "Multichannel Non-Negative Matrix Factorization Using Banded Spatial Covariance Matrices in Wavenumber Domain," IEEE/ACM Transactions on Audio, Speech, and Language Processing (Trans. ASLP), vol. 28, pp. 49–60, 2019 [[IEEE](https://ieeexplore.ieee.org/document/8878116)] [[bibtex](https://dblp.org/rec/journals/taslp/MitsufujiUTKKS20.html?view=bibtex)]
21. Fabian-Robert Stöter, Stefan Uhlich, Antoine Liutkus, **Yuki Mitsufuji**, "Open-Unmix – A Reference Implementation for Music Source Separation," Journal of Open Source Software (JOSS), vol. 4, no. 41, p. 1667, 2019 [[JOSS](https://joss.theoj.org/papers/10.21105/joss.01667)] [[code](https://github.com/sigsep/open-unmix-pytorch)] [[bibtex](https://dblp.org/rec/journals/jossw/StoterULM19.html?view=bibtex)]
22. **Yuki Mitsufuji**, Axel Röbel, "On the Use of a Spatial Cue as Prior Information for Stereo Sound Source Separation Based on Spatially Weighted Non-Negative Tensor Factorization," EURASIP Journal on Advances in Signal Processing (EURASIP J. Adv. Signal Process.), issue 1, 2014 [[Springer](https://asp-eurasipjournals.springeropen.com/articles/10.1186/1687-6180-2014-40)] [[bibtex](https://dblp.org/rec/journals/ejasp/MitsufujiR14.html?view=bibtex)]

### Conference Papers

1. Naoki Murata, Yuhta Takida, Chieh-Hsin Lai, Toshimitsu Uesaka, Bac Nguyen, Stefano Ermon, **Yuki Mitsufuji**, "GUDA: Counterfactual Group-wise Training Data Attribution for Diffusion Models via Unlearning," accepted at International Conference on Machine Learning (ICML), 2026 [[arXiv](https://arxiv.org/abs/2601.22651)]
href=\"https:\/\/arxiv.org\/abs\/2601.22651\" target=\"_blank\" rel=\"noreferrer noopener\">arXiv<\/a>]<\/li>\n\n\n\n<li>Silin Gao, Hao Zhao, Zeming Chen, Sepideh Mamooler, Antara Raaghavi Bhattacharya, Qiyu Wu, Hiromi Wakaki, <span style=\"text-decoration: underline;\">Yuki Mitsufuji<\/span>, Li Mi, Syrielle Montariol, Antoine Bosselut, \u201cSchema-Guided World Modeling for Understanding Hierarchical Visual Dynamics,\u201d accepted at International Conference on Machine Learning (<mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">ICML<\/mark>), 2026<\/li>\n\n\n\n<li>Geyang Guo, Hiromi Wakaki, <span style=\"text-decoration: underline;\">Yuki Mitsufuji<\/span>, Alan Ritter, Wei Xu, \u201cLearning to Route Languages for Multilingual Preference Optimization,\u201d accepted at International Conference on Machine Learning (<mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">ICML<\/mark>), 2026<\/li>\n\n\n\n<li>Zheyuan Hu, Chieh-Hsin Lai, Ge Wu, <span style=\"text-decoration: underline;\">Yuki Mitsufuji<\/span>, Stefano Ermon, \u201cMeanFlow Transformers with Representation Autoencoders,\u201d accepted at IEEE\/CVF Conference on Computer Vision and Pattern Recognition (<mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">CVPR<\/mark>), 2026 [<a href=\"https:\/\/arxiv.org\/abs\/2511.13019\" target=\"_blank\" rel=\"noreferrer noopener\">arXiv<\/a>][<a href=\"https:\/\/github.com\/sony\/mf-rae\" target=\"_blank\" rel=\"noreferrer noopener\">code<\/a>]<\/li>\n\n\n\n<li>Oh Hyun-Bin, Yuhta Takida, Toshimitsu Uesaka, Tae-Hyun Oh, <span style=\"text-decoration: underline;\">Yuki Mitsufuji<\/span>, \u201cPAVAS: Physics-Aware Video-to-Audio Synthesis,\u201d accepted at IEEE\/CVF Conference on Computer Vision and Pattern Recognition (<mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">CVPR<\/mark>), 2026 [<a href=\"https:\/\/arxiv.org\/abs\/2512.08282\" target=\"_blank\" rel=\"noreferrer noopener\">arXiv<\/a>][<a href=\"https:\/\/physics-aware-video-to-audio-synthesis.github.io\/\" target=\"_blank\" rel=\"noreferrer noopener\">demo<\/a>] \u2013 <mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">Oral<\/mark><\/li>\n\n\n\n<li>Christian Simon, Masato Ishii, Wei-Yao Wang, Koichi Saito, Akio Hayakawa, Dongseok Shim, Zhi Zhong, Shuyang Cui, Takashi Shibuya, Shusuke Takahashi, <span style=\"text-decoration: underline;\">Yuki Mitsufuji<\/span>, \u201cEchoes Over Time: Unlocking Length Generalization in Video-to-Audio Generation Models,\u201d accepted at IEEE\/CVF Conference on Computer Vision and Pattern Recognition (<mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">CVPR<\/mark>), 2026 [<a href=\"https:\/\/arxiv.org\/abs\/2602.20981\" target=\"_blank\" rel=\"noreferrer noopener\">arXiv<\/a>][<a href=\"https:\/\/echoesovertime.github.io\/\" target=\"_blank\" rel=\"noreferrer noopener\">demo<\/a>]<\/li>\n\n\n\n<li>Honggyu An, Jaewoo Jung, Mungyeom Kim, Chaehyun Kim, Minkyeong Jeon, Jisang Han, Kazumi Fukuda, Takuya Narihira, Hyunah Ko, Junsu Kim, Sunghwan Hong, <span style=\"text-decoration: underline;\">Yuki Mitsufuji<\/span>, Seungryong Kim, \u201cLearning Concept 3D Representations from Feed-Forward Novel View Synthesis,\u201d accepted at IEEE\/CVF Conference on Computer Vision and Pattern Recognition (<mark style=\"background-color:rgba(0, 0, 
0, 0)\" class=\"has-inline-color has-pale-pink-color\">CVPR<\/mark>), 2026 [<a href=\"https:\/\/arxiv.org\/abs\/2512.04021\" target=\"_blank\" rel=\"noreferrer noopener\">arXiv<\/a>][<a href=\"https:\/\/github.com\/cvlab-kaist\/C3G\" target=\"_blank\" rel=\"noreferrer noopener\">code<\/a>][<a href=\"https:\/\/cvlab-kaist.github.io\/C3G\/\" target=\"_blank\" rel=\"noreferrer noopener\">demo<\/a>]<\/li>\n\n\n\n<li>Kevin Rojas, Ye He, Chieh-Hsin Lai, Yuta Takida, <span style=\"text-decoration: underline;\">Yuki Mitsufuji<\/span>, Molei Tao, \u201cImproving Classifier-Free Guidance in Masked Diffusion: Low-Dim Theoretical Insights with High-Dim Impact,\u201d in Proc. International Conference on Learning Representations (<mark><mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">ICLR<\/mark><\/mark>), 2026 [<a href=\"https:\/\/openreview.net\/forum?id=mMK9pvQJxf\" target=\"_blank\" rel=\"noreferrer noopener\">OpenReview<\/a>][<a href=\"https:\/\/arxiv.org\/abs\/2507.08965\" target=\"_blank\" rel=\"noreferrer noopener\">arXiv<\/a>]<\/li>\n\n\n\n<li>JoungBin Lee, Jaewoo Jung, Jisang Han, Takuya Narihira, Kazumi Fukuda, Junyoung Seo, Sunghwan Hong, <span style=\"text-decoration: underline;\">Yuki Mitsufuji<\/span>, Seungryong Kim, \u201c3D Scene Prompting for Scene-Consistent Camera-Controllable Video Generation,\u201d in Proc. International Conference on Learning Representations (<mark><mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">ICLR<\/mark><\/mark>), 2026 [<a href=\"https:\/\/openreview.net\/forum?id=3XxoBwMusJ\" target=\"_blank\" rel=\"noreferrer noopener\">OpenReview<\/a>][<a href=\"https:\/\/arxiv.org\/abs\/2510.14945\" target=\"_blank\" rel=\"noreferrer noopener\">arXiv<\/a>][<a href=\"https:\/\/github.com\/cvlab-kaist\/3DScenePrompt\" target=\"_blank\" rel=\"noreferrer noopener\">code<\/a>][<a href=\"https:\/\/cvlab-kaist.github.io\/3DSchttps:\/\/cvlab-kaist.github.io\/3DScenePrompt\/enePrompt\/\" target=\"_blank\" rel=\"noreferrer noopener\">demo<\/a>]<\/li>\n\n\n\n<li>Seungheon Doh, Junghyun Koo, Marco A. Mart\u00ednez-Ram\u00edrez, Woosung Choi, Wei-Hsiang Liao, Qiyu Wu, Juhan Nam, <span style=\"text-decoration: underline;\">Yuki Mitsufuji<\/span>, \u201cLLM2Fx-Tools: Tool Calling For Music Post-Production,\u201d in Proc. International Conference on Learning Representations (<mark><mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">ICLR<\/mark><\/mark>), 2026 [<a href=\"https:\/\/openreview.net\/forum?id=OyIJvyyB3R\" target=\"_blank\" rel=\"noreferrer noopener\">OpenReview<\/a>][<a href=\"https:\/\/arxiv.org\/abs\/2512.01559\" target=\"_blank\" rel=\"noreferrer noopener\">arXiv<\/a>][<a href=\"https:\/\/seungheondoh.github.io\/llm2fx-tools-demo\/\" target=\"_blank\" rel=\"noreferrer noopener\">demo<\/a>]<\/li>\n\n\n\n<li>Yuhta Takida, Satoshi Hayakawa, Takashi Shibuya, Masaaki Imaizumi, Naoki Murata, Bac Nguyen, Toshimitsu Uesaka, Chieh-Hsin Lai, <span style=\"text-decoration: underline;\">Yuki Mitsufuji<\/span>, \u201cSONA: Learning Conditional, Unconditional, and Matching-Aware Discriminator,\u201d in Proc. 
12. Bac Nguyen, Yuhta Takida, Naoki Murata, Chieh-Hsin Lai, Toshimitsu Uesaka, Stefano Ermon, **Yuki Mitsufuji**, "Improved Object-Centric Diffusion Learning with Registers and Contrastive Alignment," in Proc. International Conference on Learning Representations (ICLR), 2026 [[OpenReview](https://openreview.net/forum?id=WQHj907qSI)] [[arXiv](https://arxiv.org/abs/2601.01224)]
13. Yonghyun Park, Chieh-Hsin Lai, Satoshi Hayakawa, Yuhta Takida, Naoki Murata, Wei-Hsiang Liao, Woosung Choi, Kin Wai Cheuk, Junghyun Koo, **Yuki Mitsufuji**, "Concept-TRAK: Understanding How Diffusion Models Learn Concepts through Concept-Level Attribution," in Proc. International Conference on Learning Representations (ICLR), 2026 [[OpenReview](https://openreview.net/forum?id=TRmIcgMe8I)] [[arXiv](https://arxiv.org/abs/2507.06547)]
14. Zheyuan Hu, Chieh-Hsin Lai, **Yuki Mitsufuji**, Stefano Ermon, "CMT: Mid-Training for Efficient Learning of Consistency, Mean Flow, and Flow Map Models," in Proc. International Conference on Learning Representations (ICLR), 2026 [[OpenReview](https://openreview.net/forum?id=2B8GkGTgmY)] [[arXiv](https://arxiv.org/abs/2509.24526)] [[code](https://github.com/sony/cmt)]
15. Wei-Yao Wang, Kazuya Tateishi, Qiyu Wu, Shusuke Takahashi, **Yuki Mitsufuji**, "VIRTUE: Visual-Interactive Text-Image Universal Embedder," in Proc. International Conference on Learning Representations (ICLR), 2026 [[OpenReview](https://openreview.net/forum?id=H4RgGzx4iL)] [[arXiv](https://arxiv.org/abs/2510.00523)]
16. Satvik Dixit, Koichi Saito, Zhi Zhong, **Yuki Mitsufuji**, Chris Donahue, "FoleyBench: A Benchmark For Video-to-Audio Models," in Proc. International Conference on Acoustics, Speech, and Signal Processing (ICASSP), pp. 14512–14516, 2026 [[IEEE](https://ieeexplore.ieee.org/document/11461920)] [[arXiv](https://arxiv.org/abs/2511.13219)] [[dataset](https://huggingface.co/datasets/FoleyBench/foleybench)] [[demo](https://gclef-cmu.org/foleybench/)]
17. Eloi Moliner, Marco A. Martínez-Ramírez, Junghyun Koo, Wei-Hsiang Liao, Kin Wai Cheuk, Joan Serrà, Vesa Välimäki, **Yuki Mitsufuji**, "Automatic Music Mixing Using a Generative Model of Effect Embeddings," in Proc. International Conference on Acoustics, Speech, and Signal Processing (ICASSP), pp. 14582–14586, 2026 [[IEEE](https://ieeexplore.ieee.org/document/11462677)] [[arXiv](https://arxiv.org/abs/2511.08040)] [[code](https://github.com/SonyResearch/megami)] [[demo](https://sonyresearch.github.io/MEGAMI/)]
18. Alain Riou, Joan Serrà, **Yuki Mitsufuji**, "Automatic Music Sample Identification with Multi-Track Contrastive Learning," in Proc. International Conference on Acoustics, Speech, and Signal Processing (ICASSP), pp. 14587–14591, 2026 [[IEEE](https://ieeexplore.ieee.org/document/11461576)] [[arXiv](https://arxiv.org/abs/2510.11507)] [[code](https://github.com/sony/sampleid/)]
19. Zachary Novack, Koichi Saito, Zhi Zhong, Takashi Shibuya, Shuyang Cui, Julian McAuley, Taylor Berg-Kirkpatrick, Christian Simon, Shusuke Takahashi, **Yuki Mitsufuji**, "FlashFoley: Fast Interactive Sketch2Audio Generation," in Proc. International Conference on Acoustics, Speech, and Signal Processing (ICASSP), pp. 15497–15501, 2026 [[IEEE](https://ieeexplore.ieee.org/document/11465146)] [[demo](https://anonaudiogen.github.io/web/)]
20. Eleonora Mancini, Joan Serrà, Paolo Torroni, **Yuki Mitsufuji**, "Leveraging Whisper Embeddings for Audio-based Lyrics Matching," in Proc. International Conference on Acoustics, Speech, and Signal Processing (ICASSP), pp. 15967–15971, 2026 [[IEEE](https://ieeexplore.ieee.org/document/11461231)] [[arXiv](https://arxiv.org/abs/2510.08176)] [[code](https://github.com/helemanc/audio-based-lyrics-matching)]
21. Akira Takahashi, Shusuke Takahashi, **Yuki Mitsufuji**, "MMAudioSep: Taming Video-to-Audio Generative Model Towards Video/Text-Queried Sound Separation," in Proc. International Conference on Acoustics, Speech, and Signal Processing (ICASSP), pp. 15667–15671, 2026 [[IEEE](https://ieeexplore.ieee.org/document/11462711)] [[arXiv](https://arxiv.org/abs/2510.09065)] [[code](https://github.com/sony/mmaudiosep)]
22. Azalea Gui, Woosung Choi, Junghyun Koo, Kazuki Shimada, Takashi Shibuya, Joan Serrà, Wei-Hsiang Liao, **Yuki Mitsufuji**, "Towards Blind Data Cleaning: A Case Study in Music Source Separation," in Proc. International Conference on Acoustics, Speech, and Signal Processing (ICASSP), pp. 14882–14886, 2026 [[IEEE](https://ieeexplore.ieee.org/document/11462165)] [[arXiv](https://arxiv.org/abs/2510.15409)]
23. Kazuki Shimada, Christian Simon, Takashi Shibuya, Shusuke Takahashi, **Yuki Mitsufuji**, "SAVGBench: Benchmarking Spatially Aligned Audio-Video Generation," in Proc. International Conference on Acoustics, Speech, and Signal Processing (ICASSP), pp. 11977–11981, 2026 [[IEEE](https://ieeexplore.ieee.org/document/11464978)] [[arXiv](https://arxiv.org/abs/2412.13462)] [[code](https://github.com/SonyResearch/SAVGBench)] [[dataset](https://zenodo.org/records/17139882)]
24. Keisuke Toyama, Zhi Zhong, Akira Takahashi, Shusuke Takahashi, **Yuki Mitsufuji**, "Do Foundational Audio Encoders Understand Music Structure?," in Proc. International Conference on Acoustics, Speech, and Signal Processing (ICASSP), pp. 15242–15246, 2026 [[IEEE](https://ieeexplore.ieee.org/document/11464137)] [[arXiv](https://arxiv.org/abs/2512.17209)] [[code](https://github.com/sony/MSA-bench)]
25. Fabio Morreale, Joan Serrà, **Yuki Mitsufuji**, "Emergent, not Immanent: A Baradian Reading of Explainable AI," in Proc. ACM Conference on Human Factors in Computing Systems (CHI), pp. 1–15, 2026 [[ACM](https://dl.acm.org/doi/10.1145/3772318.3790725)] [[arXiv](https://arxiv.org/abs/2601.15029)]
26. Xinlei Niu, Kin Wai Cheuk, Jing Zhang, Naoki Murata, Chieh-Hsin Lai, Michele Mancusi, Woosung Choi, Giorgio Fabbro, Wei-Hsiang Liao, Charles Patrick Martin, **Yuki Mitsufuji**, "SteerMusic: Enhanced Musical Consistency for Zero-shot Text-Guided and Personalized Music Editing," in Proc. Annual AAAI Conference on Artificial Intelligence (AAAI), pp. 2000–2010, 2026 [[AAAI](https://ojs.aaai.org/index.php/AAAI/article/view/37181)] [[arXiv](https://arxiv.org/abs/2504.10826)] [[code](https://github.com/sony/steermusic/)] [[demo](https://steermusic.pages.dev/)]
27. Junyoung Seo, Jisang Han, Jaewoo Jung, Siyoon Jin, Joungbin Lee, Takuya Narihira, Kazumi Fukuda, Takashi Shibuya, Donghoon Ahn, Shoukang Hu, Seungryong Kim, **Yuki Mitsufuji**, "Video Camera Trajectory Editing with Generative Rendering from Estimated Geometry," in Proc. Annual AAAI Conference on Artificial Intelligence (AAAI), pp. 8787–8795, 2026 [[AAAI](https://ojs.aaai.org/index.php/AAAI/article/view/37832)] [[arXiv](https://arxiv.org/abs/2506.13697)] [[code](https://github.com/cvlab-kaist/Vid-CamEdit)] [[demo](https://cvlab-kaist.github.io/Vid-CamEdit/)]
Mart\u00ednez-Ram\u00edrez, Yukara Ikemiya, Naoki Murata, Yuhta Takida, Wei-Hsiang Liao, <span style=\"text-decoration: underline;\">Yuki Mitsufuji<\/span>, \u201cLarge-Scale Training Data Attribution for Music Generative Models via Unlearning,\u201d accepted at Neural Information Processing Systems (<mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">NeurIPS<\/mark>), 2025 [<a href=\"https:\/\/openreview.net\/forum?id=qj3ps8lNIf\" target=\"_blank\" rel=\"noreferrer noopener\">OpenReview<\/a>][<a href=\"https:\/\/arxiv.org\/abs\/2506.18312\" target=\"_blank\" rel=\"noreferrer noopener\">arXiv<\/a>]<\/li>\n\n\n\n<li>Yonghyun Kim, Wayne Chi, Anastasios N. Angelopoulos, Wei-Lin Chiang, Koichi Saito, Shinji Watanabe, <span style=\"text-decoration: underline;\">Yuki Mitsufuji<\/span>, Chris Donahue, \u201cMusic Arena: Live Evaluation for Text-to-Music,\u201d accepted at Neural Information Processing Systems (<mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">NeurIPS<\/mark>), 2025 [<a href=\"https:\/\/openreview.net\/forum?id=kMrpfLjIlM\" target=\"_blank\" rel=\"noreferrer noopener\">OpenReview<\/a>][<a href=\"https:\/\/arxiv.org\/abs\/2507.20900\" target=\"_blank\" rel=\"noreferrer noopener\">arXiv<\/a>][<a href=\"https:\/\/github.com\/gclef-cmu\/music-arena\" target=\"_blank\" rel=\"noreferrer noopener\">code<\/a>][<a href=\"http:\/\/music-arena.org\/\" target=\"_blank\" rel=\"noreferrer noopener\">demo<\/a>]<\/li>\n\n\n\n<li>Chihiro Nagashima, Akira Takahashi, Zhi Zhong, Shusuke Takahashi, <span style=\"text-decoration: underline;\">Yuki Mitsufuji<\/span>, \u201cStudies for: A Human-AI Co-Creative Sound Artwork Using a Real-time Multi-channel Sound Generation Model,\u201d accepted at Neural Information Processing Systems (<mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">NeurIPS<\/mark>), 2025 [<a href=\"https:\/\/openreview.net\/forum?id=bIiWZyrS6a\" target=\"_blank\" rel=\"noreferrer noopener\">OpenReview<\/a>][<a href=\"https:\/\/arxiv.org\/abs\/2510.25228\" target=\"_blank\" rel=\"noreferrer noopener\">arXiv<\/a>][<a href=\"https:\/\/sony.github.io\/studies-for\/\" target=\"_blank\" rel=\"noreferrer noopener\">demo<\/a>][<a href=\"https:\/\/www.ntticc.or.jp\/en\/hive\/artist-talk\/20250215\/\" target=\"_blank\" rel=\"noreferrer noopener\">video<\/a>]<\/li>\n\n\n\n<li>Jisang Han, Honggyu An, Jaewoo Jung, Takuya Narihira, Junyoung Seo, Kazumi Fukuda, Chaehyun Kim, Sunghwan Hong, <span style=\"text-decoration: underline;\">Yuki Mitsufuji<\/span>, Seungryong Kim, \u201cEnhancing 3D Reconstruction for Dynamic Scenes,\u201d accepted at Neural Information Processing Systems (<mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">NeurIPS<\/mark>), 2025 [<a href=\"https:\/\/openreview.net\/forum?id=oBOVYRRSy2\" target=\"_blank\" rel=\"noreferrer noopener\">OpenReview<\/a>][<a href=\"https:\/\/arxiv.org\/abs\/2504.06264\" target=\"_blank\" rel=\"noreferrer noopener\">arXiv<\/a>][<a href=\"https:\/\/cvlab-kaist.github.io\/DDUSt3R\/\" target=\"_blank\" rel=\"noreferrer noopener\">demo<\/a>]<\/li>\n\n\n\n<li>Jiaben Chen,&nbsp;Zixin Wang,&nbsp;Ailing Zeng,&nbsp;Yang Fu,&nbsp;Xueyang Yu,&nbsp;Siyuan Cen,&nbsp;Julian Tanke,&nbsp;Yihang Chen,&nbsp;Koichi Saito,&nbsp;<span style=\"text-decoration: underline;\">Yuki Mitsufuji<\/span>,&nbsp;Chuang Gan, \u201cTalkCuts: A Large-Scale Dataset for Multi-Shot Human Speech Video 
Generation,\u201d accepted at Neural Information Processing Systems (<mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">NeurIPS<\/mark>), 2025 [<a href=\"https:\/\/openreview.net\/forum?id=4a0w7AkrY7\" target=\"_blank\" rel=\"noreferrer noopener\">OpenReview<\/a>][<a href=\"https:\/\/arxiv.org\/abs\/2510.07249\" target=\"_blank\" rel=\"noreferrer noopener\">arXiv<\/a>][<a href=\"https:\/\/www.kaggle.com\/datasets\/f6e549a12ebd5ee185dc27247602d6e3828b772a68bae1f080587a6b84fafbbd\" target=\"_blank\" rel=\"noreferrer noopener\">dataset<\/a>][<a href=\"https:\/\/talkcuts.github.io\/\" target=\"_blank\" rel=\"noreferrer noopener\">demo<\/a>]<\/li>\n\n\n\n<li>Jihui (Aimee) Zhang, Thushara D. Abhayapala, Naoki Murata, Prasanga N. Samarasinghe, Yu Maeno, <span style=\"text-decoration: underline;\">Yuki Mitsufuji<\/span>, \u201cPerformance Analysis of Active Noise Control over a Spatial Region,\u201d in Proc. Asia Pacific Signal and Information Processing Association Annual Summit and Conference (<mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">APSIPA ASC<\/mark>), 2025 [<a href=\"https:\/\/ieeexplore.ieee.org\/document\/11249302\" target=\"_blank\" rel=\"noreferrer noopener\">IEEE<\/a>]<\/li>\n\n\n\n<li>Zhuoyuan Mao, Mengjie Zhao, Qiyu Wu, Hiromi Wakaki, <span style=\"text-decoration: underline;\">Yuki Mitsufuji<\/span>, \u201cDeepResonance: Enhancing Multimodal Music Understanding via Music-centric Multi-way Instruction Tuning,\u201d in Proc. Conference on Empirical Methods in Natural Language Processing (<mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">EMNLP<\/mark>), pp. 12937\u201312959, 2025 [<a href=\"https:\/\/aclanthology.org\/2025.emnlp-main.653\/\" target=\"_blank\" rel=\"noreferrer noopener\">ACL<\/a>][<a href=\"https:\/\/arxiv.org\/abs\/2502.12623\" target=\"_blank\" rel=\"noreferrer noopener\">arXiv<\/a>][<a href=\"https:\/\/github.com\/sony\/DeepResonance\" target=\"_blank\" rel=\"noreferrer noopener\">code<\/a>][<a href=\"https:\/\/huggingface.co\/datasets\/Sony\/DeepResonance_data_models\/tree\/main\/data\" target=\"_blank\" rel=\"noreferrer noopener\">dataset<\/a>]<\/li>\n\n\n\n<li>Geyang Guo, Tarek Naous, Hiromi Wakaki, Yukiko Nishimura, <span style=\"text-decoration: underline;\">Yuki Mitsufuji<\/span>, Alan Ritter, Wei Xu, \u201cCARE: Aligning Language Models for Regional Cultural Awareness,\u201d in Proc. Conference on Empirical Methods in Natural Language Processing (<mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">EMNLP<\/mark>), pp. 32854\u201332883, 2025 [<a href=\"https:\/\/aclanthology.org\/2025.emnlp-main.1669\/\" target=\"_blank\" rel=\"noreferrer noopener\">ACL<\/a>][<a href=\"https:\/\/arxiv.org\/abs\/2504.05154\" target=\"_blank\" rel=\"noreferrer noopener\">arXiv<\/a>][<a href=\"https:\/\/github.com\/Guochry\/CARE\" target=\"_blank\" rel=\"noreferrer noopener\">code<\/a>][<a href=\"https:\/\/huggingface.co\/datasets\/geyang627\/CARE\" target=\"_blank\" rel=\"noreferrer noopener\">dataset<\/a>]<\/li>\n\n\n\n<li>Yuanhong Chen, Kazuki Shimada, Christian Simon, Yukara Ikemiya, Takashi Shibuya, <span style=\"text-decoration: underline;\">Yuki Mitsufuji<\/span>, \u201cCCStereo: Audio-Visual Contextual and Contrastive Learning for Binaural Audio Generation,\u201d in Proc. 
ACM Multimedia (<mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">ACMMM<\/mark>), pp. 7510\u20137518, 2025 [<a href=\"https:\/\/dl.acm.org\/doi\/10.1145\/3746027.3754919\" target=\"_blank\" rel=\"noreferrer noopener\">ACM<\/a>][<a href=\"https:\/\/arxiv.org\/abs\/2501.02786\" target=\"_blank\" rel=\"noreferrer noopener\">arXiv<\/a>][<a href=\"https:\/\/github.com\/SonyResearch\/CCStereo\" target=\"_blank\" rel=\"noreferrer noopener\">code<\/a>] \u2013 <mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">ACM Showcase<\/mark> [<a href=\"https:\/\/www.growkudos.com\/publications\/10.1145%25252F3746027.3754919\/reader\" target=\"_blank\" rel=\"noreferrer noopener\">URL<\/a>]<\/li>\n\n\n\n<li>Shuichiro Nishigori, Koichi Saito, Naoki Murata, Masato Hirano, Shusuke Takahashi, <span style=\"text-decoration: underline;\">Yuki Mitsufuji<\/span>, \u201cSchr\u00f6dinger Bridge Consistency Trajectory Models for Speech Enhancement,\u201d in Proc. IEEE Workshop on Applications of Signal Processing to Audio and Acoustics (<mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">WASPAA<\/mark>), 2025 [<a href=\"https:\/\/ieeexplore.ieee.org\/document\/11231011\" target=\"_blank\" rel=\"noreferrer noopener\">IEEE<\/a>][<a href=\"https:\/\/arxiv.org\/abs\/2507.11925\" target=\"_blank\" rel=\"noreferrer noopener\">arXiv<\/a>][<a href=\"https:\/\/github.com\/sony\/sbctm\/\" target=\"_blank\" rel=\"noreferrer noopener\">code<\/a>]<\/li>\n\n\n\n<li>Chin-Yun Yu, Marco A. Mart\u00ednez-Ram\u00edrez, Junghyun Koo, Wei-Hsiang Liao, <span style=\"text-decoration: underline;\">Yuki Mitsufuji<\/span>, Gy\u00f6rgy Fazekas, \u201cImproving Inference-Time Optimisation for Vocal Effects Style Transfer with a Gaussian Prior,\u201d in Proc. IEEE Workshop on Applications of Signal Processing to Audio and Acoustics (<mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">WASPAA<\/mark>), 2025 [<a href=\"https:\/\/ieeexplore.ieee.org\/document\/11231006\" target=\"_blank\" rel=\"noreferrer noopener\">IEEE<\/a>][<a href=\"https:\/\/arxiv.org\/abs\/2505.11315\" target=\"_blank\" rel=\"noreferrer noopener\">arXiv<\/a>][<a href=\"https:\/\/github.com\/SonyResearch\/diffvox\" target=\"_blank\" rel=\"noreferrer noopener\">code<\/a>]<\/li>\n\n\n\n<li>Zhi Zhong, Akira Takahashi, Shuyang Cui, Keisuke Toyama, Shusuke Takahashi, <span style=\"text-decoration: underline;\">Yuki Mitsufuji<\/span>, \u201cSpecMaskFoley: Steering Pretrained Spectral Masked Generative Transformer Toward Synchronized Video-to-audio Synthesis via ControlNet,\u201d in Proc. IEEE Workshop on Applications of Signal Processing to Audio and Acoustics (<mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">WASPAA<\/mark>), 2025 [<a href=\"https:\/\/ieeexplore.ieee.org\/document\/11230970\" target=\"_blank\" rel=\"noreferrer noopener\">IEEE<\/a>][<a href=\"https:\/\/arxiv.org\/abs\/2505.16195\" target=\"_blank\" rel=\"noreferrer noopener\">arXiv<\/a>][<a href=\"https:\/\/zzaudio.github.io\/SpecMaskFoley_Demo\/\" target=\"_blank\" rel=\"noreferrer noopener\">demo<\/a>]<\/li>\n\n\n\n<li>Seungheon Doh, Junghyun Koo, Marco A. Mart\u00ednez-Ram\u00edrez, Wei-Hsiang Liao, Juhan Nam, <span style=\"text-decoration: underline;\">Yuki Mitsufuji<\/span>, \u201cCan Large Language Models Predict Audio Effects Parameters from Natural Language?,\u201d in Proc. 
IEEE Workshop on Applications of Signal Processing to Audio and Acoustics (<mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">WASPAA<\/mark>), 2025 [<a href=\"https:\/\/ieeexplore.ieee.org\/document\/11230953\" target=\"_blank\" rel=\"noreferrer noopener\">IEEE<\/a>][<a href=\"https:\/\/arxiv.org\/abs\/2505.20770\" target=\"_blank\" rel=\"noreferrer noopener\">arXiv<\/a>][<a href=\"https:\/\/github.com\/SonyResearch\/LLM2Fx\" target=\"_blank\" rel=\"noreferrer noopener\">code<\/a>][<a href=\"https:\/\/huggingface.co\/collections\/seungheondoh\/llm2fx-6821b961b982fe1eab1b00bf\" target=\"_blank\" rel=\"noreferrer noopener\">dataset<\/a>][<a href=\"https:\/\/seungheondoh.github.io\/llm2fx-demo\/\" target=\"_blank\" rel=\"noreferrer noopener\">demo<\/a>]<\/li>\n\n\n\n<li>Christian Simon, Masato Ishii, Akio Hayakawa, Zhi Zhong, Shusuke Takahashi, Takashi Shibuya, <span style=\"text-decoration: underline;\">Yuki Mitsufuji<\/span>, \u201cTITAN-Guide: Taming Inference-Time Alignment for Guided Text-to-Video Diffusion Models,\u201d in Proc. International Conference on Computer Vision (<mark><mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">ICCV<\/mark><\/mark>), pp. 16662\u201316671, 2025 [<a href=\"https:\/\/openaccess.thecvf.com\/content\/ICCV2025\/html\/Simon_TITAN-Guide_Taming_Inference-Time_Alignment_for_Guided_Text-to-Video_Diffusion_Models_ICCV_2025_paper.html\" target=\"_blank\" rel=\"noreferrer noopener\">CVF<\/a>][<a href=\"https:\/\/arxiv.org\/abs\/2508.00289\" target=\"_blank\" rel=\"noreferrer noopener\">arXiv<\/a>][<a href=\"https:\/\/titanguide.github.io\/\" target=\"_blank\" rel=\"noreferrer noopener\">demo<\/a>]<\/li>\n\n\n\n<li>Zerui Tao, Yuhta Takida, Naoki Murata, Qibin Zhao, <span style=\"text-decoration: underline;\">Yuki Mitsufuji<\/span>, \u201cTransformed Low-rank Adaptation via Tensor Decomposition and Its Applications to Text-to-Image Models,\u201d in Proc. International Conference on Computer Vision (<mark><mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">ICCV<\/mark><\/mark>), pp. 16333\u201316344, 2025 [<a href=\"https:\/\/openaccess.thecvf.com\/content\/ICCV2025\/html\/Tao_Transformed_Low-rank_Adaptation_via_Tensor_Decomposition_and_Its_Applications_to_ICCV_2025_paper.html\" target=\"_blank\" rel=\"noreferrer noopener\">CVF<\/a>][<a href=\"https:\/\/arxiv.org\/abs\/2501.08727\" target=\"_blank\" rel=\"noreferrer noopener\">arXiv<\/a>][<a href=\"https:\/\/github.com\/taozerui\/tlora_diffusion\" target=\"_blank\" rel=\"noreferrer noopener\">code<\/a>]<\/li>\n\n\n\n<li>Yichen Huang, Zachary Novack, Koichi Saito, Jiatong Shi, Shinji Watanabe, <span style=\"text-decoration: underline;\">Yuki Mitsufuji<\/span>, John Thickstun, Chris Donahue, \u201cAligning Text-to-Music Evaluation with Human Preferences,\u201d in Proc. 
International Society for Music Information Retrieval (<mark><mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">ISMIR<\/mark><\/mark>) Conference, 2025 [<a href=\"https:\/\/ismir2025program.ismir.net\/poster_314.html\" target=\"_blank\" rel=\"noreferrer noopener\">ISMIR<\/a>][<a href=\"https:\/\/arxiv.org\/abs\/2503.16669\" target=\"_blank\" rel=\"noreferrer noopener\">arXiv<\/a>][<a href=\"https:\/\/github.com\/i-need-sleep\/mad\" target=\"_blank\" rel=\"noreferrer noopener\">code<\/a>][<a href=\"https:\/\/huggingface.co\/datasets\/i-need-sleep\/musicprefs\" target=\"_blank\" rel=\"noreferrer noopener\">dataset<\/a>][<a href=\"https:\/\/mad-metric-83cde1d399d1.herokuapp.com\/\" target=\"_blank\" rel=\"noreferrer noopener\">demo<\/a>]<\/li>\n\n\n\n<li>Junghyun Koo, Marco A. Mart\u00ednez-Ram\u00edrez, Wei-Hsiang Liao, Giorgio Fabbro, Michele Mancusi, <span style=\"text-decoration: underline;\">Yuki Mitsufuji<\/span>, \u201cITO-Master: Inference-Time Optimization for Audio Effects Modeling of Music Mastering Processors,\u201d in Proc. International Society for Music Information Retrieval (<mark><mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">ISMIR<\/mark><\/mark>) Conference, 2025 [<a href=\"https:\/\/ismir2025program.ismir.net\/poster_59.html\" target=\"_blank\" rel=\"noreferrer noopener\">ISMIR<\/a>][<a href=\"https:\/\/arxiv.org\/abs\/2506.16889\" target=\"_blank\" rel=\"noreferrer noopener\">arXiv<\/a>][<a href=\"https:\/\/github.com\/SonyResearch\/ITO-Master\" target=\"_blank\" rel=\"noreferrer noopener\">code<\/a>]<\/li>\n\n\n\n<li>Yen-Tung Yeh, Junghyun Koo, Marco Mart\u00ednez-Ram\u00edrez, Wei-Hsiang Liao, Yi-Hsuan Yang, <span style=\"text-decoration: underline;\">Yuki Mitsufuji<\/span>, \u201cFx-Encoder++: Extracting Instrument-Wise Audio Effect Representations from Mixtures,\u201d in Proc. International Society for Music Information Retrieval (<mark><mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">ISMIR<\/mark><\/mark>) Conference, 2025 [<a href=\"https:\/\/ismir2025program.ismir.net\/poster_148.html\" target=\"_blank\" rel=\"noreferrer noopener\">ISMIR<\/a>][<a href=\"https:\/\/arxiv.org\/abs\/2507.02273\" target=\"_blank\" rel=\"noreferrer noopener\">arXiv<\/a>][<a href=\"https:\/\/github.com\/SonyResearch\/Fx-Encoder_PlusPlus\" target=\"_blank\" rel=\"noreferrer noopener\">code<\/a>]<\/li>\n\n\n\n<li>Recep Oguz Araz, Guillem Cort\u00e8s-Sebasti\u00e0, Emilio Molina, Joan Serr\u00e0, Xavier Serra, <span style=\"text-decoration: underline;\">Yuki Mitsufuji<\/span>, Dmitry Bogdanov, \u201cEnhancing Neural Audio Fingerprint Robustness to Audio Degradation for Music Identification,\u201d in Proc. International Society for Music Information Retrieval (<mark><mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">ISMIR<\/mark><\/mark>) Conference, 2025 [<a href=\"https:\/\/ismir2025program.ismir.net\/poster_186.html\" target=\"_blank\" rel=\"noreferrer noopener\">ISMIR<\/a>][<a href=\"https:\/\/arxiv.org\/abs\/2506.22661\" target=\"_blank\" rel=\"noreferrer noopener\">arXiv<\/a>][<a href=\"https:\/\/github.com\/raraz15\/neural-music-fp\" target=\"_blank\" rel=\"noreferrer noopener\">code<\/a>][<a href=\"https:\/\/zenodo.org\/records\/15736620\" target=\"_blank\" rel=\"noreferrer noopener\">dataset<\/a>]<\/li>\n\n\n\n<li>Yixiao Zhang, Yukara Ikemiya, Woosung Choi, Naoki Murata, Marco A. 
Mart\u00ednez-Ram\u00edrez, Liwei Lin, Gus Xia, Wei-Hsiang Liao, <span style=\"text-decoration: underline;\">Yuki Mitsufuji<\/span>, Simon Dixon, \u201cInstruct-MusicGen: Unlocking Text-to-Music Editing for Music Language Models via Instruction Tuning,\u201d in Proc. International Society for Music Information Retrieval (<mark><mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">ISMIR<\/mark><\/mark>) Conference, 2025 [<a href=\"https:\/\/ismir2025program.ismir.net\/poster_64.html\" target=\"_blank\" rel=\"noreferrer noopener\">ISMIR<\/a>][<a href=\"https:\/\/arxiv.org\/abs\/2405.18386\" target=\"_blank\" rel=\"noreferrer noopener\">arXiv<\/a>][<a href=\"https:\/\/github.com\/ldzhangyx\/instruct-MusicGen\" target=\"_blank\" rel=\"noreferrer noopener\">code<\/a>][<a href=\"https:\/\/foul-ice-5ea.notion.site\/Instruct-MusicGen-Demo-Page-a1e7d8d474f74df18bda9539d96687ab\" target=\"_blank\" rel=\"noreferrer noopener\">demo<\/a>]<\/li>\n\n\n\n<li>Yigitcan \u00d6zer, Woosung Choi, Joan Serr\u00e0, Mayank Kumar Singh, Wei-Hsiang Liao, <span style=\"text-decoration: underline;\">Yuki Mitsufuji<\/span>, \u201cA Comprehensive Real-World Assessment of Audio Watermarking Algorithms: Will They Survive Neural Codecs?,\u201d in Proc. Annual Conference of the International Speech Communication Association (<mark><mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">INTERSPEECH<\/mark><\/mark>), pp. 5113\u20135117, 2025 [<a href=\"https:\/\/www.isca-archive.org\/interspeech_2025\/ozer25_interspeech.html\" target=\"_blank\" rel=\"noreferrer noopener\">ISCA<\/a>][<a href=\"https:\/\/arxiv.org\/abs\/2505.19663\" target=\"_blank\" rel=\"noreferrer noopener\">arXiv<\/a>][<a href=\"https:\/\/github.com\/SonyResearch\/raw_bench\" target=\"_blank\" rel=\"noreferrer noopener\">code<\/a>]<\/li>\n\n\n\n<li>Chin-Yun Yu, Marco A. Mart\u00ednez-Ram\u00edrez, Junghyun Koo, Ben Hayes, Wei-Hsiang Liao, Gy\u00f6rgy Fazekas, <span style=\"text-decoration: underline;\">Yuki Mitsufuji<\/span>, \u201cDiffVox: A Differentiable Model for Capturing and Analysing Professional Effects Distributions,\u201d in Proc. Digital Audio Effect Conference (<mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">DAFx<\/mark>), pp. 334\u2013341, 2025 [<a href=\"https:\/\/www.dafx.de\/paper-archive\/2025\/DAFx25_paper_9.pdf\" target=\"_blank\" rel=\"noreferrer noopener\">DAFx<\/a>][<a href=\"https:\/\/arxiv.org\/abs\/2504.14735\" target=\"_blank\" rel=\"noreferrer noopener\">arXiv<\/a>][<a href=\"https:\/\/github.com\/SonyResearch\/diffvox\" target=\"_blank\" rel=\"noreferrer noopener\">code<\/a>]<\/li>\n\n\n\n<li>Satoshi Hayakawa, Yuhta Takida, Masaaki Imaizumi, Hiromi Wakaki, <span style=\"text-decoration: underline;\">Yuki Mitsufuji<\/span>, \u201cDistillation of Discrete Diffusion through Dimensional Correlations,\u201d in Proc. International Conference on Machine Learning (<mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">ICML<\/mark>), pp. 
22259\u201322297, 2025 [<a href=\"https:\/\/proceedings.mlr.press\/v267\/hayakawa25a.html\" target=\"_blank\" rel=\"noreferrer noopener\">PMLR<\/a>][<a href=\"https:\/\/openreview.net\/forum?id=jCEl0aJpF6\" target=\"_blank\" rel=\"noreferrer noopener\">OpenReview<\/a>][<a href=\"https:\/\/arxiv.org\/abs\/2410.08709\" target=\"_blank\" rel=\"noreferrer noopener\">arXiv<\/a>][<a href=\"https:\/\/github.com\/sony\/di4c\" target=\"_blank\" rel=\"noreferrer noopener\">code<\/a>]<\/li>\n\n\n\n<li>Joan Serr\u00e0, R. Oguz Araz, Dmitry Bogdanov, <span style=\"text-decoration: underline;\">Yuki Mitsufuji<\/span>, \u201cSupervised Contrastive Learning from Weakly-labeled Audio Segments for Musical Version Matching,\u201d in Proc. International Conference on Machine Learning (<mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">ICML<\/mark>), pp. 53923\u201353939, 2025 [<a href=\"https:\/\/proceedings.mlr.press\/v267\/serra25a.html\" target=\"_blank\" rel=\"noreferrer noopener\">PMLR<\/a>][<a href=\"https:\/\/openreview.net\/forum?id=2GJkMGMACH\" target=\"_blank\" rel=\"noreferrer noopener\">OpenReview<\/a>][<a href=\"https:\/\/arxiv.org\/abs\/2502.16936\" target=\"_blank\" rel=\"noreferrer noopener\">arXiv<\/a>][<a href=\"https:\/\/github.com\/sony\/clews\" target=\"_blank\" rel=\"noreferrer noopener\">code<\/a>]<\/li>\n\n\n\n<li>Gianluigi Silvestri, Luca Ambrogioni, Chieh-Hsin Lai, Yuhta Takida, <span style=\"text-decoration: underline;\">Yuki Mitsufuji<\/span>, \u201cVCT: Training Consistency Models with Variational Noise Coupling,\u201d in Proc. International Conference on Machine Learning (<mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">ICML<\/mark>), pp. 55657\u201355683, 2025 [<a href=\"https:\/\/proceedings.mlr.press\/v267\/silvestri25a.html\" target=\"_blank\" rel=\"noreferrer noopener\">PMLR<\/a>][<a href=\"https:\/\/openreview.net\/forum?id=CMoX0BEsDs\" target=\"_blank\" rel=\"noreferrer noopener\">OpenReview<\/a>][<a href=\"https:\/\/arxiv.org\/abs\/2502.18197\" target=\"_blank\" rel=\"noreferrer noopener\">arXiv<\/a>][<a href=\"https:\/\/github.com\/sony\/vct\" target=\"_blank\" rel=\"noreferrer noopener\">code<\/a>]<\/li>\n\n\n\n<li>Masato Ishii, Akio Hayakawa, Takashi Shibuya, <span style=\"text-decoration: underline;\">Yuki Mitsufuji<\/span>, \u201cA Simple but Strong Baseline for Sounding Video Generation: Effective Adaptation of Audio and Video Diffusion Models for Joint Generation,\u201d in Proc. IEEE International Joint Conference on Neural Networks (<mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">IJCNN<\/mark>), 2025 [<a href=\"https:\/\/ieeexplore.ieee.org\/document\/11228639\" target=\"_blank\" rel=\"noreferrer noopener\">IEEE<\/a>][<a href=\"https:\/\/arxiv.org\/abs\/2409.17550\" target=\"_blank\" rel=\"noreferrer noopener\">arXiv<\/a>][<a href=\"https:\/\/github.com\/SonyResearch\/SVG_baseline\" target=\"_blank\" rel=\"noreferrer noopener\">code<\/a>]<\/li>\n\n\n\n<li>Bac Nguyen, Chieh-Hsin Lai, Yuhta Takida, Naoki Murata, Toshimitsu Uesaka, Stefano Ermon, <span style=\"text-decoration: underline;\">Yuki Mitsufuji<\/span>, \u201cImproving Vector-Quantized Image Modeling with Latent Consistency-Matching Diffusion,\u201d in Proc. 
IEEE International Joint Conference on Neural Networks (<mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">IJCNN<\/mark>), 2025 [<a href=\"https:\/\/ieeexplore.ieee.org\/document\/11228445\" target=\"_blank\" rel=\"noreferrer noopener\">IEEE<\/a>][<a href=\"https:\/\/arxiv.org\/abs\/2410.14758\" target=\"_blank\" rel=\"noreferrer noopener\">arXiv<\/a>] \u2013 <mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">Best Industrial Paper Award<\/mark> [<a href=\"https:\/\/2025.ijcnn.org\/program\/awards\" target=\"_blank\" rel=\"noreferrer noopener\">URL<\/a>][<a href=\"https:\/\/www.yukimitsufuji.com\/wp-content\/uploads\/2025\/07\/IJCNN_2025_award.pdf\" target=\"_blank\" rel=\"noreferrer noopener\">certificate<\/a>]<\/li>\n\n\n\n<li>Yuya Kobayashi, Yuhta Takida, Takashi Shibuya, <span style=\"text-decoration: underline;\">Yuki Mitsufuji<\/span>, \u201cEfficiency without Compromise: CLIP-aided Text-to-Image GANs with Increased Diversity,\u201d in Proc. IEEE International Joint Conference on Neural Networks (<mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">IJCNN<\/mark>), 2025 [<a href=\"https:\/\/ieeexplore.ieee.org\/document\/11227151\" target=\"_blank\" rel=\"noreferrer noopener\">IEEE<\/a>][<a href=\"https:\/\/arxiv.org\/abs\/2506.01493\" target=\"_blank\" rel=\"noreferrer noopener\">arXiv<\/a>]<\/li>\n\n\n\n<li>Christian Marinoni, Riccardo Fosco Gramaccioni, Kazuki Shimada, Takashi Shibuya, <span style=\"text-decoration: underline;\">Yuki Mitsufuji<\/span>, Danilo Comminiello, \u201cStereoSync: Spatially-Aware Stereo Audio Generation from Videos,\u201d in Proc. IEEE International Joint Conference on Neural Networks (<mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">IJCNN<\/mark>), 2025 [<a href=\"https:\/\/arxiv.org\/abs\/2510.05828\" target=\"_blank\" rel=\"noreferrer noopener\">arXiv<\/a>]<\/li>\n\n\n\n<li>Silin Gao, Sheryl Mathew, Li Mi, Sepideh Mamooler, Mengjie Zhao, Hiromi Wakaki, <span style=\"text-decoration: underline;\">Yuki Mitsufuji<\/span>, Syrielle Montariol, Antoine Bosselut, \u201cVinaBench: Benchmark for Faithful and Consistent Visual Narratives,\u201d in Proc. IEEE\/CVF Conference on Computer Vision and Pattern Recognition (<mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">CVPR<\/mark>), pp. 2870\u20132879, 2025 [<a href=\"https:\/\/openaccess.thecvf.com\/content\/CVPR2025\/html\/Gao_VinaBench_Benchmark_for_Faithful_and_Consistent_Visual_Narratives_CVPR_2025_paper.html\" target=\"_blank\" rel=\"noreferrer noopener\">CVF<\/a>][<a href=\"https:\/\/ieeexplore.ieee.org\/document\/11092502\" target=\"_blank\" rel=\"noreferrer noopener\">IEEE<\/a>][<a href=\"https:\/\/arxiv.org\/abs\/2503.20871\" target=\"_blank\" rel=\"noreferrer noopener\">arXiv<\/a>][<a href=\"https:\/\/github.com\/Silin159\/VinaBench\" target=\"_blank\" rel=\"noreferrer noopener\">code<\/a>][<a href=\"https:\/\/silin159.github.io\/Vina-Bench\/\" target=\"_blank\" rel=\"noreferrer noopener\">demo<\/a>]<\/li>\n\n\n\n<li>Anubhav Jain, Yuya Kobayashi, Takashi Shibuya, Yuhta Takida, Nasir Memon, Julian Togelius, <span style=\"text-decoration: underline;\">Yuki Mitsufuji<\/span>, \u201cClassifier-Free Guidance inside the Attraction Basin May Cause Memorization,\u201d in Proc. 
IEEE\/CVF Conference on Computer Vision and Pattern Recognition (<mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">CVPR<\/mark>), pp. 12871\u201312879, 2025 [<a href=\"https:\/\/openaccess.thecvf.com\/content\/CVPR2025\/html\/Jain_Classifier-Free_Guidance_Inside_the_Attraction_Basin_May_Cause_Memorization_CVPR_2025_paper.html\" target=\"_blank\" rel=\"noreferrer noopener\">CVF<\/a>][<a href=\"https:\/\/ieeexplore.ieee.org\/document\/11092395\" target=\"_blank\" rel=\"noreferrer noopener\">IEEE<\/a>][<a href=\"https:\/\/arxiv.org\/abs\/2411.16738\" target=\"_blank\" rel=\"noreferrer noopener\">arXiv<\/a>][<a href=\"https:\/\/github.com\/SonyResearch\/mitigating_memorization\" target=\"_blank\" rel=\"noreferrer noopener\">code<\/a>]<\/li>\n\n\n\n<li>Ho Kei Cheng, Masato Ishii, Akio Hayakawa, Takashi Shibuya, Alexander Schwing, <span style=\"text-decoration: underline;\">Yuki Mitsufuji<\/span>, \u201cMMAudio: Taming Multimodal Joint Training for High-Quality Video-to-Audio Synthesis,\u201d in Proc. IEEE\/CVF Conference on Computer Vision and Pattern Recognition (<mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">CVPR<\/mark>), pp. 28901\u201328911, 2025 [<a href=\"https:\/\/openaccess.thecvf.com\/content\/CVPR2025\/html\/Cheng_MMAudio_Taming_Multimodal_Joint_Training_for_High-Quality_Video-to-Audio_Synthesis_CVPR_2025_paper.html\" target=\"_blank\" rel=\"noreferrer noopener\">CVF<\/a>][<a href=\"https:\/\/ieeexplore.ieee.org\/document\/11092848\" target=\"_blank\" rel=\"noreferrer noopener\">IEEE<\/a>][<a href=\"https:\/\/arxiv.org\/abs\/2412.15322\">arXiv<\/a>][<a href=\"https:\/\/github.com\/hkchengrex\/MMAudio\" target=\"_blank\" rel=\"noreferrer noopener\">code<\/a>][<a href=\"https:\/\/huggingface.co\/spaces\/hkchengrex\/MMAudio\" target=\"_blank\" rel=\"noreferrer noopener\">demo<\/a>][<a href=\"https:\/\/colab.research.google.com\/drive\/1TAaXCY2-kPk4xE4PwKB3EqFbSnkUuzZ8?usp=sharing\" target=\"_blank\" rel=\"noreferrer noopener\">colab<\/a>]<\/li>\n\n\n\n<li>Koichi Saito, Dongjun Kim, Takashi Shibuya, Chieh-Hsin Lai, Zhi Zhong, Yuhta Takida, <span style=\"text-decoration: underline;\">Yuki Mitsufuji<\/span>, \u201cSoundCTM: Unifying Score-based and Consistency Models for Full-band Text-to-Sound Generation,\u201d in Proc. International Conference on Learning Representations (<mark><mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">ICLR<\/mark><\/mark>), 2025 [<a href=\"https:\/\/openreview.net\/forum?id=KrK6zXbjfO\" target=\"_blank\" rel=\"noreferrer noopener\">OpenReview<\/a>][<a href=\"https:\/\/arxiv.org\/abs\/2405.18503\" target=\"_blank\" rel=\"noreferrer noopener\">arXiv<\/a>][<a href=\"https:\/\/github.com\/sony\/soundctm\" target=\"_blank\" rel=\"noreferrer noopener\">code<\/a>][<a href=\"https:\/\/koichi-saito-sony.github.io\/soundctm\/\" target=\"_blank\" rel=\"noreferrer noopener\">demo<\/a>]<\/li>\n\n\n\n<li>Akio Hayakawa, Masato Ishii, Takashi Shibuya, <span style=\"text-decoration: underline;\">Yuki Mitsufuji<\/span>, \u201cMMDisCo: Multi-Modal Discriminator-Guided Cooperative Diffusion for Joint Audio and Video Generation,\u201d in Proc. 
International Conference on Learning Representations (<mark><mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">ICLR<\/mark><\/mark>), 2025 [<a href=\"https:\/\/openreview.net\/forum?id=agbiPPuSeQ\" target=\"_blank\" rel=\"noreferrer noopener\">OpenReview<\/a>][<a href=\"https:\/\/arxiv.org\/abs\/2405.17842\" target=\"_blank\" rel=\"noreferrer noopener\">arXiv<\/a>][<a href=\"https:\/\/github.com\/SonyResearch\/MMDisCo\" target=\"_blank\" rel=\"noreferrer noopener\">code<\/a>]<\/li>\n\n\n\n<li>Yong-Hyun Park, Chieh-Hsin Lai, Satoshi Hayakawa, Yuhta Takida, <span style=\"text-decoration: underline;\">Yuki Mitsufuji<\/span>, \u201cJump Your Steps: Optimizing Sampling Schedule of Discrete Diffusion Models,\u201d in Proc. International Conference on Learning Representations (<mark><mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">ICLR<\/mark><\/mark>), 2025 [<a href=\"https:\/\/openreview.net\/forum?id=pD6TiCpyDR\" target=\"_blank\" rel=\"noreferrer noopener\">OpenReview<\/a>][<a href=\"https:\/\/arxiv.org\/abs\/2410.07761\" target=\"_blank\" rel=\"noreferrer noopener\">arXiv<\/a>][<a href=\"https:\/\/github.com\/sony\/jys\" target=\"_blank\" rel=\"noreferrer noopener\">code<\/a>]<\/li>\n\n\n\n<li>Ayano Hiranaka, Shang-Fu Chen, Chieh-Hsin Lai, Dongjun Kim, Naoki Murata, Takashi Shibuya, Wei-Hsiang Liao, Shao-Hua Sun, <span style=\"text-decoration: underline;\">Yuki Mitsufuji<\/span>, \u201cHERO: Human-Feedback Efficient Reinforcement Learning for Online Diffusion Model Finetuning,\u201d in Proc. International Conference on Learning Representations (<mark><mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">ICLR<\/mark><\/mark>), 2025 [<a href=\"https:\/\/openreview.net\/forum?id=yMHe9SRvxk\" target=\"_blank\" rel=\"noreferrer noopener\">OpenReview<\/a>][<a href=\"https:\/\/arxiv.org\/abs\/2410.05116\" target=\"_blank\" rel=\"noreferrer noopener\">arXiv<\/a>][<a href=\"https:\/\/github.com\/sony\/hero\/\" target=\"_blank\" rel=\"noreferrer noopener\">code<\/a>][<a href=\"https:\/\/hero-dm.github.io\/\" target=\"_blank\" rel=\"noreferrer noopener\">demo<\/a>]<\/li>\n\n\n\n<li>Saurav Jha, Shiqi Yang, Masato Ishii, Mengjie Zhao, Christian Simon, Jehanzeb Mirza, Dong Gong, Lina Yao, Shusuke Takahashi, <span style=\"text-decoration: underline;\">Yuki Mitsufuji<\/span>, \u201cMining Your Own Secrets: Diffusion Classifier Scores for Continual Personalization of Text-to-Image Diffusion Models,\u201d in Proc. International Conference on Learning Representations (<mark><mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">ICLR<\/mark><\/mark>), 2025 [<a href=\"https:\/\/openreview.net\/forum?id=hUdLs6TqZL\" target=\"_blank\" rel=\"noreferrer noopener\">OpenReview<\/a>][<a href=\"https:\/\/arxiv.org\/abs\/2410.00700\" target=\"_blank\" rel=\"noreferrer noopener\">arXiv<\/a>][<a href=\"https:\/\/srvcodes.github.io\/continual_personalization\/\" target=\"_blank\" rel=\"noreferrer noopener\">demo<\/a>]<\/li>\n\n\n\n<li>Toshimitsu Uesaka, Taiji Suzuki, Yuhta Takida, Chieh-Hsin Lai, Naoki Murata, <span style=\"text-decoration: underline;\">Yuki Mitsufuji<\/span>, \u201cWeighted Point Set Embedding for Multimodal Contrastive Learning Toward Optimal Similarity Metric,\u201d in Proc. 
International Conference on Learning Representations (<mark><mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">ICLR<\/mark><\/mark>), 2025 [<a href=\"https:\/\/openreview.net\/forum?id=uSz2K30RRd\" target=\"_blank\" rel=\"noreferrer noopener\">OpenReview<\/a>][<a href=\"https:\/\/arxiv.org\/abs\/2404.19228\" target=\"_blank\" rel=\"noreferrer noopener\">arXiv<\/a>][<a href=\"https:\/\/github.com\/sony\/wpse\" target=\"_blank\" rel=\"noreferrer noopener\">code<\/a>] \u2013 <mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">Spotlight<\/mark><\/li>\n\n\n\n<li>Shoko Araki, Nobutaka Ito, Reinhold Haeb-Umbach, Gordon Wichern, Zhong-Qiu Wang, <span style=\"text-decoration: underline;\">Yuki Mitsufuji<\/span>, \u201c30+ Years of Source Separation Research: Achievements and Future Challenges,\u201d in Proc. International Conference on Acoustics, Speech, and Signal Processing (<mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">ICASSP<\/mark>), 2025 [<a href=\"https:\/\/ieeexplore.ieee.org\/document\/10889006\" target=\"_blank\" rel=\"noreferrer noopener\">IEEE<\/a>][<a href=\"https:\/\/arxiv.org\/abs\/2501.11837\" target=\"_blank\" rel=\"noreferrer noopener\">arXiv<\/a>]<\/li>\n\n\n\n<li>Geoffroy Peeters, Zafar Rafii, Magdalena Fuentes, Zhiyao Duan, Emmanouil Benetos, Juhan Nam, <span style=\"text-decoration: underline;\">Yuki Mitsufuji<\/span>, \u201cTwenty-Five Years of MIR Research: Achievements, Practices, Evaluations, and Future Challenges,\u201d in Proc. International Conference on Acoustics, Speech, and Signal Processing (<mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">ICASSP<\/mark>), 2025 [<a href=\"https:\/\/ieeexplore.ieee.org\/document\/10888947\" target=\"_blank\" rel=\"noreferrer noopener\">IEEE<\/a>][<a href=\"https:\/\/arxiv.org\/abs\/2511.07205\" target=\"_blank\" rel=\"noreferrer noopener\">arXiv<\/a>]<\/li>\n\n\n\n<li>Michele Mancusi, Yurii Halychansky, Kin Wai Cheuk, Chieh-Hsin Lai, Stefan Uhlich, Junghyun Koo, Marco A. Mart\u00ednez-Ram\u00edrez, Wei-Hsiang Liao, Giorgio Fabbro, <span style=\"text-decoration: underline;\">Yuki Mitsufuji<\/span>, \u201cLatent Diffusion Bridges for Unsupervised Musical Audio Timbre Transfer,\u201d in Proc. International Conference on Acoustics, Speech, and Signal Processing (<mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">ICASSP<\/mark>), 2025 [<a href=\"https:\/\/ieeexplore.ieee.org\/document\/10890708\" target=\"_blank\" rel=\"noreferrer noopener\">IEEE<\/a>][<a href=\"https:\/\/arxiv.org\/abs\/2409.06096\" target=\"_blank\" rel=\"noreferrer noopener\">arXiv<\/a>][<a href=\"https:\/\/github.com\/sony\/diffusion-timbre-transfer\" target=\"_blank\" rel=\"noreferrer noopener\">code<\/a>][<a href=\"https:\/\/sony.github.io\/diffusion-timbre-transfer\/\" target=\"_blank\" rel=\"noreferrer noopener\">demo<\/a>]<\/li>\n\n\n\n<li>Yunkee Chae, Woosung Choi, Yuhta Takida, Junghyun Koo, Yukara Ikemiya, Zhi Zhong, Kin Wai Cheuk, Marco A. Mart\u00ednez-Ram\u00edrez, Kyogu Lee, Wei-Hsiang Liao, <span style=\"text-decoration: underline;\">Yuki Mitsufuji<\/span>, \u201cVariable Bitrate Residual Vector Quantization for Audio Coding,\u201d in Proc. 
International Conference on Acoustics, Speech, and Signal Processing (<mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">ICASSP<\/mark>), 2025 [<a href=\"https:\/\/ieeexplore.ieee.org\/document\/10889508\" target=\"_blank\" rel=\"noreferrer noopener\">IEEE<\/a>][<a href=\"https:\/\/arxiv.org\/abs\/2410.06016\" target=\"_blank\" rel=\"noreferrer noopener\">arXiv<\/a>][<a href=\"https:\/\/github.com\/SonyResearch\/VRVQ\" target=\"_blank\" rel=\"noreferrer noopener\">code<\/a>][<a href=\"https:\/\/sonyresearch.github.io\/VRVQ\/\" target=\"_blank\" rel=\"noreferrer noopener\">demo<\/a>]<\/li>\n\n\n\n<li>Dongjun Kim, Chieh-Hsin Lai, Wei-Hsiang Liao, Yuhta Takida, Naoki Murata, Toshimitsu Uesaka, <span style=\"text-decoration: underline;\">Yuki Mitsufuji<\/span>, Stefano Ermon, \u201cPaGoDA: Progressive Growing of a One-Step Generator from a Low-Resolution Diffusion Teacher,\u201d in Proc. Neural Information Processing Systems (<mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">NeurIPS<\/mark>), pp. 19167\u201319208, 2024 [<a href=\"https:\/\/proceedings.neurips.cc\/paper_files\/paper\/2024\/hash\/221ccaeaef4b9cc8f89b63d6fc98a271-Abstract-Conference.html\" target=\"_blank\" rel=\"noreferrer noopener\">NeurIPS<\/a>][<a href=\"https:\/\/arxiv.org\/abs\/2405.14822\" target=\"_blank\" rel=\"noreferrer noopener\">arXiv<\/a>][<a href=\"https:\/\/github.com\/sony\/pagoda\" target=\"_blank\" rel=\"noreferrer noopener\">code<\/a>]<\/li>\n\n\n\n<li>Junyoung Seo, Kazumi Fukuda, Takashi Shibuya, Takuya Narihira, Naoki Murata, Shoukang Hu, Chieh-Hsin Lai, Seungryong Kim, <span style=\"text-decoration: underline;\">Yuki Mitsufuji<\/span>, \u201cGenWarp: Single Image to Novel Views with Semantic-Preserving Generative Warping,\u201d in Proc. Neural Information Processing Systems (<mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">NeurIPS<\/mark>), pp. 80220\u201380243, 2024 [<a href=\"https:\/\/proceedings.neurips.cc\/paper_files\/paper\/2024\/hash\/92e886487a8354b03d8bf4416eae6d7d-Abstract-Conference.html\" target=\"_blank\" rel=\"noreferrer noopener\">NeurIPS<\/a>][<a href=\"https:\/\/arxiv.org\/abs\/2405.17251\" target=\"_blank\" rel=\"noreferrer noopener\">arXiv<\/a>][<a href=\"https:\/\/github.com\/sony\/genwarp\" target=\"_blank\" rel=\"noreferrer noopener\">code<\/a>][<a href=\"https:\/\/genwarp-nvs.github.io\/\" target=\"_blank\" rel=\"noreferrer noopener\">demo<\/a>]<\/li>\n\n\n\n<li>Roser Batlle-Roca, Wei-Hsiang Liao, Xavier Serra, <span style=\"text-decoration: underline;\">Yuki Mitsufuji<\/span>, Emilia G\u00f3mez, \u201cTowards Assessing Data Replication in Music Generation with Music Similarity Metrics on Raw Audio,\u201d in Proc. International Society for Music Information Retrieval (<mark><mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">ISMIR<\/mark><\/mark>) Conference, pp. 1004\u20131011, 2024 [<a href=\"https:\/\/zenodo.org\/records\/14877501\" target=\"_blank\" rel=\"noreferrer noopener\">ISMIR<\/a>][<a href=\"https:\/\/arxiv.org\/abs\/2407.14364\" target=\"_blank\" rel=\"noreferrer noopener\">arXiv<\/a>][<a href=\"https:\/\/github.com\/roserbatlleroca\/mira\" target=\"_blank\" rel=\"noreferrer noopener\">code<\/a>]<\/li>\n\n\n\n<li>Marco Comunit\u00e0, Zhi Zhong, Akira Takahashi, Shiqi Yang, Mengjie Zhao, Koichi Saito, Yukara Ikemiya, 
Takashi Shibuya, Shusuke Takahashi, <span style=\"text-decoration: underline;\">Yuki Mitsufuji<\/span>, \u201cSpecMaskGIT: Masked Generative Modeling of Audio Spectrograms for Efficient Audio Synthesis and Beyond,\u201d in Proc. International Society for Music Information Retrieval (<mark><mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">ISMIR<\/mark><\/mark>) Conference, pp. 420\u2013428, 2024 [<a href=\"https:\/\/zenodo.org\/records\/14877363\" target=\"_blank\" rel=\"noreferrer noopener\">ISMIR<\/a>][<a href=\"https:\/\/arxiv.org\/abs\/2406.17672\" target=\"_blank\" rel=\"noreferrer noopener\">arXiv<\/a>][<a href=\"https:\/\/zzaudio.github.io\/SpecMaskGIT\/\" target=\"_blank\" rel=\"noreferrer noopener\">demo<\/a>]<\/li>\n\n\n\n<li>Mayank Kumar Singh, Naoya Takahashi, Wei-Hsiang Liao, <span style=\"text-decoration: underline;\">Yuki Mitsufuji<\/span>, \u201cSilentCipher: Deep Audio Watermarking,\u201d in Proc. Annual Conference of the International Speech Communication Association (<mark><mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">INTERSPEECH<\/mark><\/mark>), pp. 2235\u20132239, 2024 [<a href=\"https:\/\/www.isca-archive.org\/interspeech_2024\/singh24_interspeech.html\" target=\"_blank\" rel=\"noreferrer noopener\">ISCA<\/a>][<a href=\"https:\/\/arxiv.org\/abs\/2406.03822\" target=\"_blank\" rel=\"noreferrer noopener\">arXiv<\/a>][<a href=\"https:\/\/github.com\/sony\/silentcipher\" target=\"_blank\" rel=\"noreferrer noopener\">code<\/a>][<a href=\"https:\/\/interspeech2024.github.io\/silentcipher\/\" target=\"_blank\" rel=\"noreferrer noopener\">demo<\/a>]<\/li>\n\n\n\n<li>Silin Gao, Mete Ismayilzada, Mengjie Zhao, Hiromi Wakaki, <span style=\"text-decoration: underline;\">Yuki Mitsufuji<\/span>, Antoine Bosselut, \u201cDiffuCOMET: Contextual Commonsense Knowledge Diffusion,\u201d in Proc. the Annual Meeting of the Association for Computational Linguistics (<mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">ACL<\/mark>), pp. 4809\u20134831, 2024 [<a href=\"https:\/\/aclanthology.org\/2024.acl-long.264\/\" target=\"_blank\" rel=\"noreferrer noopener\">ACL<\/a>][<a href=\"https:\/\/arxiv.org\/abs\/2402.17011\" target=\"_blank\" rel=\"noreferrer noopener\">arXiv<\/a>][<a href=\"https:\/\/github.com\/Silin159\/DiffuCOMET\" target=\"_blank\" rel=\"noreferrer noopener\">code<\/a>]<\/li>\n\n\n\n<li>Mengjie Zhao, Junya Ono, Zhi Zhong, Chieh-Hsin Lai, Yuhta Takida, Naoki Murata, Wei-Hsiang Liao, Takashi Shibuya, Hiromi Wakaki, <span style=\"text-decoration: underline;\">Yuki Mitsufuji<\/span>, \u201cOn the Language Encoder of Contrastive Cross-modal Models,\u201d in Findings of the Annual Meeting of the Association for Computational Linguistics (<mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">ACL<\/mark>), pp. 4923\u20134940, 2024 [<a href=\"https:\/\/aclanthology.org\/2024.findings-acl.293\/\" target=\"_blank\" rel=\"noreferrer noopener\">ACL<\/a>][<a href=\"https:\/\/arxiv.org\/abs\/2310.13267\" target=\"_blank\" rel=\"noreferrer noopener\">arXiv<\/a>]<\/li>\n\n\n\n<li>Sungho Lee, Marco Mart\u00ednez-Ram\u00edrez, Wei-Hsiang Liao, Stefan Uhlich, Giorgio Fabbro, Kyogu Lee, <span style=\"text-decoration: underline;\">Yuki Mitsufuji<\/span>, \u201cSearching For Music Mixing Graphs: A Pruning Approach,\u201d in Proc. 
Digital Audio Effect Conference (<mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">DAFx<\/mark>), pp. 147\u2013154, 2024 [<a href=\"https:\/\/www.dafx.de\/paper-archive\/2024\/papers\/DAFx24_paper_17.pdf\" target=\"_blank\" rel=\"noreferrer noopener\">DAFx<\/a>][<a href=\"https:\/\/arxiv.org\/abs\/2406.01049\" target=\"_blank\" rel=\"noreferrer noopener\">arXiv<\/a>][<a href=\"https:\/\/github.com\/sh-lee97\/grafx-prune\/tree\/main\" target=\"_blank\" rel=\"noreferrer noopener\">code<\/a>][<a href=\"https:\/\/sh-lee97.github.io\/grafx-prune\/\" target=\"_blank\" rel=\"noreferrer noopener\">demo<\/a>] \u2013 <mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">Best Show &amp; Tell Award<\/mark> [<a href=\"https:\/\/www.yukimitsufuji.com\/wp-content\/uploads\/2024\/10\/DAFx-24_Best_show_and_tell_award.png\" target=\"_blank\" rel=\"noreferrer noopener\">certificate<\/a>]<\/li>\n\n\n\n<li>Yu-Hua Chen, Woosung Choi, Wei-Hsiang Liao, Marco A. Mart\u00ednez-Ram\u00edrez, Kin Wai Cheuk, <span style=\"text-decoration: underline;\">Yuki Mitsufuji<\/span>, Jyh-Shing Roger Jang and Yi-Hsuan Yang, \u201cImproving Unsupervised Clean-to-Rendered Guitar Tone Transformation Using GANs and Integrated Unaligned Clean Data,\u201d in Proc. Digital Audio Effect Conference (<mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">DAFx<\/mark>), pp. 192\u2013199, 2024 [<a href=\"https:\/\/www.dafx.de\/paper-archive\/2024\/papers\/DAFx24_paper_30.pdf\" target=\"_blank\" rel=\"noreferrer noopener\">DAFx<\/a>][<a href=\"https:\/\/arxiv.org\/abs\/2406.15751\" target=\"_blank\" rel=\"noreferrer noopener\">arXiv<\/a>][<a href=\"https:\/\/ampdafx24.notionlinker.com\/\" target=\"_blank\" rel=\"noreferrer noopener\">demo<\/a>]<\/li>\n\n\n\n<li>Yixiao Zhang, Yukara Ikemiya, Gus Xia, Naoki Murata, Marco Mart\u00ednez, Wei-Hsiang Liao, <span style=\"text-decoration: underline;\">Yuki Mitsufuji<\/span>, Simon Dixon, \u201cMusicMagus: Zero-Shot Text-to-Music Editing via Diffusion Models,\u201d in Proc. International Joint Conferences on Artificial Intelligence (<mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">IJCAI<\/mark>) AI, Arts &amp; Creativity Track, pp. 7805\u20137813, 2024 [<a href=\"https:\/\/www.ijcai.org\/proceedings\/2024\/864\" target=\"_blank\" rel=\"noreferrer noopener\">IJCAI<\/a>][<a href=\"https:\/\/arxiv.org\/abs\/2402.06178\" target=\"_blank\" rel=\"noreferrer noopener\">arXiv<\/a>][<a href=\"https:\/\/github.com\/ldzhangyx\/MusicMagus\" target=\"_blank\" rel=\"noreferrer noopener\">code<\/a>][<a href=\"https:\/\/wry-neighbor-173.notion.site\/MusicMagus-Zero-Shot-Text-to-Music-Editing-via-Diffusion-Models-8f55a82f34944eb9a4028ca56c546d9d\" target=\"_blank\" rel=\"noreferrer noopener\">demo<\/a>][<a href=\"https:\/\/t.co\/PtgcHqwpGI\" target=\"_blank\" rel=\"noreferrer noopener\">video<\/a>]<\/li>\n\n\n\n<li>Dongjun Kim, Chieh-Hsin Lai, Wei-Hsiang Liao, Naoki Murata, Yuhta Takida, Toshimitsu Uesaka, Yutong He, <span style=\"text-decoration: underline;\">Yuki Mitsufuji<\/span>, Stefano Ermon, \u201cConsistency Trajectory Models: Learning Probability Flow ODE Trajectory of Diffusion,\u201d in Proc. 
International Conference on Learning Representations (<mark><mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">ICLR<\/mark><\/mark>), 2024 [<a href=\"https:\/\/openreview.net\/forum?id=ymjI8feDTD\" target=\"_blank\" rel=\"noreferrer noopener\">OpenReview<\/a>][<a href=\"https:\/\/arxiv.org\/abs\/2310.02279\" target=\"_blank\" rel=\"noreferrer noopener\">arXiv<\/a>][<a href=\"https:\/\/github.com\/sony\/ctm\" target=\"_blank\" rel=\"noreferrer noopener\">code<\/a>][<a href=\"https:\/\/chiehhsinjesselai.github.io\/ConsistencyTrajectoryModel\/\" target=\"_blank\" rel=\"noreferrer noopener\">demo<\/a>]<\/li>\n\n\n\n<li>Yutong He, Naoki Murata, Chieh-Hsin Lai, Yuhta Takida, Toshimitsu Uesaka, Dongjun Kim, Wei-Hsiang Liao, <span style=\"text-decoration: underline;\">Yuki Mitsufuji<\/span>, Zico Kolter, Ruslan Salakhutdinov, Stefano Ermon, \u201cManifold Preserving Guided Diffusion,\u201d in Proc. International Conference on Learning Representations (<mark><mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">ICLR<\/mark><\/mark>), 2024 [<a href=\"https:\/\/openreview.net\/forum?id=o3BxOLoxm1\" target=\"_blank\" rel=\"noreferrer noopener\">OpenReview<\/a>][<a href=\"https:\/\/arxiv.org\/abs\/2311.16424\" target=\"_blank\" rel=\"noreferrer noopener\">arXiv<\/a>][<a href=\"https:\/\/github.com\/KellyYutongHe\/mpgd_pytorch\" target=\"_blank\" rel=\"noreferrer noopener\">code<\/a>][<a href=\"https:\/\/kellyyutonghe.github.io\/mpgd\/\" target=\"_blank\" rel=\"noreferrer noopener\">demo<\/a>]<\/li>\n\n\n\n<li>Yuhta Takida, Masaaki Imaizumi, Takashi Shibuya, Chieh-Hsin Lai, Toshimitsu Uesaka, Naoki Murata, <span style=\"text-decoration: underline;\">Yuki Mitsufuji<\/span>, \u201cSAN: Inducing Metrizability of GAN with Discriminative Normalized Linear Layer,\u201d in Proc. International Conference on Learning Representations (<mark><mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">ICLR<\/mark><\/mark>), 2024 [<a href=\"https:\/\/openreview.net\/forum?id=eiF7TU1E8E\" target=\"_blank\" rel=\"noreferrer noopener\">OpenReview<\/a>][<a href=\"https:\/\/arxiv.org\/abs\/2301.12811\" target=\"_blank\" rel=\"noreferrer noopener\">arXiv<\/a>][<a href=\"https:\/\/github.com\/sony\/san\" target=\"_blank\" rel=\"noreferrer noopener\">code<\/a>][<a href=\"https:\/\/ytakida.github.io\/san\/\" target=\"_blank\" rel=\"noreferrer noopener\">demo<\/a>]<\/li>\n\n\n\n<li>Carlos Hernandez-Olivan, Koichi Saito, Naoki Murata, Chieh-Hsin Lai, Marco A. Mart\u00ednez-Ram\u00edrez, Wei-Hsiang Liao, <span style=\"text-decoration: underline;\">Yuki Mitsufuji<\/span>, \u201cVRDMG: Vocal Restoration via Diffusion Posterior Sampling with Multiple Guidance,\u201d in Proc. International Conference on Acoustics, Speech, and Signal Processing (<mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">ICASSP<\/mark>), pp. 
596\u2013600, 2024 [<a href=\"https:\/\/ieeexplore.ieee.org\/document\/10446423\" target=\"_blank\" rel=\"noreferrer noopener\">IEEE<\/a>][<a href=\"https:\/\/arxiv.org\/abs\/2309.06934\" target=\"_blank\" rel=\"noreferrer noopener\">arXiv<\/a>][<a href=\"https:\/\/carlosholivan.github.io\/demos\/audio-restoration-2023.html\" target=\"_blank\" rel=\"noreferrer noopener\">demo<\/a>]<\/li>\n\n\n\n<li>Kazuki Shimada, Kengo Uchida, Yuichiro Koyama, Takashi Shibuya, Shusuke Takahashi, <span style=\"text-decoration: underline;\">Yuki Mitsufuji<\/span>, Tatsuya Kawahara, \u201cZero- and Few-shot Sound Event Localization and Detection,\u201d in Proc. International Conference on Acoustics, Speech, and Signal Processing (<mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">ICASSP<\/mark>), pp. 636\u2013640, 2024 [<a href=\"https:\/\/ieeexplore.ieee.org\/document\/10448497\" target=\"_blank\" rel=\"noreferrer noopener\">IEEE<\/a>][<a href=\"https:\/\/arxiv.org\/abs\/2309.09223\" target=\"_blank\" rel=\"noreferrer noopener\">arXiv<\/a>]<\/li>\n\n\n\n<li>Frank Cwitkowitz, Kin-Wai Cheuk, Woosung Choi, Marco A. Mart\u00ednez-Ram\u00edrez, Keisuke Toyama, Wei-Hsiang Liao, <span style=\"text-decoration: underline;\">Yuki Mitsufuji<\/span>, \u201cTimbre-Trap: A Low-Resource Framework for Instrument-Agnostic Music Transcription,\u201d in Proc. International Conference on Acoustics, Speech, and Signal Processing (<mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">ICASSP<\/mark>), pp. 1291\u20131295, 2024 [<a href=\"https:\/\/ieeexplore.ieee.org\/document\/10446141\" target=\"_blank\" rel=\"noreferrer noopener\">IEEE<\/a>][<a href=\"https:\/\/arxiv.org\/abs\/2309.15717\" target=\"_blank\" rel=\"noreferrer noopener\">arXiv<\/a>][<a href=\"https:\/\/github.com\/sony\/timbre-trap\" target=\"_blank\" rel=\"noreferrer noopener\">code<\/a>][<a href=\"https:\/\/sony.github.io\/timbre-trap\/\" target=\"_blank\" rel=\"noreferrer noopener\">demo<\/a>]<\/li>\n\n\n\n<li>Takashi Shibuya, Yuhta Takida, <span style=\"text-decoration: underline;\">Yuki Mitsufuji<\/span>, \u201cBigVSAN: Enhancing GAN-based Neural Vocoders with Slicing Adversarial Network,\u201d in Proc. International Conference on Acoustics, Speech, and Signal Processing (<mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">ICASSP<\/mark>), pp. 10121\u201310125, 2024 [<a href=\"https:\/\/ieeexplore.ieee.org\/document\/10446121\" target=\"_blank\" rel=\"noreferrer noopener\">IEEE<\/a>][<a href=\"https:\/\/arxiv.org\/abs\/2309.02836\" target=\"_blank\" rel=\"noreferrer noopener\">arXiv<\/a>][<a href=\"https:\/\/takashishibuyasony.github.io\/bigvsan\/\" target=\"_blank\" rel=\"noreferrer noopener\">demo<\/a>][<a href=\"https:\/\/github.com\/sony\/bigvsan\" target=\"_blank\" rel=\"noreferrer noopener\">code<\/a>]<\/li>\n\n\n\n<li>Hao Shi, Kazuki Shimada, Masato Hirano, Takashi Shibuya, Yuichiro Koyama, Zhi Zhong, Shusuke Takahashi, Tatsuya Kawahara, <span style=\"text-decoration: underline;\">Yuki Mitsufuji<\/span>, \u201cDiffusion-Based Speech Enhancement with Joint Generative and Predictive Decoders,\u201d in Proc. International Conference on Acoustics, Speech, and Signal Processing (<mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">ICASSP<\/mark>), pp. 
12951\u201312955, 2024 [<a href=\"https:\/\/ieeexplore.ieee.org\/document\/10448429\" target=\"_blank\" rel=\"noreferrer noopener\">IEEE<\/a>][<a href=\"https:\/\/arxiv.org\/abs\/2305.10734\" target=\"_blank\" rel=\"noreferrer noopener\">arXiv<\/a>]<\/li>\n\n\n\n<li>Eleonora Grassucci, <span style=\"text-decoration: underline;\">Yuki Mitsufuji<\/span>, Ping Zhang, Danilo Comminiello, \u201cEnhancing Semantic Communication with Deep Generative Models \u2013 An ICASSP Special Session Overview,\u201d in Proc. International Conference on Acoustics, Speech, and Signal Processing (<mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">ICASSP<\/mark>), pp. 13021\u201313025, 2024 [<a href=\"https:\/\/ieeexplore.ieee.org\/document\/10448235\" target=\"_blank\" rel=\"noreferrer noopener\">IEEE<\/a>][<a href=\"https:\/\/arxiv.org\/abs\/2309.02478\" target=\"_blank\" rel=\"noreferrer noopener\">arXiv<\/a>]<\/li>\n\n\n\n<li>Kazuki Shimada, Archontis Politis, Parthasaarathy Sudarsanam, Daniel Krause, Kengo Uchida, Sharath Adavanne, Aapo Hakala, Yuichiro Koyama, Naoya Takahashi, Shusuke Takahashi, Tuomas Virtanen, <span style=\"text-decoration: underline;\">Yuki Mitsufuji<\/span>, \u201cSTARSS23: An Audio-Visual Dataset of Spatial Recordings of Real Scenes with Spatiotemporal Annotations of Sound Events,\u201d in Proc. Neural Information Processing Systems (<mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">NeurIPS<\/mark>), pp. 72931\u201372957, 2023 [<a href=\"https:\/\/proceedings.neurips.cc\/paper_files\/paper\/2023\/hash\/e6c9671ed3b3106b71cafda3ba225c1a-Abstract-Datasets_and_Benchmarks.html\" target=\"_blank\" rel=\"noreferrer noopener\">NeurIPS<\/a>][<a href=\"https:\/\/openreview.net\/forum?id=OzcPJz7rgg\" target=\"_blank\" rel=\"noreferrer noopener\">OpenReview<\/a>][<a href=\"https:\/\/arxiv.org\/abs\/2306.09126\" target=\"_blank\" rel=\"noreferrer noopener\">arXiv<\/a>][<a href=\"https:\/\/github.com\/sony\/audio-visual-seld-dcase2023\" target=\"_blank\" rel=\"noreferrer noopener\">code<\/a>][<a href=\"https:\/\/zenodo.org\/record\/7880637\" target=\"_blank\" rel=\"noreferrer noopener\">dataset<\/a>][<a href=\"https:\/\/www.youtube.com\/watch?v=ZtL-8wBYPow\" target=\"_blank\" rel=\"noreferrer noopener\">demo<\/a>]<\/li>\n\n\n\n<li>Zhi Zhong, Hao Shi, Masato Hirano, Kazuki Shimada, Kazuya Tateishi, Takashi Shibuya, Shusuke Takahashi, <span style=\"text-decoration: underline;\">Yuki Mitsufuji<\/span>, \u201cExtending Audio Masked Autoencoders Toward Audio Restoration,\u201d in Proc. IEEE Workshop on Applications of Signal Processing to Audio and Acoustics (<mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">WASPAA<\/mark>), pp. 
1\u20135, 2023 [<a href=\"https:\/\/ieeexplore.ieee.org\/document\/10248171\" target=\"_blank\" rel=\"noreferrer noopener\">IEEE<\/a>][<a href=\"https:\/\/arxiv.org\/abs\/2305.06701\" target=\"_blank\" rel=\"noreferrer noopener\">arXiv<\/a>][<a href=\"https:\/\/zzaudio.github.io\/Demo_Extend_AudioMAE_toward_Restoration\/demo_page.html\" target=\"_blank\" rel=\"noreferrer noopener\">demo<\/a>][<a href=\"https:\/\/dblp.org\/rec\/conf\/waspaa\/ZhongSHSTSTM23.html?view=bibtex\" target=\"_blank\" rel=\"noreferrer noopener\">bibtex<\/a>]<\/li>\n\n\n\n<li>Keisuke Toyama, Taketo Akama, Yukara Ikemiya, Yuhta Takida, Wei-Hsiang Liao, <span style=\"text-decoration: underline;\">Yuki Mitsufuji<\/span>, \u201cAutomatic Piano Transcription with Hierarchical Frequency-Time Transformer,\u201d in Proc. International Society for Music Information Retrieval (<mark><mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">ISMIR<\/mark><\/mark>) Conference, pp. 215\u2013222, 2023 [<a href=\"https:\/\/ismir2023program.ismir.net\/poster_72.html\" target=\"_blank\" rel=\"noreferrer noopener\">ISMIR<\/a>][<a href=\"https:\/\/arxiv.org\/abs\/2307.04305\" target=\"_blank\" rel=\"noreferrer noopener\">arXiv<\/a>][<a href=\"https:\/\/github.com\/sony\/hFT-Transformer\" target=\"_blank\" rel=\"noreferrer noopener\">code<\/a>]<\/li>\n\n\n\n<li>Ryosuke Sawata, Naoki Murata, Yuhta Takida, Toshimitsu Uesaka, Takashi Shibuya, Shusuke Takahashi, <span style=\"text-decoration: underline;\">Yuki Mitsufuji<\/span>, \u201cDiffiner: A Versatile Diffusion-based Generative Refiner for Speech Enhancement,\u201d in Proc. Annual Conference of the International Speech Communication Association (<mark><mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">INTERSPEECH<\/mark><\/mark>), pp. 3824\u20133828, 2023 [<a rel=\"noreferrer noopener\" href=\"https:\/\/www.isca-speech.org\/archive\/interspeech_2023\/sawata23_interspeech.html\" target=\"_blank\">ISCA<\/a>][<a rel=\"noreferrer noopener\" href=\"https:\/\/arxiv.org\/abs\/2210.17287\" target=\"_blank\">arXiv<\/a>][<a rel=\"noreferrer noopener\" href=\"https:\/\/github.com\/sony\/diffiner\" target=\"_blank\">code<\/a>]<\/li>\n\n\n\n<li>Silin Gao, Beatriz Borges, Soyoung Oh, Deniz Bayazit, Saya Kanno, Hiromi Wakaki, <span style=\"text-decoration: underline;\">Yuki Mitsufuji<\/span>, Antoine Bosselut, \u201cPeaCoK: Persona Commonsense Knowledge for Consistent and Engaging Narratives,\u201d in Proc. the Annual Meeting of the Association for Computational Linguistics (<mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">ACL<\/mark>), pp. 
6569\u20136591, 2023 [<a href=\"https:\/\/aclanthology.org\/2023.acl-long.362\/\" target=\"_blank\" rel=\"noreferrer noopener\">ACL<\/a>][<a href=\"https:\/\/arxiv.org\/abs\/2305.02364\" target=\"_blank\" rel=\"noreferrer noopener\">arXiv<\/a>][<a href=\"https:\/\/github.com\/Silin159\/PeaCoK\" target=\"_blank\" rel=\"noreferrer noopener\">code<\/a>][<a href=\"https:\/\/aclanthology.org\/2023.acl-long.362.bib\" target=\"_blank\" rel=\"noreferrer noopener\">bibtex<\/a>] \u2013 <mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">Outstanding Paper Award<\/mark> [<a href=\"https:\/\/www.yukimitsufuji.com\/wp-content\/uploads\/2023\/07\/ACL-scaled.jpg\" target=\"_blank\" rel=\"noreferrer noopener\">certificate<\/a>]<\/li>\n\n\n\n<li>Naoki Murata, Koichi Saito, Chieh-Hsin Lai, Yuhta Takida, Toshimitsu Uesaka, <span style=\"text-decoration: underline;\">Yuki Mitsufuji<\/span>, Stefano Ermon, \u201cGibbsDDRM: A Partially Collapsed Gibbs Sampler for Solving Blind Linear Inverse Problems with Denoising Diffusion Restoration,\u201d in Proc. International Conference on Machine Learning (<mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">ICML<\/mark>), pp. 25501\u201325522, 2023 [<a href=\"https:\/\/proceedings.mlr.press\/v202\/murata23a.html\" target=\"_blank\" rel=\"noreferrer noopener\">PMLR<\/a>][<a href=\"https:\/\/openreview.net\/forum?id=4weSHLFgtZ\" target=\"_blank\" rel=\"noreferrer noopener\">OpenReview<\/a>][<a href=\"https:\/\/arxiv.org\/abs\/2301.12686\" target=\"_blank\" rel=\"noreferrer noopener\">arXiv<\/a>][<a href=\"https:\/\/github.com\/sony\/GibbsDDRM\" target=\"_blank\" rel=\"noreferrer noopener\">code<\/a>][<a href=\"https:\/\/dblp.org\/rec\/conf\/icml\/MurataSLTUME23.html?view=bibtex\" target=\"_blank\" rel=\"noreferrer noopener\">bibtex<\/a>] \u2013 <mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">Oral<\/mark><\/li>\n\n\n\n<li>Chieh-Hsin Lai, Yuhta Takida, Naoki Murata, Toshimitsu Uesaka, <span style=\"text-decoration: underline;\">Yuki Mitsufuji<\/span>, Stefano Ermon, \u201cFP-Diffusion: Improving Score-based Diffusion Models by Enforcing the Underlying Score Fokker-Planck Equation,\u201d in Proc. International Conference on Machine Learning (<mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">ICML<\/mark>), pp. 18365\u201318398, 2023 [<a href=\"https:\/\/proceedings.mlr.press\/v202\/lai23d.html\" target=\"_blank\" rel=\"noreferrer noopener\">PMLR<\/a>][<a href=\"https:\/\/openreview.net\/forum?id=UULcrko6Hk\" target=\"_blank\" rel=\"noreferrer noopener\">OpenReview<\/a>][<a href=\"https:\/\/arxiv.org\/abs\/2210.04296\" target=\"_blank\" rel=\"noreferrer noopener\">arXiv<\/a>][<a href=\"https:\/\/github.com\/sony\/FP-diffusion\" target=\"_blank\" rel=\"noreferrer noopener\">code<\/a>][<a href=\"https:\/\/dblp.org\/rec\/conf\/icml\/LaiTMUME23.html?view=bibtex\" target=\"_blank\" rel=\"noreferrer noopener\">bibtex<\/a>]<\/li>\n\n\n\n<li>Zhi Zhong, Masato Hirano, Kazuki Shimada, Kazuya Tateishi, Shusuke Takahashi, <span style=\"text-decoration: underline;\">Yuki Mitsufuji<\/span>, \u201cAn Attention-based Approach to Hierarchical Multi-label Music Instrument Classification,\u201d in Proc. 
International Conference on Acoustics, Speech, and Signal Processing (<mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">ICASSP<\/mark>), pp. 1\u20135, 2023 [<a href=\"https:\/\/ieeexplore.ieee.org\/document\/10095162\" target=\"_blank\" rel=\"noreferrer noopener\">IEEE<\/a>][<a href=\"https:\/\/arxiv.org\/abs\/2302.08136\" target=\"_blank\" rel=\"noreferrer noopener\">arXiv<\/a>][<a href=\"https:\/\/dblp.org\/rec\/conf\/icassp\/ZhongHSTTM23.html?view=bibtex\" target=\"_blank\" rel=\"noreferrer noopener\">bibtex<\/a>]<\/li>\n\n\n\n<li>Koichi Saito, Naoki Murata, Toshimitsu Uesaka, Chieh-Hsin Lai, Yuhta Takida, Takao Fukui, <span style=\"text-decoration: underline;\">Yuki Mitsufuji<\/span>, \u201cUnsupervised Vocal Dereverberation with Diffusion-based Generative Models,\u201d in Proc. International Conference on Acoustics, Speech, and Signal Processing (<mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">ICASSP<\/mark>), 2023 [<a href=\"https:\/\/ieeexplore.ieee.org\/document\/10095761\" target=\"_blank\" rel=\"noreferrer noopener\">IEEE<\/a>][<a href=\"https:\/\/arxiv.org\/abs\/2211.04124\" target=\"_blank\" rel=\"noreferrer noopener\">arXiv<\/a>][<a href=\"https:\/\/koichi-saito-sony.github.io\/unsupervised-vocal-dereverb\/audio_samples.html\" target=\"_blank\" rel=\"noreferrer noopener\">demo<\/a>][<a href=\"https:\/\/dblp.org\/rec\/conf\/icassp\/SaitoMULTFM23.html?view=bibtex\" target=\"_blank\" rel=\"noreferrer noopener\">bibtex<\/a>]<\/li>\n\n\n\n<li>Junghyun Koo, Marco A. Mart\u00ednez-Ram\u00edrez, Wei-Hsiang Liao, Stefan Uhlich, Kyogu Lee, <span style=\"text-decoration: underline;\">Yuki Mitsufuji<\/span>, \u201cMusic Mixing Style Transfer: A Contrastive Learning Approach to Disentangle Audio Effects,\u201d in Proc. International Conference on Acoustics, Speech, and Signal Processing (<mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">ICASSP<\/mark>), 2023 [<a href=\"https:\/\/ieeexplore.ieee.org\/document\/10096458\" target=\"_blank\" rel=\"noreferrer noopener\">IEEE<\/a>][<a href=\"https:\/\/arxiv.org\/abs\/2211.02247\" target=\"_blank\" rel=\"noreferrer noopener\">arXiv<\/a>][<a href=\"https:\/\/jhtonykoo.github.io\/MixingStyleTransfer\/\" target=\"_blank\" rel=\"noreferrer noopener\">demo<\/a>][<a href=\"https:\/\/github.com\/jhtonyKoo\/e2e_music_remastering_system\" target=\"_blank\" rel=\"noreferrer noopener\">code<\/a>][<a href=\"https:\/\/dblp.org\/rec\/conf\/icassp\/KooRLULM23.html?view=bibtex\" target=\"_blank\" rel=\"noreferrer noopener\">bibtex<\/a>]<\/li>\n\n\n\n<li>Naoya Takahashi, Mayank Kumar Singh, <span style=\"text-decoration: underline;\">Yuki Mitsufuji<\/span>, \u201cHierarchical Diffusion Models for Singing Voice Neural Vocoder,\u201d in Proc. 
International Conference on Acoustics, Speech, and Signal Processing (<mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">ICASSP<\/mark>), 2023 [<a href=\"https:\/\/ieeexplore.ieee.org\/document\/10095749\" target=\"_blank\" rel=\"noreferrer noopener\">IEEE<\/a>][<a href=\"https:\/\/arxiv.org\/abs\/2210.07508\" target=\"_blank\" rel=\"noreferrer noopener\">arXiv<\/a>][<a href=\"https:\/\/t-naoya.github.io\/hdm\/\" target=\"_blank\" rel=\"noreferrer noopener\">demo<\/a>][<a href=\"https:\/\/dblp.org\/rec\/conf\/icassp\/TakahashiSM23.html?view=bibtex\" target=\"_blank\" rel=\"noreferrer noopener\">bibtex<\/a>]<\/li>\n\n\n\n<li>Kin-Wai Cheuk, Ryosuke Sawata, Toshimitsu Uesaka, Naoki Murata, Naoya Takahashi, Shusuke Takahashi, Dorien Herremans, <span style=\"text-decoration: underline;\">Yuki Mitsufuji<\/span>, \u201cDiffRoll: Diffusion-based Generative Music Transcription with Unsupervised Pretraining Capability,\u201d in Proc. International Conference on Acoustics, Speech, and Signal Processing (<mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">ICASSP<\/mark>), 2023 [<a href=\"https:\/\/ieeexplore.ieee.org\/document\/10095935\" target=\"_blank\" rel=\"noreferrer noopener\">IEEE<\/a>][<a href=\"https:\/\/arxiv.org\/abs\/2210.05148\" target=\"_blank\" rel=\"noreferrer noopener\">arXiv<\/a>][<a href=\"https:\/\/sony.github.io\/DiffRoll\/\" target=\"_blank\" rel=\"noreferrer noopener\">demo<\/a>][<a href=\"https:\/\/github.com\/sony\/DiffRoll\" target=\"_blank\" rel=\"noreferrer noopener\">code<\/a>][<a href=\"https:\/\/dblp.org\/rec\/conf\/icassp\/CheukSUMTTHM23.html?view=bibtex\" target=\"_blank\" rel=\"noreferrer noopener\">bibtex<\/a>]<\/li>\n\n\n\n<li>Hao-Wen Dong, Naoya Takahashi, <span style=\"text-decoration: underline;\">Yuki Mitsufuji<\/span>, Julian McAuley, Taylor Berg-Kirkpatrick, \u201cCLIPSep: Learning Text-queried Sound Separation with Noisy Unlabeled Videos,\u201d in Proc. International Conference on Learning Representations (<mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">ICLR<\/mark>), 2023 [<a href=\"https:\/\/openreview.net\/forum?id=H-T3F0dMbyj\" target=\"_blank\" rel=\"noreferrer noopener\">OpenReview<\/a>][<a href=\"https:\/\/arxiv.org\/abs\/2212.07065\" target=\"_blank\" rel=\"noreferrer noopener\">arXiv<\/a>][<a href=\"https:\/\/sony.github.io\/CLIPSep\/\" target=\"_blank\" rel=\"noreferrer noopener\">demo<\/a>][<a href=\"https:\/\/github.com\/sony\/CLIPSep\" target=\"_blank\" rel=\"noreferrer noopener\">code<\/a>][<a href=\"https:\/\/dblp.org\/rec\/conf\/iclr\/DongTMMB23.html?view=bibtex\" target=\"_blank\" rel=\"noreferrer noopener\">bibtex<\/a>]<\/li>\n\n\n\n<li>Silin Gao, Jena D. 
Hwang, Saya Kanno, Hiromi Wakaki, <span style=\"text-decoration: underline;\">Yuki Mitsufuji<\/span>, Antoine Bosselut, \u201cComFact: A Benchmark for Linking Contextual Commonsense Knowledge,\u201d in Findings of the Conference on Empirical Methods in Natural Language Processing (<mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">EMNLP<\/mark>), pp. 1656\u20131675, 2022 [<a rel=\"noreferrer noopener\" href=\"https:\/\/aclanthology.org\/2022.findings-emnlp.120\/\" target=\"_blank\">ACL<\/a>][<a rel=\"noreferrer noopener\" href=\"https:\/\/arxiv.org\/abs\/2210.12678\" target=\"_blank\">arXiv<\/a>][<a rel=\"noreferrer noopener\" href=\"https:\/\/github.com\/Silin159\/ComFact\" target=\"_blank\">code<\/a>][<a rel=\"noreferrer noopener\" href=\"https:\/\/aclanthology.org\/2022.findings-emnlp.120.bib\" target=\"_blank\">bibtex<\/a>]<\/li>\n\n\n\n<li>Marco A. Mart\u00ednez Ram\u00edrez, WeiHsiang Liao, Giorgio Fabbro, Stefan Uhlich, Chihiro Nagashima, <span style=\"text-decoration: underline;\">Yuki Mitsufuji<\/span>, \u201cAutomatic Music Mixing with Deep Learning and Out-of-Domain Data,\u201d in Proc. International Society for Music Information Retrieval (<mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">ISMIR<\/mark>) Conference, pp. 411\u2013418, 2022 [<a href=\"https:\/\/ismir2022program.ismir.net\/poster_11.html\" target=\"_blank\" rel=\"noreferrer noopener\">ISMIR<\/a>][<a href=\"https:\/\/arxiv.org\/abs\/2208.11428\" target=\"_blank\" rel=\"noreferrer noopener\">arXiv<\/a>][<a href=\"https:\/\/marco-martinez-sony.github.io\/FxNorm-automix\/\" target=\"_blank\" rel=\"noreferrer noopener\">demo<\/a>][<a href=\"https:\/\/github.com\/sony\/FxNorm-automix\" target=\"_blank\" rel=\"noreferrer noopener\">code<\/a>]<\/li>\n\n\n\n<li>Johannes Imort, Giorgio Fabbro, Marco A. Mart\u00ednez Ram\u00edrez, Stefan Uhlich, Yuichiro Koyama, <span style=\"text-decoration: underline;\">Yuki Mitsufuji<\/span>, \u201cDistortion Audio Effects: Learning How to Recover the Clean Signal,\u201d in Proc. International Society for Music Information Retrieval (<mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">ISMIR<\/mark>) Conference, pp. 218\u2013225, 2022 [<a href=\"https:\/\/ismir2022program.ismir.net\/poster_113.html\" target=\"_blank\" rel=\"noreferrer noopener\">ISMIR<\/a>][<a href=\"https:\/\/arxiv.org\/abs\/2202.01664\" target=\"_blank\" rel=\"noreferrer noopener\">arXiv<\/a>][<a href=\"https:\/\/joimort.github.io\/distortionremoval\/\" target=\"_blank\" rel=\"noreferrer noopener\">demo<\/a>]<\/li>\n\n\n\n<li>Yuhta Takida, Takashi Shibuya, WeiHsiang Liao, Chieh-Hsin Lai, Junki Ohmura, Toshimitsu Uesaka, Naoki Murata, Shusuke Takahashi, Toshiyuki Kumakura, <span style=\"text-decoration: underline;\">Yuki Mitsufuji<\/span>, \u201cSQ-VAE: Variational Bayes on Discrete Representation with Self-annealed Stochastic Quantization,\u201d in Proc. 
International Conference on Machine Learning (<mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">ICML<\/mark>), pp. 20987\u201321012, 2022 [<a rel=\"noreferrer noopener\" href=\"https:\/\/proceedings.mlr.press\/v162\/takida22a.html\" target=\"_blank\">PMLR<\/a>][<a rel=\"noreferrer noopener\" href=\"https:\/\/arxiv.org\/abs\/2205.07547\" target=\"_blank\">arXiv<\/a>][<a rel=\"noreferrer noopener\" href=\"https:\/\/github.com\/sony\/sqvae\" target=\"_blank\">code<\/a>][<a rel=\"noreferrer noopener\" href=\"https:\/\/dblp.org\/rec\/conf\/icml\/Takida0LLOUMTKM22.html?view=bibtex\" target=\"_blank\">bibtex<\/a>]<\/li>\n\n\n\n<li>Kazuki Shimada, Yuichiro Koyama, Shusuke Takahashi, Naoya Takahashi, Emiru Tsunoo, <span style=\"text-decoration: underline;\">Yuki Mitsufuji<\/span>, \u201cMulti-ACCDOA: Localizing and Detecting Overlapping Sounds from the Same Class with Auxiliary Duplicating Permutation Invariant Training,\u201d in Proc. International Conference on Acoustics, Speech, and Signal Processing (<mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">ICASSP<\/mark>), pp.&nbsp;316\u2013320, 2022 [<a rel=\"noreferrer noopener\" href=\"https:\/\/ieeexplore.ieee.org\/document\/9746384\" target=\"_blank\">IEEE<\/a>][<a rel=\"noreferrer noopener\" href=\"https:\/\/arxiv.org\/abs\/2110.07124\" target=\"_blank\">arXiv<\/a>][<a rel=\"noreferrer noopener\" href=\"https:\/\/dblp.org\/rec\/conf\/icassp\/ShimadaKTTTM22.html?view=bibtex\" target=\"_blank\">bibtex<\/a>]<\/li>\n\n\n\n<li>Bo-Yu Chen, Wei-Han Hsu, Wei-Hsiang Liao, Marco A. Mart\u00ednez Ram\u00edrez, <span style=\"text-decoration: underline;\">Yuki Mitsufuji<\/span>, Yi-Hsuan Yang, \u201cAutomatic DJ Transitions with Differentiable Audio Effects and Generative Adversarial Networks,\u201d in Proc. International Conference on Acoustics, Speech, and Signal Processing (<mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">ICASSP<\/mark>), pp.&nbsp;466\u2013470, 2022 [<a rel=\"noreferrer noopener\" href=\"https:\/\/ieeexplore.ieee.org\/document\/9746663\" target=\"_blank\">IEEE<\/a>][<a rel=\"noreferrer noopener\" href=\"https:\/\/arxiv.org\/abs\/2110.06525\" target=\"_blank\">arXiv<\/a>][<a href=\"https:\/\/paulyuchen.com\/djtransgan-icassp2022\/\">demo<\/a>][<a rel=\"noreferrer noopener\" href=\"https:\/\/github.com\/ChenPaulYu\/DJtransGAN\" target=\"_blank\">code<\/a>][<a rel=\"noreferrer noopener\" href=\"https:\/\/dblp.org\/rec\/conf\/icassp\/ChenHLRMY22.html?view=bibtex\" target=\"_blank\">bibtex<\/a>]<\/li>\n\n\n\n<li>Yuichiro Koyama, Kazuhide Shigemi, Masafumi Takahashi, Kazuki Shimada, Naoya Takahashi, Emiru Tsunoo, Shusuke Takahashi, <span style=\"text-decoration: underline;\">Yuki Mitsufuji<\/span>, <span style=\"color: initial;\">\u201c<\/span>Spatial Data Augmentation with Simulated Room Impulse Responses for Sound Event Localization and Detection,<span style=\"color: initial;\">\u201d<\/span> in Proc. 
International Conference on Acoustics, Speech, and Signal Processing (<mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">ICASSP<\/mark>), pp.&nbsp;8872\u20138876, 2022 [<a rel=\"noreferrer noopener\" href=\"https:\/\/ieeexplore.ieee.org\/document\/9746754\" target=\"_blank\">IEEE<\/a>][<a rel=\"noreferrer noopener\" href=\"https:\/\/arxiv.org\/abs\/2110.06501\" target=\"_blank\">arXiv<\/a>][<a rel=\"noreferrer noopener\" href=\"https:\/\/dblp.org\/rec\/conf\/icassp\/KoyamaSTSTTTM22.html?view=bibtex\" target=\"_blank\">bibtex<\/a>]<\/li>\n\n\n\n<li>Yuichiro Koyama, Naoki Murata, Stefan Uhlich, Giorgio Fabbro, Shusuke Takahashi, <span style=\"text-decoration: underline;\">Yuki Mitsufuji<\/span>, <span style=\"color: initial;\">\u201c<\/span>Music Source Separation with Deep Equilibrium Models,\u201d in Proc. International Conference on Acoustics, Speech, and Signal Processing (<mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">ICASSP<\/mark>), pp.&nbsp;296\u2013300, 2022 [<a rel=\"noreferrer noopener\" href=\"https:\/\/ieeexplore.ieee.org\/document\/9746317\" target=\"_blank\">IEEE<\/a>][<a rel=\"noreferrer noopener\" href=\"https:\/\/arxiv.org\/abs\/2110.06494\" target=\"_blank\">arXiv<\/a>][<a rel=\"noreferrer noopener\" href=\"https:\/\/dblp.org\/rec\/conf\/icassp\/KoyamaMUFTM22.html?view=bibtex\" target=\"_blank\">bibtex<\/a>]<\/li>\n\n\n\n<li>Ricardo Falcon-Perez, Kazuki Shimada, Yuichiro Koyama, Shusuke Takahashi, <span style=\"text-decoration: underline;\">Yuki Mitsufuji<\/span>, <span style=\"color: initial;\">\u201c<\/span>Spatial Mixup: Directional Loudness Modification as Data Augmentation for Sound Event Localization and Detection,\u201d in Proc. International Conference on Acoustics, Speech, and Signal Processing (<mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">ICASSP<\/mark>), pp.&nbsp;431\u2013435, 2022 [<a rel=\"noreferrer noopener\" href=\"https:\/\/ieeexplore.ieee.org\/document\/9747312\" target=\"_blank\">IEEE<\/a>][<a rel=\"noreferrer noopener\" href=\"https:\/\/arxiv.org\/abs\/2110.06126\" target=\"_blank\">arXiv<\/a>][<a rel=\"noreferrer noopener\" href=\"https:\/\/github.com\/rfalcon100\/Spatial-Mixup-Pytorch\" target=\"_blank\">code<\/a>][<a rel=\"noreferrer noopener\" href=\"https:\/\/dblp.org\/rec\/conf\/icassp\/PerezSKTM22.html?view=bibtex\" target=\"_blank\">bibtex<\/a>]<\/li>\n\n\n\n<li>Naoya Takahashi, <span style=\"text-decoration: underline;\">Yuki Mitsufuji<\/span>, <span style=\"color: initial;\">\u201c<\/span>Amicable Examples for Informed Source Separation,\u201d in Proc. International Conference on Acoustics, Speech, and Signal Processing (<mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">ICASSP<\/mark>), pp. 
241\u2013245, 2022 [<a rel=\"noreferrer noopener\" href=\"https:\/\/ieeexplore.ieee.org\/document\/9746486\" target=\"_blank\">IEEE<\/a>][<a rel=\"noreferrer noopener\" href=\"https:\/\/arxiv.org\/abs\/2110.05059\" target=\"_blank\">arXiv<\/a>][<a rel=\"noreferrer noopener\" href=\"https:\/\/dblp.org\/rec\/conf\/icassp\/TakahashiM22.html?view=bibtex\" target=\"_blank\">bibtex<\/a>]<\/li>\n\n\n\n<li>Naoya Takahashi, Mayank Kumar Singh, <span style=\"text-decoration: underline;\">Yuki Mitsufuji<\/span>, <span style=\"color: initial;\">\u201c<\/span>Source Mixing and Separation Robust Audio Steganography,\u201d in Proc. International Conference on Acoustics, Speech, and Signal Processing (<mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">ICASSP<\/mark>), pp. 4368\u20134372, 2022 [<a rel=\"noreferrer noopener\" href=\"https:\/\/arxiv.org\/abs\/2110.05054\" target=\"_blank\">arXiv<\/a>]<\/li>\n\n\n\n<li>Yasuhide Hyodo, Chihiro Sugai, Junya Suzuki, Masafumi Takahashi, Masahiko Koizumi, Asako Tomura, <span style=\"text-decoration: underline;\">Yuki Mitsufuji<\/span>, Yota Komoriya<span style=\"color: initial;\">, <\/span>\u201cPsychophysiological Effect of Immersive Spatial Audio Experience Enhanced Using Sound Field Synthesis<span style=\"color: initial;\">,\u201d <\/span>in Proc. International Conference on Affective Computing &amp; Intelligent Interaction (<mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">ACII<\/mark>), pp.&nbsp;1\u20138, 2021 [<a rel=\"noreferrer noopener\" href=\"https:\/\/ieeexplore.ieee.org\/abstract\/document\/9597435\" target=\"_blank\">IEEE<\/a>][<a rel=\"noreferrer noopener\" href=\"https:\/\/dblp.org\/rec\/conf\/acii\/HyodoSSTKTMK21.html?view=bibtex\" target=\"_blank\">bibtex<\/a>]<\/li>\n\n\n\n<li>Naoya Takahashi, Mayank Kumar Singh, <span style=\"text-decoration: underline;\">Yuki Mitsufuji<\/span>, \u201cHierarchical Disentangled Representation Learning for Singing Voice Conversion,\u201d in Proc. International Joint Conference on Neural Networks (<mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">IJCNN<\/mark>), pp. 1\u20137, 2021 [<a rel=\"noreferrer noopener\" href=\"https:\/\/ieeexplore.ieee.org\/document\/9533583\" target=\"_blank\">IEEE<\/a>][<a rel=\"noreferrer noopener\" href=\"https:\/\/arxiv.org\/abs\/2101.06842\" target=\"_blank\">arXiv<\/a>][<a rel=\"noreferrer noopener\" href=\"https:\/\/dblp.org\/rec\/conf\/ijcnn\/TakahashiSM21.html?view=bibtex\" target=\"_blank\">bibtex<\/a>]<\/li>\n\n\n\n<li>Naoya Takahashi, <span style=\"text-decoration: underline;\">Yuki Mitsufuji<\/span>, \u201cDensely Connected Multi-Dilated Convolutional Networks for Dense Prediction Tasks,\u201d in Proc. 
IEEE\/CVF Conference on Computer Vision and Pattern Recognition (<mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">CVPR<\/mark>), pp.&nbsp;993\u20131002, 2021 [<a rel=\"noreferrer noopener\" href=\"https:\/\/openaccess.thecvf.com\/content\/CVPR2021\/html\/Takahashi_Densely_Connected_Multi-Dilated_Convolutional_Networks_for_Dense_Prediction_Tasks_CVPR_2021_paper.html\" target=\"_blank\">CVF<\/a>][<a rel=\"noreferrer noopener\" href=\"https:\/\/ieeexplore.ieee.org\/document\/9578294\" target=\"_blank\">IEEE<\/a>][<a rel=\"noreferrer noopener\" href=\"https:\/\/arxiv.org\/abs\/2011.11844\" target=\"_blank\">arXiv<\/a>][<a rel=\"noreferrer noopener\" href=\"https:\/\/github.com\/sony\/ai-research-code\/tree\/master\/d3net\" target=\"_blank\">code<\/a>][<a rel=\"noreferrer noopener\" href=\"https:\/\/dblp.org\/rec\/conf\/cvpr\/TakahashiM21.html?view=bibtex\" target=\"_blank\">bibtex<\/a>]<\/li>\n\n\n\n<li>Kazuki Shimada, Yuichiro Koyama, Naoya Takahashi, Shusuke Takahashi, <span style=\"text-decoration: underline;\">Yuki Mitsufuji<\/span>, \u201cACCDOA: Activity-Coupled Cartesian Direction of Arrival Representation for Sound Event Localization And Detection,\u201d in Proc. International Conference on Acoustics, Speech, and Signal Processing (<mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">ICASSP<\/mark>), pp. 915\u2013919, 2021 [<a rel=\"noreferrer noopener\" href=\"https:\/\/ieeexplore.ieee.org\/document\/9413609\" target=\"_blank\">IEEE<\/a>][<a rel=\"noreferrer noopener\" href=\"https:\/\/arxiv.org\/abs\/2010.15306\" target=\"_blank\">arXiv<\/a>][<a rel=\"noreferrer noopener\" href=\"https:\/\/github.com\/sharathadavanne\/seld-dcase2021\" target=\"_blank\">code<\/a>][<a rel=\"noreferrer noopener\" href=\"https:\/\/dblp.org\/rec\/conf\/icassp\/ShimadaKTTM21.html?view=bibtex\" target=\"_blank\">bibtex<\/a>]<\/li>\n\n\n\n<li>Naoya Takahashi, Shota Inoue, <span style=\"text-decoration: underline;\">Yuki Mitsufuji<\/span>, \u201cAdversarial Attacks on Audio Source Separation,\u201d in Proc. International Conference on Acoustics, Speech, and Signal Processing (<mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">ICASSP<\/mark>), pp. 521\u2013525, 2021 [<a rel=\"noreferrer noopener\" href=\"https:\/\/ieeexplore.ieee.org\/document\/9414844\" target=\"_blank\">IEEE<\/a>][<a rel=\"noreferrer noopener\" href=\"https:\/\/arxiv.org\/abs\/2010.03164\" target=\"_blank\">arXiv<\/a>][<a rel=\"noreferrer noopener\" href=\"https:\/\/dblp.org\/rec\/conf\/icassp\/TakahashiIM21.html?view=bibtex\" target=\"_blank\">bibtex<\/a>]<\/li>\n\n\n\n<li>Ryosuke Sawata, Stefan Uhlich, Shusuke Takahashi, <span style=\"text-decoration: underline;\">Yuki Mitsufuji<\/span>, \u201cAll for One and One for All: Improving Music Separation by Bridging Networks,\u201d in Proc. International Conference on Acoustics, Speech, and Signal Processing (<mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">ICASSP<\/mark>), pp. 
51\u201355, 2021 [<a rel=\"noreferrer noopener\" href=\"https:\/\/ieeexplore.ieee.org\/document\/9414044\" target=\"_blank\">IEEE<\/a>][<a rel=\"noreferrer noopener\" href=\"https:\/\/arxiv.org\/abs\/2010.04228\" target=\"_blank\">arXiv<\/a>][<a rel=\"noreferrer noopener\" href=\"https:\/\/github.com\/asteroid-team\/asteroid\/tree\/master\/egs\/musdb18\/X-UMX\" target=\"_blank\">code<\/a>][<a rel=\"noreferrer noopener\" href=\"https:\/\/dblp.org\/rec\/conf\/icassp\/SawataUTM21.html?view=bibtex\" target=\"_blank\">bibtex<\/a>]<\/li>\n\n\n\n<li>Yu Maeno, Yuhta Takida, Naoki Murata, <span style=\"text-decoration: underline;\">Yuki Mitsufuji<\/span>, \u201cArray-Geometry-Aware Spatial Active Noise Control Based on Direction-of-Arrival Weighting,\u201d in Proc. International Conference on Acoustics, Speech, and Signal Processing (<mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">ICASSP<\/mark>), pp. 8414\u20138418, 2020 [<a rel=\"noreferrer noopener\" href=\"https:\/\/ieeexplore.ieee.org\/document\/9054013\" target=\"_blank\">IEEE<\/a>][<a rel=\"noreferrer noopener\" href=\"https:\/\/dblp.org\/rec\/conf\/icassp\/MaenoTMM20.html?view=bibtex\" target=\"_blank\">bibtex<\/a>]<\/li>\n\n\n\n<li>Naoya Takahashi, Mayank Kumar Singh, Sakya Basak, Parthasaarathy Sudarsanam, Sriram Ganapathy, <span style=\"text-decoration: underline;\">Yuki Mitsufuji<\/span>, \u201cImproving Voice Separation by Incorporating End-To-End Speech Recognition,\u201d in Proc. International Conference on Acoustics, Speech, and Signal Processing (<mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">ICASSP<\/mark>), pp. 41\u201345, 2020 [<a rel=\"noreferrer noopener\" href=\"https:\/\/ieeexplore.ieee.org\/document\/9053845\" target=\"_blank\">IEEE<\/a>][<a rel=\"noreferrer noopener\" href=\"https:\/\/arxiv.org\/abs\/1911.12928\" target=\"_blank\">arXiv<\/a>][<a rel=\"noreferrer noopener\" href=\"https:\/\/dblp.org\/rec\/conf\/icassp\/TakahashiSBPGM20.html?view=bibtex\" target=\"_blank\">bibtex<\/a>]<\/li>\n\n\n\n<li>Naoki Murata, Jihui Zhang, Yu Maeno, <span style=\"text-decoration: underline;\">Yuki Mitsufuji<\/span>, \u201cGlobal and Local Mode Domain Adaptive Algorithms for Spatial Active Noise Control Using Higher-Order Sources,\u201d in Proc. International Conference on Acoustics, Speech, and Signal Processing (<mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">ICASSP<\/mark>), pp. 526\u2013530, 2019 [<a rel=\"noreferrer noopener\" href=\"https:\/\/ieeexplore.ieee.org\/document\/8682933\" target=\"_blank\">IEEE<\/a>][<a rel=\"noreferrer noopener\" href=\"https:\/\/dblp.org\/rec\/conf\/icassp\/MurataZMM19.html?view=bibtex\" target=\"_blank\">bibtex<\/a>]<\/li>\n\n\n\n<li>Naoya Takahashi, Sudarsanam Parthasaarathy, Nabarun Goswami, <span style=\"text-decoration: underline;\">Yuki Mitsufuji<\/span>, \u201cRecursive Speech Separation for Unknown Number of Speakers,\u201d in Proc. Annual Conference of the International Speech Communication Association (<mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">INTERSPEECH<\/mark>), pp. 
1348\u20131352, 2019 [<a rel=\"noreferrer noopener\" href=\"https:\/\/www.isca-speech.org\/archive\/interspeech_2019\/takahashi19_interspeech.html\" target=\"_blank\">ISCA<\/a>][<a rel=\"noreferrer noopener\" href=\"https:\/\/arxiv.org\/abs\/1904.03065\" target=\"_blank\">arXiv<\/a>][<a rel=\"noreferrer noopener\" href=\"https:\/\/dblp.org\/rec\/conf\/interspeech\/TakahashiPGM19.html?view=bibtex\" target=\"_blank\">bibtex<\/a>]<\/li>\n\n\n\n<li>Naoya Takahashi, Purvi Agrawal, Nabarun Goswami, <span style=\"text-decoration: underline;\">Yuki Mitsufuji<\/span>, \u201cPhaseNet: Discretized Phase Modeling with Deep Neural Networks for Audio Source Separation,\u201d in Proc. Annual Conference of the International Speech Communication Association (<mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">INTERSPEECH<\/mark>), pp. 2713\u20132717, 2018 <span style=\"color: initial;\">[<\/span><a rel=\"noreferrer noopener\" href=\"https:\/\/www.isca-speech.org\/archive\/interspeech_2018\/takahashi18_interspeech.html\" target=\"_blank\">ISCA<\/a><span style=\"color: initial;\">]<\/span>[<a rel=\"noreferrer noopener\" href=\"https:\/\/dblp.org\/rec\/conf\/interspeech\/TakahashiAGM18.html?view=bibtex\" target=\"_blank\">bibtex<\/a>]<\/li>\n\n\n\n<li>Wei-Hsiang Liao, <span style=\"text-decoration: underline;\">Yuki Mitsufuji<\/span>, Keiichi Osako, Kazunobu Ohkuri, \u201cMicrophone Array Geometry for Two Dimensional Broadband Sound Field Recording,\u201d in Proc. 145th Audio Engineering Society (<mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">AES<\/mark>) Convention, 2018 [<a rel=\"noreferrer noopener\" href=\"https:\/\/www.aes.org\/e-lib\/browse.cfm?elib=19808\" target=\"_blank\">AES<\/a>][<a rel=\"noreferrer noopener\" href=\"http:\/\/www.aes.org\/e-lib\/browse.cfm?elib=19808&amp;fmt=bibtex\" target=\"_blank\">bibtex<\/a>]<\/li>\n\n\n\n<li>Yu Maeno, <span style=\"text-decoration: underline;\">Yuki Mitsufuji<\/span>, Prasanga N. Samarasinghe, Thushara D. Abhayapala, \u201cMode-domain Spatial Active Noise Control Using Multiple Circular Arrays,\u201d in Proc. International Workshop on Acoustic Signal Enhancement (<mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">IWAENC<\/mark>), pp. 441\u2013445, 2018 [<a rel=\"noreferrer noopener\" href=\"https:\/\/ieeexplore.ieee.org\/document\/8521386\" target=\"_blank\">IEEE<\/a>][<a rel=\"noreferrer noopener\" href=\"https:\/\/dblp.org\/rec\/conf\/iwaenc\/MaenoMSA18.html?view=bibtex\" target=\"_blank\">bibtex<\/a>]<\/li>\n\n\n\n<li>Naoya Takahashi, Nabarun Goswami, <span style=\"text-decoration: underline;\">Yuki Mitsufuji<\/span>, \u201cMMDenseLSTM: An Efficient Combination of Convolutional and Recurrent Neural Networks for Audio Source Separation,\u201d in Proc. 
International Workshop on Acoustic Signal Enhancement (<mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">IWAENC<\/mark>), 2018 [<a rel=\"noreferrer noopener\" href=\"https:\/\/ieeexplore.ieee.org\/document\/8521383\" target=\"_blank\">IEEE<\/a>][<a rel=\"noreferrer noopener\" href=\"https:\/\/arxiv.org\/abs\/1805.02410\" target=\"_blank\">arXiv<\/a>][<a rel=\"noreferrer noopener\" href=\"https:\/\/dblp.org\/rec\/conf\/iwaenc\/TakahashiGM18.html?view=bibtex\" target=\"_blank\">bibtex<\/a>]<\/li>\n\n\n\n<li><span style=\"text-decoration: underline;\">Yuki Mitsufuji<\/span>, Asako Tomura, Kazunobu Ohkuri, \u201cCreating a Highly-Realistic \u201cAcoustic Vessel Odyssey\u201d Using Sound Field Synthesis with 576 Loudspeakers,\u201d in Proc. Audio Engineering Society (<mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">AES<\/mark>) Conference on Spatial Reproduction-Aesthetics and Science, 2018 [<a href=\"https:\/\/www.aes.org\/e-lib\/browse.cfm?elib=19648\" target=\"_blank\" rel=\"noreferrer noopener\">AES<\/a>][<a href=\"http:\/\/www.aes.org\/e-lib\/browse.cfm?elib=19648&amp;fmt=bibtex\" target=\"_blank\" rel=\"noreferrer noopener\">bibtex<\/a>]<\/li>\n\n\n\n<li>Yu Maeno, <span style=\"text-decoration: underline;\">Yuki Mitsufuji<\/span>, Thushara D. Abhayapala, \u201cMode Domain Spatial Active Noise Control Using Sparse Signal Representation,\u201d in Proc. International Conference on Acoustics, Speech, and Signal Processing (<mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">ICASSP<\/mark>), pp. 211\u2013215, 2018 [<a rel=\"noreferrer noopener\" href=\"https:\/\/ieeexplore.ieee.org\/document\/8461482\" target=\"_blank\">IEEE<\/a>][<a rel=\"noreferrer noopener\" href=\"https:\/\/arxiv.org\/abs\/1803.00187\" target=\"_blank\">arXiv<\/a>][<a rel=\"noreferrer noopener\" href=\"https:\/\/dblp.org\/rec\/conf\/icassp\/MaenoMA18.html?view=bibtex\" target=\"_blank\">bibtex<\/a>]<\/li>\n\n\n\n<li>Naoya Takahashi, <span style=\"text-decoration: underline;\">Yuki Mitsufuji<\/span>, \u201cMulti-Scale Multi-Band DenseNets for Audio Source Separation,\u201d in Proc. IEEE Workshop on Applications of Signal Processing to Audio and Acoustics (<mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">WASPAA<\/mark>), pp. 21\u201325, 2017 [<a rel=\"noreferrer noopener\" href=\"https:\/\/ieeexplore.ieee.org\/document\/8169987\" target=\"_blank\">IEEE<\/a>][<a rel=\"noreferrer noopener\" href=\"https:\/\/arxiv.org\/abs\/1706.09588\" target=\"_blank\">arXiv<\/a>][<a rel=\"noreferrer noopener\" href=\"https:\/\/dblp.org\/rec\/conf\/waspaa\/TakahashiM17.html?view=bibtex\" target=\"_blank\">bibtex<\/a>]<\/li>\n\n\n\n<li>Stefan Uhlich, Marcello Porcu, Franck Giron, Michael Enenkl, Thomas Kemp, Naoya Takahashi, <span style=\"text-decoration: underline;\">Yuki Mitsufuji<\/span>, \u201cImproving Music Source Separation Based on Deep Neural Networks Through Data Augmentation and Network Blending,\u201d in Proc. International Conference on Acoustics, Speech, and Signal Processing (<mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">ICASSP<\/mark>), pp. 
261\u2013265, 2017 [<a rel=\"noreferrer noopener\" href=\"https:\/\/ieeexplore.ieee.org\/document\/7952158\" target=\"_blank\">IEEE<\/a>][<a rel=\"noreferrer noopener\" href=\"https:\/\/dblp.org\/rec\/conf\/icassp\/UhlichPGEKTM17.html?view=bibtex\" target=\"_blank\">bibtex<\/a>]<\/li>\n\n\n\n<li>Keiichi Osako, <span style=\"text-decoration: underline;\">Yuki Mitsufuji<\/span>, Rita Singh, Bhiksha Raj, \u201cSupervised Monaural Source Separation Based on Autoencoders,\u201d in Proc. International Conference on Acoustics, Speech, and Signal Processing (<mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">ICASSP<\/mark>), pp. 11\u201315, 2017 [<a rel=\"noreferrer noopener\" href=\"https:\/\/ieeexplore.ieee.org\/document\/7951788\" target=\"_blank\">IEEE<\/a>][<a rel=\"noreferrer noopener\" href=\"https:\/\/dblp.org\/rec\/conf\/icassp\/OsakoMSR17.html?view=bibtex\" target=\"_blank\">bibtex<\/a>]<\/li>\n\n\n\n<li><span style=\"text-decoration: underline;\">Yuki Mitsufuji<\/span>, Shoichi Koyama, Hiroshi Saruwatari, \u201cMultichannel Blind Source Separation Based on Non-Negative Tensor Factorization in Wavenumber Domain,\u201d in Proc. International Conference on Acoustics, Speech, and Signal Processing (<mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">ICASSP<\/mark>), pp. 56\u201360, 2016 [<a rel=\"noreferrer noopener\" href=\"https:\/\/ieeexplore.ieee.org\/document\/7471636\" target=\"_blank\">IEEE<\/a>][<a rel=\"noreferrer noopener\" href=\"https:\/\/dblp.org\/rec\/conf\/icassp\/MitsufujiKS16.html?view=bibtex\" target=\"_blank\">bibtex<\/a>]<\/li>\n\n\n\n<li>Stefan Uhlich, Franck Giron, <span style=\"text-decoration: underline;\">Yuki Mitsufuji<\/span>, \u201cDeep Neural Network Based Instrument Extraction from Music,\u201d in Proc. International Conference on Acoustics, Speech, and Signal Processing (<mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">ICASSP<\/mark>), pp. 2135\u20132139, 2015 [<a rel=\"noreferrer noopener\" href=\"https:\/\/ieeexplore.ieee.org\/document\/7178348\" target=\"_blank\">IEEE<\/a>][<a rel=\"noreferrer noopener\" href=\"https:\/\/dblp.org\/rec\/conf\/icassp\/UhlichGM15.html?view=bibtex\" target=\"_blank\">bibtex<\/a>]<\/li>\n\n\n\n<li>Xin Guo, Stefan Uhlich, <span style=\"text-decoration: underline;\">Yuki Mitsufuji<\/span>, \u201cNMF-Based Blind Source Separation Using a Linear Predictive Coding Error Clustering Criterion,\u201d in Proc. International Conference on Acoustics, Speech, and Signal Processing (<mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">ICASSP<\/mark>), pp. 261\u2013265, 2015 [<a rel=\"noreferrer noopener\" href=\"https:\/\/ieeexplore.ieee.org\/document\/7177972\" target=\"_blank\">IEEE<\/a>][<a rel=\"noreferrer noopener\" href=\"https:\/\/dblp.org\/rec\/conf\/icassp\/GuoUM15.html?view=bibtex\" target=\"_blank\">bibtex<\/a>]<\/li>\n\n\n\n<li><span style=\"text-decoration: underline;\">Yuki Mitsufuji<\/span>, Marco Liuni, Alex Baker, Axel R\u00f6bel, \u201cOnline Non-Negative Tensor Deconvolution for Source Detection in 3DTV Audio,\u201d in Proc. International Conference on Acoustics, Speech, and Signal Processing (<mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">ICASSP<\/mark>), pp. 
3082\u20133086, 2014 [<a rel=\"noreferrer noopener\" href=\"https:\/\/ieeexplore.ieee.org\/document\/6854167\" target=\"_blank\">IEEE<\/a>][<a rel=\"noreferrer noopener\" href=\"https:\/\/dblp.org\/rec\/conf\/icassp\/MitsufujiLBR14.html?view=bibtex\" target=\"_blank\">bibtex<\/a>]<\/li>\n\n\n\n<li><span style=\"text-decoration: underline;\">Yuki Mitsufuji<\/span>, Axel R\u00f6bel, \u201cSound Source Separation Based on Non-Negative Tensor Factorization Incorporating Spatial Cue as Prior Knowledge,\u201d in Proc. International Conference on Acoustics, Speech, and Signal Processing (<mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">ICASSP<\/mark>), pp. 71\u201375, 2013 [<a rel=\"noreferrer noopener\" href=\"https:\/\/ieeexplore.ieee.org\/document\/6637611\" target=\"_blank\">IEEE<\/a>][<a rel=\"noreferrer noopener\" href=\"https:\/\/dblp.org\/rec\/conf\/icassp\/MitsufujiR13.html?view=bibtex\" target=\"_blank\">bibtex<\/a>]<\/li>\n<\/ol>\n\n\n\n<div style=\"height:14px\" aria-hidden=\"true\" class=\"wp-block-spacer\"><\/div>\n\n\n<div class=\"su-divider su-divider-style-default\" style=\"margin:16px 0;border-width:1px;border-color:#000000\"><\/div>\n\n\n\n<h3 class=\"wp-block-heading\" id=\"workshop_papers_and_demos\">Workshop Papers and Demos<\/h3>\n\n\n\n<ol class=\"wp-block-list\">\n<li>Honggyu An, Jaewoo Jung, Mungyeom Kim, Chaehyun Kim, Minkyeong Jeon, Jisang Han, Kazumi Fukuda, Takuya Narihira, Hyunah Ko, Junsu Kim, Sunghwan Hong, <span style=\"text-decoration: underline;\">Yuki Mitsufuji<\/span>, Seungryong Kim, \u201cC3G: Learning Compact 3D Representations with 2K Gaussians,\u201d CVPR Workshop on Open-World 3D Scene Understanding with Foundation Models (<mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">OpenSUN3D<\/mark>), 2026 [<a href=\"https:\/\/openreview.net\/forum?id=rLkBZS6iuE\" target=\"_blank\" rel=\"noreferrer noopener\">OpenReview<\/a>][<a href=\"https:\/\/arxiv.org\/abs\/2512.04021\" target=\"_blank\" rel=\"noreferrer noopener\">arXiv<\/a>]<\/li>\n\n\n\n<li>Kazuki Ozeki, Shun Kenney, Yuto Shibata, Eisuke Takeuchi, Takuya Narihira, Kazumi Fukuda, Ryosuke Sawata, <span style=\"text-decoration: underline;\">Yuki Mitsufuji<\/span>, Yoshimitsu Aoki, \u201c4D Reconstruction from Sparse Dynamic Cameras,\u201d CVPR Workshop on 4D Vision: Modeling the Dynamic World (<mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">CVPR 4DV<\/mark>), 2026 [<a href=\"https:\/\/openreview.net\/forum?id=nu6edbOr1Z\" target=\"_blank\" rel=\"noreferrer noopener\">OpenReview<\/a>]<\/li>\n\n\n\n<li>Yongyi Zang, Jiarui Hai, Wanying Ge, Qiuqiang Kong, Zheqi Dai, Helin Wang, <span style=\"text-decoration: underline;\">Yuki Mitsufuji<\/span>, Mark D. Plumbley, \u201cSummary of The Inaugural Music Source Restoration Challenge,\u201d in Proc. IEEE ICASSP Music Source Restoration (<mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">ICASSP MSR<\/mark>) Challenge, 2026 [<a href=\"https:\/\/ieeexplore.ieee.org\/document\/11462762\" target=\"_blank\" rel=\"noreferrer noopener\">IEEE<\/a>][<a href=\"https:\/\/arxiv.org\/abs\/2601.04343\" target=\"_blank\" rel=\"noreferrer noopener\">arXiv<\/a>]<\/li>\n\n\n\n<li>Michail Dontas, Yutong He, Naoki Murata, <span style=\"text-decoration: underline;\">Yuki Mitsufuji<\/span>, J. 
Zico Kolter, Ruslan Salakhutdinov, \u201cBlind Inverse Problem Solving Made Easy by Text-to-Image Latent Diffusion,\u201d NeurIPS Workshop on Structured Probabilistic Inference &amp; Generative Modeling (<mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">NeurIPS SPIGM<\/mark>), 2025 [<a href=\"https:\/\/arxiv.org\/abs\/2412.00557\" target=\"_blank\" rel=\"noreferrer noopener\">arXiv<\/a>]<\/li>\n\n\n\n<li>Zachary Novack, Koichi Saito, Zhi Zhong, Takashi Shibuya, Shuyang Cui, Julian McAuley, Taylor Berg-Kirkpatrick, Christian Simon, Shusuke Takahashi, <span style=\"text-decoration: underline;\">Yuki Mitsufuji<\/span>, \u201cFlashFoley: Fast Interactive Sketch2Audio Generation,\u201d NeurIPS Workshop on Generative and Protective AI for Content Creation (<mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">NeurIPS GenProCC<\/mark>), 2025 [<a href=\"https:\/\/anonaudiogen.github.io\/web\/\" target=\"_blank\" rel=\"noreferrer noopener\">demo<\/a>]<\/li>\n\n\n\n<li>Zachary Novack, Koichi Saito, Zhi Zhong, Takashi Shibuya, Shuyang Cui, Julian McAuley, Taylor Berg-Kirkpatrick, Christian Simon, Shusuke Takahashi, <span style=\"text-decoration: underline;\">Yuki Mitsufuji<\/span>, \u201cFlashFoley: Fast Interactive Sketch2Audio Generation,\u201d NeurIPS Workshop on AI for Music (<mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">NeurIPS AI4Music<\/mark>), 2025 [<a href=\"https:\/\/openreview.net\/forum?id=dxwsVO0W47\" target=\"_blank\" rel=\"noreferrer noopener\">OpenReview<\/a>][<a href=\"https:\/\/anonaudiogen.github.io\/web\/\" target=\"_blank\" rel=\"noreferrer noopener\">demo<\/a>]<\/li>\n\n\n\n<li>Kazuki Shimada, Archontis Politis, Iran R. 
Roman, Parthasaarathy Sudarsanam, David Diaz-Guerra, Ruchi Pandey, Kengo Uchida, Yuichiro Koyama, Naoya Takahashi, Takashi Shibuya, Shusuke Takahashi, Tuomas Virtanen, <span style=\"text-decoration: underline;\">Yuki Mitsufuji<\/span>, \u201cStereo Sound Event Localization and Detection with Onscreen\/Offscreen Classification,\u201d Detection and Classification of Acoustic Scenes and Events 2025 Workshop (<mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">DCASE Workshop<\/mark>), 2025 [<a href=\"https:\/\/dcase.community\/documents\/workshop2025\/proceedings\/DCASE2025Workshop_Shimada_48.pdf\" target=\"_blank\" rel=\"noreferrer noopener\">DCASE<\/a>][<a href=\"https:\/\/arxiv.org\/abs\/2507.12042\" target=\"_blank\" rel=\"noreferrer noopener\">arXiv<\/a>][<a href=\"https:\/\/github.com\/partha2409\/DCASE2025_seld_baseline\" target=\"_blank\" rel=\"noreferrer noopener\">code<\/a>][<a href=\"https:\/\/zenodo.org\/records\/15559774\" target=\"_blank\" rel=\"noreferrer noopener\">dataset<\/a>]<\/li>\n\n\n\n<li>\u201cPCA-DiffVox: Augmenting Vocal Effects Tweakability With a Bijective Latent Space,\u201d Demo Track of IEEE Workshop on Applications of Signal Processing to Audio and Acoustics (<mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">WASPAA Demo<\/mark>), 2025 [<a href=\"https:\/\/www.waspaa.com\/waspaa25\/proceedings\/WASPAA2025-291.pdf\" target=\"_blank\" rel=\"noreferrer noopener\">WASPAA<\/a>]<\/li>\n\n\n\n<li>Qiyu Wu, Mengjie Zhao, Yutong He, Lang Huang, Junya Ono, Hiromi Wakaki, <span style=\"text-decoration: underline;\">Yuki Mitsufuji<\/span>, \u201cTowards Reporting Bias in Visual-Language Datasets: Bimodal Augmentation by Decoupling Object-Attribute Association,\u201d ICCV Workshop on Multimodal Representation and Retrieval (<mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">ICCV MRR<\/mark>), 2025 [<a href=\"https:\/\/arxiv.org\/abs\/2310.01330\" target=\"_blank\" rel=\"noreferrer noopener\">arXiv<\/a>]<\/li>\n\n\n\n<li>Zhi Zhong, Akira Takahashi, Shuyang Cui, Keisuke Toyama, Shusuke Takahashi, <span style=\"text-decoration: underline;\">Yuki Mitsufuji<\/span>, \u201cSpecMaskFoley: Steering Pretrained Spectral Masked Generative Transformer Toward Synchronized Video-to-audio Synthesis via ControlNet,\u201d ICCV Workshop on Generative AI for Audio-Visual Content Creation (<mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">ICCV Gen4AVC<\/mark>), 2025 [<a href=\"https:\/\/arxiv.org\/abs\/2505.16195\" target=\"_blank\" rel=\"noreferrer noopener\">arXiv<\/a>][<a href=\"https:\/\/zzaudio.github.io\/SpecMaskFoley_Demo\/\" target=\"_blank\" rel=\"noreferrer noopener\">demo<\/a>]<\/li>\n\n\n\n<li>Woosung Choi, Junghyun Koo, Kin Wai Cheuk, Joan Serr\u00e0, Marco A. 
Mart\u00ednez-Ram\u00edrez, Yukara Ikemiya, Naoki Murata, Yuhta Takida, Wei-Hsiang Liao, <span style=\"text-decoration: underline;\">Yuki Mitsufuji<\/span>, \u201cLarge-Scale Training Data Attribution for Music Generative Models via Unlearning,\u201d ICML Workshop on Machine Learning for Audio (<mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">ICML MLA<\/mark>), 2025 [<a href=\"https:\/\/arxiv.org\/abs\/2506.18312\" target=\"_blank\" rel=\"noreferrer noopener\">arXiv<\/a>]<\/li>\n\n\n\n<li>Kazuki Shimada, Christian Simon, Takashi Shibuya, Shusuke Takahashi, <span style=\"text-decoration: underline;\">Yuki Mitsufuji<\/span>, \u201cSAVGBench: Benchmarking Spatially Aligned Audio-Video Generation,\u201d CVPR Workshop Sight and Sound (<mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">CVPR WSS<\/mark>), 2025 [<a href=\"https:\/\/arxiv.org\/abs\/2412.13462\" target=\"_blank\" rel=\"noreferrer noopener\">arXiv<\/a>]<\/li>\n\n\n\n<li>Kengo Uchida, Takashi Shibuya, Yuhta Takida, Naoki Murata, Julian Tanke, Shusuke Takahashi, <span style=\"text-decoration: underline;\">Yuki Mitsufuji<\/span>, \u201cMoLA: Motion Generation and Editing with Latent Diffusion Enhanced by Adversarial Training,\u201d CVPR Workshop on Human Motion Generation (<mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">CVPR HuMoGen<\/mark>), 2025 [<a href=\"https:\/\/arxiv.org\/abs\/2406.01867\" target=\"_blank\" rel=\"noreferrer noopener\">arXiv<\/a>][<a href=\"https:\/\/github.com\/sony\/MoLA\" target=\"_blank\" rel=\"noreferrer noopener\">code<\/a>][<a href=\"https:\/\/k5uchida.github.io\/MoLA-demo\/\" target=\"_blank\" rel=\"noreferrer noopener\">demo<\/a>]<\/li>\n\n\n\n<li>Julian Tanke, Takashi Shibuya, Kengo Uchida, Koichi Saito, <span style=\"text-decoration: underline;\">Yuki Mitsufuji<\/span>, \u201cDyadic Mamba: Long-term Dyadic Human Motion Synthesis,\u201d CVPR Workshop on Human Motion Generation (<mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">CVPR HuMoGen<\/mark>), 2025 [<a href=\"https:\/\/arxiv.org\/abs\/2505.09827\" target=\"_blank\" rel=\"noreferrer noopener\">arXiv<\/a>]<\/li>\n\n\n\n<li>Zhuoyuan Mao, Mengjie Zhao, Qiyu Wu, Zhi Zhong, Wei-Hsiang Liao, Hiromi Wakaki, <span style=\"text-decoration: underline;\">Yuki Mitsufuji<\/span>, \u201cCross-Modal Learning for Music-to-Music-Video Description Generation,\u201d NAACL Workshop on Representation Learning for NLP (<mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">NAACL RepL4NLP<\/mark>), 2025 [<a href=\"https:\/\/arxiv.org\/abs\/2503.11190\" target=\"_blank\" rel=\"noreferrer noopener\">arXiv<\/a>]<\/li>\n\n\n\n<li>Gianluigi Silvestri, Luca Ambrogioni, Chieh-Hsin Lai, Yuhta Takida, <span style=\"text-decoration: underline;\">Yuki Mitsufuji<\/span>, \u201cTraining Consistency Models with Variational Noise Coupling,\u201d ICLR Workshop on Deep Generative Model in Machine Learning: Theory, Principle and Efficacy (<mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">ICLR DeLTa<\/mark>), 2025 [<a href=\"https:\/\/arxiv.org\/abs\/2502.18197\" target=\"_blank\" rel=\"noreferrer noopener\">arXiv<\/a>][<a href=\"https:\/\/github.com\/sony\/vct\" target=\"_blank\" rel=\"noreferrer noopener\">code<\/a>]<\/li>\n\n\n\n<li>Bac Nguyen, Chieh-Hsin Lai, Yuhta Takida, Naoki 
Murata, Toshimitsu Uesaka, Stefano Ermon, <span style=\"text-decoration: underline;\">Yuki Mitsufuji<\/span>, \u201cImproving Vector-Quantized Image Modeling with Latent Consistency-Matching Diffusion,\u201d ICLR Workshop on Deep Generative Model in Machine Learning: Theory, Principle and Efficacy (<mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">ICLR DeLTa<\/mark>), 2025<\/li>\n\n\n\n<li>Yangming Li, Chieh-Hsin Lai, Carola-Bibiane Sch\u00f6nlieb, <span style=\"text-decoration: underline;\">Yuki Mitsufuji<\/span>, Stefano Ermon, \u201cBellman Diffusion: Generative Modeling as Learning a Linear Operator in the Distribution Space,\u201d ICLR Workshop on Frontiers in Probabilistic Inference: Learning Meets Sampling (<mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">ICLR FPI<\/mark>), 2025 [<a href=\"https:\/\/arxiv.org\/abs\/2410.01796\" target=\"_blank\" rel=\"noreferrer noopener\">arXiv<\/a>]<\/li>\n\n\n\n<li>Felix Leeb, Satoshi Hayakawa, Yuhta Takida, <span style=\"text-decoration: underline;\">Yuki Mitsufuji<\/span>, \u201cPartial Alignment of Representations via Interventional Consistency,\u201d ICLR Workshop on Representational Alignment (<mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">ICLR Re-Align<\/mark>), 2025 [<a href=\"https:\/\/openreview.net\/forum?id=eimAJqoIWt\" target=\"_blank\" rel=\"noreferrer noopener\">OpenReview<\/a>]<\/li>\n\n\n\n<li>R. Oguz Araz, Joan Serr\u00e0, Xavier Serra, <span style=\"text-decoration: underline;\">Yuki Mitsufuji<\/span>, Dmitry Bogdanov, \u201cDISCOGS-VINET-MIREX,\u201d Cover Song Identification Track (<mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">MIREX<\/mark>), 2024 [<a href=\"https:\/\/futuremirex.com\/portal\/wp-content\/uploads\/2024\/11\/R_Oguz_Araz-MIREX2024.pdf\" target=\"_blank\" rel=\"noreferrer noopener\">MIREX<\/a>]<\/li>\n\n\n\n<li>Mengjie Zhao, Zhi Zhong, Zhuoyuan Mao, Shiqi Yang, Wei-Hsiang Liao, Shusuke Takahashi, Hiromi Wakaki, <span style=\"text-decoration: underline;\">Yuki Mitsufuji<\/span>, \u201cDemonstrating OpenMU-LightBench: A Benchmark Suite for Music Understanding,\u201d ISMIR Late Breaking Demo (<mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">ISMIR LBD<\/mark>), 2024 [<a href=\"https:\/\/ismir2024program.ismir.net\/lbd_454.html\" target=\"_blank\" rel=\"noreferrer noopener\">ISMIR<\/a>]<\/li>\n\n\n\n<li>Junghyun Koo, Marco A. 
Mart\u00ednez-Ram\u00edrez, Wei-Hsiang Liao, Giorgio Fabbro, Michele Mancusi, <span style=\"text-decoration: underline;\">Yuki Mitsufuji<\/span>, \u201cITO-Master: Inference-Time Optimization for Music Mastering Style Transfer,\u201d ISMIR Late Breaking Demo (<mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">ISMIR LBD<\/mark>), 2024 [<a href=\"https:\/\/ismir2024program.ismir.net\/lbd_446.html\" target=\"_blank\" rel=\"noreferrer noopener\">ISMIR<\/a>]<\/li>\n\n\n\n<li>Yin-Jyun Luo, Kin Wai Cheuk, Woosung Choi, Toshimitsu Uesaka, Keisuke Toyama, Wei-Hsiang Liao, Simon Dixon, <span style=\"text-decoration: underline;\">Yuki Mitsufuji<\/span>, \u201cSource-Level Pitch and Timbre Editing for Mixtures of Tones Using Disentangled Representations,\u201d ISMIR Late Breaking Demo (<mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">ISMIR LBD<\/mark>), 2024 [<a href=\"https:\/\/ismir2024program.ismir.net\/lbd_432.html\" target=\"_blank\" rel=\"noreferrer noopener\">ISMIR<\/a>]<\/li>\n\n\n\n<li>David Diaz-Guerra, Archontis Politis, Parthasaarathy Sudarsanam, Kazuki Shimada, Daniel A. Krause, Kengo Uchida, Yuichiro Koyama, Naoya Takahashi, Shusuke Takahashi, Takashi Shibuya, <span style=\"text-decoration: underline;\">Yuki Mitsufuji<\/span>, Tuomas Virtanen, \u201cBaseline Models and Evaluation of Sound Event Localization and Detection with Distance Estimation in DCASE2024 Challenge,\u201d in Proc. Detection and Classification of Acoustic Scenes and Events 2024 Workshop (<mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">DCASE Workshop<\/mark>), pp. 41\u201345, 2024 [<a href=\"https:\/\/dcase.community\/documents\/workshop2024\/proceedings\/DCASE2024Workshop_Diaz-Guerra_53.pdf\" target=\"_blank\" rel=\"noreferrer noopener\">DCASE<\/a>]<\/li>\n\n\n\n<li>Koichi Saito, Dongjun Kim, Takashi Shibuya, Chieh-Hsin Lai, Zhi Zhong, Yuhta Takida, <span style=\"text-decoration: underline;\">Yuki Mitsufuji<\/span>, \u201cSoundCTM: Uniting Score-based and Consistency Models for Text-to-Sound Generation,\u201d NeurIPS Workshop on AI-Driven Speech, Music, and Sound Generation (<mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">NeurIPS Audio Imagination<\/mark>), 2024 [<a href=\"https:\/\/arxiv.org\/abs\/2405.18503\" target=\"_blank\" rel=\"noreferrer noopener\">arXiv<\/a>][<a href=\"https:\/\/github.com\/sony\/soundctm\" target=\"_blank\" rel=\"noreferrer noopener\">code<\/a>][<a href=\"https:\/\/koichi-saito-sony.github.io\/soundctm\/\" target=\"_blank\" rel=\"noreferrer noopener\">demo<\/a>]<\/li>\n\n\n\n<li>Yin-Jyun Luo, Kin Wai Cheuk, Woosung Choi, Toshimitsu Uesaka, Keisuke Toyama, Koichi Saito, Chieh-Hsin Lai, Yuhta Takida, Wei-Hsiang Liao, Simon Dixon, <span style=\"text-decoration: underline;\">Yuki Mitsufuji<\/span>, \u201cDisentangling Mixtures of Musical Instruments for Source-level Pitch and Timbre Manipulation,\u201d NeurIPS Workshop on AI-Driven Speech, Music, and Sound Generation (<mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">NeurIPS Audio Imagination<\/mark>), 2024 [<a href=\"https:\/\/arxiv.org\/abs\/2408.10807\" target=\"_blank\" rel=\"noreferrer noopener\">arXiv<\/a>]<\/li>\n\n\n\n<li>Mayank Kumar Singh, Naoya Takahashi, Wei-Hsiang Liao, <span style=\"text-decoration: underline;\">Yuki Mitsufuji<\/span>, \u201cLOCKEY: A Novel Approach to Model Authentication and Deepfake 
Tracking,\u201d NeurIPS Workshop on AI-Driven Speech, Music, and Sound Generation (<mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">NeurIPS Audio Imagination<\/mark>), 2024 [<a href=\"https:\/\/arxiv.org\/abs\/2409.07743\" target=\"_blank\" rel=\"noreferrer noopener\">arXiv<\/a>][<a href=\"https:\/\/mayank-git-hub-sony.github.io\/model_authentication_demo\/\" target=\"_blank\" rel=\"noreferrer noopener\">demo<\/a>]<\/li>\n\n\n\n<li>Yutong He, Alexander Robey, Naoki Murata, Yiding Jiang, Joshua Williams, George J. Pappas, Hamed Hassani, <span style=\"text-decoration: underline;\">Yuki Mitsufuji<\/span>, Ruslan Salakhutdinov, J. Zico Kolter, \u201cAutomated Black-box Prompt Engineering for Personalized Text-to-Image Generation,\u201d NeurIPS Workshop on Creativity and Artificial Intelligence (<mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">NeurIPS Creativity<\/mark>), 2024 [<a href=\"https:\/\/arxiv.org\/abs\/2403.19103\" target=\"_blank\" rel=\"noreferrer noopener\">arXiv<\/a>]<\/li>\n\n\n\n<li>Satoshi Hayakawa, Yuhta Takida, Masaaki Imaizumi, Hiromi Wakaki, <span style=\"text-decoration: underline;\">Yuki Mitsufuji<\/span>, \u201cDistillation of Discrete Diffusion through Dimensional Correlations,\u201d NeurIPS Workshop on Machine Learning and Compression (<mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">NeurIPS Neural Compression<\/mark>), 2024 [<a href=\"https:\/\/arxiv.org\/abs\/2410.08709\" target=\"_blank\" rel=\"noreferrer noopener\">arXiv<\/a>][<a href=\"https:\/\/github.com\/sony\/di4c\" target=\"_blank\" rel=\"noreferrer noopener\">code<\/a>]<\/li>\n\n\n\n<li>Yunkee Chae, Woosung Choi, Yuhta Takida, Junghyun Koo, Yukara Ikemiya, Zhi Zhong, Kin Wai Cheuk, Marco A. 
Mart\u00ednez-Ram\u00edrez, Kyogu Lee, Wei-Hsiang Liao, <span style=\"text-decoration: underline;\">Yuki Mitsufuji<\/span>, \u201cVRVQ: Variable Bitrate Residual Vector Quantization for Audio Compression,\u201d NeurIPS Workshop on Machine Learning and Compression (<mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">NeurIPS Neural Compression<\/mark>), 2024 [<a href=\"https:\/\/arxiv.org\/abs\/2410.06016\" target=\"_blank\" rel=\"noreferrer noopener\">arXiv<\/a>]<\/li>\n\n\n\n<li>Masato Ishii, Akio Hayakawa, Takashi Shibuya, <span style=\"text-decoration: underline;\">Yuki Mitsufuji<\/span>, \u201cA Simple but Strong Baseline for Sounding Video Generation: Effective Adaptation of Audio and Video Diffusion Models for Joint Generation,\u201d ECCV Workshop Audio-Visual Generation and Learning (<mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">ECCV AVGenL<\/mark>), 2024 [<a href=\"https:\/\/arxiv.org\/abs\/2409.17550\" target=\"_blank\" rel=\"noreferrer noopener\">arXiv<\/a>]<\/li>\n\n\n\n<li>Silin Gao, Mete Ismayilzada, Mengjie Zhao, Hiromi Wakaki, <span style=\"text-decoration: underline;\">Yuki Mitsufuji<\/span>, Antoine Bosselut, \u201cDiffuCOMET: Contextual Commonsense Knowledge Diffusion,\u201d ACL Workshop on Knowledge Augmented Methods for NLP (<mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">ACL KnowledgeNLP<\/mark>), 2024 [<a href=\"https:\/\/knowledge-nlp.github.io\/acl2024\/papers\/12_diffucomet_contextual_commonse.pdf\" target=\"_blank\" rel=\"noreferrer noopener\">URL<\/a>]<\/li>\n\n\n\n<li>Sungho Lee, Marco Mart\u00ednez-Ram\u00edrez, Wei-Hsiang Liao, Stefan Uhlich, Giorgio Fabbro, Kyogu Lee, <span style=\"text-decoration: underline;\">Yuki Mitsufuji<\/span>, \u201cGRAFX: An Open-source Library for Audio Processing Graphs in PyTorch,\u201d DAFx Demo\/LBR (<mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">DAFx Demo\/LBR<\/mark>), 2024 [<a href=\"https:\/\/www.dafx.de\/paper-archive\/2024\/papers\/DAFx24_paper_94.pdf\" target=\"_blank\" rel=\"noreferrer noopener\">DAFx<\/a>][<a href=\"https:\/\/www.arxiv.org\/abs\/2408.03204\" target=\"_blank\" rel=\"noreferrer noopener\">arXiv<\/a>]<\/li>\n\n\n\n<li>Toshimitsu Uesaka, Taiji Suzuki, Yuhta Takida, Chieh-Hsin Lai, Naoki Murata, <span style=\"text-decoration: underline;\">Yuki Mitsufuji<\/span>, \u201cUnderstanding Multimodal Contrastive Learning Through Pointwise Mutual Information,\u201d ICLR Workshop on Bridging the Gap Between Practice and Theory in Deep Learning (<mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">ICLR BGPT<\/mark>), 2024 [<a href=\"https:\/\/arxiv.org\/abs\/2404.19228\">arXiv<\/a>]<\/li>\n\n\n\n<li>Dongjun Kim, Chieh-Hsin Lai, Wei-Hsiang Liao, Naoki Murata, Yuhta Takida, Toshimitsu Uesaka, Yutong He, <span style=\"text-decoration: underline;\">Yuki Mitsufuji<\/span>, Stefano Ermon, \u201cConsistency Trajectory Models: Learning Probability Flow ODE Trajectory of Diffusion,\u201d NeurIPS Workshop on&nbsp;Diffusion Models (<mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">NeurIPS WDM<\/mark>), 2023 [<a href=\"https:\/\/neurips.cc\/media\/neurips-2023\/Slides\/74846.pdf\" target=\"_blank\" rel=\"noreferrer noopener\">URL<\/a>]<\/li>\n\n\n\n<li>Yu-Hua Chen, Woosung Choi, WeiHsiang Liao, Marco A. 
Mart\u00ednez-Ram\u00edrez, Kin-Wai Cheuk, Yi-Hsuan Yang, <span style=\"text-decoration: underline;\">Yuki Mitsufuji<\/span>, \u201cNeural Amplifier Modelling with Several GAN Variants,\u201d ISMIR Late Breaking Demo (<mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">ISMIR LBD<\/mark>), 2023 [<a href=\"https:\/\/ismir2023program.ismir.net\/lbd_364.html\" target=\"_blank\" rel=\"noreferrer noopener\">ISMIR<\/a>][<a href=\"https:\/\/ss12f32v.github.io\/neural_amplifier_modelling\/\" target=\"_blank\" rel=\"noreferrer noopener\">demo<\/a>]<\/li>\n\n\n\n<li>Chieh-Hsin Lai, Yuhta Takida, Toshimitsu Uesaka, Naoki Murata, <span style=\"text-decoration: underline;\">Yuki Mitsufuji<\/span>, Stefano Ermon, \u201cOn the Equivalence of Consistency-Type Models: Consistency Models, Consistent Diffusion Models, and Fokker-Planck Regularization,\u201d ICML Workshop on Structured Probabilistic Inference &amp; Generative Modeling (<mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">ICML SPIGM<\/mark>), 2023 [<a href=\"https:\/\/openreview.net\/forum?id=wjtGsScvAO\" target=\"_blank\" rel=\"noreferrer noopener\">OpenReview<\/a>][<a href=\"https:\/\/arxiv.org\/abs\/2306.00367\" target=\"_blank\" rel=\"noreferrer noopener\">arXiv<\/a>]<\/li>\n\n\n\n<li>Kazuki Shimada, Archontis Politis, Parthasaarathy Sudarsanam, Daniel Krause, Kengo Uchida, Sharath Adavanne, Aapo Hakala, Yuichiro Koyama, Naoya Takahashi, Shusuke Takahashi, Tuomas Virtanen, <span style=\"text-decoration: underline;\">Yuki Mitsufuji<\/span>, \u201cToward an Audio-Visual Dataset of Spatial Recordings of Real Scenes with Spatiotemporal Annotations of Sound Events,\u201d CVPR Workshop on Sight and Sound (<mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">CVPR WSS<\/mark>), 2023 [<a href=\"https:\/\/sightsound.org\/papers\/2023\/Shimada_Toward_an_Audio-Visual_Dataset_of_Spatial_Recordings_of_Real_Scenes_with_Spatiotemporal_Annotations_of_Sound_Events.pdf\" target=\"_blank\" rel=\"noreferrer noopener\">URL<\/a>][<a href=\"https:\/\/zenodo.org\/record\/7880637\" target=\"_blank\" rel=\"noreferrer noopener\">dataset<\/a>]<\/li>\n\n\n\n<li>Silin Gao, Jena D.
Hwang, Saya Kanno, Hiromi Wakaki, <span style=\"text-decoration: underline;\">Yuki Mitsufuji<\/span>, Antoine Bosselut, \u201cComFact: A Benchmark for Linking Contextual Commonsense Knowledge,\u201d AAAI Workshop on Knowledge Augmented Methods for NLP (<mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">KnowledgeNLP-AAAI<\/mark>), 2023 [<a href=\"https:\/\/knowledge-nlp.github.io\/aaai2023\/papers\/002-ComFact-oral.pdf\" target=\"_blank\" rel=\"noreferrer noopener\">AAAI<\/a>][<a href=\"https:\/\/github.com\/Silin159\/ComFact\" target=\"_blank\" rel=\"noreferrer noopener\">code<\/a>]<\/li>\n\n\n\n<li>Chieh-Hsin Lai, Yuhta Takida, Naoki Murata, Toshimitsu Uesaka, <span style=\"text-decoration: underline;\">Yuki Mitsufuji<\/span>, Stefano Ermon, \u201cRegularizing Score-based Models with Score Fokker-Planck Equations,\u201d NeurIPS Workshop on Score-Based Methods (<mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">NeurIPS SBM<\/mark>), 2022 [<a href=\"https:\/\/openreview.net\/forum?id=WqW7tC32v8N\" target=\"_blank\" rel=\"noreferrer noopener\">OpenReview<\/a>]<\/li>\n\n\n\n<li>Archontis Politis, Kazuki Shimada, Parthasaarathy Sudarsanam, Sharath Adavanne, Daniel Krause, Yuichiro Koyama, Naoya Takahashi, Shusuke Takahashi, <span style=\"text-decoration: underline;\">Yuki Mitsufuji<\/span>, Tuomas Virtanen, \u201cSTARSS22: A Dataset of Spatial Recordings of Real Scenes with Spatiotemporal Annotations of Sound Events,\u201d in Proc. Detection and Classification of Acoustic Scenes and Events 2022 Workshop (<mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">DCASE Workshop<\/mark>), 2022 [<a href=\"https:\/\/dcase.community\/documents\/workshop2022\/proceedings\/DCASE2022Workshop_Politis_51.pdf\" target=\"_blank\" rel=\"noreferrer noopener\">DCASE<\/a>][<a href=\"https:\/\/arxiv.org\/abs\/2206.01948\" target=\"_blank\" rel=\"noreferrer noopener\">arXiv<\/a>][<a href=\"https:\/\/zenodo.org\/record\/6600531#.Yp_dLO7P3b0\" target=\"_blank\" rel=\"noreferrer noopener\">dataset<\/a>]<\/li>\n\n\n\n<li>Fabian-Robert St\u00f6ter, Maria Clara Machry, Delton de Andrade Vaz, Stefan Uhlich, <span style=\"text-decoration: underline;\">Yuki Mitsufuji<\/span>, Antoine Liutkus, \u201cOpen.Unmix.app \u2013 Towards Audio Separation on the Edge,\u201d Web Audio Conference (<mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">WAC<\/mark>), 2021 [<a rel=\"noreferrer noopener\" href=\"https:\/\/webaudioconf2021.com\/wp-content\/uploads\/2021\/06\/stoeter_wac.pdf\" target=\"_blank\">URL<\/a>][<a rel=\"noreferrer noopener\" href=\"https:\/\/sigsep.github.io\/open-unmix-js\/\" target=\"_blank\">demo<\/a>]<\/li>\n\n\n\n<li>Joachim Muth, Stefan Uhlich, Nathanael Perraudin, Thomas Kemp, Fabien Cardinaux, <span style=\"text-decoration: underline;\">Yuki Mitsufuji<\/span>, \u201cImproving DNN-based Music Source Separation Using Phase Features,\u201d Joint Workshop on Machine Learning for Music at <mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">ICML, IJCAI\/ECAI and AAMAS<\/mark>, 2018 [<a href=\"https:\/\/arxiv.org\/abs\/1807.02710\" target=\"_blank\" rel=\"noreferrer noopener\">arXiv<\/a>]<\/li>\n<\/ol>\n\n\n\n<div style=\"height:26px\" aria-hidden=\"true\" class=\"wp-block-spacer\"><\/div>\n\n\n<div class=\"su-divider su-divider-style-default\" style=\"margin:16px
0;border-width:1px;border-color:#000000\"><a href=\"#\" style=\"color:#000000\">Go to top<\/a><\/div>\n\n\n\n<h3 class=\"wp-block-heading\" id=\"papers_under_review\">Papers Under Review<\/h3>\n\n\n\n<ol class=\"wp-block-list\">\n<li>Giannis Daras, Hyungjin Chung, Chieh-Hsin Lai, <span style=\"text-decoration: underline;\">Yuki Mitsufuji<\/span>, Jong Chul Ye, Peyman Milanfar, Alexandros G. Dimakis, Mauricio Delbracio, \u201cA Survey on Diffusion Models for Inverse Problems,\u201d 2024 [<a href=\"https:\/\/arxiv.org\/abs\/2410.00083\" target=\"_blank\" rel=\"noreferrer noopener\">arXiv<\/a>]<\/li>\n\n\n\n<li>Roser Batlle-Roca, Emilia G\u00f3mez, Wei-Hsiang Liao, Xavier Serra, <span style=\"text-decoration: underline;\">Yuki Mitsufuji<\/span>, \u201cTransparency in Music-Generative AI: A Systematic Literature Review,\u201d 2024 [<a href=\"https:\/\/www.researchsquare.com\/article\/rs-3708077\/v1\" target=\"_blank\" rel=\"noreferrer noopener\">preprint<\/a>]<\/li>\n\n\n\n<li>Ga\u00ebtan Hadjeres, Marc Ferras, Khaled Koutini, Benno Weck, Alexandre Bittar, Thomas Hummel, Zineb Lahrici, Hakim Missoum, Joan Serr\u00e0, <span style=\"text-decoration: underline;\">Yuki Mitsufuji<\/span>, \u201cWoosh: A Sound Effects Foundation Model,\u201d 2026 [<a href=\"https:\/\/arxiv.org\/abs\/2604.01929\" target=\"_blank\" rel=\"noreferrer noopener\">arXiv<\/a>][<a href=\"https:\/\/github.com\/SonyResearch\/Woosh\" target=\"_blank\" rel=\"noreferrer noopener\">code<\/a>][<a href=\"https:\/\/sonyresearch.github.io\/Woosh\/\" target=\"_blank\" rel=\"noreferrer noopener\">demo<\/a>]<\/li>\n\n\n\n<li>Chieh-Hsin Lai, Bac Nguyen, Wei-Hsiang Liao, Yuhta Takida, Naoki Murata, Toshimitsu Uesaka, <span style=\"text-decoration: underline;\">Yuki Mitsufuji<\/span>, \u201cPadvFlow: Towards Learning Imperceptible Adversarial Distribution for Black-Box Attacks against Image Classifiers and Automatic Speech Recognition Systems,\u201d 2024<\/li>\n\n\n\n<li>Naoya Takahashi, Mayank Kumar Singh, <span style=\"text-decoration: underline;\">Yuki Mitsufuji<\/span>, \u201cRobust One-Shot Singing Voice Conversion,\u201d 2024 [<a href=\"https:\/\/arxiv.org\/abs\/2210.11096\" target=\"_blank\" rel=\"noreferrer noopener\">arXiv<\/a>][<a href=\"https:\/\/t-naoya.github.io\/rosvc\/\" target=\"_blank\" rel=\"noreferrer noopener\">demo<\/a>]<\/li>\n\n\n\n<li>Hao Hao Tan, Kin Wai Cheuk, Taemin Cho, Wei-Hsiang Liao, <span style=\"text-decoration: underline;\">Yuki Mitsufuji<\/span>, \u201cMR-MT3: Memory Retaining Multi-Track Music Transcription to Mitigate Instrument Leakage,\u201d 2024 [<a href=\"https:\/\/arxiv.org\/abs\/2403.10024\" target=\"_blank\" rel=\"noreferrer noopener\">arXiv<\/a>]<\/li>\n\n\n\n<li>Shiqi Yang, Zhi Zhong, Mengjie Zhao, Shusuke Takahashi, Masato Ishii, Takashi Shibuya, <span style=\"text-decoration: underline;\">Yuki Mitsufuji<\/span>, \u201cVisual Echoes: A Simple Unified Transformer for Audio-Visual Generation,\u201d 2024 [<a href=\"https:\/\/arxiv.org\/abs\/2405.14598\" target=\"_blank\" rel=\"noreferrer noopener\">arXiv<\/a>]<\/li>\n\n\n\n<li>Hiromi Wakaki, <span style=\"text-decoration: underline;\">Yuki Mitsufuji<\/span>, Yoshinori Maeda, Yukiko Nishimura, Silin Gao, Mengjie Zhao, Keiichi Yamada, Antoine Bosselut, \u201cComperDial: Commonsense Persona-grounded Dialogue Dataset and Benchmark,\u201d 2024 [<a href=\"https:\/\/arxiv.org\/abs\/2406.11228\" target=\"_blank\" rel=\"noreferrer noopener\">arXiv<\/a>][<a href=\"https:\/\/huggingface.co\/datasets\/Sony\/ComperDial\" target=\"_blank\" rel=\"noreferrer
noopener\">dataset<\/a>]<\/li>\n\n\n\n<li>Mengjie Zhao, Zhi Zhong, Zhuoyuan Mao, Shiqi Yang, Wei-Hsiang Liao, Shusuke Takahashi, Hiromi Wakaki, <span style=\"text-decoration: underline;\">Yuki Mitsufuji<\/span>, \u201cOpenMU: Your Swiss Army Knife for Music Understanding,\u201d 2024 [<a href=\"https:\/\/arxiv.org\/abs\/2410.15573\" target=\"_blank\" rel=\"noreferrer noopener\">arXiv<\/a>][<a href=\"https:\/\/github.com\/sony\/openmu\" target=\"_blank\" rel=\"noreferrer noopener\">code<\/a>][<a href=\"https:\/\/mzhaojp22.github.io\/open_music_understanding\/\" target=\"_blank\" rel=\"noreferrer noopener\">demo<\/a>][<a href=\"https:\/\/huggingface.co\/datasets\/Sony\/OpenMU-Bench\" target=\"_blank\" rel=\"noreferrer noopener\">dataset<\/a>]<\/li>\n\n\n\n<li>Anubhav Jain, Yuya Kobayashi, Takashi Shibuya, Yuhta Takida, Nasir Memon, Julian Togelius, <span style=\"text-decoration: underline;\">Yuki Mitsufuji<\/span>, \u201cTraSCE: Trajectory Steering for Concept Erasure,\u201d under review, 2024 [<a href=\"https:\/\/arxiv.org\/abs\/2412.07658\" target=\"_blank\" rel=\"noreferrer noopener\">arXiv<\/a>][<a href=\"https:\/\/github.com\/SonyResearch\/TraSCE\/\" target=\"_blank\" rel=\"noreferrer noopener\">code<\/a>]<\/li>\n\n\n\n<li>Shoukang Hu, Takuya Narihira, Kazumi Fukuda, Ryosuke Sawata, Takashi Shibuya, <span style=\"text-decoration: underline;\">Yuki Mitsufuji<\/span>, \u201cHumanGif: Single-View Human Diffusion with Generative Prior,\u201d under review, 2025 [<a href=\"https:\/\/arxiv.org\/abs\/2502.12080\" target=\"_blank\" rel=\"noreferrer noopener\">arXiv<\/a>][<a href=\"https:\/\/github.com\/skhu101\/HumanGif\" target=\"_blank\" rel=\"noreferrer noopener\">code<\/a>][<a href=\"https:\/\/skhu101.github.io\/HumanGif\/\" target=\"_blank\" rel=\"noreferrer noopener\">demo<\/a>]<\/li>\n\n\n\n<li>Anubhav Jain, Yuya Kobayashi, Naoki Murata, Yuhta Takida, Takashi Shibuya, <span style=\"text-decoration: underline;\">Yuki Mitsufuji<\/span>, Niv Cohen, Nasir Memon, Julian Togelius, \u201cForging and Removing Latent-Noise Diffusion Watermarks Using a Single Image,\u201d under review, 2025 [<a href=\"https:\/\/arxiv.org\/abs\/2504.20111\" target=\"_blank\" rel=\"noreferrer noopener\">arXiv<\/a>][<a href=\"https:\/\/github.com\/anubhav1997\/watermark_forgery_removal\" target=\"_blank\" rel=\"noreferrer noopener\">code<\/a>]<\/li>\n\n\n\n<li>Akio Hayakawa, Masato Ishii, Takashi Shibuya, <span style=\"text-decoration: underline;\">Yuki Mitsufuji<\/span>, \u201cStep-by-Step Video-to-Audio Synthesis via Negative Audio Guidance,\u201d under review, 2025 [<a href=\"https:\/\/arxiv.org\/abs\/2506.20995\" target=\"_blank\" rel=\"noreferrer noopener\">arXiv<\/a>]<\/li>\n\n\n\n<li>Anshuk Uppal, Yuhta Takida, Chieh-Hsin Lai, <span style=\"text-decoration: underline;\">Yuki Mitsufuji<\/span>, \u201cDenoising Multi-Beta VAE: Representation Learning for Disentanglement and Generation,\u201d under review, 2025 [<a href=\"https:\/\/arxiv.org\/abs\/2507.06613\" target=\"_blank\" rel=\"noreferrer noopener\">arXiv<\/a>]<\/li>\n\n\n\n<li>Koichi Saito, Julian Tanke, Christian Simon, Masato Ishii, Kazuki Shimada, Zachary Novack, Zhi Zhong, Akio Hayakawa, Takashi Shibuya, <span style=\"text-decoration: underline;\">Yuki Mitsufuji<\/span>, \u201cSoundReactor: Frame-level Online Video-to-Audio Generation,\u201d under review, 2026 [<a href=\"https:\/\/arxiv.org\/abs\/2510.02110\" target=\"_blank\" rel=\"noreferrer noopener\">arXiv<\/a>][<a href=\"https:\/\/koichi-saito-sony.github.io\/soundreactor\/\" target=\"_blank\" 
rel=\"noreferrer noopener\">demo<\/a>]<\/li>\n\n\n\n<li>Fabio Morreale, Wiebke Hutiri, Joan Serr\u00e0, Alice Xiang, <span style=\"text-decoration: underline;\">Yuki Mitsufuji<\/span>, \u201cAttribution-by-Design: Ensuring Inference-Time Provenance in Generative Music Systems,\u201d under review, 2026 [<a href=\"https:\/\/arxiv.org\/abs\/2510.08062\" target=\"_blank\" rel=\"noreferrer noopener\">arXiv<\/a>]<\/li>\n\n\n\n<li>Yongyi Zang, Jiarui Hai, Wanying Ge, Qiuqiang Kong, Zheqi Dai, Helin Wang, <span style=\"text-decoration: underline;\">Yuki Mitsufuji<\/span>, Mark D. Plumbley, \u201cMSRBench: A Benchmarking Dataset for Music Source Restoration,\u201d under review, 2025 [<a href=\"https:\/\/arxiv.org\/abs\/2510.10995\" target=\"_blank\" rel=\"noreferrer noopener\">arXiv<\/a>][<a href=\"https:\/\/huggingface.co\/datasets\/yongyizang\/MSRBench\" target=\"_blank\" rel=\"noreferrer noopener\">dataset<\/a>]<\/li>\n\n\n\n<li>Naoki Yoshida, Satoshi Hayakawa, Yuhta Takida, Toshimitsu Uesaka, Hiromi Wakaki, <span style=\"text-decoration: underline;\">Yuki Mitsufuji<\/span>, \u201cTheoretical Refinement of CLIP by Utilizing Linear Structure of Optimal Similarity,\u201d under review, 2026 [<a href=\"https:\/\/arxiv.org\/abs\/2510.15508\" target=\"_blank\" rel=\"noreferrer noopener\">arXiv<\/a>]<\/li>\n\n\n\n<li>Qiyu Wu, Shuyang Cui, Satoshi Hayakawa, Wei-Yao Wang, Hiromi Wakaki, <span style=\"text-decoration: underline;\">Yuki Mitsufuji<\/span>, \u201cMCA: Modality Composition Awareness for Robust Composed Multimodal Retrieval,\u201d under review, 2026 [<a href=\"https:\/\/arxiv.org\/abs\/2510.15543\" target=\"_blank\" rel=\"noreferrer noopener\">arXi<\/a><a href=\"https:\/\/arxiv.org\/abs\/2510.15508\" target=\"_blank\" rel=\"noreferrer noopener\">v<\/a>]<\/li>\n\n\n\n<li>Naveen George, Naoki Murata, Yuhta Takida, Konda Reddy Mopuri, <span style=\"text-decoration: underline;\">Yuki Mitsufuji<\/span>, \u201cDistill, Forget, Repeat: A Framework for Continual Unlearning in Text-to-Image Diffusion Models,\u201d under review, 2026 [<a href=\"https:\/\/arxiv.org\/abs\/2512.02657\" target=\"_blank\" rel=\"noreferrer noopener\">arXiv<\/a>]<\/li>\n\n\n\n<li>Masato Ishii, Akio Hayakawa, Takashi Shibuya, <span style=\"text-decoration: underline;\">Yuki Mitsufuji<\/span>, \u201cCoherent Audio-Visual Editing via Conditional Audio Generation Following Video Edits,\u201d under review, 2026 [<a href=\"https:\/\/arxiv.org\/abs\/2512.07209\" target=\"_blank\" rel=\"noreferrer noopener\">arXiv<\/a>]<\/li>\n\n\n\n<li>Zhengyang Yu, Akio Hayakawa, Masato Ishii, Qingtao Yu, Takashi Shibuya, Jing Zhang, <span style=\"text-decoration: underline;\">Yuki Mitsufuji<\/span>, \u201cAutoRefiner: Improving Autoregressive Video Diffusion Models via Reflective Refinement Over the Stochastic Sampling Path,\u201d under review, 2026 [<a href=\"https:\/\/arxiv.org\/abs\/2512.11203\" target=\"_blank\" rel=\"noreferrer noopener\">arXiv<\/a>]<\/li>\n\n\n\n<li>Weihan Xu, Kan Jen Cheng, Koichi Saito, Muhammad Jehanzeb Mirza, Tingle Li, Yisi Liu, Alexander H. 
Liu, Liming Wang, Masato Ishii, Takashi Shibuya, <span style=\"text-decoration: underline;\">Yuki Mitsufuji<\/span>, Gopala Anumanchipalli, Paul Pu Liang, \u201cSchrodinger Audio-Visual Editor: Object-Level Audiovisual Removal,\u201d under review, 2026 [<a href=\"https:\/\/arxiv.org\/abs\/2512.12875\" target=\"_blank\" rel=\"noreferrer noopener\">arXiv<\/a>]<\/li>\n\n\n\n<li>Fabio Morreale, Joan Serr\u00e0, Wiebke Hutiri, Alice Xiang, <span style=\"text-decoration: underline;\">Yuki Mitsufuji<\/span>, \u201cStop the Guesswork &#8211; Just Ask the User! Attributable Conditioning in Generative Music,\u201d 2026<\/li>\n\n\n\n<li>Gabriel Raya, Bac Nguyen, Georgios Batzolis, Yuhta Takida, Dejan Stancevic, Naoki Murata, Chieh-Hsin Lai, <span style=\"text-decoration: underline;\">Yuki Mitsufuji<\/span>, Luca Ambrogioni, \u201cInformation-Guided Noise Allocation for Efficient Diffusion Training,\u201d under review, 2026 [<a href=\"https:\/\/arxiv.org\/abs\/2602.18647\" target=\"_blank\" rel=\"noreferrer noopener\">arXiv<\/a>]<\/li>\n\n\n\n<li>Chieh-Hsin Lai, Bac Nguyen, Naoki Murata, Yuhta Takida, Toshimitsu Uesaka, <span style=\"text-decoration: underline;\">Yuki Mitsufuji<\/span>, Stefano Ermon, Molei Tao, \u201cA Unified View of Drifting and Score-Based Models,\u201d under review, 2026 [<a href=\"https:\/\/arxiv.org\/abs\/2603.07514\" target=\"_blank\" rel=\"noreferrer noopener\">arXiv<\/a>]<\/li>\n\n\n\n<li>Akira Takahashi, Ryosuke Sawata, Shusuke Takahashi, <span style=\"text-decoration: underline;\">Yuki Mitsufuji<\/span>, \u201cMMAudioReverbs: Video-Guided Acoustic Modeling for Dereverberation and Room Impulse Response Estimation,\u201d under review, 2026 [<a href=\"https:\/\/arxiv.org\/abs\/2605.00431\" target=\"_blank\" rel=\"noreferrer noopener\">arXiv<\/a>]<\/li>\n\n\n\n<li>Kazuya Tateishi, Akira Takahashi, Atsuo Hiroe, Hirofumi Takeda, Shusuke Takahashi, <span style=\"text-decoration: underline;\">Yuki Mitsufuji<\/span>, \u201cMMAudio-LABEL: Audio Event Labeling via Audio Generation for Silent Video,\u201d under review, 2026 [<a href=\"https:\/\/arxiv.org\/abs\/2605.00495\" target=\"_blank\" rel=\"noreferrer noopener\">arXiv<\/a>]<\/li>\n<\/ol>\n\n\n\n<div style=\"height:26px\" aria-hidden=\"true\" class=\"wp-block-spacer\"><\/div>\n\n\n<div class=\"su-divider su-divider-style-default\" style=\"margin:16px 0;border-width:1px;border-color:#000000\"><a href=\"#\" style=\"color:#000000\">Go to top<\/a><\/div>\n\n\n\n<h2 class=\"wp-block-heading\" id=\"award_and_competitions\"><strong>Awards and Competitions<\/strong><\/h2>\n\n\n\n<h3 class=\"wp-block-heading\">Awards<\/h3>\n\n\n\n<ul class=\"wp-block-list\">\n<li><mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">AICA Award<\/mark> for \u201cStudies for,\u201d a sound artwork created with evala, AI Creative Future Awards (AICA), 2025 [<a href=\"https:\/\/aica-awards.com\/\" target=\"_blank\" rel=\"noreferrer noopener\">URL<\/a>][<a href=\"https:\/\/www.yukimitsufuji.com\/wp-content\/uploads\/2025\/12\/AICA_Certificate.jpg\" target=\"_blank\" rel=\"noreferrer noopener\">certificate<\/a>]<\/li>\n\n\n\n<li><mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">Stanford\/Elsevier World&#8217;s Top 2% Scientist<\/mark>, 2025 [<a href=\"https:\/\/www.yukimitsufuji.com\/wp-content\/uploads\/2026\/01\/Top2_certificate-1.pdf\" target=\"_blank\" rel=\"noreferrer noopener\">certificate<\/a>]<\/li>\n\n\n\n<li><mark style=\"background-color:rgba(0, 0, 0, 0)\"
class=\"has-inline-color has-pale-pink-color\">Yagami Award<\/mark> for \u201cResearch and Development of Audio Source Separation Technology and Its Contribution to Commercialization and Social Implementation (Translated from the Japanese Title),\u201d Keio University, 2025 [<a href=\"https:\/\/www.dosokai.st.keio.ac.jp\/info\/2025yagamiprize\/\" target=\"_blank\" rel=\"noreferrer noopener\">URL<\/a>]<\/li>\n\n\n\n<li><mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">Best Industrial Paper Award<\/mark> for \u201cImproving Vector-Quantized Image Modeling with Latent Consistency-Matching Diffusion,\u201d IEEE International Joint Conference on Neural Networks (<mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">IJCNN<\/mark>), 2025 [<a href=\"https:\/\/2025.ijcnn.org\/program\/awards\" target=\"_blank\" rel=\"noreferrer noopener\">URL<\/a>][<a href=\"https:\/\/www.yukimitsufuji.com\/wp-content\/uploads\/2025\/07\/IJCNN_2025_award.pdf\" target=\"_blank\" rel=\"noreferrer noopener\">certificate<\/a>]<\/li>\n\n\n\n<li><mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">Best Show &amp; Tell Award<\/mark> for \u201cSearching For Music Mixing Graphs: A Pruning Approach,\u201d Digital Audio Effect Conference (<mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">DAFx<\/mark>), 2024 [<a href=\"https:\/\/www.yukimitsufuji.com\/wp-content\/uploads\/2024\/10\/DAFx-24_Best_show_and_tell_award.png\" target=\"_blank\" rel=\"noreferrer noopener\">certificate<\/a>]<\/li>\n\n\n\n<li><mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">Outstanding Paper Award<\/mark> for \u201cPeaCoK: Persona Commonsense Knowledge for Consistent and Engaging Narratives,\u201d the Annual Meeting of the Association for Computational Linguistics (<mark><mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">ACL<\/mark><\/mark>), 2023 [<a href=\"https:\/\/2023.aclweb.org\/program\/best_papers\/\" target=\"_blank\" rel=\"noreferrer noopener\">URL<\/a>][<a href=\"https:\/\/www.yukimitsufuji.com\/wp-content\/uploads\/2023\/07\/ACL-scaled.jpg\" target=\"_blank\" rel=\"noreferrer noopener\">certificate<\/a>]<\/li>\n\n\n\n<li><mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">Local Commendation for Invention Award<\/mark> for \u201cSound Separation Technology Using Multiple AI Models (Patent No. 
6981417),\u201d Japan Institute of Invention and Innovation, 2022 [<a href=\"http:\/\/koueki.jiii.or.jp\/hyosho\/chihatsu\/R4\/jusho_kanto\/index.html\" target=\"_blank\" rel=\"noreferrer noopener\">URL<\/a>][<a href=\"https:\/\/www.yukimitsufuji.com\/wp-content\/uploads\/2023\/08\/Certificate-scaled.jpg\" target=\"_blank\" rel=\"noreferrer noopener\">certificate<\/a>]<\/li>\n\n\n\n<li><strong><mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">Ranked 1st<\/mark><\/strong> in Task 3 at DCASE2021 Challenge (IEEE AASP Challenge on Detection and Classification of Acoustic Scenes and Events) [<a rel=\"noreferrer noopener\" href=\"https:\/\/www.sony.com\/en\/SonyInfo\/technology\/activities\/DCASE2021\/\" target=\"_blank\">URL<\/a>][<a rel=\"noreferrer noopener\" href=\"https:\/\/arxiv.org\/abs\/2106.10806\" target=\"_blank\">arXiv<\/a>]<\/li>\n\n\n\n<li>Ranked 3rd in Task 3 at DCASE2020 Challenge (IEEE AASP Challenge on Detection and Classification of Acoustic Scenes and Events) [<a rel=\"noreferrer noopener\" href=\"https:\/\/arxiv.org\/abs\/2006.12014\" target=\"_blank\">arXiv<\/a>]<\/li>\n\n\n\n<li>Japan Media Arts Festival 2019 <mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">Jury Selections<\/mark> &#8211; Acoustic Vessel Odyssey [<a rel=\"noreferrer noopener\" href=\"http:\/\/archive.j-mediaarts.jp\/en\/festival\/2019\/entertainment\/works\/Acoustic_Vessel_Odyssey\/\" target=\"_blank\">URL<\/a>][<a rel=\"noreferrer noopener\" href=\"http:\/\/www.aes.org\/e-lib\/download.cfm\/19648.pdf?ID=19648\" target=\"_blank\">AES<\/a>]<\/li>\n\n\n\n<li><strong><mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">Ranked 1st<\/mark><\/strong> in Music Task at the 2018 Signal Separation Evaluation Campaign [<a rel=\"noreferrer noopener\" href=\"https:\/\/arxiv.org\/abs\/1804.06267\" target=\"_blank\">URL<\/a>]<\/li>\n\n\n\n<li><strong><mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">Ranked 1st<\/mark><\/strong> in Music Task at the 2016 Signal Separation Evaluation Campaign [<a rel=\"noreferrer noopener\" href=\"https:\/\/hal.archives-ouvertes.fr\/hal-01472932\" target=\"_blank\">URL<\/a>]<\/li>\n\n\n\n<li><strong><mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">Ranked 1st<\/mark><\/strong> in Music Task at the 2015 Signal Separation Evaluation Campaign [<a rel=\"noreferrer noopener\" href=\"https:\/\/hal.archives-ouvertes.fr\/hal-01188725\" target=\"_blank\">URL<\/a>]<\/li>\n<\/ul>\n\n\n\n<h3 class=\"wp-block-heading\" id=\"competition_organizer\">Competition Organizer<\/h3>\n\n\n\n<ul class=\"wp-block-list\">\n<li>IEEE ICASSP Music Source Restoration (MSR) Challenge 2026 [<a href=\"https:\/\/msrchallenge.com\/\" target=\"_blank\" rel=\"noreferrer noopener\">URL<\/a>][<a href=\"https:\/\/arxiv.org\/abs\/2510.10995\" target=\"_blank\" rel=\"noreferrer noopener\">benchmark<\/a>][<a href=\"https:\/\/arxiv.org\/abs\/2601.04343\" target=\"_blank\" rel=\"noreferrer noopener\">report<\/a>][<a href=\"https:\/\/huggingface.co\/datasets\/yongyizang\/MSRBench\" target=\"_blank\" rel=\"noreferrer noopener\">dataset<\/a>]<\/li>\n\n\n\n<li>Sounding Video Generation Challenge\n<ul class=\"wp-block-list\">\n<li>Sounding Video Generation (SVG) Challenge 2024 [<a href=\"https:\/\/www.aicrowd.com\/challenges\/sounding-video-generation-svg-challenge-2024\" target=\"_blank\"
rel=\"noreferrer noopener\">URL<\/a>][<a href=\"https:\/\/arxiv.org\/abs\/2412.13462\" target=\"_blank\" rel=\"noreferrer noopener\">dataset<\/a>]<\/li>\n<\/ul>\n<\/li>\n\n\n\n<li>Commonsense Persona-grounded Dialogue Challenge\n<ul class=\"wp-block-list\">\n<li>Commonsense Persona-grounded Dialogue (CPD) Challenge 2025 [<a href=\"https:\/\/www.aicrowd.com\/challenges\/commonsense-persona-grounded-dialogue-challenge-2025\" target=\"_blank\" rel=\"noreferrer noopener\">URL<\/a>]<\/li>\n\n\n\n<li>Commonsense Persona-grounded Dialogue (CPD) Challenge 2023 [<a href=\"https:\/\/www.aicrowd.com\/challenges\/commonsense-persona-grounded-dialogue-challenge-2023\" target=\"_blank\" rel=\"noreferrer noopener\">URL<\/a>][<a href=\"https:\/\/arxiv.org\/abs\/2406.11228\" target=\"_blank\" rel=\"noreferrer noopener\">report<\/a>][<a href=\"https:\/\/huggingface.co\/datasets\/Sony\/ComperDial\" target=\"_blank\" rel=\"noreferrer noopener\">dataset<\/a>]<\/li>\n<\/ul>\n<\/li>\n\n\n\n<li>Music\/Sound Demixing Challenge\n<ul class=\"wp-block-list\">\n<li>Sound Demixing (SDX) Challenge 2023 [<a href=\"https:\/\/www.aicrowd.com\/challenges\/sound-demixing-challenge-2023\" target=\"_blank\" rel=\"noreferrer noopener\">URL<\/a>][<a href=\"https:\/\/arxiv.org\/abs\/2308.06979\" data-type=\"link\" data-id=\"https:\/\/arxiv.org\/abs\/2308.06979\" target=\"_blank\" rel=\"noreferrer noopener\">report MDX track<\/a>][<a href=\"https:\/\/arxiv.org\/abs\/2308.06981\" target=\"_blank\" rel=\"noreferrer noopener\">report CDX track<\/a>][<a href=\"https:\/\/sdx-workshop.github.io\/\" target=\"_blank\" rel=\"noreferrer noopener\">Workshop<\/a>]<\/li>\n\n\n\n<li>Music Demixing (MDX) Challenge 2021 [<a href=\"https:\/\/www.aicrowd.com\/challenges\/music-demixing-challenge-ismir-2021\" target=\"_blank\" rel=\"noreferrer noopener\">URL<\/a>] [<a href=\"https:\/\/www.frontiersin.org\/articles\/10.3389\/frsip.2021.808395\/full\" target=\"_blank\" rel=\"noreferrer noopener\">report<\/a>][<a href=\"https:\/\/mdx-workshop.github.io\/\" target=\"_blank\" rel=\"noreferrer noopener\">Workshop<\/a>]<\/li>\n<\/ul>\n<\/li>\n\n\n\n<li>IEEE DCASE Challenge\n<ul class=\"wp-block-list\">\n<li>DCASE2026 Challenge Task 3: \u201cSemantic Acoustic Imaging for Sound Event Localization and Detection from Spatial Audio and Audiovisual Scenes\u201d [<a href=\"https:\/\/dcase.community\/challenge2026\/task-semantic-acoustic-imaging-for-sound-event-localization-and-detection-from-spatial-audio-and-audiovisual-scenes\" target=\"_blank\" rel=\"noreferrer noopener\">URL<\/a>][<a href=\"https:\/\/zenodo.org\/records\/18171005\" target=\"_blank\" rel=\"noreferrer noopener\">dataset<\/a>]<\/li>\n\n\n\n<li>DCASE2025 Challenge Task 3: \u201cStereo Sound Event Localization and Detection in Regular Video Content\u201d [<a href=\"https:\/\/dcase.community\/challenge2025\/task-stereo-sound-event-localization-and-detection-in-regular-video-content\" target=\"_blank\" rel=\"noreferrer noopener\">URL<\/a>][<a href=\"https:\/\/arxiv.org\/abs\/2507.12042\" target=\"_blank\" rel=\"noreferrer noopener\">report<\/a>][<a href=\"https:\/\/github.com\/partha2409\/DCASE2025_seld_baseline\" target=\"_blank\" rel=\"noreferrer noopener\">code<\/a>][<a href=\"https:\/\/zenodo.org\/records\/15559774\" target=\"_blank\" rel=\"noreferrer noopener\">dataset<\/a>]<\/li>\n\n\n\n<li>DCASE2024 Challenge Task 3: \u201cAudio and Audiovisual Sound Event Localization and Detection with Source Distance Estimation\u201d [<a 
href=\"https:\/\/dcase.community\/challenge2024\/task-audio-and-audiovisual-sound-event-localization-and-detection-with-source-distance-estimation\" target=\"_blank\" rel=\"noreferrer noopener\">URL<\/a>][<a href=\"https:\/\/dcase.community\/documents\/workshop2024\/proceedings\/DCASE2024Workshop_Diaz-Guerra_53.pdf\" target=\"_blank\" rel=\"noreferrer noopener\">report<\/a>][<a href=\"https:\/\/zenodo.org\/record\/7880637\" target=\"_blank\" rel=\"noreferrer noopener\">dataset<\/a>]<\/li>\n\n\n\n<li>DCASE2023 Challenge Task 3: \u201cSound Event Localization and Detection Evaluated in Real Spatial Sound Scenes\u201d [<a href=\"https:\/\/dcase.community\/challenge2023\/task-sound-event-localization-and-detection-evaluated-in-real-spatial-sound-scenes\" target=\"_blank\" rel=\"noreferrer noopener\">URL<\/a>][<a href=\"https:\/\/arxiv.org\/abs\/2306.09126\" target=\"_blank\" rel=\"noreferrer noopener\">report<\/a>][<a href=\"https:\/\/zenodo.org\/record\/7880637\" target=\"_blank\" rel=\"noreferrer noopener\">dataset<\/a>]<\/li>\n\n\n\n<li>DCASE2022 Challenge Task 3: \u201cSound Event Localization and Detection Evaluated in Real Spatial Sound Scenes\u201d [<a href=\"https:\/\/dcase.community\/challenge2022\/task-sound-event-localization-and-detection-evaluated-in-real-spatial-sound-scenes\">URL<\/a>][<a href=\"https:\/\/arxiv.org\/abs\/2206.01948\" target=\"_blank\" rel=\"noreferrer noopener\">report<\/a>][<a href=\"https:\/\/zenodo.org\/record\/6600531#.Yp_dLO7P3b0\" target=\"_blank\" rel=\"noreferrer noopener\">dataset<\/a>]<\/li>\n<\/ul>\n<\/li>\n<\/ul>\n\n\n\n<h2 class=\"wp-block-heading\" id=\"granted_patents\"><strong>Granted Patents<\/strong><\/h2>\n\n\n\n<ul class=\"wp-block-list\">\n<li>US11067661B2 \u201cInformation processing device and information processing method\u201d [<a rel=\"noreferrer noopener\" href=\"https:\/\/patents.google.com\/patent\/US11067661B2\/en\" target=\"_blank\">URL<\/a>]<\/li>\n\n\n\n<li>US10924849B2 \u201cSound source separation device and method\u201d [<a rel=\"noreferrer noopener\" href=\"https:\/\/patents.google.com\/patent\/US10924849B2\/en\" target=\"_blank\">URL<\/a>]<\/li>\n\n\n\n<li>US10880638B2 \u201cSound field forming apparatus and method\u201d [<a rel=\"noreferrer noopener\" href=\"https:\/\/patents.google.com\/patent\/US10880638B2\/en\" target=\"_blank\">URL<\/a>]<\/li>\n\n\n\n<li>US10757505B2 \u201cSignal processing device, method, and program stored on a computer-readable medium, enabling a sound to be reproduced at a remote location and a different sound to be reproduced at a location neighboring the remote location\u201d [<a rel=\"noreferrer noopener\" href=\"https:\/\/patents.google.com\/patent\/US10757505B2\/en\" target=\"_blank\">URL<\/a>]<\/li>\n\n\n\n<li>US10674255B2 \u201cSound processing device, method and program\u201d [<a rel=\"noreferrer noopener\" href=\"https:\/\/patents.google.com\/patent\/US10674255B2\/en\" target=\"_blank\">URL<\/a>]<\/li>\n\n\n\n<li>US10657973B2 \u201cMethod, apparatus and system\u201d [<a rel=\"noreferrer noopener\" href=\"https:\/\/patents.google.com\/patent\/US10657973B2\/en\" target=\"_blank\">URL<\/a>]<\/li>\n\n\n\n<li>US10650841B2 \u201cSound source separation apparatus and method\u201d [<a rel=\"noreferrer noopener\" href=\"https:\/\/patents.google.com\/patent\/US10650841B2\/en\" target=\"_blank\">URL<\/a>]<\/li>\n\n\n\n<li>US10602266B2 \u201cAudio processing apparatus and method, and program\u201d [<a rel=\"noreferrer noopener\" href=\"https:\/\/patents.google.com\/patent\/US10602266B2\/en\" 
target=\"_blank\">URL<\/a>]<\/li>\n\n\n\n<li>US10595148B2 \u201cSound processing apparatus and method, and program\u201d [<a rel=\"noreferrer noopener\" href=\"https:\/\/patents.google.com\/patent\/US10595148B2\/en\" target=\"_blank\">URL<\/a>]<\/li>\n\n\n\n<li>US10567872B2 \u201cLocally silenced sound field forming apparatus and method\u201d [<a rel=\"noreferrer noopener\" href=\"https:\/\/patents.google.com\/patent\/US10567872B2\/en\" target=\"_blank\">URL<\/a>]<\/li>\n\n\n\n<li>US10524075B2 \u201cSound processing apparatus, method, and program\u201d [<a rel=\"noreferrer noopener\" href=\"https:\/\/patents.google.com\/patent\/US10524075B2\/en\" target=\"_blank\">URL<\/a>]<\/li>\n\n\n\n<li>US10477309B2 \u201cSound field reproduction device, sound field reproduction method, and program\u201d [<a rel=\"noreferrer noopener\" href=\"https:\/\/patents.google.com\/patent\/US10477309B2\/en\" target=\"_blank\">URL<\/a>]<\/li>\n\n\n\n<li>US10412531B2 \u201cAudio processing apparatus, method, and program\u201d [<a rel=\"noreferrer noopener\" href=\"https:\/\/patents.google.com\/patent\/US10412531B2\/en\" target=\"_blank\">URL<\/a>]<\/li>\n\n\n\n<li>US10380991B2 \u201cSignal processing device, signal processing method, and program for selectable spatial correction of multichannel audio signal\u201d [<a rel=\"noreferrer noopener\" href=\"https:\/\/patents.google.com\/patent\/US10380991B2\/en\" target=\"_blank\">URL<\/a>]<\/li>\n\n\n\n<li>US10206034B2 \u201cSound field collecting apparatus and method, sound field reproducing apparatus and method\u201d [<a rel=\"noreferrer noopener\" href=\"https:\/\/patents.google.com\/patent\/US10206034B2\/en\" target=\"_blank\">URL<\/a>]<\/li>\n\n\n\n<li>US10015615B2 \u201cSound field reproduction apparatus and method, and program\u201d [<a rel=\"noreferrer noopener\" href=\"https:\/\/patents.google.com\/patent\/US10015615B2\/en\" target=\"_blank\">URL<\/a>]<\/li>\n\n\n\n<li>US9711161B2 \u201cVoice processing apparatus, voice processing method, and program\u201d [<a rel=\"noreferrer noopener\" href=\"https:\/\/patents.google.com\/patent\/US9711161B2\/en\" target=\"_blank\">URL<\/a>]<\/li>\n\n\n\n<li>US9654872B2 \u201cInput device, signal processing method, program, and recording medium\u201d [<a rel=\"noreferrer noopener\" href=\"https:\/\/patents.google.com\/patent\/US9654872B2\/en\" target=\"_blank\">URL<\/a>]<\/li>\n\n\n\n<li>US9426564B2 \u201cAudio processing device, method and program\u201d [<a rel=\"noreferrer noopener\" href=\"https:\/\/patents.google.com\/patent\/US9426564B2\/en\" target=\"_blank\">URL<\/a>]<\/li>\n\n\n\n<li>US9406312B2 \u201cSignal processing apparatus and signal processing method, encoder and encoding method, decoder and decoding method, and program\u201d [<a href=\"https:\/\/patents.google.com\/patent\/US9406312B2\/en\" target=\"_blank\" rel=\"noreferrer noopener\">URL<\/a>]<\/li>\n\n\n\n<li>US9380398B2 \u201cSound processing apparatus, method, and program\u201d [<a rel=\"noreferrer noopener\" href=\"https:\/\/patents.google.com\/patent\/US9380398B2\/en\" target=\"_blank\">URL<\/a>]<\/li>\n\n\n\n<li>US9208795B2 \u201cFrequency band extending device and method, encoding device and method, decoding device and method, and program\u201d [<a href=\"https:\/\/patents.google.com\/patent\/US9208795B2\/en\" target=\"_blank\" rel=\"noreferrer noopener\">URL<\/a>]<\/li>\n\n\n\n<li>US8295507B2 \u201cFrequency band extending apparatus, frequency band extending method, player apparatus, playing method, program and recording medium\u201d [<a 
rel=\"noreferrer noopener\" href=\"https:\/\/patents.google.com\/patent\/US8295507\" target=\"_blank\">URL<\/a>]<\/li>\n<\/ul>\n\n\n\n<div style=\"height:26px\" aria-hidden=\"true\" class=\"wp-block-spacer\"><\/div>\n\n\n\n<h2 class=\"wp-block-heading\" id=\"academic_services_and_activities\"><strong>Academic Services and Activities<\/strong><\/h2>\n\n\n\n<h3 class=\"wp-block-heading\" id=\"committee_member_session_chair\">Committee Member \/ Session Chair<\/h3>\n\n\n\n<ul class=\"wp-block-list\">\n<li>IEEE IJCNN\n<ul class=\"wp-block-list\">\n<li><mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">Session Chair<\/mark> at IEEE IJCNN 2025 for \u201cLeveraging Foundation Models for Efficiently Developing Generative Models\u201d [<a href=\"https:\/\/2025.ijcnn.org\/authors\/special-sessions#session-3-56\" target=\"_blank\" rel=\"noreferrer noopener\">URL<\/a>]<\/li>\n\n\n\n<li><mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">Competition Chair<\/mark> at IEEE IJCNN 2025 [<a href=\"https:\/\/2025.ijcnn.org\/about\/organizing-committee\" target=\"_blank\" rel=\"noreferrer noopener\">URL<\/a>]<\/li>\n<\/ul>\n<\/li>\n\n\n\n<li>IEEE Audio and Acoustic Signal Processing <mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">Technical Committee (AASP TC) Member<\/mark> 2023<span style=\"color: rgb(0, 0, 0);\">\u2013<\/span>2026 [<a href=\"https:\/\/www.sony.com\/en\/SonyInfo\/research\/news\/article002\/\" target=\"_blank\" rel=\"noreferrer noopener\">URL<\/a>]<\/li>\n\n\n\n<li>IEEE ICCE Japan <mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">Program Committee Chair<\/mark> 2021<span style=\"color: rgb(0, 0, 0);\">\u2013<\/span>2023<\/li>\n\n\n\n<li>IEEE ICASSP\n<ul class=\"wp-block-list\">\n<li><mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">Session Chair<\/mark> at IEEE ICASSP 2024  for\u201cGenerative Semantic Communication: How Generative Models Enhance Semantic Communications\u201d [<a href=\"https:\/\/sites.google.com\/uniroma1.it\/icassp2024-special-session\/home-page\" target=\"_blank\" rel=\"noreferrer noopener\">URL<\/a>]<\/li>\n\n\n\n<li><mark><mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">Session Chair<\/mark><\/mark> at IEEE ICASSP 2023 for \u201cDiffusion-based Generative Models for Audio and Speech\u201d [<a href=\"https:\/\/2023.ieeeicassp.org\/wp-content\/uploads\/sites\/443\/icassp-2023-program_v5.pdf\" target=\"_blank\" rel=\"noreferrer noopener\">URL<\/a>]<\/li>\n\n\n\n<li><mark><mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">Session Chair<\/mark><\/mark> at IEEE ICASSP 2022 for Signal Processing and Neural Approaches for Soundscapes (SiNApS)\u201d [<a href=\"https:\/\/2022.ieeeicassp.org\/view_session.php?SessionID=1312\" target=\"_blank\" rel=\"noreferrer noopener\">URL<\/a>]<\/li>\n\n\n\n<li><mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">Session Chair<\/mark> at IEEE ICASSP 2020 for \u201cActive Control of Acoustic Noise over Spatial Regions\u201d [<a href=\"https:\/\/cmsworkshops.com\/ICASSP2020\/Papers\/ViewSession.asp?Sessionid=1183\" target=\"_blank\" rel=\"noreferrer noopener\">URL<\/a>]<\/li>\n<\/ul>\n<\/li>\n<\/ul>\n\n\n\n<h3 class=\"wp-block-heading\" id=\"phd_supervision\">PhD 
Supervision<\/h3>\n\n\n\n<ul class=\"wp-block-list\">\n<li>TRAMUCA: Transparency in AI-powered Music Creation Algorithms, 4-year Fully-funded PhD Studentship by Sony and MTG-UPF, Joint Supervision with Dr. Emilia G\u00f3mez and Dr. Xavier Serra [<a href=\"https:\/\/www.upf.edu\/web\/mtg\/ongoing-projects\/-\/asset_publisher\/DneGVrJZ7tmE\/content\/id\/261546460\/maximized\" target=\"_blank\" rel=\"noreferrer noopener\">URL<\/a>]\n<ul class=\"wp-block-list\">\n<li>\u201cTransparency in Music-Generative AI: A Systematic Literature Review\u201d [<a href=\"https:\/\/www.researchsquare.com\/article\/rs-3708077\/v1\" target=\"_blank\" rel=\"noreferrer noopener\">preprint<\/a>]<\/li>\n\n\n\n<li>Roser Batlle-Roca, Wei-Hsiang Liao, Xavier Serra, <span style=\"text-decoration: underline;\">Yuki Mitsufuji<\/span>, Emilia G\u00f3mez, \u201cTowards Assessing Data Replication in Music Generation with Music Similarity Metrics on Raw Audio,\u201d in Proc. International Society for Music Information Retrieval (<mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">ISMIR<\/mark>) Conference, pp. 1004<span style=\"color: rgb(0, 0, 0);\">\u2013<\/span>1011, 2024 [<a href=\"https:\/\/zenodo.org\/records\/14877501\" target=\"_blank\" rel=\"noreferrer noopener\">ISMIR<\/a>][<a href=\"https:\/\/arxiv.org\/abs\/2407.14364\" target=\"_blank\" rel=\"noreferrer noopener\">arXiv<\/a>][<a href=\"https:\/\/github.com\/roserbatlleroca\/mira\" target=\"_blank\" rel=\"noreferrer noopener\">code<\/a>]<\/li>\n<\/ul>\n<\/li>\n<\/ul>\n\n\n\n<h3 class=\"wp-block-heading\" id=\"guest_lectures_at_university\">Guest Lectures at University<\/h3>\n\n\n\n<ul class=\"wp-block-list\">\n<li>\u201cProtective AI for Creators,\u201d MARL, New York University, Apr. 28, 2026<\/li>\n\n\n\n<li>\u201cAI for Creators: Pushing Creative Abilities to the Next Level,\u201d GenAudio &amp; AI, <mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">New York University<\/mark>, Nov. 10, 2025 [<a href=\"https:\/\/www.instagram.com\/p\/DQlHMY7jVJy\/\" target=\"_blank\" rel=\"noreferrer noopener\">URL<\/a>]<\/li>\n\n\n\n<li>\u201cAI for Creators: Pushing Creative Abilities to the Next Level,\u201d MILA, <mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">University of Montreal<\/mark>, Jun. 19, 2025 [<a href=\"https:\/\/poonehmousavi.github.io\/rg.html#schedule\" target=\"_blank\" rel=\"noreferrer noopener\">URL<\/a>]<\/li>\n\n\n\n<li>\u201cAI for Creators: Pushing Creative Abilities to the Next Level,\u201d DAPLab, <mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">Columbia University<\/mark>, Mar. 11, 2025<\/li>\n\n\n\n<li>\u201cAI for Creators: Pushing Creative Abilities to the Next Level,\u201d MARL, New York University, Feb. 27, 2025<\/li>\n\n\n\n<li>\u201cAI for Creators: Pushing Creative Abilities to the Next Level,\u201d Matsuo Lab, <mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">the University of Tokyo<\/mark>, Dec. 12, 2024 [<a href=\"https:\/\/weblab.t.u-tokyo.ac.jp\/lecture\/course-list\/world-model\/\" target=\"_blank\" rel=\"noreferrer noopener\">URL<\/a>]<\/li>\n\n\n\n<li>\u201cAI for Creators: Pushing Creative Abilities to the Next Level,\u201d Kakei Lab, the University of Tokyo, Dec.
10, 2024 [<a href=\"https:\/\/www.iii.u-tokyo.ac.jp\/event\/20241202event2\" target=\"_blank\" rel=\"noreferrer noopener\">URL<\/a>]<\/li>\n\n\n\n<li>\u201cDeep Generative Models for Audio Applications,\u201d AI Research Center, National Institute of Advanced Industrial Science and Technology (<mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">AIST<\/mark>), Mar. 22, 2024 [<a href=\"https:\/\/www.airc.aist.go.jp\/seminar_detail\/seminar_076.html\" target=\"_blank\" rel=\"noreferrer noopener\">URL<\/a>]<\/li>\n\n\n\n<li>\u201cDeep Generative Models for Audio Applications,\u201d <mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">T\u00e9l\u00e9com Paris<\/mark> (Audio\/ADASP group), Jan. 25, 2024 [<a href=\"https:\/\/listen.telecom-paris.fr\/en\/agenda\/monthly-talk-yuki-mitsufuji-sony-research\/\" target=\"_blank\" rel=\"noreferrer noopener\">URL<\/a>]<\/li>\n\n\n\n<li>\u201cAI x Creators: Pushing Creative Abilities to the Next Level,\u201d Matsuo Lab, the University of Tokyo, Nov. 24, 2023 [<a href=\"https:\/\/deeplearning.jp\/en\/lectures\/world-model-2023\/\" target=\"_blank\" rel=\"noreferrer noopener\">URL<\/a>]<\/li>\n\n\n\n<li>\u201cAI &amp; Network Communication Systems,\u201d 7-lecture Course, <mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">Tokyo Institute of Technology<\/mark>, 2023 [<a href=\"http:\/\/www.ocw.titech.ac.jp\/index.php?module=General&amp;action=T0300&amp;GakubuCD=2&amp;GakkaCD=321717&amp;KeiCD=17&amp;course=17&amp;KougiCD=202334374&amp;Nendo=2023&amp;vid=03&amp;lang=EN\" target=\"_blank\" rel=\"noreferrer noopener\">URL<\/a>]<\/li>\n\n\n\n<li>\u201cAI x Creators: Pushing Creative Abilities to the Next Level,\u201d Matsuo Lab, the University of Tokyo, Dec. 16, 2022 [<a href=\"https:\/\/deeplearning.jp\/en\/lectures\/world-model-2022\/\" target=\"_blank\" rel=\"noreferrer noopener\">URL<\/a>]<\/li>\n\n\n\n<li>\u201cAI &amp; Network Communication Systems,\u201d 7-lecture Course, Tokyo Institute of Technology, 2022 [<a href=\"http:\/\/www.ocw.titech.ac.jp\/index.php?module=General&amp;action=T0300&amp;GakubuCD=2&amp;GakkaCD=321717&amp;KeiCD=17&amp;course=17&amp;KougiCD=202234374&amp;Nendo=2022&amp;lang=EN&amp;vid=03\" target=\"_blank\" rel=\"noreferrer noopener\">URL<\/a>]<\/li>\n\n\n\n<li>\u201cAI x Creators: Pushing Creative Abilities to the Next Level,\u201d Matsuo Lab, the University of Tokyo, Feb. 16, 2022 [<a href=\"https:\/\/deeplearning.jp\/en\/lectures\/world-model-2021\/\" target=\"_blank\" rel=\"noreferrer noopener\">URL<\/a>]<\/li>\n\n\n\n<li>\u201cContent Creation by Cutting Edge AI-powered Music Technology,\u201d Tokyo Institute of Technology, Dec. 1, 2021 [<a href=\"http:\/\/www.ee.e.titech.ac.jp\/jp\/edu\/eefst\/Flyer_SONY_Lecture_draft_Mitsufuji_20211109.pdf\" target=\"_blank\" rel=\"noreferrer noopener\">URL<\/a>]<\/li>\n\n\n\n<li>\u201cAI x Creators: Pushing Creative Abilities to the Next Level,\u201d <mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">Keio University<\/mark>, Oct.
21, 2021<\/li>\n<\/ul>\n\n\n\n<h3 class=\"wp-block-heading\" id=\"workshops\">Workshops<\/h3>\n\n\n\n<ol class=\"wp-block-list\">\n<li>Organizer at NeurIPS 2025 Workshop on Generative and Protective AI for Content Creation (<mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">NeurIPS GenProCC<\/mark>) [<a href=\"https:\/\/genprocc.github.io\/\" target=\"_blank\" rel=\"noreferrer noopener\">URL<\/a>]<\/li>\n\n\n\n<li>Organizer at ICCV 2025 Workshop on Generative AI for Audio-Visual Content Creation (<mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">ICCV Gen4AVC<\/mark>) [<a href=\"https:\/\/gen4avc.github.io\/\" target=\"_blank\" rel=\"noreferrer noopener\">URL<\/a>]<\/li>\n\n\n\n<li>Organizer at ECCV 2024 Workshop on Audio-Visual Generation and Learning (<mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">ECCV AVGenL<\/mark>) [<a href=\"https:\/\/sites.google.com\/view\/avgenl\" target=\"_blank\" rel=\"noreferrer noopener\">URL<\/a>]<\/li>\n<\/ol>\n\n\n\n<h3 class=\"wp-block-heading\" id=\"tutorials\">Tutorials<\/h3>\n\n\n\n<ol class=\"wp-block-list\">\n<li>\u201cThe Principles of Diffusion Models: From Origins to Real-Time Diffusion &amp; Tokenized Vision Models,\u201d <mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">CVPR 2026<\/mark> [<a href=\"https:\/\/sites.google.com\/view\/cvpr26-principles-of-diffusion\/home\" target=\"_blank\" rel=\"noreferrer noopener\">tutorial<\/a>][<a href=\"https:\/\/cvpr.thecvf.com\/virtual\/2026\/tutorial\/36147\" target=\"_blank\" rel=\"noreferrer noopener\">URL<\/a>]<\/li>\n\n\n\n<li>\u201cDiffusion Models and Flows,\u201d <mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">IJCNN 2025 Tutorial<\/mark> [<a href=\"https:\/\/sites.google.com\/view\/diffusion-model-tutorial-ijcnn\/home\" target=\"_blank\" rel=\"noreferrer noopener\">tutorial<\/a>]<\/li>\n\n\n\n<li>\u201cTransforming Chaos into Harmony: Diffusion Models in Audio Signal Processing,\u201d <mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">ICASSP 2025 Tutorial<\/mark> [<a href=\"https:\/\/2025.ieeeicassp.org\/accepted-tutorials\/#1735042350198-a786fb63-8c6b\" target=\"_blank\" rel=\"noreferrer noopener\">ICASSP<\/a>][<a href=\"https:\/\/sites.google.com\/view\/diffusionmodeltutorialicassp25\/home\" target=\"_blank\" rel=\"noreferrer noopener\">tutorial<\/a>][<a href=\"https:\/\/drive.google.com\/file\/d\/1gxNUg7NMEn0Ric-MIfDDRlFfN8aeFuy_\/view\" target=\"_blank\" rel=\"noreferrer noopener\">pdf<\/a>][<a href=\"https:\/\/drive.google.com\/file\/d\/1cSI2csdMKQM-QcBi8NBdiTK8jDPLU8VI\/view\" target=\"_blank\" rel=\"noreferrer noopener\">video<\/a>]<\/li>\n\n\n\n<li>\u201cFrom White Noise to Symphony: Diffusion Models for Music and Sound,\u201d <mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">ISMIR 2024 Tutorial<\/mark> [<a href=\"https:\/\/ismir2024.ismir.net\/tutorials\" target=\"_blank\" rel=\"noreferrer noopener\">ISMIR<\/a>][<a href=\"https:\/\/sites.google.com\/view\/diffusion-tutorial-ismir24\/home\" target=\"_blank\" rel=\"noreferrer noopener\">tutorial<\/a>][<a href=\"https:\/\/github.com\/ChiehHsinJesseLai\/ISMIR24DiffusionModelTutorial?tab=readme-ov-file\" target=\"_blank\" rel=\"noreferrer noopener\">gitpage<\/a>][<a
href=\"https:\/\/github.com\/ChiehHsinJesseLai\/ISMIR24DiffusionModelTutorial\/blob\/main\/tutorial_diffusion_model_all.pdf\" target=\"_blank\" rel=\"noreferrer noopener\">pdf<\/a>][<a href=\"https:\/\/drive.google.com\/file\/d\/14UKMFCrd3kyBjCVvd1qaAUPzZWah5m8x\/view\" target=\"_blank\" rel=\"noreferrer noopener\">video<\/a>][<a href=\"https:\/\/github.com\/koichi-saito-sony\/ismir2024_tutorial_demo\" target=\"_blank\" rel=\"noreferrer noopener\">notebook<\/a>]<\/li>\n<\/ol>\n\n\n\n<h3 class=\"wp-block-heading\" id=\"keynote_speeches\">Keynote Speeches<\/h3>\n\n\n\n<ol class=\"wp-block-list\">\n<li>\u201cAI for Creators: Pushing Creative Abilities to the Next Level,\u201d IEEE S&amp;P Workshop Artwork Security and Provenance in the Age of AI (<mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">ArtSec<\/mark>), May. 2026 [<a href=\"https:\/\/artsec26.ieee-security.org\/\" target=\"_blank\" rel=\"noreferrer noopener\">URL<\/a>]<\/li>\n\n\n\n<li>\u201cAI for Creators: Pushing Creative Abilities to the Next Level,\u201d ICASSP Workshop on Speech, Music and Mind (<mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">ICASSP SMM<\/mark>), May. 2026 [<a href=\"https:\/\/smm26.iiit.ac.in\/\" target=\"_blank\" rel=\"noreferrer noopener\">URL<\/a>]<\/li>\n\n\n\n<li>\u201cAdvances in Audiovisual Generative Models,\u201d ICCV Workshop on AI for Content Generation, Quality Enhancement and Streaming (<strong><mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">ICCV <\/mark><\/strong><mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">AIGENS<\/mark>), Oct. 2025 [<a href=\"https:\/\/ai4streaming-workshop.github.io\/\" target=\"_blank\" rel=\"noreferrer noopener\">URL<\/a>]<\/li>\n<\/ol>\n\n\n\n<div style=\"height:26px\" aria-hidden=\"true\" class=\"wp-block-spacer\"><\/div>\n\n\n\n<h2 class=\"wp-block-heading\" id=\"invited_talks_and_media\"><strong>Invited Talks and Media<\/strong><\/h2>\n\n\n\n<h3 class=\"wp-block-heading\" id=\"invited_talks\">Invited Talks<\/h3>\n\n\n\n<ol class=\"wp-block-list\">\n<li>\u201cArt Content Creation: When Demands are Met by Pipelines (or Not),\u201d Creative AI Panel at <mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">NeurIPS 2025<\/mark>, Dec. 2025 [<a href=\"https:\/\/neurips.cc\/virtual\/2025\/loc\/san-diego\/panel\/131738\" target=\"_blank\" rel=\"noreferrer noopener\">URL<\/a>]<\/li>\n\n\n\n<li>\u201cAI for Creators: Pushing Creative Abilities to the Next Level,\u201d Speech and Audio in the Northeast (<mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">SANE<\/mark>) Workshop, Nov. 2025 [<a href=\"https:\/\/www.saneworkshop.org\/sane2025\/\" target=\"_blank\" rel=\"noreferrer noopener\">URL<\/a>][<a href=\"https:\/\/youtu.be\/HAQeX7AMt3k?list=PLBJWRPcgwk7s543ajMnlZjlmFYxRuVMqM\" target=\"_blank\" rel=\"noreferrer noopener\">video<\/a>]<\/li>\n\n\n\n<li>\u201cAI in Music Production: Pioneering the Future,\u201d <mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">Soundtrax<\/mark>, Oct. 
2025 [<a href=\"https:\/\/www.soundtrax.org\/festival-schedule\" target=\"_blank\" rel=\"noreferrer noopener\">URL<\/a>]<\/li>\n\n\n\n<li>\u201cState of AI in Music,\u201d <mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">Mondo.NYC<\/mark>, Oct. 2025 [<a href=\"https:\/\/www.mondo.nyc\/2020-panels\/state-of-ai-in-music\" target=\"_blank\" rel=\"noreferrer noopener\">URL<\/a>][<a href=\"https:\/\/www.mondo.nyc\/speakers-2022\/yuki-mitsufuji\" target=\"_blank\" rel=\"noreferrer noopener\">bio<\/a>]<\/li>\n\n\n\n<li>\u201cAI for Creators: Pushing Creative Abilities to the Next Level,\u201d ISMIR Workshop on Large Language Models for Music &amp; Audio (<mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">ISMIR LLM4Music<\/mark>), Sep. 2025 [<a href=\"https:\/\/m-a-p.ai\/LLM4Music\/\" target=\"_blank\" rel=\"noreferrer noopener\">URL<\/a>]<\/li>\n\n\n\n<li>\u201cAI for Creators: Pushing Creative Abilities to the Next Level,\u201d <mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">Wallifornia MusicTech<\/mark>, Jul. 2025 [<a href=\"https:\/\/walliforniamusictech.com\/conference\/#speakers\" target=\"_blank\" rel=\"noreferrer noopener\">URL<\/a>][<a href=\"https:\/\/www.youtube.com\/watch?v=jTVE2ABxRYg\" target=\"_blank\" rel=\"noreferrer noopener\">video<\/a>]<\/li>\n\n\n\n<li>\u201cArtists\u2019 Talk evala vol. 1,\u201d Feb. 2025 [<a href=\"https:\/\/www.ntticc.or.jp\/en\/exhibitions\/2025\/artists-talk-evala-feb-15-2025\/\" target=\"_blank\" rel=\"noreferrer noopener\">URL<\/a>][<a href=\"https:\/\/www.ntticc.or.jp\/en\/hive\/artist-talk\/20250215\/\" target=\"_blank\" rel=\"noreferrer noopener\">video<\/a>]<\/li>\n\n\n\n<li>\u201cCopying and Attributing Training Data in Audio Generative Models,\u201d <mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">AES Show NY<\/mark>, Oct. 2024 [<a href=\"https:\/\/sched.co\/1k8IP\" target=\"_blank\" rel=\"noreferrer noopener\">URL<\/a>]<\/li>\n\n\n\n<li>\u201cHow AI is Shaking up the Music Industry,\u201d <mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">MIDEM<\/mark> Digital, Nov. 2021 [<a href=\"https:\/\/youtu.be\/MH2HNIk9g1E\" target=\"_blank\" rel=\"noreferrer noopener\">URL<\/a>]<\/li>\n\n\n\n<li>\u201cAI &amp; the Future of Television Part 1: Content Production,\u201d <mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">MIPCOM<\/mark> Online+, Oct.
2020<\/li>\n<\/ol>\n\n\n<div class=\"su-image-carousel  su-image-carousel-has-spacing su-image-carousel-has-lightbox su-image-carousel-has-outline su-image-carousel-adaptive su-image-carousel-slides-style-default su-image-carousel-controls-style-dark su-image-carousel-align-none\" style=\"max-width:100%\" data-flickity-options='{\"groupCells\":true,\"cellSelector\":\".su-image-carousel-item\",\"adaptiveHeight\":true,\"cellAlign\":\"left\",\"prevNextButtons\":true,\"pageDots\":true,\"autoPlay\":5000,\"imagesLoaded\":true,\"contain\":false,\"selectedAttraction\":0.007,\"friction\":0.25}' id=\"su_image_carousel_69fb213d30838\"><div class=\"su-image-carousel-item\"><div class=\"su-image-carousel-item-content\"><a href=\"https:\/\/www.yukimitsufuji.com\/wp-content\/uploads\/2021\/11\/MIDEM2021_HITLAB.jpg\" target=\"_blank\" rel=\"noopener noreferrer\" data-caption=\"\"><img loading=\"lazy\" decoding=\"async\" width=\"2048\" height=\"1152\" src=\"https:\/\/www.yukimitsufuji.com\/wp-content\/uploads\/2021\/11\/MIDEM2021_HITLAB.jpg\" class=\"\" alt=\"\" srcset=\"https:\/\/www.yukimitsufuji.com\/wp-content\/uploads\/2021\/11\/MIDEM2021_HITLAB.jpg 2048w, https:\/\/www.yukimitsufuji.com\/wp-content\/uploads\/2021\/11\/MIDEM2021_HITLAB-300x169.jpg 300w, https:\/\/www.yukimitsufuji.com\/wp-content\/uploads\/2021\/11\/MIDEM2021_HITLAB-1024x576.jpg 1024w, https:\/\/www.yukimitsufuji.com\/wp-content\/uploads\/2021\/11\/MIDEM2021_HITLAB-768x432.jpg 768w, https:\/\/www.yukimitsufuji.com\/wp-content\/uploads\/2021\/11\/MIDEM2021_HITLAB-1536x864.jpg 1536w\" sizes=\"auto, (max-width: 2048px) 100vw, 2048px\" \/><\/a><\/div><\/div><div class=\"su-image-carousel-item\"><div class=\"su-image-carousel-item-content\"><a href=\"https:\/\/www.yukimitsufuji.com\/wp-content\/uploads\/2021\/11\/MIPCOM_2020.jpg\" target=\"_blank\" rel=\"noopener noreferrer\" data-caption=\"\"><img loading=\"lazy\" decoding=\"async\" width=\"1200\" height=\"630\" src=\"https:\/\/www.yukimitsufuji.com\/wp-content\/uploads\/2021\/11\/MIPCOM_2020.jpg\" class=\"\" alt=\"\" srcset=\"https:\/\/www.yukimitsufuji.com\/wp-content\/uploads\/2021\/11\/MIPCOM_2020.jpg 1200w, https:\/\/www.yukimitsufuji.com\/wp-content\/uploads\/2021\/11\/MIPCOM_2020-300x158.jpg 300w, https:\/\/www.yukimitsufuji.com\/wp-content\/uploads\/2021\/11\/MIPCOM_2020-1024x538.jpg 1024w, https:\/\/www.yukimitsufuji.com\/wp-content\/uploads\/2021\/11\/MIPCOM_2020-768x403.jpg 768w\" sizes=\"auto, (max-width: 1200px) 100vw, 1200px\" \/><\/a><\/div><\/div><div class=\"su-image-carousel-item\"><div class=\"su-image-carousel-item-content\"><a href=\"https:\/\/www.yukimitsufuji.com\/wp-content\/uploads\/2021\/11\/MIDEM2021_meet_asia.jpg\" target=\"_blank\" rel=\"noopener noreferrer\" data-caption=\"\"><img loading=\"lazy\" decoding=\"async\" width=\"800\" height=\"418\" src=\"https:\/\/www.yukimitsufuji.com\/wp-content\/uploads\/2021\/11\/MIDEM2021_meet_asia.jpg\" class=\"\" alt=\"\" srcset=\"https:\/\/www.yukimitsufuji.com\/wp-content\/uploads\/2021\/11\/MIDEM2021_meet_asia.jpg 800w, https:\/\/www.yukimitsufuji.com\/wp-content\/uploads\/2021\/11\/MIDEM2021_meet_asia-300x157.jpg 300w, https:\/\/www.yukimitsufuji.com\/wp-content\/uploads\/2021\/11\/MIDEM2021_meet_asia-768x401.jpg 768w\" sizes=\"auto, (max-width: 800px) 100vw, 800px\" \/><\/a><\/div><\/div><div class=\"su-image-carousel-item\"><div class=\"su-image-carousel-item-content\"><a href=\"https:\/\/www.yukimitsufuji.com\/wp-content\/uploads\/2025\/10\/MondoNYC_Panel.jpg\" target=\"_blank\" rel=\"noopener noreferrer\" 
data-caption=\"\"><img loading=\"lazy\" decoding=\"async\" width=\"800\" height=\"1000\" src=\"https:\/\/www.yukimitsufuji.com\/wp-content\/uploads\/2025\/10\/MondoNYC_Panel.jpg\" class=\"\" alt=\"\" srcset=\"https:\/\/www.yukimitsufuji.com\/wp-content\/uploads\/2025\/10\/MondoNYC_Panel.jpg 800w, https:\/\/www.yukimitsufuji.com\/wp-content\/uploads\/2025\/10\/MondoNYC_Panel-240x300.jpg 240w, https:\/\/www.yukimitsufuji.com\/wp-content\/uploads\/2025\/10\/MondoNYC_Panel-768x960.jpg 768w\" sizes=\"auto, (max-width: 800px) 100vw, 800px\" \/><\/a><\/div><\/div><div class=\"su-image-carousel-item\"><div class=\"su-image-carousel-item-content\"><a href=\"https:\/\/www.yukimitsufuji.com\/wp-content\/uploads\/2025\/10\/ICCV2025_AIGENS.jpg\" target=\"_blank\" rel=\"noopener noreferrer\" data-caption=\"\"><img loading=\"lazy\" decoding=\"async\" width=\"1920\" height=\"1080\" src=\"https:\/\/www.yukimitsufuji.com\/wp-content\/uploads\/2025\/10\/ICCV2025_AIGENS.jpg\" class=\"\" alt=\"\" srcset=\"https:\/\/www.yukimitsufuji.com\/wp-content\/uploads\/2025\/10\/ICCV2025_AIGENS.jpg 1920w, https:\/\/www.yukimitsufuji.com\/wp-content\/uploads\/2025\/10\/ICCV2025_AIGENS-300x169.jpg 300w, https:\/\/www.yukimitsufuji.com\/wp-content\/uploads\/2025\/10\/ICCV2025_AIGENS-1024x576.jpg 1024w, https:\/\/www.yukimitsufuji.com\/wp-content\/uploads\/2025\/10\/ICCV2025_AIGENS-768x432.jpg 768w, https:\/\/www.yukimitsufuji.com\/wp-content\/uploads\/2025\/10\/ICCV2025_AIGENS-1536x864.jpg 1536w\" sizes=\"auto, (max-width: 1920px) 100vw, 1920px\" \/><\/a><\/div><\/div><\/div><script id=\"su_image_carousel_69fb213d30838_script\">if(window.SUImageCarousel){setTimeout(function() {window.SUImageCarousel.initGallery(document.getElementById(\"su_image_carousel_69fb213d30838\"))}, 0);}var su_image_carousel_69fb213d30838_script=document.getElementById(\"su_image_carousel_69fb213d30838_script\");if(su_image_carousel_69fb213d30838_script){su_image_carousel_69fb213d30838_script.parentNode.removeChild(su_image_carousel_69fb213d30838_script);}<\/script>\n\n\n\n<div style=\"height:28px\" aria-hidden=\"true\" class=\"wp-block-spacer\"><\/div>\n\n\n\n<h3 class=\"wp-block-heading\" id=\"web_articles\">Web Articles<\/h3>\n\n\n\n<ol class=\"wp-block-list\">\n<li>Mar. 2026, Courrier International, \u201cTechnology: In Japan, Sony Is Developing Tools to Protect Creators from Plundering by AI\u201d [<a href=\"https:\/\/www.courrierinternational.com\/article\/technologies-au-japon-sony-developpe-des-outils-pour-proteger-les-createurs-du-pillage-par-l-ia_242113\" target=\"_blank\" rel=\"noreferrer noopener\">URL<\/a>]<\/li>\n\n\n\n<li>Jul. 2025, AIhub, \u201cInterview with Yuki Mitsufuji: Text-to-sound generation\u201d [<a href=\"https:\/\/aihub.org\/2025\/07\/29\/interview-with-yuki-mitsufuji-text-to-sound-generation\/\" target=\"_blank\" rel=\"noreferrer noopener\">URL<\/a>]<\/li>\n\n\n\n<li>Apr. 2025, Stories by Sony, \u201cBuilding Technologies to Expand the Future of Sound for Creators\u201d [<a href=\"https:\/\/www.sony.com\/en\/SonyInfo\/technology\/stories\/entries\/interview_de_mitsufuji\/\" target=\"_blank\" rel=\"noreferrer noopener\">URL<\/a>]<\/li>\n\n\n\n<li>Mar. 2025, Sony AI Blog, \u201cUnlocking the Future of Video-to-Audio Synthesis: Inside the MMAudio Model\u201d [<a href=\"https:\/\/ai.sony\/blog\/Unlocking-the-Future-of-Video-to-Audio-Synthesis-Inside-the-MMAudio-Model\/\" target=\"_blank\" rel=\"noreferrer noopener\">URL<\/a>]<\/li>\n\n\n\n<li>Jan. 
2025, AIhub, \u201cInterview with Yuki Mitsufuji: Improving AI image generation\u201d [<a href=\"https:\/\/aihub.org\/2025\/01\/23\/interview-with-yuki-mitsufuji-improving-ai-image-generation\/\" target=\"_blank\" rel=\"noreferrer noopener\">URL<\/a>]<\/li>\n\n\n\n<li>Aug. 2024, Sony AI Blog, \u201cSights on AI: Yuki Mitsufuji Shares Inspiration for AI Research into Music and Sound\u201d [<a href=\"https:\/\/www.ai.sony\/blog\/Sights-on-AI-Yuki-Mitsufuji\/\" target=\"_blank\" rel=\"noreferrer noopener\">URL<\/a>]<\/li>\n\n\n\n<li>May 2024, Sony AI Blog, \u201cRevolutionizing Creativity with CTM and SAN: Sony AI&#8217;s Groundbreaking Advances in Generative AI for Creators\u201d [<a href=\"https:\/\/ai.sony\/blog\/Revolutionizing-Creativity-with-CTM-and-SAN-Sony-AIs-Groundbreaking-Advances-in-Generative-AI-for-Creators\/\" target=\"_blank\" rel=\"noreferrer noopener\">URL<\/a>]<\/li>\n\n\n\n<li>Feb. 2024, InsideBIGDATA, \u201cSony AI Big Data Industry Predictions for 2024\u201d [<a href=\"https:\/\/insidebigdata.com\/2024\/02\/05\/sony-ai-big-data-industry-predictions-for-2024\/\" target=\"_blank\" rel=\"noreferrer noopener\">URL<\/a>]<\/li>\n\n\n\n<li>Nov. 2020, Reviving the Sound of Classic Movies with AI \u201cAI Sound Separation\u201d [<a href=\"https:\/\/www.sony.net\/SonyInfo\/technology\/stories\/AI_Sound_Separation\/\" target=\"_blank\" rel=\"noreferrer noopener\">URL<\/a>]<\/li>\n\n\n\n<li>Oct. 2020, New Excitement and Fun Ways to Enjoy Video and Audio Content \u201cAI Sound Separation x Entertainment\u201d<\/li>\n\n\n\n<li>The freedom to extract audio gives you the freedom to create new music \u201cAudio source separation\u201d<\/li>\n<\/ol>\n\n\n\n<div style=\"height:28px\" aria-hidden=\"true\" class=\"wp-block-spacer\"><\/div>\n\n\n\n<h3 class=\"wp-block-heading\" id=\"invited_talks_japanese\">Invited Talks (Japanese)<\/h3>\n\n\n\n<ol class=\"wp-block-list\">\n<li>May 2025, Ongaku Symposium, \u201cDeep Generative Models for Audio Applications\u201d [<a href=\"https:\/\/www.ipsj.or.jp\/kenkyukai\/event\/mus143slp156.html\" target=\"_blank\" rel=\"noreferrer noopener\">URL<\/a>]<\/li>\n\n\n\n<li>Mar. 2024, AIST Artificial Intelligence Research Center (AIRC) Seminar, \u201cThe Cutting Edge of Foundational Technologies Supporting Speech AI\u201d [<a href=\"https:\/\/www.airc.aist.go.jp\/seminar_detail\/seminar_076.html\" target=\"_blank\" rel=\"noreferrer noopener\">URL<\/a>]<\/li>\n\n\n\n<li>Jun. 2023, Sonic Academy Salon, \u201cSpecial Hands-on Session with Sony\u2019s Latest Music AI Technology\u201d [<a href=\"https:\/\/salon.sonicacademy.jp\/event\/detail?a=98\" target=\"_blank\" rel=\"noreferrer noopener\">URL<\/a>]<\/li>\n\n\n\n<li>Jul. 2020, DCAJ Business Seminar, \u201cCutting-Edge Acoustic Technology from Sony R&amp;D\u201d [<a href=\"https:\/\/www.dcaj.or.jp\/news\/2020\/06\/rd.html\" target=\"_blank\" rel=\"noreferrer noopener\">URL<\/a>]<\/li>\n\n\n\n<li>Advanced Technology Course, \u201cSony\u2019s Technological Strength \u00d7 Artists\u2019 Expressive Power: The Frontier of Staging Created by Sound VR\u201d<\/li>\n\n\n\n<li>Jul. 
2019, SDM Symposium, \u201cSonic Surf VR: Wave Field Synthesis Technology for Realizing Audio VR, and Content Creation\u201d [<a href=\"https:\/\/sdm.wide.ad.jp\/symposium\/2019\/\" target=\"_blank\" rel=\"noreferrer noopener\">URL<\/a>]<\/li>\n<\/ol>\n\n\n\n<h3 class=\"wp-block-heading\" id=\"web_articles_japanese\">Web Articles (Japanese)<\/h3>\n\n\n\n<ol id=\"block-b87e861e-532b-4420-8d2d-e710bf9dbcc9\" class=\"wp-block-list\">\n<li>Mar. 2026, <mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">Nikkei Shimbun (digital edition)<\/mark>, \u201cEven \u2018Ghibli-Style\u2019 Generation Is Blocked: Sony Group Develops Technology to Protect Copyright\u201d [<a href=\"https:\/\/www.nikkei.com\/article\/DGXZQOUC1125A0R10C26A3000000\/\" target=\"_blank\" rel=\"noreferrer noopener\">URL<\/a>]<\/li>\n\n\n\n<li>Feb. 2026, <mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">Nikkei Digital Governance<\/mark>, \u201cEven \u2018Ghibli-Style\u2019 Generation Is Blocked: Sony Group Develops Technology to Protect Copyright\u201d [<a href=\"https:\/\/www.nikkei.com\/prime\/digital-governance\/article\/DGXZQOGN104TB0Q6A210C2000000\" target=\"_blank\" rel=\"noreferrer noopener\">URL<\/a>]<\/li>\n\n\n\n<li>Feb. 2026, <mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">Nikkei Shimbun (morning edition)<\/mark>, \u201cSony Group Identifies the Training Data of Music-Composition AI, Making It Possible to Calculate Compensation for Creators\u201d [<a href=\"https:\/\/www.nikkei.com\/article\/DGXZQOGN070LT0X00C26A2000000\/\" target=\"_blank\" rel=\"noreferrer noopener\">URL<\/a>]<\/li>\n\n\n\n<li>Feb. 2026, <mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">Mita Hyoron Online<\/mark>, \u201cYuki Mitsufuji: Innovating Entertainment with Sound Source Separation Technology\u201d [<a href=\"https:\/\/www.mita-hyoron.keio.ac.jp\/spotlight\/202602-1.html\" target=\"_blank\" rel=\"noreferrer noopener\">URL<\/a>]<\/li>\n\n\n\n<li>Oct. 2025, <mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">LALALA USA<\/mark>, \u201c[People Living in Los Angeles] Building Systems That Let Artists Shine: AI \u00d7 Entertainment Research Is at a Critical Juncture\u201d [<a href=\"https:\/\/lalalausa.com\/archives\/68983\" target=\"_blank\" rel=\"noreferrer noopener\">URL<\/a>]<\/li>\n\n\n\n<li>Jul. 
2025, <mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">Nikkei xTECH<\/mark>, \u201cPart 3: All Eyes on \u2018VGGT,\u2019 the CVPR 2025 Best Paper; Eight Experts Present Their Top Paper Picks\u201d [<a href=\"https:\/\/xtech.nikkei.com\/atcl\/nxt\/column\/18\/03230\/061100004\/\" target=\"_blank\" rel=\"noreferrer noopener\">URL<\/a>]<\/li>\n\n\n\n<li>Apr. 2025, Sony Tech Stories, \u201cTaking On Technologies That Expand the Value and Possibilities of Sound: Building Systems That Let Creators Shine\u201d [<a href=\"https:\/\/www.sony.com\/ja\/SonyInfo\/technology\/stories\/entries\/interview_de_mitsufuji\/\" target=\"_blank\" rel=\"noreferrer noopener\">URL<\/a>]<\/li>\n\n\n\n<li>Nov. 2022, <mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">Nikkei Robotics<\/mark> (Dec. issue), \u201cSony Develops Its Own New Type of Deep Generative Model, Starting by Making High-Performance VAEs Easy to Use\u201d [<a href=\"https:\/\/xtech.nikkei.com\/atcl\/nxt\/mag\/rob\/18\/012600001\/00111\/\" target=\"_blank\" rel=\"noreferrer noopener\">URL<\/a>]<\/li>\n\n\n\n<li>Oct. 2022, <mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">DTM Station<\/mark>, \u201cA New Soundmain Studio Feature That Cleanly Extracts Only the Vocals, Realized with Sony\u2019s World-Leading Source Separation Technology\u201d [<a rel=\"noreferrer noopener\" href=\"https:\/\/www.dtmstation.com\/archives\/58473.html\" target=\"_blank\">URL<\/a>]<\/li>\n\n\n\n<li>Jul. 2022, <mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">DTM Station<\/mark>, \u201cSoundmain, a Music Production Service Offering Sony\u2019s World-Leading Deep-Learning Source Separation Technology\u201d [<a rel=\"noreferrer noopener\" href=\"https:\/\/www.dtmstation.com\/archives\/56675.html\" target=\"_blank\">URL<\/a>]<\/li>\n\n\n\n<li>Jan. 2022, <mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">Record Geijutsu<\/mark> (Feb. issue), \u201cFive Masterpieces 2021: My Audio,\u201d pp. 188\u2013189 [<a rel=\"noreferrer noopener\" href=\"https:\/\/www.ongakunotomo.co.jp\/catalog\/detail.php?id=042202\" target=\"_blank\">URL<\/a>]<\/li>\n\n\n\n<li>Jun. 2021, <mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">Phile Web<\/mark>, \u201cSony Realizes Artist Collaborations That Transcend Time: What Is \u2018AI Sound Separation\u2019 Technology?\u201d [<a rel=\"noreferrer noopener\" href=\"https:\/\/www.phileweb.com\/interview\/article\/202106\/23\/836.html\" target=\"_blank\">URL<\/a>]<\/li>\n\n\n\n<li>Jun. 
2021, Sony Group Career Forum 2022, \u201cChanging the Music Business with AI: A Close Look at Sony\u2019s Group Synergy\u201d [<a href=\"https:\/\/www.sony.com\/ja\/SonyInfo\/Jobs\/DiscoverSony\/articles\/202106\/career_forum2\/\">URL<\/a>]<\/li>\n\n\n\n<li>Apr. 2021, AI Start Lab, \u201cSony Presents the Possibilities That AI Source Separation Opens Up for the World of Entertainment\u201d<\/li>\n\n\n\n<li>Jan. 2021, <mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">Stereo Sound Online<\/mark>, \u201cSony\u2019s \u2018AI Sound Separation\u2019 Gives New Appeal to Past Masterpieces: How Was This World-First Breakthrough Achieved? (Part 1)\u201d (Reiji Asakura\u2019s Good Things Lab, Report 42) [<a rel=\"noreferrer noopener\" href=\"https:\/\/online.stereosound.co.jp\/_ct\/17424582\" target=\"_blank\">URL<\/a>]<\/li>\n\n\n\n<li>Jan. 2021, <mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">Stereo Sound Online<\/mark>, \u201cSony\u2019s \u2018AI Sound Separation\u2019 Gives New Appeal to Past Masterpieces: How Was This World-First Breakthrough Achieved? (Part 2)\u201d (Reiji Asakura\u2019s Good Things Lab, Report 43) [<a rel=\"noreferrer noopener\" href=\"https:\/\/online.stereosound.co.jp\/_ct\/17424593\" target=\"_blank\">URL<\/a>]<\/li>\n\n\n\n<li>Dec. 2020, Cocotame, \u201cThe \u2018Sound Separation Technology\u2019 That Brought Karaoke to LINE MUSIC Was a Dream Technology Connecting the Sounds of Past and Present (Part 1)\u201d [<a rel=\"noreferrer noopener\" href=\"https:\/\/cocotame.jp\/series\/016464\/\" target=\"_blank\">URL<\/a>]<\/li>\n\n\n\n<li>Dec. 2020, Cocotame, \u201cThe \u2018Sound Separation Technology\u2019 That Brought Karaoke to LINE MUSIC Was a Dream Technology Connecting the Sounds of Past and Present (Part 2)\u201d [<a rel=\"noreferrer noopener\" href=\"https:\/\/cocotame.jp\/series\/016476\/\" target=\"_blank\">URL<\/a>]<\/li>\n\n\n\n<li>Jul. 2020, <mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">Nikkei Electronics<\/mark>, \u201cSound, Too, Goes Hyperreal: Manipulating Sound Fields to Transform the World\u201d [<a rel=\"noreferrer noopener\" href=\"https:\/\/xtech.nikkei.com\/atcl\/nxt\/mag\/ne\/18\/00061\/00002\/\" target=\"_blank\">URL<\/a>]<\/li>\n\n\n\n<li>Sep. 
2019, Soundmain Blog, \u201cThe World of Music Production Is Changing: What Future Will the World\u2019s Most Advanced \u2018Source Separation Technology\u2019 Create?\u201d<\/li>\n\n\n\n<li>May 2019, <mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">Sound &amp; Recording Magazine<\/mark> (Jun. issue), \u201cTouch that Sound!, an Installation Exhibition for Experiencing Sony\u2019s Latest Sonic Surf VR Technology\u201d<\/li>\n\n\n\n<li>Mar. 2019, Impress Watch, \u201cA Marvelous Experience of Sound Moving Freely with Sony\u2019s \u2018Sonic Surf VR\u2019: We Asked How It Works\u201d [<a rel=\"noreferrer noopener\" href=\"https:\/\/av.watch.impress.co.jp\/docs\/series\/dal\/1175133.html\" target=\"_blank\">URL<\/a>]<\/li>\n<\/ol>\n\n\n\n<div style=\"height:28px\" aria-hidden=\"true\" class=\"wp-block-spacer\"><\/div>\n\n\n\n<h3 class=\"wp-block-heading\" id=\"media_appearances_japanese\">Media Appearances (Japanese)<\/h3>\n\n\n\n<ol id=\"block-3090e4bd-4a0d-46a5-ae40-6e70143b8495\" class=\"wp-block-list\">\n<li>Sep. 2021, <mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">Tokyo FM Radio MUSIC BIRD<\/mark>, \u201cCo-starring with Kanji Ishimaru? Glenn Gould Revived by Sony\u2019s New Technology\u201d [<a rel=\"noreferrer noopener\" href=\"https:\/\/musicbird.jp\/programs\/audio2\/202109\/\" target=\"_blank\">radio<\/a>]<\/li>\n\n\n\n<li>Apr. 2021, Podcast, \u201cSony on the Possibilities of \u2018AI \u00d7 Music\u2019: Will It Change How Artists Work, Too?\u201d [<a rel=\"noreferrer noopener\" href=\"https:\/\/open.spotify.com\/episode\/2V0uPswKYiVKElvJVedaU1?si=1dtYeQdORNKCdkT_E5oOiw\" target=\"_blank\">podcast<\/a>]<\/li>\n\n\n\n<li>Apr. 2021, YouTube Channel Sambomaster, \u201cYoichi Kondo Experiences Sony Technology (Part 2)\u201d [<a rel=\"noreferrer noopener\" href=\"https:\/\/youtu.be\/vGkMN2qoTiM\" target=\"_blank\">YouTube<\/a>]<\/li>\n\n\n\n<li>Jul. 
2020, <mark style=\"background-color:rgba(0, 0, 0, 0)\" class=\"has-inline-color has-pale-pink-color\">NHK TV Lalala\u266a Classic<\/mark>, \u201cKeiichiro Shibuya on Technology and Music\u201d [<a rel=\"noreferrer noopener\" href=\"https:\/\/www.nhk.jp\/p\/lalala\/ts\/57LY35Q588\/episode\/te\/8PMZNN48Z5\/\" target=\"_blank\">TV<\/a>]<\/li>\n<\/ol>\n","protected":false},"excerpt":{"rendered":"<p>Expertise Experience News Publications Selected Papers Books Journal Papers Conference Papers Workshop Papers  [&hellip;]<\/p>\n","protected":false},"author":1,"featured_media":123,"parent":0,"menu_order":0,"comment_status":"closed","ping_status":"closed","template":"","meta":{"footnotes":""},"class_list":["post-2","page","type-page","status-publish","has-post-thumbnail","hentry"],"_links":{"self":[{"href":"https:\/\/www.yukimitsufuji.com\/index.php?rest_route=\/wp\/v2\/pages\/2","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/www.yukimitsufuji.com\/index.php?rest_route=\/wp\/v2\/pages"}],"about":[{"href":"https:\/\/www.yukimitsufuji.com\/index.php?rest_route=\/wp\/v2\/types\/page"}],"author":[{"embeddable":true,"href":"https:\/\/www.yukimitsufuji.com\/index.php?rest_route=\/wp\/v2\/users\/1"}],"replies":[{"embeddable":true,"href":"https:\/\/www.yukimitsufuji.com\/index.php?rest_route=%2Fwp%2Fv2%2Fcomments&post=2"}],"version-history":[{"count":1535,"href":"https:\/\/www.yukimitsufuji.com\/index.php?rest_route=\/wp\/v2\/pages\/2\/revisions"}],"predecessor-version":[{"id":2148,"href":"https:\/\/www.yukimitsufuji.com\/index.php?rest_route=\/wp\/v2\/pages\/2\/revisions\/2148"}],"wp:featuredmedia":[{"embeddable":true,"href":"https:\/\/www.yukimitsufuji.com\/index.php?rest_route=\/wp\/v2\/media\/123"}],"wp:attachment":[{"href":"https:\/\/www.yukimitsufuji.com\/index.php?rest_route=%2Fwp%2Fv2%2Fmedia&parent=2"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}