-
L. Cao, J. Chen, M. Brennan, T. O'Leary-Roseberry, Y. M. Marzouk, O. Ghattas.
LazyDINO: Fast, scalable, and efficiently amortized Bayesian inversion via structure-exploiting and surrogate-driven measure transport
Journal of Machine Learning Research, 27 (2026), pp. 1--71.
Available Links
Abstract
We present LazyDINO, a transport map variational inference method for fast, scalable, and efficiently amortized solutions of high-dimensional nonlinear Bayesian inverse problems with expensive parameter-to-observable (PtO) maps. Our method consists of an offline phase in which we construct a derivative-informed neural surrogate of the PtO map using joint samples of the PtO map and its Jacobian. During the online phase, when given observational data, we seek rapid posterior approximation using surrogate-driven training of a lazy map [Brennan et al., NeurIPS, (2020)], i.e., a structure-exploiting transport map with low-dimensional nonlinearity. The trained lazy map then produces approximate posterior samples or density evaluations. Our surrogate construction is optimized for amortized Bayesian inversion using lazy map variational inference. We show that (i) the derivative-based reduced basis architecture [O'Leary-Roseberry et al., Comput. Methods Appl. Mech. Eng., 388 (2022)] minimizes the upper bound on the expected error in surrogate posterior approximation, and (ii) the derivative-informed training formulation [O'Leary-Roseberry et al., J. Comput. Phys., 496 (2024)] minimizes the expected error due to surrogate-driven transport map optimization. Our numerical results demonstrate that LazyDINO is highly efficient in cost amortization for Bayesian inversion. We observe one to two orders of magnitude reduction of offline cost for accurate posterior approximation, compared to simulation-based amortized inference via conditional transport and conventional surrogate-driven transport. In particular, LazyDINO outperforms Laplace approximation consistently using fewer than 1000 offline samples, while other amortized inference methods struggle and sometimes fail at 16,000 offline samples.
BibTeX Citation
@article{cao-lazydino-2026,
  author   = {L. Cao and J. Chen and M. Brennan and T. O'Leary-Roseberry and Y. M. Marzouk and O. Ghattas},
  title    = {{LazyDINO}: Fast, scalable, and efficiently amortized {Bayesian} inversion via structure-exploiting and surrogate-driven measure transport},
  journal  = {Journal of Machine Learning Research},
  volume   = {27},
  number   = {24},
  pages    = {1--71},
  month    = feb,
  year     = {2026},
  keywords = {transport,dimension reduction,surrogates,amortized inference},
}
-
M. T. C. Li, T. Cui, F. Li, Y. M. Marzouk, O. Zahm.
Sharp detection of low-dimensional structure in probability measures via dimensional logarithmic Sobolev inequalities
Information and Inference: A Journal of the IMA, 14 (2025).
Available Links
Abstract
BibTeX Citation
@article{li-sharpdetectionlowdimensional-2025,
  author    = {M. T. C. Li and T. Cui and F. Li and Y. M. Marzouk and O. Zahm},
  title     = {Sharp detection of low-dimensional structure in probability measures via dimensional logarithmic {Sobolev} inequalities},
  journal   = {Information and Inference: A Journal of the IMA},
  volume    = {14},
  number    = {3},
  year      = {2025},
  publisher = {Oxford University Press},
}
-
H. Lu, L. Saló-Salgado, Y. M. Marzouk, R. Juanes.
Uncertainty Quantification of Fluid Leakage and Fault Instability in Geologic CO₂ Storage
Water Resources Research, 61 (2025).
Available Links
Abstract
BibTeX Citation
@article{lu-fluidleakagefaultinstability-2025,
  author    = {H. Lu and L. Saló-Salgado and Y. M. Marzouk and R. Juanes},
  title     = {Uncertainty Quantification of Fluid Leakage and Fault Instability in Geologic {CO₂} Storage},
  journal   = {Water Resources Research},
  volume    = {61},
  number    = {10},
  year      = {2025},
  publisher = {Wiley Online Library},
}
-
R. Baptista, B. Hosseini, N. B. Kovachki, Y. M. Marzouk, A. Sagiv.
An approximation theory framework for measure-transport sampling algorithms
Mathematics of Computation, 94 (2025), pp. 1863--1909.
Available Links
Abstract
This article presents a general approximation-theoretic framework to analyze measure transport algorithms for probabilistic modeling. A primary motivating application for such algorithms is sampling—a central task in statistical inference and generative modeling. We provide a priori error estimates in the continuum limit, i.e., when the measures (or their densities) are given, but when the transport map is discretized or approximated using a finite-dimensional function space. Our analysis relies on the regularity theory of transport maps and on classical approximation theory for high-dimensional functions. A third element of our analysis, which is of independent interest, is the development of new stability estimates that relate the distance between two maps to the distance (or divergence) between the pushforward measures they define. We present a series of applications of our framework, where quantitative convergence rates are obtained for practical problems using Wasserstein metrics, maximum mean discrepancy, and Kullback–Leibler divergence. Specialized rates for approximations of the popular triangular Knöthe–Rosenblatt maps are obtained, followed by numerical experiments that demonstrate and extend our theory.
BibTeX Citation
@article{baptista-theorytransportsample-2025,
  author        = {R. Baptista and B. Hosseini and N. B. Kovachki and Y. M. Marzouk and A. Sagiv},
  title         = {An approximation theory framework for measure-transport sampling algorithms},
  journal       = {Mathematics of Computation},
  volume        = {94},
  number        = {354},
  pages         = {1863--1909},
  year          = {2025},
  doi           = {10.1090/mcom/4013},
  eprint        = {2302.13965},
  eprinttype    = {arXiv},
  keywords      = {Transport map, generative models, stability analysis, approximation theory},
  internal-note = {Same article as baptista-transportapproxtheory-2024; keep the two entries in sync or merge them},
}
-
A. Maurais, T. Alsup, B. Peherstorfer, Y. M. Marzouk.
Multifidelity Covariance Estimation via Regression on the Manifold of Symmetric Positive Definite Matrices
SIAM Journal on Mathematics of Data Science, 7 (2025), pp. 189-223.
Available Links
Abstract
We introduce a multifidelity estimator of covariance matrices formulated as the solution to a regression problem on the manifold of symmetric positive definite matrices. The estimator is positive definite by construction, and the Mahalanobis distance minimized to obtain it possesses properties which enable practical computation. We show that our manifold regression multifidelity (MRMF) covariance estimator is a maximum likelihood estimator under a certain error model on manifold tangent space. More broadly, we show that our Riemannian regression framework encompasses existing multifidelity covariance estimators constructed from control variates. We demonstrate via numerical examples that our estimator can provide significant decreases, up to one order of magnitude, in squared estimation error relative to both single-fidelity and other multifidelity covariance estimators. Furthermore, preservation of positive definiteness ensures that our estimator is compatible with downstream tasks, such as data assimilation and metric learning, in which this property is essential.
BibTeX Citation
@article{maurais-covarianceregression-2025,
  author   = {A. Maurais and T. Alsup and B. Peherstorfer and Y. M. Marzouk},
  title    = {Multifidelity Covariance Estimation via Regression on the Manifold of Symmetric Positive Definite Matrices},
  journal  = {SIAM Journal on Mathematics of Data Science},
  volume   = {7},
  number   = {1},
  pages    = {189--223},
  month    = jan,
  year     = {2025},
  doi      = {10.1137/23M159247X},
  keywords = {covariance, regression, SPD},
}
-
F. Y. Li, Y. M. Marzouk.
Diffusion map particle systems for generative modeling
Foundations of Data Science, 7 (2025), pp. 814--837.
Abstract
We propose a novel diffusion map particle system (DMPS) for generative modeling, based on diffusion maps and Laplacian-adjusted Wasserstein gradient descent (LAWGD). Diffusion maps are used to approximate the generator of the Langevin diffusion process from samples, and hence to learn the underlying data-generating manifold. On the other hand, LAWGD enables efficient sampling from the target distribution given a suitable choice of kernel, which we construct here via a spectral approximation of the generator, computed with diffusion maps. Numerical experiments show that our method outperforms others on synthetic datasets, including examples with manifold structure.
BibTeX Citation
@article{li-diffusionparticles-2025,
  author    = {F. Y. Li and Y. M. Marzouk},
  title     = {Diffusion map particle systems for generative modeling},
  journal   = {Foundations of Data Science},
  volume    = {7},
  number    = {3},
  pages     = {814--837},
  month     = sep,
  year      = {2025},
  doi       = {10.3934/fods.2024054},
  booktitle = {ICML 2023 Workshop on Structured Probabilistic Inference \& Generative Modeling},
  keywords  = {diffusion maps, kernel methods, gradient flows, generative modeling, sampling},
}
-
M. T. C. Li, Y. M. Marzouk, O. Zahm.
Principal feature detection via ϕ-Sobolev inequalities
Bernoulli, 30 (2024), pp. 2979-3003.
Abstract
We investigate the approximation of high-dimensional target measures as low-dimensional updates of a dominating reference measure. This approximation class replaces the associated density with the composition of: (i) a feature map that identifies the leading principal components or features of the target measure, relative to the reference, and (ii) a low-dimensional profile function. When the reference measure satisfies a subspace ϕ-Sobolev inequality, we construct a computationally tractable approximation that yields certifiable error guarantees with respect to the Amari α-divergences. Our construction proceeds in two stages. First, for any feature map and any α-divergence, we obtain an analytical expression for the optimal profile function. Second, for linear feature maps, the principal features are obtained from eigenvectors of a matrix involving gradients of the log-density. Neither step requires explicit access to normalizing constants. Notably, by leveraging the ϕ-Sobolev inequalities, we demonstrate that these features universally certify approximation errors across the range of α-divergences α ∈(0,1]. We then propose an application to Bayesian inverse problems and provide an analogous construction with approximation guarantees that hold in expectation over the data. We conclude with an extension of the proposed dimension reduction strategy to nonlinear feature maps.
BibTeX Citation
@article{li-featuredetection-2024,
  author   = {M. T. C. Li and Y. M. Marzouk and O. Zahm},
  title    = {Principal feature detection via {ϕ-Sobolev} inequalities},
  journal  = {Bernoulli},
  volume   = {30},
  number   = {4},
  pages    = {2979--3003},
  year     = {2024},
  doi      = {10.3150/23-BEJ1702},
  keywords = {Amari α-divergences; Bayesian inference; feature detection; gradient-based dimension reduction; principal components; ϕ-Sobolev inequalities},
}
-
K. E. Fisher, M. F. Herbst, Y. M. Marzouk.
Multitask methods for predicting molecular properties from heterogeneous data
The Journal of Chemical Physics, 161 (2024).
Available Links
Abstract
Data generation remains a bottleneck in training surrogate models to predict molecular properties. We demonstrate that multitask Gaussian process regression overcomes this limitation by leveraging both expensive and cheap data sources. In particular, we consider training sets constructed from coupled-cluster (CC) and density functional theory (DFT) data. We report that multitask surrogates can predict at CC-level accuracy with a reduction in data generation cost by over an order of magnitude. Of note, our approach allows the training set to include DFT data generated by a heterogeneous mix of exchange–correlation functionals without imposing any artificial hierarchy on functional accuracy. More generally, the multitask framework can accommodate a wider range of training set structures—including the full disparity between the different levels of fidelity—than existing kernel approaches based on Δ-learning although we show that the accuracy of the two approaches can be similar. Consequently, multitask regression can be a tool for reducing data generation costs even further by opportunistically exploiting existing data sources.
BibTeX Citation
@article{fisher-multitask-2024,
  author   = {K. E. Fisher and M. F. Herbst and Y. M. Marzouk},
  title    = {Multitask methods for predicting molecular properties from heterogeneous data},
  journal  = {The Journal of Chemical Physics},
  volume   = {161},
  pages    = {014114},
  month    = jul,
  year     = {2024},
  doi      = {10.1063/5.0201681},
  keywords = {Quantum chemistry, Coupled-cluster methods, Density functional theory, Exchange correlation functionals, Machine learning, Molecular properties, Covariance and correlation, Gaussian processes, Nonparametric statistics},
}
-
X. Huan, J. Jagalur, Y. M. Marzouk.
Optimal experimental design: Formulations and computations
Acta Numerica, 33 (2024), pp. 715-840.
Abstract
Questions of ‘how best to acquire data’ are essential to modelling and prediction in the natural and social sciences, engineering applications, and beyond. Optimal experimental design (OED) formalizes these questions and creates computational methods to answer them. This article presents a systematic survey of modern OED, from its foundations in classical design theory to current research involving OED for complex models. We begin by reviewing criteria used to formulate an OED problem and thus to encode the goal of performing an experiment. We emphasize the flexibility of the Bayesian and decision-theoretic approach, which encompasses information-based criteria that are well-suited to nonlinear and non-Gaussian statistical models. We then discuss methods for estimating or bounding the values of these design criteria; this endeavour can be quite challenging due to strong nonlinearities, high parameter dimension, large per-sample costs, or settings where the model is implicit. A complementary set of computational issues involves optimization methods used to find a design; we discuss such methods in the discrete (combinatorial) setting of observation selection and in settings where an exact design can be continuously parametrized. Finally we present emerging methods for sequential OED that build non-myopic design policies, rather than explicit designs; these methods naturally adapt to the outcomes of past experiments in proposing new experiments, while seeking coordination among all experiments to be performed. Throughout, we highlight important open questions and challenges.
BibTeX Citation
@article{huan-oedreview-2024,
  author   = {X. Huan and J. Jagalur and Y. M. Marzouk},
  title    = {Optimal experimental design: Formulations and computations},
  journal  = {Acta Numerica},
  volume   = {33},
  pages    = {715--840},
  month    = jul,
  year     = {2024},
  doi      = {10.1017/S0962492924000023},
  keywords = {optimal experimental design},
}
-
K. Leung, D. Thompson, J. Susiluoto, J. Jagalur, A. Braverman, Y. M. Marzouk.
Evaluating the accuracy of Gaussian approximations in VSWIR imaging spectroscopy retrievals
IEEE Transactions on Geoscience and Remote Sensing, 62 (2024), pp. 1-12.
Available Links
Abstract
The joint retrieval of surface reflectances and atmospheric parameters in visible/short-wave infrared (VSWIR) imaging spectroscopy is a computationally challenging high-dimensional problem. Using NASA’s Surface Biology and Geology mission (SBG) as the motivational context, the uncertainty associated with the retrievals is crucial for further application of the retrieved results for environmental applications. Although Markov chain Monte Carlo (MCMC) is a Bayesian method ideal for uncertainty quantification (UQ), the full-dimensional implementation of MCMC for the retrieval is computationally intractable. In this work, we developed a block Metropolis MCMC algorithm for the high-dimensional VSWIR surface reflectance retrieval that leverages conditional linearity structure in the forward radiative transfer model to enable tractable fully Bayesian computation. We use the posterior distribution from this MCMC algorithm to assess the limitations of optimal estimation (OE), the state-of-the-art Bayesian algorithm in operational retrievals which is more computationally efficient but uses a Gaussian approximation to characterize the posterior. Analyzing the differences in the posterior computed by each method, the MCMC algorithm was shown to give more physically sensible results and reveals the non-Gaussian structure of the posterior, specifically in the atmospheric aerosol optical depth (AOD) parameter and the low-wavelength surface reflectances.
BibTeX Citation
@article{leung-spectroscopy-2024,
  author   = {K. Leung and D. Thompson and J. Susiluoto and J. Jagalur and A. Braverman and Y. M. Marzouk},
  title    = {Evaluating the accuracy of {Gaussian} approximations in {VSWIR} imaging spectroscopy retrievals},
  journal  = {IEEE Transactions on Geoscience and Remote Sensing},
  volume   = {62},
  pages    = {1--12},
  month    = jun,
  year     = {2024},
  doi      = {10.1109/TGRS.2024.3411916},
  keywords = {mcmc},
}
-
R. Baptista, L. Cao, J. Chen, O. Ghattas, F. Y. Li, Y. M. Marzouk, J. T. Oden.
Bayesian model calibration for block copolymer self-assembly: Likelihood-free inference and expected information gain computation via measure transport
Journal of Computational Physics, 503 (2024).
Available Links
Abstract
We consider the Bayesian calibration of models describing the phenomenon of block copolymer (BCP) self-assembly, which is to infer model parameters and their uncertainty from noisy image data produced by microscopy or X-ray scattering characterization of BCP structures. To account for the long-range disorder in BCP structures, we introduce auxiliary variables in the model to represent this aleatory uncertainty. These variables, however, result in an integrated likelihood function for high-dimensional image data that is generally intractable to evaluate. We tackle this Bayesian inference problem using a likelihood-free inference (LFI) approach based on measure transport together with the construction of summary statistics for the image data. We show that expected information gains (EIG) from observed data about the model parameters can be computed with no significant additional cost. Lastly, we present a numerical case study using the Ohta–Kawasaki model for diblock copolymer thin film self-assembly and top-down microscopy characterization. We introduce several domain-specific energy and Fourier-based summary statistics for calibration and quantify their informativeness using EIG. The effect of various data corruptions, summary statistics, and experimental designs on the calibration results are studied using the proposed LFI method.
BibTeX Citation
@article{baptista-bayesiancopolymer-2024,
  author   = {R. Baptista and L. Cao and J. Chen and O. Ghattas and F. Y. Li and Y. M. Marzouk and J. T. Oden},
  title    = {{Bayesian} model calibration for block copolymer self-assembly: Likelihood-free inference and expected information gain computation via measure transport},
  journal  = {Journal of Computational Physics},
  volume   = {503},
  pages    = {112844},
  month    = apr,
  year     = {2024},
  doi      = {10.1016/j.jcp.2024.112844},
  keywords = {Block copolymers, Uncertainty quantification, Likelihood-free inference, Measure transport, Expected information gain, Ohta–Kawasaki model},
}
-
G. Gottwald, F. Y. Li, Y. M. Marzouk, S. Reich.
Stable generative modeling using Schrödinger bridges
Philosophical Transactions of the Royal Society A, 383 (2025), pp. 20240332.
Available Links
Abstract
We consider the problem of sampling from an unknown distribution for which only a sufficiently large number of training samples are available. Such settings have recently drawn considerable interest in the context of generative modelling and Bayesian inference. In this paper, we propose a generative model combining Schrödinger bridges and Langevin dynamics. Schrödinger bridges over an appropriate reversible reference process are used to approximate the conditional transition probability from the available training samples, which is then implemented in a discrete-time reversible Langevin sampler to generate new samples. By setting the kernel bandwidth in the reference process to match the time step size used in the unadjusted Langevin algorithm, our method effectively circumvents any stability issues typically associated with the time-stepping of stiff stochastic differential equations. Moreover, we introduce a novel split-step scheme, ensuring that the generated samples remain within the convex hull of the training samples. Our framework can be naturally extended to generate conditional samples and to Bayesian inference problems. We demonstrate the performance of our proposed scheme through experiments on synthetic datasets with increasing dimensions and on a stochastic subgrid-scale parametrization conditional sampling problem.
BibTeX Citation
@article{li-schroedinger-2024,
  author   = {G. Gottwald and F. Y. Li and Y. M. Marzouk and S. Reich},
  title    = {Stable generative modeling using {Schrödinger} bridges},
  journal  = {Philosophical Transactions of the Royal Society A},
  volume   = {383},
  number   = {2299},
  pages    = {20240332},
  month    = jun,
  year     = {2025},
  doi      = {10.48550/arXiv.2401.04372},
  keywords = {Langevin dynamics, Schrödinger bridges, generative modeling, Bayesian inference},
}
-
Y. M. Marzouk, Z. R. Ren, S. Wang, J. Zech.
Distribution Learning via Neural Differential Equations: A Nonparametric Statistical Perspective
Journal of Machine Learning Research, 25 (2024), pp. 1-61.
Available Links
Abstract
Ordinary differential equations (ODEs), via their induced flow maps, provide a powerful framework to parameterize invertible transformations for representing complex probability distributions. While such models have achieved enormous success in machine learning, little is known about their statistical properties. This work establishes the first general nonparametric statistical convergence analysis for distribution learning via ODE models trained through likelihood maximization. We first prove a convergence theorem applicable to arbitrary velocity field classes F satisfying certain simple boundary constraints. This general result captures the trade-off between the approximation error and complexity of the ODE model. We show that the latter can be quantified via the C¹-metric entropy of the class F. We then apply this general framework to the setting of Cᵏ-smooth target densities, and establish nearly minimax-optimal convergence rates for two relevant velocity field classes F: Cᵏ functions and neural networks. The latter is the practically important case of neural ODEs. Our results also provide insight on how the choice of velocity field class, and the dependence of this choice on sample size (e.g., the scaling of neural network classes), impact statistical performance.
BibTeX Citation
@article{marzouk-distributionneuralode-2024,
  author   = {Y. M. Marzouk and Z. R. Ren and S. Wang and J. Zech},
  title    = {Distribution Learning via Neural Differential Equations: A Nonparametric Statistical Perspective},
  journal  = {Journal of Machine Learning Research},
  volume   = {25},
  number   = {232},
  pages    = {1--61},
  month    = jun,
  year     = {2024},
  keywords = {Neural differential equations, normalizing flows, density estimation, nonparametric statistics, M-estimation},
}
-
R. Baptista, B. Hosseini, N. B. Kovachki, Y. M. Marzouk, A. Sagiv.
An approximation theory framework for measure-transport sampling algorithms
Mathematics of Computation, 94 (2025), pp. 1863--1909.
Abstract
This article presents a general approximation-theoretic framework to analyze measure transport algorithms for probabilistic modeling. A primary motivating application for such algorithms is sampling—a central task in statistical inference and generative modeling. We provide a priori error estimates in the continuum limit, i.e., when the measures (or their densities) are given, but when the transport map is discretized or approximated using a finite-dimensional function space. Our analysis relies on the regularity theory of transport maps and on classical approximation theory for high-dimensional functions. A third element of our analysis, which is of independent interest, is the development of new stability estimates that relate the distance between two maps to the distance (or divergence) between the pushforward measures they define. We present a series of applications of our framework, where quantitative convergence rates are obtained for practical problems using Wasserstein metrics, maximum mean discrepancy, and Kullback–Leibler divergence. Specialized rates for approximations of the popular triangular Knöthe–Rosenblatt maps are obtained, followed by numerical experiments that demonstrate and extend our theory.
BibTeX Citation
@article{baptista-transportapproxtheory-2024,
  author        = {R. Baptista and B. Hosseini and N. B. Kovachki and Y. M. Marzouk and A. Sagiv},
  title         = {An approximation theory framework for measure-transport sampling algorithms},
  journal       = {Mathematics of Computation},
  volume        = {94},
  number        = {354},
  pages         = {1863--1909},
  year          = {2025},
  doi           = {10.1090/mcom/4013},
  keywords      = {Transport map, generative models, stability analysis, approximation theory},
  internal-note = {Same article as baptista-theorytransportsample-2025; keep the two entries in sync or merge them},
}
-
R. Baptista, R. Morrison, O. Zahm, Y. M. Marzouk.
Learning Non-Gaussian Graphical Models via Hessian Scores and Triangular Transport
Journal of Machine Learning Research, 25 (2024), pp. 1-46.
Available Links
Abstract
Undirected probabilistic graphical models represent the conditional dependencies, or Markov properties, of a collection of random variables. Knowing the sparsity of such a graphical model is valuable for modeling multivariate distributions and for efficiently performing inference. While the problem of learning graph structure from data has been studied extensively for certain parametric families of distributions, most existing methods fail to consistently recover the graph structure for non-Gaussian data. Here we propose an algorithm for learning the Markov structure of continuous and non-Gaussian distributions. To characterize conditional independence, we introduce a score based on integrated Hessian information from the joint log-density, and we prove that this score upper bounds the conditional mutual information for a general class of distributions. To compute the score, our algorithm SING estimates the density using a deterministic coupling, induced by a triangular transport map, and iteratively exploits sparse structure in the map to reveal sparsity in the graph. For certain non-Gaussian datasets, we show that our algorithm recovers the graph structure even with a biased approximation to the density. Among other examples, we apply SING to learn the dependencies between the states of a chaotic dynamical system with local interactions.
BibTeX Citation
@article{baptista-nongaussiangraphical-2024,
  author   = {R. Baptista and R. Morrison and O. Zahm and Y. M. Marzouk},
  title    = {Learning {Non-Gaussian} Graphical Models via {Hessian} Scores and Triangular Transport},
  journal  = {Journal of Machine Learning Research},
  volume   = {25},
  number   = {85},
  pages    = {1--46},
  year     = {2024},
  keywords = {Undirected graphical models, structure learning, non-Gaussian distributions, conditional mutual information, transport map, sparsity},
}
-
J. Pidstrigach, Y. M. Marzouk, S. Reich, S. Wang.
Infinite-Dimensional Diffusion Models
Journal of Machine Learning Research, 25 (2024), pp. 1--52.
Abstract
We define diffusion-based generative models in infinite dimensions, and apply them to the generative modeling of functions. By first formulating such models in the infinite-dimensional limit and only then discretizing, we are able to obtain a sampling algorithm that has dimension-free bounds on the distance from the sample measure to the target measure. Furthermore, we propose a new way to perform conditional sampling in an infinite-dimensional space and show that our approach outperforms previously suggested procedures.
BibTeX Citation
@article{pidstrigach-infinitedimdiffusion-2024,
  author  = {J. Pidstrigach and Y. M. Marzouk and S. Reich and S. Wang},
  title   = {Infinite-Dimensional Diffusion Models},
  journal = {Journal of Machine Learning Research},
  volume  = {25},
  number  = {414},
  pages   = {1--52},
  year    = {2024},
}
-
F. Menhorn, G. Geraci, D. T. Seidl, Y. M. Marzouk, M. S. Eldred, H.-J. Bungartz.
Multilevel Monte Carlo estimators for derivative-free optimization under uncertainty
International Journal for Uncertainty Quantification, 14 (2023), pp. 21-65.
Available Links
Abstract
Optimization is a key tool for scientific and engineering applications; however, in the presence of models affected by uncertainty, the optimization formulation needs to be extended to consider statistics of the quantity of interest. Optimization under uncertainty (OUU) deals with this endeavor and requires uncertainty quantification analyses at several design locations; i.e., its overall computational cost is proportional to the cost of performing a forward uncertainty analysis at each design location. An OUU workflow has two main components: an inner loop strategy for the computation of statistics of the quantity of interest, and an outer loop optimization strategy tasked with finding the optimal design, given a merit function based on the inner loop statistics. In this work, we propose to alleviate the cost of the inner loop uncertainty analysis by leveraging the so-called multilevel Monte Carlo (MLMC) method, which is able to allocate resources over multiple models with varying accuracy and cost. The resource allocation problem in MLMC is formulated by minimizing the computational cost given a target variance for the estimator. We consider MLMC estimators for statistics usually employed in OUU workflows and solve the corresponding allocation problem. For the outer loop, we consider a derivative-free optimization strategy implemented in the SNOWPAC library; our novel strategy is implemented and released in the Dakota software toolkit. We discuss several numerical test cases to showcase the features and performance of our approach with respect to its Monte Carlo single fidelity counterpart.
BibTeX Citation
@article{menhorn-multileveloptimization-2024,
  author   = {F. Menhorn and G. Geraci and D. T. Seidl and Y. M. Marzouk and M. S. Eldred and H.-J. Bungartz},
  title    = {Multilevel {Monte Carlo} estimators for derivative-free optimization under uncertainty},
  journal  = {International Journal for Uncertainty Quantification},
  volume   = {14},
  number   = {3},
  pages    = {21--65},
  month    = nov,
  year     = {2023},
  doi      = {10.1615/Int.J.UncertaintyQuantification.2023048049},
  keywords = {optimization under uncertainty, multilevel Monte Carlo, uncertainty quantification},
}
-
R. Baptista, B. Hosseini, N. B. Kovachki, Y. M. Marzouk.
Conditional Sampling with Monotone GANs: From Generative Models to Likelihood-Free Inference
SIAM/ASA Journal on Uncertainty Quantification, 12 (2024), pp. 868-900.
Available Links
Abstract
We present a novel framework for conditional sampling of probability measures, using block triangular transport maps. We develop the theoretical foundations of block triangular transport in a Banach space setting, establishing general conditions under which conditional sampling can be achieved and drawing connections between monotone block triangular maps and optimal transport. Based on this theory, we then introduce a computational approach, called monotone generative adversarial networks (M-GANs), to learn suitable block triangular maps. Our algorithm uses only samples from the underlying joint probability measure and is hence likelihood-free. Numerical experiments with M-GAN demonstrate accurate sampling of conditional measures in synthetic examples, Bayesian inverse problems involving ordinary and partial differential equations, and probabilistic image inpainting.
BibTeX Citation
@article{baptista-monotonegans-2024,
  author   = {Baptista, R. and Hosseini, B. and Kovachki, N. B. and Marzouk, Y. M.},
  title    = {Conditional Sampling with Monotone {GANs}: From Generative Models to Likelihood-Free Inference},
  journal  = {SIAM/ASA Journal on Uncertainty Quantification},
  volume   = {12},
  number   = {3},
  pages    = {868--900},
  month    = aug,
  year     = {2024},
  doi      = {10.1137/23M1581546},
  keywords = {measure transport, conditional simulation, likelihood-free inference, optimal transport, GANs, normalizing flows},
} -
Z. O. Wang, R. Baptista, Y. M. Marzouk, L. Ruthotto, D. Verma.
Efficient Neural Network Approaches for Conditional Optimal Transport with Applications in Bayesian Inference
SIAM Journal on Scientific Computing, 47 (2025), pp. C979--C1005.
Available Links
Abstract
We present two neural network approaches that approximate the solutions of static and dynamic conditional optimal transport (COT) problems. Both approaches enable conditional sampling and conditional density estimation, which are core tasks in Bayesian inference–particularly in the simulation-based ("likelihood-free") setting. Our methods represent the target conditional distributions as transformations of a tractable reference distribution and, therefore, fall into the framework of measure transport. Although many measure transport approaches model the transformation as COT maps, obtaining the map is computationally challenging, even in moderate dimensions. To improve scalability, our numerical algorithms use neural networks to parameterize COT maps and further exploit the structure of the COT problem. Our static approach approximates the map as the gradient of a partially input-convex neural network. It uses a novel numerical implementation to increase computational efficiency compared to state-of-the-art alternatives. Our dynamic approach approximates the conditional optimal transport via the flow map of a regularized neural ODE; compared to the static approach, it is slower to train but offers more modeling choices and can lead to faster sampling. We demonstrate both algorithms numerically, comparing them with competing state-of-the-art approaches, using benchmark datasets and simulation-based Bayesian inverse problems.
BibTeX Citation
@article{wang-conditionalot-2025,
  author   = {Wang, Z. O. and Baptista, R. and Marzouk, Y. M. and Ruthotto, L. and Verma, D.},
  title    = {Efficient Neural Network Approaches for Conditional Optimal Transport with Applications in {Bayesian} Inference},
  journal  = {SIAM Journal on Scientific Computing},
  volume   = {47},
  number   = {4},
  pages    = {C979--C1005},
  month    = aug,
  year     = {2025},
  doi      = {10.1137/24M1678659},
  keywords = {measure transport, generative modeling, optimal transport, Bayesian inference, inverse problems, uncertainty quantification},
} -
R. Baptista, Y. M. Marzouk, O. Zahm.
On the representation and learning of monotone triangular transport maps
Foundations of Computational Mathematics, (2023).
Available Links
Abstract
Transportation of measure provides a versatile approach for modeling complex probability distributions, with applications in density estimation, Bayesian inference, generative modeling, and beyond. Monotone triangular transport maps—approximations of the Knothe–Rosenblatt (KR) rearrangement—are a canonical choice for these tasks. Yet the representation and parameterization of such maps have a significant impact on their generality and expressiveness, and on properties of the optimization problem that arises in learning a map from data (e.g., via maximum likelihood estimation). We present a general framework for representing monotone triangular maps via invertible transformations of smooth functions. We establish conditions on the transformation such that the associated infinite-dimensional minimization problem has no spurious local minima, i.e., all local minima are global minima; and we show for target distributions satisfying certain tail conditions that the unique global minimizer corresponds to the KR map. Given a sample from the target, we then propose an adaptive algorithm that estimates a sparse semi-parametric approximation of the underlying KR map. We demonstrate how this framework can be applied to joint and conditional density estimation, likelihood-free inference, and structure learning of directed graphical models, with stable generalization performance across a range of sample sizes.
BibTeX Citation
@article{baptista-atm-2023,
  author   = {Baptista, R. and Marzouk, Y. M. and Zahm, O.},
  title    = {On the representation and learning of monotone triangular transport maps},
  journal  = {Foundations of Computational Mathematics},
  month    = nov,
  year     = {2023},
  doi      = {10.1007/s10208-023-09630-x},
  keywords = {transportation of measure, Knothe--Rosenblatt rearrangement, normalizing flows, monotone functions, infinite-dimensional optimization, adaptive approximation, multivariate polynomials, wavelets, density estimation},
} -
A. Scarinci, U. bin Waheed, C. Gu, X. Ren, B. M. Dia, S. Kaka, M. Fehler, Y. M. Marzouk.
Robust Bayesian moment tensor inversion with optimal transport misfits: layered medium approximations to the 3-D SEG-EAGE overthrust velocity model
Geophysical Journal International, 234 (2023), pp. 1169-1190.
Available Links
Abstract
A velocity model is generally an imperfect representation of the subsurface, which cannot precisely account for the 3-D inhomogeneities of Earth structure. We present a Bayesian moment tensor inversion framework for applications where reliable, tomography-based, velocity model reconstructions are not available. In particular, synthetic data generated using a 3-D model (SEG-EAGE Overthrust) are inverted using a layered medium model. We use a likelihood function derived from an optimal transport distance—specifically, the transport-Lagrangian distance introduced by Thorpe et al.—and show that this formulation yields inferences that are robust to misspecification of the velocity model. We establish several quantitative metrics to evaluate the performance of the proposed Bayesian framework, comparing it to Bayesian inversion with a standard Gaussian likelihood. We also show that the non-double-couple component of the recovered mechanisms drastically diminishes when the impact of velocity model misspecification is mitigated.
BibTeX Citation
@article{scarinci-tensorinversion-2023,
  author   = {Scarinci, A. and bin Waheed, U. and Gu, C. and Ren, X. and Dia, B. M. and Kaka, S. and Fehler, M. and Marzouk, Y. M.},
  title    = {Robust {Bayesian} moment tensor inversion with optimal transport misfits: layered medium approximations to the {3-D} {SEG-EAGE} overthrust velocity model},
  journal  = {Geophysical Journal International},
  volume   = {234},
  number   = {2},
  pages    = {1169--1190},
  month    = aug,
  year     = {2023},
  doi      = {10.1093/gji/ggad116},
  keywords = {inverse theory, probability distributions, statistical methods, earthquake source observations, induced seismicity, waveform inversion},
} -
M. Ramgraber, R. Baptista, D. McLaughlin, Y. M. Marzouk.
Ensemble transport smoothing. Part 2: nonlinear updates
Journal of Computational Physics: X, 17 (2023).
Available Links
Abstract
Smoothing is a specialized form of Bayesian inference for state-space models that characterizes the posterior distribution of a collection of states given an associated sequence of observations. Our companion manuscript proposes a general framework for transport-based ensemble smoothing, which includes linear Kalman-type smoothers as special cases. Here, we build on this foundation to realize and demonstrate nonlinear backward ensemble transport smoothers. We discuss parameterization and regularization of the associated transport maps, and then examine the performance of these smoothers for nonlinear and chaotic dynamical systems that exhibit non-Gaussian behavior. In these settings, our nonlinear transport smoothers yield lower estimation error than conventional linear smoothers and state-of-the-art iterative ensemble Kalman smoothers, for comparable numbers of model evaluations.
BibTeX Citation
@article{ramgraber-smoothingparttwo-2022,
  author   = {Ramgraber, M. and Baptista, R. and McLaughlin, D. and Marzouk, Y. M.},
  title    = {Ensemble transport smoothing. {Part} 2: nonlinear updates},
  journal  = {Journal of Computational Physics: X},
  volume   = {17},
  pages    = {100133},
  month    = nov,
  year     = {2023},
  doi      = {10.1016/j.jcpx.2023.100133},
  keywords = {data assimilation, smoothing, ensemble methods, triangular transport},
} -
M. Ramgraber, R. Baptista, D. McLaughlin, Y. M. Marzouk.
Ensemble transport smoothing. Part 1: unified framework
Journal of Computational Physics: X, 17 (2023).
Available Links
Abstract
Smoothers are algorithms for Bayesian time series re-analysis. Most operational smoothers rely either on affine Kalman-type transformations or on sequential importance sampling. These strategies occupy opposite ends of a spectrum that trades computational efficiency and scalability for statistical generality and consistency: non-Gaussianity renders affine Kalman updates inconsistent with the true Bayesian solution, while the ensemble size required for successful importance sampling can be prohibitive. This paper revisits the smoothing problem from the perspective of measure transport, which offers the prospect of consistent prior-to-posterior transformations for Bayesian inference. We leverage this capacity by proposing a general ensemble framework for transport-based smoothing. Within this framework, we derive a comprehensive set of smoothing recursions based on nonlinear transport maps and detail how they exploit the structure of state-space models in fully non-Gaussian settings. We also describe how many standard Kalman-type smoothing algorithms emerge as special cases of our framework. A companion paper explores the implementation of nonlinear ensemble transport smoothers in greater depth.
BibTeX Citation
@article{ramgraber-smoothingpartone-2022,
  author   = {Ramgraber, M. and Baptista, R. and McLaughlin, D. and Marzouk, Y. M.},
  title    = {Ensemble transport smoothing. {Part} 1: unified framework},
  journal  = {Journal of Computational Physics: X},
  volume   = {17},
  pages    = {100134},
  month    = nov,
  year     = {2023},
  doi      = {10.1016/j.jcpx.2023.100134},
  keywords = {data assimilation, smoothing, ensemble methods, triangular transport},
} -
M. Parno, P.-B. Rubio, D. Sharp, M. Brennan, R. Baptista, H. Bonart, Y. M. Marzouk.
MParT: Monotone Parameterization Toolkit
Journal of Open Source Software, 7 (2022), pp. 4843.
Available Links
Abstract
BibTeX Citation
@article{parno-mpart-2022,
  author  = {Parno, M. and Rubio, P.-B. and Sharp, D. and Brennan, M. and Baptista, R. and Bonart, H. and Marzouk, Y. M.},
  title   = {{MParT}: {Monotone Parameterization Toolkit}},
  journal = {Journal of Open Source Software},
  volume  = {7},
  number  = {80},
  pages   = {4843},
  year    = {2022},
} -
K.-T. Kim, U. Villa, M. Parno, Y. M. Marzouk, O. Ghattas, N. Petra.
hIPPYlib-MUQ: a Bayesian inference software framework for integration of data with complex predictive models under uncertainty
ACM Transactions on Mathematical Software, (2022).
Available Links
Abstract
Bayesian inference provides a systematic means of quantifying uncertainty in the solution of the inverse problem. However, solution of Bayesian inverse problems governed by complex forward models described by partial differential equations (PDEs) remains prohibitive with black-box Markov chain Monte Carlo (MCMC) methods. We present hIPPYlib-MUQ, an extensible and scalable software framework that contains implementations of state-of-the-art algorithms aimed to overcome the challenges of high-dimensional, PDE-constrained Bayesian inverse problems. hIPPYlib-MUQ integrates two complementary open-source software packages. hIPPYlib solves PDE-constrained inverse problems using automatically-generated adjoint-based derivatives, but it lacks full Bayesian capabilities. MUQ provides numerous powerful Bayesian inversion algorithms, but expects forward models to come equipped with derivatives to permit large-scale solution. By combining these two libraries, we created a robust, scalable, and efficient software framework that can be used to tackle complex large-scale Bayesian inverse problems across a broad spectrum of scientific and engineering disciplines. To illustrate the capabilities of hIPPYlib-MUQ, we compare a number of MCMC methods on several high-dimensional Bayesian inverse problems. The results demonstrate that large (∼50×) speedups over conventional black box and gradient-based MCMC algorithms can be obtained by exploiting Hessian information (from the log-posterior), underscoring the power of the integrated hIPPYlib-MUQ framework.
BibTeX Citation
@article{kim-hippylibmuq-2021,
  author     = {Kim, K.-T. and Villa, U. and Parno, M. and Marzouk, Y. M. and Ghattas, O. and Petra, N.},
  title      = {{hIPPYlib-MUQ}: a {Bayesian} inference software framework for integration of data with complex predictive models under uncertainty},
  journal    = {ACM Transactions on Mathematical Software},
  year       = {2022},
  doi        = {10.48550/arXiv.2112.00713},
  eprint     = {2112.00713},
  eprinttype = {arXiv},
} -
D. Bigoni, Y. M. Marzouk, C. Prieur, O. Zahm.
Nonlinear dimension reduction for surrogate modeling using gradient information
Information and Inference: A Journal of the IMA, in press (2022).
Abstract
We introduce a method for the nonlinear dimension reduction of a high-dimensional function $u: \mathbb{R}^d \to \mathbb{R}$, $d \gg 1$. Our objective is to identify a nonlinear feature map $g: \mathbb{R}^d \to \mathbb{R}^m$, with a prescribed intermediate dimension $m \ll d$, so that $u$ can be well approximated by $f \circ g$ for some profile function $f: \mathbb{R}^m \to \mathbb{R}$. We propose to build the feature map by aligning the Jacobian $\nabla g$ with the gradient $\nabla u$, and we theoretically analyze the properties of the resulting $g$. Once $g$ is built, we construct $f$ by solving a gradient-enhanced least squares problem. Our practical algorithm makes use of a sample $\{ x^{(i)}, u(x^{(i)}), \nabla u(x^{(i)}) \}_{i=1}^N$ and builds both $g$ and $f$ on adaptive downward-closed polynomial spaces, using cross validation to avoid overfitting. We numerically evaluate the performance of our algorithm across different benchmarks, and explore the impact of the intermediate dimension $m$. We show that building a nonlinear feature map $g$ can permit more accurate approximation of $u$ than a linear $g$, for the same input data set.
BibTeX Citation
@article{bigoni-nonlineardimred-2021,
  author  = {Bigoni, D. and Marzouk, Y. M. and Prieur, C. and Zahm, O.},
  title   = {Nonlinear dimension reduction for surrogate modeling using gradient information},
  journal = {Information and Inference: A Journal of the {IMA}},
  year    = {2022},
  doi     = {10.1093/imaiai/iaac006},
  note    = {in press},
} -
A. Spantini, R. Baptista, Y. M. Marzouk.
Coupling techniques for nonlinear ensemble filtering
SIAM Review, 64 (2022), pp. 921–953.
Available Links
Abstract
We consider filtering in high-dimensional non-Gaussian state-space models with intractable transition kernels, nonlinear and possibly chaotic dynamics, and sparse observations in space and time. We propose a novel filtering methodology that harnesses transportation of measures, convex optimization, and ideas from probabilistic graphical models to yield robust ensemble approximations of the filtering distribution in high dimensions. Our approach can be understood as the natural generalization of the ensemble Kalman filter (EnKF) to nonlinear updates, using stochastic or deterministic couplings. The use of nonlinear updates can reduce the intrinsic bias of the EnKF at a marginal increase in computational cost. We avoid any form of importance sampling and introduce non-Gaussian localization approaches for dimension scalability. Our framework achieves state-of-the-art tracking performance on challenging configurations of the Lorenz-96 model in the chaotic regime.
BibTeX Citation
@article{art_79,
  author   = {Spantini, A. and Baptista, R. and Marzouk, Y. M.},
  title    = {Coupling techniques for nonlinear ensemble filtering},
  journal  = {SIAM Review},
  volume   = {64},
  number   = {4},
  pages    = {921--953},
  year     = {2022},
  doi      = {10.1137/20M1312204},
  keywords = {nonlinear filtering, state-space models, couplings, transport maps, ensemble Kalman filter, graphical models, localization, approximate Bayesian computation},
} -
M. Le Provost, R. Baptista, Y. M. Marzouk, J. Eldredge.
A low-rank ensemble Kalman filter for elliptic observations
Proceedings of the Royal Society A, 478 (2022).
Abstract
We propose a regularization method for ensemble Kalman filtering (EnKF) with elliptic observation operators. Commonly used EnKF regularization methods suppress state correlations at long distances. For observations described by elliptic partial differential equations, such as the pressure Poisson equation (PPE) in incompressible fluid flows, distance localization should be used cautiously, as we cannot disentangle slowly decaying physical interactions from spurious long-range correlations. This is particularly true for the PPE, in which distant vortex elements couple nonlinearly to induce pressure. Instead, these inverse problems have a low effective dimension: low-dimensional projections of the observations strongly inform a low-dimensional subspace of the state space. We derive a low-rank factorization of the Kalman gain based on the spectrum of the Jacobian of the observation operator. The identified eigenvectors generalize the source and target modes of the multipole expansion, independently of the underlying spatial distribution of the problem. Given rapid spectral decay, inference can be performed in the low-dimensional subspace spanned by the dominant eigenvectors. This low-rank EnKF is assessed on dynamical systems with Poisson observation operators, where we seek to estimate the positions and strengths of point singularities over time from potential or pressure observations. We also comment on the broader applicability of this approach to elliptic inverse problems outside the context of filtering.
BibTeX Citation
@article{leprovost-lrenkf-2022,
  author   = {Le Provost, M. and Baptista, R. and Marzouk, Y. M. and Eldredge, J.},
  title    = {A low-rank ensemble {Kalman} filter for elliptic observations},
  journal  = {Proceedings of the Royal Society A},
  volume   = {478},
  number   = {2266},
  year     = {2022},
  doi      = {10.1098/rspa.2022.0182},
  keywords = {data assimilation, ensemble Kalman filter, elliptic inverse problems, incompressible fluid mechanics, observation-informed dimension reduction},
} -
A. Litvinenko, Y. M. Marzouk, H. G. Matthies, M. Scavino, A. Spantini.
Computing f-divergences and distances of high-dimensional probability density functions
Numerical Linear Algebra with Applications, in press (2022).
Available Links
Abstract
Very often, in the course of uncertainty quantification tasks or data analysis, one has to deal with high-dimensional random variables. Here the interest is mainly to compute characterizations like the entropy, the Kullback–Leibler divergence, more general $f$-divergences, or other such characteristics based on the probability density. The density is often not available directly, and it is a computational challenge to just represent it in a numerically feasible fashion in case the dimension is even moderately large. It is an even stronger numerical challenge to then actually compute said characteristics in the high-dimensional case. In this regard it is proposed to approximate the discretized density in a compressed form, in particular by a low-rank tensor. This can alternatively be obtained from the corresponding probability characteristic function, or more general representations of the underlying random variable. The mentioned characterizations need point-wise functions like the logarithm. This normally rather trivial task becomes computationally difficult when the density is approximated in a compressed resp. low-rank tensor format, as the point values are not directly accessible. The computations become possible by considering the compressed data as an element of an associative, commutative algebra with an inner product, and using matrix algorithms to accomplish the mentioned tasks. The representation as a low-rank element of a high order tensor space allows to reduce the computational complexity and storage cost from exponential in the dimension to almost linear.
BibTeX Citation
@article{litvinenko-tensordiv-2022,
  author  = {Litvinenko, A. and Marzouk, Y. M. and Matthies, H. G. and Scavino, M. and Spantini, A.},
  title   = {Computing f-divergences and distances of high-dimensional probability density functions},
  journal = {Numerical Linear Algebra with Applications},
  year    = {2022},
  doi     = {10.1002/nla.2467},
  note    = {in press},
} -
M. Brennan, M. Howard, Y. M. Marzouk, L. Dresselhaus-Marais.
Analytical methods for superresolution dislocation identification in dark-field X-ray microscopy
Journal of Materials Science, 57 (2022), pp. 14890–14904.
Abstract
We develop several inference methods to estimate the position of dislocations from images generated using dark-field X-ray microscopy (DFXM)—achieving superresolution accuracy and principled uncertainty quantification. Using the framework of Bayesian inference, we incorporate models of the DFXM contrast mechanism and detector measurement noise, along with initial position estimates, into a statistical model coupling DFXM images with the dislocation position of interest. We motivate several position estimation and uncertainty quantification algorithms based on this model. We then demonstrate the accuracy of our primary estimation algorithm on synthetic realistic DFXM images of edge dislocations in single-crystal aluminum. We conclude with a discussion of our methods’ impact on future dislocation studies and possible future research avenues.
BibTeX Citation
@article{brennan-dfxm-2022,
  author   = {Brennan, M. and Howard, M. and Marzouk, Y. M. and Dresselhaus-Marais, L.},
  title    = {Analytical methods for superresolution dislocation identification in dark-field {X-ray} microscopy},
  journal  = {Journal of Materials Science},
  volume   = {57},
  pages    = {14890--14904},
  year     = {2022},
  doi      = {10.1007/s10853-022-07465-5},
  keywords = {dislocation, Bayesian inference, dark-field X-ray microscopy, metals},
} -
B. J. Zhang, Y. M. Marzouk, K. Spiliopoulos.
Geometry-informed irreversible perturbations for accelerated convergence of Langevin dynamics
Statistics and Computing, 32 (2022).
Abstract
We introduce a novel geometry-informed irreversible perturbation that accelerates convergence of the Langevin algorithm for Bayesian computation. It is well documented that there exist perturbations to the Langevin dynamics that preserve its invariant measure while accelerating its convergence. Irreversible perturbations and reversible perturbations (such as Riemannian manifold Langevin dynamics (RMLD)) have separately been shown to improve the performance of Langevin samplers. We consider these two perturbations simultaneously by presenting a novel form of irreversible perturbation for RMLD that is informed by the underlying geometry. Through numerical examples, we show that this new irreversible perturbation can improve estimation performance over irreversible perturbations that do not take the geometry into account. Moreover we demonstrate that irreversible perturbations generally can be implemented in conjunction with the stochastic gradient version of the Langevin algorithm. Lastly, while continuous-time irreversible perturbations cannot impair the performance of a Langevin estimator, the situation can sometimes be more complicated when discretization is considered. To this end, we describe a discrete-time example in which irreversibility increases both the bias and variance of the resulting estimator.
BibTeX Citation
@article{zhang-girr-2021,
  author  = {Zhang, B. J. and Marzouk, Y. M. and Spiliopoulos, K.},
  title   = {Geometry-informed irreversible perturbations for accelerated convergence of {Langevin} dynamics},
  journal = {Statistics and Computing},
  volume  = {32},
  number  = {78},
  year    = {2022},
  doi     = {10.1007/s11222-022-10147-6},
} -
O. Zahm, T. Cui, K. J. H. Law, A. Spantini, Y. M. Marzouk.
Certified dimension reduction in nonlinear Bayesian inverse problems
Mathematics of Computation, 91 (2022), pp. 1789–1835.
Available Links
Abstract
We propose a dimension reduction technique for Bayesian inverse problems with nonlinear forward operators, non-Gaussian priors, and non-Gaussian observation noise. The likelihood function is approximated by a ridge function, i.e., a map which depends non-trivially only on a few linear combinations of the parameters. We build this ridge approximation by minimizing an upper bound on the Kullback--Leibler divergence between the posterior distribution and its approximation. This bound, obtained via logarithmic Sobolev inequalities, allows one to certify the error of the posterior approximation. Computing the bound requires computing the second moment matrix of the gradient of the log likelihood function. In practice, a sample-based approximation of the upper bound is then required. We provide an analysis that enables control of the posterior approximation error due to this sampling. Numerical and theoretical comparisons with existing methods illustrate the benefits of the proposed methodology.
BibTeX Citation
@article{art_73,
  author   = {Zahm, O. and Cui, T. and Law, K. J. H. and Spantini, A. and Marzouk, Y. M.},
  title    = {Certified dimension reduction in nonlinear {Bayesian} inverse problems},
  journal  = {Mathematics of Computation},
  volume   = {91},
  pages    = {1789--1835},
  year     = {2022},
  doi      = {10.1090/mcom/3737},
  keywords = {dimension reduction, nonlinear Bayesian inverse problem, logarithmic Sobolev inequality, certified error bound, non-asymptotic analysis},
} -
J. Zech, Y. M. Marzouk.
Sparse approximation of triangular transports. Part II: the infinite dimensional case
Constructive Approximation, 55 (2022), pp. 987–1036.
Abstract
For two probability measures $\rho$ and $\pi$ on $[-1,1]^{\mathbb{N}}$ we investigate the approximation of the triangular Knothe-Rosenblatt transport $T: [-1,1]^{\mathbb{N}} \to [-1,1]^{\mathbb{N}}$ that pushes forward $\rho$ to $\pi$. Under suitable assumptions, we show that $T$ can be approximated by rational functions without suffering from the curse of dimension. Our results are applicable to posterior measures arising in certain inference problems where the unknown belongs to an (infinite-dimensional) Banach space. In particular, we show that it is possible to efficiently approximately sample from certain high-dimensional measures by transforming a lower-dimensional latent variable.
BibTeX Citation
@article{zech-infdim-2021,
  author  = {Zech, J. and Marzouk, Y. M.},
  title   = {Sparse approximation of triangular transports. {Part} {II}: the infinite dimensional case},
  journal = {Constructive Approximation},
  volume  = {55},
  pages   = {987--1036},
  year    = {2022},
  doi     = {10.1007/s00365-022-09570-9},
} -
J. Zech, Y. M. Marzouk.
Sparse approximation of triangular transports. Part I: the finite dimensional case
Constructive Approximation, 55 (2022), pp. 919–986.
Abstract
For two probability measures $\rho$ and $\pi$ with analytic densities on the $d$-dimensional cube $[-1,1]^d$, we investigate the approximation of the unique triangular monotone Knothe--Rosenblatt transport $T: [-1,1]^d \to [-1,1]^d$, such that the pushforward $T_\sharp \rho$ equals $\pi$. It is shown that for $d \in \mathbb{N}$ there exist approximations $\tilde{T}$ of $T$, based on either sparse polynomial expansions or deep ReLU neural networks, such that the distance between $\tilde{T}_\sharp \rho$ and $\pi$ decreases exponentially. More precisely, we prove error bounds of the type $\exp(-\beta N^{1/d})$ (or $\exp(-\beta N^{1/(d+1)})$ for neural networks), where $N$ refers to the dimension of the ansatz space (or the size of the network) containing $\tilde{T}$; the notion of distance comprises the Hellinger distance, the total variation distance, the Wasserstein distance and the Kullback--Leibler divergence. Our construction guarantees $\tilde{T}$ to be a monotone triangular bijective transport on the hypercube $[-1,1]^d$. Analogous results hold for the inverse transport $S=T^{-1}$. The proofs are constructive, and we give an explicit a priori description of the ansatz space, which can be used for numerical implementations.
BibTeX Citation
@article{zech-triangular-2020,
  author   = {Zech, J. and Marzouk, Y. M.},
  title    = {Sparse approximation of triangular transports. {Part} {I}: the finite dimensional case},
  journal  = {Constructive Approximation},
  volume   = {55},
  pages    = {919--986},
  year     = {2022},
  doi      = {10.1007/s00365-022-09569-2},
  keywords = {transport maps, domains of holomorphy, uncertainty quantification, sparse approximation, neural networks, sampling},
} -
B. J. Zhang, T. Sahai, Y. M. Marzouk.
A Koopman framework for rare event simulation in stochastic differential equations
Journal of Computational Physics, 456 (2022), pp. 111025.
Abstract
We exploit the relationship between the stochastic Koopman operator and the Kolmogorov backward equation to construct importance sampling schemes for stochastic differential equations. Specifically, we propose using eigenfunctions of the stochastic Koopman operator to approximate the Doob transform for an observable of interest (e.g., associated with a rare event) which in turn yields an approximation of the corresponding zero-variance importance sampling estimator. Our approach is broadly applicable and systematic, treating non-normal systems, non-gradient systems, and systems with oscillatory dynamics or rank-deficient noise in a common framework. In nonlinear settings where the stochastic Koopman eigenfunctions cannot be derived analytically, we use dynamic mode decomposition (DMD) methods to approximate them numerically, but the framework is agnostic to the particular numerical method employed. Numerical experiments demonstrate that even coarse approximations of a few eigenfunctions, where the latter are built from non-rare trajectories, can produce effective importance sampling schemes for rare events.
BibTeX Citation
@article{zhang-kfr-2021,
  author  = {Zhang, B. J. and Sahai, T. and Marzouk, Y. M.},
  title   = {A {Koopman} framework for rare event simulation in stochastic differential equations},
  journal = {Journal of Computational Physics},
  volume  = {456},
  pages   = {111025},
  year    = {2022},
  doi     = {10.1016/j.jcp.2022.111025},
} -
A. Davis, Y. M. Marzouk, N. Pillai, A. Smith.
Rate-optimal refinement strategies for local approximation MCMC
Statistics and Computing, 32 (2022).
Abstract
Many Bayesian inference problems involve target distributions whose density functions are computationally expensive to evaluate. Replacing the target density with a local approximation based on a small number of carefully chosen density evaluations can significantly reduce the computational expense of Markov chain Monte Carlo (MCMC) sampling. Moreover, continual refinement of the local approximation can guarantee asymptotically exact sampling. We devise a new strategy for balancing the decay rate of the bias due to the approximation with that of the MCMC variance. We prove that the error of the resulting local approximation MCMC (LA-MCMC) algorithm decays at roughly the expected $1/\sqrt{T}$ rate, and we demonstrate this rate numerically. We also introduce an algorithmic parameter that guarantees convergence given very weak tail bounds, significantly strengthening previous convergence results. Finally, we apply LA-MCMC to a computationally intensive Bayesian inverse problem arising in groundwater hydrology.
BibTeX Citation
@article{davis-lamcmc-2020,
  author   = {Davis, A. and Marzouk, Y. M. and Pillai, N. and Smith, A.},
  title    = {Rate-optimal refinement strategies for local approximation {MCMC}},
  journal  = {Statistics and Computing},
  volume   = {32},
  number   = {60},
  year     = {2022},
  doi      = {10.1007/s11222-022-10123-0},
  keywords = {Markov chain Monte Carlo, local regression, Bayesian inference, surrogate models, sampling},
} -
A. Musolas, E. Massart, J. Hendrickx, P.-A. Absil, Y. M. Marzouk.
Low-rank multi-parametric covariance identification
BIT Numerical Mathematics, 62 (2022), pp. 221–249.
Available Links
Abstract
We propose a differential geometric approach for building families of low-rank covariance matrices, via interpolation on low-rank matrix manifolds. In contrast with standard parametric covariance classes, these families offer significant flexibility for problem-specific tailoring via the choice of “anchor” matrices for interpolation, for instance over a grid of relevant conditions describing the underlying stochastic process. The interpolation is computationally tractable in high dimensions, as it only involves manipulations of low-rank matrix factors. We also consider the problem of covariance identification, i.e., selecting the most representative member of the covariance family given a data set. In this setting, standard procedures such as maximum likelihood estimation are nontrivial because the covariance family is rank-deficient; we resolve this issue by casting the identification problem as distance minimization. We demonstrate the utility of these differential geometric families for interpolation and identification in a practical application: wind field covariance approximation for unmanned aerial vehicle navigation.
BibTeX Citation
@article{musolas-lowrank-2020,
  author  = {Musolas, A. and Massart, E. and Hendrickx, J. and Absil, P.-A. and Marzouk, Y. M.},
  title   = {Low-rank multi-parametric covariance identification},
  journal = {BIT Numerical Mathematics},
  volume  = {62},
  pages   = {221--249},
  year    = {2022},
  doi     = {10.1007/s10543-021-00867-y},
} -
J. Jagalur-Mohan, Y. M. Marzouk.
Batch greedy maximization of non-submodular functions: guarantees and applications to experimental design
The Journal of Machine Learning Research, 22 (2021), pp. 1–62.
Abstract
We propose and analyze batch greedy heuristics for cardinality constrained maximization of non-submodular non-decreasing set functions. We consider the standard greedy paradigm, along with its distributed greedy and stochastic greedy variants. Our theoretical guarantees are characterized by the combination of submodularity and supermodularity ratios. We argue how these parameters define tight modular bounds based on incremental gains, and provide a novel reinterpretation of the classical greedy algorithm using the minorize-maximize (MM) principle. Based on that analogy, we propose a new class of methods exploiting any plausible modular bound. In the context of optimal experimental design for linear Bayesian inverse problems, we bound the submodularity and supermodularity ratios when the underlying objective is based on mutual information. We also develop novel modular bounds for the mutual information in this setting, and describe certain connections to polyhedral combinatorics. We discuss how algorithms using these modular bounds relate to established statistical notions such as leverage scores and to more recent efforts such as volume sampling. We demonstrate our theoretical findings on synthetic problems and on a real-world climate monitoring example.
BibTeX Citation
@article{jagalur-batchgreedy-2020,
  author   = {Jagalur-Mohan, J. and Marzouk, Y. M.},
  title    = {Batch greedy maximization of non-submodular functions: guarantees and applications to experimental design},
  journal  = {The Journal of Machine Learning Research},
  volume   = {22},
  number   = {252},
  pages    = {1--62},
  year     = {2021},
  keywords = {greedy methods, submodularity, non-submodular functions, optimal experimental design, inverse problems, mutual information, uncertainty quantification, Bayesian statistics},
} -
S. Springer, H. Haario, J. Susiluoto, A. Bibov, A. Davis, Y. M. Marzouk.
Efficient Bayesian inference for large chaotic dynamical systems
Geoscientific Model Development, 14 (2021), pp. 4319–4333.
Available Links
Abstract
Estimating parameters of chaotic geophysical models is challenging due to their inherent unpredictability. These models cannot be calibrated with standard least squares or filtering methods if observations are temporally sparse. Obvious remedies, such as averaging over temporal and spatial data to characterize the mean behavior, do not capture the subtleties of the underlying dynamics. We perform Bayesian inference of parameters in high-dimensional and computationally demanding chaotic dynamical systems by combining two approaches: (i) measuring model–data mismatch by comparing chaotic attractors and (ii) mitigating the computational cost of inference by using surrogate models. Specifically, we construct a likelihood function suited to chaotic models by evaluating a distribution over distances between points in the phase space; this distribution defines a summary statistic that depends on the geometry of the attractor, rather than on pointwise matching of trajectories. This statistic is computationally expensive to simulate, compounding the usual challenges of Bayesian computation with physical models. Thus, we develop an inexpensive surrogate for the log likelihood with the local approximation Markov chain Monte Carlo method, which in our simulations reduces the time required for accurate inference by orders of magnitude. We investigate the behavior of the resulting algorithm with two smaller-scale problems and then use a quasi-geostrophic model to demonstrate its large-scale application.
BibTeX Citation
@article{springer-ebi-2021,
  author  = {Springer, S. and Haario, H. and Susiluoto, J. and Bibov, A. and Davis, A. and Marzouk, Y. M.},
  title   = {Efficient {Bayesian} inference for large chaotic dynamical systems},
  journal = {Geoscientific Model Development},
  volume  = {14},
  number  = {7},
  pages   = {4319--4333},
  year    = {2021},
  doi     = {10.5194/gmd-14-4319-2021},
} -
F. Uribe, I. Papaioannou, Y. M. Marzouk, D. Straub.
Cross-entropy based importance sampling with failure-informed dimension reduction for rare event simulation
SIAM/ASA Journal on Uncertainty Quantification, 9 (2021), pp. 818–847.
Available Links
Abstract
The estimation of rare event or failure probabilities in high dimensions is of interest in many areas of science and technology. We consider problems where the rare event is expressed in terms of a computationally costly numerical model. Importance sampling with the cross-entropy method offers an efficient way to address such problems provided that a suitable parametric family of biasing densities is employed. Although some existing parametric distribution families are designed to perform efficiently in high dimensions, their applicability within the cross-entropy method is limited to problems with dimension of O(10^2). In this work, rather than directly building sampling densities in high dimensions, we focus on identifying the intrinsic low-dimensional structure of the rare event simulation problem. To this end, we exploit a connection between rare event simulation and Bayesian inverse problems. This allows us to adapt dimension reduction techniques from Bayesian inference to construct new, effectively low-dimensional, biasing distributions within the cross-entropy method. In particular, we employ the approach in [Zahm et al. 2018], as it enables control of the error in the approximation of the optimal biasing distribution. We illustrate our method using two standard high-dimensional reliability benchmark problems and one structural mechanics application involving random fields.
BibTeX Citation
@article{uribe-crossentropy-2020,
  author   = {Uribe, F. and Papaioannou, I. and Marzouk, Y. M. and Straub, D.},
  title    = {Cross-entropy based importance sampling with failure-informed dimension reduction for rare event simulation},
  journal  = {SIAM/ASA Journal on Uncertainty Quantification},
  volume   = {9},
  number   = {2},
  pages    = {818--847},
  year     = {2021},
  doi      = {10.1137/20M1344585},
  keywords = {rare event simulation, reliability analysis, likelihood-informed subspace, importance sampling, cross-entropy method, random fields},
} -
A. Musolas, S. T. Smith, Y. M. Marzouk.
Geodesically parameterized covariance estimation
SIAM Journal on Matrix Analysis and Applications, 42 (2021), pp. 528–556.
Available Links
Abstract
Statistical modeling of spatiotemporal phenomena often requires selecting a covariance matrix from a covariance class. Yet standard parametric covariance families can be insufficiently flexible for practical applications, while nonparametric approaches may not easily allow certain kinds of prior knowledge to be incorporated. We propose instead to build covariance families out of geodesic curves. These covariances offer more flexibility for problem-specific tailoring than classical parametric families and are preferable to simple convex combinations. Once the covariance family has been chosen, one typically needs to select a representative member by solving an optimization problem, e.g., by maximizing the likelihood of a data set. We consider instead a differential geometric interpretation of this problem: minimizing the geodesic distance to a sample covariance matrix (``natural projection''). Our approach is consistent with the notion of distance employed to build the covariance family and does not require assuming a particular probability distribution for the data. We show that natural projection and maximum likelihood estimation within the covariance family are locally equivalent up to second order. We also demonstrate that natural projection may yield more accurate estimates with noise-corrupted data.
BibTeX Citation
@article{musolas-fullrank-2020,
  author  = {Musolas, A. and Smith, S. T. and Marzouk, Y. M.},
  title   = {Geodesically parameterized covariance estimation},
  journal = {SIAM Journal on Matrix Analysis and Applications},
  volume  = {42},
  number  = {2},
  pages   = {528--556},
  year    = {2021},
  doi     = {10.1137/19M1284646},
} -
M. Brennan, D. Bigoni, O. Zahm, A. Spantini, Y. M. Marzouk.
Greedy inference with structure-exploiting lazy maps
Advances in Neural Information Processing Systems (NeurIPS), oral presentation, (2020).
Abstract
We propose a framework for solving high-dimensional Bayesian inference problems using \emph{structure-exploiting} low-dimensional transport maps or flows. These maps are confined to a low-dimensional subspace (hence, lazy), and the subspace is identified by minimizing an upper bound on the Kullback--Leibler divergence (hence, structured). Our framework provides a principled way of identifying and exploiting low-dimensional structure in an inference problem. It focuses the expressiveness of a transport map along the directions of most significant discrepancy from the posterior, and can be used to build deep compositions of lazy maps, where low-dimensional projections of the parameters are iteratively transformed to match the posterior. We prove weak convergence of the generated sequence of distributions to the posterior, and we demonstrate the benefits of the framework on challenging inference problems in machine learning and differential equations, using inverse autoregressive flows and polynomial maps as examples of the underlying density estimators.
BibTeX Citation
@inproceedings{brennanbigoni-lazy-2020,
  author    = {Brennan, M. and Bigoni, D. and Zahm, O. and Spantini, A. and Marzouk, Y. M.},
  title     = {Greedy inference with structure-exploiting lazy maps},
  booktitle = {Advances in Neural Information Processing Systems ({NeurIPS})},
  year      = {2020},
  note      = {Oral presentation},
  keywords  = {Bayesian inference, greedy approximation, transport},
} -
D. Bigoni, Y. Chen, N. Garcia-Trillos, Y. M. Marzouk, D. Sanz-Alonso.
Data-driven forward discretizations for Bayesian inversion
Inverse Problems, 36 (2020), pp. 105008.
Abstract
This paper suggests a framework for the learning of discretizations of expensive forward models in Bayesian inverse problems. The main idea is to incorporate the parameters governing the discretization as part of the unknown to be estimated within the Bayesian machinery. We numerically show that in a variety of inverse problems arising in mechanical engineering, signal processing and the geosciences, the observations contain useful information to guide the choice of discretization.
BibTeX Citation
@article{bigoni-discretization-2020,
  author  = {Bigoni, D. and Chen, Y. and Garcia-Trillos, N. and Marzouk, Y. M. and Sanz-Alonso, D.},
  title   = {Data-driven forward discretizations for {Bayesian} inversion},
  journal = {Inverse Problems},
  volume  = {36},
  number  = {10},
  pages   = {105008},
  year    = {2020},
  doi     = {10.1088/1361-6420/abb2fa},
} -
J. Susiluoto, A. Spantini, H. Haario, T. Härkönen, Y. M. Marzouk.
Efficient multi-scale Gaussian process regression for massive remote sensing data with satGP v0.1.2
Geoscientific Model Development, 13 (2020), pp. 3439–3463.
Abstract
Satellite remote sensing provides a global view to processes on Earth that has unique benefits compared to making measurements on the ground, such as global coverage and enormous data volume. The typical downsides are spatial and temporal gaps and potentially low data quality. Meaningful statistical inference from such data requires overcoming these problems and developing efficient and robust computational tools. We design and implement a computationally efficient multi-scale Gaussian process (GP) software package, satGP, geared towards remote sensing applications. The software is able to handle problems of enormous sizes and to compute marginals and sample from the random field conditioning on at least hundreds of millions of observations. This is achieved by optimizing the computation by, e.g., randomization and splitting the problem into parallel local subproblems which aggressively discard uninformative data. We describe the mean function of the Gaussian process by approximating marginals of a Markov random field (MRF). Variability around the mean is modeled with a multi-scale covariance kernel, which consists of Matérn, exponential, and periodic components. We also demonstrate how winds can be used to inform covariances locally. The covariance kernel parameters are learned by calculating an approximate marginal maximum likelihood estimate, and the validity of both the multi-scale approach and the method used to learn the kernel parameters is verified in synthetic experiments. We apply these techniques to a moderate size ozone data set produced by an atmospheric chemistry model and to the very large number of observations retrieved from the Orbiting Carbon Observatory 2 (OCO-2) satellite. The satGP software is released under an open-source license.
BibTeX Citation
@article{susiluoto-satGP-2020,
  author  = {Susiluoto, J. and Spantini, A. and Haario, H. and H{\"a}rk{\"o}nen, T. and Marzouk, Y. M.},
  title   = {Efficient multi-scale {Gaussian} process regression for massive remote sensing data with {satGP} v0.1.2},
  journal = {Geoscientific Model Development},
  volume  = {13},
  pages   = {3439--3463},
  year    = {2020},
  doi     = {10.5194/gmd-13-3439-2020},
} -
F. Sciortino, N. T. Howard, E. S. Marmar, T. Odstrcil, N. M. Cao, R. Dux, A. E. Hubbard, J. W. Hughes, J. H. Irby, Y. M. Marzouk, L. M. Milanese.
Inference of experimental radial impurity transport on Alcator C-Mod: Bayesian parameter estimation and model selection
Nuclear Fusion, 60 (2020), pp. 126014.
Abstract
We present a fully Bayesian approach for the inference of radial profiles of impurity transport coefficients and compare its results to neoclassical, gyrofluid and gyrokinetic modeling. Using nested sampling, the Bayesian Impurity Transport InferencE (BITE) framework can handle complex parameter spaces with multiple possible solutions, offering great advantages in interpretative power and reliability with respect to previously demonstrated methods. BITE employs a forward model based on the pySTRAHL package, built on the success of the well-known STRAHL code [Dux, IPP Report, 2004], to simulate impurity transport in magnetically-confined plasmas. In this paper, we focus on calcium (Ca, Z=20) Laser Blow-Off injections into Alcator C-Mod plasmas. Multiple Ca atomic lines are diagnosed via high-resolution X-ray Imaging Crystal Spectroscopy and Vacuum Ultra-Violet measurements. We analyze a sawtoothing I-mode discharge for which neoclassical and turbulent (quasilinear and nonlinear) predictions are also obtained. We find good agreement in diffusion across the entire radial extent, while turbulent convection and density profile peaking are estimated to be larger in experiment than suggested by theory. Efforts and challenges associated with the inference of experimental pedestal impurity transport are discussed.
BibTeX Citation
@article{sciortino-impurity-2020,
  author  = {Sciortino, F. and Howard, N. T. and Marmar, E. S. and Odstrcil, T. and Cao, N. M. and Dux, R. and Hubbard, A. E. and Hughes, J. W. and Irby, J. H. and Marzouk, Y. M. and Milanese, L. M.},
  title   = {Inference of experimental radial impurity transport on {Alcator C-Mod}: {Bayesian} parameter estimation and model selection},
  journal = {Nuclear Fusion},
  volume  = {60},
  pages   = {126014},
  year    = {2020},
  doi     = {10.1088/1741-4326/abae85},
} -
O. Zahm, P. Constantine, C. Prieur, Y. M. Marzouk.
Gradient-based dimension reduction of multivariate vector-valued functions
SIAM Journal on Scientific Computing, 42 (2020), pp. A534-A558.
Available Links
Abstract
Multivariate functions encountered in high-dimensional uncertainty quantification problems often vary along a few dominant directions in the input parameter space. We propose a gradient-based method for detecting these directions and using them to construct \emph{ridge approximations} of such functions, in a setting where the functions are vector-valued (e.g., taking values in $\mathbb{R}^n$). The methodology consists of minimizing an upper bound on the approximation error, obtained by \emph{subspace Poincaré inequalities}. We provide a thorough mathematical analysis in the case where the parameter space is equipped with a Gaussian probability measure. The resulting method generalizes the notion of active subspaces associated with scalar-valued functions. A numerical illustration shows that using gradients of the function yields effective dimension reduction. We also show how the choice of norm on the codomain of the function has an impact on the function's low-dimensional approximation.
BibTeX Citation
@article{zahm-dimred-2020,
  author   = {Zahm, O. and Constantine, P. and Prieur, C. and Marzouk, Y. M.},
  title    = {Gradient-based dimension reduction of multivariate vector-valued functions},
  journal  = {SIAM Journal on Scientific Computing},
  volume   = {42},
  number   = {1},
  pages    = {A534--A558},
  year     = {2020},
  keywords = {high-dimensional function approximation, dimension reduction, active subspace, ridge approximation, Karhunen-Loève decomposition, Poincaré inequality},
} -
C. Gu, U. Mok, Y. M. Marzouk, G. Prieto Gomez, F. Sheibani, J. B. Evans, B. Hager.
Bayesian waveform-based calibration of high-pressure acoustic emission systems with ball drop measurements
Geophysical Journal International, 221 (2020), pp. 20-36.
Available Links
Abstract
Acoustic emission (AE) is a widely used technology to study source mechanisms and material properties during high-pressure rock failure experiments. It is important to understand the physical quantities that acoustic emission sensors measure, as well as the response of these sensors as a function of frequency. This study calibrates the newly built AE system in the MIT Rock Physics Laboratory using a ball-bouncing system. Full waveforms of multi-bounce events due to ball drops are used to infer the transfer function of lead zirconate titanate (PZT) sensors in high pressure environments. Uncertainty in the sensor transfer functions is quantified using a waveform-based Bayesian approach. The quantification of \textit{in situ} sensor transfer functions makes it possible to apply full waveform analysis for acoustic emissions at high pressures.
BibTeX Citation
@article{gu-waveform-2020,
  author  = {Gu, C. and Mok, U. and Marzouk, Y. M. and Prieto Gomez, G. and Sheibani, F. and Evans, J. B. and Hager, B.},
  title   = {{Bayesian} waveform-based calibration of high-pressure acoustic emission systems with ball drop measurements},
  journal = {Geophysical Journal International},
  volume  = {221},
  pages   = {20--36},
  year    = {2020},
} -
X. T. Tong, M. Morzfeld, Y. M. Marzouk.
MALA-within-Gibbs samplers for high-dimensional distributions with sparse conditional structure
SIAM Journal on Scientific Computing, 42 (2020), pp. A1765-A1788.
Available Links
Abstract
Markov chain Monte Carlo (MCMC) samplers are numerical methods for drawing samples from a given target probability distribution. We discuss one particular MCMC sampler, the MALA-within-Gibbs sampler, from the theoretical and practical perspectives. We first show that the acceptance ratio and step size of this sampler are independent of the overall problem dimension when (i) the target distribution has sparse conditional structure, and (ii) this structure is reflected in the partial updating strategy of MALA-within-Gibbs. If, in addition, the target density is block-wise log-concave, then the sampler's convergence rate is independent of dimension. From a practical perspective, we expect that MALA-within-Gibbs is useful for solving high-dimensional Bayesian inference problems where the posterior exhibits sparse conditional structure at least approximately. In this context, a partitioning of the state that correctly reflects the sparse conditional structure must be found, and we illustrate this process in two numerical examples. We also discuss trade-offs between the block size used for partial updating and computational requirements that may increase with the number of blocks.
BibTeX Citation
@article{tong-mala-2020,
  author   = {Tong, X. T. and Morzfeld, M. and Marzouk, Y. M.},
  title    = {{MALA-within-Gibbs} samplers for high-dimensional distributions with sparse conditional structure},
  journal  = {SIAM Journal on Scientific Computing},
  volume   = {42},
  number   = {3},
  pages    = {A1765--A1788},
  year     = {2020},
  keywords = {Bayesian computation, high-dimensional distribution, Markov chain Monte Carlo, sparsity, conditional independence, Metropolis-adjusted Langevin},
} -
R. Lam, O. Zahm, Y. M. Marzouk, K. Willcox.
Multifidelity dimension reduction via active subspaces
SIAM Journal on Scientific Computing, 42 (2020), pp. A929-A956.
Available Links
Abstract
We propose a multifidelity dimension reduction method to identify a low-dimensional structure present in many engineering models. The structure of interest arises when functions vary primarily on a low-dimensional subspace of the high-dimensional input space, while varying little along the complementary directions. Our approach builds on the gradient-based methodology of active subspaces, and exploits models of different fidelities to reduce the cost of performing dimension reduction through the computation of the active subspace matrix. We provide a non-asymptotic analysis of the number of gradient evaluations sufficient to achieve a prescribed error in the active subspace matrix, both in expectation and with high probability. We show that the sample complexity depends on a notion of intrinsic dimension of the problem, which can be much smaller than the dimension of the input space. We illustrate the benefits of such a multifidelity dimension reduction approach using numerical experiments with input spaces of up to two thousand dimensions.
BibTeX Citation
@article{lam-dimred-2020,
  author   = {Lam, R. and Zahm, O. and Marzouk, Y. M. and Willcox, K.},
  title    = {Multifidelity dimension reduction via active subspaces},
  journal  = {SIAM Journal on Scientific Computing},
  volume   = {42},
  number   = {2},
  pages    = {A929--A956},
  year     = {2020},
  keywords = {Dimension reduction, multifidelity, gradient-based, active subspace, intrinsic dimension, effective rank, matrix Bernstein inequality, control variate},
} -
J. M. Bardsley, T. Cui, Y. M. Marzouk, Z. Wang.
Scalable optimization-based sampling on function space
SIAM Journal on Scientific Computing, 42 (2020), pp. A1317-A1347.
Available Links
Abstract
Optimization-based samplers such as randomize-then-optimize (RTO) provide an efficient and parallelizable approach to solving large-scale Bayesian inverse problems. These methods solve randomly perturbed optimization problems to draw samples from an approximate posterior distribution. ``Correcting'' these samples, either by Metropolization or importance sampling, enables characterization of the original posterior distribution. This paper focuses on the scalability of RTO to problems with high- or infinite-dimensional parameters. In particular, we introduce a new subspace strategy to reformulate RTO. For problems with intrinsic low-rank structure, this subspace acceleration makes the computational complexity of RTO scale linearly with the parameter dimension. Furthermore, this subspace perspective suggests a natural extension of RTO to a function space setting. We thus formalize a function space version of RTO and establish sufficient conditions for it to produce a valid Metropolis--Hastings proposal, yielding dimension-independent sampling performance. Numerical examples corroborate the dimension-independence of RTO and demonstrate sampling performance that is also robust to small observational noise.
BibTeX Citation
@article{art_76,
  author   = {Bardsley, J. M. and Cui, T. and Marzouk, Y. M. and Wang, Z.},
  title    = {Scalable optimization-based sampling on function space},
  journal  = {SIAM Journal on Scientific Computing},
  volume   = {42},
  number   = {2},
  pages    = {A1317--A1347},
  year     = {2020},
  keywords = {Markov chain Monte Carlo, Metropolis independence sampling, Bayesian inference, infinite-dimensional inverse problems},
} -
M. Chilenski, M. Greenwald, Y. M. Marzouk, J. Rice, A. E. White.
On the importance of model selection when inferring impurity transport coefficient profiles
Plasma Physics and Controlled Fusion, 61 (2019), pp. 125012.
Available Links
Abstract
We present an analysis which suggests that model selection is a critical ingredient for successful reconstruction of impurity transport coefficient profiles, $D$ and $V$, from experimental data. Determining these quantities is a challenging nonlinear inverse problem. We use synthetic data to show that this problem is ill-posed, and hence $D$ and $V$ are not recommended for use in validation metrics unless the data analysis procedure goes to great lengths to account for the possibility that there are multiple possible solutions. In particular, inferred profiles which are very different from the true ones yield seemingly reasonable goodness-of-fit for synthetic X-ray spectrometer data. We present a Bayesian approach for inferring $D$ and $V$ which provides a rigorous means of selecting the level of complexity of the inferred profiles, thereby enabling successful reconstruction of the profiles.
BibTeX Citation
@article{art_75,
  author  = {Chilenski, M. and Greenwald, M. and Marzouk, Y. M. and Rice, J. and White, A. E.},
  title   = {On the importance of model selection when inferring impurity transport coefficient profiles},
  journal = {Plasma Physics and Controlled Fusion},
  volume  = {61},
  pages   = {125012},
  month   = nov,
  year    = {2019},
  doi     = {10.1088/1361-6587/ab4e69},
} -
B. Peherstorfer, Y. M. Marzouk.
A transport-based multifidelity preconditioner for Markov chain Monte Carlo
Advances in Computational Mathematics, 45 (2019), pp. 2321--2348.
Available Links
Abstract
Markov chain Monte Carlo (MCMC) sampling of posterior distributions arising in Bayesian inverse problems is challenging when evaluations of the forward model are computationally expensive. Replacing the forward model with a low-cost, low-fidelity model often significantly reduces computational cost; however, employing a low-fidelity model alone means that the stationary distribution of the MCMC chain is the posterior distribution corresponding to the low-fidelity model, rather than the original posterior distribution corresponding to the high-fidelity model. We propose a multifidelity approach that combines, rather than replaces, the high-fidelity model with a low-fidelity model. First, the low-fidelity model is used to construct a transport map that deterministically couples a reference Gaussian distribution with an approximation of the low-fidelity posterior. Then, the high-fidelity posterior distribution is explored using a non-Gaussian proposal distribution derived from the transport map. This multifidelity "preconditioned" MCMC approach seeks efficient sampling via a proposal that is explicitly tailored to the posterior at hand and that is constructed efficiently with the low-fidelity model. By relying on the low-fidelity model only to construct the proposal distribution, our approach guarantees that the stationary distribution of the MCMC chain is the high-fidelity posterior. In our numerical examples, our multifidelity approach achieves significant speedups compared to single-fidelity MCMC sampling methods.
BibTeX Citation
@article{art_74,
  author        = {Peherstorfer, B. and Marzouk, Y. M.},
  title         = {A transport-based multifidelity preconditioner for {Markov} chain {Monte Carlo}},
  journal       = {Advances in Computational Mathematics},
  volume        = {45},
  number        = {5--6},
  pages         = {2321--2348},
  month         = dec,
  year          = {2019},
  doi           = {10.1007/s10444-019-09711-y},
  eprint        = {1808.09379},
  archiveprefix = {arXiv},
  keywords      = {Bayesian inverse problems, transport maps, multifidelity, model reduction, Markov chain Monte Carlo},
} -
N. Galagali, Y. M. Marzouk.
Exploiting network topology for large-scale inference of nonlinear reaction models
Journal of the Royal Society: Interface, 16 (2019), pp. 20180766.
Available Links
Abstract
The development of chemical reaction models aids understanding and prediction in areas ranging from biology to electrochemistry and combustion. A systematic approach to building reaction network models uses observational data not only to estimate unknown parameters, but also to learn model structure. Bayesian inference provides a natural approach to this data-driven construction of models. Yet traditional Bayesian model inference methodologies that numerically evaluate the evidence for each model are often infeasible for nonlinear reaction network inference, as the number of plausible models can be combinatorially large. Alternative approaches based on model-space sampling can enable large-scale network inference, but their realization presents many challenges. In this paper, we present new computational methods that make large-scale nonlinear network inference tractable. First, we exploit the topology of networks describing potential interactions among chemical species to design improved "between-model" proposals for reversible-jump Markov chain Monte Carlo. Second, we introduce a sensitivity-based determination of move types which, when combined with network-aware proposals, yields significant additional gains in sampling performance. These algorithms are demonstrated on inference problems drawn from systems biology, with nonlinear differential equation models of species interactions.
BibTeX Citation
@article{art_72,
  author   = {Galagali, N. and Marzouk, Y. M.},
  title    = {Exploiting network topology for large-scale inference of nonlinear reaction models},
  journal  = {Journal of the Royal Society Interface},
  volume   = {16},
  number   = {152},
  pages    = {20180766},
  month    = mar,
  year     = {2019},
  doi      = {10.1098/rsif.2018.0766},
  arxiv    = {1705.04678},
  keywords = {reaction network, network inference, model selection, Bayesian inference, reversible-jump MCMC},
} -
M. Morzfeld, X. T. Tong, Y. M. Marzouk.
Localization for MCMC: sampling high-dimensional posterior distributions with local structure
Journal of Computational Physics, 380 (2019), pp. 1--28.
Available Links
Abstract
We investigate how ideas from covariance localization in numerical weather prediction can be used in Markov chain Monte Carlo (MCMC) sampling of high-dimensional posterior distributions arising in Bayesian inverse problems. To localize an inverse problem is to enforce an anticipated "local" structure by (i) neglecting small off-diagonal elements of the prior precision and covariance matrices; and (ii) restricting the influence of observations to their neighborhood. For linear problems we can specify the conditions under which posterior moments of the localized problem are close to those of the original problem. We explain physical interpretations of our assumptions about local structure and discuss the notion of high dimensionality in local problems, which is different from the usual notion of high dimensionality in function space MCMC. The Gibbs sampler is a natural choice of MCMC algorithm for localized inverse problems and we demonstrate that its convergence rate is independent of dimension for localized linear problems. Nonlinear problems can also be tackled efficiently by localization and, as a simple illustration of these ideas, we present a localized Metropolis-within-Gibbs sampler. Several linear and nonlinear numerical examples illustrate localization in the context of MCMC samplers for inverse problems.
BibTeX Citation
@article{art_67,
  author   = {Morzfeld, M. and Tong, X. T. and Marzouk, Y. M.},
  title    = {Localization for {MCMC}: sampling high-dimensional posterior distributions with local structure},
  journal  = {Journal of Computational Physics},
  volume   = {380},
  pages    = {1--28},
  year     = {2019},
  doi      = {10.1016/j.jcp.2018.12.008},
  keywords = {Markov chain Monte Carlo, Bayesian inverse problems, high dimensions, localization, dimension-independent convergence},
} -
A. Gorodetsky, S. Karaman, Y. M. Marzouk.
A continuous analogue of the tensor-train decomposition
Computer Methods in Applied Mechanics and Engineering, 347 (2019), pp. 59--84.
Available Links
Abstract
We develop new approximation algorithms and data structures for representing and computing with multivariate functions using the functional tensor-train (FT), a continuous extension of the tensor-train (TT) decomposition. The FT represents functions using a tensor-train ansatz by replacing the three-dimensional TT cores with univariate matrix-valued functions. The main contribution of this paper is a framework to compute the FT that employs adaptive approximations of univariate fibers, and that is not tied to any tensorized discretization. The algorithm can be coupled with any univariate linear or nonlinear approximation procedure. We demonstrate that this approach can generate multivariate function approximations that are several orders of magnitude more accurate, for the same cost, than those based on the conventional approach of compressing the coefficient tensor of a tensor-product basis. Our approach is in the spirit of other continuous computation packages such as Chebfun, and yields an algorithm which requires the computation of “continuous” matrix factorizations such as the LU and QR decompositions of vector-valued functions. To support these developments, we describe continuous versions of an approximate maximum-volume cross approximation algorithm and of a rounding algorithm that re-approximates an FT by one of lower ranks. We demonstrate that our technique improves accuracy and robustness, compared to TT and quantics-TT approaches with fixed parameterizations, of high-dimensional integration, differentiation, and approximation of functions with local features such as discontinuities and other nonlinearities.
BibTeX Citation
@article{art_70,
  author   = {Gorodetsky, A. and Karaman, S. and Marzouk, Y. M.},
  title    = {A continuous analogue of the tensor-train decomposition},
  journal  = {Computer Methods in Applied Mechanics and Engineering},
  volume   = {347},
  pages    = {59--84},
  year     = {2019},
  doi      = {10.1016/j.cma.2018.12.015},
  arxiv    = {1510.09088},
  keywords = {tensor decompositions, tensor-train, Chebfun, high-dimensional approximation},
} -
W. Ji, Z. Ren, Y. M. Marzouk, C. K. Law.
Quantifying kinetic uncertainty in turbulent combustion simulations using active subspaces
Proceedings of the Combustion Institute, 37 (2019), pp. 2175--2182.
Available Links
Abstract
Uncertainty quantification in expensive turbulent combustion simulations usually adopts response surface techniques to accelerate Monte Carlo sampling. However, it is computationally intractable to build response surfaces for high-dimensional kinetic parameters. We employ the active subspaces approach to reduce the dimension of the parameter space, such that building a response surface on the resulting low-dimensional subspace requires many fewer runs of the expensive simulation, rendering the approach suitable for various turbulent combustion models. We demonstrate this approach in simulations of the Cabra H2/N2 jet flame, propagating the uncertainties of 21 kinetic parameters to the liftoff height. We identify a one-dimensional active subspace for the liftoff height using 84 runs of the simulations, from which a response surface with a one-dimensional input is built; the probability distribution of the liftoff height is then characterized by evaluating a large number of samples using the inexpensive response surface. In addition, the active subspace provides a global sensitivity metric for determining the most influential reactions. Comparison with autoignition tests reveals that the sensitivities to the HO2-related reactions in the Cabra flame are promoted by the diffusion processes. The present work demonstrates the capability of active subspaces in quantifying uncertainty in turbulent combustion simulations and provides physical insights into the flame via the active subspace-based sensitivity metric.
BibTeX Citation
@article{art_69,
  author   = {Ji, W. and Ren, Z. and Marzouk, Y. M. and Law, C. K.},
  title    = {Quantifying kinetic uncertainty in turbulent combustion simulations using active subspaces},
  journal  = {Proceedings of the Combustion Institute},
  volume   = {37},
  number   = {2},
  pages    = {2175--2182},
  year     = {2019},
  doi      = {10.1016/j.proci.2018.06.206},
  keywords = {uncertainty quantification, active subspaces, turbulent lifted flames, autoignition},
} -
G. Detommaso, T. Cui, Y. M. Marzouk, R. Scheichl, A. Spantini.
A Stein variational Newton method
Advances in Neural Information Processing Systems (NeurIPS), 31 (2018).
Abstract
Stein variational gradient descent (SVGD) was recently proposed as a general purpose nonparametric variational inference algorithm [Liu & Wang, NIPS 2016]: it minimizes the Kullback-Leibler divergence between the target distribution and its approximation by implementing a form of functional gradient descent on a reproducing kernel Hilbert space. In this paper, we accelerate and generalize the SVGD algorithm by including second-order information, thereby approximating a Newton-like iteration in function space. We also show how second-order information can lead to more effective choices of kernel. We observe significant computational gains over the original SVGD algorithm in multiple test cases.
BibTeX Citation
@inproceedings{detomasso-svn-2020,
  author    = {Detommaso, G. and Cui, T. and Marzouk, Y. M. and Scheichl, R. and Spantini, A.},
  title     = {A {Stein} variational {Newton} method},
  booktitle = {Advances in Neural Information Processing Systems (NeurIPS)},
  volume    = {31},
  year      = {2018},
  keywords  = {variational inference, Stein's method, transport, nonparametric approximation, gradient descent, Newton method},
} -
M. Parno, Y. M. Marzouk.
Transport map accelerated Markov chain Monte Carlo
SIAM/ASA Journal on Uncertainty Quantification, 6 (2018), pp. 645--682.
Available Links
Abstract
We introduce a new framework for efficient sampling from complex probability distributions, using a combination of transport maps and the Metropolis-Hastings rule. The core idea is to use deterministic couplings to transform typical Metropolis proposal mechanisms (e.g., random walks, Langevin methods) into non-Gaussian proposal distributions that can more effectively explore the target density. Our approach adaptively constructs a lower triangular transport map---an approximation of the Knothe-Rosenblatt rearrangement---using information from previous Markov chain Monte Carlo (MCMC) states, via the solution of an optimization problem. This optimization problem is convex regardless of the form of the target distribution, and can be solved efficiently without gradient information from the target probability distribution; the target distribution is instead represented via samples. Sequential updates enable efficient and parallelizable adaptation of the map even for large numbers of samples. We show that this approach uses inexact or truncated maps to produce an adaptive MCMC algorithm that is ergodic for the exact target distribution. Numerical demonstrations on a range of parameter inference problems show order-of-magnitude speedups over standard MCMC techniques, measured by the number of effectively independent samples produced per target density evaluation and per unit of wallclock time.
BibTeX Citation
@article{art_66,
  author   = {Parno, M. and Marzouk, Y. M.},
  title    = {Transport map accelerated {Markov} chain {Monte Carlo}},
  journal  = {{SIAM/ASA} Journal on Uncertainty Quantification},
  volume   = {6},
  number   = {2},
  pages    = {645--682},
  year     = {2018},
  doi      = {10.1137/17M1134640},
  keywords = {adaptive MCMC, Bayesian inference, measure transformation, optimal transport},
} -
Q. Zhou, W. Liu, J. Li, Y. M. Marzouk.
An approximate empirical Bayesian method for large-scale linear-Gaussian inverse problems
Inverse Problems, 34 (2018), pp. 095001.
Available Links
Abstract
We study Bayesian inference methods for solving linear inverse problems, focusing on hierarchical formulations where the prior or the likelihood function depend on unspecified hyperparameters. In practice, these hyperparameters are often determined via an empirical Bayesian method that maximizes the marginal likelihood function, i.e., the probability density of the data conditional on the hyperparameters. Evaluating the marginal likelihood, however, is computationally challenging for large-scale problems. In this work, we present a method to approximately evaluate marginal likelihood functions, based on a low-rank approximation of the update from the prior covariance to the posterior covariance. We show that this approximation is optimal in a minimax sense. Moreover, we provide an efficient algorithm to implement the proposed method, based on a combination of the randomized SVD and a spectral approximation method to compute square roots of the prior covariance matrix. Several numerical examples demonstrate good performance of the proposed method.
BibTeX Citation
@article{art_65,
  author  = {Zhou, Q. and Liu, W. and Li, J. and Marzouk, Y. M.},
  title   = {An approximate empirical {Bayesian} method for large-scale {linear-Gaussian} inverse problems},
  journal = {Inverse Problems},
  volume  = {34},
  number  = {9},
  pages   = {095001},
  year    = {2018},
  doi     = {10.1088/1361-6420/aac287},
  arxiv   = {1705.07646},
} -
R. Baptista, Y. M. Marzouk, K. Willcox, B. Peherstorfer.
Optimal approximations of coupling in multidisciplinary models
AIAA Journal, 56 (2018), pp. 2412--2428.
Available Links
Abstract
This paper presents a methodology for identifying important discipline couplings in multicomponent engineering systems. Coupling among disciplines contributes significantly to the computational cost of analyzing a system, and can become particularly burdensome when coupled analyses are embedded within a design or optimization loop. In many cases, disciplines may be weakly coupled, so that some of the coupling or interaction terms can be neglected without significantly impacting the accuracy of the system output. Typical practice derives such approximations in an ad hoc manner using expert opinion and domain experience. This work proposes a new approach that formulates an optimization problem to find a model that optimally balances accuracy of the model outputs with the sparsity of the discipline couplings. An adaptive sequential Monte Carlo sampling-based technique is used to efficiently search the combinatorial model space of different discipline couplings. An algorithm for selecting an optimal model is presented and illustrated in a fire detection satellite model and a turbine engine cycle analysis model.
BibTeX Citation
@article{art_64,
  author  = {Baptista, R. and Marzouk, Y. M. and Willcox, K. and Peherstorfer, B.},
  title   = {Optimal approximations of coupling in multidisciplinary models},
  journal = {{AIAA} Journal},
  volume  = {56},
  number  = {6},
  pages   = {2412--2428},
  year    = {2018},
  doi     = {10.2514/1.J056888},
} -
R. Mohammadi-Ghazi, Y. M. Marzouk, O. Büyüköztürk.
Conditional classifiers and boosted conditional Gaussian mixture models for novelty detection
Pattern Recognition, 81 (2018), pp. 601--614.
Available Links
Abstract
Novelty detection is an important task in a variety of applications such as object recognition, defect localization, medical diagnostics, and event detection. The objective of novelty detection is to distinguish one class, for which data are available, from all other possible classes when there is insufficient information to build an explicit model for the latter. The data from the observed class are usually represented in terms of certain features which can be modeled as random variables (RV). An important challenge for novelty detection in multivariate problems is characterizing the statistical dependencies among these RVs. Failure to consider these dependencies may lead to inaccurate predictions, usually in the form of high false positive rates. In this study, we propose conditional classifiers as a new approach for novelty detection that is capable of accounting for statistical dependencies of the relevant RVs without simplifying assumptions. To implement the proposed idea, we use Gaussian mixture models (GMM) along with forward stage-wise additive modeling and boosting methods to learn the conditional densities of RVs that represent our observed data. The resulting model, which is called a boosted conditional GMM, is then used as a basis for classification. To test the performance of the proposed method, we apply it to a realistic application problem for analyzing sensor networks and compare the results to those obtained with a one-class support vector machine.
BibTeX Citation
@article{art_63,
  author   = {Mohammadi-Ghazi, R. and Marzouk, Y. M. and B{\"u}y{\"u}k{\"o}zt{\"u}rk, O.},
  title    = {Conditional classifiers and boosted conditional {Gaussian} mixture models for novelty detection},
  journal  = {Pattern Recognition},
  volume   = {81},
  pages    = {601--614},
  year     = {2018},
  doi      = {10.1016/j.patcog.2018.03.022},
  keywords = {Novelty detection, mixture models, graphical models, conditional dependence, conditional density, additive modeling, boosting, false positive},
} -
A. Beskos, A. Jasra, K. J. H. Law, Y. M. Marzouk, Y. Zhou.
Multilevel sequential Monte Carlo with dimension-independent likelihood-informed proposals
SIAM/ASA Journal on Uncertainty Quantification, 6 (2018), pp. 762--786.
Available Links
Abstract
In this article we develop a new sequential Monte Carlo (SMC) method for multilevel (ML) Monte Carlo estimation. In particular, the method can be used to estimate expectations with respect to a target probability distribution over an infinite-dimensional and non-compact space as given, for example, by a Bayesian inverse problem with Gaussian random field prior. Under suitable assumptions the MLSMC method has the optimal $O(\epsilon^{-2})$ bound on the cost to obtain a mean-square error of $O(\epsilon^{2})$. The algorithm is accelerated by dimension-independent likelihood-informed (DILI) proposals designed for Gaussian priors, leveraging a novel variation which uses empirical sample covariance information in lieu of Hessian information, hence eliminating the requirement for gradient evaluations. The efficiency of the algorithm is illustrated on two examples: inversion of noisy pressure measurements in a PDE model of Darcy flow to recover the posterior distribution of the permeability field, and inversion of noisy measurements of the solution of an SDE to recover the posterior path measure.
BibTeX Citation
@article{art_62,
  author   = {Beskos, A. and Jasra, A. and Law, K. J. H. and Marzouk, Y. M. and Zhou, Y.},
  title    = {Multilevel sequential {Monte Carlo} with dimension-independent likelihood-informed proposals},
  journal  = {{SIAM/ASA} Journal on Uncertainty Quantification},
  volume   = {6},
  pages    = {762--786},
  year     = {2018},
  doi      = {10.1137/17M1120993},
  arxiv    = {1703.04866},
  keywords = {multilevel Monte Carlo, sequential Monte Carlo, Bayesian inverse problem},
} -
M. Chilenski, M. Greenwald, Y. M. Marzouk, J. Rice, A. E. White.
Efficient design and verification of diagnostics for impurity transport experiments
Review of Scientific Instruments, 89 (2018), pp. 013504.
Available Links
Abstract
Recent attempts to measure impurity transport in Alcator C-Mod using an x-ray imaging crystal spectrometer and laser blow-off impurity injector have failed to yield unique reconstructions of the transport coefficient profiles. This paper presents a fast, linearized model which was constructed to estimate diagnostic requirements for impurity transport experiments. The analysis shows that the spectroscopic diagnostics on Alcator C-Mod should be capable of inferring simple profiles of impurity diffusion $D_Z$ and convection $V_Z$ accurate to better than ±10% uncertainty, suggesting that the failure to infer unique $D_Z$, $V_Z$ from experimental data is attributable to an inadequate analysis procedure rather than the result of insufficient diagnostics. Furthermore, the analysis reveals that spatial resolution is more important than temporal resolution for typical diagnostic sampling rates and noise levels. This approach can be adapted to design and verify diagnostics for transport experiments on any magnetic confinement device.
BibTeX Citation
@article{art_61,
  author  = {Chilenski, M. and Greenwald, M. and Marzouk, Y. M. and Rice, J. and White, A. E.},
  title   = {Efficient design and verification of diagnostics for impurity transport experiments},
  journal = {Review of Scientific Instruments},
  volume  = {89},
  pages   = {013504},
  month   = jan,
  year    = {2018},
  doi     = {10.1063/1.4997251},
} -
P. Conrad, A. Davis, Y. M. Marzouk, N. Pillai, A. Smith.
Parallel local approximation MCMC for expensive models
SIAM/ASA Journal on Uncertainty Quantification, 6 (2018), pp. 339--373.
Available Links
Abstract
Performing Bayesian inference via Markov chain Monte Carlo (MCMC) can be exceedingly expensive when posterior evaluations invoke the evaluation of a computationally expensive model, such as a system of partial differential equations. In recent work [Conrad et al. JASA 2016, arXiv:1402.1694], we described a framework for constructing and refining local approximations of such models during an MCMC simulation. These posterior-adapted approximations harness regularity of the model to reduce the computational cost of inference while preserving asymptotic exactness of the Markov chain. Here we describe two extensions of that work. First, we prove that samplers running in parallel can collaboratively construct a shared posterior approximation while ensuring ergodicity of each associated chain, providing a novel opportunity for exploiting parallel computation in MCMC. Second, focusing on the Metropolis-adjusted Langevin algorithm, we describe how a proposal distribution can successfully employ gradients and other relevant information extracted from the approximation. We investigate the practical performance of our strategies using two challenging inference problems, the first in subsurface hydrology and the second in glaciology. Using local approximations constructed via parallel chains, we successfully reduce the run time needed to characterize the posterior distributions in these problems from days to hours and from months to days, respectively, dramatically improving the tractability of Bayesian inference.
BibTeX Citation
@article{art_60,
  author   = {Conrad, P. and Davis, A. and Marzouk, Y. M. and Pillai, N. and Smith, A.},
  title    = {Parallel local approximation {MCMC} for expensive models},
  journal  = {{SIAM/ASA} Journal on Uncertainty Quantification},
  volume   = {6},
  number   = {1},
  pages    = {339--373},
  year     = {2018},
  doi      = {10.1137/16M1084080},
  arxiv    = {1607.02788},
  keywords = {Markov chain Monte Carlo, parallel computing, Metropolis-adjusted Langevin algorithm, Bayesian inference, approximation theory, local regression, surrogate modeling},
} -
A. Gorodetsky, S. Karaman, Y. M. Marzouk.
High-dimensional stochastic optimal control using continuous tensor decompositions
International Journal of Robotics Research, 37 (2018), pp. 340--377.
Available Links
Abstract
Motion planning and control problems are embedded and essential in almost all robotics applications. These problems are often formulated as stochastic optimal control problems and solved using dynamic programming algorithms. Unfortunately, most existing algorithms that guarantee convergence to optimal solutions suffer from the curse of dimensionality: the run time of the algorithm grows exponentially with the dimension of the state space of the system. We propose novel dynamic programming algorithms that alleviate the curse of dimensionality in problems that exhibit certain low-rank structure. The proposed algorithms are based on continuous tensor decompositions recently developed by the authors. Essentially, the algorithms represent high-dimensional functions (e.g., the value function) in a compressed format, and directly perform dynamic programming computations (e.g., value iteration, policy iteration) in this format. Under certain technical assumptions, the new algorithms guarantee convergence towards optimal solutions with arbitrary precision. Furthermore, the run times of the new algorithms scale polynomially with the state dimension and polynomially with the ranks of the value function. This approach realizes substantial computational savings in “compressible” problem instances, where value functions admit low-rank approximations. We demonstrate the new algorithms in a wide range of problems, including a simulated six-dimensional agile quadcopter maneuvering example and a seven-dimensional aircraft perching example. In some of these examples, we estimate computational savings of up to 10 orders of magnitude over standard value iteration algorithms. We further demonstrate the algorithms running in real time on board a quadcopter during a flight experiment under motion capture.
BibTeX Citation
@article{art_59,
  author   = {Gorodetsky, A. and Karaman, S. and Marzouk, Y. M.},
  title    = {High-dimensional stochastic optimal control using continuous tensor decompositions},
  journal  = {International Journal of Robotics Research},
  volume   = {37},
  number   = {2-3},
  pages    = {340--377},
  month    = mar,
  year     = {2018},
  doi      = {10.1177/0278364917753994},
  arxiv    = {1611.04706},
  keywords = {stochastic optimal control, motion planning, dynamic programming, tensor decompositions},
} -
J. Jagalur-Mohan, B. Jha, Z. Wang, R. Juanes, Y. M. Marzouk.
Inferring fault frictional and reservoir hydraulic properties from injection-induced seismicity
Geophysical Research Letters, 45 (2018), pp. 1313--1320.
Available Links
Abstract
Characterizing the rheological properties of faults and the evolution of fault friction during seismic slip are fundamental problems in geology and seismology. Recent increases in the frequency of induced earthquakes have intensified the need for robust methods to estimate fault properties. Here, we present a novel approach for aquifer- and fault-property estimation, which combines coupled multiphysics simulation of injection-induced seismicity with adaptive surrogate-based Bayesian inversion. In a synthetic 2D model, we use aquifer pressure, ground displacements, and fault slip measurements during fluid injection to estimate the dynamic fault friction, the critical slip distance, and the aquifer permeability. Our forward model allows us to observe non-monotonic evolutions of shear traction and slip on the fault resulting from the interplay of several physical mechanisms, including injection-induced aquifer expansion, stress transfer along the fault, and slip-induced stress relaxation. This interplay provides the basis for a successful joint inversion of induced seismicity, yielding well-informed Bayesian posterior distributions of dynamic friction and critical slip. We uncover an inverse relationship between dynamic friction and critical slip distance, which is in agreement with the small dynamic friction and large critical slip reported during seismicity on mature faults.
BibTeX Citation
@article{art_58,
  author  = {Jagalur-Mohan, J. and Jha, B. and Wang, Z. and Juanes, R. and Marzouk, Y. M.},
  title   = {Inferring fault frictional and reservoir hydraulic properties from injection-induced seismicity},
  journal = {Geophysical Research Letters},
  volume  = {45},
  number  = {3},
  pages   = {1313--1320},
  year    = {2018},
  doi     = {10.1002/2017GL075925},
} -
A. Spantini, D. Bigoni, Y. M. Marzouk.
Inference via low-dimensional couplings
Journal of Machine Learning Research, 19 (2018), pp. 1--71.
Available Links
Abstract
Integration against an intractable probability measure is among the fundamental challenges of statistical inference, particularly in the Bayesian setting. A principled approach to this problem seeks a deterministic coupling of the measure of interest with a tractable ``reference'' measure (e.g., a standard Gaussian). This coupling is induced by a transport map, and enables direct simulation from the desired measure simply by evaluating the transport map at samples from the reference. Yet characterizing such a map---e.g., representing and evaluating it---grows challenging in high dimensions. The central contribution of this paper is to establish a link between the Markov properties of the target measure and the existence of certain low-dimensional couplings, induced by transport maps that are sparse or decomposable. Our analysis not only facilitates the construction of couplings in high-dimensional settings, but also suggests new inference methodologies. For instance, in the context of nonlinear and non-Gaussian state space models, we describe new variational algorithms for online filtering, smoothing, and parameter estimation. These algorithms implicitly characterize---via a transport map---the full posterior distribution of the sequential inference problem using local operations only incrementally more complex than regular filtering, while avoiding importance sampling or resampling.
BibTeX Citation
@article{art_57,
  author   = {Spantini, A. and Bigoni, D. and Marzouk, Y. M.},
  title    = {Inference via low-dimensional couplings},
  journal  = {Journal of Machine Learning Research},
  volume   = {19},
  number   = {66},
  pages    = {1--71},
  year     = {2018},
  keywords = {transport map, rearrangement, Bayesian inference, variational inference, graphical model, Markov random field, sparsity, Kalman recursions, filtering, smoothing, joint parameter-state estimation, state-space model},
} -
C. Gu, Y. M. Marzouk, M. N. Toksöz.
Waveform-based Bayesian full moment tensor inversion and uncertainty determination for induced seismicity in an oil/gas field
Geophysical Journal International, 212 (2018), pp. 1963--1985.
Abstract
Small earthquakes occur due to natural tectonic motions and are induced by oil and gas production processes. In many oil/gas fields and hydrofracking processes, induced earthquakes result from fluid extraction or injection. The locations and source mechanisms of these earthquakes provide valuable information about the reservoirs. Analysis of induced seismic events has mostly assumed a double-couple source mechanism. However, recent studies have shown a non-negligible percentage of non-double-couple components of source moment tensors in hydraulic fracturing events, assuming a full moment tensor source mechanism. Without uncertainty quantification of the moment tensor solution, it is difficult to determine the reliability of these source models. This study develops a Bayesian method to perform waveform-based full moment tensor inversion and uncertainty quantification for induced seismic events, accounting for both location and velocity model uncertainties. We conduct tests with synthetic events to validate the method, and then apply our newly developed Bayesian inversion approach to real induced seismicity in an oil/gas field in the Sultanate of Oman—determining the uncertainties in the source mechanism and in the location of that event.
BibTeX Citation
@article{art_54,
  author   = {Gu, C. and Marzouk, Y. M. and Toks{\"o}z, M. N.},
  title    = {Waveform-based {Bayesian} full moment tensor inversion and uncertainty determination for induced seismicity in an oil/gas field},
  journal  = {Geophysical Journal International},
  volume   = {212},
  number   = {1},
  pages    = {1963--1985},
  month    = mar,
  year     = {2018},
  doi      = {10.1093/gji/ggx517},
  keywords = {induced seismicity, Bayesian inversion, full moment tensor, uncertainty quantification},
} -
A. Marques, Q. Wang, Y. M. Marzouk.
Data-driven integral boundary layer modeling for airfoil performance prediction in the laminar regime
AIAA Journal, 56 (2018), pp. 482--496.
Available Links
Abstract
Many simulation tools for airfoil analysis and design are based on an integral approximation of the boundary layer. This approximate formulation cannot resolve the full dynamics of boundary-layer flows and hence requires additional models to account for unresolved effects. This paper introduces a new, data-driven, probabilistic model of these unresolved effects for the incompressible and laminar regime. To construct this model, methods from supervised learning have been applied to a dataset containing over 1550 airfoils. The result is a model that 1) is based on a large dataset of realistic airfoil configurations, and 2) quantifies the model inadequacy associated with the use of an approximate boundary-layer formulation. A stochastic version of the airfoil design tool XFOIL has also been created by replacing its original boundary-layer model with the probabilistic model developed here. This stochastic version of XFOIL has been applied to compute the drag polars of two airfoils at low Reynolds numbers and the results are compared with experimental data.
BibTeX Citation
@article{art_53,
  author  = {Marques, A. and Wang, Q. and Marzouk, Y. M.},
  title   = {Data-driven integral boundary layer modeling for airfoil performance prediction in the laminar regime},
  journal = {AIAA Journal},
  year    = {2018},
  volume  = {56},
  number  = {2},
  pages   = {482--496},
  doi     = {10.2514/1.J055877},
}
-
W. Ji, J. Wang, O. Zahm, Y. M. Marzouk, B. Yang, Z. Ren, C. K. Law.
Shared low-dimensional subspaces for propagating kinetic uncertainty to multiple outputs
Combustion and Flame, 190 (2018), pp. 146--157.
Available Links
Abstract
Forward propagation of kinetic uncertainty in combustion simulations usually adopts response surface techniques to accelerate Monte Carlo sampling. Yet it is computationally challenging to build response surfaces for high-dimensional input parameters and expensive combustion models. This study uses the active subspace method to identify a low-dimensional subspace of the input space, within which response surfaces can be built. Active subspace methods have previously been developed only for single (scalar) model outputs, however. This paper introduces a new method that can simultaneously approximate the marginal probability density functions of multiple outputs using a single low-dimensional shared subspace. We identify the shared subspace by solving a least-squares system to compute an appropriate combination of single-output active subspaces. Because the identification of the active subspace for each individual output may require a significant number of samples, this process may be computationally intractable for expensive models such as turbulent combustion simulations. Instead, we propose a heuristic approach that learns the relevant subspaces from cheaper combustion models. The performance of the active subspace for a single output, and of the shared subspace for multiple outputs, is first demonstrated with the ignition delay times and laminar flame speeds of hydrogen/air, methane/air, and dimethyl ether (DME)/air mixtures. Then we demonstrate extrapolatory performance of the shared subspace: using a shared subspace trained on the ignition delays at constant volume, we perform forward propagation of kinetic uncertainties through zero-dimensional HCCI simulations—in particular, single-stage ignition of a natural gas/air mixture and two-stage ignition of a DME/air mixture. 
We show that the shared subspace can accurately reproduce the probability of ignition failure and the probability density of ignition crank angle conditioned on successful ignition, given uncertainty in the kinetics.
BibTeX Citation
@article{art_52,
  author        = {Ji, W. and Wang, J. and Zahm, O. and Marzouk, Y. M. and Yang, B. and Ren, Z. and Law, C. K.},
  title         = {Shared low-dimensional subspaces for propagating kinetic uncertainty to multiple outputs},
  journal       = {Combustion and Flame},
  year          = {2018},
  volume        = {190},
  pages         = {146--157},
  doi           = {10.1016/j.combustflame.2017.11.021},
  keywords      = {Uncertainty propagation, dimension reduction, active subspaces, shared subspaces, multiple outputs},
  internal-note = {NOTE(review): last author corrected from the garbled form C. K. J. H. Law; confirm against the DOI record},
}
-
R. Morrison, R. Baptista, Y. M. Marzouk.
Beyond normality: Learning sparse probabilistic graphical models in the non-Gaussian setting
Advances in Neural Information Processing Systems (NIPS), 30 (2017).
Abstract
We present an algorithm to identify sparse dependence structure in continuous and non-Gaussian probability distributions, given a corresponding set of data. The conditional independence structure of an arbitrary distribution can be represented as an undirected graph (or Markov random field), but most algorithms for learning this structure are restricted to the discrete or Gaussian cases. Our new approach allows for more realistic and accurate descriptions of the distribution in question, and in turn better estimates of its sparse Markov structure. Sparsity in the graph is of interest as it can accelerate inference, improve sampling methods, and reveal important dependencies between variables. The algorithm relies on exploiting the connection between the sparsity of the graph and the sparsity of transport maps, which deterministically couple one probability measure to another.
BibTeX Citation
@article{art_51,
  author        = {Morrison, R. and Baptista, R. and Marzouk, Y. M.},
  title         = {Beyond normality: Learning sparse probabilistic graphical models in the {non-Gaussian} setting},
  journal       = {Advances in Neural Information Processing Systems (NIPS)},
  year          = {2017},
  volume        = {30},
  internal-note = {NOTE(review): this is a conference proceedings paper stored as an article; consider inproceedings with booktitle if the site tooling allows},
}
-
M. Chilenski, M. Greenwald, A. Hubbard, J. W. Hughes, J. Lee, Y. M. Marzouk, J. Rice, A. E. White.
Experimentally testing the dependence of momentum transport on second derivatives using Gaussian process regression
Nuclear Fusion, 57 (2017), pp. 126013.
Available Links
Abstract
It remains an open question to explain the dramatic change in intrinsic rotation induced by slight changes in electron density (White et al. 2013, Phys. Plasmas 20, 056106). One proposed explanation is that momentum transport is sensitive to the second derivatives of the temperature and density profiles (Lee et al. 2015, Plasma Phys. Controlled Fusion 57, 125006), but it is widely considered to be impossible to measure these higher derivatives. In this paper, we show that it is possible to estimate second derivatives of electron density and temperature using a nonparametric regression technique known as Gaussian process regression (GPR). This technique avoids over-constraining the fit by not assuming an explicit functional form for the fitted curve. The uncertainties, obtained rigorously using Markov chain Monte Carlo (MCMC) sampling, are small enough that it is reasonable to explore hypotheses which depend on second derivatives. It is found that the differences in the second derivatives of $n_e$ and $T_e$ between the peaked and hollow rotation cases are rather small, suggesting that changes in the second derivatives are not likely to explain the experimental results.
BibTeX Citation
@article{art_50,
  author  = {Chilenski, M. and Greenwald, M. and Hubbard, A. and Hughes, J. W. and Lee, J. and Marzouk, Y. M. and Rice, J. and White, A. E.},
  title   = {Experimentally testing the dependence of momentum transport on second derivatives using {Gaussian} process regression},
  journal = {Nuclear Fusion},
  year    = {2017},
  month   = sep,
  volume  = {57},
  number  = {12},
  pages   = {126013},
  doi     = {10.1088/1741-4326/aa8387},
}
-
A. Spantini, T. Cui, K. Willcox, L. Tenorio, Y. M. Marzouk.
Goal-oriented optimal approximations of Bayesian linear inverse problems
SIAM Journal on Scientific Computing, 39 (2017), pp. S167--S196.
Available Links
Abstract
We propose optimal dimensionality reduction techniques for the solution of goal-oriented linear-Gaussian inverse problems, where the quantity of interest (QoI) is a function of the inversion parameters. These approximations are suitable for large-scale applications. In particular, we study the approximation of the posterior covariance of the QoI as a low-rank negative update of its prior covariance, and prove optimality of this update with respect to the natural geodesic distance on the manifold of symmetric positive definite matrices. Assuming exact knowledge of the posterior mean of the QoI, the optimality results extend to optimality in distribution with respect to the Kullback-Leibler divergence and the Hellinger distance between the associated distributions. We also propose approximation of the posterior mean of the QoI as a low-rank linear function of the data, and prove optimality of this approximation with respect to a weighted Bayes risk. Both of these optimal approximations avoid the explicit computation of the full posterior distribution of the parameters and instead focus on directions that are well informed by the data and relevant to the QoI. These directions stem from a balance among all the components of the goal-oriented inverse problem: prior information, forward model, measurement noise, and ultimate goals. We illustrate the theory using a high-dimensional inverse problem in heat transfer.
BibTeX Citation
@article{art_49,
  author   = {Spantini, A. and Cui, T. and Willcox, K. and Tenorio, L. and Marzouk, Y. M.},
  title    = {Goal-oriented optimal approximations of {Bayesian} linear inverse problems},
  journal  = {SIAM Journal on Scientific Computing},
  year     = {2017},
  volume   = {39},
  number   = {5},
  pages    = {S167--S196},
  doi      = {10.1137/16M1082123},
  arxiv    = {1607.01881},
  keywords = {inverse problems, goal-oriented, Bayesian inference, low-rank approximation, covariance approximation, Riemannian metric, geodesic distance, posterior mean approximation, Bayes risk, optimality},
}
-
Z. Wang, J. M. Bardsley, A. Solonen, T. Cui, Y. M. Marzouk.
Bayesian inverse problems with l1 priors: a randomize-then-optimize approach
SIAM Journal on Scientific Computing, 39 (2017), pp. S140--S166.
Available Links
Abstract
Prior distributions for Bayesian inference that rely on the $l_1$-norm of the parameters are of considerable interest, in part because they promote parameter fields with less regularity than Gaussian priors (e.g., discontinuities and blockiness). These $l_1$-type priors include the total variation (TV) prior and the Besov space $B^s_{1,1}$ prior, and in general yield non-Gaussian posterior distributions. Sampling from these posteriors is challenging, particularly in the inverse problem setting where the parameter space is high-dimensional and the forward problem may be nonlinear. This paper extends the randomize-then-optimize (RTO) method, an optimization-based sampling algorithm developed for Bayesian inverse problems with Gaussian priors, to inverse problems with $l_1$-type priors. We use a variable transformation to convert an $l_1$-type prior to a standard Gaussian prior, such that the posterior distribution of the transformed parameters is amenable to Metropolized sampling via RTO. We demonstrate this approach on several deconvolution problems and an elliptic PDE inverse problem, using TV or Besov space $B^s_{1,1}$ priors. Our results show that the transformed RTO algorithm characterizes the correct posterior distribution and can be more efficient than other sampling algorithms. The variable transformation can also be extended to other non-Gaussian priors.
BibTeX Citation
@article{art_48,
  author   = {Wang, Z. and Bardsley, J. M. and Solonen, A. and Cui, T. and Marzouk, Y. M.},
  title    = {{Bayesian} inverse problems with l1 priors: a randomize-then-optimize approach},
  journal  = {SIAM Journal on Scientific Computing},
  year     = {2017},
  volume   = {39},
  number   = {5},
  pages    = {S140--S166},
  doi      = {10.1137/16M1080938},
  arxiv    = {1607.01904},
  keywords = {Inverse problems, Bayesian inference, Monte Carlo methods},
}
-
M. Parno, T. Moselhy, Y. M. Marzouk.
A multiscale strategy for Bayesian inference using transport maps
SIAM/ASA Journal on Uncertainty Quantification, 4 (2016), pp. 1160--1190.
Available Links
Abstract
In many inverse problems, model parameters cannot be precisely determined from observational data. Bayesian inference provides a mechanism for capturing the resulting parameter uncertainty, but typically at a high computational cost. This work introduces a multiscale decomposition that exploits conditional independence across scales, when present in certain classes of inverse problems, to decouple Bayesian inference into two stages: (1) a computationally tractable coarse-scale inference problem; and (2) a mapping of the low-dimensional coarse-scale posterior distribution into the original high-dimensional parameter space. This decomposition relies on a characterization of the non-Gaussian joint distribution of coarse- and fine-scale quantities via optimal transport maps. We demonstrate our approach on a sequence of inverse problems arising in subsurface flow, using the multiscale finite element method to discretize the steady state pressure equation. We compare the multiscale strategy with full-dimensional Markov chain Monte Carlo on a problem of moderate dimension (100 parameters) and then use it to infer a conductivity field described by over 10,000 parameters.
BibTeX Citation
@article{parno-transport-juq-2016,
  author   = {Parno, M. and Moselhy, T. and Marzouk, Y. M.},
  title    = {A multiscale strategy for {Bayesian} inference using transport maps},
  journal  = {SIAM/ASA Journal on Uncertainty Quantification},
  year     = {2016},
  month    = oct,
  volume   = {4},
  number   = {1},
  pages    = {1160--1190},
  doi      = {10.1137/15M1032478},
  keywords = {Bayesian inference, inverse problems, multiscale modeling, multiscale finite element method, optimal transportation, Markov chain Monte Carlo},
}
-
D. Bigoni, A. Engsig-Karup, Y. M. Marzouk.
Spectral tensor-train decomposition
SIAM Journal on Scientific Computing, 38 (2016), pp. A2405--A2439.
Available Links
Abstract
The accurate approximation of high-dimensional functions is an essential task in uncertainty quantification and many other fields. We propose a new function approximation scheme based on a spectral extension of the tensor-train (TT) decomposition. We first define a functional version of the TT decomposition and analyze its properties. We obtain results on the convergence of the decomposition, revealing links between the regularity of the function, the dimension of the input space, and the TT ranks. We also show that the regularity of the target function is preserved by the univariate functions (i.e., the "cores") comprising the functional TT decomposition. This result motivates an approximation scheme employing polynomial approximations of the cores. For functions with appropriate regularity, the resulting \textit{spectral tensor-train decomposition} combines the favorable dimension-scaling of the TT decomposition with the spectral convergence rate of polynomial approximations, yielding efficient and accurate surrogates for high-dimensional functions. To construct these decompositions, we use the sampling algorithm \texttt{TT-DMRG-cross} to obtain the TT decomposition of tensors resulting from suitable discretizations of the target function. We assess the performance of the method on a range of numerical examples: a modified set of Genz functions with dimension up to 100, and functions with mixed Fourier modes or with local features. We observe significant improvements in performance over an anisotropic adaptive Smolyak approach. The method is also used to approximate the solution of an elliptic PDE with random input data. The open source software and examples presented in this work are available online.
BibTeX Citation
@article{art_45,
  author   = {Bigoni, D. and Engsig-Karup, A. and Marzouk, Y. M.},
  title    = {Spectral tensor-train decomposition},
  journal  = {SIAM Journal on Scientific Computing},
  year     = {2016},
  volume   = {38},
  number   = {4},
  pages    = {A2405--A2439},
  doi      = {10.1137/15M1036919},
  arxiv    = {1405.5713},
  keywords = {approximation theory, tensor-train decomposition, orthogonal polynomials, uncertainty quantification},
}
-
T. Cui, Y. M. Marzouk, K. Willcox.
Scalable posterior approximations for large-scale Bayesian inverse problems via likelihood-informed parameter and state reduction
Journal of Computational Physics, 315 (2016), pp. 363--387.
Available Links
Abstract
Two major bottlenecks to the solution of large-scale Bayesian inverse problems are the scaling of posterior sampling algorithms to high dimensional parameter spaces and the computational cost of forward model evaluations. Yet incomplete or noisy data, the state variation and parameter dependence of the forward model, and correlations in the prior collectively provide useful structure that can be exploited for dimension reduction in this setting---both in the parameter space of the inverse problem and in the state space of the forward model. To this end, we show how to jointly construct low-dimensional subspaces of the parameter space and the state space in order to accelerate the Bayesian solution of the inverse problem. As a byproduct of state dimension reduction, we also show how to identify low-dimensional subspaces of the data in problems with high-dimensional observations. These subspaces enable approximation of the posterior as a product of two factors: (i) a projection of the posterior onto a low-dimensional parameter subspace, wherein the original likelihood is replaced by an approximation involving a reduced model; and (ii) the marginal prior distribution on the high-dimensional complement of the parameter subspace. We present and compare several strategies for constructing these subspaces using only a limited number of forward and adjoint model simulations. The resulting posterior approximations can rapidly be characterized using standard sampling techniques, e.g., Markov chain Monte Carlo. Two numerical examples demonstrate the accuracy and efficiency of our approach: inversion of an integral equation in atmospheric remote sensing, where the data dimension is very high; and the inference of a heterogeneous transmissivity field in a groundwater system, which involves a partial differential equation forward model with high dimensional state and parameters.
BibTeX Citation
@article{art_44,
  author   = {Cui, T. and Marzouk, Y. M. and Willcox, K.},
  title    = {Scalable posterior approximations for large-scale {Bayesian} inverse problems via likelihood-informed parameter and state reduction},
  journal  = {Journal of Computational Physics},
  year     = {2016},
  month    = jun,
  volume   = {315},
  pages    = {363--387},
  doi      = {10.1016/j.jcp.2016.03.055},
  arxiv    = {1510.06053},
  keywords = {Inverse problems, Bayesian inference, dimension reduction, model reduction, low-rank approximation, Markov chain Monte Carlo},
}
-
A. Gorodetsky, Y. M. Marzouk.
Mercer kernels and integrated variance experimental design: connections between Gaussian process regression and polynomial approximation
SIAM/ASA Journal on Uncertainty Quantification, 4 (2016), pp. 796--828.
Available Links
Abstract
This paper examines experimental design procedures used to develop surrogates of computational models, exploring the interplay between experimental designs and approximation algorithms. We focus on two widely used approximation approaches, Gaussian process (GP) regression and nonintrusive polynomial approximation. First, we introduce algorithms for minimizing a posterior integrated variance (IVAR) design criterion for GP regression. Our formulation treats design as a continuous optimization problem that can be solved with gradient-based methods on complex input domains without resorting to greedy approximations. We show that minimizing IVAR in this way yields point sets with good interpolation properties and that it enables more accurate GP regression than designs based on entropy minimization or mutual information maximization. Second, using a Mercer kernel/eigenfunction perspective on GP regression, we identify conditions under which GP regression coincides with pseudospectral polynomial approximation. Departures from these conditions can be understood as changes either to the kernel or to the experimental design itself. We then show how IVAR-optimal designs, while sacrificing discrete orthogonality of the kernel eigenfunctions, can yield lower approximation error than orthogonalizing point sets. Finally, we compare the performance of adaptive GP regression and adaptive pseudospectral approximation for several classes of target functions, identifying features that are favorable to the GP + IVAR approach.
BibTeX Citation
@article{art_43,
  author   = {Gorodetsky, A. and Marzouk, Y. M.},
  title    = {{Mercer} kernels and integrated variance experimental design: connections between {Gaussian} process regression and polynomial approximation},
  journal  = {SIAM/ASA Journal on Uncertainty Quantification},
  year     = {2016},
  volume   = {4},
  number   = {1},
  pages    = {796--828},
  doi      = {10.1137/15M1017119},
  arxiv    = {1503.00021},
  keywords = {Gaussian process regression, experimental design, computer experiments, approximation theory, polynomial approximation, kernel interpolation, uncertainty quantification},
}
-
A. Solonen, T. Cui, J. Hakkarainen, Y. M. Marzouk.
On dimension reduction in Gaussian filters
Inverse Problems, 32 (2016), pp. 045003.
Available Links
Abstract
A priori dimension reduction is a widely adopted technique for reducing the computational complexity of stationary inverse problems. In this setting, the solution of an inverse problem is parameterized by a low-dimensional basis that is often obtained from the truncated Karhunen-Loève expansion of the prior distribution. For high-dimensional inverse problems equipped with smoothing priors, this technique can lead to drastic reductions in parameter dimension and significant computational savings. In this paper, we extend the concept of a priori dimension reduction to non-stationary inverse problems, in which the goal is to sequentially infer the state of a dynamical system. Our approach proceeds in an offline-online fashion. We first identify a low-dimensional subspace in the state space before solving the inverse problem (the offline phase), using either the method of "snapshots" or regularized covariance estimation. Then this subspace is used to reduce the computational complexity of various filtering algorithms—including the Kalman filter, extended Kalman filter, and ensemble Kalman filter—within a novel subspace-constrained Bayesian prediction-and-update procedure (the online phase). We demonstrate the performance of our new dimension reduction approach on various numerical examples. In some test cases, our approach reduces the dimensionality of the original problem by orders of magnitude and yields up to two orders of magnitude in computational savings.
BibTeX Citation
@article{art_42,
  author  = {Solonen, A. and Cui, T. and Hakkarainen, J. and Marzouk, Y. M.},
  title   = {On dimension reduction in {Gaussian} filters},
  journal = {Inverse Problems},
  year    = {2016},
  month   = mar,
  volume  = {32},
  number  = {4},
  pages   = {045003},
  doi     = {10.1088/0266-5611/32/4/045003},
  arxiv   = {1508.06452},
}
-
B. Peherstorfer, T. Cui, Y. M. Marzouk, K. Willcox.
Multifidelity importance sampling
Computer Methods in Applied Mechanics and Engineering, 300 (2016), pp. 490--509.
Abstract
Estimating statistics of model outputs with the Monte Carlo method often requires a large number of model evaluations. This leads to long runtimes if the model is expensive to evaluate. Importance sampling is one approach that can lead to a reduction in the number of model evaluations. Importance sampling uses a biasing distribution to sample the model more efficiently, but generating such a biasing distribution can be difficult and usually also requires model evaluations. A different strategy to speed up Monte Carlo sampling is to replace the computationally expensive high-fidelity model with a computationally cheap surrogate model; however, because the surrogate model outputs are only approximations of the high-fidelity model outputs, the estimate obtained using a surrogate model is in general biased with respect to the estimate obtained using the high-fidelity model. We introduce a multifidelity importance sampling (MFIS) method, which combines evaluations of both the high-fidelity and a surrogate model. It uses a surrogate model to facilitate the construction of the biasing distribution, but relies on a small number of evaluations of the high-fidelity model to derive an unbiased estimate of the statistics of interest. We prove that the MFIS estimate is unbiased even in the absence of accuracy guarantees on the surrogate model itself. The MFIS method can be used with any type of surrogate model, such as projection-based reduced-order models and data-fit models. Furthermore, the MFIS method is applicable to black-box models, i.e., where only inputs and the corresponding outputs of the high-fidelity and the surrogate model are available but not the details of the models themselves. We demonstrate on nonlinear and time-dependent problems that our MFIS method achieves speedups of up to several orders of magnitude compared to Monte Carlo with importance sampling that uses the high-fidelity model only.
BibTeX Citation
@article{art_41,
  author   = {Peherstorfer, B. and Cui, T. and Marzouk, Y. M. and Willcox, K.},
  title    = {Multifidelity importance sampling},
  journal  = {Computer Methods in Applied Mechanics and Engineering},
  year     = {2016},
  volume   = {300},
  pages    = {490--509},
  doi      = {10.1016/j.cma.2015.12.002},
  keywords = {Monte Carlo, importance sampling, surrogate modeling, multifidelity methods},
}
-
T. Cui, K. J. H. Law, Y. M. Marzouk.
Dimension-independent likelihood-informed MCMC
Journal of Computational Physics, 304 (2016), pp. 109--137.
Available Links
Abstract
Many Bayesian inference problems require exploring the posterior distribution of high-dimensional parameters that represent the discretization of an underlying function. This work introduces a family of Markov chain Monte Carlo (MCMC) samplers that can adapt to the particular structure of a posterior distribution over functions. Two distinct lines of research intersect in the methods developed here. First, we introduce a general class of operator-weighted proposal distributions that are well defined on function space, such that the performance of the resulting MCMC samplers is independent of the discretization of the function. Second, by exploiting local Hessian information and any associated low-dimensional structure in the change from prior to posterior distributions, we develop an inhomogeneous discretization scheme for the Langevin stochastic differential equation that yields operator-weighted proposals adapted to the non-Gaussian structure of the posterior. The resulting dimension-independent, likelihood-informed (DILI) MCMC samplers may be useful for a large class of high-dimensional problems where the target probability measure has a density with respect to a Gaussian reference measure. Two nonlinear inverse problems are used to demonstrate the efficiency of these DILI samplers: an elliptic PDE coefficient inverse problem and path reconstruction in a conditioned diffusion.
BibTeX Citation
@article{art_38,
  author   = {Cui, T. and Law, K. J. H. and Marzouk, Y. M.},
  title    = {Dimension-independent likelihood-informed {MCMC}},
  journal  = {Journal of Computational Physics},
  year     = {2016},
  month    = jan,
  volume   = {304},
  pages    = {109--137},
  doi      = {10.1016/j.jcp.2015.10.008},
  arxiv    = {1411.3688},
  keywords = {Markov chain Monte Carlo, likelihood-informed subspace, infinite-dimensional inverse problems, Langevin SDE, conditioned diffusion},
}
-
P. Conrad, Y. M. Marzouk, N. Pillai, A. Smith.
Accelerating asymptotically exact MCMC for computationally intensive models via local approximations
Journal of the American Statistical Association, 111 (2016), pp. 1591--1607.
Available Links
Abstract
We construct a new framework for accelerating Markov chain Monte Carlo in posterior sampling problems where standard methods are limited by the computational cost of the likelihood, or of numerical models embedded therein. Our approach introduces local approximations of these models into the Metropolis-Hastings kernel, borrowing ideas from deterministic approximation theory, optimization, and experimental design. Previous efforts at integrating approximate models into inference typically sacrifice either the sampler's exactness or efficiency; our work seeks to address these limitations by exploiting useful convergence characteristics of local approximations. We prove the ergodicity of our approximate Markov chain, showing that it samples asymptotically from the \emph{exact} posterior distribution of interest. We describe variations of the algorithm that employ either local polynomial approximations or local Gaussian process regressors. Our theoretical results reinforce the key observation underlying this paper: when the likelihood has some \emph{local} regularity, the number of model evaluations per MCMC step can be greatly reduced without biasing the Monte Carlo average. Numerical experiments demonstrate multiple order-of-magnitude reductions in the number of forward model evaluations used in representative ODE and PDE inference problems, with both synthetic and real data.
BibTeX Citation
@article{art_36,
  author   = {Conrad, P. and Marzouk, Y. M. and Pillai, N. and Smith, A.},
  title    = {Accelerating asymptotically exact {MCMC} for computationally intensive models via local approximations},
  journal  = {Journal of the American Statistical Association},
  year     = {2016},
  volume   = {111},
  number   = {516},
  pages    = {1591--1607},
  doi      = {10.1080/01621459.2015.1096787},
  arxiv    = {1402.1694},
  keywords = {Markov chain Monte Carlo, experimental design, approximation theory, local approximation, computer experiments, emulators},
}
-
A. Spantini, A. Solonen, T. Cui, J. Martin, L. Tenorio, Y. M. Marzouk.
Optimal low-rank approximations of Bayesian linear inverse problems
SIAM Journal on Scientific Computing, 37 (2015), pp. A2451--A2487.
Available Links
Abstract
In the Bayesian approach to inverse problems, data are often informative, relative to the prior, only on a low-dimensional subspace of the parameter space. Significant computational savings can be achieved by using this subspace to characterize and approximate the posterior distribution of the parameters. We first investigate approximation of the posterior covariance matrix as a low-rank update of the prior covariance matrix. We prove optimality of a particular update, based on the leading eigendirections of the matrix pencil defined by the Hessian of the negative log-likelihood and the prior precision, for a broad class of loss functions. This class includes the F\"{o}rstner metric for symmetric positive definite matrices, as well as the Kullback-Leibler divergence and the Hellinger distance between the associated distributions. We also propose two fast approximations of the posterior mean and prove their optimality with respect to a weighted Bayes risk under squared-error loss. These approximations are deployed in an offline-online manner, where a more costly but data-independent offline calculation is followed by fast online evaluations. As a result, these approximations are particularly useful when repeated posterior mean evaluations are required for multiple data sets. We demonstrate our theoretical results with several numerical examples, including high-dimensional X-ray tomography and an inverse heat conduction problem. In both of these examples, the intrinsic low-dimensional structure of the inference problem can be exploited while producing results that are essentially indistinguishable from solutions computed in the full space.
BibTeX Citation
@article{art_35,
  author   = {Spantini, A. and Solonen, A. and Cui, T. and Martin, J. and Tenorio, L. and Marzouk, Y. M.},
  title    = {Optimal low-rank approximations of {Bayesian} linear inverse problems},
  journal  = {SIAM Journal on Scientific Computing},
  year     = {2015},
  volume   = {37},
  number   = {6},
  pages    = {A2451--A2487},
  doi      = {10.1137/140977308},
  arxiv    = {1407.3463},
  keywords = {inverse problems, Bayesian inference, low-rank approximation, covariance approximation, F{\"o}rstner-Moonen metric, posterior mean approximation, Bayes risk, optimality},
}
-
T. Weng, Z. Zhang, Z. Su, Y. M. Marzouk, A. Melloni, L. Daniel.
Uncertainty quantification of silicon photonic devices with correlated and non-Gaussian random parameters
Optics Express, 23 (2015), pp. 4242--4254.
Available Links
Abstract
Process variations can significantly degrade device performance and chip yield in silicon photonics. In order to reduce the design and production costs, it is highly desirable to predict the statistical behavior of a device before the final fabrication. Monte Carlo is the mainstream computational technique used to estimate the uncertainties caused by process variations. However, it is very often too expensive due to its slow convergence rate. Recently, stochastic spectral methods based on polynomial chaos expansions have emerged as a promising alternative, and they have shown significant speedup over Monte Carlo in many engineering problems. The existing literature mostly assumes that the random parameters are mutually independent. However, in practical applications such assumption may not be necessarily accurate. In this paper, we develop an efficient numerical technique based on stochastic collocation to simulate silicon photonics with correlated and non-Gaussian random parameters. The effectiveness of our proposed technique is demonstrated by the simulation results of a silicon-on-insulator based directional coupler example. Since the mathematic formulation in this paper is very generic, our proposed algorithm can be applied to a large class of photonic design cases as well as to many other engineering problems.
BibTeX Citation
@article{art_34,
  author  = {Weng, T. and Zhang, Z. and Su, Z. and Marzouk, Y. M. and Melloni, A. and Daniel, L.},
  title   = {Uncertainty quantification of silicon photonic devices with correlated and {non-Gaussian} random parameters},
  journal = {Optics Express},
  year    = {2015},
  volume  = {23},
  number  = {4},
  pages   = {4242--4254},
  doi     = {10.1364/OE.23.004242},
}
-
D. Kohler, Y. M. Marzouk, J. Müller, U. Wever.
A new network approach to Bayesian inference in partial differential equations
International Journal for Numerical Methods in Engineering, 104 (2015), pp. 313--329.
Available Links
Abstract
We introduce a novel numerical approach to Bayesian parameter estimation in partial differential equations. The main idea is to translate the equation into a state-discrete dynamic Bayesian network with the discretization of cellular probabilistic automata. There exists a vast pool of inference algorithms in the probabilistic graphical models framework which can be applied to the network. In particular, we reformulate the parameter estimation problem as a filtering problem, discuss requirements for our specific setup, and apply the Boyen-Koller algorithm. To demonstrate our ideas, the scheme is applied to the problem of arsenate advection and adsorption in a water pipe: from measurements of the concentration of dissolved arsenate at the outflow boundary condition, we infer the strength of an arsenate source at the inflow boundary condition.
BibTeX Citation
@article{art_33,
  author   = {Kohler, D. and Marzouk, Y. M. and M{\"u}ller, J. and Wever, U.},
  title    = {A new network approach to {Bayesian} inference in partial differential equations},
  journal  = {International Journal for Numerical Methods in Engineering},
  year     = {2015},
  month    = nov,
  volume   = {104},
  number   = {5},
  pages    = {313--329},
  doi      = {10.1002/nme.4928},
  keywords = {partial differential equations, dynamic Bayesian networks, Boyen--Koller algorithm, cellular probabilistic automata},
}
-
M. A. Chilenski, M. Greenwald, Y. M. Marzouk, N. T. Howard, A. E. White, J. Rice, J. R. Walk.
Improved profile fitting and quantification of uncertainty in experimental measurements of impurity transport coefficients using Gaussian process regression
Nuclear Fusion, 55 (2015), pp. 023012.
Available Links
Abstract
The need to fit smooth temperature and density profiles to discrete observations is ubiquitous in plasma physics, but the prevailing techniques for this have many shortcomings that cast doubt on the statistical validity of the results. This issue is amplified in the context of validation of gyrokinetic transport models (Holland et al. 2009, Phys. Plasmas 16, 052301), where the strong sensitivity of the code outputs to input gradients means that inadequacies in the profile fitting technique can easily lead to an incorrect assessment of the degree of agreement with experimental measurements. In order to rectify the shortcomings of standard approaches to profile fitting, we have applied Gaussian process regression (GPR), a powerful nonparametric regression technique, to analyze an Alcator C-Mod L-mode discharge used for past gyrokinetic validation work (Howard et al. 2012, Nucl. Fusion 52, 063002). We show that the GPR techniques can reproduce the previous results while delivering more statistically rigorous fits and uncertainty estimates for both the value and the gradient of plasma profiles with an improved level of automation. We also discuss how the use of GPR can allow for dramatic increases in the rate of convergence of uncertainty propagation for any code that takes experimental profiles as inputs. The new GPR techniques for profile fitting and uncertainty propagation are quite useful and general, and we describe the steps to implementation in detail in this paper. These techniques have the potential to substantially improve the quality of uncertainty estimates on profile fits and the rate of convergence of uncertainty propagation, making them of great interest for wider use in fusion experiments and modeling efforts.
BibTeX Citation
@article{art_32,
  author   = {Chilenski, M. A. and Greenwald, M. and Marzouk, Y. M. and Howard, N. T. and White, A. E. and Rice, J. and Walk, J. R.},
  title    = {Improved profile fitting and quantification of uncertainty in experimental measurements of impurity transport coefficients using {Gaussian} process regression},
  journal  = {Nuclear Fusion},
  year     = {2015},
  volume   = {55},
  number   = {2},
  pages    = {023012},
  doi      = {10.1088/0029-5515/55/2/023012},
  keywords = {plasma physics, Gaussian process regression, uncertainty propagation, model validation},
}
-
R. Aggarwal, M. Demkowicz, Y. M. Marzouk.
Bayesian inference of substrate properties from film behavior
Modelling and Simulation in Materials Science and Engineering, 23 (2015), pp. 015009.
Available Links
Abstract
We demonstrate that, by observing the behavior of a film deposited on a substrate, certain features of the substrate may be inferred with quantified uncertainty using Bayesian methods. We carry out this demonstration on an illustrative film/substrate model, where the substrate is a Gaussian random field and the film is a two-component mixture that obeys the Cahn-Hilliard equation. We construct a stochastic reduced order model to describe the film/substrate interaction and use it to infer substrate properties from film behavior. This quantitative inference strategy may be adapted to other film/substrate systems.
BibTeX Citation
@article{art_31,
  author  = {Aggarwal, R. and Demkowicz, M. and Marzouk, Y. M.},
  title   = {{Bayesian} inference of substrate properties from film behavior},
  journal = {Modelling and Simulation in Materials Science and Engineering},
  year    = {2015},
  volume  = {23},
  pages   = {015009},
  doi     = {10.1088/0965-0393/23/1/015009},
}
-
N. Galagali, Y. M. Marzouk.
Bayesian inference of chemical kinetic models from proposed reactions
Chemical Engineering Science, 123 (2015), pp. 170--190.
Available Links
Abstract
Bayesian inference provides a natural framework for combining experimental data with prior knowledge to develop chemical kinetic models and quantify the associated uncertainties, not only in parameter values but also in model structure. Most existing applications of Bayesian model selection methods to chemical kinetics have been limited to comparisons among a small set of models, however. The significant computational cost of evaluating posterior model probabilities renders traditional Bayesian methods infeasible when the model space becomes large. We present a new framework for tractable Bayesian model inference and uncertainty quantification using a large number of systematically generated model hypotheses. The approach involves imposing point-mass mixture priors over rate constants and exploring the resulting posterior distribution using an adaptive Markov chain Monte Carlo method. The posterior samples are used to identify plausible models, to quantify rate constant uncertainties, and to extract key diagnostic information about model structure---such as the reactions and operating pathways most strongly supported by the data. We provide numerical demonstrations of the proposed framework by inferring kinetic models for catalytic steam and dry reforming of methane using available experimental data.
BibTeX Citation
@article{art_30,
  author   = {Galagali, N. and Marzouk, Y. M.},
  title    = {{Bayesian} inference of chemical kinetic models from proposed reactions},
  journal  = {Chemical Engineering Science},
  year     = {2015},
  month    = feb,
  volume   = {123},
  pages    = {170--190},
  doi      = {10.1016/j.ces.2014.10.030},
  url      = {http://www.sciencedirect.com/science/article/pii/S0009250914005983},
  keywords = {Bayesian inference, chemical kinetics, model selection, Markov chain Monte Carlo, adaptive MCMC, online expectation maximization},
}
-
T. Cui, Y. M. Marzouk, K. Willcox.
Data-driven model reduction for the Bayesian solution of inverse problems
International Journal for Numerical Methods in Engineering, 102 (2015), pp. 966--990.
Available Links
Abstract
One of the major challenges in the Bayesian solution of inverse problems governed by partial differential equations (PDEs) is the computational cost of repeatedly evaluating numerical PDE models, as required by Markov chain Monte Carlo (MCMC) methods for posterior sampling. This paper proposes a data-driven projection-based model reduction technique to reduce this computational cost. The proposed technique has two distinctive features. First, the model reduction strategy is tailored to inverse problems: the snapshots used to construct the reduced-order model are computed adaptively from the posterior distribution. Posterior exploration and model reduction are thus pursued simultaneously. Second, to avoid repeated evaluations of the full-scale numerical model as in a standard MCMC method, we couple the full-scale model and the reduced-order model together in the MCMC algorithm. This maintains accurate inference while reducing its overall computational cost. In numerical experiments considering steady-state flow in a porous medium, the data-driven reduced-order model achieves better accuracy than a reduced-order model constructed using the classical approach. It also improves posterior sampling efficiency by several orders of magnitude compared to a standard MCMC method.
BibTeX Citation
@article{art_29,
  author        = {Cui, T. and Marzouk, Y. M. and Willcox, K.},
  title         = {Data-driven model reduction for the {Bayesian} solution of inverse problems},
  journal       = {International Journal for Numerical Methods in Engineering},
  year          = {2015},
  volume        = {102},
  number        = {5},
  pages         = {966--990},
  doi           = {10.1002/nme.4748},
  eprint        = {1403.4290},
  archiveprefix = {arXiv},
  keywords      = {model reduction; inverse problem; adaptive Markov chain Monte Carlo; approximate Bayesian inference},
}
-
T. Cui, J. Martin, Y. M. Marzouk, A. Solonen, A. Spantini.
Likelihood-informed dimension reduction for nonlinear inverse problems
Inverse Problems, 30 (2014), pp. 114015.
Available Links
Abstract
The intrinsic dimensionality of an inverse problem is affected by prior information, the accuracy and number of observations, and the smoothing properties of the forward operator. From a Bayesian perspective, changes from the prior to the posterior may, in many problems, be confined to a relatively low-dimensional subspace of the parameter space. We present a dimension reduction approach that defines and identifies such a subspace, called the "likelihood-informed subspace" (LIS), by characterizing the relative influences of the prior and the likelihood over the support of the posterior distribution. This identification enables new and more efficient computational methods for Bayesian inference with nonlinear forward models and Gaussian priors. In particular, we approximate the posterior distribution as the product of a lower-dimensional posterior defined on the LIS and the prior distribution marginalized onto the complementary subspace. Markov chain Monte Carlo sampling can then proceed in lower dimensions, with significant gains in computational efficiency. We also introduce a Rao-Blackwellization strategy that de-randomizes Monte Carlo estimates of posterior expectations for additional variance reduction. We demonstrate the efficiency of our methods using two numerical examples: inference of permeability in a groundwater system governed by an elliptic PDE, and an atmospheric remote sensing problem based on Global Ozone Monitoring System (GOMOS) observations.
BibTeX Citation
@article{art_28,
  author        = {Cui, T. and Martin, J. and Marzouk, Y. M. and Solonen, A. and Spantini, A.},
  title         = {Likelihood-informed dimension reduction for nonlinear inverse problems},
  journal       = {Inverse Problems},
  year          = {2014},
  volume        = {30},
  number        = {11},
  pages         = {114015},
  doi           = {10.1088/0266-5611/30/11/114015},
  eprint        = {1403.4680},
  archiveprefix = {arXiv},
  keywords      = {Inverse problem, Bayesian inference, dimension reduction, low-rank approximation, Markov chain Monte Carlo, variance reduction},
}
-
A. Gorodetsky, Y. M. Marzouk.
Efficient localization of discontinuities in complex computational simulations
SIAM Journal on Scientific Computing, 36 (2014), pp. A2584--A2610.
Available Links
Abstract
Surrogate models for computational simulations are input-output approximations that allow computationally intensive analyses, such as uncertainty propagation and inference, to be performed efficiently. When a simulation output does not depend smoothly on its inputs, the error and convergence rate of many approximation methods deteriorate substantially. This paper details a method for efficiently localizing discontinuities in the input parameter domain, so that the model output can be approximated as a piecewise smooth function. The approach comprises an initialization phase, which uses polynomial annihilation to assign function values to different regions and thus seed an automated labeling procedure, followed by a refinement phase that adaptively updates a kernel support vector machine representation of the separating surface via active learning. The overall approach avoids structured grids and exploits any available simplicity in the geometry of the separating surface, thus reducing the number of model evaluations required to localize the discontinuity. The method is illustrated on examples of up to eleven dimensions, including algebraic models and ODE/PDE systems, and demonstrates improved scaling and efficiency over other discontinuity localization approaches.
BibTeX Citation
@article{art_27,
  author        = {Gorodetsky, A. and Marzouk, Y. M.},
  title         = {Efficient localization of discontinuities in complex computational simulations},
  journal       = {SIAM Journal on Scientific Computing},
  year          = {2014},
  volume        = {36},
  number        = {6},
  pages         = {A2584--A2610},
  eprint        = {1402.2845},
  archiveprefix = {arXiv},
  keywords      = {discontinuity detection, polynomial annihilation, function approximation, support vector machines, active learning, uncertainty quantification},
}
-
J. Li, Y. M. Marzouk.
Adaptive construction of surrogates for the Bayesian solution of inverse problems
SIAM Journal on Scientific Computing, 36 (2014), pp. A1163--A1186.
Available Links
Abstract
The Bayesian approach to inverse problems typically relies on posterior sampling approaches, such as Markov chain Monte Carlo, for which the generation of each sample requires one or more evaluations of the parameter-to-observable map or forward model. When these evaluations are computationally intensive, approximations of the forward model are essential to accelerating sample-based inference. Yet the construction of globally accurate approximations for nonlinear forward models can be computationally prohibitive and in fact unnecessary, as the posterior distribution typically concentrates on a small fraction of the support of the prior distribution. We present a new approach that uses stochastic optimization to construct polynomial approximations over a sequence of measures adaptively determined from the data, eventually concentrating on the posterior distribution. The approach yields substantial gains in efficiency and accuracy over prior-based surrogates, as demonstrated via application to inverse problems in partial differential equations.
BibTeX Citation
@article{art_26,
  author        = {Li, J. and Marzouk, Y. M.},
  title         = {Adaptive construction of surrogates for the {Bayesian} solution of inverse problems},
  journal       = {SIAM Journal on Scientific Computing},
  year          = {2014},
  volume        = {36},
  number        = {3},
  pages         = {A1163--A1186},
  eprint        = {1309.5524},
  archiveprefix = {arXiv},
  keywords      = {Bayesian inference, cross-entropy method, importance sampling, inverse problem, Kullback-Leibler divergence, Markov chain Monte Carlo, polynomial chaos},
}
-
X. Huan, Y. M. Marzouk.
Gradient-based stochastic optimization methods in Bayesian experimental design
International Journal for Uncertainty Quantification, 4 (2014), pp. 479--510.
Available Links
Abstract
Optimal experimental design (OED) seeks experiments expected to yield the most useful data for some purpose. In practical circumstances where experiments are time-consuming or resource-intensive, OED can yield enormous savings. We pursue OED for nonlinear systems from a Bayesian perspective, with the goal of choosing experiments that are optimal for parameter inference. Our objective in this context is the expected information gain in model parameters, which in general can only be estimated using Monte Carlo methods. Maximizing this objective thus becomes a stochastic optimization problem. This paper develops gradient-based stochastic optimization methods for the design of experiments on a continuous parameter space. Given a Monte Carlo estimator of expected information gain, we use infinitesimal perturbation analysis to derive gradients of this estimator. We are then able to formulate two gradient-based stochastic optimization approaches: (i) Robbins-Monro stochastic approximation, and (ii) sample average approximation combined with a deterministic quasi-Newton method. A polynomial chaos approximation of the forward model accelerates objective and gradient evaluations in both cases. We discuss the implementation of these optimization methods, then conduct an empirical comparison of their performance. To demonstrate design in a nonlinear setting with partial differential equation forward models, we use the problem of sensor placement for source inversion. Numerical results yield useful guidelines on the choice of algorithm and sample sizes, assess the impact of estimator bias, and quantify tradeoffs of computational cost versus solution quality and robustness.
BibTeX Citation
@article{art_25,
  author        = {Huan, X. and Marzouk, Y. M.},
  title         = {Gradient-based stochastic optimization methods in {Bayesian} experimental design},
  journal       = {International Journal for Uncertainty Quantification},
  year          = {2014},
  volume        = {4},
  number        = {6},
  pages         = {479--510},
  doi           = {10.1615/Int.J.UncertaintyQuantification.2014006730},
  eprint        = {1212.2228},
  archiveprefix = {arXiv},
}
-
J. Winokur, P. Conrad, I. Sraj, O. Knio, A. Srinivasan, W. C. Thacker, Y. M. Marzouk, M. Iskandarani.
A priori testing of sparse adaptive polynomial chaos expansions using an ocean general circulation model database
Computational Geosciences, 17 (2013), pp. 899--911.
Available Links
Abstract
This work explores the implementation of an adaptive strategy to design sparse ensembles of oceanic simulations suitable for constructing polynomial chaos surrogates. We use a recently developed pseudo-spectral algorithm that is based on a direct application of the Smolyak sparse grid formula and that allows the use of arbitrary admissible sparse grids. The adaptive algorithm is tested using an existing simulation database of the oceanic response to Hurricane Ivan in the Gulf of Mexico. The a priori tests demonstrate that sparse and adaptive pseudo-spectral constructions lead to substantial savings over isotropic sparse sampling in the present setting.
BibTeX Citation
@article{art_24,
  author  = {Winokur, J. and Conrad, P. and Sraj, I. and Knio, O. and Srinivasan, A. and Thacker, W. C. and Marzouk, Y. M. and Iskandarani, M.},
  title   = {A priori testing of sparse adaptive polynomial chaos expansions using an ocean general circulation model database},
  journal = {Computational Geosciences},
  year    = {2013},
  volume  = {17},
  number  = {6},
  pages   = {899--911},
  doi     = {10.1007/s10596-013-9361-3},
}
-
P. Conrad, Y. M. Marzouk.
Adaptive Smolyak pseudospectral approximations
SIAM Journal on Scientific Computing, 35 (2013), pp. A2643--A2670.
Available Links
Abstract
Polynomial approximations of computationally intensive models are central to uncertainty quantification. This paper describes an adaptive method for non-intrusive pseudospectral approximation, based on Smolyak's algorithm with generalized sparse grids. We rigorously analyze and extend the non-adaptive method proposed in [Constantine et al. 2012], and compare it to a common alternative approach for using sparse grids to construct polynomial approximations, direct quadrature. Analysis of direct quadrature shows that O(1) errors are an intrinsic property of some configurations of the method, as a consequence of internal aliasing. We provide precise conditions, based on the chosen polynomial basis and quadrature rules, under which this aliasing error occurs. We then establish theoretical results on the accuracy of Smolyak pseudospectral approximation, and show that the Smolyak approximation avoids internal aliasing and makes far more effective use of sparse function evaluations. These results are applicable to broad choices of quadrature rule and generalized sparse grids. Exploiting this flexibility, we introduce a greedy heuristic for adaptive refinement of the pseudospectral approximation. We numerically demonstrate convergence of the algorithm on the Genz test functions, and illustrate the accuracy and efficiency of the adaptive approach on a realistic chemical kinetics problem.
BibTeX Citation
@article{art_23,
  author        = {Conrad, P. and Marzouk, Y. M.},
  title         = {Adaptive {Smolyak} pseudospectral approximations},
  journal       = {SIAM Journal on Scientific Computing},
  year          = {2013},
  volume        = {35},
  number        = {6},
  pages         = {A2643--A2670},
  doi           = {10.1137/120890715},
  eprint        = {1209.1406},
  archiveprefix = {arXiv},
  keywords      = {Smolyak algorithms, sparse grids, orthogonal polynomials, pseudospectral approximation, approximation theory, uncertainty quantification},
}
-
G. Bal, I. Langmore, Y. M. Marzouk.
Bayesian inverse problems with Monte Carlo forward models
Inverse Problems and Imaging, 7 (2013), pp. 81--105.
Available Links
Abstract
The full application of Bayesian inference to inverse problems requires exploration of a posterior distribution that typically does not possess a standard form. In this context, Markov chain Monte Carlo (MCMC) methods are often used. These methods require many evaluations of a computationally intensive forward model to produce the equivalent of one independent sample from the posterior. We consider applications in which approximate forward models at multiple resolution levels are available, each endowed with a probabilistic error estimate. These situations occur, for example, when the forward model involves Monte Carlo integration. We present a novel MCMC method called $MC^3$ that uses low-resolution forward models to approximate draws from a posterior distribution built with the high-resolution forward model. The acceptance ratio is estimated with some statistical error; then a confidence interval for the true acceptance ratio is found, and acceptance is performed correctly with some confidence. The high-resolution models are rarely run and a significant speed up is achieved. Our multiple-resolution forward models themselves are built around a new importance sampling scheme that allows Monte Carlo forward models to be used efficiently in inverse problems. The method is used to solve an inverse transport problem that finds applications in atmospheric remote sensing. We present a path-recycling methodology to efficiently vary parameters in the transport equation. The forward transport equation is solved by a Monte Carlo method that is amenable to the use of $MC^3$ to solve the inverse transport problem using a Bayesian formalism.
BibTeX Citation
@article{art_22,
  author   = {Bal, G. and Langmore, I. and Marzouk, Y. M.},
  title    = {{Bayesian} inverse problems with {Monte Carlo} forward models},
  journal  = {Inverse Problems and Imaging},
  year     = {2013},
  volume   = {7},
  number   = {1},
  pages    = {81--105},
  doi      = {10.3934/ipi.2013.7.81},
  keywords = {linear transport; perturbation Monte Carlo; Bayesian; importance sampling; inverse problems; Markov chain Monte Carlo},
}
-
X. Huan, Y. M. Marzouk.
Simulation-based optimal Bayesian experimental design for nonlinear systems
Journal of Computational Physics, 232 (2013), pp. 288--317.
Available Links
Abstract
The optimal selection of experimental conditions is essential to maximizing the value of data for inference and prediction, particularly in situations where experiments are time-consuming and expensive to conduct. We propose a general mathematical framework and an algorithmic approach for optimal experimental design with nonlinear simulation-based models; in particular, we focus on finding sets of experiments that provide the most information about targeted sets of parameters. Our framework employs a Bayesian statistical setting, which provides a foundation for inference from noisy, indirect, and incomplete data, and a natural mechanism for incorporating heterogeneous sources of information. An objective function is constructed from information theoretic measures, reflecting expected information gain from proposed combinations of experiments. Polynomial chaos approximations and a two-stage Monte Carlo sampling method are used to evaluate the expected information gain. Stochastic approximation algorithms are then used to make optimization feasible in computationally intensive and high-dimensional settings. These algorithms are demonstrated on model problems and on nonlinear parameter inference problems arising in detailed combustion kinetics.
BibTeX Citation
@article{art_21,
  author        = {Huan, X. and Marzouk, Y. M.},
  title         = {Simulation-based optimal {Bayesian} experimental design for nonlinear systems},
  journal       = {Journal of Computational Physics},
  year          = {2013},
  month         = jan,
  volume        = {232},
  number        = {1},
  pages         = {288--317},
  doi           = {10.1016/j.jcp.2012.08.013},
  eprint        = {1108.4146},
  archiveprefix = {arXiv},
  keywords      = {Uncertainty quantification; Bayesian inference; Optimal experimental design; Nonlinear experimental design; Stochastic approximation; Shannon information; Chemical kinetics},
}
-
T. A. Moselhy, Y. M. Marzouk.
Bayesian inference with optimal maps
Journal of Computational Physics, 231 (2012), pp. 7815--7850.
Available Links
Abstract
We present a new approach to Bayesian inference that entirely avoids Markov chain simulation, by constructing a map that pushes forward the prior measure to the posterior measure. Existence and uniqueness of a suitable measure-preserving map is established by formulating the problem in the context of optimal transport theory. We discuss various means of explicitly parameterizing the map and computing it efficiently through solution of an optimization problem, exploiting gradient information from the forward model when possible. The resulting algorithm overcomes many of the computational bottlenecks associated with Markov chain Monte Carlo. Advantages of a map-based representation of the posterior include analytical expressions for posterior moments and the ability to generate arbitrary numbers of independent posterior samples without additional likelihood evaluations or forward solves. The optimization approach also provides clear convergence criteria for posterior approximation and facilitates model selection through automatic evaluation of the marginal likelihood. We demonstrate the accuracy and efficiency of the approach on nonlinear inverse problems of varying dimension, involving the inference of parameters appearing in ordinary and partial differential equations.
BibTeX Citation
@article{art_20,
  author   = {Moselhy, T. A. and Marzouk, Y. M.},
  title    = {{Bayesian} inference with optimal maps},
  journal  = {Journal of Computational Physics},
  year     = {2012},
  volume   = {231},
  number   = {23},
  pages    = {7815--7850},
  doi      = {10.1016/j.jcp.2012.07.022},
  keywords = {Bayesian inference; Optimal transport; Measure-preserving maps; Inverse problems; Polynomial chaos; Numerical optimization},
}
-
J. Ray, S. McKenna, B. van Bloemen Waanders, Y. M. Marzouk.
Bayesian reconstruction of binary media with unresolved fine-scale spatial structures
Advances in Water Resources, 44 (2012), pp. 1--19.
Available Links
Abstract
BibTeX Citation
@article{art_19,
  author   = {Ray, J. and McKenna, S. and van Bloemen Waanders, B. and Marzouk, Y. M.},
  title    = {{Bayesian} reconstruction of binary media with unresolved fine-scale spatial structures},
  journal  = {Advances in Water Resources},
  year     = {2012},
  month    = aug,
  volume   = {44},
  pages    = {1--19},
  doi      = {10.1016/j.advwatres.2012.04.009},
  keywords = {Upscaling; Binary media; Bayesian technique; Multiscale inference},
}
-
A. Narayan, Y. M. Marzouk, D. Xiu.
Sequential data assimilation with multiple models
Journal of Computational Physics, 231 (2012), pp. 6401--6418.
Available Links
Abstract
Data assimilation is an essential tool for predicting the behavior of real physical systems given approximate simulation models and limited observations. For many complex systems, there may exist several models, each with different properties and predictive capabilities. It is desirable to incorporate multiple models into the assimilation procedure in order to obtain a more accurate prediction of the physics than any model alone can provide. In this paper, we propose a framework for conducting sequential data assimilation with multiple models and sources of data. The assimilated solution is a linear combination of all model predictions and data. One notable feature is that the combination takes the most general form with matrix weights. By doing so the method can readily utilize different weights in different sections of the solution state vectors, allow the models and data to have different dimensions, and deal with the case of a singular state covariance. We prove that the proposed assimilation method, termed direct assimilation, minimizes a variational functional, a generalized version of the one used in the classical Kalman filter. We also propose an efficient iterative assimilation method that assimilates two models at a time until all models and data are assimilated. The mathematical equivalence of the iterative method and the direct method is established. Numerical examples are presented to demonstrate the effectiveness of the new method.
BibTeX Citation
@article{art_18,
  author   = {Narayan, A. and Marzouk, Y. M. and Xiu, D.},
  title    = {Sequential data assimilation with multiple models},
  journal  = {Journal of Computational Physics},
  year     = {2012},
  volume   = {231},
  number   = {19},
  pages    = {6401--6418},
  doi      = {10.1016/j.jcp.2012.06.002},
  keywords = {Uncertainty quantification; Data assimilation; Kalman filter; Model averaging},
}
-
R. D. Berry, H. N. Najm, B. J. Debusschere, Y. M. Marzouk, H. Adalsteinsson.
Data-free inference of the joint distribution of uncertain model parameters
Journal of Computational Physics, 231 (2012), pp. 2180--2198.
Available Links
Abstract
A critical problem in accurately estimating uncertainty in model predictions is the lack of details in the literature on the correlation (or full joint distribution) of uncertain model parameters. In this paper we describe a framework and a class of algorithms for analyzing such “missing data” problems in the setting of Bayesian statistics. The analysis focuses on the family of posterior distributions consistent with given statistics (e.g. nominal values, confidence intervals). The combining of consistent distributions is addressed via techniques from the opinion pooling literature. The developed approach allows subsequent propagation of uncertainty in model inputs consistent with reported statistics, in the absence of data.
BibTeX Citation
@article{art_17,
  author   = {Berry, R. D. and Najm, H. N. and Debusschere, B. J. and Marzouk, Y. M. and Adalsteinsson, H.},
  title    = {Data-free inference of the joint distribution of uncertain model parameters},
  journal  = {Journal of Computational Physics},
  year     = {2012},
  volume   = {231},
  number   = {5},
  pages    = {2180--2198},
  doi      = {10.1016/j.jcp.2011.10.031},
  keywords = {Uncertainty quantification; Bayesian statistics; Missing information},
} -
B. J. Debusschere, Y. M. Marzouk, H. N. Najm, B. Rhoads, D. A. Goussis, M. Valorani.
Computational singular perturbation with non-parametric tabulation of slow manifolds for time integration of stiff chemical kinetics
Combustion Theory and Modelling, 16 (2011), pp. 173--198.
Available Links
Abstract
This paper presents a novel tabulation strategy for the adaptive numerical integration of chemical kinetics using the computational singular perturbation (CSP) method. The strategy stores and reuses CSP quantities required to filter out fast dissipative processes, resulting in a non-stiff chemical source term. In particular, non-parametric regression on low-dimensional slow invariant manifolds (SIMs) in the chemical state space is used to approximate the CSP vectors spanning the fast chemical subspace and the associated fast chemical time-scales. The relevant manifold and its dimension varies depending on the local number of exhausted modes at every location in the chemical state space. Multiple manifolds are therefore tabulated, corresponding to different numbers of exhausted modes (dimensions) and associated radical species. Non-parametric representations are inherently adaptive, and rely on efficient approximate-nearest-neighbor queries. As the CSP information is only a function of the non-radical species in the system and has relatively small gradients in the chemical state space, tabulation occurs in a lower-dimensional state space and at a relatively coarse level, thereby improving scalability to larger chemical mechanisms. The approach is demonstrated on the simulation of homogeneous constant pressure H2--air and CH4--air ignition, over a range of initial conditions. For CH4--air, results are shown that outperform direct implicit integration of the stiff chemical kinetics while maintaining good accuracy.
BibTeX Citation
@article{art_16,
  author   = {Debusschere, B. J. and Marzouk, Y. M. and Najm, H. N. and Rhoads, B. and Goussis, D. A. and Valorani, M.},
  title    = {Computational singular perturbation with non-parametric tabulation of slow manifolds for time integration of stiff chemical kinetics},
  journal  = {Combustion Theory and Modelling},
  year     = {2011},
  volume   = {16},
  number   = {1},
  pages    = {173--198},
  doi      = {10.1080/13647830.2011.596575},
  keywords = {chemical kinetics, computational singular perturbation, slow manifold, non-parametric regression, nearest neighbors, kd-trees},
} -
S. McKenna, J. Ray, Y. M. Marzouk, B. van Bloemen Waanders.
Truncated multi-Gaussian fields and effective conductance of binary media
Advances in Water Resources, 34 (2011), pp. 617--626.
Available Links
Abstract
Truncated Gaussian fields provide a flexible model for defining binary media with dispersed (as opposed to layered) inclusions. General properties of excursion sets on these truncated fields are coupled with a distance-based upscaling algorithm and approximations of point process theory to develop an estimation approach for effective conductivity in two-dimensions. Estimation of effective conductivity is derived directly from knowledge of the kernel size used to create the multiGaussian field, defined as the full-width at half maximum (FWHM), the truncation threshold and conductance values of the two modes. Therefore, instantiation of the multiGaussian field is not necessary for estimation of the effective conductance. The critical component of the effective medium approximation developed here is the mean distance between high conductivity inclusions. This mean distance is characterized as a function of the FWHM, the truncation threshold and the ratio of the two modal conductivities. Sensitivity of the resulting effective conductivity to this mean distance is examined for two levels of contrast in the modal conductances and different FWHM sizes. Results demonstrate that the FWHM is a robust measure of mean travel distance in the background medium. The resulting effective conductivities are accurate when compared to numerical results and results obtained from effective media theory, distance-based upscaling and numerical simulation.
BibTeX Citation
@article{art_15,
  author   = {McKenna, S. and Ray, J. and Marzouk, Y. M. and van Bloemen Waanders, B.},
  title    = {Truncated {multi-Gaussian} fields and effective conductance of binary media},
  journal  = {Advances in Water Resources},
  year     = {2011},
  month    = may,
  volume   = {34},
  number   = {5},
  pages    = {617--626},
  doi      = {10.1016/j.advwatres.2011.02.011},
  keywords = {Upscaling; Binary media; Effective conductivity},
} -
F. Schlegel, D. Wee, Y. M. Marzouk, A. F. Ghoniem.
Contributions of the wall boundary layer to the formation of the counter-rotating vortex pair in transverse jets
Journal of Fluid Mechanics, 676 (2011), pp. 461--490.
Available Links
Abstract
Using high-resolution 3-D vortex simulations, this study seeks a mechanistic understanding of vorticity dynamics in transverse jets at a finite Reynolds number. A full no-slip boundary condition, rigorously formulated in terms of vorticity generation along the channel wall, captures unsteady interactions between the wall boundary layer and the jet -- in particular, the separation of the wall boundary layer and its transport into the interior. For comparison, we also implement a reduced boundary condition that suppresses the separation of the wall boundary layer away from the jet nozzle. By contrasting results obtained with these two boundary conditions, we characterize near-field vortical structures formed as the wall boundary layer separates on the backside of the jet. Using various Eulerian and Lagrangian diagnostics, it is demonstrated that several near-wall vortical structures are formed as the wall boundary layer separates. The counter-rotating vortex pair, manifested by the presence of vortices aligned with the jet trajectory, is initiated closer to the jet exit. Moreover tornado-like wall-normal vortices originate from the separation of spanwise vorticity in the wall boundary layer at the side of the jet and from the entrainment of streamwise wall vortices in the recirculation zone on the lee side. These tornado-like vortices are absent in the case where separation is suppressed. Tornado-like vortices merge with counter-rotating vorticity originating in the jet shear layer, significantly increasing wall-normal circulation and causing deeper jet penetration into the crossflow stream.
BibTeX Citation
@article{art_14,
  author   = {Schlegel, F. and Wee, D. and Marzouk, Y. M. and Ghoniem, A. F.},
  title    = {Contributions of the wall boundary layer to the formation of the counter-rotating vortex pair in transverse jets},
  journal  = {Journal of Fluid Mechanics},
  year     = {2011},
  volume   = {676},
  number   = {1},
  pages    = {461--490},
  doi      = {10.1017/jfm.2011.59},
  keywords = {jets; vortex flows},
} -
F. Rizzi, M. Salloum, Y. M. Marzouk, R. Xu, M. L. Falk, T. P. Weihs, G. Fritz, O. M. Knio.
Bayesian inference of atomic diffusivity in a binary Ni/Al system based on molecular dynamics
SIAM Multiscale Modeling and Simulation, 9 (2011), pp. 486--512.
Available Links
Abstract
This work focuses on characterizing the integral features of atomic diffusion in Ni/Al nanolaminates based on molecular dynamics (MD) computations. Attention is focused on the simplified problem of extracting the diffusivity, D, in an isothermal system at high temperature. To this end, a mixing measure theory is developed that relies on analyzing the moments of the cumulative distribution functions (CDFs) of the constituents. The mixing measures obtained from replica simulations are exploited in a Bayesian inference framework, based on contrasting these measures with corresponding moments of a dimensionless concentration evolving according to a Fickian process. The noise inherent in the MD simulations is described as a Gaussian process, and this hypothesis is verified both a priori and using a posterior predictive check. Computed values of D for an initially unmixed system rapidly heated to 1500 K are found to be consistent with experimental correlation for diffusion of Ni into molten Al. On the contrary, large discrepancies with experimental predictions are observed when D is estimated based on large-time mean-square displacement (MSD) analysis, and when it is evaluated using the Arrhenius correlation calibrated against experimental measurements of self-propagating front velocities. Implications are finally drawn regarding extension of the present work and potential refinement of continuum modeling approaches.
BibTeX Citation
@article{art_13,
  author  = {Rizzi, F. and Salloum, M. and Marzouk, Y. M. and Xu, R. and Falk, M. L. and Weihs, T. P. and Fritz, G. and Knio, O. M.},
  title   = {{Bayesian} inference of atomic diffusivity in a binary {Ni/Al} system based on molecular dynamics},
  journal = {SIAM Multiscale Modeling and Simulation},
  year    = {2011},
  volume  = {9},
  number  = {1},
  pages   = {486--512},
  doi     = {10.1137/10080590X},
} -
J. Ray, Y. M. Marzouk, H. N. Najm.
A Bayesian approach for estimating bioterror attacks from patient data
Statistics in Medicine, 30 (2010), pp. 101--126.
Available Links
Abstract
Terrorist attacks using an aerosolized pathogen have gained credibility as a national security concern after the anthrax attacks of 2001. Inferring some important details of the attack quickly, for example, the number of people infected, the time of infection, and a representative dose received can be crucial to planning a medical response. We use a Bayesian approach, based on a short time series of diagnosed patients, to estimate a joint probability density for these parameters. We first test the formulation with idealized cases and then apply it to realistic scenarios, including the Sverdlovsk anthrax outbreak of 1979. We also use simulated outbreaks to explore the impact of model error, as when the model used for generating simulated epidemic curves does not match the model subsequently used to characterize the attack. We find that in all cases except for the smallest attacks (fewer than 100 infected people), 3--5 days of data are sufficient to characterize the outbreak to a specificity that is useful for directing an emergency response.
BibTeX Citation
@article{art_12,
  author   = {Ray, J. and Marzouk, Y. M. and Najm, H. N.},
  title    = {A {Bayesian} approach for estimating bioterror attacks from patient data},
  journal  = {Statistics in Medicine},
  year     = {2010},
  volume   = {30},
  number   = {2},
  pages    = {101--126},
  doi      = {10.1002/sim.4090},
  keywords = {Bayesian inference; anthrax; Sverdlovsk outbreak; bioterrorism},
} -
D. Wee, Y. M. Marzouk, F. Schlegel, A. F. Ghoniem.
Convergence characteristics and computational cost of two algebraic kernels in vortex methods with a tree-code algorithm
SIAM Journal on Scientific Computing, 31 (2009), pp. 2510--2527.
Available Links
Abstract
We study the convergence characteristics of two algebraic kernels used in vortex calculations: the Rosenhead--Moore kernel, which is a low-order kernel, and the Winckelmans--Leonard kernel, which is a high-order kernel. To facilitate the study, a method of evaluating particle-cluster interactions is introduced for the Winckelmans--Leonard kernel. The method is based on Taylor series expansion in Cartesian coordinates, as initially proposed by Lindsay and Krasny [J. Comput. Phys., 172 (2001), pp. 879--907] for the Rosenhead--Moore kernel. A recurrence relation for the Taylor coefficients of the Winckelmans--Leonard kernel is derived by separating the kernel into two parts, and an error estimate is obtained to ensure adaptive error control. The recurrence relation is incorporated into a tree-code to evaluate vorticity-induced velocity. Next, comparison of convergence is made while utilizing the tree-code. Both algebraic kernels lead to convergence, but the Winckelmans--Leonard kernel exhibits a superior convergence rate. The combined desingularization and discretization error from the Winckelmans--Leonard kernel is an order of magnitude smaller than that from the Rosenhead--Moore kernel at a typical resolution. Simulations of vortex rings are performed using the two algebraic kernels in order to compare their performance in a practical setting. In particular, numerical simulations of the side-by-side collision of two identical vortex rings suggest that the three-dimensional evolution of vorticity at finite resolution can be greatly affected by the choice of the kernel. We find that the Winckelmans--Leonard kernel is able to perform the same task with a much smaller number of vortex elements than the Rosenhead--Moore kernel, greatly reducing the overall computational cost.
BibTeX Citation
@article{art_11,
  author  = {Wee, D. and Marzouk, Y. M. and Schlegel, F. and Ghoniem, A. F.},
  title   = {Convergence characteristics and computational cost of two algebraic kernels in vortex methods with a tree-code algorithm},
  journal = {SIAM Journal on Scientific Computing},
  year    = {2009},
  volume  = {31},
  number  = {4},
  pages   = {2510--2527},
  doi     = {10.1137/080726872},
} -
Y. M. Marzouk, H. N. Najm.
Dimensionality reduction and polynomial chaos acceleration of Bayesian inference in inverse problems
Journal of Computational Physics, 228 (2009), pp. 1862--1902.
Available Links
Abstract
We consider a Bayesian approach to nonlinear inverse problems in which the unknown quantity is a spatial or temporal field, endowed with a hierarchical Gaussian process prior. Computational challenges in this construction arise from the need for repeated evaluations of the forward model (e.g., in the context of Markov chain Monte Carlo) and are compounded by high dimensionality of the posterior. We address these challenges by introducing truncated Karhunen--Loève expansions, based on the prior distribution, to efficiently parameterize the unknown field and to specify a stochastic forward problem whose solution captures that of the deterministic forward model over the support of the prior. We seek a solution of this problem using Galerkin projection on a polynomial chaos basis, and use the solution to construct a reduced-dimensionality surrogate posterior density that is inexpensive to evaluate. We demonstrate the formulation on a transient diffusion equation with prescribed source terms, inferring the spatially-varying diffusivity of the medium from limited and noisy data.
BibTeX Citation
@article{art_10,
  author   = {Marzouk, Y. M. and Najm, H. N.},
  title    = {Dimensionality reduction and polynomial chaos acceleration of {Bayesian} inference in inverse problems},
  journal  = {Journal of Computational Physics},
  year     = {2009},
  volume   = {228},
  number   = {6},
  pages    = {1862--1902},
  doi      = {10.1016/j.jcp.2008.11.024},
  keywords = {Inverse problems; Bayesian inference; Dimensionality reduction; Polynomial chaos; Markov chain Monte Carlo; Galerkin projection; Gaussian processes; Karhunen--Lo{\`e}ve expansion; RKHS},
} -
Y. M. Marzouk, D. Xiu.
A stochastic collocation approach to Bayesian inference in inverse problems
Communications in Computational Physics, 6 (2009), pp. 826--847.
Available Links
Abstract
We present an efficient numerical strategy for the Bayesian solution of inverse problems. Stochastic collocation methods, based on generalized polynomial chaos (gPC), are used to construct a polynomial approximation of the forward solution over the support of the prior distribution. This approximation then defines a surrogate posterior probability density that can be evaluated repeatedly at minimal computational cost. The ability to simulate a large number of samples from the posterior distribution results in very accurate estimates of the inverse solution and its associated uncertainty. Combined with high accuracy of the gPC-based forward solver, the new algorithm can provide great efficiency in practical applications. A rigorous error analysis of the algorithm is conducted, where we establish convergence of the approximate posterior to the true posterior and obtain an estimate of the convergence rate. It is proved that fast (exponential) convergence of the gPC forward solution yields similarly fast (exponential) convergence of the posterior. The numerical strategy and the predicted convergence rates are then demonstrated on nonlinear inverse problems of varying smoothness and dimension.
BibTeX Citation
@article{art_9,
  author        = {Marzouk, Y. M. and Xiu, D.},
  title         = {A stochastic collocation approach to {Bayesian} inference in inverse problems},
  journal       = {Communications in Computational Physics},
  year          = {2009},
  volume        = {6},
  number        = {1},
  pages         = {826--847},
  internal-note = {Source record carried the placeholder "DOI:prism/16" in the doi field; it is not a resolvable DOI, so no doi field is emitted until the real identifier is confirmed.},
} -
H. N. Najm, B. J. Debusschere, Y. M. Marzouk, S. Widmer, O. LeMaître.
Uncertainty quantification in chemical systems
International Journal for Numerical Methods in Engineering, 80 (2009), pp. 789--814.
Available Links
Abstract
We demonstrate the use of multiwavelet spectral polynomial chaos techniques for uncertainty quantification in non-isothermal ignition of a methane--air system. We employ Bayesian inference for identifying the probabilistic representation of the uncertain parameters and propagate this uncertainty through the ignition process. We analyze the time evolution of moments and probability density functions of the solution. We also examine the role and significance of dependence among the uncertain parameters. We finish with a discussion of the role of non-linearity and the performance of the algorithm.
BibTeX Citation
@article{art_8,
  author   = {Najm, H. N. and Debusschere, B. J. and Marzouk, Y. M. and Widmer, S. and LeMa{\^\i}tre, O.},
  title    = {Uncertainty quantification in chemical systems},
  journal  = {International Journal for Numerical Methods in Engineering},
  year     = {2009},
  volume   = {80},
  number   = {6--7},
  pages    = {789--814},
  doi      = {10.1002/nme.2551},
  keywords = {uncertainty quantification; polynomial chaos; multiwavelets; chemistry; ignition},
} -
K. Sargsyan, B. J. Debusschere, H. N. Najm, Y. M. Marzouk.
Bayesian inference of spectral expansions for predictability assessment in stochastic reaction networks
Journal of Computational and Theoretical Nanoscience, 6 (2009), pp. 2283--2297.
Available Links
Abstract
Stochastic reaction networks modeled as jump Markov processes serve as the main mathematical representation of biochemical phenomena in cells, particularly when the relevant molecule count is low, causing deterministic macroscale chemical reaction models to fail. Further, as there is mainly empirical knowledge about the rate parameters, parametric uncertainty analysis becomes very important. The conventional predictability tools for deterministic systems do not readily generalize to the stochastic setting. We use spectral polynomial chaos expansions to represent stochastic processes. Bayesian inference techniques with Markov chain Monte Carlo are used to find the best spectral representation of the system state, taking into account not only intrinsic stochastic noise but also parametric uncertainties. A likelihood-based adaptive domain decomposition is introduced and applied, in particular, for the cases when the parameter range includes deterministic bifurcations. We show that the adaptive multidomain polynomial chaos representation captures the correct system behavior for a benchmark bistable Schlögl model for a wide range of parameter variations.
BibTeX Citation
@article{art_7,
  author   = {Sargsyan, K. and Debusschere, B. J. and Najm, H. N. and Marzouk, Y. M.},
  title    = {{Bayesian} inference of spectral expansions for predictability assessment in stochastic reaction networks},
  journal  = {Journal of Computational and Theoretical Nanoscience},
  year     = {2009},
  volume   = {6},
  number   = {10},
  pages    = {2283--2297},
  doi      = {10.1166/jctn.2009.1285},
  keywords = {uncertainty quantification; bayesian inference; polynomial chaos; stochastic reaction networks; domain decomposition; predictability},
} -
Y. M. Marzouk, H. N. Najm, L. A. Rahn.
Stochastic spectral methods for efficient Bayesian solution of inverse problems
Journal of Computational Physics, 224 (2007), pp. 560--586.
Available Links
Abstract
We present a reformulation of the Bayesian approach to inverse problems, that seeks to accelerate Bayesian inference by using polynomial chaos (PC) expansions to represent random variables. Evaluation of integrals over the unknown parameter space is recast, more efficiently, as Monte Carlo sampling of the random variables underlying the PC expansion. We evaluate the utility of this technique on a transient diffusion problem arising in contaminant source inversion. The accuracy of posterior estimates is examined with respect to the order of the PC representation, the choice of PC basis, and the decomposition of the support of the prior. The computational cost of the new scheme shows significant gains over direct sampling.
BibTeX Citation
@article{art_6,
  author   = {Marzouk, Y. M. and Najm, H. N. and Rahn, L. A.},
  title    = {Stochastic spectral methods for efficient {Bayesian} solution of inverse problems},
  journal  = {Journal of Computational Physics},
  year     = {2007},
  volume   = {224},
  number   = {2},
  pages    = {560--586},
  doi      = {10.1016/j.jcp.2006.10.010},
  keywords = {Inverse problems; Bayesian inference; Polynomial chaos; Monte Carlo; Markov chain Monte Carlo; Spectral methods; Galerkin projection; Diffusive transport},
} -
Y. M. Marzouk, A. F. Ghoniem.
Vorticity structure and evolution in a transverse jet
Journal of Fluid Mechanics, 575 (2007), pp. 267--305.
Available Links
Abstract
Transverse jets arise in many applications, including propulsion, effluent dispersion, oil field flows, and V/STOL aerodynamics. This study seeks a fundamental, mechanistic understanding of the structure and evolution of vorticity in the transverse jet. We develop a high-resolution three-dimensional vortex simulation of the transverse jet at large Reynolds number and consider jet-to-crossflow velocity ratios r ranging from 5 to 10. A new formulation of vorticity-flux boundary conditions accounts for the interaction of channel wall vorticity with the jet flow immediately around the orifice. We demonstrate that the nascent jet shear layer contains not only azimuthal vorticity generated in the jet pipe, but wall-normal and azimuthal perturbations resulting from the jet--crossflow interaction. This formulation also yields analytical expressions for vortex lines in the near field as a function of $r$. Transformation of the cylindrical shear layer emanating from the orifice begins with axial elongation of its lee side to form sections of counter-rotating vorticity aligned with the jet trajectory. Periodic roll-up of the shear layer accompanies this deformation, creating complementary vortex arcs on the lee and windward sides of the jet. Counter-rotating vorticity then drives lee-side roll-ups in the windward direction, along the normal to the jet trajectory. Azimuthal vortex arcs of alternating sign thus approach each other on the windward boundary of the jet. Accordingly, initially planar material rings on the shear layer fold completely and assume an interlocking structure that persists for several diameters above the jet exit. Though the near field of the jet is dominated by deformation and periodic roll-up of the shear layer, the resulting counter-rotating vorticity is a pronounced feature of the mean field; in turn, the mean counter-rotation exerts a substantial influence on the deformation of the shear layer. 
Following the pronounced bending of the trajectory into the crossflow, we observe a sudden breakdown of near-field vortical structures into a dense distribution of smaller scales. Spatial filtering of this region reveals the persistence of counter-rotating streamwise vorticity initiated in the near field.
BibTeX Citation
@article{art_5,
  author  = {Marzouk, Y. M. and Ghoniem, A. F.},
  title   = {Vorticity structure and evolution in a transverse jet},
  journal = {Journal of Fluid Mechanics},
  year    = {2007},
  volume  = {575},
  number  = {1},
  pages   = {267--305},
  doi     = {10.1017/S0022112006004411},
} -
Y. M. Marzouk, A. F. Ghoniem.
K-means clustering for optimal partitioning and dynamic load balancing of parallel hierarchical N-body simulations
Journal of Computational Physics, 207 (2005), pp. 493--528.
Abstract
A number of complex physical problems can be approached through N-body simulation, from fluid flow at high Reynolds number to gravitational astrophysics and molecular dynamics. In all these applications, direct summation is prohibitively expensive for large N and thus hierarchical methods are employed for fast summation. This work introduces new algorithms, based on k-means clustering, for partitioning parallel hierarchical N-body interactions. We demonstrate that the number of particle--cluster interactions and the order at which they are performed are directly affected by partition geometry. Weighted k-means partitions minimize the sum of clusters’ second moments and create well-localized domains, and thus reduce the computational cost of N-body approximations by enabling the use of lower-order approximations and fewer cells. We also introduce compatible techniques for dynamic load balancing, including adaptive scaling of cluster volumes and adaptive redistribution of cluster centroids. We demonstrate the performance of these algorithms by constructing a parallel treecode for vortex particle simulations, based on the serial variable-order Cartesian code developed by Lindsay and Krasny [Journal of Computational Physics 172 (2) (2001) 879--907]. The method is applied to vortex simulations of a transverse jet. Results show outstanding parallel efficiencies even at high concurrencies, with velocity evaluation errors maintained at or below their serial values; on a realistic distribution of 1.2 million vortex particles, we observe a parallel efficiency of 98% on 1024 processors. Excellent load balance is achieved even in the face of several obstacles, such as an irregular, time-evolving particle distribution containing a range of length scales and the continual introduction of new vortex particles throughout the domain. Moreover, results suggest that k-means yields a more efficient partition of the domain than a global oct-tree.
BibTeX Citation
@article{art_4,
  author   = {Marzouk, Y. M. and Ghoniem, A. F.},
  title    = {K-means clustering for optimal partitioning and dynamic load balancing of parallel hierarchical {N-body} simulations},
  journal  = {Journal of Computational Physics},
  year     = {2005},
  volume   = {207},
  number   = {2},
  pages    = {493--528},
  doi      = {10.1016/j.jcp.2005.01.021},
  keywords = {k-means clustering; Treecode; N-body problems; Hierarchical methods; Parallel processing; Load balancing; Particle methods; Vortex methods; Three-dimensional flow; Transverse jet},
} -
Y. M. Marzouk, A. F. Ghoniem, H. N. Najm.
Toward a flame embedding model for turbulent combustion simulation
AIAA Journal, 41 (2003), pp. 641--652.
Available Links
Abstract
Combustion in turbulent flows may take the form of a thin flame wrapped around vortical structures. For this regime, the flame embedding approach seeks to decouple computations of the outer nonreacting flow and the combustion zone by discretizing the flame surface into a number of elemental flames, each incorporating the local impact of unsteady flow-flame interaction. An unsteady strained laminar flame solver, based on a boundary-layer approximation of combustion in a time-dependent stagnation-point potential flow, is proposed as an elemental flame model. To validate the concept, two-dimensional simulations of premixed flame-vortex interactions are performed for a matrix of vortex strengths and length scales, and a section of the flame is selected for comparison with the flame embedding model results. Results show that using the flame leading-edge strain rate gives reasonable agreement in the cases of low strain rate and weak strain rate gradient within the flame structure. This agreement deteriorates substantially when both are high. We propose two different schemes, both based on averaging the strain rate across the flame structure, and demonstrate that agreement between the one-dimensional model and the two-dimensional simulation greatly improves when the actual strain rate at the reaction zone of the one-dimensional flame is made to match that of the two-dimensional flame.
BibTeX Citation
@article{art_3,
  author   = {Marzouk, Y. M. and Ghoniem, A. F. and Najm, H. N.},
  title    = {Toward a flame embedding model for turbulent combustion simulation},
  journal  = {AIAA Journal},
  year     = {2003},
  volume   = {41},
  number   = {4},
  pages    = {641--652},
  keywords = {Two dimensional model; One dimensional model; Premixed flame; Finite difference method; Numerical simulation; Flame structure; Turbulent flame; Combustion},
} -
Y. M. Marzouk, A. F. Ghoniem, H. N. Najm.
Dynamic response of strained premixed flames to equivalence ratio gradients
Proceedings of the Combustion Institute, 28 (2000), pp. 1859--1866.
Abstract
Premixed flames encounter gradients of mixture equivalence ratio in stratified charge engines, lean premixed gas-turbine engines, and a variety of other applications. In cases for which the scales—spatial or temporal—of fuel concentration gradients in the reactants are comparable to flame scales, changes in burning rate, flammability limits, and flame structure have been observed. This paper uses an unsteady strained flame in the stagnation point configuration to examine the effect of temporal gradients on combustion in a premixed methane/air mixture. An inexact Newton backtracking method, coupled with a preconditioned Krylov subspace iterative solver, was used to improve the efficiency of the numerical solution and expand its domain of convergence in the presence of detailed chemistry. Results indicate that equivalence ratio variations with timescales lower than 10 ms have significant effects on the burning process, including reaction zone broadening, burning rate enhancement, and extension of the flammability limit toward leaner mixtures. While the temperature of a flame processing a stoichiometric-to-lean equivalence ratio gradient decreased slightly within the front side of the reaction zone, radical concentrations remained elevated over the entire flame structure. These characteristics are linked to a feature reminiscent of “back-supported” flames—flames in which a stream of products resulting from burning at higher equivalence ratio is continuously supplied to lower equivalence ratio reactants. The relevant feature is the establishment of a positive temperature gradient on the products side of the flame which maintains the temperature high enough and the radical concentration sufficient to sustain combustion there. Unsteadiness in equivalence ratio produces similar gradients within the flame structure, thus compensating for the change in temperature at the leading edge of the reaction zone and accounting for an observed “flame inertia”. 
For sufficiently large equivalence ratio gradients, a flame starting in a stoichiometric mixture can burn through a very lean one by taking advantage of this mechanism.
BibTeX Citation
@article{ art_2, title = { Dynamic response of strained premixed flames to equivalence ratio gradients }, editor = { }, author = { Y. M. Marzouk and A. F. Ghoniem and H. N. Najm }, journal = { Proceedings of the Combustion Institute }, doi = { 10.1016/S0082-0784(00)80589-5 }, year = { 2000 }, volume = { 28 }, number = { 2 }, month = { 0 }, pages = { 1859-1866 }, } -
Y. M. Marzouk, D. P. Hart.
Asymmetric autocorrelation function to resolve directional ambiguity in PIV images
Experiments in Fluids, 25 (1998), pp. 401-408.
Available Links
Abstract
Autocorrelation of a double-exposed image, unlike cross-correlation between two images, produces a correlation function that is symmetric about the origin. Thus, while it is possible to calculate the speed and direction of tracer particles in a particle image velocimetry (PIV) image using autocorrelation, it is impossible to tell whether the velocity is in the positive or negative direction. This ambiguity can be resolved by spatially shifting one exposure relative to the next or labeling exposures with color or polarization, but the complexity and limitations of these methods can be prohibitive. It is, however, possible to resolve the sign of the velocity from a triple-exposed image using unequal time intervals between exposures. Triple-exposed images, like double-exposed images, correlate symmetrically about zero. The directional ambiguity, however, can be resolved by calculating the probability that the three exposures occur in a specific temporal order; that is, by assuming that the correlation has a specific sign and testing to see if the assumption is correct. Traditional spectral and statistical correlation techniques are unable to accomplish this. Presented herein is a computationally efficient asymmetric correlation function that is able to differentiate the temporal order of triple exposed images. Included is a discussion of the limitations of this function and of difficulties in experimental implementation.
BibTeX Citation
@article{ art_1, title = { Asymmetric autocorrelation function to resolve directional ambiguity in PIV images }, editor = { }, author = { Y. M. Marzouk and D. P. Hart }, journal = { Experiments in Fluids }, doi = { 10.1007/s003480050247 }, year = { 1998 }, volume = { 25 }, number = { 5-6 }, month = { 0 }, pages = { 401-408 }, }
-
A. Belhadji, D. Sharp, Y. M. Marzouk.
To discretize continually: Mean shift interacting particle systems for Bayesian inference
Preprint, (2026).
Available Links
Abstract
BibTeX Citation
@article{ belhadji-todiscretizecontinually-2026, title = { To discretize continually: Mean shift interacting particle systems for Bayesian inference }, author = { A. Belhadji and D. Sharp and Y. M. Marzouk }, journal = { Preprint }, month = { 5 }, year = { 2026 }, editor = { }, } -
P. Tsimpos, D. Sharp, Y. M. Marzouk.
One-Shot Generative Flows: Existence and Obstructions
Preprint, (2026).
Available Links
Abstract
BibTeX Citation
@article{ tsimpos-oneshotgenerativeflows-2026, title = { One-Shot Generative Flows: Existence and Obstructions }, author = { P. Tsimpos and D. Sharp and Y. M. Marzouk }, journal = { Preprint }, month = { 04 }, year = { 2026 }, editor = { }, } -
P. Tsimpos, E. Calvello, A. Belhadji, N. H. Nelsen.
One Operator for Many Densities: Amortized Approximation of Conditioning by Neural Operators
Preprint, (2026).
Available Links
Abstract
BibTeX Citation
@article{ tsimpos-oneoperator-2026, title = { One Operator for Many Densities: Amortized Approximation of Conditioning by Neural Operators }, author = { P. Tsimpos and E. Calvello and A. Belhadji and N. H. Nelsen }, journal = { Preprint }, month = { 5 }, year = { 2026 }, editor = { }, } -
J. Zou, Y. M. Marzouk.
Data curation for machine learning interatomic potentials by determinantal point processes
Preprint, (2026).
Available Links
Abstract
BibTeX Citation
@article{ zou-datacurationformachine-2026, title = { Data curation for machine learning interatomic potentials by determinantal point processes }, author = { J. Zou and Y. M. Marzouk }, journal = { Preprint }, month = { 03 }, year = { 2026 }, editor = { }, } -
J. Zou, H. C. Lie, Y. M. Marzouk.
Goal-oriented learning of stochastic dynamical systems using error bounds on path-space observables
Preprint, (2026).
Available Links
Abstract
BibTeX Citation
@article{ zou-goalorientedlearning-2026, title = { Goal-oriented learning of stochastic dynamical systems using error bounds on path-space observables }, author = { J. Zou and H. C. Lie and Y. M. Marzouk }, journal = { Preprint }, month = { 03 }, year = { 2026 }, editor = { }, } -
D. Sharp, B. v. B. Waanders, Y. M. Marzouk.
Sampling through iterated approximation: Gradient-free and multi-fidelity Bayesian inference via transport
Preprint, (2026).
Available Links
Abstract
BibTeX Citation
@article{ sharp-samplingthroughiterated-2026, title = { Sampling through iterated approximation: Gradient-free and multi-fidelity Bayesian inference via transport }, author = { D. Sharp and B. v. B. Waanders and Y. M. Marzouk }, journal = { Preprint }, month = { 03 }, year = { 2026 }, editor = { }, } -
Q. Yang, Q. J. Zhu, J. Giezendanner, Y. M. Marzouk, S. Bates, S. Wang.
Conformal Prediction for Generative Models via Adaptive Cluster-Based Density Estimation
Preprint, (2026).
Available Links
Abstract
BibTeX Citation
@article{ yang-conformalpredictionfor-2026, title = { Conformal Prediction for Generative Models via Adaptive Cluster-Based Density Estimation }, author = { Q. Yang and Q. J. Zhu and J. Giezendanner and Y. M. Marzouk and S. Bates and S. Wang }, journal = { Preprint }, month = { 01 }, year = { 2026 }, editor = { }, } -
K. E. Fisher, M. T. C. Li, Y. M. Marzouk, T. Schorlepp.
Precise asymptotic analysis of Sobolev training for random feature models
Preprint, (2025).
Available Links
Abstract
BibTeX Citation
@article{ fisher-preciseasymptoticanalysis-2025, title = { Precise asymptotic analysis of Sobolev training for random feature models }, author = { K. E. Fisher and M. T. C. Li and Y. M. Marzouk and T. Schorlepp }, journal = { Preprint }, month = { 11 }, year = { 2025 }, editor = { }, } -
F. J. Jorgensen, Y. M. Marzouk.
A Bayesian characterization of ensemble Kalman updates
Preprint, (2025).
Available Links
Abstract
BibTeX Citation
@article{ jorgensen-bayesiancharacterization-2025, title = { A Bayesian characterization of ensemble Kalman updates }, author = { F. J. Jorgensen and Y. M. Marzouk }, journal = { Preprint }, month = { 10 }, year = { 2025 }, editor = { }, } -
A. Bryutkin, Y. M. Marzouk.
Neural Triangular Transport Maps: A New Approach Towards Sampling in Lattice QCD
Preprint, (2025).
Available Links
Abstract
BibTeX Citation
@article{ bryutkin-neuraltriangulartransport-2025, title = { Neural Triangular Transport Maps: A New Approach Towards Sampling in Lattice QCD }, author = { A. Bryutkin and Y. M. Marzouk }, journal = { Preprint }, month = { 10 }, year = { 2025 }, editor = { }, } -
J. Glaubitz, Y. M. Marzouk.
Efficient sampling for sparse Bayesian learning using hierarchical prior normalization
Preprint, (2025).
Available Links
Abstract
We introduce an approach for efficient Markov chain Monte Carlo (MCMC) sampling for challenging high-dimensional distributions in sparse Bayesian learning (SBL). The core innovation involves using hierarchical prior-normalizing transport maps (TMs), which are deterministic couplings that transform the sparsity-promoting SBL prior into a standard normal one. We analytically derive these prior-normalizing TMs by leveraging the product-like form of SBL priors and Knothe--Rosenblatt (KR) rearrangements. These transform the complex target posterior into a simpler reference distribution equipped with a standard normal prior that can be sampled more efficiently. Specifically, one can leverage the standard normal prior by using more efficient, structure-exploiting samplers. Our numerical experiments on various inverse problems -- including signal deblurring, inverting the non-linear inviscid Burgers equation, and recovering an impulse image -- demonstrate significant performance improvements for standard MCMC techniques.
BibTeX Citation
@article{ glaubitz-priornormalizedmcmc-2025, doi = { 10.48550/arXiv.2505.23753 }, title = { Efficient sampling for sparse Bayesian learning using hierarchical prior normalization }, author = { J. Glaubitz and Y. M. Marzouk }, journal = { Preprint }, month = { 5 }, year = { 2025 }, editor = { }, } -
G. A. Gottwald, S. Liu, Y. M. Marzouk, S. Reich, X. T. Tong.
Localized Diffusion Models for High Dimensional Distributions Generation
Preprint, (2025).
Available Links
Abstract
Diffusion models are the state-of-the-art tools for various generative tasks. However, estimating high-dimensional score functions makes them potentially suffer from the curse of dimensionality (CoD). This underscores the importance of better understanding and exploiting low-dimensional structure in the target distribution. In this work, we consider locality structure, which describes sparse dependencies between model components. Under locality structure, the score function is effectively low-dimensional, so that it can be estimated by a localized neural network with significantly reduced sample complexity. This motivates the localized diffusion model, where a localized score matching loss is used to train the score function within a localized hypothesis space. We prove that such localization enables diffusion models to circumvent CoD, at the price of additional localization error. Under realistic sample size scaling, we show both theoretically and numerically that a moderate localization radius can balance the statistical and localization error, leading to a better overall performance. The localized structure also facilitates parallel training of diffusion models, making it potentially more efficient for large-scale applications.
BibTeX Citation
@article{ gottwald-localizeddiffusion-2025, doi = { 10.48550/arXiv.2505.04417 }, title = { Localized Diffusion Models for High Dimensional Distributions Generation }, author = { G. A. Gottwald and S. Liu and Y. M. Marzouk and S. Reich and X. T. Tong }, journal = { Preprint }, month = { 5 }, year = { 2025 }, editor = { }, } -
J. Lindbloom, M. Pasha, J. Glaubitz, Y. M. Marzouk.
Priorconditioned Sparsity-Promoting Projection Methods for Deterministic and Bayesian Linear Inverse Problems
Preprint, (2025).
Available Links
Abstract
High-quality reconstructions of signals and images with sharp edges are needed in a wide range of applications. To overcome the large dimensionality of the parameter space and the complexity of the regularization functional, sparsity-promoting techniques for both deterministic and hierarchical Bayesian regularization rely on solving a sequence of high-dimensional iteratively reweighted least squares (IRLS) problems on a lower-dimensional subspace. Generalized Krylov subspace (GKS) methods are a particularly potent class of hybrid Krylov schemes that efficiently solve sequences of IRLS problems by projecting large-scale problems into a relatively small subspace and successively enlarging it. We refer to methods that promote sparsity and use GKS as S-GKS. A disadvantage of S-GKS methods is their slow convergence. In this work, we propose techniques that improve the convergence of S-GKS methods by combining them with priorconditioning, which we refer to as PS-GKS. Specifically, integrating the PS-GKS method into the IAS algorithm allows us to automatically select the shape/rate parameter of the involved generalized gamma hyper-prior, which is often fine-tuned otherwise. Furthermore, we proposed and investigated variations of the proposed PS-GKS method, including restarting and recycling (resPS-GKS and recPS-GKS). These respectively leverage restarted and recycled subspaces to overcome situations when memory limitations of storing the basis vectors are a concern. We provide a thorough theoretical analysis showing the benefits of priorconditioning for sparsity-promoting inverse problems. Numerical experiments are used to illustrate that the proposed PS-GKS method and its variants are competitive with or outperform other existing hybrid Krylov methods.
BibTeX Citation
@article{ lindbloom-priorconditionedbayes-2025, doi = { 10.48550/arXiv.2505.01827 }, title = { Priorconditioned Sparsity-Promoting Projection Methods for Deterministic and Bayesian Linear Inverse Problems }, author = { J. Lindbloom and M. Pasha and J. Glaubitz and Y. M. Marzouk }, journal = { Preprint }, month = { 5 }, year = { 2025 }, editor = { }, } -
A. Bryutkin, M. E. Levine, U. Urteaga, Y. M. Marzouk.
Canonical Bayesian Linear System Identification
Preprint, (2025).
Available Links
Abstract
Standard Bayesian approaches for linear time-invariant (LTI) system identification are hindered by parameter non-identifiability; the resulting complex, multi-modal posteriors make inference inefficient and impractical. We solve this problem by embedding canonical forms of LTI systems within the Bayesian framework. We rigorously establish that inference in these minimal parameterizations fully captures all invariant system dynamics (e.g., transfer functions, eigenvalues, predictive distributions of system outputs) while resolving identifiability. This approach unlocks the use of meaningful, structure-aware priors (e.g., enforcing stability via eigenvalues) and ensures conditions for a Bernstein--von Mises theorem -- a link between Bayesian and frequentist large-sample asymptotics that is broken in standard forms. Extensive simulations with modern MCMC methods highlight advantages over standard parameterizations: canonical forms achieve higher computational efficiency, generate interpretable and well-behaved posteriors, and provide robust uncertainty estimates, particularly from limited data.
BibTeX Citation
@article{ bryutkin-canonicalidentification-2025, doi = { 10.48550/arXiv.2507.11535 }, title = { Canonical Bayesian Linear System Identification }, author = { A. Bryutkin and M. E. Levine and U. Urteaga and Y. M. Marzouk }, journal = { Preprint }, month = { 7 }, year = { 2025 }, editor = { }, } -
T. Helin, Y. M. Marzouk, J. R. Rojo-Garcia.
Bayesian optimal experimental design with Wasserstein information criteria
Preprint, (2025).
Available Links
Abstract
Bayesian optimal experimental design (OED) provides a principled framework for selecting the most informative observational settings in experiments. With rapid advances in computational power, Bayesian OED has become increasingly feasible for inference problems involving large-scale simulations, attracting growing interest in fields such as inverse problems. In this paper, we introduce a novel design criterion based on the expected Wasserstein-p distance between the prior and posterior distributions. Especially, for p=2, this criterion shares key parallels with the widely used expected information gain (EIG), which relies on the Kullback--Leibler divergence instead. First, the Wasserstein-2 criterion admits a closed-form solution for Gaussian regression, a property which can also be leveraged for approximative schemes. Second, it can be interpreted as maximizing the information gain measured by the transport cost incurred when updating the prior to the posterior. Our main contribution is a stability analysis of the Wasserstein-1 criterion, where we provide a rigorous error analysis under perturbations of the prior or likelihood. We partially extend this study also to the Wasserstein-2 criterion. In particular, these results yield error rates when empirical approximations of priors are used. Finally, we demonstrate the computability of the Wasserstein-2 criterion and demonstrate our approximation rates through simulations.
BibTeX Citation
@article{ helin-wassersteinoed-2025, doi = { 10.48550/arXiv.2504.10092 }, title = { Bayesian optimal experimental design with Wasserstein information criteria }, author = { T. Helin and Y. M. Marzouk and J. R. Rojo-Garcia }, journal = { Preprint }, month = { 4 }, year = { 2025 }, editor = { }, } -
M. Ramgraber, D. Sharp, M. Le Provost, Y. M. Marzouk.
A friendly introduction to triangular transport
Preprint, (2025).
Available Links
Abstract
Decision making under uncertainty is a cross-cutting challenge in science and engineering. Most approaches to this challenge employ probabilistic representations of uncertainty. In complicated systems accessible only via data or black-box models, however, these representations are rarely known. We discuss how to characterize and manipulate such representations using triangular transport maps, which approximate any complex probability distribution as a transformation of a simple, well-understood distribution. The particular structure of triangular transport guarantees many desirable mathematical and computational properties that translate well into solving practical problems. Triangular maps are actively used for density estimation, (conditional) generative modelling, Bayesian inference, data assimilation, optimal experimental design, and related tasks. While there is ample literature on the development and theory of triangular transport methods, this manuscript provides a detailed introduction for scientists interested in employing measure transport without assuming a formal mathematical background. We build intuition for the key foundations of triangular transport, discuss many aspects of its practical implementation, and outline the frontiers of this field.
BibTeX Citation
@article{ ramgraber-introtransport-2025, doi = { 10.48550/arXiv.2503.21673 }, title = { A friendly introduction to triangular transport }, author = { M. Ramgraber and D. Sharp and M. Le Provost and Y. M. Marzouk }, journal = { Preprint }, month = { 3 }, year = { 2025 }, editor = { }, } -
Y. M. Marzouk, Z. Ren, J. Zech.
Distribution learning via neural differential equations: minimal energy regularization and approximation theory
Preprint, (2025).
Available Links
Abstract
Neural ordinary differential equations (ODEs) provide expressive representations of invertible transport maps that can be used to approximate complex probability distributions, e.g., for generative modeling, density estimation, and Bayesian inference. We show that for a large class of transport maps T, there exists a time-dependent ODE velocity field realizing a straight-line interpolation (1−t)x+tT(x), t∈[0,1], of the displacement induced by the map. Moreover, we show that such velocity fields are minimizers of a training objective containing a specific minimum-energy regularization. We then derive explicit upper bounds for the C^k norm of the velocity field that are polynomial in the C^k norm of the corresponding transport map T; in the case of triangular (Knothe--Rosenblatt) maps, we also show that these bounds are polynomial in the C^k norms of the associated source and target densities. Combining these results with stability arguments for distribution approximation via ODEs, we show that Wasserstein or Kullback--Leibler approximation of the target distribution to any desired accuracy ϵ>0 can be achieved by a deep neural network representation of the velocity field whose size is bounded explicitly in terms of ϵ, the dimension, and the smoothness of the source and target densities. The same neural network ansatz yields guarantees on the value of the regularized training objective.
BibTeX Citation
@article{ marzouk-neuralode-2025, doi = { 10.48550/arXiv.2502.03795 }, title = { Distribution learning via neural differential equations: minimal energy regularization and approximation theory }, author = { Y. M. Marzouk and Z. Ren and J. Zech }, journal = { Preprint }, month = { 2 }, year = { 2025 }, editor = { }, } -
K. Fisher, Y. M. Marzouk.
Can Bayesian Neural Networks Make Confident Predictions?
Preprint, (2025).
Available Links
Abstract
Bayesian inference promises a framework for principled uncertainty quantification of neural network predictions. Barriers to adoption include the difficulty of fully characterizing posterior distributions on network parameters and the interpretability of posterior predictive distributions. We demonstrate that under a discretized prior for the inner layer weights, we can exactly characterize the posterior predictive distribution as a Gaussian mixture. This setting allows us to define equivalence classes of network parameter values which produce the same likelihood (training error) and to relate the elements of these classes to the network's scaling regime -- defined via ratios of the training sample size, the size of each layer, and the number of final layer parameters. Of particular interest are distinct parameter realizations that map to low training error and yet correspond to distinct modes in the posterior predictive distribution. We identify settings that exhibit such predictive multimodality, and thus provide insight into the accuracy of unimodal posterior approximations. We also characterize the capacity of a model to "learn from data" by evaluating contraction of the posterior predictive in different scaling regimes.
BibTeX Citation
@article{ fisher-bayesiannets-2025, keywords = { bayesian neural networks,model misspecification,Bayesian inference }, doi = { 10.48550/arXiv.2501.11773 }, title = { Can Bayesian Neural Networks Make Confident Predictions? }, author = { K. Fisher and Y. M. Marzouk }, journal = { Preprint }, month = { 1 }, year = { 2025 }, editor = { }, } -
R. Baptista, A.-A. Pooladian, M. Brennan, Y. M. Marzouk, J. Niles-Weed.
Conditional simulation via entropic optimal transport: Toward non-parametric estimation of conditional Brenier maps
Preprint, (2024).
Available Links
Abstract
Conditional simulation is a fundamental task in statistical modeling: Generate samples from the conditionals given finitely many data points from a joint distribution. One promising approach is to construct conditional Brenier maps, where the components of the map push forward a reference distribution to conditionals of the target. While many estimators exist, few, if any, come with statistical or algorithmic guarantees. To this end, we propose a non-parametric estimator for conditional Brenier maps based on the computational scalability of entropic optimal transport. Our estimator leverages a result of Carlier et al. (2010), which shows that optimal transport maps under a rescaled quadratic cost asymptotically converge to conditional Brenier maps; our estimator is precisely the entropic analogue of these converging maps. We provide heuristic justifications for choosing the scaling parameter in the cost as a function of the number of samples by fully characterizing the Gaussian setting. We conclude by comparing the performance of the estimator to other machine learning and non-parametric approaches on benchmark datasets and Bayesian inference problems.
BibTeX Citation
@article{ baptista-nonparametricbrenier-2024, keywords = { optimal transport,brenier maps,conditional simulation,entropic optimal transport }, doi = { 10.48550/arXiv.2411.07154 }, title = { Conditional simulation via entropic optimal transport: Toward non-parametric estimation of conditional Brenier maps }, author = { R. Baptista and A.-A. Pooladian and M. Brennan and Y. M. Marzouk and J. Niles-Weed }, journal = { Preprint }, month = { 11 }, year = { 2024 }, editor = { }, } -
F. Y. Li, R. Baptista, Y. M. Marzouk.
Expected information gain estimation via density approximations: Sample allocation and dimension reduction
Preprint, (2024).
Available Links
Abstract
Computing expected information gain (EIG) from prior to posterior (equivalently, mutual information between candidate observations and model parameters or other quantities of interest) is a fundamental challenge in Bayesian optimal experimental design. We formulate flexible transport-based schemes for EIG estimation in general nonlinear/non-Gaussian settings, compatible with both standard and implicit Bayesian models. These schemes are representative of two-stage methods for estimating or bounding EIG using marginal and conditional density estimates. In this setting, we analyze the optimal allocation of samples between training (density estimation) and approximation of the outer prior expectation. We show that with this optimal sample allocation, the MSE of the resulting EIG estimator converges more quickly than that of a standard nested Monte Carlo scheme. We then address the estimation of EIG in high dimensions, by deriving gradient-based upper bounds on the mutual information lost by projecting the parameters and/or observations to lower-dimensional subspaces. Minimizing these upper bounds yields projectors and hence low-dimensional EIG approximations that outperform approximations obtained via other linear dimension reduction schemes. Numerical experiments on a PDE-constrained Bayesian inverse problem also illustrate a favorable trade-off between dimension truncation and the modeling of non-Gaussianity, when estimating EIG from finite samples in high dimensions.
BibTeX Citation
@article{ li-eigdimensionreduction-2024, keywords = { expected information gain,optimal experimental design,density estimation,dimension reduction }, doi = { 10.48550/arXiv.2411.08390 }, title = { Expected information gain estimation via density approximations: Sample allocation and dimension reduction }, author = { F. Y. Li and R. Baptista and Y. M. Marzouk }, journal = { Preprint }, month = { 11 }, year = { 2024 }, editor = { }, } -
A. Belhadji, Q. J. Zhu, Y. M. Marzouk.
On the design of scalable, high-precision spherical-radial Fourier features
Preprint, (2024).
Available Links
Abstract
Approximation using Fourier features is a popular technique for scaling kernel methods to large-scale problems, with myriad applications in machine learning and statistics. This method replaces the integral representation of a shift-invariant kernel with a sum using a quadrature rule. The design of the latter is meant to reduce the number of features required for high-precision approximation. Specifically, for the squared exponential kernel, one must design a quadrature rule that approximates the Gaussian measure on ℝ^d. Previous efforts in this line of research have faced difficulties in higher dimensions. We introduce a new family of quadrature rules that accurately approximate the Gaussian measure in higher dimensions by exploiting its isotropy. These rules are constructed as a tensor product of a radial quadrature rule and a spherical quadrature rule. Compared to previous work, our approach leverages a thorough analysis of the approximation error, which suggests natural choices for both the radial and spherical components. We demonstrate that this family of Fourier features yields improved approximation bounds.
BibTeX Citation
@article{ belhadji-sphericalfourier-2024, doi = { 10.48550/arXiv.2408.13231 }, title = { On the design of scalable, high-precision spherical-radial Fourier features }, author = { A. Belhadji and Q. J. Zhu and Y. M. Marzouk }, journal = { Preprint }, month = { 8 }, year = { 2024 }, editor = { }, } -
M. Le Provost, J. Glaubitz, Y. M. Marzouk.
Preserving linear invariants in ensemble filtering methods
Preprint, (2024).
Available Links
Abstract
Formulating dynamical models for physical phenomena is essential for understanding the interplay between the different mechanisms and predicting the evolution of physical states. However, a dynamical model alone is often insufficient to address these fundamental tasks, as it suffers from model errors and uncertainties. One common remedy is to rely on data assimilation, where the state estimate is updated with observations of the true system. Ensemble filters sequentially assimilate observations by updating a set of samples over time. They operate in two steps: a forecast step that propagates each sample through the dynamical model and an analysis step that updates the samples with incoming observations. For accurate and robust predictions of dynamical systems, discrete solutions must preserve their critical invariants. While modern numerical solvers satisfy these invariants, existing invariant-preserving analysis steps are limited to Gaussian settings and are often not compatible with classical regularization techniques of ensemble filters, e.g., inflation and covariance tapering. The present work focuses on preserving linear invariants, such as mass, stoichiometric balance of chemical species, and electrical charges. Using tools from measure transport theory (Spantini et al., 2022, SIAM Review), we introduce a generic class of nonlinear ensemble filters that automatically preserve desired linear invariants in non-Gaussian filtering problems. By specializing this framework to the Gaussian setting, we recover a constrained formulation of the Kalman filter. Then, we show how to combine existing regularization techniques for the ensemble Kalman filter (Evensen, 1994, J. Geophys. Res.) with the preservation of the linear invariants. Finally, we assess the benefits of preserving linear invariants for the ensemble Kalman filter and nonlinear ensemble filters.
BibTeX Citation
@article{ leprovost-invariantfiltering-2024, keywords = { linear invariants · measure transport · nonlinear filtering · ensemble Kalman filter }, doi = { 10.48550/arXiv.2404.14328 }, title = { Preserving linear invariants in ensemble filtering methods }, author = { M. Le Provost and J. Glaubitz and Y. M. Marzouk }, journal = { Preprint }, month = { 4 }, year = { 2024 }, editor = { }, } -
F. Y. Li, A. Belhadji, Y. M. Marzouk.
Nonlinear Bayesian optimal experimental design using logarithmic Sobolev inequalities
Preprint, (2024).
Available Links
Abstract
We study the problem of selecting experiments from a larger candidate pool, where the goal is to maximize mutual information (MI) between the selected subset and the underlying parameters. Finding the exact solution to this combinatorial optimization problem is computationally costly, not only due to the complexity of the combinatorial search but also the difficulty of evaluating MI in nonlinear/non-Gaussian settings. We propose greedy approaches based on new computationally inexpensive lower bounds for MI, constructed via log-Sobolev inequalities. We demonstrate that our method outperforms random selection strategies, Gaussian approximations, and nested Monte Carlo (NMC) estimators of MI in various settings, including optimal design for nonlinear models with non-additive noise.
BibTeX Citation
@article{ li-sobolevoed-2024, keywords = { optimal experimental design;log-sobolev }, doi = { 10.48550/arXiv.2402.15053 }, title = { Nonlinear Bayesian optimal experimental design using logarithmic Sobolev inequalities }, author = { F. Y. Li and A. Belhadji and Y. M. Marzouk }, journal = { Preprint }, month = { 2 }, year = { 2024 }, editor = { }, } -
X. Zhang, J. Blanchet, Y. M. Marzouk, V. A. Nguyen, S. Wang.
Wasserstein-based Minimax Estimation of Dependence in Multivariate Regularly Varying Extremes
Preprint, (2023).
Available Links
Abstract
We study minimax risk bounds for estimators of the spectral measure in multivariate linear factor models, where observations are linear combinations of regularly varying latent factors. Non-asymptotic convergence rates are derived for the multivariate Peak-over-Threshold estimator in terms of the p-th order Wasserstein distance, and information-theoretic lower bounds for the minimax risks are established. The convergence rate of the estimator is shown to be minimax optimal under a class of Pareto-type models analogous to the standard class used in the setting of one-dimensional observations known as the Hall-Welsh class. When the estimator is minimax inefficient, a novel two-step estimator is introduced and demonstrated to attain the minimax lower bound. Our analysis bridges the gaps in understanding trade-offs between estimation bias and variance in multivariate extreme value theory.
BibTeX Citation
@article{zhang-wassersteinminimax-2023,
  author   = {X. Zhang and J. Blanchet and Y. M. Marzouk and V. A. Nguyen and S. Wang},
  title    = {{Wasserstein}-based Minimax Estimation of Dependence in Multivariate Regularly Varying Extremes},
  journal  = {Preprint},
  year     = {2023},
  doi      = {10.48550/arXiv.2312.09862},
  keywords = {optimal transport;minimax estimation},
} -
M. Le Provost, R. Baptista, J. D. Eldredge, Y. M. Marzouk.
An adaptive ensemble filter for heavy-tailed distributions: tuning-free inflation and localization
Preprint, (2023).
Available Links
Abstract
Heavy tails are a common feature of filtering distributions that results from the nonlinear dynamical and observation processes as well as the uncertainty from physical sensors. In these settings, the Kalman filter and its ensemble version — the ensemble Kalman filter (EnKF) — that have been designed under Gaussian assumptions result in degraded performance. t–distributions are a parametric family of distributions whose tail-heaviness is modulated by a degree of freedom ν. Interestingly, Cauchy and Gaussian distributions correspond to the extreme cases of a t–distribution for ν = 1 and ν = ∞, respectively. Leveraging tools from measure transport (Spantini et al., SIAM Review, 2022), we present a generalization of the EnKF whose prior-to-posterior update leads to exact inference for t–distributions. We demonstrate that this filter is less sensitive to outlying synthetic observations generated by the observation model for small ν. Moreover, it recovers the Kalman filter for ν = ∞. For nonlinear state-space models with heavy-tailed noise, we propose an algorithm to estimate the prior-to-posterior update from samples of the joint forecast distribution of the states and observations. We rely on a regularized expectation-maximization (EM) algorithm to estimate the mean, scale matrix, and degree of freedom of heavy-tailed t–distributions from limited samples (Finegold and Drton, arXiv preprint, 2014). Leveraging the conditional independence of the joint forecast distribution, we regularize the scale matrix with an l1 sparsity-promoting penalization of the log-likelihood at each iteration of the EM algorithm. This l1 regularization draws upon the graphical lasso algorithm (Friedman et al., Biostatistics, 2008) to estimate a sparse covariance matrix in the Gaussian setting. By sequentially estimating the degree of freedom at each analysis step, our filter has the appealing feature of adapting the prior-to-posterior update to the tail-heaviness of the data.
This new filter intrinsically embeds an adaptive and data-dependent multiplicative inflation mechanism complemented with an adaptive localization through the l1-penalization of the estimated scale matrix. We demonstrate the benefits of this new ensemble filter on challenging filtering problems with heavy-tailed noise.
BibTeX Citation
@article{le-provost-heavytailedfilter-2023,
  author   = {Le Provost, M. and R. Baptista and J. D. Eldredge and Y. M. Marzouk},
  title    = {An adaptive ensemble filter for heavy-tailed distributions: tuning-free inflation and localization},
  journal  = {Preprint},
  year     = {2023},
  doi      = {10.48550/arXiv.2310.08741},
  keywords = {ensemble Kalman filter;t–distribution;transport maps},
} -
P.-B. Rubio, Y. M. Marzouk, M. Parno.
A transport approach to sequential simulation-based inference
Preprint, (2023).
Available Links
Abstract
We present a new transport-based approach to efficiently perform sequential Bayesian inference of static model parameters. The strategy is based on the extraction of the conditional distribution from the joint distribution of parameters and data, via the estimation of structured (e.g., block triangular) transport maps. This gives explicit surrogate models for the likelihood functions and their gradients. This allows gradient-based characterizations of the posterior density via transport maps in a model-free, online phase. This framework is well suited for parameter estimation in the case of complex noise models including nuisance parameters and when the forward model is only known as a black box. The numerical application of this method is performed in the context of characterization of ice thickness with conductivity measurements.
BibTeX Citation
@article{rubio-sequentialtransport-2023,
  author   = {P.-B. Rubio and Y. M. Marzouk and M. Parno},
  title    = {A transport approach to sequential simulation-based inference},
  journal  = {Preprint},
  year     = {2023},
  month    = aug,
  doi      = {10.48550/arXiv.2308.13940},
  keywords = {transport, ssm, inference},
} -
B. J. Zhang, Y. M. Marzouk, K. Spiliopoulos.
Transport map unadjusted Langevin algorithms
Preprint, (2023).
Available Links
Abstract
Langevin dynamics are widely used in sampling high-dimensional, non-Gaussian distributions whose densities are known up to a normalizing constant. In particular, there is strong interest in unadjusted Langevin algorithms (ULA), which directly discretize Langevin dynamics to estimate expectations over the target distribution. We study the use of transport maps that approximately normalize a target distribution as a way to precondition and accelerate the convergence of Langevin dynamics. We show that in continuous time, when a transport map is applied to Langevin dynamics, the result is a Riemannian manifold Langevin dynamics (RMLD) with metric defined by the transport map. This connection suggests more systematic ways of learning metrics, and also yields alternative discretizations of the RMLD described by the map, which we study. Moreover, we show that under certain conditions, when the transport map is used in conjunction with ULA, we can improve the geometric rate of convergence of the output process in the 2--Wasserstein distance. Illustrative numerical results complement our theoretical claims.
BibTeX Citation
@article{zhang-transportula-2023,
  author   = {B. J. Zhang and Y. M. Marzouk and K. Spiliopoulos},
  title    = {Transport map unadjusted {Langevin} algorithms},
  journal  = {Preprint},
  year     = {2023},
  month    = feb,
  doi      = {10.48550/arXiv.2302.07227},
  keywords = {Langevin dynamics, transport maps, Bayesian inference, Markov chain Monte Carlo},
} -
S. Wang, Y. M. Marzouk.
On minimax density estimation via measure transport
Preprint, (2022).
Available Links
Abstract
We study the convergence properties, in Hellinger and related distances, of nonparametric density estimators based on measure transport. These estimators represent the measure of interest as the pushforward of a chosen reference distribution under a transport map, where the map is chosen via a maximum likelihood objective (equivalently, minimizing an empirical Kullback-Leibler loss) or a penalized version thereof. We establish concentration inequalities for a general class of penalized measure transport estimators, by combining techniques from M-estimation with analytical properties of the transport-based density representation. We then demonstrate the implications of our theory for the case of triangular Knothe-Rosenblatt (KR) transports on the $d$-dimensional unit cube, and show that both penalized and unpenalized versions of such estimators achieve minimax optimal convergence rates over Hölder classes of densities. Specifically, we establish optimal rates for unpenalized nonparametric maximum likelihood estimation over bounded Hölder-type balls, and then for certain Sobolev-penalized estimators and sieved wavelet estimators.
BibTeX Citation
@article{wang-tde-2022,
  author  = {S. Wang and Y. M. Marzouk},
  title   = {On minimax density estimation via measure transport},
  journal = {Preprint},
  year    = {2022},
} -
X. Zhang, J. Blanchet, Y. M. Marzouk, V. A. Nguyen, S. Wang.
Distributionally robust Gaussian process regression and Bayesian inverse problems
Preprint, (2022).
Available Links
Abstract
We study a distributionally robust optimization formulation (i.e., a min-max game) for two representative problems in Bayesian nonparametric estimation: Gaussian process regression and, more generally, linear inverse problems. Our formulation seeks the best mean-squared error predictor, in an infinite-dimensional space, against an adversary who chooses the worst-case model in a Wasserstein ball around a nominal infinite-dimensional Bayesian model. The transport cost is chosen to control features such as the degree of roughness of the sample paths that the adversary is allowed to inject. We show that the game has a well-defined value (i.e., strong duality holds in the sense that max-min equals min-max) and that there exists a unique Nash equilibrium which can be computed by a sequence of finite-dimensional approximations. Crucially, the worst-case distribution is itself Gaussian. We explore properties of the Nash equilibrium and the effects of hyperparameters through a set of numerical experiments, demonstrating the versatility of our modeling framework.
BibTeX Citation
@article{zhang-dro-2022,
  author  = {X. Zhang and J. Blanchet and Y. M. Marzouk and V. A. Nguyen and S. Wang},
  title   = {Distributionally robust {Gaussian} process regression and {Bayesian} inverse problems},
  journal = {Preprint},
  year    = {2022},
} -
R. Baptista, Y. M. Marzouk, O. Zahm.
Gradient-based data and parameter dimension reduction for Bayesian models: an information theoretic perspective
Preprint, (2022).
Available Links
Abstract
We consider the problem of reducing the dimensions of parameters and data in non-Gaussian Bayesian inference problems. Our goal is to identify an "informed" subspace of the parameters and an "informative" subspace of the data so that a high-dimensional inference problem can be approximately reformulated in low-to-moderate dimensions, thereby improving the computational efficiency of many inference techniques. To do so, we exploit gradient evaluations of the log-likelihood function. Furthermore, we use an information-theoretic analysis to derive a bound on the posterior error due to parameter and data dimension reduction. This bound relies on logarithmic Sobolev inequalities, and it reveals the appropriate dimensions of the reduced variables. We compare our method with classical dimension reduction techniques, such as principal component analysis and canonical correlation analysis, on applications ranging from mechanics to image processing.
BibTeX Citation
@article{baptista-dimred-2022,
  author   = {R. Baptista and Y. M. Marzouk and O. Zahm},
  title    = {Gradient-based data and parameter dimension reduction for {Bayesian} models: an information theoretic perspective},
  journal  = {Preprint},
  year     = {2022},
  keywords = {Bayesian inference, gradient-based dimension reduction, logarithmic Sobolev inequalities, conditional mutual information, low-dimensional subspaces, coordinate selection},
} -
B. J. Zhang, T. Sahai, Y. M. Marzouk.
Computing eigenfunctions of the multidimensional Ornstein-Uhlenbeck operator
Preprint, (2021).
Available Links
Abstract
We discuss approaches to computing eigenfunctions of the Ornstein--Uhlenbeck (OU) operator in more than two dimensions. While the spectrum of the OU operator and theoretical properties of its eigenfunctions have been well characterized in previous research, the practical computation of general eigenfunctions has not been resolved. We review special cases for which the eigenfunctions can be expressed exactly in terms of commonly used orthogonal polynomials. Then we present a tractable approach for computing the eigenfunctions in general cases and comment on its dimension dependence.
BibTeX Citation
@article{zhang-eigenfunctions-2021,
  author  = {B. J. Zhang and T. Sahai and Y. M. Marzouk},
  title   = {Computing eigenfunctions of the multidimensional {Ornstein-Uhlenbeck} operator},
  journal = {Preprint},
  year    = {2021},
  doi     = {10.48550/arXiv.2110.09229},
} -
A. Scarinci, M. Fehler, Y. M. Marzouk.
Bayesian inference under model misspecification using transport-Lagrangian distances: an application to seismic inversion
Preprint, (2021).
Available Links
Abstract
Model misspecification constitutes a major obstacle to reliable inference in many inverse problems. Inverse problems in seismology, for example, are particularly affected by misspecification of wave propagation velocities. In this paper, we focus on a specific seismic inverse problem---full-waveform moment tensor inversion---and develop a Bayesian framework that seeks robustness to velocity misspecification. A novel element of our framework is the use of transport-Lagrangian (TL) distances between observed and model predicted waveforms to specify a loss function, and the use of this loss to define a generalized belief update via a Gibbs posterior. The TL distance naturally disregards certain features of the data that are more sensitive to model misspecification, and therefore produces less biased or dispersed posterior distributions in this setting. To make the latter notion precise, we use several diagnostics to assess the quality of inference and uncertainty quantification, i.e., continuous rank probability scores and rank histograms. We interpret these diagnostics in the Bayesian setting and compare the results to those obtained using more typical Gaussian noise models and squared-error loss, under various scenarios of misspecification. Finally, we discuss potential generalizability of the proposed framework to a broader class of inverse problems affected by model misspecification.
BibTeX Citation
@article{scarinci-tl-2021,
  author  = {A. Scarinci and M. Fehler and Y. M. Marzouk},
  title   = {{Bayesian} inference under model misspecification using transport-{Lagrangian} distances: an application to seismic inversion},
  journal = {Preprint},
  year    = {2021},
} -
C. Feng, Y. M. Marzouk.
A layered multiple importance sampling scheme for focused optimal Bayesian experimental design
Preprint, (2019).
Available Links
Abstract
We develop a new computational approach for "focused" optimal Bayesian experimental design with nonlinear models, with the goal of maximizing expected information gain in targeted subsets of model parameters. Our approach considers uncertainty in the full set of model parameters, but employs a design objective that can exploit learning trade-offs among different parameter subsets. We introduce a new layered multiple importance sampling scheme that provides consistent estimates of expected information gain in this focused setting. This sampling scheme yields significant reductions in estimator bias and variance for a given computational effort, making optimal design more tractable for a wide range of computationally intensive problems.
BibTeX Citation
@article{art_77,
  author        = {C. Feng and Y. M. Marzouk},
  title         = {A layered multiple importance sampling scheme for focused optimal {Bayesian} experimental design},
  journal       = {Preprint},
  year          = {2019},
  arxiv         = {1903.11187},
  eprint        = {1903.11187},
  archiveprefix = {arXiv},
  keywords      = {optimal experimental design, Bayesian inference, Monte Carlo methods, multiple importance sampling, expected information gain, mutual information},
} -
F. Augustin, Y. M. Marzouk.
A trust region method for derivative-free nonlinear constrained stochastic optimization
Preprint, (2017).
Available Links
Abstract
We present the algorithm SNOWPAC for derivative-free constrained stochastic optimization. The algorithm builds on a model-based approach for deterministic nonlinear constrained derivative-free optimization that introduces an ``inner boundary path'' to locally convexify the feasible domain and ensure feasible trial steps. We extend this deterministic method via a generalized trust region approach that accounts for noisy evaluations of the objective and constraints. To reduce the impact of noise, we fit consistent Gaussian processes to past objective and constraint evaluations. Our approach incorporates a wide variety of probabilistic risk or deviation measures in both the objective and the constraints. Numerical benchmarking demonstrates SNOWPAC's efficiency and highlights the accuracy of the optimization solutions found.
BibTeX Citation
@article{art_55,
  author        = {F. Augustin and Y. M. Marzouk},
  title         = {A trust region method for derivative-free nonlinear constrained stochastic optimization},
  journal       = {Preprint},
  year          = {2017},
  arxiv         = {1703.04156},
  eprint        = {1703.04156},
  archiveprefix = {arXiv},
} -
X. Huan, Y. M. Marzouk.
Sequential Bayesian optimal experimental design via approximate dynamic programming
Preprint, (2016).
Available Links
Abstract
The design of multiple experiments is commonly undertaken via suboptimal strategies, such as batch (open-loop) design that omits feedback or greedy (myopic) design that does not account for future effects. This paper introduces new strategies for the optimal design of sequential experiments. First, we rigorously formulate the general sequential optimal experimental design (sOED) problem as a dynamic program. Batch and greedy designs are shown to result from special cases of this formulation. We then focus on sOED for parameter inference, adopting a Bayesian formulation with an information theoretic design objective. To make the problem tractable, we develop new numerical approaches for nonlinear design with continuous parameter, design, and observation spaces. We approximate the optimal policy by using backward induction with regression to construct and refine value function approximations in the dynamic program. The proposed algorithm iteratively generates trajectories via exploration and exploitation to improve approximation accuracy in frequently visited regions of the state space. Numerical results are verified against analytical solutions in a linear-Gaussian setting. Advantages over batch and greedy design are then demonstrated on a nonlinear source inversion problem where we seek an optimal policy for sequential sensing.
BibTeX Citation
@article{huan-sequential-2016,
  author   = {X. Huan and Y. M. Marzouk},
  title    = {Sequential {Bayesian} optimal experimental design via approximate dynamic programming},
  journal  = {Preprint},
  year     = {2016},
  keywords = {sequential experimental design, Bayesian experimental design, approximate dynamic programming, feedback control policy, lookahead, approximate value iteration, information gain},
} -
N. Lowry, R. Mangoubi, M. Desai, Y. M. Marzouk, P. Sammak.
Bayesian level sets for image segmentation
Preprint, (2015).
Abstract
This paper presents a new algorithm for image segmentation and classification, Bayesian Level Sets (BLS). BLS harnesses the advantages of two well-known algorithms: variational level sets and finite mixture model EM (FMM-EM). Like FMM-EM, BLS has a simple, probabilistic implementation which natively extends to an arbitrary number of classes. Via a level set-inspired geometric prior, BLS returns smooth, regular segmenting contours that are robust to noise. In practice, BLS is also observed to be robust to fairly lenient initial conditions. A comparative analysis of the three algorithms (BLS, level set, FMM-EM) is presented, and the advantages of BLS are quantitatively demonstrated on realistic applications such as pluripotent stem cell colonies, brain MRI phantoms, and stem cell nuclei.
BibTeX Citation
@article{art_39,
  author  = {N. Lowry and R. Mangoubi and M. Desai and Y. M. Marzouk and P. Sammak},
  title   = {{Bayesian} level sets for image segmentation},
  journal = {Preprint},
  year    = {2015},
} -
F. Augustin, Y. M. Marzouk.
NOWPAC: A provably convergent derivative-free nonlinear optimizer with path-augmented constraints
Preprint, (2014).
Available Links
Abstract
This paper proposes the algorithm NOWPAC (Nonlinear Optimization With Path-Augmented Constraints) for nonlinear constrained derivative-free optimization. The algorithm uses a trust region framework based on fully linear models for the objective function and the constraints. A new constraint-handling scheme based on an inner boundary path allows for the computation of feasible trial steps using models for the constraints. We prove that the iterates computed by NOWPAC converge to a local first order critical point. We also discuss the convergence of NOWPAC in situations where evaluations of the objective function or the constraints are inexact, e.g., corrupted by numerical errors. For this, we determine a rate of decay that the magnitude of these numerical errors must satisfy, while approaching the critical point, to guarantee convergence. In settings where adjusting the accuracy of the objective or constraint evaluations is not possible, as is often the case in practical applications, we introduce an error indicator to detect these regimes and prevent deterioration of the optimization results.
BibTeX Citation
@article{augustin-nowpac-2014,
  author  = {F. Augustin and Y. M. Marzouk},
  title   = {{NOWPAC}: A provably convergent derivative-free nonlinear optimizer with path-augmented constraints},
  journal = {Preprint},
  year    = {2014},
}
-
A. Belhadji, D. Sharp, Y. M. Marzouk.
Weighted quantization using MMD: From mean field to mean shift via gradient flows
In 29th International Conference on Artificial Intelligence and Statistics (2026).
Abstract
Approximating a probability distribution using a set of particles is a fundamental problem in machine learning and statistics, with applications including clustering and quantization. Formally, we seek a weighted mixture of Dirac measures that best approximates the target distribution. While much existing work relies on the Wasserstein distance to quantify approximation errors, maximum mean discrepancy (MMD) has received comparatively less attention, especially when allowing for variable particle weights. We argue that a Wasserstein-Fisher-Rao gradient flow is well-suited for designing quantizations optimal under MMD. We show that a system of interacting particles satisfying a set of ODEs discretizes this flow. We further derive a new fixed-point algorithm called mean shift interacting particles (MSIP). We show that MSIP extends the classical mean shift algorithm, widely used for identifying modes in kernel density estimators. Moreover, we show that MSIP can be interpreted as preconditioned gradient descent and that it acts as a relaxation of Lloyd's algorithm for clustering. Our unification of gradient flows, mean shift, and MMD-optimal quantization yields algorithms that are more robust than state-of-the-art methods, as demonstrated via high-dimensional and multi-modal numerical experiments.
BibTeX Citation
@inproceedings{belhadji-weightedquant-2026,
  author    = {A. Belhadji and D. Sharp and Y. M. Marzouk},
  title     = {Weighted quantization using {MMD}: From mean field to mean shift via gradient flows},
  booktitle = {29th International Conference on Artificial Intelligence and Statistics},
  year      = {2026},
  month     = jan,
} -
L. Aolaritei, Z. O. Wang, J. Zhu, M. I. Jordan, Y. M. Marzouk.
Conformal Prediction under Lévy-Prokhorov Distribution Shifts: Robustness to Local and Global Perturbations
In Advances in Neural Information Processing Systems 38 (2025), pp. 63811--63840.
Abstract
Conformal prediction provides a powerful framework for constructing prediction intervals with finite-sample guarantees, yet its robustness under distribution shifts remains a significant challenge. This paper addresses this limitation by modeling distribution shifts using Lévy-Prokhorov (LP) ambiguity sets, which capture both local and global perturbations. We provide a self-contained overview of LP ambiguity sets and their connections to popular metrics such as Wasserstein and Total Variation. We show that the link between conformal prediction and LP ambiguity sets is a natural one: by propagating the LP ambiguity set through the scoring function, we reduce complex high-dimensional distribution shifts to manageable one-dimensional distribution shifts, enabling exact quantification of worst-case quantiles and coverage. Building on this analysis, we construct robust conformal prediction intervals that remain valid under distribution shifts, explicitly linking LP parameters to interval width and confidence levels. Experimental results on real-world datasets demonstrate the effectiveness of the proposed approach.
BibTeX Citation
@inproceedings{aolaritei-conformal-2025,
  author    = {L. Aolaritei and Z. O. Wang and J. Zhu and M. I. Jordan and Y. M. Marzouk},
  title     = {Conformal Prediction under {L{\'e}vy-Prokhorov} Distribution Shifts: Robustness to Local and Global Perturbations},
  booktitle = {Advances in Neural Information Processing Systems},
  editor    = {D. Belgrave and C. Zhang and H. Lin and R. Pascanu and P. Koniusz and M. Ghassemi and N. Chen},
  volume    = {38},
  pages     = {63811--63840},
  publisher = {Curran Associates, Inc.},
  year      = {2025},
  month     = dec,
} -
P. Tsimpos, Y. M. Marzouk.
An Eulerian Perspective on Straight-Line Sampling
In 2nd edition of Frontiers in Probabilistic Inference: Learning meets Sampling (2025).
Available Links
Abstract
BibTeX Citation
@inproceedings{tsimpos-eulerianperspectivestraight-2025,
  author    = {P. Tsimpos and Y. M. Marzouk},
  title     = {An {Eulerian} Perspective on Straight-Line Sampling},
  booktitle = {2nd edition of Frontiers in Probabilistic Inference: Learning meets Sampling},
  year      = {2025},
  month     = nov,
} -
A. Maurais, B. Hosseini, Y. M. Marzouk.
Learning Paths for Dynamic Measure Transport: A Control Perspective
In 2nd edition of Frontiers in Probabilistic Inference: Learning meets Sampling (2025).
Available Links
Abstract
BibTeX Citation
@inproceedings{maurais-learningpathsfordynamic-2025,
  author    = {A. Maurais and B. Hosseini and Y. M. Marzouk},
  title     = {Learning Paths for Dynamic Measure Transport: A Control Perspective},
  booktitle = {2nd edition of Frontiers in Probabilistic Inference: Learning meets Sampling},
  year      = {2025},
  month     = nov,
} -
P. Tsimpos, Z. Ren, J. Zech, Y. M. Marzouk.
Optimal Scheduling of Dynamic Transport
In Proceedings of Thirty Eighth Conference on Learning Theory 291 (2025), pp. 5441--5505.
Abstract
Flow-based methods for sampling and generative modeling use continuous-time dynamical systems to represent a \emph{transport map} that pushes forward a source measure to a target measure. The introduction of a time axis provides considerable design freedom, and a central question is how to exploit this freedom. Though many popular methods seek straight line (i.e., zero acceleration) trajectories, we show here that a specific class of ``curved'' trajectories can significantly improve approximation and learning. In particular, we consider the unit-time interpolation of any given transport map T and seek the schedule τ:[0,1]→[0,1] that minimizes the spatial Lipschitz constant of the corresponding velocity field over all times t∈[0,1]. This quantity is crucial as it allows for control of the approximation error when the velocity field is learned from data. We show that, for a broad class of source/target measures and transport maps T, the \emph{optimal schedule} can be computed in closed form, and that the resulting optimal Lipschitz constant is \emph{exponentially smaller} than that induced by an identity schedule (corresponding to, for instance, the Wasserstein geodesic). Our proof technique relies on the calculus of variations and Γ-convergence, allowing us to approximate the aforementioned degenerate objective by a family of smooth, tractable problems.
BibTeX Citation
@inproceedings{tsimpos-optimalscheduling-2025,
  author    = {P. Tsimpos and Z. Ren and J. Zech and Y. M. Marzouk},
  title     = {Optimal Scheduling of Dynamic Transport},
  booktitle = {Proceedings of Thirty Eighth Conference on Learning Theory},
  editor    = {N. Haghtalab and A. Moitra},
  series    = {Proceedings of Machine Learning Research},
  volume    = {291},
  pages     = {5441--5505},
  publisher = {PMLR},
  year      = {2025},
  month     = jun,
  doi       = {10.48550/arXiv.2504.14425},
} -
S. Liaw, R. Morrison, Y. M. Marzouk, R. Baptista.
Learning Local Neighborhoods of Non-Gaussian Graphical Models
In Proceedings of the AAAI Conference on Artificial Intelligence 39 (2025), pp. 18711--18718.
Available Links
Abstract
Identifying the Markov properties or conditional independencies of a collection of random variables is a fundamental task in statistics for modeling and inference. Existing approaches often learn the structure of a probabilistic graph, which encodes these dependencies, by assuming that the variables follow a distribution with a simple parametric form. Moreover, the computational cost of many algorithms scales poorly for high-dimensional distributions, as they need to estimate all the edges in the graph simultaneously. In this work, we propose a scalable algorithm to infer the conditional independence relationships of each variable by exploiting the local Markov property. The proposed method, named Localized Sparsity Identification for Non-Gaussian Distributions (L-SING), estimates the graph by using flexible classes of transport maps to represent the conditional distribution for each variable. We show that L-SING includes existing approaches, such as neighborhood selection with Lasso, as a special case. We demonstrate the effectiveness of our algorithm in both Gaussian and non-Gaussian settings by comparing it to existing methods. Lastly, we show the scalability of the proposed approach by applying it to high-dimensional non-Gaussian examples, including a biological dataset with more than 150 variables.
BibTeX Citation
@inproceedings{liaw-localneighborhood-2025,
  author    = {S. Liaw and R. Morrison and Y. M. Marzouk and R. Baptista},
  title     = {Learning Local Neighborhoods of Non-{Gaussian} Graphical Models},
  booktitle = {Proceedings of the AAAI Conference on Artificial Intelligence},
  volume    = {39},
  number    = {18},
  pages     = {18711--18718},
  year      = {2025},
  doi       = {10.1609/aaai.v39i18.34059},
} -
A. Maurais, Y. M. Marzouk.
Sampling in Unit Time with Kernel Fisher-Rao Flow
In International Conference on Machine Learning (ICML) 235 (2024), pp. 35138--35162.
Available Links
Abstract
We introduce a new mean-field ODE and corresponding interacting particle systems (IPS) for sampling from an unnormalized target density. The IPS are gradient-free, available in closed form, and only require the ability to sample from a reference density and compute the (unnormalized) target-to-reference density ratio. The mean-field ODE is obtained by solving a Poisson equation for a velocity field that transports samples along the geometric mixture of the two densities, which is the path of a particular Fisher-Rao gradient flow. We employ a RKHS ansatz for the velocity field, which makes the Poisson equation tractable and enables discretization of the resulting mean-field ODE over finite samples. The mean-field ODE can additionally be derived from a discrete-time perspective as the limit of successive linearizations of the Monge-Ampère equations within a framework known as sample-driven optimal transport. We introduce a stochastic variant of our approach and demonstrate empirically that our IPS can produce high-quality samples from varied target distributions, outperforming comparable gradient-free particle systems and competitive with gradient-based alternatives.
BibTeX Citation
@inproceedings{ maurais-kfrflow-2024, author = { A. Maurais and Y. M. Marzouk }, title = { Sampling in Unit Time with Kernel {Fisher-Rao} Flow }, booktitle = { International Conference on Machine Learning ({ICML}) }, editor = { R. Salakhutdinov and Z. Kolter and K. Heller and A. Weller and N. Oliver and J. Scarlett and F. Berkenkamp }, series = { Proceedings of Machine Learning Research }, volume = { 235 }, pages = { 35138--35162 }, publisher = { PMLR }, month = jul, year = { 2024 }, keywords = { gradient flow, generative modelling, sampling }, } -
N. Chandramoorthy, F. Schaefer, Y. M. Marzouk.
A score-based operator Newton method for measure transport
In Proceedings of The 27th International Conference on Artificial Intelligence and Statistics 238 (2024), pp. 3349--3357.
Abstract
Transportation of probability measures underlies many core tasks in statistics and machine learning, from variational inference to generative modeling. A typical goal is to represent a target probability measure of interest as the push-forward of a tractable source measure through a learned map. We present a new construction of such a transport map, given the ability to evaluate the score of the target distribution. Specifically, we characterize the map as a zero of an infinite-dimensional score-residual operator and derive a Newton-type method for iteratively constructing such a zero. We prove convergence of these iterations by invoking classical elliptic regularity theory for partial differential equations (PDE) and show that this construction enjoys rapid convergence, under smoothness assumptions on the target score. A key element of our approach is a generalization of the elementary Newton method to infinite-dimensional operators, other forms of which have appeared in nonlinear PDE and in dynamical systems. Our Newton construction, while developed in a functional setting, also suggests new iterative algorithms for approximating transport maps.
BibTeX Citation
@inproceedings{ chandramoorthy-newtonmethodtransport-2024, author = { N. Chandramoorthy and F. Schaefer and Y. M. Marzouk }, title = { A score-based operator {Newton} method for measure transport }, booktitle = { Proceedings of The 27th International Conference on Artificial Intelligence and Statistics ({AISTATS}) }, series = { Proceedings of Machine Learning Research }, volume = { 238 }, pages = { 3349--3357 }, publisher = { PMLR }, month = may, year = { 2024 }, doi = { 10.48550/arXiv.2305.09792 }, keywords = { Transportation of measure, score-based modeling, optimal transport, Newton method, KAM iteration, elliptic PDE }, } -
A. Maurais, Y. M. Marzouk.
Adaptive Algorithms for Continuous-Time Transport: Homotopy-Driven Sampling and a New Interacting Particle System
In NeurIPS 2023 Workshop on Optimal Transport and Machine Learning (2023).
Available Links
Abstract
We propose a new dynamic algorithm which transports samples from a reference distribution to a target distribution in unit time, given access to the target-to-reference density ratio. Our approach is to seek a sequence of transport maps that push forward the reference along a path given by a geometric mixture of the two densities. We take the maps to be simply parameterized, local, sample-driven optimal transport maps which we identify by approximately solving a root-finding problem formulated using importance weights. When feature functions for the maps are taken to be kernels, we obtain a novel interacting particle system from which we derive finite-particle and mean-field ODEs. In discrete time, we introduce an adaptive algorithm for simulating this interacting particle system which adjusts the ODE time steps based on the quality of the transport, automatically uncovering a good "schedule" for traversing the geometric mixture of densities.
BibTeX Citation
@inproceedings{ maurais-dynamictransport-2023, author = { A. Maurais and Y. M. Marzouk }, title = { Adaptive Algorithms for Continuous-Time Transport: Homotopy-Driven Sampling and a New Interacting Particle System }, booktitle = { {NeurIPS} 2023 Workshop on Optimal Transport and Machine Learning }, month = dec, year = { 2023 }, keywords = { optimal transport, homotopy, interacting particle system, mean field ODE, adaptivity }, } -
A. Maurais, T. Alsup, B. Peherstorfer, Y. M. Marzouk.
Multi-Fidelity Covariance Estimation in the Log-Euclidean Geometry
In Proceedings of the 40th International Conference on Machine Learning 202 (2023), pp. 24214--24235.
Available Links
Abstract
We introduce a multi-fidelity estimator of covariance matrices that employs the log-Euclidean geometry of the symmetric positive-definite manifold. The estimator fuses samples from a hierarchy of data sources of differing fidelities and costs for variance reduction while guaranteeing definiteness, in contrast with previous approaches. The new estimator makes covariance estimation tractable in applications where simulation or data collection is expensive; to that end, we develop an optimal sample allocation scheme that minimizes the mean-squared error of the estimator given a fixed budget. Guaranteed definiteness is crucial to metric learning, data assimilation, and other downstream tasks. Evaluations of our approach using data from physical applications (heat conduction, fluid dynamics) demonstrate more accurate metric learning and speedups of more than one order of magnitude compared to benchmarks.
BibTeX Citation
@inproceedings{ maurais-covlogeuclidean-2023, author = { A. Maurais and T. Alsup and B. Peherstorfer and Y. M. Marzouk }, title = { Multi-Fidelity Covariance Estimation in the {Log-Euclidean} Geometry }, booktitle = { Proceedings of the 40th International Conference on Machine Learning }, editor = { Krause, Andreas and Brunskill, Emma and Cho, Kyunghyun and Engelhardt, Barbara and Sabato, Sivan and Scarlett, Jonathan }, series = { Proceedings of Machine Learning Research }, volume = { 202 }, pages = { 24214--24235 }, publisher = { PMLR }, month = jul, year = { 2023 }, keywords = { multifidelity, covariance }, } -
M. Brennan, R. Baptista, Y. M. Marzouk.
Dimension reduction via score ratio matching
In NeurIPS 2022 Workshop on Score-Based Methods (2022).
Abstract
We propose a method to detect a low-dimensional subspace where a non-Gaussian target distribution departs from a known reference distribution (e.g., a standard Gaussian). We identify this subspace from gradients of the log-ratio between the target and reference densities, which we call the score ratio. Given only samples from the target distribution, we estimate these gradients via score ratio matching, with a tailored parameterization and a regularization method that expose the low-dimensional structure we seek. We show that our approach outperforms standard score matching for dimension reduction of in-class distributions, and that several benchmark UCI datasets in fact exhibit this type of low dimensionality.
BibTeX Citation
@inproceedings{ brennan-dimension-2022, author = { M. Brennan and R. Baptista and Y. M. Marzouk }, title = { Dimension reduction via score ratio matching }, booktitle = { {NeurIPS} 2022 Workshop on Score-Based Methods }, year = { 2022 }, } -
A. Tewari, B. Wheelock, J. Clark, D. Foster, M. T. C. Li, Y. M. Marzouk.
Subsurface uncertainty quantification with deep geologic priors: A variational Bayesian framework
In Second International Meeting for Applied Geoscience & Energy (2022).
Available Links
Abstract
BibTeX Citation
@inproceedings{ tewari-deeppriors-2022, author = { A. Tewari and B. Wheelock and J. Clark and D. Foster and M. T. C. Li and Y. M. Marzouk }, title = { Subsurface uncertainty quantification with deep geologic priors: A variational {Bayesian} framework }, booktitle = { Second International Meeting for Applied Geoscience \& Energy }, year = { 2022 }, doi = { 10.1190/image2022-3750132.1 }, } -
B. J. Zhang, T. Sahai, Y. M. Marzouk.
Sampling via controlled stochastic dynamical systems
In NeurIPS 2021 workshop: I (Still) Can't Believe It's Not Better! (2021).
Available Links
Abstract
BibTeX Citation
@inproceedings{ zhang-sampling-2021, author = { B. J. Zhang and T. Sahai and Y. M. Marzouk }, title = { Sampling via controlled stochastic dynamical systems }, booktitle = { {NeurIPS} 2021 workshop: I (Still) Can't Believe It's Not Better! }, year = { 2021 }, } -
A. Scarinci, Y. M. Marzouk, C. Gu, M. Fehler, U. bin Waheed, S. Kaka, B. Dia.
Transport Lagrangian misfit measures and velocity model uncertainty in Bayesian moment tensor inversion
In First International Meeting for Applied Geoscience & Energy Expanded Abstracts (2021).
Available Links
Abstract
BibTeX Citation
@inproceedings{ scarinci-tlseg-2021, author = { A. Scarinci and Y. M. Marzouk and C. Gu and M. Fehler and U. bin Waheed and S. Kaka and B. Dia }, title = { Transport {Lagrangian} misfit measures and velocity model uncertainty in {Bayesian} moment tensor inversion }, booktitle = { First International Meeting for Applied Geoscience \& Energy Expanded Abstracts }, year = { 2021 }, doi = { 10.1190/segam2021-3594806.1 }, } -
M. Le Provost, R. Baptista, Y. M. Marzouk, J. Eldredge.
A low-rank nonlinear ensemble filter for vortex models of aerodynamic flows
In AIAA Scitech 2021 Forum (2021).
Available Links
Abstract
Robustly estimating the separated flow about an airfoil is critical in the design of any closed-loop controller. Darakananda et al. (Phys. Rev. Fluids, 2018) successfully used an ensemble Kalman filter (EnKF) to sequentially estimate the flow using an inviscid vortex model and distributed surface pressure readings. To tackle challenging inference problems with limited observations, classical localization schemes suppress correlations at long distances. However, these techniques would be harmful in our case due to the existence of physical long-range interactions between vortices and pressure readings. Instead, these interactions are best described as interactions between clusters of variables. This work proposes a systematic procedure to identify these clusters of variables from a nonlinear observation model. By projecting the states and observations onto these new sets of variables, the inference is performed in a low-dimensional subspace of the state and the observations. To perform consistent inference with the nonlinear model, we use the stochastic map filter (SMF): a natural generalization of the EnKF that relies on interpretable nonlinear prior-to-posterior transformations (Spantini et al., arXiv, 2019). We combine the identification of these clusters of variables with the SMF to derive a low-rank nonlinear ensemble filter. This filter is assessed on the response of a translating plate at 20 degrees that undergoes strong and overlapping pulses applied near the leading-edge. Our framework outperforms the EnKF at estimating the surface pressure distribution along the entire plate, with only two pressure sensors (placed at the edges of the plate) for collecting measurements.
BibTeX Citation
@inproceedings{ leprovost-lr-2021, author = { M. {Le Provost} and R. Baptista and Y. M. Marzouk and J. Eldredge }, title = { A low-rank nonlinear ensemble filter for vortex models of aerodynamic flows }, booktitle = { {AIAA} Scitech 2021 Forum }, year = { 2021 }, doi = { 10.2514/6.2021-1937 }, keywords = { inviscid vortex model, disturbed separated flow, data assimilation, nonlinear ensemble filter, measure transport, low-rank projections }, } -
J. Eldredge, M. Le Provost, R. Baptista, Y. M. Marzouk.
Applications of ensemble Kalman filtered vortex modeling to gust--wing interactions
In AIAA Scitech 2021 Forum (2021).
Available Links
Abstract
Lightweight aerial vehicles can be strongly affected by environmental disturbances (gusts). It is important to have tools for estimating their aerodynamic response to such gusts and to other disturbances, such as agile maneuvers and flow actuators. In recent work, a framework has been developed for predicting the state of disturbed aerodynamic flows, without prior knowledge of the disturbance, by assimilating surface pressure measurements into an ensemble of aggregated vortex models. In this framework, a randomized ensemble of inexpensive models are advanced in each step to forecast the flow state, and then corrected in a linear analysis step by assimilating new measurements. Further advances to this Ensemble Kalman filter framework have been made in a recent paper (Le Provost and Eldredge 2020). In the present work, we demonstrate the use of the improved framework on encounters of a two-dimensional flat plate with gusts of various amplitudes. In particular, we explore the dependence of the aerodynamic response on the gust strength and configuration, and show that the estimation framework works well even for very large amplitude disturbances.
BibTeX Citation
@inproceedings{ eldredge-gust-2021, author = { J. Eldredge and M. {Le Provost} and R. Baptista and Y. M. Marzouk }, title = { Applications of ensemble {Kalman} filtered vortex modeling to gust--wing interactions }, booktitle = { {AIAA} Scitech 2021 Forum }, year = { 2021 }, doi = { 10.2514/6.2021-1936 }, } -
M. Izzatullah, R. Baptista, L. Mackey, Y. M. Marzouk, D. Peter.
Bayesian seismic inversion: measuring Langevin MCMC sample quality with kernels
In SEG Technical Program Expanded Abstracts (2020).
Available Links
Abstract
The Bayesian framework is commonly used to quantify uncertainty in seismic inversion. To perform Bayesian inference, Markov chain Monte Carlo (MCMC) algorithms are regarded as the gold standard technique for sampling from the posterior probability distribution. Consistent MCMC methods have trouble for complex, high-dimensional models, and most methods scale poorly to large datasets, such as those arising in seismic inversion. As an alternative, approximate MCMC methods based on unadjusted Langevin dynamics offer scalability and more rapid sampling at the cost of biased inference. However, when assessing the quality of approximate MCMC samples for characterizing the posterior distribution, most diagnostics fail to account for these biases. In this work, we introduce the kernel Stein discrepancy (KSD) as a diagnostic tool to determine the convergence of MCMC samples for Bayesian seismic inversion. We demonstrate the use of the KSD for measuring sample quality and selecting the optimal Langevin MCMC algorithm for two Gaussian Bayesian inference problems.
BibTeX Citation
@inproceedings{ izzatullah-langevin-2020, author = { M. Izzatullah and R. Baptista and L. Mackey and Y. M. Marzouk and D. Peter }, title = { {Bayesian} seismic inversion: measuring {Langevin} {MCMC} sample quality with kernels }, booktitle = { {SEG} Technical Program Expanded Abstracts }, year = { 2020 }, doi = { 10.1190/segam2020-3422419.1 }, } -
F. Menhorn, G. Geraci, D. Seidl, M. S. Eldred, R. King, H.-J. Bungartz, Y. M. Marzouk.
Higher moment multilevel estimators for optimization under uncertainty applied to wind plant design
In AIAA Scitech 2020 Forum (2020).
Available Links
Abstract
The design of wind power plants is a complex engineering task that requires accurate numerical simulations that include both computational fluid and structural dynamics. The accurate prediction of the global performance of the plant is therefore based on the possibility to capture the evolving dynamics for several fluid scales, from local eddies affecting the loading on the turbine blades up to large eddies that form the interacting wakes downstream with respect to each rotor. The presence of these complex flow structures and their interactions requires the use of high-fidelity tools and high resolution grids. At the same time the operative conditions of the plant are intrinsically stochastic in their nature and uncertainty quantification techniques are needed to both characterize the sources of uncertainty and propagate them through the numerical codes. The high computational burden of this task is very often prohibitive for a single plant configuration and it is exacerbated in the case of a design process in which an uncertainty quantification propagation is required for each design iteration. In this work we present and discuss the integration of multilevel uncertainty quantification and design strategies that have the potential to drastically reduce the overall cost of an optimization under uncertainty study. The main objective of the multilevel strategy is to combine computational tools with different accuracy and computational cost such that information from a hierarchy of resolutions can be efficiently fused to decrease the overall cost without compromising the overall accuracy. We focus on algorithmic advancements for both the forward uncertainty quantification step and the optimization step as well. We present multilevel estimators for higher moments, particularly the variance which is employed in robust optimization problems and show the advantage compared to standard multilevel estimators. 
We use simple model problems to describe the different algorithmic components and their features and performance. As a final demonstration we consider a wind plant design problem involving two turbines for which we combine fluid dynamics tools with different numerical accuracy based on Reynolds Averaged Navier-Stokes equations. Finally, we compare the performance of the newly developed multilevel strategies with their single fidelity counterpart.
BibTeX Citation
@inproceedings{ menhorn-moments-2020, author = { F. Menhorn and G. Geraci and D. Seidl and M. S. Eldred and R. King and H.-J. Bungartz and Y. M. Marzouk }, title = { Higher moment multilevel estimators for optimization under uncertainty applied to wind plant design }, booktitle = { {AIAA} Scitech 2020 Forum }, year = { 2020 }, doi = { 10.2514/6.2020-1952 }, } -
G. Geraci, F. Menhorn, X. Huan, C. Safta, Y. M. Marzouk, H. Najm, M. Eldred.
Progress in scramjet design optimization under uncertainty using simulations of the HIFiRE Direct Connect Rig
In AIAA Scitech 2019 Forum (2019).
Available Links
Abstract
BibTeX Citation
@inproceedings{ geraci-hifire-2019, author = { G. Geraci and F. Menhorn and X. Huan and C. Safta and Y. M. Marzouk and H. Najm and M. Eldred }, title = { Progress in scramjet design optimization under uncertainty using simulations of the {HIFiRE} {Direct Connect Rig} }, booktitle = { {AIAA} Scitech 2019 Forum }, year = { 2019 }, doi = { 10.2514/6.2019-0725 }, } -
B. J. Zhang, B.-Y. Min, T. Sahai, Y. M. Marzouk.
Rare event simulation of rotorcraft systems
(2018).
Available Links
Abstract
We demonstrate an algorithm for efficient rare event sampling in a rotorcraft model. Helicopter design parameters are typically chosen for efficient performance in cruise and hover. At the same time, structural components such as the length of the tail are chosen so that the rotorcraft is stable under perturbation by environmental factors such as noisy wind. In the face of stochastic forcing, however, environmental conditions may still lead to rare accidents despite good engineering design. We adapt a recent dynamic importance sampling algorithm for small-noise diffusions, derived from the theory of large deviations, to efficient sampling of rare events in a model rotorcraft system. The method achieves variance reduction in estimating the probabilities of stall events, and helps identify the dynamics leading to these phenomena.
BibTeX Citation
@inproceedings{ conf_32, author = { B. J. Zhang and B.-Y. Min and T. Sahai and Y. M. Marzouk }, title = { Rare event simulation of rotorcraft systems }, booktitle = { 2018 {AIAA} {SciTech} Forum and 20th {AIAA} Non-Deterministic Approaches Conference }, year = { 2018 }, note = { AIAA-2018-1181 }, } -
A. Gorodetsky, S. Karaman, Y. M. Marzouk.
Low-rank tensor integration for Gaussian filtering of continuous time nonlinear systems
(2017).
Available Links
Abstract
Integration-based Gaussian filters such as unscented, cubature, and Gauss-Hermite filters are effective ways to assimilate data and models within nonlinear systems. Traditionally, these filters have only been applicable for systems with a handful of states due to stability and scalability issues. In this paper, we present a new integration method for scaling quadrature-based filters to higher dimensions. Our approach begins by decomposing the dynamics and observation models into separated, low-rank tensor formats. Once in low-rank tensor format, adaptive integration techniques may be used to efficiently propagate the mean and covariance of the distribution of the system state with computational complexity that is polynomial in dimension and rank. Simulation results are shown on nonlinear chaotic systems with 20 state variables.
BibTeX Citation
@inproceedings{ conf_30, author = { A. Gorodetsky and S. Karaman and Y. M. Marzouk }, title = { Low-rank tensor integration for {Gaussian} filtering of continuous time nonlinear systems }, booktitle = { 56th {IEEE} Conference on Decision and Control ({CDC}) }, year = { 2017 }, } -
P.-Y. Gousenbourger, E. M. Massart, A. Musolas, P.-A. Absil, J. M. Hendrickx, L. Jacques, Y. M. Marzouk.
Piecewise-Bézier C^1 smoothing on manifolds with application to wind field estimation
(2017).
Abstract
We propose an algorithm for fitting $C^1$ piecewise-Bézier curves to (possibly corrupted) data points on manifolds. The curve is chosen as a compromise between proximity to data points and regularity. We apply our algorithm as an example to fit a curve to a set of low-rank covariance matrices, a task arising in wind field modeling. We show that our algorithm has denoising abilities for this application.
BibTeX Citation
@inproceedings{ conf_29, author = { P.-Y. Gousenbourger and E. M. Massart and A. Musolas and P.-A. Absil and J. M. Hendrickx and L. Jacques and Y. M. Marzouk }, title = { Piecewise-{B{\'e}zier} {$C^1$} smoothing on manifolds with application to wind field estimation }, booktitle = { 25th European Symposium on Artificial Neural Networks, Computational Intelligence, and Machine Learning ({ESANN} 2017) }, year = { 2017 }, isbn = { 978-287587039-1 }, } -
R. Baptista, Y. M. Marzouk, K. Willcox, B. Peherstorfer.
Optimal approximations of coupling in multi-disciplinary models
(2017).
Available Links
Abstract
Design of complex engineering systems requires coupled analyses of the multiple disciplines affecting system performance. The coupling among disciplines typically contributes significantly to the computational cost of analyzing the system, and can become particularly burdensome when coupled analyses are embedded within a design or optimization loop. In many cases, disciplines may be weakly coupled, so that some of the coupling or interaction terms can be neglected without significantly impacting the accuracy of the system output. However, typical practice derives such approximations in an ad hoc manner using expert opinion and domain experience. This paper proposes a new approach that formulates an optimization problem to find an optimal model that balances accuracy of the model outputs with the sparsity of the discipline couplings. An adaptive sequential Monte Carlo sampling technique is used to efficiently search the combinatorial model space of different discipline couplings. Finally, an algorithm for optimal model selection is presented and applied to identify the important discipline couplings in a fire detection satellite model and a turbine engine cycle analysis model.
BibTeX Citation
@inproceedings{ conf_28, author = { R. Baptista and Y. M. Marzouk and K. Willcox and B. Peherstorfer }, title = { Optimal approximations of coupling in multi-disciplinary models }, booktitle = { 2017 {AIAA} {SciTech} Forum and 19th {AIAA} Non-Deterministic Approaches Conference }, year = { 2017 }, note = { AIAA 2017-1935 }, } -
D. Bigoni, A. Spantini, Y. M. Marzouk.
Adaptive construction of measure transports for Bayesian inference
(2016).
Available Links
Abstract
Measure transport provides a useful tool for characterizing multivariate non-Gaussian target distributions arising in Bayesian inference. The transport approach seeks a parametric map that pushes forward a chosen reference distribution to the target/posterior distribution, through minimization of a certain Kullback--Leibler divergence. Among the distinguishing features of this approach is the availability of a tractable error estimator for posterior approximation, along with the idea that transport can be cast as an infinite dimensional optimization problem whose variations can be evaluated in closed form. We use these ingredients to develop a method for adaptively constructing transport maps---balancing the complexity of the map representation, approximation error, and computational cost.
BibTeX Citation
@inproceedings{ conf_27, author = { D. Bigoni and A. Spantini and Y. M. Marzouk }, title = { Adaptive construction of measure transports for {Bayesian} inference }, booktitle = { {NIPS} 2016 workshop on Advances in Approximate {Bayesian} Inference }, year = { 2016 }, } -
A. Spantini, D. Bigoni, Y. M. Marzouk.
Variational inference via decomposable transports: new algorithms for Bayesian filtering and smoothing
(2016).
Abstract
We describe a variational inference method that approximates an intractable target measure as the pushforward of a tractable distribution (e.g., a Gaussian) through a transport map. We then show how such transport maps can be decomposed---i.e., factorized into the composition of finitely many low-dimensional maps. We use the notion of decomposable transports to derive new deterministic online algorithms for Bayesian filtering and smoothing in nonlinear/non-Gaussian state-space models with static parameters, and illustrate the theory on a stochastic volatility model.
BibTeX Citation
@inproceedings{ conf_26, author = { A. Spantini and D. Bigoni and Y. M. Marzouk }, title = { Variational inference via decomposable transports: new algorithms for {Bayesian} filtering and smoothing }, booktitle = { {NIPS} 2016 workshop on Advances in Approximate {Bayesian} Inference }, year = { 2016 }, } -
J. Alora, A. Gorodetsky, S. Karaman, Y. M. Marzouk, N. Lowry.
Automated synthesis of low-rank control systems from sc-LTL specifications using tensor-train decompositions
(2016).
Available Links
Abstract
Correct-by-design automated construction of control systems has attracted a tremendous amount of attention. However, most existing algorithms for automated construction suffer from the curse of dimensionality, i.e., their run time scales exponentially with increasing dimensionality of the state space. As a result, typically, systems with only a few degrees of freedom are considered. In this paper, we propose a novel algorithm based on the tensor-train decomposition that solves stochastic optimal control problems with syntactically co-safe linear temporal logic specifications. We show that, under certain conditions, the run time of the proposed algorithm scales polynomially with the dimensionality of the state space and the rank of the optimal cost-to-go function. We demonstrate the algorithm in a six-dimensional problem instance involving a simple airplane model. In this example, the proposed algorithm provides up to four orders of computational savings when compared to the standard value iteration algorithm.
BibTeX Citation
@inproceedings{ conf_25, author = { J. Alora and A. Gorodetsky and S. Karaman and Y. M. Marzouk and N. Lowry }, title = { Automated synthesis of low-rank control systems from {sc-LTL} specifications using tensor-train decompositions }, booktitle = { 55th {IEEE} Conference on Decision and Control ({CDC}) }, year = { 2016 }, } -
A. Marques, Q. Wang, Y. M. Marzouk.
Data-driven probabilistic boundary layer modeling for airfoil performance prediction
(2016).
Available Links
Abstract
Many simulation tools for airfoil analysis and design are based on an integral approximation of the boundary layer. This approximate formulation cannot resolve the full dynamics of boundary layer flows, and hence requires additional models to account for unresolved effects. We introduce a new, data-driven, probabilistic model of these unresolved effects for the incompressible and laminar regime. To construct this model, we apply methods from supervised learning to a dataset containing over 1,550 airfoils. The result is a model that (i) is based on a large dataset of realistic airfoil configurations, and (ii) quantifies the model inadequacy associated with the use of an approximate boundary layer formulation. We also create a stochastic version of the airfoil design tool XFOIL by replacing its original boundary layer model with the probabilistic model developed here. We apply this stochastic version of XFOIL to compute the drag polars of two airfoils at low Reynolds numbers, and compare the results with experimental data.
BibTeX Citation
@inproceedings{ conf_24, author = { A. Marques and Q. Wang and Y. M. Marzouk }, title = { Data-driven probabilistic boundary layer modeling for airfoil performance prediction }, booktitle = { 34th {AIAA} Applied Aerodynamics Conference, {AIAA} {AVIATION} Forum }, year = { 2016 }, note = { AIAA-2016-3864 }, } -
A. Gorodetsky, S. Karaman, Y. M. Marzouk.
Efficient high-dimensional stochastic optimal motion control using tensor-train decomposition
(2015).
Available Links
Abstract
Stochastic optimal control problems frequently arise as motion control problems in the context of robotics. Unfortunately, all existing approaches that guarantee arbitrary precision suffer from the curse of dimensionality: the computational effort invested by the algorithm grows exponentially fast with increasing dimensionality of the state space of the underlying dynamic system governing the robot. In this paper, we propose a novel algorithm that utilizes compressed representations to efficiently solve stochastic optimal control problems with arbitrary precision. The running time of the new algorithms scales linearly with increasing dimensionality of the state space! The running time also depends polynomially on the rank of the value function, a measure that quantifies the intrinsic geometric complexity of the value function, due to the geometry and physics embedded in the problem instance at hand. The new algorithms are based on the recent analysis and algorithms for tensor decomposition, generalizing matrix decomposition algorithms, e.g., the singular value decomposition, to three or more dimensions. In computational experiments, we show the computational effort of the new algorithm also scales linearly with the discretization resolution of the state space. We also demonstrate the new algorithm on a problem involving the perching of an aircraft, represented by a nonlinear non-holonomic longitudinal model with a seven-dimensional state space, the full numerical solution to which was not obtained before. In this example, we estimate that the proposed algorithm runs more than seven orders of magnitude faster than the naive value iteration.
BibTeX Citation
@inproceedings{ conf_23, author = { A. Gorodetsky and S. Karaman and Y. M. Marzouk }, title = { Efficient high-dimensional stochastic optimal motion control using tensor-train decomposition }, booktitle = { Robotics: Science and Systems ({RSS}) {XI}, Rome, Italy }, year = { 2015 }, } -
M. E. Gharamti, Y. M. Marzouk, X. Huan, I. Hoteit.
A greedy approach for placement of subsurface aquifer wells in an ensemble filtering framework
(2015).
Available Links
Abstract
Optimizing well placement may help in better understanding subsurface solute transport and detecting contaminant plumes. In this work, we use the ensemble Kalman filter (EnKF) as a data assimilation tool and propose a greedy observational design algorithm to optimally select aquifer well locations for updating the prior contaminant ensemble. The algorithm is greedy in the sense that it operates sequentially, without taking into account expected future gains. The selection criterion is based on maximizing the information gain that the EnKF carries during the update of the prior uncertainties. We test the efficiency of this algorithm in a synthetic aquifer system where a contaminant plume is set to migrate over a 30-year period across a heterogeneous domain.
BibTeX Citation
@inproceedings{ conf_22,
  author    = {M. E. Gharamti and Y. M. Marzouk and X. Huan and I. Hoteit},
  title     = {A greedy approach for placement of subsurface aquifer wells in an ensemble filtering framework},
  booktitle = {Dynamic Data-Driven Environmental Systems Science (DyDESS), Cambridge, MA},
  series    = {Lecture Notes in Computer Science},
  volume    = {8964},
  publisher = {Springer},
  year      = {2015},
} -
Available Links
Abstract
BibTeX Citation
@inproceedings{ conf_21,
  author    = {F. Augustin and Y. M. Marzouk},
  title     = {Uncertainty quantification in high-performance computing},
  booktitle = {First SIGPLAN Workshop on Probabilistic and Approximate Computing (Collocated with PLDI 2014), Edinburgh, UK},
  year      = {2014},
} -
N. Lowry, R. Mangoubi, M. Desai, Y. M. Marzouk, P. Sammak.
Texton-based segmentation and classification of human embryonic stem cell colonies using multi-stage Bayesian level sets
(2012).
Available Links
Abstract
We present a texton-based, multi-stage Bayesian level set algorithm which we use to segment colony images of hESC and their derivatives. We extend our previous research segmenting stem cells according to multiresolution texture methods to accommodate colonies and tissues with diffuse and varied textures via a filter bank approach similar to the MR8. Texture features computed for test images are classified via comparison with learned sets of class-specific textural primitives, known as textons. Encompassing this texture model is the new Bayesian level set algorithm, which smoothes and regularizes classification similar to level sets but is simpler in its probabilistic implementation. The resulting algorithm accurately and automatically classifies images of pluripotent hESC and trophectoderm colonies for high-content screening applications.
BibTeX Citation
@inproceedings{ conf_20,
  author    = {N. Lowry and R. Mangoubi and M. Desai and Y. M. Marzouk and P. Sammak},
  title     = {Texton-based segmentation and classification of human embryonic stem cell colonies using multi-stage {Bayesian} level sets},
  booktitle = {9th IEEE International Symposium on Biomedical Imaging (ISBI), Barcelona, Spain},
  year      = {2012},
} -
T. Coles, H. Najm, Y. M. Marzouk.
CSP simplification of chemical kinetic systems under uncertainty
(2011).
Available Links
Abstract
Chemical kinetic systems contain both considerable uncertainty in their rate parameters and dynamics at multiple time scales. The latter feature aids model reduction in the deterministic case, but model reduction under uncertainty raises new challenges. We use computational singular perturbation (CSP) to calculate probabilistic “importance indices” for species-reaction pairs. Distributions of these indices are used to form reduced models that yield predictions within probabilistic bounds determined by the full model, or, alternatively, preserve entire output distributions of the full model.
BibTeX Citation
@inproceedings{ conf_19,
  author    = {T. Coles and H. Najm and Y. M. Marzouk},
  title     = {{CSP} simplification of chemical kinetic systems under uncertainty},
  booktitle = {International Workshop on Model Reduction in Reacting Flow/13th International Conference on Numerical Combustion, Corfu, Greece},
  year      = {2011},
} -
N. Lowry, R. Mangoubi, M. Desai, Y. M. Marzouk, P. Sammak.
A unified approach to expectation maximization and level set segmentation applied to stem cell and brain MRI images
(2011).
Abstract
BibTeX Citation
@inproceedings{ conf_18,
  author    = {N. Lowry and R. Mangoubi and M. Desai and Y. M. Marzouk and P. Sammak},
  title     = {A unified approach to expectation maximization and level set segmentation applied to stem cell and brain {MRI} images},
  booktitle = {IEEE International Symposium on Biomedical Imaging, Chicago, IL},
  year      = {2011},
} -
Available Links
Abstract
BibTeX Citation
@inproceedings{ conf_17,
  author    = {X. Huan and Y. M. Marzouk},
  title     = {Optimal {Bayesian} experimental design for combustion kinetics},
  booktitle = {49th AIAA Aerospace Sciences Meeting, Orlando, FL},
  year      = {2011},
  note      = {AIAA paper 2011-0513},
} -
B. Debusschere, B. Rhoads, H. Najm, Y. M. Marzouk, D. Goussis, M. Valorani, M. Frenklach.
Time integration of chemical kinetics with computational singular perturbation and tabulation
(2009).
Abstract
BibTeX Citation
@inproceedings{ conf_16,
  author    = {B. Debusschere and B. Rhoads and H. Najm and Y. M. Marzouk and D. Goussis and M. Valorani and M. Frenklach},
  title     = {Time integration of chemical kinetics with computational singular perturbation and tabulation},
  booktitle = {Western States Section of the Combustion Institute, Fall Meeting, Irvine, CA},
  year      = {2009},
} -
Y. M. Marzouk, B. Debusschere, H. Najm, D. Goussis, M. Valorani, M. Frenklach.
Time integration of reacting flows with CSP tabulation
(2009).
Abstract
BibTeX Citation
@inproceedings{ conf_15,
  author    = {Y. M. Marzouk and B. Debusschere and H. Najm and D. Goussis and M. Valorani and M. Frenklach},
  title     = {Time integration of reacting flows with {CSP} tabulation},
  booktitle = {Second International Workshop on Model Reduction in Reacting Flows, Notre Dame, IN},
  year      = {2009},
} -
H. N. Najm, C. Safta, Y. M. Marzouk, J. Ray, M. Valorani, D. Goussis.
High-order AMR computations of reacting flow with adaptive reduction of chemical stiffness
(2008).
Available Links
Abstract
No abstract available.
BibTeX Citation
@inproceedings{ conf_14,
  author    = {H. N. Najm and C. Safta and Y. M. Marzouk and J. Ray and M. Valorani and D. Goussis},
  title     = {High-order {AMR} computations of reacting flow with adaptive reduction of chemical stiffness},
  booktitle = {SciDAC 2008, Seattle, WA},
  series    = {Journal of Physics: Conference Series},
  volume    = {125},
  year      = {2008},
} -
J. Ray, Y. M. Marzouk.
A Bayesian method for inferring transmission chains in a partially observed epidemic
(2008).
Available Links
Abstract
BibTeX Citation
@inproceedings{ conf_13,
  author    = {J. Ray and Y. M. Marzouk},
  title     = {A {Bayesian} method for inferring transmission chains in a partially observed epidemic},
  booktitle = {Proceedings of the 2008 Joint Statistical Meetings, Denver, CO},
  publisher = {American Statistical Association},
  year      = {2008},
} -
J. Ray, Y. M. Marzouk, H. N. Najm, M. Kraus, P. Fast.
Estimating bioterror attacks from patient data: a Bayesian approach
In Proceedings of the American Statistical Association. (2006).
Abstract
No abstract available.
BibTeX Citation
@inproceedings{ conf_12,
  author    = {J. Ray and Y. M. Marzouk and H. N. Najm and M. Kraus and P. Fast},
  title     = {Estimating bioterror attacks from patient data: a {Bayesian} approach},
  booktitle = {Proceedings of the American Statistical Association},
  year      = {2006},
  note      = {RAND/ASA Conference on Quantitative Methods \& Statistical Applications in Defense and National Security, Santa Monica, CA, USA},
} -
Y. M. Marzouk, A. F. Ghoniem.
Actuating transverse jets via nozzle-edge vortical perturbations
(2006).
Available Links
Abstract
BibTeX Citation
@inproceedings{ conf_11,
  author    = {Y. M. Marzouk and A. F. Ghoniem},
  title     = {Actuating transverse jets via nozzle-edge vortical perturbations},
  booktitle = {44th AIAA Aerospace Sciences Meeting, Reno, NV},
  year      = {2006},
  note      = {AIAA paper 2006-1492},
} -
Y. M. Marzouk, H. N. Najm, L. A. Rahn.
Stochastic spectral methods for efficient Bayesian solution of inverse problems
(2005).
Available Links
Abstract
No abstract available.
BibTeX Citation
@inproceedings{ conf_10,
  author    = {Y. M. Marzouk and H. N. Najm and L. A. Rahn},
  title     = {Stochastic spectral methods for efficient {Bayesian} solution of inverse problems},
  booktitle = {Bayesian Inference and Maximum Entropy Methods in Science and Engineering, San Jose, CA},
  series    = {AIP Conference Proceedings},
  volume    = {803},
  pages     = {104--110},
  publisher = {American Institute of Physics},
  year      = {2005},
} -
R. L. Speth, Y. M. Marzouk, A. F. Ghoniem.
A quasi-one-dimensional unsteady laminar flame formulation with independent strain rate and curvature
(2005).
Abstract
No abstract available.
BibTeX Citation
@inproceedings{ conf_9,
  author    = {R. L. Speth and Y. M. Marzouk and A. F. Ghoniem},
  title     = {A quasi-one-dimensional unsteady laminar flame formulation with independent strain rate and curvature},
  booktitle = {Third MIT Conference on Computational Fluid and Solid Mechanics},
  editor    = {K. J. Bathe},
  publisher = {Elsevier},
  year      = {2005},
} -
Y. M. Marzouk, A. F. Ghoniem.
Simulations of high Reynolds number transverse jets and analysis of the underlying vortical structures
(2005).
Available Links
Abstract
BibTeX Citation
@inproceedings{ conf_8,
  author    = {Y. M. Marzouk and A. F. Ghoniem},
  title     = {Simulations of high {Reynolds} number transverse jets and analysis of the underlying vortical structures},
  booktitle = {43rd AIAA Aerospace Sciences Meeting, Reno, NV},
  year      = {2005},
  note      = {AIAA paper 2005-0308. Invited session},
} -
D. H. Wee, Y. M. Marzouk, A. F. Ghoniem.
Lagrangian simulation of a jet in crossflow at finite Reynolds number
(2005).
Available Links
Abstract
BibTeX Citation
@inproceedings{ conf_7,
  author    = {D. H. Wee and Y. M. Marzouk and A. F. Ghoniem},
  title     = {{Lagrangian} simulation of a jet in crossflow at finite {Reynolds} number},
  booktitle = {43rd AIAA Aerospace Sciences Meeting, Reno, NV},
  year      = {2005},
  note      = {AIAA paper 2005-0291},
} -
R. L. Speth, Y. M. Marzouk, A. F. Ghoniem.
Impact of hydrogen addition on flame response to stretch and curvature
(2005).
Available Links
Abstract
BibTeX Citation
@inproceedings{ conf_6,
  author    = {R. L. Speth and Y. M. Marzouk and A. F. Ghoniem},
  title     = {Impact of hydrogen addition on flame response to stretch and curvature},
  booktitle = {43rd AIAA Aerospace Sciences Meeting, Reno, NV},
  year      = {2005},
  note      = {AIAA paper 2005-0143},
} -
Available Links
Abstract
BibTeX Citation
@inproceedings{ conf_5,
  author    = {Y. M. Marzouk and A. F. Ghoniem},
  title     = {Vorticity formulation for an actuated jet in crossflow},
  booktitle = {42nd AIAA Aerospace Sciences Meeting, Reno, NV},
  year      = {2004},
  note      = {AIAA paper 2004-0096},
} -
Y. M. Marzouk, A. F. Ghoniem.
Vorticity generation mechanisms and correct boundary conditions for transverse jet simulation
(2003).
Abstract
No abstract available.
BibTeX Citation
@inproceedings{ conf_4,
  author    = {Y. M. Marzouk and A. F. Ghoniem},
  title     = {Vorticity generation mechanisms and correct boundary conditions for transverse jet simulation},
  booktitle = {Proceedings of the Second MIT Conference on Computational Fluid and Solid Mechanics},
  editor    = {K. J. Bathe},
  publisher = {Elsevier},
  year      = {2003},
} -
Y. M. Marzouk, A. F. Ghoniem, H. N. Najm.
Mechanism of streamwise vorticity formation in a transverse jet
(2002).
Available Links
Abstract
BibTeX Citation
@inproceedings{ conf_3,
  author    = {Y. M. Marzouk and A. F. Ghoniem and H. N. Najm},
  title     = {Mechanism of streamwise vorticity formation in a transverse jet},
  booktitle = {40th AIAA Aerospace Sciences Meeting, Reno, NV},
  year      = {2002},
  note      = {AIAA paper 2002-1063},
} -
Y. M. Marzouk, A. F. Ghoniem, H. N. Najm.
Can a one-dimensional strained flame model flame-vortex interactions?
(2002).
Abstract
No abstract available.
BibTeX Citation
@inproceedings{ conf_2,
  author    = {Y. M. Marzouk and A. F. Ghoniem and H. N. Najm},
  title     = {Can a one-dimensional strained flame model flame-vortex interactions?},
  booktitle = {Proceedings of the Second Mediterranean Combustion Symposium},
  pages     = {372--383},
  year      = {2002},
} -
Y. M. Marzouk, A. F. Ghoniem, H. N. Najm.
A flame embedding model for turbulent combustion simulation
(2000).
Available Links
Abstract
BibTeX Citation
@inproceedings{ conf_1,
  author    = {Y. M. Marzouk and A. F. Ghoniem and H. N. Najm},
  title     = {A flame embedding model for turbulent combustion simulation},
  booktitle = {38th AIAA Aerospace Sciences Meeting, Reno, NV},
  year      = {2000},
  note      = {AIAA paper 2000-0866},
}
-
Abstract
BibTeX Citation
@misc{ button2018,
  author       = {K. Button},
  title        = {Engineering Notebook: Wringing Out The Risks},
  howpublished = {Aerospace America},
  month        = mar,
  year         = {2018},
} -
Y. M. Marzouk, T. Moselhy, M. Parno, A. Spantini.
Sampling via Measure Transport: An Introduction
In Handbook of Uncertainty Quantification (2016), pp. 1--41.
Available Links
Abstract
We present the fundamentals of a measure transport approach to sampling. The idea is to construct a deterministic coupling -- i.e., a transport map -- between a complex ``target'' probability measure of interest and a simpler reference measure. Given a transport map, one can generate arbitrarily many independent and unweighted samples from the target simply by pushing forward reference samples through the map. If the map is endowed with a triangular structure, one can also easily generate samples from conditionals of the target measure. We consider two different and complementary scenarios: first, when only evaluations of the unnormalized target density are available and, second, when the target distribution is known only through a finite collection of samples. We show that in both settings, the desired transports can be characterized as the solutions of variational problems. We then address practical issues associated with the optimization-based construction of transports: choosing finite-dimensional parameterizations of the map, enforcing monotonicity, quantifying the error of approximate transports, and refining approximate transports by enriching the corresponding approximation spaces. Approximate transports can also be used to ``Gaussianize'' complex distributions and thus precondition conventional asymptotically exact sampling schemes. We place the measure transport approach in broader context, describing connections with other optimization-based samplers, with inference and density estimation schemes using optimal transport, and with alternative transformation-based approaches to simulation. We also sketch current work aimed at the construction of transport maps in high dimensions, exploiting essential features of the target distribution (e.g., conditional independence, low-rank structure). The approaches and algorithms presented here have direct applications to Bayesian computation and to broader problems of stochastic simulation.
BibTeX Citation
@incollection{ marzouk-transport-2016,
  author    = {Y. M. Marzouk and T. Moselhy and M. Parno and A. Spantini},
  title     = {Sampling via Measure Transport: An Introduction},
  booktitle = {Handbook of Uncertainty Quantification},
  editor    = {R. Ghanem and D. Higdon and H. Owhadi},
  pages     = {1--41},
  publisher = {Springer International Publishing},
  address   = {Cham},
  year      = {2016},
  doi       = {10.1007/978-3-319-11259-6_23-1},
} -
Available Links
Abstract
BibTeX Citation
@misc{ aamag12,
  author       = {Y. M. Marzouk},
  title        = {Quantifying uncertainty in the physical world},
  howpublished = {AeroAstro Magazine No. 12, 2014--2015},
} -
Y. M. Marzouk, K. E. Willcox.
Confronting energy and environment's toughest challenges with computational engineering
Available Links
Abstract
BibTeX Citation
@misc{ aamag7,
  author       = {Y. M. Marzouk and K. E. Willcox},
  title        = {Confronting energy and environment's toughest challenges with computational engineering},
  howpublished = {AeroAstro Magazine No. 7, 2009--2010},
} -
R. Aggarwal, M. J. Demkowicz, Y. M. Marzouk.
Information-driven experimental design in materials science
In Information Science for Materials Discovery and Design (2016), pp. 13--44.
Abstract
Optimal experimental design (OED) aims to maximize the value of experiments and the data they produce. OED ensures efficient allocation of limited resources, especially when numerous repeated experiments cannot be performed. This chapter presents a fully Bayesian and decision theoretic approach to OED---accounting for uncertainties in models, model parameters, and experimental outcomes, and allowing optimality to be defined according to a range of possible experimental goals. We demonstrate this approach on two illustrative problems in materials research. The first example is a parameter inference problem. Its goal is to determine a substrate property from the behavior of a film deposited thereon. We design experiments to yield maximal information about the substrate property using only two measurements. The second example is a model selection problem. We design an experiment that optimally distinguishes between two models for helium trapping at interfaces. In both instances, we provide model-based justifications for why the selected experiments are optimal. Moreover, both examples illustrate the utility of reduced-order or surrogate models in optimal experimental design.
BibTeX Citation
@incollection{ doi:10.1007/978-3-319-23871-5_2,
  author    = {R. Aggarwal and M. J. Demkowicz and Y. M. Marzouk},
  title     = {Information-driven experimental design in materials science},
  booktitle = {Information Science for Materials Discovery and Design},
  editor    = {T. Lookman and F. Alexander and K. Rajan},
  pages     = {13--44},
  publisher = {Springer International Publishing},
  address   = {Cham},
  year      = {2016},
  doi       = {10.1007/978-3-319-23871-5_2},
} -
Y. M. Marzouk, K. E. Willcox.
Uncertainty Quantification
In The Princeton Companion to Applied Mathematics II (2015), pp. 131--134.
Available Links
Abstract
BibTeX Citation
@incollection{ isbn:9780691150390,
  author    = {Y. M. Marzouk and K. E. Willcox},
  title     = {Uncertainty Quantification},
  booktitle = {The Princeton Companion to Applied Mathematics},
  editor    = {N. Higham and M. Dennis and P. Glendinning and P. Martin and F. Santosa and J. Tanner},
  volume    = {II},
  chapter   = {34},
  pages     = {131--134},
  publisher = {Princeton University Press},
  year      = {2015},
  isbn      = {9780691150390},
} -
M. Frangos, Y. M. Marzouk, K. Willcox, B. van Bloemen Waanders.
Surrogate and reduced-order modeling: a comparison of approaches for large-scale statistical inverse problems
In Large-Scale Inverse Problems and Quantification of Uncertainty (2010).
Abstract
BibTeX Citation
@incollection{ doi:10.1002/9780470685853.ch7,
  author    = {M. Frangos and Y. M. Marzouk and K. Willcox and B. van Bloemen Waanders},
  title     = {Surrogate and reduced-order modeling: a comparison of approaches for large-scale statistical inverse problems},
  booktitle = {Large-Scale Inverse Problems and Quantification of Uncertainty},
  editor    = {L. Biegler and G. Biros and O. Ghattas and M. Heinkenschloss and D. Keyes and B. Mallick and Y. M. Marzouk and L. Tenorio and B. van Bloemen Waanders and K. Willcox},
  chapter   = {7},
  publisher = {John Wiley \& Sons, Ltd},
  year      = {2010},
  doi       = {10.1002/9780470685853.ch7},
} -
L. Biegler, G. Biros, O. Ghattas, M. Heinkenschloss, D. Keyes, B. Mallick, Y. M. Marzouk, L. Tenorio, B. van Bloemen Waanders, K. Willcox.
Large-Scale Inverse Problems and Quantification of Uncertainty
(2010).
Abstract
BibTeX Citation
@book{ doi:10.1002/9780470685853,
  editor    = {L. Biegler and G. Biros and O. Ghattas and M. Heinkenschloss and D. Keyes and B. Mallick and Y. M. Marzouk and L. Tenorio and B. van Bloemen Waanders and K. Willcox},
  title     = {Large-Scale Inverse Problems and Quantification of Uncertainty},
  publisher = {John Wiley \& Sons, Ltd},
  year      = {2010},
  doi       = {10.1002/9780470685853},
  keywords  = {statistical methods and approximation methods, inverse problem solving - estimating unknown objects from indirect noisy observations, earth's subsurface mapping and seismic waves, ill-posed inverse problems - in science and engineering, precursors to quantification of uncertainties - prediction and decision-making, inverse problems, determining input - or given noisy data, crucial unmet need for scalable numerical algorithm development - and large-scale inverse problem solution, computational methods - for large-scale inverse problems, inverse problems and statistical characterizations - uncertainties modeled randomly, approximation techniques - replacing forward model with inexpensive surrogates},
}