diff --git a/.gitignore b/.gitignore
new file mode 100644
index 00000000..353f2f30
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,17 @@
+*.o
+*~
+
+# OS-specific: Mac
+*.dSYM
+.DS_Store
+
+Laghos
+
+# QtCreator files
+*.cflags
+*.config
+*.creator
+*.creator.user
+*.cxxflags
+*.files
+*.includes
\ No newline at end of file
diff --git a/README.md b/README.md
index 287e41fe..34abf116 100644
--- a/README.md
+++ b/README.md
@@ -94,6 +94,9 @@ Other computational motives in Laghos include the following:
   partially assembled) and is applied just twice per "assembly". Both the
   preparation and the application costs are important for this operator.
 - Domain-decomposed MPI parallelism.
+- Adaptive mesh refinement (AMR) with parallel partitioning and load balancing
+  based on MFEM's non-conforming mesh algorithm that partitions a space-filling 
+  curve. Only the Sedov problem (#1) is supported.
 - Optional in-situ visualization with [GLVis](http:/glvis.org) and data output
   for visualization and data analysis with [VisIt](http://visit.llnl.gov).
 
@@ -251,6 +254,28 @@ The latter produces the following specific internal energy plot (notice the `-vi
 
 <img src="data/tp.png" width="500" height="500">
 
+#### AMR Sedov problem
+The AMR version only runs with problem 1 (Sedov blast). New parameters are:
+
+- `-amr`: turn on AMR mode
+- `-ae` or `--amr-estimator`: available estimators are 0:Custom, 1:Rho, 2:ZZ and 3:Kelly
+- `-ar` or `--amr-ref-threshold`: tweak the refinement threshold
+- `-ad` or `--amr-deref-threshold`: tweak the derefinement threshold
+- `-aj` or `--amr-jac-threshold`: tweak the refinement threshold for the Rho estimator
+- `-am` or `--amr-max-level`: twweak the max level of refinement
+
+One of the sample runs is:
+```sh
+mpirun -np 8 laghos -p 1 -dim 3 -rs 4 -amr -tf 0.6 -ar 1e-3
+```
+
+This produces the following plots:
+
+<table border="0">
+<td><img src="data/sedov-amr-900.png">
+<td><img src="data/sedov-amr-2463.png">
+</table>
+
 ## Verification of Results
 
 To make sure the results are correct, we tabulate reference final iterations
diff --git a/amr/README.md b/amr/README.md
deleted file mode 100644
index 13f06548..00000000
--- a/amr/README.md
+++ /dev/null
@@ -1,123 +0,0 @@
-               __                __
-              / /   ____  ____  / /_  ____  _____
-             / /   / __ `/ __ `/ __ \/ __ \/ ___/
-            / /___/ /_/ / /_/ / / / / /_/ (__  )
-           /_____/\__,_/\__, /_/ /_/\____/____/
-                       /____/
-
-        High-order Lagrangian Hydrodynamics Miniapp
-
-                       AMR version
-
-
-## Overview
-
-This directory contains the automatic mesh refinement (AMR) version of **Laghos**
-(LAGrangian High-Order Solver), which is currently considered experimental and
-is NOT the official benchmark version of the miniapp.
-
-For more details about Laghos see the [README file](../README.md) in the
-top-level directory.
-
-The Laghos miniapp is part of the [CEED software suite](http://ceed.exascaleproject.org/software),
-a collection of software benchmarks, miniapps, libraries and APIs for
-efficient exascale discretizations based on high-order finite element
-and spectral element methods. See http://github.com/ceed for more
-information and source code availability.
-
-The CEED research is supported by the [Exascale Computing Project](https://exascaleproject.org/exascale-computing-project)
-(17-SC-20-SC), a collaborative effort of two U.S. Department of Energy
-organizations (Office of Science and the National Nuclear Security
-Administration) responsible for the planning and preparation of a
-[capable exascale ecosystem](https://exascaleproject.org/what-is-exascale),
-including software, applications, hardware, advanced system engineering and early
-testbed platforms, in support of the nation’s exascale computing imperative.
-
-
-## Differences from the official benchmark version
-
-The AMR version differs from the official benchmark version of Laghos (in the
-top-level directory) in the following ways:
-
-1. The `-amr` parameter turns on dynamic AMR.
-2. The code includes functionality to change the mesh and the hydro operator on
-   the fly.
-3. Parallel partitioning and load balancing is based on MFEM's non-conforming
-   mesh algorithm that partitions a space-filling curve. METIS is not required.
-
-
-## Limitations
-
-- The current AMR implementation is just a demonstration.
-- Only the Sedov problem is supported, as the refinement/derefinement decisions
-  are very simple and tailored specifically to Sedov.
-- Partial assembly is currently not supported in AMR mode. Also, the hydro
-  operator update is currently not efficient (e.g., the whole mass matrix is
-  reassembled on each mesh change).
-- MFEM currently does not support derefinement interpolation for non-nodal bases.
-  The AMR version therefore does not use `BasisType::Positive` for the L2 space.
-
-
-## Building
-
-The AMR version can be built following the same [instructions](../README.md) as
-for the top-level directory.
-
-
-## Running
-
-The AMR version only runs with problem 1 (Sedov blast). New parameters are:
-
-- `-amr`: turn on AMR mode
-- `-rt` or `--ref-threshold`: tweak the refinement threshold
-- `-dt` or `--deref-threshold`: tweak the derefinement threshold
-
-One of the sample runs is:
-```sh
-mpirun -np 8 laghos -p 1 -m ../data/cube01_hex.mesh -rs 4 -tf 0.6 -rt 1e-3 -amr
-```
-
-This produces the following plots at steps 900 and 2463:
-
-<table border="0">
-<td><img src="data/sedov-amr-900.png">
-<td><img src="data/sedov-amr-2463.png">
-</table>
-
-
-## Verification of Results
-
-To make sure the results are correct, we tabulate reference final iterations
-(`step`), time steps (`dt`) and energies (`|e|`) for the runs listed below:
-
-1. `mpirun -np 8 laghos -p 1 -m ../data/square01_quad.mesh -rs 4 -tf 0.8 -amr`
-2. `mpirun -np 8 laghos -p 1 -m ../data/square01_quad.mesh -rs 4 -tf 0.8 -ok 3 -ot 2 -amr`
-3. `mpirun -np 8 laghos -p 1 -m ../data/cube01_hex.mesh -rs 3 -tf 0.6 -amr`
-4. `mpirun -np 8 laghos -p 1 -m ../data/cube01_hex.mesh -rs 4 -tf 0.6 -rt 1e-3 -amr`
-
-| run | `step` | `dt` | `e` |
-| --- | ------ | ---- | ----- |
-|  1. | 2374 | 0.000308 | 90.9397751791 |
-|  2. | 2727 | 0.000458 | 168.0063715464 |
-|  3. |  998 | 0.001262 | 388.6322346715 |
-|  4. | 2463 | 0.000113 | 1703.2772575684 |
-
-An implementation is considered valid if the final energy values are all within
-round-off distance from the above reference values.
-
-
-## Contact
-
-You can reach the Laghos team by emailing laghos@llnl.gov or by leaving a
-comment in the [issue tracker](https://github.com/CEED/Laghos/issues).
-
-
-## Copyright
-
-The following copyright applies to each file in the CEED software suite,
-unless otherwise stated in the file:
-
-> Copyright (c) 2017, Lawrence Livermore National Security, LLC. Produced at the
-> Lawrence Livermore National Laboratory. LLNL-CODE-734707. All Rights reserved.
-
-See files LICENSE and NOTICE in the top-level directory for details.
diff --git a/amr/laghos.cpp b/amr/laghos.cpp
deleted file mode 100644
index 94fa9b02..00000000
--- a/amr/laghos.cpp
+++ /dev/null
@@ -1,959 +0,0 @@
-// Copyright (c) 2017, Lawrence Livermore National Security, LLC. Produced at
-// the Lawrence Livermore National Laboratory. LLNL-CODE-734707. All Rights
-// reserved. See files LICENSE and NOTICE for details.
-//
-// This file is part of CEED, a collection of benchmarks, miniapps, software
-// libraries and APIs for efficient high-order finite element and spectral
-// element discretizations for exascale applications. For more information and
-// source code availability see http://github.com/ceed.
-//
-// The CEED research is supported by the Exascale Computing Project 17-SC-20-SC,
-// a collaborative effort of two U.S. Department of Energy organizations (Office
-// of Science and the National Nuclear Security Administration) responsible for
-// the planning and preparation of a capable exascale ecosystem, including
-// software, applications, hardware, advanced system engineering and early
-// testbed platforms, in support of the nation's exascale computing imperative.
-//
-//                     __                __
-//                    / /   ____  ____  / /_  ____  _____
-//                   / /   / __ `/ __ `/ __ \/ __ \/ ___/
-//                  / /___/ /_/ / /_/ / / / / /_/ (__  )
-//                 /_____/\__,_/\__, /_/ /_/\____/____/
-//                             /____/
-//
-//             High-order Lagrangian Hydrodynamics Miniapp
-//
-// Laghos(LAGrangian High-Order Solver) is a miniapp that solves the
-// time-dependent Euler equation of compressible gas dynamics in a moving
-// Lagrangian frame using unstructured high-order finite element spatial
-// discretization and explicit high-order time-stepping. Laghos is based on the
-// numerical algorithm described in the following article:
-//
-//    V. Dobrev, Tz. Kolev and R. Rieben, "High-order curvilinear finite element
-//    methods for Lagrangian hydrodynamics", SIAM Journal on Scientific
-//    Computing, (34) 2012, pp. B606–B641, https://doi.org/10.1137/120864672.
-//
-//             *** THIS IS AN AUTOMATIC MESH REFINEMENT DEMO ***
-//
-// Sample runs:
-//    mpirun -np 8 laghos -p 1 -m ../data/square01_quad.mesh -rs 4 -tf 0.8 -amr
-//    mpirun -np 8 laghos -p 1 -m ../data/square01_quad.mesh -rs 4 -tf 0.8 -ok 3 -ot 2 -amr
-//    mpirun -np 8 laghos -p 1 -m ../data/cube01_hex.mesh -rs 3 -tf 0.6 -amr
-//    mpirun -np 8 laghos -p 1 -m ../data/cube01_hex.mesh -rs 4 -tf 0.6 -rt 1e-3 -amr
-//
-// Test problems:
-//    p = 1  --> Sedov blast.
-
-
-#include "laghos_solver.hpp"
-#include <memory>
-#include <iostream>
-#include <fstream>
-
-using namespace std;
-using namespace mfem;
-using namespace mfem::hydrodynamics;
-
-// Choice for the problem setup.
-int problem;
-
-void display_banner(ostream & os);
-
-void AMRUpdate(BlockVector &S, BlockVector &S_tmp,
-               Array<int> &true_offset,
-               ParGridFunction &x_gf,
-               ParGridFunction &v_gf,
-               ParGridFunction &e_gf);
-
-void GetZeroBCDofs(ParMesh *pmesh, ParFiniteElementSpace *pspace,
-                   int bdr_attr_max, Array<int> &ess_tdofs);
-
-void FindElementsWithVertex(const Mesh* mesh, const Vertex &vert,
-                            const double size, Array<int> &elements);
-
-void GetPerElementMinMax(const GridFunction &gf,
-                         Vector &elem_min, Vector &elem_max,
-                         int int_order = -1);
-
-
-int main(int argc, char *argv[])
-{
-   // Initialize MPI.
-   MPI_Session mpi(argc, argv);
-   int myid = mpi.WorldRank();
-
-   // Print the banner.
-   if (mpi.Root()) { display_banner(cout); }
-
-   // Parse command-line options.
-   const char *mesh_file = "data/square01_quad.mesh";
-   int rs_levels = 0;
-   int rp_levels = 0;
-   int order_v = 2;
-   int order_e = 1;
-   int ode_solver_type = 4;
-   double t_final = 0.5;
-   double cfl = 0.5;
-   double cg_tol = 1e-8;
-   int cg_max_iter = 300;
-   int max_tsteps = -1;
-   bool p_assembly = true;
-   bool visualization = false;
-   int vis_steps = 5;
-   bool visit = false;
-   bool gfprint = false;
-   const char *basename = "results/Laghos";
-   int partition_type = 111;
-   bool amr = false;
-   double ref_threshold = 2e-4;
-   double deref_threshold = 0.75;
-   const int nc_limit = 1;
-   const double blast_energy = 0.25;
-   const double blast_position[] = {0.0, 0.0, 0.0};
-   const double blast_amr_size = 1e-10;
-
-   OptionsParser args(argc, argv);
-   args.AddOption(&mesh_file, "-m", "--mesh",
-                  "Mesh file to use.");
-   args.AddOption(&rs_levels, "-rs", "--refine-serial",
-                  "Number of times to refine the mesh uniformly in serial.");
-   args.AddOption(&rp_levels, "-rp", "--refine-parallel",
-                  "Number of times to refine the mesh uniformly in parallel.");
-
-   args.AddOption(&problem, "-p", "--problem", "Problem setup to use.");
-   args.AddOption(&order_v, "-ok", "--order-kinematic",
-                  "Order (degree) of the kinematic finite element space.");
-   args.AddOption(&order_e, "-ot", "--order-thermo",
-                  "Order (degree) of the thermodynamic finite element space.");
-   args.AddOption(&ode_solver_type, "-s", "--ode-solver",
-                  "ODE solver: 1 - Forward Euler,\n\t"
-                  "            2 - RK2 SSP, 3 - RK3 SSP, 4 - RK4, 6 - RK6.");
-   args.AddOption(&t_final, "-tf", "--t-final",
-                  "Final time; start time is 0.");
-
-   args.AddOption(&cfl, "-cfl", "--cfl", "CFL-condition number.");
-   args.AddOption(&cg_tol, "-cgt", "--cg-tol",
-                  "Relative CG tolerance (velocity linear solve).");
-   args.AddOption(&cg_max_iter, "-cgm", "--cg-max-steps",
-                  "Maximum number of CG iterations (velocity linear solve).");
-   args.AddOption(&max_tsteps, "-ms", "--max-steps",
-                  "Maximum number of steps (negative means no restriction).");
-   args.AddOption(&p_assembly, "-pa", "--partial-assembly", "-fa",
-                  "--full-assembly",
-                  "Activate 1D tensor-based assembly (partial assembly).");
-
-   args.AddOption(&amr, "-amr", "--enable-amr", "-no-amr", "--disable-amr",
-                  "Experimental adaptive mesh refinement (problem 1 only).");
-   args.AddOption(&ref_threshold, "-rt", "--ref-threshold",
-                  "AMR refinement threshold.");
-   args.AddOption(&deref_threshold, "-dt", "--deref-threshold",
-                  "AMR derefinement threshold (0 = no derefinement).");
-
-   args.AddOption(&visualization, "-vis", "--visualization", "-no-vis",
-                  "--no-visualization",
-                  "Enable or disable GLVis visualization.");
-   args.AddOption(&vis_steps, "-vs", "--visualization-steps",
-                  "Visualize every n-th timestep.");
-   args.AddOption(&visit, "-visit", "--visit", "-no-visit", "--no-visit",
-                  "Enable or disable VisIt visualization.");
-   args.AddOption(&gfprint, "-print", "--print", "-no-print", "--no-print",
-                  "Enable or disable result output (files in mfem format).");
-   args.AddOption(&basename, "-k", "--outputfilename",
-                  "Name of the visit dump files");
-
-   args.AddOption(&partition_type, "-pt", "--partition",
-                  "Customized x/y/z Cartesian MPI partitioning of the serial mesh.\n\t"
-                  "Here x,y,z are relative task ratios in each direction.\n\t"
-                  "Example: with 48 mpi tasks and -pt 321, one would get a Cartesian\n\t"
-                  "partition of the serial mesh by (6,4,2) MPI tasks in (x,y,z).\n\t"
-                  "NOTE: the serially refined mesh must have the appropriate number\n\t"
-                  "of zones in each direction, e.g., the number of zones in direction x\n\t"
-                  "must be divisible by the number of MPI tasks in direction x.\n\t"
-                  "Available options: 11, 21, 111, 211, 221, 311, 321, 322, 432.");
-   args.Parse();
-   if (!args.Good())
-   {
-      if (mpi.Root()) { args.PrintUsage(cout); }
-      return 1;
-   }
-
-   if (amr && problem != 1)
-   {
-      if (mpi.Root()) { cout << "AMR only supported for problem 1." << endl; }
-      return 0;
-   }
-
-   if (mpi.Root()) { args.PrintOptions(cout); }
-
-   // Read the serial mesh from the given mesh file on all processors.
-   // Refine the mesh in serial to increase the resolution.
-   Mesh *mesh = new Mesh(mesh_file, 1, 1);
-   const int dim = mesh->Dimension();
-   if (!amr)
-   {
-      for (int lev = 0; lev < rs_levels; lev++)
-      {
-         mesh->UniformRefinement();
-      }
-   }
-   else
-   {
-      // Initial refinement for AMR demo on Sedov
-      mesh->EnsureNCMesh();
-      for (int lev = 0; lev < rs_levels; lev++)
-      {
-         mesh->RefineAtVertex(Vertex(blast_position[0], blast_position[1],
-                                     blast_position[2]),
-                              blast_amr_size);
-      }
-   }
-
-   if (p_assembly && dim == 1)
-   {
-      p_assembly = false;
-      if (mpi.Root())
-      {
-         cout << "Laghos does not support PA in 1D. Switching to FA." << endl;
-      }
-   }
-   if (p_assembly && amr)
-   {
-      p_assembly = false;
-      if (mpi.Root())
-      {
-         cout << "Laghos currently does not support PA for AMR. "
-              "Switching to FA." << endl;
-      }
-   }
-
-   // Parallel partitioning of the mesh.
-   ParMesh *pmesh = NULL;
-   const int num_tasks = mpi.WorldSize(); int unit;
-   int *nxyz = new int[3];
-   switch (partition_type)
-   {
-      case 11:
-      case 111:
-         unit = floor(pow(num_tasks, 1.0 / dim) + 1e-2);
-         for (int d = 0; d < dim; d++) { nxyz[d] = unit; }
-         if (dim == 2) { nxyz[2] = 0; }
-         break;
-      case 21: // 2D
-         unit = floor(pow(num_tasks / 2, 1.0 / 2) + 1e-2);
-         nxyz[0] = 2 * unit; nxyz[1] = unit; nxyz[2] = 0;
-         break;
-      case 211: // 3D.
-         unit = floor(pow(num_tasks / 2, 1.0 / 3) + 1e-2);
-         nxyz[0] = 2 * unit; nxyz[1] = unit; nxyz[2] = unit;
-         break;
-      case 221: // 3D.
-         unit = floor(pow(num_tasks / 4, 1.0 / 3) + 1e-2);
-         nxyz[0] = 2 * unit; nxyz[1] = 2 * unit; nxyz[2] = unit;
-         break;
-      case 311: // 3D.
-         unit = floor(pow(num_tasks / 3, 1.0 / 3) + 1e-2);
-         nxyz[0] = 3 * unit; nxyz[1] = unit; nxyz[2] = unit;
-         break;
-      case 321: // 3D.
-         unit = floor(pow(num_tasks / 6, 1.0 / 3) + 1e-2);
-         nxyz[0] = 3 * unit; nxyz[1] = 2 * unit; nxyz[2] = unit;
-         break;
-      case 322: // 3D.
-         unit = floor(pow(2 * num_tasks / 3, 1.0 / 3) + 1e-2);
-         nxyz[0] = 3 * unit / 2; nxyz[1] = unit; nxyz[2] = unit;
-         break;
-      case 432: // 3D.
-         unit = floor(pow(num_tasks / 3, 1.0 / 3) + 1e-2);
-         nxyz[0] = 2 * unit; nxyz[1] = 3 * unit / 2; nxyz[2] = unit;
-         break;
-      default:
-         if (myid == 0)
-         {
-            cout << "Unknown partition type: " << partition_type << '\n';
-         }
-         delete mesh;
-         MPI_Finalize();
-         return 3;
-   }
-   int product = 1;
-   for (int d = 0; d < dim; d++) { product *= nxyz[d]; }
-   if (myid == 0)
-   {
-      cout << nxyz[0] << " " << nxyz[1] << " " << nxyz[2] << " "
-           << product << " " << num_tasks << endl;
-   }
-   if (product == num_tasks)
-   {
-      int *partitioning = mesh->CartesianPartitioning(nxyz);
-      pmesh = new ParMesh(MPI_COMM_WORLD, *mesh, partitioning);
-      delete partitioning;
-   }
-   else
-   {
-      if (myid == 0)
-      {
-         cout << "Non-Cartesian partitioning through METIS will be used.\n";
-#ifndef MFEM_USE_METIS
-         cout << "MFEM was built without METIS. "
-              << "Adjust the number of tasks to use a Cartesian split." << endl;
-#endif
-      }
-#ifndef MFEM_USE_METIS
-      return 1;
-#endif
-      pmesh = new ParMesh(MPI_COMM_WORLD, *mesh);
-   }
-   delete [] nxyz;
-   delete mesh;
-
-   // Refine the mesh further in parallel to increase the resolution.
-   for (int lev = 0; lev < rp_levels; lev++)
-   {
-      pmesh->UniformRefinement();
-   }
-
-   int nzones = pmesh->GetNE(), nzones_min, nzones_max;
-   MPI_Reduce(&nzones, &nzones_min, 1, MPI_INT, MPI_MIN, 0, pmesh->GetComm());
-   MPI_Reduce(&nzones, &nzones_max, 1, MPI_INT, MPI_MAX, 0, pmesh->GetComm());
-   if (myid == 0)
-   { cout << "Zones min/max: " << nzones_min << " " << nzones_max << endl; }
-
-   int amr_max_level = rs_levels + rp_levels;
-
-   // Define the parallel finite element spaces. We use:
-   // - H1 (Gauss-Lobatto, continuous) for position and velocity.
-   // - L2 (Bernstein, discontinuous) for specific internal energy.
-   L2_FECollection L2FEC(order_e, dim/*, BasisType::Positive*/);
-   H1_FECollection H1FEC(order_v, dim);
-   ParFiniteElementSpace L2FESpace(pmesh, &L2FEC);
-   ParFiniteElementSpace H1FESpace(pmesh, &H1FEC, pmesh->Dimension());
-
-   // Boundary conditions: all tests use v.n = 0 on the boundary, and we assume
-   // that the boundaries are straight.
-   Array<int> ess_tdofs;
-   int bdr_attr_max = pmesh->bdr_attributes.Max();
-   GetZeroBCDofs(pmesh, &H1FESpace, bdr_attr_max, ess_tdofs);
-
-   // Define the explicit ODE solver used for time integration.
-   ODESolver *ode_solver = NULL;
-   switch (ode_solver_type)
-   {
-      case 1: ode_solver = new ForwardEulerSolver; break;
-      case 2: ode_solver = new RK2Solver(0.5); break;
-      case 3: ode_solver = new RK3SSPSolver; break;
-      case 4: ode_solver = new RK4Solver; break;
-      case 6: ode_solver = new RK6Solver; break;
-      default:
-         if (myid == 0)
-         {
-            cout << "Unknown ODE solver type: " << ode_solver_type << '\n';
-         }
-         delete pmesh;
-         MPI_Finalize();
-         return 3;
-   }
-
-   HYPRE_Int glob_size_l2 = L2FESpace.GlobalTrueVSize();
-   HYPRE_Int glob_size_h1 = H1FESpace.GlobalTrueVSize();
-
-   if (mpi.Root())
-   {
-      cout << "Number of kinematic (position, velocity) dofs: "
-           << glob_size_h1 << endl;
-      cout << "Number of specific internal energy dofs: "
-           << glob_size_l2 << endl;
-   }
-
-   int Vsize_l2 = L2FESpace.GetVSize();
-   int Vsize_h1 = H1FESpace.GetVSize();
-
-   // The monolithic BlockVector stores unknown fields as:
-   // - 0 -> position
-   // - 1 -> velocity
-   // - 2 -> specific internal energy
-
-   Array<int> true_offset(4);
-   true_offset[0] = 0;
-   true_offset[1] = true_offset[0] + Vsize_h1;
-   true_offset[2] = true_offset[1] + Vsize_h1;
-   true_offset[3] = true_offset[2] + Vsize_l2;
-   BlockVector S(true_offset);
-
-   // Define GridFunction objects for the position, velocity and specific
-   // internal energy.  There is no function for the density, as we can always
-   // compute the density values given the current mesh position, using the
-   // property of pointwise mass conservation.
-   ParGridFunction x_gf, v_gf, e_gf;
-   x_gf.MakeRef(&H1FESpace, S, true_offset[0]);
-   v_gf.MakeRef(&H1FESpace, S, true_offset[1]);
-   e_gf.MakeRef(&L2FESpace, S, true_offset[2]);
-
-   // Initialize x_gf using the starting mesh coordinates. This also links the
-   // mesh positions to the values in x_gf.
-   pmesh->SetNodalGridFunction(&x_gf);
-
-   // Initialize the velocity.
-   VectorFunctionCoefficient v_coeff(pmesh->Dimension(), v0);
-   v_gf.ProjectCoefficient(v_coeff);
-
-   // Initialize density and specific internal energy values. We interpolate in
-   // a non-positive basis to get the correct values at the dofs.  Then we do an
-   // L2 projection to the positive basis in which we actually compute. The goal
-   // is to get a high-order representation of the initial condition. Note that
-   // this density is a temporary function and it will not be updated during the
-   // time evolution.
-   ParGridFunction rho0_gf(&L2FESpace);
-   {
-      FunctionCoefficient rho_coeff(hydrodynamics::rho0);
-      L2_FECollection l2_fec(order_e, pmesh->Dimension());
-      ParFiniteElementSpace l2_fes(pmesh, &l2_fec);
-
-      ParGridFunction l2_rho(&l2_fes), l2_e(&l2_fes);
-      l2_rho.ProjectCoefficient(rho_coeff);
-      rho0_gf.ProjectGridFunction(l2_rho);
-
-      if (problem == 1)
-      {
-         // For the Sedov test, we use a delta function at the origin.
-         DeltaCoefficient e_coeff(blast_position[0], blast_position[1],
-                                  blast_position[2], blast_energy);
-         l2_e.ProjectCoefficient(e_coeff);
-      }
-      else
-      {
-         FunctionCoefficient e_coeff(e0);
-         l2_e.ProjectCoefficient(e_coeff);
-      }
-      e_gf.ProjectGridFunction(l2_e);
-   }
-
-   // Space-dependent ideal gas coefficient over the Lagrangian mesh.
-   Coefficient *material_pcf = new FunctionCoefficient(hydrodynamics::gamma);
-
-   // Additional details, depending on the problem.
-   int source = 0; bool visc = false;
-   switch (problem)
-   {
-      case 0: if (pmesh->Dimension() == 2) { source = 1; }
-         visc = false; break;
-      case 1: visc = true; break;
-      case 2: visc = true; break;
-      case 3: visc = true; break;
-      default: MFEM_ABORT("Wrong problem specification!");
-   }
-
-   LagrangianHydroOperator oper(S.Size(), H1FESpace, L2FESpace,
-                                ess_tdofs, rho0_gf, source, cfl, material_pcf,
-                                visc, p_assembly, cg_tol, cg_max_iter);
-
-   if (amr)
-   {
-      // set a base for h0, this will be further divided in UpdateQuadratureData
-      // TODO: for AMR, the treatment of h0 needs more work
-      double elem_size = 0.5; // coarse element size (TODO calculate)
-      double h0 = elem_size / order_v;
-      oper.SetH0(h0);
-   }
-
-   socketstream vis_rho, vis_v, vis_e;
-   char vishost[] = "localhost";
-   int  visport   = 19916;
-
-   ParGridFunction rho_gf;
-   if (visualization || visit)
-   {
-      oper.ComputeDensity(rho_gf);
-   }
-
-   if (visualization)
-   {
-      // Make sure all MPI ranks have sent their 'v' solution before initiating
-      // another set of GLVis connections (one from each rank):
-      MPI_Barrier(pmesh->GetComm());
-
-      int Wx = 0, Wy = 0; // window position
-      const int Ww = 500, Wh = 500; // window size
-      int offx = Ww+10; // window offsets
-
-      VisualizeField(vis_rho, vishost, visport, rho_gf,
-                     "Density", Wx, Wy, Ww, Wh);
-      Wx += offx;
-      VisualizeField(vis_v, vishost, visport, v_gf,
-                     "Velocity", Wx, Wy, Ww, Wh);
-      Wx += offx;
-      VisualizeField(vis_e, vishost, visport, e_gf,
-                     "Specific Internal Energy", Wx, Wy, Ww, Wh);
-   }
-
-   // Save data for VisIt visualization.
-   VisItDataCollection visit_dc(basename, pmesh);
-   if (visit)
-   {
-      visit_dc.RegisterField("Density",  &rho_gf);
-      visit_dc.RegisterField("Velocity", &v_gf);
-      visit_dc.RegisterField("Specific Internal Energy", &e_gf);
-      visit_dc.SetCycle(0);
-      visit_dc.SetTime(0.0);
-      visit_dc.Save();
-   }
-
-   // Perform time-integration (looping over the time iterations, ti, with a
-   // time-step dt). The object oper is of type LagrangianHydroOperator that
-   // defines the Mult() method that used by the time integrators.
-   ode_solver->Init(oper);
-   oper.ResetTimeStepEstimate();
-   double t = 0.0, dt = oper.GetTimeStepEstimate(S), t_old;
-   bool last_step = false;
-   int steps = 0;
-   BlockVector S_old(S);
-   for (int ti = 1; !last_step; ti++)
-   {
-      if (t + dt >= t_final)
-      {
-         dt = t_final - t;
-         last_step = true;
-      }
-      if (steps == max_tsteps) { last_step = true; }
-
-      S_old = S;
-      t_old = t;
-      oper.ResetTimeStepEstimate();
-
-      // S is the vector of dofs, t is the current time, and dt is the time step
-      // to advance.
-      ode_solver->Step(S, t, dt);
-      steps++;
-
-      // Adaptive time step control.
-      const double dt_est = oper.GetTimeStepEstimate(S);
-      if (dt_est < dt)
-      {
-         // Repeat (solve again) with a decreased time step - decrease of the
-         // time estimate suggests appearance of oscillations.
-         dt *= 0.85;
-         if (dt < numeric_limits<double>::epsilon())
-         { MFEM_ABORT("The time step crashed!"); }
-         t = t_old;
-         S = S_old;
-         oper.ResetQuadratureData();
-         if (mpi.Root()) { cout << "Repeating step " << ti << endl; }
-         ti--; continue;
-      }
-      else if (dt_est > 1.25 * dt) { dt *= 1.02; }
-
-      // Make sure that the mesh corresponds to the new solution state.
-      pmesh->NewNodes(x_gf, false);
-
-      if (last_step || (ti % vis_steps) == 0)
-      {
-         double loc_norm = e_gf * e_gf, tot_norm;
-         MPI_Allreduce(&loc_norm, &tot_norm, 1, MPI_DOUBLE, MPI_SUM,
-                       pmesh->GetComm());
-         if (mpi.Root())
-         {
-            cout << fixed;
-            cout << "step " << setw(5) << ti
-                 << ",\tt = " << setw(5) << setprecision(4) << t
-                 << ",\tdt = " << setw(5) << setprecision(6) << dt
-                 << ",\t|e| = " << setprecision(10)
-                 << sqrt(tot_norm) << endl;
-         }
-
-         // Make sure all ranks have sent their 'v' solution before initiating
-         // another set of GLVis connections (one from each rank):
-         MPI_Barrier(pmesh->GetComm());
-
-         if (visualization || visit || gfprint)
-         {
-            oper.ComputeDensity(rho_gf);
-         }
-         if (visualization)
-         {
-            int Wx = 0, Wy = 0; // window position
-            int Ww = 500, Wh = 500; // window size
-            int offx = Ww+10; // window offsets
-
-            VisualizeField(vis_rho, vishost, visport, rho_gf,
-                           "Density", Wx, Wy, Ww, Wh);
-            Wx += offx;
-            VisualizeField(vis_v, vishost, visport,
-                           v_gf, "Velocity", Wx, Wy, Ww, Wh);
-            Wx += offx;
-            VisualizeField(vis_e, vishost, visport, e_gf,
-                           "Specific Internal Energy", Wx, Wy, Ww,Wh);
-            Wx += offx;
-         }
-
-         if (visit)
-         {
-            visit_dc.SetCycle(ti);
-            visit_dc.SetTime(t);
-            visit_dc.Save();
-         }
-
-         if (gfprint)
-         {
-            ostringstream mesh_name, rho_name, v_name, e_name;
-            mesh_name << basename << "_" << ti
-                      << "_mesh." << setfill('0') << setw(6) << myid;
-            rho_name  << basename << "_" << ti
-                      << "_rho." << setfill('0') << setw(6) << myid;
-            v_name << basename << "_" << ti
-                   << "_v." << setfill('0') << setw(6) << myid;
-            e_name << basename << "_" << ti
-                   << "_e." << setfill('0') << setw(6) << myid;
-
-            ofstream mesh_ofs(mesh_name.str().c_str());
-            mesh_ofs.precision(8);
-            pmesh->Print(mesh_ofs);
-            mesh_ofs.close();
-
-            ofstream rho_ofs(rho_name.str().c_str());
-            rho_ofs.precision(8);
-            rho_gf.Save(rho_ofs);
-            rho_ofs.close();
-
-            ofstream v_ofs(v_name.str().c_str());
-            v_ofs.precision(8);
-            v_gf.Save(v_ofs);
-            v_ofs.close();
-
-            ofstream e_ofs(e_name.str().c_str());
-            e_ofs.precision(8);
-            e_gf.Save(e_ofs);
-            e_ofs.close();
-         }
-      }
-
-      if (amr)
-      {
-         Vector &error_est = oper.GetZoneMaxVisc();
-
-         Vector v_max, v_min;
-         GetPerElementMinMax(v_gf, v_min, v_max);
-
-         bool mesh_changed = false;
-
-         // make a list of elements to refine
-         Array<int> refs;
-         for (int i = 0; i < pmesh->GetNE(); i++)
-         {
-            if (error_est(i) > ref_threshold
-                && pmesh->pncmesh->GetElementDepth(i) < amr_max_level
-                && (v_min(i) < 1e-3 || ti < 50) // only refine the still area
-               )
-            {
-               refs.Append(i);
-            }
-         }
-
-         int nref = pmesh->ReduceInt(refs.Size());
-         if (nref)
-         {
-            pmesh->GeneralRefinement(refs, 1, nc_limit);
-            mesh_changed = true;
-
-            if (myid == 0)
-            {
-               cout << "Refined " << nref << " elements." << endl;
-            }
-         }
-         else if (deref_threshold)
-         {
-            oper.ComputeDensity(rho_gf);
-
-            Vector rho_max, rho_min;
-            GetPerElementMinMax(rho_gf, rho_min, rho_max);
-
-            // simple derefinement based on zone maximum rho in post-shock region
-            double rho_max_max = rho_max.Size() ? rho_max.Max() : 0.0;
-            double threshold, loc_threshold = deref_threshold * rho_max_max;
-            MPI_Allreduce(&loc_threshold, &threshold, 1, MPI_DOUBLE, MPI_MAX,
-                          pmesh->GetComm());
-
-            // make sure the blast point is never derefined
-            Array<int> elements;
-			FindElementsWithVertex(pmesh, Vertex(blast_position[0],
-                                                 blast_position[1],
-                                                 blast_position[2]),
-                                   blast_amr_size, elements);
-			for (int i = 0; i < elements.Size(); i++)
-            {
-               int index = elements[i];
-			   if (index >= 0) { rho_max(index) = 1e10; }
-            }
-
-            // also, only derefine where the mesh is in motion, i.e. after the shock
-            for (int i = 0; i < pmesh->GetNE(); i++)
-            {
-               if (v_min(i) < 0.1) { rho_max(i) = 1e10; }
-            }
-
-            const int op = 2; // maximum value of fine elements
-            mesh_changed = pmesh->DerefineByError(rho_max, threshold,
-                                                  nc_limit, op);
-            if (mesh_changed && myid == 0)
-            {
-               cout << "Derefined, threshold = " << threshold << endl;
-            }
-         }
-
-         if (mesh_changed)
-         {
-            // update state and operator
-            AMRUpdate(S, S_old, true_offset, x_gf, v_gf, e_gf);
-            oper.AMRUpdate(S, true);
-
-            pmesh->Rebalance();
-
-            // update state and operator
-            AMRUpdate(S, S_old, true_offset, x_gf, v_gf, e_gf);
-            oper.AMRUpdate(S, false);
-
-            GetZeroBCDofs(pmesh, &H1FESpace, bdr_attr_max, ess_tdofs);
-
-            ode_solver->Init(oper);
-
-            //H1FESpace.PrintPartitionStats();
-         }
-      }
-   }
-
-   switch (ode_solver_type)
-   {
-      case 2: steps *= 2; break;
-      case 3: steps *= 3; break;
-      case 4: steps *= 4; break;
-      case 6: steps *= 6;
-   }
-   oper.PrintTimingData(mpi.Root(), steps);
-
-   if (visualization)
-   {
-      vis_v.close();
-      vis_e.close();
-   }
-
-   // Free the used memory.
-   delete ode_solver;
-   delete pmesh;
-   delete material_pcf;
-
-   return 0;
-}
-
-
-void GetZeroBCDofs(ParMesh *pmesh, ParFiniteElementSpace *pspace,
-                   int bdr_attr_max, Array<int> &ess_tdofs)
-{
-   ess_tdofs.SetSize(0);
-   Array<int> ess_bdr(bdr_attr_max), tdofs1d;
-   for (int d = 0; d < pmesh->Dimension(); d++)
-   {
-      // Attributes 1/2/3 correspond to fixed-x/y/z boundaries, i.e., we must
-      // enforce v_x/y/z = 0 for the velocity components.
-      ess_bdr = 0; ess_bdr[d] = 1;
-      pspace->GetEssentialTrueDofs(ess_bdr, tdofs1d, d);
-      ess_tdofs.Append(tdofs1d);
-   }
-}
-
-void AMRUpdate(BlockVector &S, BlockVector &S_tmp,
-               Array<int> &true_offset,
-               ParGridFunction &x_gf,
-               ParGridFunction &v_gf,
-               ParGridFunction &e_gf)
-{
-   ParFiniteElementSpace* H1FESpace = x_gf.ParFESpace();
-   ParFiniteElementSpace* L2FESpace = e_gf.ParFESpace();
-
-   H1FESpace->Update();
-   L2FESpace->Update();
-
-   int Vsize_h1 = H1FESpace->GetVSize();
-   int Vsize_l2 = L2FESpace->GetVSize();
-
-   true_offset[0] = 0;
-   true_offset[1] = true_offset[0] + Vsize_h1;
-   true_offset[2] = true_offset[1] + Vsize_h1;
-   true_offset[3] = true_offset[2] + Vsize_l2;
-
-   S_tmp = S;
-   S.Update(true_offset);
-
-   const Operator* H1Update = H1FESpace->GetUpdateOperator();
-   const Operator* L2Update = L2FESpace->GetUpdateOperator();
-
-   H1Update->Mult(S_tmp.GetBlock(0), S.GetBlock(0));
-   H1Update->Mult(S_tmp.GetBlock(1), S.GetBlock(1));
-   L2Update->Mult(S_tmp.GetBlock(2), S.GetBlock(2));
-
-   x_gf.MakeRef(H1FESpace, S, true_offset[0]);
-   v_gf.MakeRef(H1FESpace, S, true_offset[1]);
-   e_gf.MakeRef(L2FESpace, S, true_offset[2]);
-
-   S_tmp.Update(true_offset);
-}
-
-void FindElementsWithVertex(const Mesh* mesh, const Vertex &vert,
-                            const double size, Array<int> &elements)
-{
-   Array<int> v;
-
-   for (int i = 0; i < mesh->GetNE(); i++)
-   {
-      mesh->GetElementVertices(i, v);
-      for (int j = 0; j < v.Size(); j++)
-      {
-         double dist = 0.0;
-         for (int l = 0; l < mesh->SpaceDimension(); l++)
-         {
-            double d = vert(l) - mesh->GetVertex(v[j])[l];
-            dist += d*d;
-         }
-         if (dist <= size*size) { elements.Append(i); break; }
-      }
-   }
-}
-
-void Pow(Vector &vec, double p)
-{
-   for (int i = 0; i < vec.Size(); i++)
-   {
-      vec(i) = std::pow(vec(i), p);
-   }
-}
-
-void GetPerElementMinMax(const GridFunction &gf,
-                         Vector &elem_min, Vector &elem_max,
-                         int int_order)
-{
-   const FiniteElementSpace *space = gf.FESpace();
-   int ne = space->GetNE();
-
-   if (int_order < 0) { int_order = space->GetOrder(0) + 1; }
-
-   elem_min.SetSize(ne);
-   elem_max.SetSize(ne);
-
-   Vector vals, tmp;
-   for (int i = 0; i < ne; i++)
-   {
-      int geom = space->GetFE(i)->GetGeomType();
-      const IntegrationRule &ir = IntRules.Get(geom, int_order);
-
-      gf.GetValues(i, ir, vals);
-
-      if (space->GetVDim() > 1)
-      {
-         Pow(vals, 2.0);
-         for (int vd = 1; vd < space->GetVDim(); vd++)
-         {
-            gf.GetValues(i, ir, tmp, vd+1);
-            Pow(tmp, 2.0);
-            vals += tmp;
-         }
-         Pow(vals, 0.5);
-      }
-
-      elem_min(i) = vals.Min();
-      elem_max(i) = vals.Max();
-   }
-}
-
-namespace mfem
-{
-
-namespace hydrodynamics
-{
-
-double rho0(const Vector &x)
-{
-   switch (problem)
-   {
-      case 0: return 1.0;
-      case 1: return 1.0;
-      case 2: if (x(0) < 0.5) { return 1.0; }
-         else { return 0.1; }
-      case 3: if (x(0) > 1.0 && x(1) <= 1.5) { return 1.0; }
-         else { return 0.125; }
-      default: MFEM_ABORT("Bad number given for problem id!"); return 0.0;
-   }
-}
-
-double gamma(const Vector &x)
-{
-   switch (problem)
-   {
-      case 0: return 5./3.;
-      case 1: return 1.4;
-      case 2: return 1.4;
-      case 3: if (x(0) > 1.0 && x(1) <= 1.5) { return 1.4; }
-         else { return 1.5; }
-      default: MFEM_ABORT("Bad number given for problem id!"); return 0.0;
-   }
-}
-
-void v0(const Vector &x, Vector &v)
-{
-   switch (problem)
-   {
-      case 0:
-         v(0) =  sin(M_PI*x(0)) * cos(M_PI*x(1));
-         v(1) = -cos(M_PI*x(0)) * sin(M_PI*x(1));
-         if (x.Size() == 3)
-         {
-            v(0) *= cos(M_PI*x(2));
-            v(1) *= cos(M_PI*x(2));
-            v(2) = 0.0;
-         }
-         break;
-      case 1: v = 0.0; break;
-      case 2: v = 0.0; break;
-      case 3: v = 0.0; break;
-      default: MFEM_ABORT("Bad number given for problem id!");
-   }
-}
-
-double e0(const Vector &x)
-{
-   switch (problem)
-   {
-      case 0:
-      {
-         const double denom = 2.0 / 3.0;  // (5/3 - 1) * density.
-         double val;
-         if (x.Size() == 2)
-         {
-            val = 1.0 + (cos(2*M_PI*x(0)) + cos(2*M_PI*x(1))) / 4.0;
-         }
-         else
-         {
-            val = 100.0 + ((cos(2*M_PI*x(2)) + 2) *
-                           (cos(2*M_PI*x(0)) + cos(2*M_PI*x(1))) - 2) / 16.0;
-         }
-         return val/denom;
-      }
-      case 1: return 0.0; // This case is initialized in main().
-      case 2: if (x(0) < 0.5) { return 1.0 / rho0(x) / (gamma(x) - 1.0); }
-         else { return 0.1 / rho0(x) / (gamma(x) - 1.0); }
-      case 3: if (x(0) > 1.0) { return 0.1 / rho0(x) / (gamma(x) - 1.0); }
-         else { return 1.0 / rho0(x) / (gamma(x) - 1.0); }
-      default: MFEM_ABORT("Bad number given for problem id!"); return 0.0;
-   }
-}
-
-} // namespace hydrodynamics
-
-} // namespace mfem
-
-void display_banner(ostream & os)
-{
-   os << endl
-      << "       __                __                 " << endl
-      << "      / /   ____  ____  / /_  ____  _____   " << endl
-      << "     / /   / __ `/ __ `/ __ \\/ __ \\/ ___/ " << endl
-      << "    / /___/ /_/ / /_/ / / / / /_/ (__  )    " << endl
-      << "   /_____/\\__,_/\\__, /_/ /_/\\____/____/  " << endl
-      << "               /____/                       " << endl << endl;
-}
diff --git a/amr/laghos_assembly.cpp b/amr/laghos_assembly.cpp
deleted file mode 100644
index 6888ba46..00000000
--- a/amr/laghos_assembly.cpp
+++ /dev/null
@@ -1,1072 +0,0 @@
-// Copyright (c) 2017, Lawrence Livermore National Security, LLC. Produced at
-// the Lawrence Livermore National Laboratory. LLNL-CODE-734707. All Rights
-// reserved. See files LICENSE and NOTICE for details.
-//
-// This file is part of CEED, a collection of benchmarks, miniapps, software
-// libraries and APIs for efficient high-order finite element and spectral
-// element discretizations for exascale applications. For more information and
-// source code availability see http://github.com/ceed.
-//
-// The CEED research is supported by the Exascale Computing Project (17-SC-20-SC)
-// a collaborative effort of two U.S. Department of Energy organizations (Office
-// of Science and the National Nuclear Security Administration) responsible for
-// the planning and preparation of a capable exascale ecosystem, including
-// software, applications, hardware, advanced system engineering and early
-// testbed platforms, in support of the nation's exascale computing imperative.
-
-#include "laghos_assembly.hpp"
-
-#ifdef MFEM_USE_MPI
-
-using namespace std;
-
-namespace mfem
-{
-
-namespace hydrodynamics
-{
-
-const Tensors1D *tensors1D = NULL;
-const FastEvaluator *evaluator = NULL;
-
-Tensors1D::Tensors1D(int H1order, int L2order, int nqp1D)
-   : HQshape1D(H1order + 1, nqp1D),
-     HQgrad1D(H1order + 1, nqp1D),
-     LQshape1D(L2order + 1, nqp1D)
-{
-   // In this miniapp we assume:
-   // - Gauss-Legendre quadrature points.
-   // - Gauss-Lobatto continuous kinematic basis.
-   // - Bernstein discontinuous thermodynamic basis.
-
-   const double *quad1D_pos = poly1d.GetPoints(nqp1D - 1,
-                                               Quadrature1D::GaussLegendre);
-   Poly_1D::Basis &basisH1 = poly1d.GetBasis(H1order,
-                                             Quadrature1D::GaussLobatto);
-   Vector col, grad_col;
-   for (int q = 0; q < nqp1D; q++)
-   {
-      HQshape1D.GetColumnReference(q, col);
-      HQgrad1D.GetColumnReference(q, grad_col);
-      basisH1.Eval(quad1D_pos[q], col, grad_col);
-   }
-   for (int q = 0; q < nqp1D; q++)
-   {
-      LQshape1D.GetColumnReference(q, col);
-      poly1d.CalcBernstein(L2order, quad1D_pos[q], col);
-   }
-}
-
-void FastEvaluator::GetL2Values(const Vector &vecL2, Vector &vecQ) const
-{
-   const int nL2dof1D = tensors1D->LQshape1D.Height(),
-             nqp1D    = tensors1D->LQshape1D.Width();
-   if (dim == 2)
-   {
-      DenseMatrix E(vecL2.GetData(), nL2dof1D, nL2dof1D);
-      DenseMatrix LQ(nL2dof1D, nqp1D);
-
-      vecQ.SetSize(nqp1D * nqp1D);
-      DenseMatrix QQ(vecQ.GetData(), nqp1D, nqp1D);
-
-      // LQ_j2_k1 = E_j1_j2 LQs_j1_k1  -- contract in x direction.
-      // QQ_k1_k2 = LQ_j2_k1 LQs_j2_k2 -- contract in y direction.
-      MultAtB(E, tensors1D->LQshape1D, LQ);
-      MultAtB(LQ, tensors1D->LQshape1D, QQ);
-   }
-   else
-   {
-      DenseMatrix E(vecL2.GetData(), nL2dof1D*nL2dof1D, nL2dof1D);
-      DenseMatrix LL_Q(nL2dof1D * nL2dof1D, nqp1D),
-                  L_LQ(LL_Q.GetData(), nL2dof1D, nL2dof1D*nqp1D),
-                  Q_LQ(nqp1D, nL2dof1D*nqp1D);
-
-      vecQ.SetSize(nqp1D * nqp1D * nqp1D);
-      DenseMatrix QQ_Q(vecQ.GetData(), nqp1D * nqp1D, nqp1D);
-
-      // LLQ_j1_j2_k3  = E_j1_j2_j3 LQs_j3_k3   -- contract in z direction.
-      // QLQ_k1_j2_k3  = LQs_j1_k1 LLQ_j1_j2_k3 -- contract in x direction.
-      // QQQ_k1_k2_k3  = QLQ_k1_j2_k3 LQs_j2_k2 -- contract in y direction.
-      // The last step does some reordering (it's not product of matrices).
-      mfem::Mult(E, tensors1D->LQshape1D, LL_Q);
-      MultAtB(tensors1D->LQshape1D, L_LQ, Q_LQ);
-      for (int k1 = 0; k1 < nqp1D; k1++)
-      {
-         for (int k2 = 0; k2 < nqp1D; k2++)
-         {
-            for (int k3 = 0; k3 < nqp1D; k3++)
-            {
-               QQ_Q(k1 + nqp1D*k2, k3) = 0.0;
-               for (int j2 = 0; j2 < nL2dof1D; j2++)
-               {
-                  QQ_Q(k1 + nqp1D*k2, k3) +=
-                     Q_LQ(k1, j2 + k3*nL2dof1D) * tensors1D->LQshape1D(j2, k2);
-               }
-            }
-         }
-      }
-   }
-}
-
-void FastEvaluator::GetVectorGrad(const DenseMatrix &vec, DenseTensor &J) const
-{
-   const int nH1dof1D = tensors1D->HQshape1D.Height(),
-             nqp1D    = tensors1D->LQshape1D.Width();
-   DenseMatrix X;
-
-   if (dim == 2)
-   {
-      const int nH1dof = nH1dof1D * nH1dof1D;
-      DenseMatrix HQ(nH1dof1D, nqp1D), QQ(nqp1D, nqp1D);
-      Vector x(nH1dof);
-
-      const H1_QuadrilateralElement *fe =
-         dynamic_cast<const H1_QuadrilateralElement *>(H1FESpace.GetFE(0));
-      const Array<int> &dof_map = fe->GetDofMap();
-
-      for (int c = 0; c < 2; c++)
-      {
-         // Transfer from the mfem's H1 local numbering to the tensor structure
-         // numbering.
-         for (int j = 0; j < nH1dof; j++) { x[j] = vec(dof_map[j], c); }
-         X.UseExternalData(x.GetData(), nH1dof1D, nH1dof1D);
-
-         // HQ_i2_k1  = X_i1_i2 HQg_i1_k1  -- gradients in x direction.
-         // QQ_k1_k2  = HQ_i2_k1 HQs_i2_k2 -- contract  in y direction.
-         MultAtB(X, tensors1D->HQgrad1D, HQ);
-         MultAtB(HQ, tensors1D->HQshape1D, QQ);
-
-         // Set the (c,0) component of the Jacobians at all quadrature points.
-         for (int k1 = 0; k1 < nqp1D; k1++)
-         {
-            for (int k2 = 0; k2 < nqp1D; k2++)
-            {
-               const int idx = k2 * nqp1D + k1;
-               J(idx)(c, 0) = QQ(k1, k2);
-            }
-         }
-
-         // HQ_i2_k1  = X_i1_i2 HQs_i1_k1  -- contract  in x direction.
-         // QQ_k1_k2  = HQ_i2_k1 HQg_i2_k2 -- gradients in y direction.
-         MultAtB(X, tensors1D->HQshape1D, HQ);
-         MultAtB(HQ, tensors1D->HQgrad1D, QQ);
-
-         // Set the (c,1) component of the Jacobians at all quadrature points.
-         for (int k1 = 0; k1 < nqp1D; k1++)
-         {
-            for (int k2 = 0; k2 < nqp1D; k2++)
-            {
-               const int idx = k2 * nqp1D + k1;
-               J(idx)(c, 1) = QQ(k1, k2);
-            }
-         }
-      }
-   }
-   else
-   {
-      const int nH1dof = nH1dof1D * nH1dof1D * nH1dof1D;
-      DenseMatrix HH_Q(nH1dof1D * nH1dof1D, nqp1D),
-                  H_HQ(HH_Q.GetData(), nH1dof1D, nH1dof1D * nqp1D),
-                  Q_HQ(nqp1D, nH1dof1D*nqp1D), QQ_Q(nqp1D * nqp1D, nqp1D);
-      Vector x(nH1dof);
-
-      const H1_HexahedronElement *fe =
-         dynamic_cast<const H1_HexahedronElement *>(H1FESpace.GetFE(0));
-      const Array<int> &dof_map = fe->GetDofMap();
-
-      for (int c = 0; c < 3; c++)
-      {
-         // Transfer from the mfem's H1 local numbering to the tensor structure
-         // numbering.
-         for (int j = 0; j < nH1dof; j++) { x[j] = vec(dof_map[j], c); }
-         X.UseExternalData(x.GetData(), nH1dof1D * nH1dof1D, nH1dof1D);
-
-         // HHQ_i1_i2_k3 = X_i1_i2_i3 HQs_i3_k3   -- contract  in z direction.
-         // QHQ_k1_i2_k3 = HQg_i1_k1 HHQ_i1_i2_k3 -- gradients in x direction.
-         // QQQ_k1_k2_k3 = QHQ_k1_i2_k3 HQs_i2_k2 -- contract  in y direction.
-         // The last step does some reordering (it's not product of matrices).
-         mfem::Mult(X, tensors1D->HQshape1D, HH_Q);
-         MultAtB(tensors1D->HQgrad1D, H_HQ, Q_HQ);
-         for (int k1 = 0; k1 < nqp1D; k1++)
-         {
-            for (int k2 = 0; k2 < nqp1D; k2++)
-            {
-               for (int k3 = 0; k3 < nqp1D; k3++)
-               {
-                  QQ_Q(k1 + nqp1D*k2, k3) = 0.0;
-                  for (int i2 = 0; i2 < nH1dof1D; i2++)
-                  {
-                     QQ_Q(k1 + nqp1D*k2, k3) += Q_HQ(k1, i2 + k3*nH1dof1D) *
-                                                tensors1D->HQshape1D(i2, k2);
-                  }
-               }
-            }
-         }
-         // Set the (c,0) component of the Jacobians at all quadrature points.
-         for (int k1 = 0; k1 < nqp1D; k1++)
-         {
-            for (int k2 = 0; k2 < nqp1D; k2++)
-            {
-               for (int k3 = 0; k3 < nqp1D; k3++)
-               {
-                  const int idx = k3*nqp1D*nqp1D + k2*nqp1D + k1;
-                  J(idx)(c, 0) = QQ_Q(k1 + k2*nqp1D, k3);
-               }
-            }
-         }
-
-         // HHQ_i1_i2_k3 = X_i1_i2_i3 HQs_i3_k3   -- contract  in z direction.
-         // QHQ_k1_i2_k3 = HQs_i1_k1 HHQ_i1_i2_k3 -- contract  in x direction.
-         // QQQ_k1_k2_k3 = QHQ_k1_i2_k3 HQg_i2_k2 -- gradients in y direction.
-         // The last step does some reordering (it's not product of matrices).
-         mfem::Mult(X, tensors1D->HQshape1D, HH_Q);
-         MultAtB(tensors1D->HQshape1D, H_HQ, Q_HQ);
-         for (int k1 = 0; k1 < nqp1D; k1++)
-         {
-            for (int k2 = 0; k2 < nqp1D; k2++)
-            {
-               for (int k3 = 0; k3 < nqp1D; k3++)
-               {
-                  QQ_Q(k1 + nqp1D*k2, k3) = 0.0;
-                  for (int i2 = 0; i2 < nH1dof1D; i2++)
-                  {
-                     QQ_Q(k1 + nqp1D*k2, k3) += Q_HQ(k1, i2 + k3*nH1dof1D) *
-                                                tensors1D->HQgrad1D(i2, k2);
-                  }
-               }
-            }
-         }
-         // Set the (c,1) component of the Jacobians at all quadrature points.
-         for (int k1 = 0; k1 < nqp1D; k1++)
-         {
-            for (int k2 = 0; k2 < nqp1D; k2++)
-            {
-               for (int k3 = 0; k3 < nqp1D; k3++)
-               {
-                  const int idx = k3*nqp1D*nqp1D + k2*nqp1D + k1;
-                  J(idx)(c, 1) = QQ_Q(k1 + k2*nqp1D, k3);
-               }
-            }
-         }
-
-         // HHQ_i1_i2_k3 = X_i1_i2_i3 HQg_i3_k3   -- gradients in z direction.
-         // QHQ_k1_i2_k3 = HQs_i1_k1 HHQ_i1_i2_k3 -- contract  in x direction.
-         // QQQ_k1_k2_k3 = QHQ_k1_i2_k3 HQs_i2_k2 -- contract  in y direction.
-         // The last step does some reordering (it's not product of matrices).
-         mfem::Mult(X, tensors1D->HQgrad1D, HH_Q);
-         MultAtB(tensors1D->HQshape1D, H_HQ, Q_HQ);
-         for (int k1 = 0; k1 < nqp1D; k1++)
-         {
-            for (int k2 = 0; k2 < nqp1D; k2++)
-            {
-               for (int k3 = 0; k3 < nqp1D; k3++)
-               {
-                  QQ_Q(k1 + nqp1D*k2, k3) = 0.0;
-                  for (int i2 = 0; i2 < nH1dof1D; i2++)
-                  {
-                     QQ_Q(k1 + nqp1D*k2, k3) += Q_HQ(k1, i2 + k3*nH1dof1D) *
-                                                tensors1D->HQshape1D(i2, k2);
-                  }
-               }
-            }
-         }
-         // Set the (c,2) component of the Jacobians at all quadrature points.
-         for (int k1 = 0; k1 < nqp1D; k1++)
-         {
-            for (int k2 = 0; k2 < nqp1D; k2++)
-            {
-               for (int k3 = 0; k3 < nqp1D; k3++)
-               {
-                  const int idx = k3*nqp1D*nqp1D + k2*nqp1D + k1;
-                  J(idx)(c, 2) = QQ_Q(k1 + k2*nqp1D, k3);
-               }
-            }
-         }
-      }
-   }
-}
-
-void DensityIntegrator::AssembleRHSElementVect(const FiniteElement &fe,
-                                               ElementTransformation &Tr,
-                                               Vector &elvect)
-{
-   const int ip_cnt = IntRule->GetNPoints();
-   Vector shape(fe.GetDof());
-
-   elvect.SetSize(fe.GetDof());
-   elvect = 0.0;
-
-   for (int q = 0; q < ip_cnt; q++)
-   {
-      fe.CalcShape(IntRule->IntPoint(q), shape);
-      // Note that rhoDetJ = rho0DetJ0.
-      shape *= quad_data.rho0DetJ0w(Tr.ElementNo*ip_cnt + q);
-      elvect += shape;
-   }
-}
-
-void ForceIntegrator::AssembleElementMatrix2(const FiniteElement &trial_fe,
-                                             const FiniteElement &test_fe,
-                                             ElementTransformation &Trans,
-                                             DenseMatrix &elmat)
-{
-   const int nqp = IntRule->GetNPoints();
-   const int dim = trial_fe.GetDim();
-   const int zone_id = Trans.ElementNo;
-   const int h1dofs_cnt = test_fe.GetDof();
-   const int l2dofs_cnt = trial_fe.GetDof();
-
-   elmat.SetSize(h1dofs_cnt*dim, l2dofs_cnt);
-   elmat = 0.0;
-
-   DenseMatrix vshape(h1dofs_cnt, dim), loc_force(h1dofs_cnt, dim);
-   Vector shape(l2dofs_cnt), Vloc_force(loc_force.Data(), h1dofs_cnt*dim);
-
-   for (int q = 0; q < nqp; q++)
-   {
-      const IntegrationPoint &ip = IntRule->IntPoint(q);
-
-      // Form stress:grad_shape at the current point.
-      test_fe.CalcDShape(ip, vshape);
-      for (int i = 0; i < h1dofs_cnt; i++)
-      {
-         for (int vd = 0; vd < dim; vd++) // Velocity components.
-         {
-            loc_force(i, vd) = 0.0;
-            for (int gd = 0; gd < dim; gd++) // Gradient components.
-            {
-               loc_force(i, vd) +=
-                  quad_data.stressJinvT(vd)(zone_id*nqp + q, gd) * vshape(i,gd);
-            }
-         }
-      }
-
-      trial_fe.CalcShape(ip, shape);
-      AddMultVWt(Vloc_force, shape, elmat);
-   }
-}
-
-void ForcePAOperator::Mult(const Vector &vecL2, Vector &vecH1) const
-{
-   if      (dim == 2) { MultQuad(vecL2, vecH1); }
-   else if (dim == 3) { MultHex(vecL2, vecH1); }
-   else { MFEM_ABORT("Unsupported dimension"); }
-}
-
-void ForcePAOperator::MultTranspose(const Vector &vecH1, Vector &vecL2) const
-{
-   if      (dim == 2) { MultTransposeQuad(vecH1, vecL2); }
-   else if (dim == 3) { MultTransposeHex(vecH1, vecL2); }
-   else { MFEM_ABORT("Unsupported dimension"); }
-}
-
-// Force matrix action on quadrilateral elements in 2D.
-void ForcePAOperator::MultQuad(const Vector &vecL2, Vector &vecH1) const
-{
-   const int nH1dof1D = tensors1D->HQshape1D.Height(),
-             nL2dof1D = tensors1D->LQshape1D.Height(),
-             nqp1D    = tensors1D->HQshape1D.Width(),
-             nqp      =  nqp1D * nqp1D,
-             nH1dof   = nH1dof1D * nH1dof1D;
-   Array<int> h1dofs, l2dofs;
-   Vector e(nL2dof1D * nL2dof1D);
-   DenseMatrix E(e.GetData(), nL2dof1D, nL2dof1D);
-   DenseMatrix LQ(nL2dof1D, nqp1D), HQ(nH1dof1D, nqp1D), QQ(nqp1D, nqp1D),
-               HHx(nH1dof1D, nH1dof1D), HHy(nH1dof1D, nH1dof1D);
-   // Quadrature data for a specific direction.
-   DenseMatrix QQd(nqp1D, nqp1D);
-   double *data_qd = QQd.GetData(), *data_q = QQ.GetData();
-
-   const H1_QuadrilateralElement *fe =
-      dynamic_cast<const H1_QuadrilateralElement *>(H1FESpace.GetFE(0));
-   const Array<int> &dof_map = fe->GetDofMap();
-
-   vecH1 = 0.0;
-   for (int z = 0; z < nzones; z++)
-   {
-      // Note that the local numbering for L2 is the tensor numbering.
-      L2FESpace.GetElementDofs(z, l2dofs);
-      vecL2.GetSubVector(l2dofs, e);
-
-      // LQ_j2_k1 = E_j1_j2 LQs_j1_k1  -- contract in x direction.
-      // QQ_k1_k2 = LQ_j2_k1 LQs_j2_k2 -- contract in y direction.
-      MultAtB(E, tensors1D->LQshape1D, LQ);
-      MultAtB(LQ, tensors1D->LQshape1D, QQ);
-
-      // Iterate over the components (x and y) of the result.
-      for (int c = 0; c < 2; c++)
-      {
-         // QQd_k1_k2 *= stress_k1_k2(c,0)  -- stress that scales d[v_c]_dx.
-         // HQ_i2_k1   = HQs_i2_k2 QQ_k1_k2 -- contract in y direction.
-         // HHx_i1_i2  = HQg_i1_k1 HQ_i2_k1 -- gradients in x direction.
-         double *d = quad_data->stressJinvT(c).GetData() + z*nqp;
-         for (int q = 0; q < nqp; q++) { data_qd[q] = data_q[q] * d[q]; };
-         MultABt(tensors1D->HQshape1D, QQd, HQ);
-         MultABt(tensors1D->HQgrad1D, HQ, HHx);
-
-         // QQd_k1_k2 *= stress_k1_k2(c,1) -- stress that scales d[v_c]_dy.
-         // HQ_i2_k1  = HQg_i2_k2 QQ_k1_k2 -- gradients in y direction.
-         // HHy_i1_i2 = HQ_i1_k1 HQ_i2_k1  -- contract in x direction.
-         d = quad_data->stressJinvT(c).GetData() + 1*nzones*nqp + z*nqp;
-         for (int q = 0; q < nqp; q++) { data_qd[q] = data_q[q] * d[q]; };
-         MultABt(tensors1D->HQgrad1D, QQd, HQ);
-         MultABt(tensors1D->HQshape1D, HQ, HHy);
-
-         // Set the c-component of the result.
-         H1FESpace.GetElementVDofs(z, h1dofs);
-         for (int i1 = 0; i1 < nH1dof1D; i1++)
-         {
-            for (int i2 = 0; i2 < nH1dof1D; i2++)
-            {
-               // Transfer from the mfem's H1 local numbering to the tensor
-               // structure numbering.
-               const int idx = i2 * nH1dof1D + i1;
-               vecH1[h1dofs[c*nH1dof + dof_map[idx]]] +=
-                  HHx(i1, i2) + HHy(i1, i2);
-            }
-         }
-      }
-   }
-}
-
-// Force matrix action on hexahedral elements in 3D.
-void ForcePAOperator::MultHex(const Vector &vecL2, Vector &vecH1) const
-{
-   const int nH1dof1D = tensors1D->HQshape1D.Height(),
-             nL2dof1D = tensors1D->LQshape1D.Height(),
-             nqp1D    = tensors1D->HQshape1D.Width(),
-             nqp      = nqp1D * nqp1D * nqp1D,
-             nH1dof   = nH1dof1D * nH1dof1D * nH1dof1D;
-   Array<int> h1dofs, l2dofs;
-
-   Vector e(nL2dof1D * nL2dof1D * nL2dof1D);
-   DenseMatrix E(e.GetData(), nL2dof1D*nL2dof1D, nL2dof1D);
-
-   DenseMatrix HH_Q(nH1dof1D * nH1dof1D, nqp1D),
-               H_HQ(HH_Q.GetData(), nH1dof1D, nH1dof1D*nqp1D),
-               Q_HQ(nqp1D, nH1dof1D*nqp1D);
-   DenseMatrix LL_Q(nL2dof1D * nL2dof1D, nqp1D),
-               L_LQ(LL_Q.GetData(), nL2dof1D, nL2dof1D*nqp1D),
-               Q_LQ(nqp1D, nL2dof1D*nqp1D);
-   DenseMatrix QQ_Q(nqp1D * nqp1D, nqp1D), QQ_Qc(nqp1D * nqp1D, nqp1D);
-   double *qqq = QQ_Q.GetData(), *qqqc = QQ_Qc.GetData();
-   DenseMatrix HHHx(nH1dof1D * nH1dof1D, nH1dof1D),
-               HHHy(nH1dof1D * nH1dof1D, nH1dof1D),
-               HHHz(nH1dof1D * nH1dof1D, nH1dof1D);
-
-   const H1_HexahedronElement *fe =
-      dynamic_cast<const H1_HexahedronElement *>(H1FESpace.GetFE(0));
-   const Array<int> &dof_map = fe->GetDofMap();
-
-   vecH1 = 0.0;
-   for (int z = 0; z < nzones; z++)
-   {
-      // Note that the local numbering for L2 is the tensor numbering.
-      L2FESpace.GetElementDofs(z, l2dofs);
-      vecL2.GetSubVector(l2dofs, e);
-
-      // LLQ_j1_j2_k3  = E_j1_j2_j3 LQs_j3_k3   -- contract in z direction.
-      // QLQ_k1_j2_k3  = LQs_j1_k1 LLQ_j1_j2_k3 -- contract in x direction.
-      // QQQ_k1_k2_k3  = QLQ_k1_j2_k3 LQs_j2_k2 -- contract in y direction.
-      // The last step does some reordering (it's not product of matrices).
-      mfem::Mult(E, tensors1D->LQshape1D, LL_Q);
-      MultAtB(tensors1D->LQshape1D, L_LQ, Q_LQ);
-      for (int k1 = 0; k1 < nqp1D; k1++)
-      {
-         for (int k2 = 0; k2 < nqp1D; k2++)
-         {
-            for (int k3 = 0; k3 < nqp1D; k3++)
-            {
-               QQ_Q(k1 + nqp1D*k2, k3) = 0.0;
-               for (int j2 = 0; j2 < nL2dof1D; j2++)
-               {
-                  QQ_Q(k1 + nqp1D*k2, k3) +=
-                     Q_LQ(k1, j2 + k3*nL2dof1D) * tensors1D->LQshape1D(j2, k2);
-               }
-            }
-         }
-      }
-
-      // Iterate over the components (x, y, z) of the result.
-      for (int c = 0; c < 3; c++)
-      {
-         // QQQc_k1_k2_k3 *= stress_k1_k2_k3(c,0) -- stress scaling d[v_c]_dx.
-         double *d = quad_data->stressJinvT(c).GetData() + z*nqp;
-         for (int q = 0; q < nqp; q++) { qqqc[q] = qqq[q] * d[q]; };
-
-         // QHQ_k1_i2_k3  = QQQc_k1_k2_k3 HQs_i2_k2 -- contract  in y direction.
-         // The first step does some reordering (it's not product of matrices).
-         // HHQ_i1_i2_k3  = HQg_i1_k1 QHQ_k1_i2_k3  -- gradients in x direction.
-         // HHHx_i1_i2_i3 = HHQ_i1_i2_k3 HQs_i3_k3  -- contract  in z direction.
-         for (int k1 = 0; k1 < nqp1D; k1++)
-         {
-            for (int i2 = 0; i2 < nH1dof1D; i2++)
-            {
-               for (int k3 = 0; k3 < nqp1D; k3++)
-               {
-                  Q_HQ(k1, i2 + nH1dof1D*k3) = 0.0;
-                  for (int k2 = 0; k2 < nqp1D; k2++)
-                  {
-                     Q_HQ(k1, i2 + nH1dof1D*k3) +=
-                        QQ_Qc(k1 + nqp1D*k2, k3) * tensors1D->HQshape1D(i2, k2);
-                  }
-               }
-            }
-         }
-         mfem::Mult(tensors1D->HQgrad1D, Q_HQ, H_HQ);
-         MultABt(HH_Q, tensors1D->HQshape1D, HHHx);
-
-         // QQQc_k1_k2_k3 *= stress_k1_k2_k3(c,1) -- stress scaling d[v_c]_dy.
-         d = quad_data->stressJinvT(c).GetData() + 1*nzones*nqp + z*nqp;
-         for (int q = 0; q < nqp; q++) { qqqc[q] = qqq[q] * d[q]; };
-
-         // QHQ_k1_i2_k3  = QQQc_k1_k2_k3 HQg_i2_k2 -- gradients in y direction.
-         // The first step does some reordering (it's not product of matrices).
-         // HHQ_i1_i2_k3  = HQs_i1_k1 QHQ_k1_i2_k3  -- contract  in x direction.
-         // HHHy_i1_i2_i3 = HHQ_i1_i2_k3 HQs_i3_k3  -- contract  in z direction.
-         for (int k1 = 0; k1 < nqp1D; k1++)
-         {
-            for (int i2 = 0; i2 < nH1dof1D; i2++)
-            {
-               for (int k3 = 0; k3 < nqp1D; k3++)
-               {
-                  Q_HQ(k1, i2 + nH1dof1D*k3) = 0.0;
-                  for (int k2 = 0; k2 < nqp1D; k2++)
-                  {
-                     Q_HQ(k1, i2 + nH1dof1D*k3) +=
-                        QQ_Qc(k1 + nqp1D*k2, k3) * tensors1D->HQgrad1D(i2, k2);
-                  }
-               }
-            }
-         }
-         mfem::Mult(tensors1D->HQshape1D, Q_HQ, H_HQ);
-         MultABt(HH_Q, tensors1D->HQshape1D, HHHy);
-
-         // QQQc_k1_k2_k3 *= stress_k1_k2_k3(c,2) -- stress scaling d[v_c]_dz.
-         d = quad_data->stressJinvT(c).GetData() + 2*nzones*nqp + z*nqp;
-         for (int q = 0; q < nqp; q++) { qqqc[q] = qqq[q] * d[q]; };
-
-         // QHQ_k1_i2_k3  = QQQc_k1_k2_k3 HQg_i2_k2 -- contract  in y direction.
-         // The first step does some reordering (it's not product of matrices).
-         // HHQ_i1_i2_k3  = HQs_i1_k1 QHQ_k1_i2_k3  -- contract  in x direction.
-         // HHHz_i1_i2_i3 = HHQ_i1_i2_k3 HQs_i3_k3  -- gradients in z direction.
-         for (int k1 = 0; k1 < nqp1D; k1++)
-         {
-            for (int i2 = 0; i2 < nH1dof1D; i2++)
-            {
-               for (int k3 = 0; k3 < nqp1D; k3++)
-               {
-                  Q_HQ(k1, i2 + nH1dof1D*k3) = 0.0;
-                  for (int k2 = 0; k2 < nqp1D; k2++)
-                  {
-                     Q_HQ(k1, i2 + nH1dof1D*k3) +=
-                        QQ_Qc(k1 + nqp1D*k2, k3) * tensors1D->HQshape1D(i2, k2);
-                  }
-               }
-            }
-         }
-         mfem::Mult(tensors1D->HQshape1D, Q_HQ, H_HQ);
-         MultABt(HH_Q, tensors1D->HQgrad1D, HHHz);
-
-         // Set the c-component of the result.
-         H1FESpace.GetElementVDofs(z, h1dofs);
-         for (int i1 = 0; i1 < nH1dof1D; i1++)
-         {
-            for (int i2 = 0; i2 < nH1dof1D; i2++)
-            {
-               for (int i3 = 0; i3 < nH1dof1D; i3++)
-               {
-                  // Transfer from the mfem's H1 local numbering to the tensor
-                  // structure numbering.
-                  const int idx = i3*nH1dof1D*nH1dof1D + i2*nH1dof1D + i1;
-                  vecH1[h1dofs[c*nH1dof + dof_map[idx]]] +=
-                     HHHx(i1 + i2*nH1dof1D, i3) +
-                     HHHy(i1 + i2*nH1dof1D, i3) +
-                     HHHz(i1 + i2*nH1dof1D, i3);
-               }
-            }
-         }
-      }
-   }
-}
-
-// Transpose force matrix action on quadrilateral elements in 2D.
-void ForcePAOperator::MultTransposeQuad(const Vector &vecH1,
-                                        Vector &vecL2) const
-{
-   const int nH1dof1D = tensors1D->HQshape1D.Height(),
-             nL2dof1D = tensors1D->LQshape1D.Height(),
-             nqp1D    = tensors1D->HQshape1D.Width(),
-             nqp      = nqp1D * nqp1D,
-             nH1dof   = nH1dof1D * nH1dof1D;
-   Array<int> h1dofs, l2dofs;
-   Vector v(nH1dof * 2), e(nL2dof1D * nL2dof1D);
-   DenseMatrix V, E(e.GetData(), nL2dof1D, nL2dof1D);
-   DenseMatrix HQ(nH1dof1D, nqp1D), LQ(nL2dof1D, nqp1D),
-               QQc(nqp1D, nqp1D), QQ(nqp1D, nqp1D);
-   double *qqc = QQc.GetData();
-
-   const H1_QuadrilateralElement *fe =
-      dynamic_cast<const H1_QuadrilateralElement *>(H1FESpace.GetFE(0));
-   const Array<int> &dof_map = fe->GetDofMap();
-
-   for (int z = 0; z < nzones; z++)
-   {
-      H1FESpace.GetElementVDofs(z, h1dofs);
-
-      // Form (stress:grad_v) at all quadrature points.
-      QQ = 0.0;
-      for (int c = 0; c < 2; c++)
-      {
-         // Transfer from the mfem's H1 local numbering to the tensor structure
-         // numbering.
-         for (int j = 0; j < nH1dof; j++)
-         {
-            v[c*nH1dof + j] = vecH1[h1dofs[c*nH1dof + dof_map[j]]];
-         }
-         // Connect to [v_c], i.e., the c-component of v.
-         V.UseExternalData(v.GetData() + c*nH1dof, nH1dof1D, nH1dof1D);
-
-         // HQ_i2_k1   = V_i1_i2 HQg_i1_k1  -- gradients in x direction.
-         // QQc_k1_k2  = HQ_i2_k1 HQs_i2_k2 -- contract  in y direction.
-         // QQc_k1_k2 *= stress_k1_k2(c,0)  -- stress that scales d[v_c]_dx.
-         MultAtB(V, tensors1D->HQgrad1D, HQ);
-         MultAtB(HQ, tensors1D->HQshape1D, QQc);
-         double *d = quad_data->stressJinvT(c).GetData() + z*nqp;
-         for (int q = 0; q < nqp; q++) { qqc[q] *= d[q]; }
-         // Add the (stress(c,0) * d[v_c]_dx) part of (stress:grad_v).
-         QQ += QQc;
-
-         // HQ_i2_k1   = V_i1_i2 HQs_i1_k1  -- contract  in x direction.
-         // QQc_k1_k2  = HQ_i2_k1 HQg_i2_k2 -- gradients in y direction.
-         // QQc_k1_k2 *= stress_k1_k2(c,1)  -- stress that scales d[v_c]_dy.
-         MultAtB(V, tensors1D->HQshape1D, HQ);
-         MultAtB(HQ, tensors1D->HQgrad1D, QQc);
-         d = quad_data->stressJinvT(c).GetData() + 1*nzones*nqp + z*nqp;
-         for (int q = 0; q < nqp; q++) { qqc[q] *= d[q]; }
-         // Add the (stress(c,1) * d[v_c]_dy) part of (stress:grad_v).
-         QQ += QQc;
-      }
-
-      // LQ_j1_k2 = LQs_j1_k1 QQ_k1_k2 -- contract in x direction.
-      // E_j1_j2  = LQ_j1_k2 LQs_j2_k2 -- contract in y direction.
-      mfem::Mult(tensors1D->LQshape1D, QQ, LQ);
-      MultABt(LQ, tensors1D->LQshape1D, E);
-
-      L2FESpace.GetElementDofs(z, l2dofs);
-      vecL2.SetSubVector(l2dofs, e);
-   }
-}
-
-// Transpose force matrix action on hexahedral elements in 3D.
-void ForcePAOperator::MultTransposeHex(const Vector &vecH1, Vector &vecL2) const
-{
-   const int nH1dof1D = tensors1D->HQshape1D.Height(),
-             nL2dof1D = tensors1D->LQshape1D.Height(),
-             nqp1D    = tensors1D->HQshape1D.Width(),
-             nqp      = nqp1D * nqp1D * nqp1D,
-             nH1dof   = nH1dof1D * nH1dof1D * nH1dof1D;
-   Array<int> h1dofs, l2dofs;
-
-   Vector v(nH1dof * 3), e(nL2dof1D * nL2dof1D * nL2dof1D);
-   DenseMatrix V, E(e.GetData(), nL2dof1D * nL2dof1D, nL2dof1D);
-
-   DenseMatrix HH_Q(nH1dof1D * nH1dof1D, nqp1D),
-               H_HQ(HH_Q.GetData(), nH1dof1D, nH1dof1D * nqp1D),
-               Q_HQ(nqp1D, nH1dof1D*nqp1D);
-   DenseMatrix LL_Q(nL2dof1D * nL2dof1D, nqp1D),
-               L_LQ(LL_Q.GetData(), nL2dof1D, nL2dof1D * nqp1D),
-               Q_LQ(nqp1D, nL2dof1D*nqp1D);
-   DenseMatrix QQ_Q(nqp1D * nqp1D, nqp1D),  QQ_Qc(nqp1D * nqp1D, nqp1D);
-   double *qqqc = QQ_Qc.GetData();
-
-   const H1_HexahedronElement *fe =
-      dynamic_cast<const H1_HexahedronElement *>(H1FESpace.GetFE(0));
-   const Array<int> &dof_map = fe->GetDofMap();
-
-   for (int z = 0; z < nzones; z++)
-   {
-      H1FESpace.GetElementVDofs(z, h1dofs);
-
-      // Form (stress:grad_v) at all quadrature points.
-      QQ_Q = 0.0;
-      for (int c = 0; c < 3; c++)
-      {
-         // Transfer from the mfem's H1 local numbering to the tensor structure
-         // numbering.
-         for (int j = 0; j < nH1dof; j++)
-         {
-            v[c*nH1dof + j] = vecH1[h1dofs[c*nH1dof + dof_map[j]]];
-         }
-         // Connect to [v_c], i.e., the c-component of v.
-         V.UseExternalData(v.GetData() + c*nH1dof, nH1dof1D*nH1dof1D, nH1dof1D);
-
-         // HHQ_i1_i2_k3  = V_i1_i2_i3 HQs_i3_k3   -- contract  in z direction.
-         // QHQ_k1_i2_k3  = HQg_i1_k1 HHQ_i1_i2_k3 -- gradients in x direction.
-         // QQQc_k1_k2_k3 = QHQ_k1_i2_k3 HQs_i2_k2 -- contract  in y direction.
-         // The last step does some reordering (it's not product of matrices).
-         mfem::Mult(V, tensors1D->HQshape1D, HH_Q);
-         MultAtB(tensors1D->HQgrad1D, H_HQ, Q_HQ);
-         for (int k1 = 0; k1 < nqp1D; k1++)
-         {
-            for (int k2 = 0; k2 < nqp1D; k2++)
-            {
-               for (int k3 = 0; k3 < nqp1D; k3++)
-               {
-                  QQ_Qc(k1 + nqp1D*k2, k3) = 0.0;
-                  for (int i2 = 0; i2 < nH1dof1D; i2++)
-                  {
-                     QQ_Qc(k1 + nqp1D*k2, k3) += Q_HQ(k1, i2 + k3*nH1dof1D) *
-                                                 tensors1D->HQshape1D(i2, k2);
-                  }
-               }
-            }
-         }
-         // QQQc_k1_k2_k3 *= stress_k1_k2_k3(c,0) -- stress scaling d[v_c]_dx.
-         double *d = quad_data->stressJinvT(c).GetData() + z*nqp;
-         for (int q = 0; q < nqp; q++) { qqqc[q] *= d[q]; };
-         // Add the (stress(c,0) * d[v_c]_dx) part of (stress:grad_v).
-         QQ_Q += QQ_Qc;
-
-         // HHQ_i1_i2_k3  = V_i1_i2_i3 HQs_i3_k3   -- contract  in z direction.
-         // QHQ_k1_i2_k3  = HQs_i1_k1 HHQ_i1_i2_k3 -- contract  in x direction.
-         // QQQc_k1_k2_k3 = QHQ_k1_i2_k3 HQg_i2_k2 -- gradients in y direction.
-         // The last step does some reordering (it's not product of matrices).
-         mfem::Mult(V, tensors1D->HQshape1D, HH_Q);
-         MultAtB(tensors1D->HQshape1D, H_HQ, Q_HQ);
-         for (int k1 = 0; k1 < nqp1D; k1++)
-         {
-            for (int k2 = 0; k2 < nqp1D; k2++)
-            {
-               for (int k3 = 0; k3 < nqp1D; k3++)
-               {
-                  QQ_Qc(k1 + nqp1D*k2, k3) = 0.0;
-                  for (int i2 = 0; i2 < nH1dof1D; i2++)
-                  {
-                     QQ_Qc(k1 + nqp1D*k2, k3) += Q_HQ(k1, i2 + k3*nH1dof1D) *
-                                                 tensors1D->HQgrad1D(i2, k2);
-                  }
-               }
-            }
-         }
-         // QQQc_k1_k2_k3 *= stress_k1_k2_k3(c,1) -- stress scaling d[v_c]_dy.
-         d = quad_data->stressJinvT(c).GetData() + 1*nzones*nqp + z*nqp;
-         for (int q = 0; q < nqp; q++) { qqqc[q] *= d[q]; };
-         // Add the (stress(c,1) * d[v_c]_dy) part of (stress:grad_v).
-         QQ_Q += QQ_Qc;
-
-         // HHQ_i1_i2_k3  = V_i1_i2_i3 HQg_i3_k3   -- gradients in z direction.
-         // QHQ_k1_i2_k3  = HQs_i1_k1 HHQ_i1_i2_k3 -- contract  in x direction.
-         // QQQc_k1_k2_k3 = QHQ_k1_i2_k3 HQs_i2_k2 -- contract  in y direction.
-         // The last step does some reordering (it's not product of matrices).
-         mfem::Mult(V, tensors1D->HQgrad1D, HH_Q);
-         MultAtB(tensors1D->HQshape1D, H_HQ, Q_HQ);
-         for (int k1 = 0; k1 < nqp1D; k1++)
-         {
-            for (int k2 = 0; k2 < nqp1D; k2++)
-            {
-               for (int k3 = 0; k3 < nqp1D; k3++)
-               {
-                  QQ_Qc(k1 + nqp1D*k2, k3) = 0.0;
-                  for (int i2 = 0; i2 < nH1dof1D; i2++)
-                  {
-                     QQ_Qc(k1 + nqp1D*k2, k3) += Q_HQ(k1, i2 + k3*nH1dof1D) *
-                                                 tensors1D->HQshape1D(i2, k2);
-                  }
-               }
-            }
-         }
-         // QQQc_k1_k2_k3 *= stress_k1_k2_k3(c,2) -- stress scaling d[v_c]_dz.
-         d = quad_data->stressJinvT(c).GetData() + 2*nzones*nqp + z*nqp;
-         for (int q = 0; q < nqp; q++) { qqqc[q] *= d[q]; };
-         // Add the (stress(c,2) * d[v_c]_dz) part of (stress:grad_v).
-         QQ_Q += QQ_Qc;
-      }
-
-      // QLQ_k1_j2_k3 = QQQ_k1_k2_k3 LQs_j2_k2 -- contract in y direction.
-      // The first step does some reordering (it's not product of matrices).
-      // LLQ_j1_j2_k3 = LQs_j1_k1 QLQ_k1_j2_k3 -- contract in x direction.
-      // E_j1_j2_i3   = LLQ_j1_j2_k3 LQs_j3_k3 -- contract in z direction.
-      for (int k1 = 0; k1 < nqp1D; k1++)
-      {
-         for (int j2 = 0; j2 < nL2dof1D; j2++)
-         {
-            for (int k3 = 0; k3 < nqp1D; k3++)
-            {
-               Q_LQ(k1, j2 + nL2dof1D*k3) = 0.0;
-               for (int k2 = 0; k2 < nqp1D; k2++)
-               {
-                  Q_LQ(k1, j2 + nL2dof1D*k3) +=
-                     QQ_Q(k1 + nqp1D*k2, k3) * tensors1D->LQshape1D(j2, k2);
-               }
-            }
-         }
-      }
-      mfem::Mult(tensors1D->LQshape1D, Q_LQ, L_LQ);
-      MultABt(LL_Q, tensors1D->LQshape1D, E);
-
-      L2FESpace.GetElementDofs(z, l2dofs);
-      vecL2.SetSubVector(l2dofs, e);
-   }
-}
-
-void MassPAOperator::Mult(const Vector &x, Vector &y) const
-{
-   const int comp_size = FESpace.GetNDofs();
-   for (int c = 0; c < dim; c++)
-   {
-      Vector x_comp(x.GetData() + c * comp_size, comp_size),
-             y_comp(y.GetData() + c * comp_size, comp_size);
-      if      (dim == 2) { MultQuad(x_comp, y_comp); }
-      else if (dim == 3) { MultHex(x_comp, y_comp); }
-      else { MFEM_ABORT("Unsupported dimension"); }
-   }
-}
-
-// Mass matrix action on quadrilateral elements in 2D.
-void MassPAOperator::MultQuad(const Vector &x, Vector &y) const
-{
-   const H1_QuadrilateralElement *fe_H1 =
-      dynamic_cast<const H1_QuadrilateralElement *>(FESpace.GetFE(0));
-   const DenseMatrix &HQs = tensors1D->HQshape1D;
-
-   const int ndof1D = HQs.Height(), nqp1D = HQs.Width();
-   DenseMatrix HQ(ndof1D, nqp1D), QQ(nqp1D, nqp1D);
-   Vector xz(ndof1D * ndof1D), yz(ndof1D * ndof1D);
-   DenseMatrix X(xz.GetData(), ndof1D, ndof1D),
-               Y(yz.GetData(), ndof1D, ndof1D);
-   Array<int> dofs;
-   double *qq = QQ.GetData();
-   const int nqp = nqp1D * nqp1D;
-
-   y.SetSize(x.Size());
-   y = 0.0;
-
-   for (int z = 0; z < nzones; z++)
-   {
-      FESpace.GetElementDofs(z, dofs);
-      // Transfer from the mfem's H1 local numbering to the tensor structure
-      // numbering.
-      const Array<int> &dof_map = fe_H1->GetDofMap();
-      for (int j = 0; j < xz.Size(); j++)
-      {
-         xz[j] = x[dofs[dof_map[j]]];
-      }
-
-      // HQ_i1_k2 = X_i1_i2 HQs_i2_k2  -- contract in y direction.
-      // QQ_k1_k2 = HQs_i1_k1 HQ_i1_k2 -- contract in x direction.
-      mfem::Mult(X, HQs, HQ);
-      MultAtB(HQs, HQ, QQ);
-
-      // QQ_k1_k2 *= quad_data_k1_k2 -- scaling with quadrature values.
-      double *d = quad_data->rho0DetJ0w.GetData() + z*nqp;
-      for (int q = 0; q < nqp; q++) { qq[q] *= d[q]; }
-
-      // HQ_i1_k2 = HQs_i1_k1 QQ_k1_k2 -- contract in x direction.
-      // Y_i1_i2  = HQ_i1_k2 HQs_i2_k2 -- contract in y direction.
-      mfem::Mult(HQs, QQ, HQ);
-      MultABt(HQ, HQs, Y);
-
-      for (int j = 0; j < yz.Size(); j++)
-      {
-         y[dofs[dof_map[j]]] += yz[j];
-      }
-   }
-}
-
-// Mass matrix action on hexahedral elements in 3D.
-void MassPAOperator::MultHex(const Vector &x, Vector &y) const
-{
-   const H1_HexahedronElement *fe_H1 =
-      dynamic_cast<const H1_HexahedronElement *>(FESpace.GetFE(0));
-   const DenseMatrix &HQs = tensors1D->HQshape1D;
-
-   const int ndof1D = HQs.Height(), nqp1D = HQs.Width();
-   DenseMatrix HH_Q(ndof1D * ndof1D, nqp1D);
-   DenseMatrix H_HQ(HH_Q.GetData(), ndof1D, ndof1D*nqp1D);
-   DenseMatrix Q_HQ(nqp1D, ndof1D*nqp1D);
-   DenseMatrix QQ_Q(nqp1D*nqp1D, nqp1D);
-   double *qqq = QQ_Q.GetData();
-   Vector xz(ndof1D * ndof1D * ndof1D), yz(ndof1D * ndof1D * ndof1D);
-   DenseMatrix X(xz.GetData(), ndof1D*ndof1D, ndof1D),
-               Y(yz.GetData(), ndof1D*ndof1D, ndof1D);
-   const int nqp = nqp1D * nqp1D * nqp1D;
-   Array<int> dofs;
-
-   y.SetSize(x.Size());
-   y = 0.0;
-
-   for (int z = 0; z < nzones; z++)
-   {
-      FESpace.GetElementDofs(z, dofs);
-      // Transfer from the mfem's H1 local numbering to the tensor structure
-      // numbering.
-      const Array<int> &dof_map = fe_H1->GetDofMap();
-      for (int j = 0; j < xz.Size(); j++)
-      {
-         xz[j] = x[dofs[dof_map[j]]];
-      }
-
-      // HHQ_i1_i2_k3  = X_i1_i2_i3 HQs_i3_k3   -- contract in z direction.
-      // QHQ_k1_i2_k3  = HQs_i1_k1 HHQ_i1_i2_k3 -- contract in x direction.
-      // QQQ_k1_k2_k3  = QHQ_k1_i2_k3 HQs_i2_k2 -- contract in y direction.
-      // The last step does some reordering (it's not product of matrices).
-      mfem::Mult(X, HQs, HH_Q);
-      MultAtB(HQs, H_HQ, Q_HQ);
-      for (int k1 = 0; k1 < nqp1D; k1++)
-      {
-         for (int k2 = 0; k2 < nqp1D; k2++)
-         {
-            for (int k3 = 0; k3 < nqp1D; k3++)
-            {
-               QQ_Q(k1 + nqp1D*k2, k3) = 0.0;
-               for (int i2 = 0; i2 < ndof1D; i2++)
-               {
-                  QQ_Q(k1 + nqp1D*k2, k3) +=
-                     Q_HQ(k1, i2 + k3*ndof1D) * HQs(i2, k2);
-               }
-            }
-         }
-      }
-
-      // QQQ_k1_k2_k3 *= quad_data_k1_k2_k3 -- scaling with quadrature values.
-      double *d = quad_data->rho0DetJ0w.GetData() + z*nqp;
-      for (int q = 0; q < nqp; q++) { qqq[q] *= d[q]; }
-
-      // QHQ_k1_i2_k3 = QQQ_k1_k2_k3 HQs_i2_k2 -- contract in y direction.
-      // The first step does some reordering (it's not product of matrices).
-      // HHQ_i1_i2_k3 = HQs_i1_k1 QHQ_k1_i2_k3 -- contract in x direction.
-      // Y_i1_i2_i3   = HHQ_i1_i2_k3 HQs_i3_k3 -- contract in z direction.
-      for (int k1 = 0; k1 < nqp1D; k1++)
-      {
-         for (int i2 = 0; i2 < ndof1D; i2++)
-         {
-            for (int k3 = 0; k3 < nqp1D; k3++)
-            {
-               Q_HQ(k1, i2 + ndof1D*k3) = 0.0;
-               for (int k2 = 0; k2 < nqp1D; k2++)
-               {
-                  Q_HQ(k1, i2 + ndof1D*k3) +=
-                     QQ_Q(k1 + nqp1D*k2, k3) * HQs(i2, k2);
-               }
-            }
-         }
-      }
-      mfem::Mult(HQs, Q_HQ, H_HQ);
-      MultABt(HH_Q, HQs, Y);
-
-      for (int j = 0; j < yz.Size(); j++)
-      {
-         y[dofs[dof_map[j]]] += yz[j];
-      }
-   }
-}
-
-void LocalMassPAOperator::Mult(const Vector &x, Vector &y) const
-{
-   if      (dim == 2) { MultQuad(x, y); }
-   else if (dim == 3) { MultHex(x, y); }
-   else { MFEM_ABORT("Unsupported dimension"); }
-}
-
-// L2 mass matrix action on a single quadrilateral element in 2D.
-void LocalMassPAOperator::MultQuad(const Vector &x, Vector &y) const
-{
-   const DenseMatrix &LQs = tensors1D->LQshape1D;
-
-   y.SetSize(x.Size());
-   y = 0.0;
-
-   const int ndof1D = LQs.Height(), nqp1D = LQs.Width();
-   DenseMatrix LQ(ndof1D, nqp1D), QQ(nqp1D, nqp1D);
-   DenseMatrix X(x.GetData(), ndof1D, ndof1D), Y(y.GetData(), ndof1D, ndof1D);
-   double *qq = QQ.GetData();
-   const int nqp = nqp1D * nqp1D;
-
-   // LQ_i1_k2 = X_i1_i2 LQs_i2_k2  -- contract in y direction.
-   // QQ_k1_k2 = LQs_i1_k1 LQ_i1_k2 -- contract in x direction.
-   mfem::Mult(X, LQs, LQ);
-   MultAtB(LQs, LQ, QQ);
-
-   // QQ_k1_k2 *= quad_data_k1_k2 -- scaling with quadrature values.
-   const double *d = quad_data->rho0DetJ0w.GetData() + zone_id*nqp;
-   for (int q = 0; q < nqp; q++) { qq[q] *= d[q]; }
-
-   // LQ_i1_k2 = LQs_i1_k1 QQ_k1_k2 -- contract in x direction.
-   // Y_i1_i2  = LQ_i1_k2 LQs_i2_k2 -- contract in y direction.
-   mfem::Mult(LQs, QQ, LQ);
-   MultABt(LQ, LQs, Y);
-}
-
-// L2 mass matrix action on a single hexahedral element in 3D.
-void LocalMassPAOperator::MultHex(const Vector &x, Vector &y) const
-{
-   const DenseMatrix &LQs = tensors1D->LQshape1D;
-
-   y.SetSize(x.Size());
-   y = 0.0;
-
-   const int ndof1D = LQs.Height(), nqp1D = LQs.Width();
-   DenseMatrix LL_Q(ndof1D * ndof1D, nqp1D);
-   DenseMatrix L_LQ(LL_Q.GetData(), ndof1D, ndof1D*nqp1D);
-   DenseMatrix Q_LQ(nqp1D, ndof1D*nqp1D);
-   DenseMatrix QQ_Q(nqp1D*nqp1D, nqp1D);
-   double *qqq = QQ_Q.GetData();
-   DenseMatrix X(x.GetData(), ndof1D*ndof1D, ndof1D),
-               Y(y.GetData(), ndof1D*ndof1D, ndof1D);
-   const int nqp = nqp1D * nqp1D * nqp1D;
-
-   // LLQ_i1_i2_k3  = X_i1_i2_i3 LQs_i3_k3   -- contract in z direction.
-   // QLQ_k1_i2_k3  = LQs_i1_k1 LLQ_i1_i2_k3 -- contract in x direction.
-   // QQQ_k1_k2_k3  = QLQ_k1_i2_k3 LQs_i2_k2 -- contract in y direction.
-   // The last step does some reordering (it's not product of matrices).
-   mfem::Mult(X, LQs, LL_Q);
-   MultAtB(LQs, L_LQ, Q_LQ);
-   for (int k1 = 0; k1 < nqp1D; k1++)
-   {
-      for (int k2 = 0; k2 < nqp1D; k2++)
-      {
-         for (int k3 = 0; k3 < nqp1D; k3++)
-         {
-            QQ_Q(k1 + nqp1D*k2, k3) = 0.0;
-            for (int i2 = 0; i2 < ndof1D; i2++)
-            {
-               QQ_Q(k1 + nqp1D*k2, k3) +=
-                  Q_LQ(k1, i2 + k3*ndof1D) * LQs(i2, k2);
-            }
-         }
-      }
-   }
-
-   // QQQ_k1_k2_k3 *= quad_data_k1_k2_k3 -- scaling with quadrature values.
-   double *d = quad_data->rho0DetJ0w.GetData() + zone_id*nqp;
-   for (int q = 0; q < nqp; q++) { qqq[q] *= d[q]; }
-
-   // QLQ_k1_i2_k3 = QQQ_k1_k2_k3 LQs_i2_k2 -- contract in y direction.
-   // The first step does some reordering (it's not product of matrices).
-   // LLQ_i1_i2_k3 = LQs_i1_k1 QLQ_k1_i2_k3 -- contract in x direction.
-   // Y_i1_i2_i3   = LLQ_i1_i2_k3 DQs_i3_k3 -- contract in z direction.
-   for (int k1 = 0; k1 < nqp1D; k1++)
-   {
-      for (int i2 = 0; i2 < ndof1D; i2++)
-      {
-         for (int k3 = 0; k3 < nqp1D; k3++)
-         {
-            Q_LQ(k1, i2 + ndof1D*k3) = 0.0;
-            for (int k2 = 0; k2 < nqp1D; k2++)
-            {
-               Q_LQ(k1, i2 + ndof1D*k3) +=
-                  QQ_Q(k1 + nqp1D*k2, k3) * LQs(i2, k2);
-            }
-         }
-      }
-   }
-   mfem::Mult(LQs, Q_LQ, L_LQ);
-   MultABt(LL_Q, LQs, Y);
-}
-
-} // namespace hydrodynamics
-
-} // namespace mfem
-
-#endif // MFEM_USE_MPI
diff --git a/amr/laghos_assembly.hpp b/amr/laghos_assembly.hpp
deleted file mode 100644
index 012c277f..00000000
--- a/amr/laghos_assembly.hpp
+++ /dev/null
@@ -1,230 +0,0 @@
-// Copyright (c) 2017, Lawrence Livermore National Security, LLC. Produced at
-// the Lawrence Livermore National Laboratory. LLNL-CODE-734707. All Rights
-// reserved. See files LICENSE and NOTICE for details.
-//
-// This file is part of CEED, a collection of benchmarks, miniapps, software
-// libraries and APIs for efficient high-order finite element and spectral
-// element discretizations for exascale applications. For more information and
-// source code availability see http://github.com/ceed.
-//
-// The CEED research is supported by the Exascale Computing Project 17-SC-20-SC,
-// a collaborative effort of two U.S. Department of Energy organizations (Office
-// of Science and the National Nuclear Security Administration) responsible for
-// the planning and preparation of a capable exascale ecosystem, including
-// software, applications, hardware, advanced system engineering and early
-// testbed platforms, in support of the nation's exascale computing imperative.
-
-#ifndef MFEM_LAGHOS_ASSEMBLY
-#define MFEM_LAGHOS_ASSEMBLY
-
-#include "mfem.hpp"
-
-
-#ifdef MFEM_USE_MPI
-
-#include <memory>
-#include <iostream>
-
-namespace mfem
-{
-
-namespace hydrodynamics
-{
-
-// Container for all data needed at quadrature points.
-struct QuadratureData
-{
-   // TODO: use QuadratureFunctions?
-
-   // Reference to physical Jacobian for the initial mesh. These are computed
-   // only at time zero and stored here.
-   DenseTensor Jac0inv;
-
-   // Quadrature data used for full/partial assembly of the force operator. At
-   // each quadrature point, it combines the stress, inverse Jacobian,
-   // determinant of the Jacobian and the integration weight. It must be
-   // recomputed in every time step.
-   DenseTensor stressJinvT;
-
-   // Quadrature data used for full/partial assembly of the mass matrices. At
-   // time zero, we compute and store (rho0 * det(J0) * qp_weight) at each
-   // quadrature point. Note the at any other time, we can compute
-   // rho = rho0 * det(J0) / det(J), representing the notion of pointwise mass
-   // conservation.
-   Vector rho0DetJ0w;
-
-   // Initial length scale. This represents a notion of local mesh size. We
-   // assume that all initial zones have similar size.
-   double h0;
-
-   // Estimate of the minimum time step over all quadrature points. This is
-   // recomputed at every time step to achieve adaptive time stepping.
-   double dt_est;
-
-   QuadratureData(int dim, int nzones, int quads_per_zone)
-      : Jac0inv(dim, dim, nzones * quads_per_zone),
-        stressJinvT(nzones * quads_per_zone, dim, dim),
-        rho0DetJ0w(nzones * quads_per_zone) { }
-
-   void Resize(int dim, int nzones, int quads_per_zone)
-   {
-      Jac0inv.SetSize(dim, dim, nzones * quads_per_zone);
-      stressJinvT.SetSize(nzones * quads_per_zone, dim, dim);
-      rho0DetJ0w.SetSize(nzones * quads_per_zone);
-   }
-};
-
-// Stores values of the one-dimensional shape functions and gradients at all 1D
-// quadrature points. All sizes are (dofs1D_cnt x quads1D_cnt).
-struct Tensors1D
-{
-   // H1 shape functions and gradients, L2 shape functions.
-   DenseMatrix HQshape1D, HQgrad1D, LQshape1D;
-
-   Tensors1D(int H1order, int L2order, int nqp1D);
-};
-extern const Tensors1D *tensors1D;
-
-class FastEvaluator
-{
-   const int dim;
-   ParFiniteElementSpace &H1FESpace;
-
-public:
-   FastEvaluator(ParFiniteElementSpace &h1fes)
-      : dim(h1fes.GetMesh()->Dimension()), H1FESpace(h1fes) { }
-
-   void GetL2Values(const Vector &vecL2, Vector &vecQP) const;
-   // The input vec is an H1 function with dim components, over a zone.
-   // The output is J_ij = d(vec_i) / d(x_j) with ij = 1 .. dim.
-   void GetVectorGrad(const DenseMatrix &vec, DenseTensor &J) const;
-};
-extern const FastEvaluator *evaluator;
-
-// This class is used only for visualization. It assembles (rho, phi) in each
-// zone, which is used by LagrangianHydroOperator::ComputeDensity to do an L2
-// projection of the density.
-class DensityIntegrator : public LinearFormIntegrator
-{
-private:
-   const QuadratureData &quad_data;
-
-public:
-   DensityIntegrator(QuadratureData &quad_data_) : quad_data(quad_data_) { }
-
-   virtual void AssembleRHSElementVect(const FiniteElement &fe,
-                                       ElementTransformation &Tr,
-                                       Vector &elvect);
-};
-
-// Assembles element contributions to the global force matrix. This class is
-// used for the full assembly case; it's not used with partial assembly.
-class ForceIntegrator : public BilinearFormIntegrator
-{
-private:
-   const QuadratureData &quad_data;
-
-public:
-   ForceIntegrator(QuadratureData &quad_data_) : quad_data(quad_data_) { }
-
-   virtual void AssembleElementMatrix2(const FiniteElement &trial_fe,
-                                       const FiniteElement &test_fe,
-                                       ElementTransformation &Trans,
-                                       DenseMatrix &elmat);
-};
-
-// Performs partial assembly, which corresponds to (and replaces) the use of the
-// LagrangianHydroOperator::Force global matrix.
-class ForcePAOperator : public Operator
-{
-private:
-   const int dim, nzones;
-
-   QuadratureData *quad_data;
-   ParFiniteElementSpace &H1FESpace, &L2FESpace;
-
-   // Force matrix action on quadrilateral elements in 2D.
-   void MultQuad(const Vector &vecL2, Vector &vecH1) const;
-   // Force matrix action on hexahedral elements in 3D.
-   void MultHex(const Vector &vecL2, Vector &vecH1) const;
-
-   // Transpose force matrix action on quadrilateral elements in 2D.
-   void MultTransposeQuad(const Vector &vecH1, Vector &vecL2) const;
-   // Transpose force matrix action on hexahedral elements in 3D.
-   void MultTransposeHex(const Vector &vecH1, Vector &vecL2) const;
-
-public:
-   ForcePAOperator(QuadratureData *quad_data_,
-                   ParFiniteElementSpace &h1fes, ParFiniteElementSpace &l2fes)
-      : dim(h1fes.GetMesh()->Dimension()), nzones(h1fes.GetMesh()->GetNE()),
-        quad_data(quad_data_), H1FESpace(h1fes), L2FESpace(l2fes) { }
-
-   virtual void Mult(const Vector &vecL2, Vector &vecH1) const;
-   virtual void MultTranspose(const Vector &vecH1, Vector &vecL2) const;
-
-   ~ForcePAOperator() { }
-};
-
-// Performs partial assembly for the velocity mass matrix.
-class MassPAOperator : public Operator
-{
-private:
-   const int dim, nzones;
-
-   QuadratureData *quad_data;
-   ParFiniteElementSpace &FESpace;
-
-   // Mass matrix action on quadrilateral elements in 2D.
-   void MultQuad(const Vector &x, Vector &y) const;
-   // Mass matrix action on hexahedral elements in 3D.
-   void MultHex(const Vector &x, Vector &y) const;
-
-public:
-   MassPAOperator(QuadratureData *quad_data_, ParFiniteElementSpace &fes)
-      : Operator(fes.GetVSize()),
-        dim(fes.GetMesh()->Dimension()), nzones(fes.GetMesh()->GetNE()),
-        quad_data(quad_data_), FESpace(fes)
-   { }
-
-   // Mass matrix action.
-   virtual void Mult(const Vector &x, Vector &y) const;
-
-   virtual const Operator *GetProlongation() const
-   { return FESpace.GetProlongationMatrix(); }
-   virtual const Operator *GetRestriction() const
-   { return FESpace.GetRestrictionMatrix(); }
-};
-
-// Performs partial assembly for the energy mass matrix on a single zone.
-// Used to perform local CG solves, thus avoiding unnecessary communication.
-class LocalMassPAOperator : public Operator
-{
-private:
-   const int dim;
-   int zone_id;
-
-   QuadratureData *quad_data;
-
-   // Mass matrix action on a quadrilateral element in 2D.
-   void MultQuad(const Vector &x, Vector &y) const;
-   // Mass matrix action on a hexahedral element in 3D.
-   void MultHex(const Vector &x, Vector &y) const;
-
-public:
-   LocalMassPAOperator(QuadratureData *quad_data_, ParFiniteElementSpace &fes)
-      : Operator(fes.GetFE(0)->GetDof()),
-        dim(fes.GetMesh()->Dimension()), zone_id(0),
-        quad_data(quad_data_)
-   { }
-   void SetZoneId(int zid) { zone_id = zid; }
-
-   virtual void Mult(const Vector &x, Vector &y) const;
-};
-
-} // namespace hydrodynamics
-
-} // namespace mfem
-
-#endif // MFEM_USE_MPI
-
-#endif // MFEM_LAGHOS_ASSEMBLY
diff --git a/amr/laghos_solver.cpp b/amr/laghos_solver.cpp
deleted file mode 100644
index b50dd0e7..00000000
--- a/amr/laghos_solver.cpp
+++ /dev/null
@@ -1,737 +0,0 @@
-// Copyright (c) 2017, Lawrence Livermore National Security, LLC. Produced at
-// the Lawrence Livermore National Laboratory. LLNL-CODE-734707. All Rights
-// reserved. See files LICENSE and NOTICE for details.
-//
-// This file is part of CEED, a collection of benchmarks, miniapps, software
-// libraries and APIs for efficient high-order finite element and spectral
-// element discretizations for exascale applications. For more information and
-// source code availability see http://github.com/ceed.
-//
-// The CEED research is supported by the Exascale Computing Project 17-SC-20-SC,
-// a collaborative effort of two U.S. Department of Energy organizations (Office
-// of Science and the National Nuclear Security Administration) responsible for
-// the planning and preparation of a capable exascale ecosystem, including
-// software, applications, hardware, advanced system engineering and early
-// testbed platforms, in support of the nation's exascale computing imperative.
-
-#include "laghos_solver.hpp"
-
-#ifdef MFEM_USE_MPI
-
-using namespace std;
-
-namespace mfem
-{
-
-namespace hydrodynamics
-{
-
-void VisualizeField(socketstream &sock, const char *vishost, int visport,
-                    ParGridFunction &gf, const char *title,
-                    int x, int y, int w, int h, bool vec)
-{
-   ParMesh &pmesh = *gf.ParFESpace()->GetParMesh();
-   MPI_Comm comm = pmesh.GetComm();
-
-   int num_procs, myid;
-   MPI_Comm_size(comm, &num_procs);
-   MPI_Comm_rank(comm, &myid);
-
-   bool newly_opened = false;
-   int connection_failed;
-
-   do
-   {
-      if (myid == 0)
-      {
-         if (!sock.is_open() || !sock)
-         {
-            sock.open(vishost, visport);
-            newly_opened = true;
-         }
-         sock << "solution\n";
-      }
-
-      sock.precision(8);
-      pmesh.PrintAsOne(sock);
-      gf.SaveAsOne(sock);
-
-      if (myid == 0 && newly_opened)
-      {
-         const char* keys = (gf.FESpace()->GetMesh()->Dimension() == 2)
-                            ? "mAcRjlPPPPPPPP" : "maaAcl";
-
-         sock << "window_title '" << title << "'\n"
-              << "window_geometry "
-              << x << " " << y << " " << w << " " << h << "\n"
-              << "keys " << keys;
-         if ( vec ) { sock << "vvv"; }
-         sock << endl;
-      }
-
-      if (myid == 0)
-      {
-         connection_failed = !sock && !newly_opened;
-      }
-      MPI_Bcast(&connection_failed, 1, MPI_INT, 0, comm);
-   }
-   while (connection_failed);
-}
-
-void VisualizeElementValues(socketstream &sock, const char *vishost,
-                            int visport,
-                            ParMesh* pmesh, Vector &values, const char *title,
-                            int x, int y, int w, int h)
-{
-   L2_FECollection l2_fec(0, pmesh->Dimension());
-   ParFiniteElementSpace l2_fes(pmesh, &l2_fec);
-   ParGridFunction l2_gf(&l2_fes);
-   l2_gf = values;
-   VisualizeField(sock, vishost, visport, l2_gf, title, x, y, w, h);
-}
-
-
-LagrangianHydroOperator::LagrangianHydroOperator(int size,
-                                                 ParFiniteElementSpace &h1_fes,
-                                                 ParFiniteElementSpace &l2_fes,
-                                                 Array<int> &essential_tdofs,
-                                                 ParGridFunction &rho0,
-                                                 int source_type_, double cfl_,
-                                                 Coefficient *material_,
-                                                 bool visc, bool pa,
-                                                 double cgt, int cgiter)
-   : TimeDependentOperator(size),
-     H1FESpace(h1_fes), L2FESpace(l2_fes),
-     ess_tdofs(essential_tdofs),
-     nzones(h1_fes.GetMesh()->GetNE()),
-     dim(h1_fes.GetMesh()->Dimension()),
-     l2dofs_cnt(l2_fes.GetFE(0)->GetDof()),
-     h1dofs_cnt(h1_fes.GetFE(0)->GetDof()),
-     source_type(source_type_), cfl(cfl_),
-     use_viscosity(visc), p_assembly(pa), cg_rel_tol(cgt), cg_max_iter(cgiter),
-     material_pcf(material_),
-     rho0(rho0),
-     rho0_coeff(&rho0),
-     x0_gf(&h1_fes),
-     Mv(&h1_fes), Me_inv(l2dofs_cnt, l2dofs_cnt, nzones),
-     integ_rule(IntRules.Get(h1_fes.GetMesh()->GetElementBaseGeometry(0),
-                             3*h1_fes.GetOrder(0) + l2_fes.GetOrder(0) - 1)),
-     quad_data(dim, nzones, integ_rule.GetNPoints()),
-     quad_data_is_current(false),
-     Force(&l2_fes, &h1_fes), ForcePA(&quad_data, h1_fes, l2_fes),
-     VMassPA(&quad_data, H1FESpace), locEMassPA(&quad_data, l2_fes),
-     locCG(), timer()
-{
-   // Standard local assembly and inversion for energy mass matrices.
-   DenseMatrix Me(l2dofs_cnt);
-   DenseMatrixInverse inv(&Me);
-   MassIntegrator mi(rho0_coeff, &integ_rule);
-   for (int i = 0; i < nzones; i++)
-   {
-      mi.AssembleElementMatrix(*l2_fes.GetFE(i),
-                               *l2_fes.GetElementTransformation(i), Me);
-      inv.Factor();
-      inv.GetInverseMatrix(Me_inv(i));
-   }
-
-   // Standard assembly for the velocity mass matrix.
-   VectorMassIntegrator *vmi = new VectorMassIntegrator(rho0_coeff, &integ_rule);
-   Mv.AddDomainIntegrator(vmi);
-   Mv.Assemble();
-
-   // Values of rho0DetJ0 and Jac0inv at all quadrature points.
-   const int nqp = integ_rule.GetNPoints();
-   Vector rho_vals(nqp);
-   for (int i = 0; i < nzones; i++)
-   {
-      rho0.GetValues(i, integ_rule, rho_vals);
-      ElementTransformation *T = h1_fes.GetElementTransformation(i);
-      for (int q = 0; q < nqp; q++)
-      {
-         const IntegrationPoint &ip = integ_rule.IntPoint(q);
-         T->SetIntPoint(&ip);
-
-         DenseMatrixInverse Jinv(T->Jacobian());
-         Jinv.GetInverseMatrix(quad_data.Jac0inv(i*nqp + q));
-
-         const double rho0DetJ0 = T->Weight() * rho_vals(q);
-         quad_data.rho0DetJ0w(i*nqp + q) = rho0DetJ0 *
-                                           integ_rule.IntPoint(q).weight;
-      }
-   }
-
-   // Save initial (undeformed) mesh configuration for use in AMRUpdate later.
-   x0_gf = *(h1_fes.GetMesh()->GetNodes());
-
-   // Initial local mesh size (assumes all mesh elements are of the same type).
-   double loc_area = 0.0, glob_area;
-   int loc_z_cnt = nzones, glob_z_cnt;
-   ParMesh *pm = H1FESpace.GetParMesh();
-   for (int i = 0; i < nzones; i++) { loc_area += pm->GetElementVolume(i); }
-   MPI_Allreduce(&loc_area, &glob_area, 1, MPI_DOUBLE, MPI_SUM, pm->GetComm());
-   MPI_Allreduce(&loc_z_cnt, &glob_z_cnt, 1, MPI_INT, MPI_SUM, pm->GetComm());
-   switch (pm->GetElementBaseGeometry(0))
-   {
-      case Geometry::SEGMENT:
-         quad_data.h0 = glob_area / glob_z_cnt; break;
-      case Geometry::SQUARE:
-         quad_data.h0 = sqrt(glob_area / glob_z_cnt); break;
-      case Geometry::TRIANGLE:
-         quad_data.h0 = sqrt(2.0 * glob_area / glob_z_cnt); break;
-      case Geometry::CUBE:
-         quad_data.h0 = pow(glob_area / glob_z_cnt, 1.0/3.0); break;
-      case Geometry::TETRAHEDRON:
-         quad_data.h0 = pow(6.0 * glob_area / glob_z_cnt, 1.0/3.0); break;
-      default: MFEM_ABORT("Unknown zone type!");
-   }
-   quad_data.h0 /= (double) H1FESpace.GetOrder(0);
-
-   ForceIntegrator *fi = new ForceIntegrator(quad_data);
-   fi->SetIntRule(&integ_rule);
-   Force.AddDomainIntegrator(fi);
-   // Make a dummy assembly to figure out the sparsity.
-   Force.Assemble(0);
-   Force.Finalize(0);
-
-   if (p_assembly)
-   {
-      tensors1D = new Tensors1D(H1FESpace.GetFE(0)->GetOrder(),
-                                L2FESpace.GetFE(0)->GetOrder(),
-                                int(floor(0.7 + pow(nqp, 1.0 / dim))));
-      evaluator = new FastEvaluator(H1FESpace);
-   }
-
-   locCG.SetOperator(locEMassPA);
-   locCG.iterative_mode = false;
-   locCG.SetRelTol(1e-8);
-   locCG.SetAbsTol(1e-8 * numeric_limits<double>::epsilon());
-   locCG.SetMaxIter(200);
-   locCG.SetPrintLevel(0);
-}
-
-void LagrangianHydroOperator::Mult(const Vector &S, Vector &dS_dt) const
-{
-   dS_dt = 0.0;
-
-   // Make sure that the mesh positions correspond to the ones in S. This is
-   // needed only because some mfem time integrators don't update the solution
-   // vector at every intermediate stage (hence they don't change the mesh).
-   Vector* sptr = (Vector*) &S;
-   ParGridFunction x;
-   x.MakeRef(&H1FESpace, *sptr, 0);
-   H1FESpace.GetParMesh()->NewNodes(x, false);
-
-   UpdateQuadratureData(S);
-
-   // The monolithic BlockVector stores the unknown fields as follows:
-   // - Position
-   // - Velocity
-   // - Specific Internal Energy
-
-   const int VsizeL2 = L2FESpace.GetVSize();
-   const int VsizeH1 = H1FESpace.GetVSize();
-
-   ParGridFunction v, e;
-   v.MakeRef(&H1FESpace, *sptr, VsizeH1);
-   e.MakeRef(&L2FESpace, *sptr, VsizeH1*2);
-
-   ParGridFunction dx, dv, de;
-   dx.MakeRef(&H1FESpace, dS_dt, 0);
-   dv.MakeRef(&H1FESpace, dS_dt, VsizeH1);
-   de.MakeRef(&L2FESpace, dS_dt, VsizeH1*2);
-
-   // Set dx_dt = v (explicit).
-   dx = v;
-
-   if (!p_assembly)
-   {
-      //Force = 0.0;
-      Force.Update();
-      timer.sw_force.Start();
-      Force.Assemble();
-      timer.sw_force.Stop();
-   }
-
-   // Solve for velocity.
-   Vector one(VsizeL2), rhs(VsizeH1), B, X; one = 1.0;
-   if (p_assembly)
-   {
-      timer.sw_force.Start();
-      ForcePA.Mult(one, rhs);
-      timer.sw_force.Stop();
-      rhs.Neg();
-
-      Operator *cVMassPA;
-      VMassPA.FormLinearSystem(ess_tdofs, dv, rhs, cVMassPA, X, B);
-      CGSolver cg(H1FESpace.GetParMesh()->GetComm());
-      cg.SetOperator(*cVMassPA);
-      cg.SetRelTol(cg_rel_tol); cg.SetAbsTol(0.0);
-      cg.SetMaxIter(cg_max_iter);
-      cg.SetPrintLevel(0);
-      timer.sw_cgH1.Start();
-      cg.Mult(B, X);
-      timer.sw_cgH1.Stop();
-      timer.H1cg_iter += cg.GetNumIterations();
-      VMassPA.RecoverFEMSolution(X, rhs, dv);
-      delete cVMassPA;
-   }
-   else
-   {
-      timer.sw_force.Start();
-      Force.Mult(one, rhs);
-      timer.sw_force.Stop();
-      rhs.Neg();
-
-      HypreParMatrix A;
-      Mv.FormLinearSystem(ess_tdofs, dv, rhs, A, X, B);
-      CGSolver cg(H1FESpace.GetParMesh()->GetComm());
-      cg.SetOperator(A);
-      cg.SetRelTol(cg_rel_tol); cg.SetAbsTol(0.0);
-      cg.SetMaxIter(cg_max_iter);
-      cg.SetPrintLevel(-1);
-      timer.sw_cgH1.Start();
-      cg.Mult(B, X);
-      timer.sw_cgH1.Stop();
-      timer.H1cg_iter += cg.GetNumIterations();
-      Mv.RecoverFEMSolution(X, rhs, dv);
-   }
-
-   // Solve for energy, assemble the energy source if such exists.
-   LinearForm *e_source = NULL;
-   if (source_type == 1) // 2D Taylor-Green.
-   {
-      e_source = new LinearForm(&L2FESpace);
-      TaylorCoefficient coeff;
-      DomainLFIntegrator *d = new DomainLFIntegrator(coeff, &integ_rule);
-      e_source->AddDomainIntegrator(d);
-      e_source->Assemble();
-   }
-   Array<int> l2dofs;
-   Vector e_rhs(VsizeL2), loc_rhs(l2dofs_cnt), loc_de(l2dofs_cnt);
-   if (p_assembly)
-   {
-      timer.sw_force.Start();
-      ForcePA.MultTranspose(v, e_rhs);
-      timer.sw_force.Stop();
-
-      if (e_source) { e_rhs += *e_source; }
-      for (int z = 0; z < nzones; z++)
-      {
-         L2FESpace.GetElementDofs(z, l2dofs);
-         e_rhs.GetSubVector(l2dofs, loc_rhs);
-         locEMassPA.SetZoneId(z);
-         timer.sw_cgL2.Start();
-         locCG.Mult(loc_rhs, loc_de);
-         timer.sw_cgL2.Stop();
-         timer.L2dof_iter += locCG.GetNumIterations() * l2dofs_cnt;
-         de.SetSubVector(l2dofs, loc_de);
-      }
-   }
-   else
-   {
-      timer.sw_force.Start();
-      Force.MultTranspose(v, e_rhs);
-      timer.sw_force.Stop();
-      if (e_source) { e_rhs += *e_source; }
-      for (int z = 0; z < nzones; z++)
-      {
-         L2FESpace.GetElementDofs(z, l2dofs);
-         e_rhs.GetSubVector(l2dofs, loc_rhs);
-         timer.sw_cgL2.Start();
-         Me_inv(z).Mult(loc_rhs, loc_de);
-         timer.sw_cgL2.Stop();
-         timer.L2dof_iter += l2dofs_cnt;
-         de.SetSubVector(l2dofs, loc_de);
-      }
-   }
-   delete e_source;
-
-   quad_data_is_current = false;
-}
-
-double LagrangianHydroOperator::GetTimeStepEstimate(const Vector &S) const
-{
-   Vector* sptr = (Vector*) &S;
-   ParGridFunction x;
-   x.MakeRef(&H1FESpace, *sptr, 0);
-   H1FESpace.GetParMesh()->NewNodes(x, false);
-   UpdateQuadratureData(S);
-
-   double glob_dt_est;
-   MPI_Allreduce(&quad_data.dt_est, &glob_dt_est, 1, MPI_DOUBLE, MPI_MIN,
-                 H1FESpace.GetParMesh()->GetComm());
-   return glob_dt_est;
-}
-
-void LagrangianHydroOperator::ResetTimeStepEstimate() const
-{
-   quad_data.dt_est = numeric_limits<double>::infinity();
-}
-
-void LagrangianHydroOperator::ComputeDensity(ParGridFunction &rho)
-{
-   rho.SetSpace(&L2FESpace);
-
-   DenseMatrix Mrho(l2dofs_cnt);
-   Vector rhs(l2dofs_cnt), rho_z(l2dofs_cnt);
-   Array<int> dofs(l2dofs_cnt);
-   DenseMatrixInverse inv(&Mrho);
-
-   MassIntegrator mi(&integ_rule);
-   DensityIntegrator di(quad_data);
-   di.SetIntRule(&integ_rule);
-
-   for (int i = 0; i < nzones; i++)
-   {
-      di.AssembleRHSElementVect(*L2FESpace.GetFE(i),
-                                *L2FESpace.GetElementTransformation(i), rhs);
-      mi.AssembleElementMatrix(*L2FESpace.GetFE(i),
-                               *L2FESpace.GetElementTransformation(i), Mrho);
-      inv.Factor();
-      inv.Mult(rhs, rho_z);
-
-      L2FESpace.GetElementDofs(i, dofs);
-      rho.SetSubVector(dofs, rho_z);
-   }
-}
-
-void LagrangianHydroOperator::PrintTimingData(bool IamRoot, int steps)
-{
-   double my_rt[5], rt_max[5];
-   my_rt[0] = timer.sw_cgH1.RealTime();
-   my_rt[1] = timer.sw_cgL2.RealTime();
-   my_rt[2] = timer.sw_force.RealTime();
-   my_rt[3] = timer.sw_qdata.RealTime();
-   my_rt[4] = my_rt[0] + my_rt[2] + my_rt[3];
-   MPI_Reduce(my_rt, rt_max, 5, MPI_DOUBLE, MPI_MAX, 0, H1FESpace.GetComm());
-
-   HYPRE_Int mydata[2], alldata[2];
-   mydata[0] = timer.L2dof_iter;
-   mydata[1] = timer.quad_tstep;
-   MPI_Reduce(mydata, alldata, 2, HYPRE_MPI_INT, MPI_SUM, 0,
-              H1FESpace.GetComm());
-
-   if (IamRoot)
-   {
-      const HYPRE_Int H1gsize = H1FESpace.GlobalTrueVSize(),
-                      L2gsize = L2FESpace.GlobalTrueVSize();
-      using namespace std;
-      cout << endl;
-      cout << "CG (H1) total time: " << rt_max[0] << endl;
-      cout << "CG (H1) rate (megadofs x cg_iterations / second): "
-           << 1e-6 * H1gsize * timer.H1cg_iter / rt_max[0] << endl;
-      cout << endl;
-      cout << "CG (L2) total time: " << rt_max[1] << endl;
-      cout << "CG (L2) rate (megadofs x cg_iterations / second): "
-           << 1e-6 * alldata[0] / rt_max[1] << endl;
-      cout << endl;
-      // The Force operator is applied twice per time step, on the H1 and the L2
-      // vectors, respectively.
-      cout << "Forces total time: " << rt_max[2] << endl;
-      cout << "Forces rate (megadofs x timesteps / second): "
-           << 1e-6 * steps * (H1gsize + L2gsize) / rt_max[2] << endl;
-      cout << endl;
-      cout << "UpdateQuadData total time: " << rt_max[3] << endl;
-      cout << "UpdateQuadData rate (megaquads x timesteps / second): "
-           << 1e-6 * alldata[1] * integ_rule.GetNPoints() / rt_max[3] << endl;
-      cout << endl;
-      cout << "Major kernels total time (seconds): " << rt_max[4] << endl;
-      cout << "Major kernels total rate (megadofs x time steps / second): "
-           << 1e-6 * H1gsize * steps / rt_max[4] << endl;
-   }
-}
-
-LagrangianHydroOperator::~LagrangianHydroOperator()
-{
-   delete tensors1D;
-}
-
-void LagrangianHydroOperator::UpdateQuadratureData(const Vector &S) const
-{
-   if (quad_data_is_current) { return; }
-   timer.sw_qdata.Start();
-
-   const int nqp = integ_rule.GetNPoints();
-
-   ParGridFunction x, v, e;
-   Vector* sptr = (Vector*) &S;
-   x.MakeRef(&H1FESpace, *sptr, 0);
-   v.MakeRef(&H1FESpace, *sptr, H1FESpace.GetVSize());
-   e.MakeRef(&L2FESpace, *sptr, 2*H1FESpace.GetVSize());
-   Vector e_vals, e_loc(l2dofs_cnt), vector_vals(h1dofs_cnt * dim);
-   DenseMatrix Jpi(dim), sgrad_v(dim), Jinv(dim), stress(dim), stressJiT(dim),
-               vecvalMat(vector_vals.GetData(), h1dofs_cnt, dim);
-   DenseTensor grad_v_ref(dim, dim, nqp);
-   Array<int> L2dofs, H1dofs;
-
-   zone_max_visc.SetSize(nzones);
-   zone_max_visc = 0.0;
-
-   zone_vgrad.SetSize(nzones);
-   zone_vgrad = 0.0;
-
-   // Batched computations are needed, because hydrodynamic codes usually
-   // involve expensive computations of material properties. Although this
-   // miniapp uses simple EOS equations, we still want to represent the batched
-   // cycle structure.
-   int nzones_batch = 3;
-   const int nbatches =  nzones / nzones_batch + 1; // +1 for the remainder.
-   int nqp_batch = nqp * nzones_batch;
-   double *gamma_b = new double[nqp_batch],
-   *rho_b = new double[nqp_batch],
-   *e_b   = new double[nqp_batch],
-   *p_b   = new double[nqp_batch],
-   *cs_b  = new double[nqp_batch];
-   // Jacobians of reference->physical transformations for all quadrature points
-   // in the batch.
-   DenseTensor *Jpr_b = new DenseTensor[nqp_batch];
-   for (int b = 0; b < nbatches; b++)
-   {
-      int z_id = b * nzones_batch; // Global index over zones.
-      // The last batch might not be full.
-      if (z_id == nzones) { break; }
-      else if (z_id + nzones_batch > nzones)
-      {
-         nzones_batch = nzones - z_id;
-         nqp_batch    = nqp * nzones_batch;
-      }
-
-      double min_detJ = numeric_limits<double>::infinity();
-      for (int z = 0; z < nzones_batch; z++)
-      {
-         ElementTransformation *T = H1FESpace.GetElementTransformation(z_id);
-         Jpr_b[z].SetSize(dim, dim, nqp);
-
-         if (p_assembly)
-         {
-            // Energy values at quadrature point.
-            L2FESpace.GetElementDofs(z_id, L2dofs);
-            e.GetSubVector(L2dofs, e_loc);
-            evaluator->GetL2Values(e_loc, e_vals);
-
-            // All reference->physical Jacobians at the quadrature points.
-            H1FESpace.GetElementVDofs(z_id, H1dofs);
-            x.GetSubVector(H1dofs, vector_vals);
-            evaluator->GetVectorGrad(vecvalMat, Jpr_b[z]);
-         }
-         else
-         {
-            e.GetValues(z_id, integ_rule, e_vals);
-         }
-
-         for (int q = 0; q < nqp; q++)
-         {
-            const IntegrationPoint &ip = integ_rule.IntPoint(q);
-            T->SetIntPoint(&ip);
-            if (!p_assembly) { Jpr_b[z](q) = T->Jacobian(); }
-            const double detJ = Jpr_b[z](q).Det();
-            min_detJ = min(min_detJ, detJ);
-
-            const int idx = z * nqp + q;
-            if (material_pcf == NULL) { gamma_b[idx] = 5./3.; } // Ideal gas.
-            else { gamma_b[idx] = material_pcf->Eval(*T, ip); }
-            rho_b[idx] = quad_data.rho0DetJ0w(z_id*nqp + q) / detJ / ip.weight;
-            e_b[idx]   = max(0.0, e_vals(q));
-         }
-         ++z_id;
-      }
-
-      // Batched computation of material properties.
-      ComputeMaterialProperties(nqp_batch, gamma_b, rho_b, e_b, p_b, cs_b);
-
-      z_id -= nzones_batch;
-      for (int z = 0; z < nzones_batch; z++)
-      {
-         ElementTransformation *T = H1FESpace.GetElementTransformation(z_id);
-
-         if (p_assembly)
-         {
-            // All reference->physical Jacobians at the quadrature points.
-            H1FESpace.GetElementVDofs(z_id, H1dofs);
-            v.GetSubVector(H1dofs, vector_vals);
-            evaluator->GetVectorGrad(vecvalMat, grad_v_ref);
-         }
-         for (int q = 0; q < nqp; q++)
-         {
-            const IntegrationPoint &ip = integ_rule.IntPoint(q);
-            T->SetIntPoint(&ip);
-            // Note that the Jacobian was already computed above. We've chosen
-            // not to store the Jacobians for all batched quadrature points.
-            const DenseMatrix &Jpr = Jpr_b[z](q);
-            CalcInverse(Jpr, Jinv);
-            const double detJ = Jpr.Det(), rho = rho_b[z*nqp + q],
-                         p = p_b[z*nqp + q], sound_speed = cs_b[z*nqp + q];
-
-            stress = 0.0;
-            for (int d = 0; d < dim; d++) { stress(d, d) = -p; }
-
-            double visc_coeff = 0.0, det_v_grad = 0.0;
-            if (use_viscosity)
-            {
-               // Compression-based length scale at the point. The first
-               // eigenvector of the symmetric velocity gradient gives the
-               // direction of maximal compression. This is used to define the
-               // relative change of the initial length scale.
-               if (p_assembly)
-               {
-                  mfem::Mult(grad_v_ref(q), Jinv, sgrad_v);
-               }
-               else
-               {
-                  v.GetVectorGradient(*T, sgrad_v);
-               }
-               sgrad_v.Symmetrize();
-               det_v_grad = sgrad_v.Det();
-
-               double eig_val_data[3], eig_vec_data[9];
-               if (dim==1)
-               {
-                  eig_val_data[0] = sgrad_v(0, 0);
-                  eig_vec_data[0] = 1.;
-               }
-               else { sgrad_v.CalcEigenvalues(eig_val_data, eig_vec_data); }
-               Vector compr_dir(eig_vec_data, dim);
-               // Computes the initial->physical transformation Jacobian.
-               mfem::Mult(Jpr, quad_data.Jac0inv(z_id*nqp + q), Jpi);
-               Vector ph_dir(dim); Jpi.Mult(compr_dir, ph_dir);
-               // Change of the initial mesh size in the compression direction.
-               double h0 = quad_data.h0;
-               ParNCMesh* pncmesh = H1FESpace.GetParMesh()->pncmesh;
-               if (pncmesh)
-               {
-                  // TODO: h0 should be a smooth function
-                  h0 /= (1 << pncmesh->GetElementDepth(z_id));
-               }
-               const double h = h0 * ph_dir.Norml2() / compr_dir.Norml2();
-
-               // Measure of maximal compression.
-               const double mu = eig_val_data[0];
-               visc_coeff = 2.0 * rho * h * h * fabs(mu);
-               if (mu < 0.0) { visc_coeff += 0.5 * rho * h * sound_speed; }
-               stress.Add(visc_coeff, sgrad_v);
-            }
-
-            // Time step estimate at the point. Here the more relevant length
-            // scale is related to the actual mesh deformation; we use the min
-            // singular value of the ref->physical Jacobian. In addition, the
-            // time step estimate should be aware of the presence of shocks.
-            const double h_min =
-               Jpr.CalcSingularvalue(dim-1) / (double) H1FESpace.GetOrder(0);
-            const double inv_dt = sound_speed / h_min +
-                                  2.5 * visc_coeff / rho / h_min / h_min;
-            if (min_detJ < 0.0)
-            {
-               // This will force repetition of the step with smaller dt.
-               quad_data.dt_est = 0.0;
-            }
-            else
-            {
-               quad_data.dt_est = min(quad_data.dt_est, cfl * (1.0 / inv_dt) );
-            }
-
-            // Quadrature data for partial assembly of the force operator.
-            MultABt(stress, Jinv, stressJiT);
-            stressJiT *= integ_rule.IntPoint(q).weight * detJ;
-            for (int vd = 0 ; vd < dim; vd++)
-            {
-               for (int gd = 0; gd < dim; gd++)
-               {
-                  quad_data.stressJinvT(vd)(z_id*nqp + q, gd) =
-                     stressJiT(vd, gd);
-               }
-            }
-
-            // Track maximum artificial viscosity per zone
-            zone_max_visc(z_id) = std::max(visc_coeff, zone_max_visc(z_id));
-            zone_vgrad(z_id) = std::max(std::abs(det_v_grad), zone_vgrad(z_id));
-            //zone_vgrad(z_id) += std::abs(det_v_grad);
-         }
-         ++z_id;
-      }
-   }
-   delete [] gamma_b;
-   delete [] rho_b;
-   delete [] e_b;
-   delete [] p_b;
-   delete [] cs_b;
-   delete [] Jpr_b;
-   quad_data_is_current = true;
-
-   timer.sw_qdata.Stop();
-   timer.quad_tstep += nzones;
-}
-
-void LagrangianHydroOperator::AMRUpdate(const Vector &S, bool quick)
-{
-   ParMesh *pmesh = H1FESpace.GetParMesh();
-
-   width = height = S.Size();
-   nzones = pmesh->GetNE();
-
-   x0_gf.Update();
-   rho0.Update();
-
-   if (quick) { return; }
-
-   // go back to initial mesh configuration temporarily
-   int own_nodes = 0;
-   GridFunction *x_gf = &x0_gf;
-   pmesh->SwapNodes(x_gf, own_nodes);
-
-   // update mass matrix
-   // TODO: don't reassemble everything!
-   Mv.Update();
-   Mv.Assemble();
-
-   // update Me_inv
-   // TODO: do this better too
-   {
-      Me_inv.SetSize(l2dofs_cnt, l2dofs_cnt, nzones);
-
-      DenseMatrix Me(l2dofs_cnt);
-      DenseMatrixInverse inv(&Me);
-      MassIntegrator mi(rho0_coeff, &integ_rule);
-      for (int i = 0; i < nzones; i++)
-      {
-         mi.AssembleElementMatrix(*L2FESpace.GetFE(i),
-                                  *L2FESpace.GetElementTransformation(i), Me);
-         inv.Factor();
-         inv.GetInverseMatrix(Me_inv(i));
-      }
-   }
-
-   // resize quadrature data and make sure 'stressJinvT' will be recomputed
-   quad_data.Resize(dim, nzones, integ_rule.GetNPoints());
-   quad_data_is_current = false;
-
-   // update 'rho0DetJ0' and 'Jac0inv' at all quadrature points
-   // TODO: remove code duplication
-   const int nqp = integ_rule.GetNPoints();
-   Vector rho_vals(nqp);
-   for (int i = 0; i < nzones; i++)
-   {
-      rho0.GetValues(i, integ_rule, rho_vals);
-      ElementTransformation *T = H1FESpace.GetElementTransformation(i);
-      for (int q = 0; q < nqp; q++)
-      {
-         const IntegrationPoint &ip = integ_rule.IntPoint(q);
-         T->SetIntPoint(&ip);
-
-         DenseMatrixInverse Jinv(T->Jacobian());
-         Jinv.GetInverseMatrix(quad_data.Jac0inv(i*nqp + q));
-
-         const double rho0DetJ0 = T->Weight() * rho_vals(q);
-         quad_data.rho0DetJ0w(i*nqp + q) = rho0DetJ0 *
-                                           integ_rule.IntPoint(q).weight;
-      }
-   }
-
-   // swap back to deformed mesh configuration
-   pmesh->SwapNodes(x_gf, own_nodes);
-}
-
-} // namespace hydrodynamics
-
-} // namespace mfem
-
-#endif // MFEM_USE_MPI
diff --git a/amr/laghos_solver.hpp b/amr/laghos_solver.hpp
deleted file mode 100644
index 747547c5..00000000
--- a/amr/laghos_solver.hpp
+++ /dev/null
@@ -1,191 +0,0 @@
-// Copyright (c) 2017, Lawrence Livermore National Security, LLC. Produced at
-// the Lawrence Livermore National Laboratory. LLNL-CODE-734707. All Rights
-// reserved. See files LICENSE and NOTICE for details.
-//
-// This file is part of CEED, a collection of benchmarks, miniapps, software
-// libraries and APIs for efficient high-order finite element and spectral
-// element discretizations for exascale applications. For more information and
-// source code availability see http://github.com/ceed.
-//
-// The CEED research is supported by the Exascale Computing Project 17-SC-20-SC,
-// a collaborative effort of two U.S. Department of Energy organizations (Office
-// of Science and the National Nuclear Security Administration) responsible for
-// the planning and preparation of a capable exascale ecosystem, including
-// software, applications, hardware, advanced system engineering and early
-// testbed platforms, in support of the nation's exascale computing imperative.
-
-#ifndef MFEM_LAGHOS_SOLVER
-#define MFEM_LAGHOS_SOLVER
-
-#include "mfem.hpp"
-#include "laghos_assembly.hpp"
-
-#ifdef MFEM_USE_MPI
-
-#include <memory>
-#include <iostream>
-#include <fstream>
-
-namespace mfem
-{
-
-namespace hydrodynamics
-{
-
-/// Visualize the given parallel grid function, using a GLVis server on the
-/// specified host and port. Set the visualization window title, and optionally,
-/// its geometry.
-void VisualizeField(socketstream &sock, const char *vishost, int visport,
-                    ParGridFunction &gf, const char *title,
-                    int x = 0, int y = 0, int w = 400, int h = 400,
-                    bool vec = false);
-
-/// Visualize per element scalar values.
-void VisualizeElementValues(socketstream &sock, const char *vishost,
-                            int visport,
-                            ParMesh* pmesh, Vector &values, const char *title,
-                            int x, int y, int w, int h);
-
-// These are defined in laghos.cpp
-double rho0(const Vector &);
-void v0(const Vector &, Vector &);
-double e0(const Vector &);
-double gamma(const Vector &);
-
-struct TimingData
-{
-   // Total times for all major computations:
-   // CG solves (H1 and L2) / force RHS assemblies / quadrature computations.
-   StopWatch sw_cgH1, sw_cgL2, sw_force, sw_qdata;
-
-   // These accumulate the total processed dofs or quad points:
-   // #(CG iterations) for the H1 CG solve.
-   // #dofs  * #(CG iterations) for the L2 CG solve.
-   // #quads * #(RK sub steps) for the quadrature data computations.
-   int H1cg_iter, L2dof_iter, quad_tstep;
-
-   TimingData()
-      : H1cg_iter(0), L2dof_iter(0), quad_tstep(0) { }
-};
-
-// Given a solutions state (x, v, e), this class performs all necessary
-// computations to evaluate the new slopes (dx_dt, dv_dt, de_dt).
-class LagrangianHydroOperator : public TimeDependentOperator
-{
-protected:
-   ParFiniteElementSpace &H1FESpace;
-   ParFiniteElementSpace &L2FESpace;
-
-   Array<int> &ess_tdofs;
-
-   int nzones;
-   const int dim, l2dofs_cnt, h1dofs_cnt, source_type;
-   const double cfl;
-   const bool use_viscosity, p_assembly;
-   const double cg_rel_tol;
-   const int cg_max_iter;
-   Coefficient *material_pcf;
-
-   ParGridFunction &rho0;
-   GridFunctionCoefficient rho0_coeff; // TODO: remove when Mv update improved
-   ParGridFunction x0_gf; // copy of initial mesh position
-
-   // Velocity mass matrix and local inverses of the energy mass matrices. These
-   // are constant in time, due to the pointwise mass conservation property.
-   mutable ParBilinearForm Mv;
-   DenseTensor Me_inv;
-
-   // Integration rule for all assemblies.
-   const IntegrationRule &integ_rule;
-
-   // Data associated with each quadrature point in the mesh. These values are
-   // recomputed at each time step.
-   mutable QuadratureData quad_data;
-   mutable bool quad_data_is_current;
-
-   // Force matrix that combines the kinematic and thermodynamic spaces. It is
-   // assembled in each time step and then it is used to compute the final
-   // right-hand sides for momentum and specific internal energy.
-   mutable MixedBilinearForm Force;
-
-   // Same as above, but done through partial assembly.
-   ForcePAOperator ForcePA;
-
-   // Mass matrices done through partial assembly:
-   // velocity (coupled H1 assembly) and energy (local L2 assemblies).
-   mutable MassPAOperator VMassPA;
-   mutable LocalMassPAOperator locEMassPA;
-
-   // Linear solver for energy.
-   CGSolver locCG;
-
-   mutable Vector zone_max_visc, zone_vgrad;
-
-   mutable TimingData timer;
-
-   void ComputeMaterialProperties(int nvalues, const double gamma[],
-                                  const double rho[], const double e[],
-                                  double p[], double cs[]) const
-   {
-      for (int v = 0; v < nvalues; v++)
-      {
-         p[v]  = (gamma[v] - 1.0) * rho[v] * e[v];
-         cs[v] = sqrt(gamma[v] * (gamma[v]-1.0) * e[v]);
-      }
-   }
-
-   void UpdateQuadratureData(const Vector &S) const;
-
-public:
-   LagrangianHydroOperator(int size, ParFiniteElementSpace &h1_fes,
-                           ParFiniteElementSpace &l2_fes,
-                           Array<int> &essential_tdofs, ParGridFunction &rho0,
-                           int source_type_, double cfl_,
-                           Coefficient *material_, bool visc, bool pa,
-                           double cgt, int cgiter);
-
-   // Solve for dx_dt, dv_dt and de_dt.
-   virtual void Mult(const Vector &S, Vector &dS_dt) const;
-
-   // Calls UpdateQuadratureData to compute the new quad_data.dt_estimate.
-   double GetTimeStepEstimate(const Vector &S) const;
-   void ResetTimeStepEstimate() const;
-   void ResetQuadratureData() const { quad_data_is_current = false; }
-
-   // The density values, which are stored only at some quadrature points, are
-   // projected as a ParGridFunction.
-   void ComputeDensity(ParGridFunction &rho);
-
-   // Update all internal data on mesh change.
-   void AMRUpdate(const Vector &S, bool quick);
-
-   void SetH0(double h0) { quad_data.h0 = h0; }
-   double GetH0() const { return quad_data.h0; }
-
-   Vector& GetZoneMaxVisc() { return zone_max_visc; }
-   Vector& GetZoneVGrad() { return zone_vgrad; }
-
-   void PrintTimingData(bool IamRoot, int steps);
-
-   ~LagrangianHydroOperator();
-};
-
-class TaylorCoefficient : public Coefficient
-{
-   virtual double Eval(ElementTransformation &T,
-                       const IntegrationPoint &ip)
-   {
-      Vector x(2);
-      T.Transform(ip, x);
-      return 3.0 / 8.0 * M_PI * ( cos(3.0*M_PI*x(0)) * cos(M_PI*x(1)) -
-                                  cos(M_PI*x(0))     * cos(3.0*M_PI*x(1)) );
-   }
-};
-
-} // namespace hydrodynamics
-
-} // namespace mfem
-
-#endif // MFEM_USE_MPI
-
-#endif // MFEM_LAGHOS
diff --git a/amr/makefile b/amr/makefile
deleted file mode 100644
index 0a7fdd3a..00000000
--- a/amr/makefile
+++ /dev/null
@@ -1,183 +0,0 @@
-# Copyright (c) 2017, Lawrence Livermore National Security, LLC. Produced at
-# the Lawrence Livermore National Laboratory. LLNL-CODE-734707. All Rights
-# reserved. See files LICENSE and NOTICE for details.
-#
-# This file is part of CEED, a collection of benchmarks, miniapps, software
-# libraries and APIs for efficient high-order finite element and spectral
-# element discretizations for exascale applications. For more information and
-# source code availability see http://github.com/ceed.
-#
-# The CEED research is supported by the Exascale Computing Project 17-SC-20-SC,
-# a collaborative effort of two U.S. Department of Energy organizations (Office
-# of Science and the National Nuclear Security Administration) responsible for
-# the planning and preparation of a capable exascale ecosystem, including
-# software, applications, hardware, advanced system engineering and early
-# testbed platforms, in support of the nation's exascale computing imperative.
-
-define LAGHOS_HELP_MSG
-
-Laghos makefile targets:
-
-   make
-   make status/info
-   make install
-   make clean
-   make distclean
-   make style
-
-Examples:
-
-make -j 4
-   Build Laghos using the current configuration options from MFEM.
-   (Laghos requires the MFEM finite element library, and uses its compiler and
-    linker options in its build process.)
-make status
-   Display information about the current configuration.
-make install PREFIX=<dir>
-   Install the Laghos executable in <dir>.
-make clean
-   Clean the Laghos executable, library and object files.
-make distclean
-   In addition to "make clean", remove the local installation directory and some
-   run-time generated files.
-make style
-   Format the Laghos C++ source files using the Artistic Style (astyle) settings
-   from MFEM.
-
-endef
-
-# Default installation location
-PREFIX = ./bin
-INSTALL = /usr/bin/install
-
-# Use the MFEM build directory
-MFEM_DIR = ../../mfem
-CONFIG_MK = $(MFEM_DIR)/config/config.mk
-TEST_MK = $(MFEM_DIR)/config/test.mk
-# Use the MFEM install directory
-# MFEM_DIR = ../mfem/mfem
-# CONFIG_MK = $(MFEM_DIR)/config.mk
-# TEST_MK = $(MFEM_DIR)/test.mk
-
-# Use two relative paths to MFEM: first one for compilation in '.' and second
-# one for compilation in 'lib'.
-MFEM_DIR1 := $(MFEM_DIR)
-MFEM_DIR2 := $(realpath $(MFEM_DIR))
-
-# Use the compiler used by MFEM. Get the compiler and the options for compiling
-# and linking from MFEM's config.mk. (Skip this if the target does not require
-# building.)
-MFEM_LIB_FILE = mfem_is_not_built
-ifeq (,$(filter help clean distclean style,$(MAKECMDGOALS)))
-   -include $(CONFIG_MK)
-endif
-
-CXX = $(MFEM_CXX)
-CPPFLAGS = $(MFEM_CPPFLAGS)
-CXXFLAGS = $(MFEM_CXXFLAGS)
-
-# MFEM config does not define C compiler
-CC     = gcc
-CFLAGS = -O3
-
-# Optional link flags
-LDFLAGS =
-
-OPTIM_OPTS = -O3
-DEBUG_OPTS = -g -Wall
-LAGHOS_DEBUG = $(MFEM_DEBUG)
-ifneq ($(LAGHOS_DEBUG),$(MFEM_DEBUG))
-   ifeq ($(LAGHOS_DEBUG),YES)
-      CXXFLAGS = $(DEBUG_OPTS)
-   else
-      CXXFLAGS = $(OPTIM_OPTS)
-   endif
-endif
-
-LAGHOS_FLAGS = $(CPPFLAGS) $(CXXFLAGS) $(MFEM_INCFLAGS)
-LAGHOS_LIBS = $(MFEM_LIBS)
-
-ifeq ($(LAGHOS_DEBUG),YES)
-   LAGHOS_FLAGS += -DLAGHOS_DEBUG
-endif
-
-LIBS = $(strip $(LAGHOS_LIBS) $(LDFLAGS))
-CCC  = $(strip $(CXX) $(LAGHOS_FLAGS))
-Ccc  = $(strip $(CC) $(CFLAGS) $(GL_OPTS))
-
-SOURCE_FILES = laghos.cpp laghos_solver.cpp laghos_assembly.cpp
-OBJECT_FILES1 = $(SOURCE_FILES:.cpp=.o)
-OBJECT_FILES = $(OBJECT_FILES1:.c=.o)
-HEADER_FILES = laghos_solver.hpp laghos_assembly.hpp
-
-# Targets
-
-.PHONY: all clean distclean install status info opt debug test style clean-build clean-exec
-
-.SUFFIXES: .c .cpp .o
-.cpp.o:
-	cd $(<D); $(CCC) -c $(<F)
-.c.o:
-	cd $(<D); $(Ccc) -c $(<F)
-
-laghos: override MFEM_DIR = $(MFEM_DIR1)
-laghos:	$(OBJECT_FILES) $(CONFIG_MK) $(MFEM_LIB_FILE)
-	$(CCC) -o laghos $(OBJECT_FILES) $(LIBS)
-
-all: laghos
-
-opt:
-	$(MAKE) "LAGHOS_DEBUG=NO"
-
-debug:
-	$(MAKE) "LAGHOS_DEBUG=YES"
-
-$(OBJECT_FILES): override MFEM_DIR = $(MFEM_DIR2)
-$(OBJECT_FILES): $(HEADER_FILES) $(CONFIG_MK) $(MFEM_LIB_FILE)
-
-MFEM_TESTS = laghos
-include $(TEST_MK)
-# Testing: Specific execution options
-RUN_MPI = $(MFEM_MPIEXEC) $(MFEM_MPIEXEC_NP) 4
-test: laghos
-	@$(call mfem-test,$<, $(RUN_MPI), Laghos miniapp,\
-	-p 0 -m data/square01_quad.mesh -rs 3 -tf 0.1)
-# Testing: "test" target and mfem-test* variables are defined in MFEM's
-# config/test.mk
-
-# Generate an error message if the MFEM library is not built and exit
-$(CONFIG_MK) $(MFEM_LIB_FILE):
-	$(error The MFEM library is not built)
-
-clean: clean-build clean-exec
-
-clean-build:
-	rm -rf laghos *.o *~ *.dSYM
-clean-exec:
-	rm -rf ./results
-
-distclean: clean
-	rm -rf bin/
-
-install: laghos
-	mkdir -p $(PREFIX)
-	$(INSTALL) -m 750 laghos $(PREFIX)
-
-help:
-	$(info $(value LAGHOS_HELP_MSG))
-	@true
-
-status info:
-	$(info MFEM_DIR    = $(MFEM_DIR))
-	$(info LAGHOS_FLAGS = $(LAGHOS_FLAGS))
-	$(info LAGHOS_LIBS  = $(value LAGHOS_LIBS))
-	$(info PREFIX      = $(PREFIX))
-	@true
-
-ASTYLE = astyle --options=$(MFEM_DIR1)/config/mfem.astylerc
-FORMAT_FILES := $(SOURCE_FILES) $(HEADER_FILES)
-
-style:
-	@if ! $(ASTYLE) $(FORMAT_FILES) | grep Formatted; then\
-	   echo "No source files were changed.";\
-	fi
diff --git a/amr/data/sedov-amr-2463.png b/data/sedov-amr-2463.png
similarity index 100%
rename from amr/data/sedov-amr-2463.png
rename to data/sedov-amr-2463.png
diff --git a/amr/data/sedov-amr-900.png b/data/sedov-amr-900.png
similarity index 100%
rename from amr/data/sedov-amr-900.png
rename to data/sedov-amr-900.png
diff --git a/laghos.cpp b/laghos.cpp
index d2c787db..75e39331 100644
--- a/laghos.cpp
+++ b/laghos.cpp
@@ -62,21 +62,14 @@
 #include <fstream>
 #include <sys/time.h>
 #include <sys/resource.h>
+
+#include "laghos_amr.hpp" // problem, dim, e0, rho0, gamma_func, v0
 #include "laghos_solver.hpp"
 
 using std::cout;
 using std::endl;
 using namespace mfem;
 
-// Choice for the problem setup.
-static int problem, dim;
-
-// Forward declarations.
-double e0(const Vector &);
-double rho0(const Vector &);
-double gamma_func(const Vector &);
-void v0(const Vector &, Vector &);
-
 static long GetMaxRssMB();
 static void display_banner(std::ostream&);
 static void Checks(const int ti, const double norm, int &checks);
@@ -111,6 +104,8 @@ int main(int argc, char *argv[])
    bool impose_visc = false;
    bool visualization = false;
    int vis_steps = 5;
+   int vis_windows = 3;
+   const char *vis_keys = nullptr;
    bool visit = false;
    bool gfprint = false;
    const char *basename = "results/Laghos";
@@ -123,6 +118,14 @@ int main(int argc, char *argv[])
    int dev = 0;
    double blast_energy = 0.25;
    double blast_position[] = {0.0, 0.0, 0.0};
+   bool amr = false;
+   int amr_estimator = amr::estimator::custom;
+   double amr_ref_threshold = 2e-4;
+   double amr_jac_threshold = 0.92;
+   double amr_deref_threshold = 0.75;
+   int amr_max_level = rs_levels + rp_levels;
+   const double amr_blast_eps = 1e-10;
+   const int amr_nc_limit = 1; // maximum level of hanging nodes
 
    OptionsParser args(argc, argv);
    args.AddOption(&dim, "-dim", "--dimension", "Dimension of the problem.");
@@ -166,6 +169,9 @@ int main(int argc, char *argv[])
                   "Enable or disable GLVis visualization.");
    args.AddOption(&vis_steps, "-vs", "--visualization-steps",
                   "Visualize every n-th timestep.");
+   args.AddOption(&vis_windows, "-vw", "--visualization-windows",
+                  "Number of visualization windows to open: 1~3");
+   args.AddOption(&vis_keys, "-vk", "--visualization-keys", "Keys for vis.");
    args.AddOption(&visit, "-visit", "--visit", "-no-visit", "--no-visit",
                   "Enable or disable VisIt visualization.");
    args.AddOption(&gfprint, "-print", "--print", "-no-print", "--no-print",
@@ -192,14 +198,37 @@ int main(int argc, char *argv[])
    args.AddOption(&gpu_aware_mpi, "-gam", "--gpu-aware-mpi", "-no-gam",
                   "--no-gpu-aware-mpi", "Enable GPU aware MPI communications.");
    args.AddOption(&dev, "-dev", "--dev", "GPU device to use.");
+
+   args.AddOption(&amr, "-amr", "--enable-amr", "-no-amr", "--disable-amr",
+                  "Experimental adaptive mesh refinement (problem 1 only).");
+   args.AddOption(&amr_estimator, "-ae", "--amr-estimator",
+                  "AMR estimator: 0:Custom, 1:Rho, 2:ZZ, 3:Kelly");
+   args.AddOption(&amr_ref_threshold, "-ar", "--amr-ref-threshold",
+                  "AMR refinement threshold.");
+   args.AddOption(&amr_deref_threshold, "-ad", "--amr-deref-threshold",
+                  "AMR derefinement threshold (0 = no derefinement).");
+   args.AddOption(&amr_jac_threshold, "-aj", "--amr-jac-threshold",
+                  "AMR refinement threshold.");
+   args.AddOption(&amr_max_level, "-am", "--amr-max-level",
+                  "AMR max refined level (default to 'rs_levels + rp_levels')");
    args.Parse();
    if (!args.Good())
    {
       if (mpi.Root()) { args.PrintUsage(cout); }
       return 1;
    }
+
    if (mpi.Root()) { args.PrintOptions(cout); }
 
+   // Check AMR configuration: only Sedov problem (#1) is supported for now
+   // does this still holds?
+   /*if (amr && problem != 1)
+   {
+      if (mpi.Root())
+      { std::cout << "AMR only supported for problem 1." << std::endl; }
+      return 0;
+   }*/
+
    // Configure the device from the command line options
    Device backend;
    backend.Configure(device, dev);
@@ -208,7 +237,7 @@ int main(int argc, char *argv[])
 
    // On all processors, use the default builtin 1D/2D/3D mesh or read the
    // serial one given on the command line.
-   Mesh *mesh;
+   Mesh *mesh = NULL;
    if (strncmp(mesh_file, "default", 7) != 0)
    {
       mesh = new Mesh(mesh_file, true, true);
@@ -221,7 +250,7 @@ int main(int argc, char *argv[])
          mesh->GetBdrElement(0)->SetAttribute(1);
          mesh->GetBdrElement(1)->SetAttribute(1);
       }
-      if (dim == 2)
+      else if (dim == 2)
       {
          mesh = new Mesh(Mesh::MakeCartesian2D(2, 2, Element::QUADRILATERAL,
                                                true));
@@ -233,7 +262,7 @@ int main(int argc, char *argv[])
             bel->SetAttribute(attr);
          }
       }
-      if (dim == 3)
+      else if (dim == 3)
       {
          mesh = new Mesh(Mesh::MakeCartesian3D(2, 2, 2, Element::HEXAHEDRON,
                                                true));
@@ -245,6 +274,7 @@ int main(int argc, char *argv[])
             bel->SetAttribute(attr);
          }
       }
+      else { MFEM_ABORT("Dimension should be set"); mesh = new Mesh(); }
    }
    dim = mesh->Dimension();
 
@@ -260,6 +290,22 @@ int main(int argc, char *argv[])
 
    // Refine the mesh in serial to increase the resolution.
    for (int lev = 0; lev < rs_levels; lev++) { mesh->UniformRefinement(); }
+   //amr_max_level = std::max(amr_max_level, rs_levels + rp_levels);s
+
+   if (amr)
+   {
+      mesh->EnsureNCMesh();
+      // refine at the blast position for problem 1
+      if (problem == 1)
+      {
+         Vertex blast {blast_position[0], blast_position[1], blast_position[2]};
+         for (int lev = 0; lev < rs_levels; lev++)
+         {
+            mesh->RefineAtVertex(blast, amr_blast_eps);
+         }
+      }
+   }
+
    const int mesh_NE = mesh->GetNE();
    if (mpi.Root())
    {
@@ -416,20 +462,8 @@ int main(int argc, char *argv[])
    // Boundary conditions: all tests use v.n = 0 on the boundary, and we assume
    // that the boundaries are straight.
    Array<int> ess_tdofs, ess_vdofs;
-   {
-      Array<int> ess_bdr(pmesh->bdr_attributes.Max()), dofs_marker, dofs_list;
-      for (int d = 0; d < pmesh->Dimension(); d++)
-      {
-         // Attributes 1/2/3 correspond to fixed-x/y/z boundaries,
-         // i.e., we must enforce v_x/y/z = 0 for the velocity components.
-         ess_bdr = 0; ess_bdr[d] = 1;
-         H1FESpace.GetEssentialTrueDofs(ess_bdr, dofs_list, d);
-         ess_tdofs.Append(dofs_list);
-         H1FESpace.GetEssentialVDofs(ess_bdr, dofs_marker, d);
-         FiniteElementSpace::MarkerToList(dofs_marker, dofs_list);
-         ess_vdofs.Append(dofs_list);
-      }
-   }
+   const int bdr_attr_max = pmesh->bdr_attributes.Max();
+   GetZeroBCDofs(pmesh, H1FESpace, bdr_attr_max, ess_tdofs, ess_vdofs);
 
    // Define the explicit ODE solver used for time integration.
    ODESolver *ode_solver = NULL;
@@ -506,26 +540,28 @@ int main(int argc, char *argv[])
    // time evolution.
    ParGridFunction rho0_gf(&L2FESpace);
    FunctionCoefficient rho0_coeff(rho0);
-   L2_FECollection l2_fec(order_e, pmesh->Dimension());
-   ParFiniteElementSpace l2_fes(pmesh, &l2_fec);
-   ParGridFunction l2_rho0_gf(&l2_fes), l2_e(&l2_fes);
-   l2_rho0_gf.ProjectCoefficient(rho0_coeff);
-   rho0_gf.ProjectGridFunction(l2_rho0_gf);
-   if (problem == 1)
    {
-      // For the Sedov test, we use a delta function at the origin.
-      DeltaCoefficient e_coeff(blast_position[0], blast_position[1],
-                               blast_position[2], blast_energy);
-      l2_e.ProjectCoefficient(e_coeff);
-   }
-   else
-   {
-      FunctionCoefficient e_coeff(e0);
-      l2_e.ProjectCoefficient(e_coeff);
+      L2_FECollection l2_fec(order_e, pmesh->Dimension());
+      ParFiniteElementSpace l2_fes(pmesh, &l2_fec);
+      ParGridFunction l2_rho0_gf(&l2_fes), l2_e(&l2_fes);
+      l2_rho0_gf.ProjectCoefficient(rho0_coeff);
+      rho0_gf.ProjectGridFunction(l2_rho0_gf);
+      if (problem == 1)
+      {
+         // For the Sedov test, we use a delta function at the origin.
+         DeltaCoefficient e_coeff(blast_position[0], blast_position[1],
+                                  blast_position[2], blast_energy);
+         l2_e.ProjectCoefficient(e_coeff);
+      }
+      else
+      {
+         FunctionCoefficient e_coeff(e0);
+         l2_e.ProjectCoefficient(e_coeff);
+      }
+      e_gf.ProjectGridFunction(l2_e);
+      // Sync the data location of e_gf with its base, S
+      e_gf.SyncAliasMemory(S);
    }
-   e_gf.ProjectGridFunction(l2_e);
-   // Sync the data location of e_gf with its base, S
-   e_gf.SyncAliasMemory(S);
 
    // Piecewise constant ideal gas coefficient over the Lagrangian mesh. The
    // gamma values are projected on function that's constant on the moving mesh.
@@ -551,13 +587,32 @@ int main(int argc, char *argv[])
    }
    if (impose_visc) { visc = true; }
 
+   // Time dependent hydro operator
    hydrodynamics::LagrangianHydroOperator hydro(S.Size(),
-                                                H1FESpace, L2FESpace, ess_tdofs,
+                                                H1FESpace, L2FESpace,
+                                                ess_tdofs,
                                                 rho0_coeff, rho0_gf,
                                                 mat_gf, source, cfl,
-                                                visc, vorticity, p_assembly,
+                                                visc, vorticity,
+                                                p_assembly, amr,
                                                 cg_tol, cg_max_iter, ftz_tol,
                                                 order_q);
+   // AMR operator
+   amr::AMR *AMR = nullptr;
+   if (amr)
+   {
+      AMR = new amr::AMR(pmesh,
+                         amr_estimator,
+                         amr_ref_threshold,
+                         amr_jac_threshold,
+                         amr_deref_threshold,
+                         amr_max_level,
+                         amr_nc_limit,
+                         amr_blast_eps,
+                         blast_energy,
+                         blast_position);
+      AMR->Setup(x_gf);
+   }
 
    socketstream vis_rho, vis_v, vis_e;
    char vishost[] = "localhost";
@@ -577,19 +632,27 @@ int main(int argc, char *argv[])
       vis_v.precision(8);
       vis_e.precision(8);
       int Wx = 0, Wy = 0; // window position
-      const int Ww = 350, Wh = 350; // window size
+      int Ww = vis_windows==1 ? 1024 : 350,
+          Wh = vis_windows==1 ?  768 : 350; // window size
       int offx = Ww+10; // window offsets
       if (problem != 0 && problem != 4)
       {
          hydrodynamics::VisualizeField(vis_rho, vishost, visport, rho_gf,
-                                       "Density", Wx, Wy, Ww, Wh);
+                                       "Density", vis_keys, Wx, Wy, Ww, Wh);
+      }
+      if (vis_windows > 1)
+      {
+         Wx += offx;
+         hydrodynamics::VisualizeField(vis_v, vishost, visport, v_gf,
+                                       "Velocity", vis_keys, Wx, Wy, Ww, Wh);
+      }
+      if (vis_windows > 2)
+      {
+         Wx += offx;
+         hydrodynamics::VisualizeField(vis_e, vishost, visport, e_gf,
+                                       "Specific Internal Energy",
+                                       vis_keys, Wx, Wy, Ww, Wh);
       }
-      Wx += offx;
-      hydrodynamics::VisualizeField(vis_v, vishost, visport, v_gf,
-                                    "Velocity", Wx, Wy, Ww, Wh);
-      Wx += offx;
-      hydrodynamics::VisualizeField(vis_e, vishost, visport, e_gf,
-                                    "Specific Internal Energy", Wx, Wy, Ww, Wh);
    }
 
    // Save data for VisIt visualization.
@@ -648,6 +711,9 @@ int main(int argc, char *argv[])
       t_old = t;
       hydro.ResetTimeStepEstimate();
 
+      // Reset the associated refiner and derefiner
+      if (amr) { AMR->Reset(); }
+
       // S is the vector of dofs, t is the current time, and dt is the time step
       // to advance.
       ode_solver->Step(S, t, dt);
@@ -727,21 +793,28 @@ int main(int argc, char *argv[])
          if (visualization)
          {
             int Wx = 0, Wy = 0; // window position
-            int Ww = 350, Wh = 350; // window size
+            int Ww = vis_windows==1 ? 1024 : 350,
+                Wh = vis_windows==1 ?  768 : 350; // window size
             int offx = Ww+10; // window offsets
             if (problem != 0 && problem != 4)
             {
                hydrodynamics::VisualizeField(vis_rho, vishost, visport, rho_gf,
-                                             "Density", Wx, Wy, Ww, Wh);
+                                             "Density", vis_keys, Wx, Wy, Ww, Wh);
+            }
+            if (vis_windows > 1)
+            {
+               Wx += offx;
+               hydrodynamics::VisualizeField(vis_v, vishost, visport, v_gf,
+                                             "Velocity",
+                                             vis_keys, Wx, Wy, Ww, Wh);
+            }
+            if (vis_windows > 2)
+            {
+               Wx += offx;
+               hydrodynamics::VisualizeField(vis_e, vishost, visport, e_gf,
+                                             "Specific Internal Energy",
+                                             vis_keys, Wx, Wy, Ww,Wh);
             }
-            Wx += offx;
-            hydrodynamics::VisualizeField(vis_v, vishost, visport,
-                                          v_gf, "Velocity", Wx, Wy, Ww, Wh);
-            Wx += offx;
-            hydrodynamics::VisualizeField(vis_e, vishost, visport, e_gf,
-                                          "Specific Internal Energy",
-                                          Wx, Wy, Ww,Wh);
-            Wx += offx;
          }
 
          if (visit)
@@ -781,6 +854,13 @@ int main(int argc, char *argv[])
          }
       }
 
+      // AMR update
+      if (amr)
+      {
+         AMR->Update(hydro, ode_solver, S, S_old, x_gf, v_gf, e_gf, mat_gf,
+                     offset, bdr_attr_max, ess_tdofs, ess_vdofs);
+      }
+
       // Problems checks
       if (check)
       {
@@ -860,189 +940,6 @@ int main(int argc, char *argv[])
    return 0;
 }
 
-double rho0(const Vector &x)
-{
-   switch (problem)
-   {
-      case 0: return 1.0;
-      case 1: return 1.0;
-      case 2: return (x(0) < 0.5) ? 1.0 : 0.1;
-      case 3: return (dim == 2) ? (x(0) > 1.0 && x(1) > 1.5) ? 0.125 : 1.0
-                        : x(0) > 1.0 && ((x(1) < 1.5 && x(2) < 1.5) ||
-                                         (x(1) > 1.5 && x(2) > 1.5)) ? 0.125 : 1.0;
-      case 4: return 1.0;
-      case 5:
-      {
-         if (x(0) >= 0.5 && x(1) >= 0.5) { return 0.5313; }
-         if (x(0) <  0.5 && x(1) <  0.5) { return 0.8; }
-         return 1.0;
-      }
-      case 6:
-      {
-         if (x(0) <  0.5 && x(1) >= 0.5) { return 2.0; }
-         if (x(0) >= 0.5 && x(1) <  0.5) { return 3.0; }
-         return 1.0;
-      }
-      case 7: return x(1) >= 0.0 ? 2.0 : 1.0;
-      default: MFEM_ABORT("Bad number given for problem id!"); return 0.0;
-   }
-}
-
-double gamma_func(const Vector &x)
-{
-   switch (problem)
-   {
-      case 0: return 5.0 / 3.0;
-      case 1: return 1.4;
-      case 2: return 1.4;
-      case 3:
-         if (dim == 1) { return (x(0) > 0.5) ? 1.4 : 1.5; }
-         else { return (x(0) > 1.0 && x(1) <= 1.5) ? 1.4 : 1.5; }
-      case 4: return 5.0 / 3.0;
-      case 5: return 1.4;
-      case 6: return 1.4;
-      case 7: return 5.0 / 3.0;
-      default: MFEM_ABORT("Bad number given for problem id!"); return 0.0;
-   }
-}
-
-static double rad(double x, double y) { return sqrt(x*x + y*y); }
-
-void v0(const Vector &x, Vector &v)
-{
-   const double atn = dim!=1 ? pow((x(0)*(1.0-x(0))*4*x(1)*(1.0-x(1))*4.0),
-                                   0.4) : 0.0;
-   switch (problem)
-   {
-      case 0:
-         v(0) =  sin(M_PI*x(0)) * cos(M_PI*x(1));
-         v(1) = -cos(M_PI*x(0)) * sin(M_PI*x(1));
-         if (x.Size() == 3)
-         {
-            v(0) *= cos(M_PI*x(2));
-            v(1) *= cos(M_PI*x(2));
-            v(2) = 0.0;
-         }
-         break;
-      case 1: v = 0.0; break;
-      case 2: v = 0.0; break;
-      case 3: v = 0.0; break;
-      case 4:
-      {
-         v = 0.0;
-         const double r = rad(x(0), x(1));
-         if (r < 0.2)
-         {
-            v(0) =  5.0 * x(1);
-            v(1) = -5.0 * x(0);
-         }
-         else if (r < 0.4)
-         {
-            v(0) =  2.0 * x(1) / r - 5.0 * x(1);
-            v(1) = -2.0 * x(0) / r + 5.0 * x(0);
-         }
-         else { }
-         break;
-      }
-      case 5:
-      {
-         v = 0.0;
-         if (x(0) >= 0.5 && x(1) >= 0.5) { v(0)=0.0*atn, v(1)=0.0*atn; return;}
-         if (x(0) <  0.5 && x(1) >= 0.5) { v(0)=0.7276*atn, v(1)=0.0*atn; return;}
-         if (x(0) <  0.5 && x(1) <  0.5) { v(0)=0.0*atn, v(1)=0.0*atn; return;}
-         if (x(0) >= 0.5 && x(1) <  0.5) { v(0)=0.0*atn, v(1)=0.7276*atn; return; }
-         MFEM_ABORT("Error in problem 5!");
-         return;
-      }
-      case 6:
-      {
-         v = 0.0;
-         if (x(0) >= 0.5 && x(1) >= 0.5) { v(0)=+0.75*atn, v(1)=-0.5*atn; return;}
-         if (x(0) <  0.5 && x(1) >= 0.5) { v(0)=+0.75*atn, v(1)=+0.5*atn; return;}
-         if (x(0) <  0.5 && x(1) <  0.5) { v(0)=-0.75*atn, v(1)=+0.5*atn; return;}
-         if (x(0) >= 0.5 && x(1) <  0.5) { v(0)=-0.75*atn, v(1)=-0.5*atn; return;}
-         MFEM_ABORT("Error in problem 6!");
-         return;
-      }
-      case 7:
-      {
-         v = 0.0;
-         v(1) = 0.02 * exp(-2*M_PI*x(1)*x(1)) * cos(2*M_PI*x(0));
-         break;
-      }
-      default: MFEM_ABORT("Bad number given for problem id!");
-   }
-}
-
-double e0(const Vector &x)
-{
-   switch (problem)
-   {
-      case 0:
-      {
-         const double denom = 2.0 / 3.0;  // (5/3 - 1) * density.
-         double val;
-         if (x.Size() == 2)
-         {
-            val = 1.0 + (cos(2*M_PI*x(0)) + cos(2*M_PI*x(1))) / 4.0;
-         }
-         else
-         {
-            val = 100.0 + ((cos(2*M_PI*x(2)) + 2) *
-                           (cos(2*M_PI*x(0)) + cos(2*M_PI*x(1))) - 2) / 16.0;
-         }
-         return val/denom;
-      }
-      case 1: return 0.0; // This case in initialized in main().
-      case 2: return (x(0) < 0.5) ? 1.0 / rho0(x) / (gamma_func(x) - 1.0)
-                        : 0.1 / rho0(x) / (gamma_func(x) - 1.0);
-      case 3: return (x(0) > 1.0) ? 0.1 / rho0(x) / (gamma_func(x) - 1.0)
-                        : 1.0 / rho0(x) / (gamma_func(x) - 1.0);
-      case 4:
-      {
-         const double r = rad(x(0), x(1)), rsq = x(0) * x(0) + x(1) * x(1);
-         const double gamma = 5.0 / 3.0;
-         if (r < 0.2)
-         {
-            return (5.0 + 25.0 / 2.0 * rsq) / (gamma - 1.0);
-         }
-         else if (r < 0.4)
-         {
-            const double t1 = 9.0 - 4.0 * log(0.2) + 25.0 / 2.0 * rsq;
-            const double t2 = 20.0 * r - 4.0 * log(r);
-            return (t1 - t2) / (gamma - 1.0);
-         }
-         else { return (3.0 + 4.0 * log(2.0)) / (gamma - 1.0); }
-      }
-      case 5:
-      {
-         const double irg = 1.0 / rho0(x) / (gamma_func(x) - 1.0);
-         if (x(0) >= 0.5 && x(1) >= 0.5) { return 0.4 * irg; }
-         if (x(0) <  0.5 && x(1) >= 0.5) { return 1.0 * irg; }
-         if (x(0) <  0.5 && x(1) <  0.5) { return 1.0 * irg; }
-         if (x(0) >= 0.5 && x(1) <  0.5) { return 1.0 * irg; }
-         MFEM_ABORT("Error in problem 5!");
-         return 0.0;
-      }
-      case 6:
-      {
-         const double irg = 1.0 / rho0(x) / (gamma_func(x) - 1.0);
-         if (x(0) >= 0.5 && x(1) >= 0.5) { return 1.0 * irg; }
-         if (x(0) <  0.5 && x(1) >= 0.5) { return 1.0 * irg; }
-         if (x(0) <  0.5 && x(1) <  0.5) { return 1.0 * irg; }
-         if (x(0) >= 0.5 && x(1) <  0.5) { return 1.0 * irg; }
-         MFEM_ABORT("Error in problem 6!");
-         return 0.0;
-      }
-      case 7:
-      {
-         const double rho = rho0(x), gamma = gamma_func(x);
-         return (6.0 - rho * x(1)) / (gamma - 1.0) / rho;
-      }
-      default: MFEM_ABORT("Bad number given for problem id!"); return 0.0;
-   }
-}
-
 static void display_banner(std::ostream &os)
 {
    os << endl
diff --git a/laghos_amr.cpp b/laghos_amr.cpp
new file mode 100644
index 00000000..330917b4
--- /dev/null
+++ b/laghos_amr.cpp
@@ -0,0 +1,493 @@
+// Copyright (c) 2017, Lawrence Livermore National Security, LLC. Produced at
+// the Lawrence Livermore National Laboratory. LLNL-CODE-734707. All Rights
+// reserved. See files LICENSE and NOTICE for details.
+//
+// This file is part of CEED, a collection of benchmarks, miniapps, software
+// libraries and APIs for efficient high-order finite element and spectral
+// element discretizations for exascale applications. For more information and
+// source code availability see http://github.com/ceed.
+//
+// The CEED research is supported by the Exascale Computing Project 17-SC-20-SC,
+// a collaborative effort of two U.S. Department of Energy organizations (Office
+// of Science and the National Nuclear Security Administration) responsible for
+// the planning and preparation of a capable exascale ecosystem, including
+// software, applications, hardware, advanced system engineering and early
+// testbed platforms, in support of the nation's exascale computing imperative.
+
+#include "laghos_solver.hpp"
+#include "laghos_amr.hpp"
+
+namespace mfem
+{
+
+namespace amr
+{
+
+static const char *EstimatorName(const int est)
+{
+   switch (static_cast<amr::estimator>(est))
+   {
+      case amr::estimator::custom: return "Custom";
+      case amr::estimator::jjt: return "JJt";
+      case amr::estimator::zz: return "ZZ";
+      case amr::estimator::kelly: return "Kelly";
+      default: MFEM_ABORT("Unknown estimator!");
+   }
+   return nullptr;
+}
+
+static void FindElementsWithVertex(const Mesh* mesh, const Vertex &vert,
+                                   const double size, Array<int> &elements)
+{
+   Array<int> v;
+
+   for (int i = 0; i < mesh->GetNE(); i++)
+   {
+      mesh->GetElementVertices(i, v);
+      for (int j = 0; j < v.Size(); j++)
+      {
+         double dist = 0.0;
+         for (int l = 0; l < mesh->SpaceDimension(); l++)
+         {
+            double d = vert(l) - mesh->GetVertex(v[j])[l];
+            dist += d*d;
+         }
+         if (dist <= size*size) { elements.Append(i); break; }
+      }
+   }
+}
+
+static void Pow(Vector &vec, double p)
+{
+   for (int i = 0; i < vec.Size(); i++)
+   {
+      vec(i) = std::pow(vec(i), p);
+   }
+}
+
+static void GetPerElementMinMax(const GridFunction &gf,
+                                Vector &elem_min, Vector &elem_max,
+                                int int_order = -1)
+{
+   const FiniteElementSpace *space = gf.FESpace();
+   int ne = space->GetNE();
+
+   if (int_order < 0) { int_order = space->GetOrder(0) + 1; }
+
+   elem_min.SetSize(ne);
+   elem_max.SetSize(ne);
+
+   Vector vals, tmp;
+   for (int i = 0; i < ne; i++)
+   {
+      int geom = space->GetFE(i)->GetGeomType();
+      const IntegrationRule &ir = IntRules.Get(geom, int_order);
+
+      gf.GetValues(i, ir, vals);
+
+      if (space->GetVDim() > 1)
+      {
+         Pow(vals, 2.0);
+         for (int vd = 1; vd < space->GetVDim(); vd++)
+         {
+            gf.GetValues(i, ir, tmp, vd+1);
+            Pow(tmp, 2.0);
+            vals += tmp;
+         }
+         Pow(vals, 0.5);
+      }
+
+      elem_min(i) = vals.Min();
+      elem_max(i) = vals.Max();
+   }
+}
+
+void AMREstimatorIntegrator::ComputeElementFlux1(const FiniteElement &el,
+                                                 ElementTransformation &Trans,
+                                                 const Vector &u,
+                                                 const FiniteElement &fluxelem,
+                                                 Vector &flux)
+{
+   const int dof = el.GetDof();
+   const int dim = el.GetDim();
+   const int sdim = Trans.GetSpaceDim();
+
+   DenseMatrix dshape(dof, dim);
+   DenseMatrix invdfdx(dim, sdim);
+   Vector vec(dim), pointflux(sdim);
+
+   const IntegrationRule &ir = fluxelem.GetNodes();
+   const int NQ = ir.GetNPoints();
+   flux.SetSize(NQ * sdim);
+
+   for (int q = 0; q < NQ; q++)
+   {
+      const IntegrationPoint &ip = ir.IntPoint(q);
+      el.CalcDShape(ip, dshape);
+      dshape.MultTranspose(u, vec);
+
+      Trans.SetIntPoint (&ip);
+      CalcInverse(Trans.Jacobian(), invdfdx);
+      invdfdx.MultTranspose(vec, pointflux);
+
+      for (int d = 0; d < sdim; d++)
+      {
+         flux(NQ*d+q) = pointflux(d);
+      }
+   }
+}
+
+void AMREstimatorIntegrator::ComputeElementFlux2(const int el,
+                                                 const FiniteElement &fe,
+                                                 ElementTransformation &Trans,
+                                                 const FiniteElement &fluxelem,
+                                                 Vector &flux)
+{
+   const int dim = fe.GetDim();
+   const int sdim = Trans.GetSpaceDim();
+
+   DenseMatrix Jadjt(dim, sdim), Jadj(dim, sdim);
+
+   const IntegrationRule &ir = fluxelem.GetNodes();
+   const int NQ = ir.GetNPoints();
+   flux.SetSize(NQ * sdim);
+
+   constexpr double NL_DMAX = std::numeric_limits<double>::max();
+   double minW = +NL_DMAX;
+   double maxW = -NL_DMAX;
+
+   const int depth = pmesh->pncmesh->GetElementDepth(el);
+
+   for (int q = 0; q < NQ; q++)
+   {
+      const IntegrationPoint &ip = ir.IntPoint(q);
+      Trans.SetIntPoint(&ip);
+      CalcAdjugate(Trans.Jacobian(), Jadj);
+      Jadjt = Jadj;
+      Jadjt.Transpose();
+      const double w = Jadjt.Weight();
+      minW = std::fmin(minW, w);
+      maxW = std::fmax(maxW, w);
+      MFEM_VERIFY(std::fabs(maxW) > 1e-13, "");
+      const double rho = minW / maxW;
+      MFEM_VERIFY(rho <= 1.0, "");
+      for (int d = 0; d < sdim; d++)
+      {
+         const int iq = NQ*d + q;
+         flux(iq) = 1.0 - rho;
+         if (rho > jac_threshold) { continue; }
+         if (depth > max_level) { continue; }
+         flux(iq) = rho;
+      }
+   }
+}
+
+void AMREstimatorIntegrator::ComputeElementFlux(const FiniteElement &fe,
+                                                ElementTransformation &Tr,
+                                                Vector &u,
+                                                const FiniteElement &fluxelem,
+                                                Vector &flux,
+                                                bool with_coef,
+                                                const IntegrationRule *ir)
+{
+   MFEM_VERIFY(ir == nullptr, "ir not supported");
+   MFEM_VERIFY(NE == pmesh->GetNE(), "");
+   // ZZ comes with with_coef set to true, not Kelly
+   switch (flux_mode)
+   {
+      case mode::diffusion:
+      {
+         DiffusionIntegrator::ComputeElementFlux(fe, Tr, u,
+                                                 fluxelem, flux, with_coef);
+         break;
+      }
+      case mode::one:
+      {
+         AMREstimatorIntegrator::ComputeElementFlux1(fe, Tr, u, fluxelem, flux);
+         break;
+      }
+      case mode::two:
+      {
+         AMREstimatorIntegrator::ComputeElementFlux2(e++, fe, Tr, fluxelem, flux);
+         break;
+      }
+      default: MFEM_ABORT("Unknown mode!");
+   }
+}
+
+AMR::AMR(ParMesh *pmesh, int estimator,
+         double ref_t, double jac_t, double deref_t,
+         int max_level,  int nc_limit,
+         double size_b, double energy_b, double *blast_xyz):
+   pmesh(pmesh),
+   myid(pmesh->GetMyRank()),
+   dim(pmesh->Dimension()),
+   sdim(pmesh->SpaceDimension()),
+   flux_fec(order, dim),
+   flux_fes(pmesh, &flux_fec, sdim),
+   opt(
+{
+   estimator, ref_t, jac_t, deref_t, max_level, nc_limit,
+   size_b, energy_b, Vertex(blast_xyz[0], blast_xyz[1], blast_xyz[2])
+}) { }
+
+AMR::~AMR() { }
+
+void AMR::Setup(ParGridFunction &x_gf)
+{
+   if (myid == 0)
+   {
+      std::cout << "AMR setup with "
+                << amr::EstimatorName(opt.estimator) << " estimator"
+                << std::endl;
+   }
+
+   if (opt.estimator == amr::estimator::zz)
+   {
+      integ = new AMREstimatorIntegrator(pmesh, opt.max_level, opt.jac_threshold);
+      smooth_flux_fec = new RT_FECollection(order-1, dim);
+      auto smooth_flux_fes = new ParFiniteElementSpace(pmesh, smooth_flux_fec);
+      estimator = new L2ZienkiewiczZhuEstimator(*integ, x_gf, &flux_fes,
+                                                smooth_flux_fes);
+   }
+
+   if (opt.estimator == amr::estimator::kelly)
+   {
+      integ = new AMREstimatorIntegrator(pmesh, opt.max_level, opt.jac_threshold);
+      estimator = new KellyErrorEstimator(*integ, x_gf, flux_fes);
+   }
+
+   if (estimator)
+   {
+      const double hysteresis = 0.25;
+      const double max_elem_error = 1.0e-6;
+      refiner = new ThresholdRefiner(*estimator);
+      refiner->SetTotalErrorFraction(0.0);
+      refiner->SetLocalErrorGoal(max_elem_error);
+      refiner->PreferConformingRefinement();
+      refiner->SetNCLimit(opt.nc_limit);
+
+      derefiner = new ThresholdDerefiner(*estimator);
+      derefiner->SetOp(2); // 0:min, 1:sum, 2:max
+      derefiner->SetThreshold(hysteresis * max_elem_error);
+      derefiner->SetNCLimit(opt.nc_limit);
+   }
+}
+
+void AMR::Reset()
+{
+   if (integ) { integ->Reset(); }
+   if (refiner) { refiner->Reset(); }
+   if (derefiner) { derefiner->Reset(); }
+}
+
+void AMR::Update(hydrodynamics::LagrangianHydroOperator &hydro,
+                 ODESolver *ode_solver,
+                 BlockVector &S,
+                 BlockVector &S_old,
+                 ParGridFunction &x,
+                 ParGridFunction &v,
+                 ParGridFunction &e,
+                 ParGridFunction &m,
+                 Array<int> &true_offset,
+                 const int bdr_attr_max,
+                 Array<int> &ess_tdofs,
+                 Array<int> &ess_vdofs)
+{
+   Vector v_max, v_min;
+   Array<Refinement> refined;
+   bool mesh_updated = false;
+   const int NE = pmesh->GetNE();
+   ParFiniteElementSpace &H1FESpace = *x.ParFESpace();
+   constexpr double NL_DMAX = std::numeric_limits<double>::max();
+
+   GetPerElementMinMax(v, v_min, v_max);
+
+   switch (opt.estimator)
+   {
+      case amr::estimator::custom:
+      {
+         Vector &error_est = hydro.GetZoneMaxVisc();
+         for (int el = 0; el < NE; el++)
+         {
+            if (error_est(el) > opt.ref_threshold &&
+                pmesh->pncmesh->GetElementDepth(el) < opt.max_level &&
+                (v_min(el) < 1e-3)) // only refine the still area
+            {
+               refined.Append(Refinement(el));
+            }
+         }
+         break;
+      }
+
+      case amr::estimator::jjt:
+      {
+         const double art = opt.jac_threshold;
+         const int h1_order = H1FESpace.GetOrder(0) + 1;
+         DenseMatrix Jadjt, Jadj(dim, pmesh->SpaceDimension());
+         MFEM_VERIFY(art >= 0.0, "AMR threshold should be positive");
+         for (int el = 0; el < NE; el++)
+         {
+            double minW = +NL_DMAX;
+            double maxW = -NL_DMAX;
+            const int depth = pmesh->pncmesh->GetElementDepth(el);
+            ElementTransformation *eTr = pmesh->GetElementTransformation(el);
+            const Geometry::Type &type = pmesh->GetElement(el)->GetGeometryType();
+            const IntegrationRule *ir = &IntRules.Get(type, h1_order);
+            const int NQ = ir->GetNPoints();
+            for (int q = 0; q < NQ; q++)
+            {
+               eTr->SetIntPoint(&ir->IntPoint(q));
+               const DenseMatrix &J = eTr->Jacobian();
+               CalcAdjugate(J, Jadj);
+               Jadjt = Jadj;
+               Jadjt.Transpose();
+               const double w = Jadjt.Weight();
+               minW = std::fmin(minW, w);
+               maxW = std::fmax(maxW, w);
+            }
+            if (std::fabs(maxW) != 0.0)
+            {
+               const double rho = minW / maxW;
+               MFEM_VERIFY(rho <= 1.0, "");
+               if (rho < art && depth < opt.max_level)
+               {
+                  refined.Append(Refinement(el));
+               }
+            }
+         }
+         break;
+      }
+
+      case amr::estimator::zz:
+      case amr::estimator::kelly:
+      {
+         refiner->Apply(*pmesh);
+         if (refiner->Refined()) { mesh_updated = true; }
+         MFEM_VERIFY(!refiner->Derefined() && !refiner->Rebalanced(), "");
+         break;
+      }
+      default: MFEM_ABORT("Unknown AMR estimator!");
+   }
+
+   // custom and JJt use 'refined', ZZ and Kelly will set 'mesh_updated'
+   const int nref = pmesh->ReduceInt(refined.Size());
+   if (nref && !mesh_updated)
+   {
+      constexpr int non_conforming = 1;
+      pmesh->GeneralRefinement(refined, non_conforming, opt.nc_limit);
+      mesh_updated = true;
+      if (myid == 0)
+      {
+         std::cout << "Refined " << nref << " elements." << std::endl;
+      }
+   }
+   else if ((opt.estimator == amr::estimator::custom ||
+             opt.estimator == amr::estimator::jjt) &&
+            opt.deref_threshold >= 0.0 && !mesh_updated)
+   {
+      ParGridFunction rho_gf;
+      Vector rho_max, rho_min;
+      hydro.ComputeDensity(rho_gf);
+      GetPerElementMinMax(rho_gf, rho_min, rho_max);
+
+      // Derefinement based on zone maximum rho in post-shock region
+      const double rho_max_max = rho_max.Size() ? rho_max.Max() : 0.0;
+      double threshold, loc_threshold = opt.deref_threshold * rho_max_max;
+      MPI_Allreduce(&loc_threshold, &threshold, 1, MPI_DOUBLE, MPI_MAX,
+                    pmesh->GetComm());
+
+      // make sure the blast point is never derefined
+      if (opt.estimator == amr::estimator::custom)
+      {
+         Array<int> elements;
+         FindElementsWithVertex(pmesh, opt.blast_position, opt.blast_size, elements);
+         for (int i = 0; i < elements.Size(); i++)
+         {
+            int index = elements[i];
+            if (index >= 0) { rho_max(index) = NL_DMAX; }
+         }
+         // only derefine where the mesh is in motion, i.e. after the shock
+         for (int i = 0; i < pmesh->GetNE(); i++)
+         {
+            if (v_min(i) < 0.1) { rho_max(i) = NL_DMAX; }
+         }
+      }
+
+      const int op = 2; // operator on the 'maximum' value of the fine elements
+      mesh_updated = pmesh->DerefineByError(rho_max, threshold, opt.nc_limit, op);
+      if (mesh_updated && myid == 0)
+      {
+         //std::cout << "Derefined, threshold = " << threshold << std::endl;
+      }
+   }
+   else if ((opt.estimator == amr::estimator::zz ||
+             opt.estimator == amr::estimator::kelly) && !mesh_updated)
+   {
+      MFEM_VERIFY(derefiner,"");
+      if (derefiner->Apply(*pmesh))
+      {
+         //if (myid == 0) { std::cout << "\nDerefined elements." << std::endl; }
+      }
+      if (derefiner->Derefined()) { mesh_updated = true; }
+   }
+   else { /* nothing to do */ }
+
+   if (mesh_updated)
+   {
+      auto Update = [&]()
+      {
+         ParFiniteElementSpace* H1FESpace = x.ParFESpace();
+         ParFiniteElementSpace* L2FESpace = e.ParFESpace();
+         ParFiniteElementSpace* MEFESpace = m.ParFESpace();
+
+         H1FESpace->Update();
+         L2FESpace->Update();
+         MEFESpace->Update();
+
+         const int h1_vsize = H1FESpace->GetVSize();
+         const int l2_vsize = L2FESpace->GetVSize();
+
+         true_offset[0] = 0;
+         true_offset[1] = true_offset[0] + h1_vsize;
+         true_offset[2] = true_offset[1] + h1_vsize;
+         true_offset[3] = true_offset[2] + l2_vsize;
+
+         S_old = S;
+         S.Update(true_offset);
+         const Operator* h1_update = H1FESpace->GetUpdateOperator();
+         const Operator* l2_update = L2FESpace->GetUpdateOperator();
+         h1_update->Mult(S_old.GetBlock(0), S.GetBlock(0));
+         h1_update->Mult(S_old.GetBlock(1), S.GetBlock(1));
+         l2_update->Mult(S_old.GetBlock(2), S.GetBlock(2));
+
+         x.MakeRef(H1FESpace, S, true_offset[0]);
+         v.MakeRef(H1FESpace, S, true_offset[1]);
+         e.MakeRef(L2FESpace, S, true_offset[2]);
+         x.SyncAliasMemory(S);
+         v.SyncAliasMemory(S);
+         e.SyncAliasMemory(S);
+
+         S_old.Update(true_offset);
+         m.Update();
+      };
+
+      constexpr bool quick = true;
+
+      Update();
+      hydro.AMRUpdate(S, quick);
+
+      pmesh->Rebalance();
+
+      Update();
+      hydro.AMRUpdate(S, not quick);
+
+      GetZeroBCDofs(pmesh, H1FESpace, bdr_attr_max, ess_tdofs, ess_vdofs);
+      ode_solver->Init(hydro);
+      //H1FESpace.PrintPartitionStats();
+   }
+}
+
+} // namespace amr
+
+} // namespace mfem
diff --git a/laghos_amr.hpp b/laghos_amr.hpp
new file mode 100644
index 00000000..3ced430e
--- /dev/null
+++ b/laghos_amr.hpp
@@ -0,0 +1,153 @@
+// Copyright (c) 2017, Lawrence Livermore National Security, LLC. Produced at
+// the Lawrence Livermore National Laboratory. LLNL-CODE-734707. All Rights
+// reserved. See files LICENSE and NOTICE for details.
+//
+// This file is part of CEED, a collection of benchmarks, miniapps, software
+// libraries and APIs for efficient high-order finite element and spectral
+// element discretizations for exascale applications. For more information and
+// source code availability see http://github.com/ceed.
+//
+// The CEED research is supported by the Exascale Computing Project 17-SC-20-SC,
+// a collaborative effort of two U.S. Department of Energy organizations (Office
+// of Science and the National Nuclear Security Administration) responsible for
+// the planning and preparation of a capable exascale ecosystem, including
+// software, applications, hardware, advanced system engineering and early
+// testbed platforms, in support of the nation's exascale computing imperative.
+
+#ifndef MFEM_LAGHOS_AMR
+#define MFEM_LAGHOS_AMR
+
+#include "mfem.hpp"
+#include "laghos_solver.hpp"
+
+namespace mfem
+{
+
+namespace amr
+{
+
+enum estimator: int
+{
+   custom = 0, // adapted from the amr subdir: uses visc and vgrad per element
+   jjt = 1,    // uses rho = minW/maxW, with W is the determinant of the
+   // adjugate of the transformation's Jacobian.
+   zz = 2,
+   kelly = 3
+};
+
+class AMREstimatorIntegrator: public DiffusionIntegrator
+{
+   int NE, e;
+   ParMesh *pmesh;
+   enum class mode { diffusion, one, two };
+   const mode flux_mode;
+   ConstantCoefficient one {1.0};
+   const int max_level;
+   const double jac_threshold;
+public:
+   AMREstimatorIntegrator(ParMesh *pmesh,
+                          const int max_level,
+                          const double jac_threshold,
+                          const mode flux_mode = mode::two):
+      DiffusionIntegrator(one),
+      NE(pmesh->GetNE()),
+      pmesh(pmesh),
+      flux_mode(flux_mode),
+      max_level(max_level),
+      jac_threshold(jac_threshold) { }
+
+   void Reset() { e = 0; NE = pmesh->GetNE(); }
+
+   double ComputeFluxEnergy(const FiniteElement &fluxelem,
+                            ElementTransformation &Trans,
+                            Vector &flux, Vector *d_energy = NULL)
+   {
+      if (flux_mode == mode::diffusion)
+      {
+         return DiffusionIntegrator::ComputeFluxEnergy(fluxelem, Trans, flux, d_energy);
+      }
+      // Not implemented for other modes
+      MFEM_ABORT("Not implemented!");
+      return 0.0;
+   }
+
+   virtual void ComputeElementFlux(const FiniteElement &el,
+                                   ElementTransformation &Trans,
+                                   Vector &u, const FiniteElement &fluxelem,
+                                   Vector &flux, bool with_coef = true,
+                                   const IntegrationRule *ir = NULL);
+private:
+   void ComputeElementFlux1(const FiniteElement &el,
+                            ElementTransformation &Trans,
+                            const Vector &u,
+                            const FiniteElement &fluxelem,
+                            Vector &flux);
+
+   void ComputeElementFlux2(const int e,
+                            const FiniteElement &el,
+                            ElementTransformation &Trans,
+                            const FiniteElement &fluxelem,
+                            Vector &flux);
+};
+
+// AMR operator
+class AMR
+{
+   const int order = 3; // should be computed
+   ParMesh *pmesh;
+   const int myid, dim, sdim;
+
+   L2_FECollection flux_fec;
+   ParFiniteElementSpace flux_fes;
+
+   RT_FECollection *smooth_flux_fec = nullptr;
+   ErrorEstimator *estimator = nullptr;
+   ThresholdRefiner *refiner = nullptr;
+   ThresholdDerefiner *derefiner = nullptr;
+   AMREstimatorIntegrator *integ = nullptr;
+
+   const struct Options
+   {
+      int estimator;
+      double ref_threshold;
+      double jac_threshold;
+      double deref_threshold;
+      int max_level;
+      int nc_limit;
+      double blast_size;
+      double blast_energy;
+      Vertex blast_position;
+   } opt;
+
+public:
+   AMR(ParMesh *pmesh,
+       int estimator,
+       double ref_t, double jac_t, double deref_t,
+       int max_level, int nc_limit,
+       double blast_size, double blast_energy, double *blast_position);
+
+   ~AMR();
+
+   void Setup(ParGridFunction&);
+
+   void Reset();
+
+   void Update(hydrodynamics::LagrangianHydroOperator &hydro,
+               ODESolver *ode_solver,
+               BlockVector &S,
+               BlockVector &S_old,
+               ParGridFunction &x,
+               ParGridFunction &v,
+               ParGridFunction &e,
+               ParGridFunction &m,
+               Array<int> &true_offset,
+               const int bdr_attr_max,
+               Array<int> &ess_tdofs,
+               Array<int> &ess_vdofs);
+};
+
+} // namespace amr
+
+} // namespace mfem
+
+#endif // MFEM_LAGHOS_AMR
diff --git a/laghos_assembly.cpp b/laghos_assembly.cpp
index 60ee07f3..53cd4e30 100644
--- a/laghos_assembly.cpp
+++ b/laghos_assembly.cpp
@@ -14,8 +14,11 @@
 // software, applications, hardware, advanced system engineering and early
 // testbed platforms, in support of the nation's exascale computing imperative.
 
-#include "laghos_assembly.hpp"
 #include <unordered_map>
+#include "laghos_assembly.hpp"
+#include "general/forall.hpp"
+
+using namespace mfem;
 
 namespace mfem
 {
@@ -23,6 +26,170 @@ namespace mfem
 namespace hydrodynamics
 {
 
+double ComputeVolumeIntegral(const int DIM, const int NE, const int NQ,
+                             const int Q1D, const int VDIM,
+                             const double ln_norm,
+                             const Vector& mass,
+                             const Vector& f)
+{
+
+   Vector integrand(NE*NQ);
+   const auto f_vals = Reshape(f.Read(), VDIM, NQ, NE);
+   auto I = Reshape(integrand.Write(), NQ, NE);
+
+   if (DIM == 1)
+   {
+      for (int e=0; e < NE; ++e)
+      {
+         for (int q = 0; q < NQ; ++q)
+         {
+            double vmag = 0;
+            for (int k = 0; k < VDIM; k++)
+            {
+               vmag += pow(f_vals(k,q,e),ln_norm);
+            }
+            I(q,e) = vmag;
+         }
+      }
+   }
+   else if (DIM == 2)
+   {
+      MFEM_FORALL_2D(e, NE, Q1D, Q1D, 1,
+      {
+         MFEM_FOREACH_THREAD(qy,y,Q1D)
+         {
+            MFEM_FOREACH_THREAD(qx,x,Q1D)
+            {
+               const int q = qx + qy * Q1D;
+               double vmag = 0;
+               for (int k = 0; k < VDIM; k++)
+               {
+                  vmag += pow(f_vals(k,q,e),ln_norm);
+               }
+               I(q,e) = vmag;
+            }
+         }
+      });
+   }
+   else if (DIM == 3)
+   {
+      MFEM_FORALL_3D(e, NE, Q1D, Q1D, Q1D,
+      {
+         MFEM_FOREACH_THREAD(qz,z,Q1D)
+         {
+            MFEM_FOREACH_THREAD(qy,y,Q1D)
+            {
+               MFEM_FOREACH_THREAD(qx,x,Q1D)
+               {
+                  const int q = qx + (qy + qz * Q1D) * Q1D;
+                  double vmag = 0;
+                  for (int k = 0; k < VDIM; k++)
+                  {
+                     vmag += pow(f_vals(k,q,e),ln_norm);
+                  }
+                  I(q,e) = vmag;
+               }
+            }
+         }
+      });
+   }
+   const double integral = integrand * mass;
+   return integral;
+}
+
+void Rho0DetJ0Vol(const int dim, const int NE,
+                  const IntegrationRule &ir,
+                  ParMesh *pmesh,
+                  ParFiniteElementSpace &L2,
+                  const ParGridFunction &rho0,
+                  hydrodynamics::QuadratureData &qdata,
+                  double &volume)
+{
+   const int NQ = ir.GetNPoints();
+   const int Q1D = IntRules.Get(Geometry::SEGMENT,ir.GetOrder()).GetNPoints();
+   const int flags = GeometricFactors::JACOBIANS|GeometricFactors::DETERMINANTS;
+   const GeometricFactors *geom = pmesh->GetGeometricFactors(ir, flags);
+   Vector rho0Q(NQ*NE);
+   rho0Q.UseDevice(true);
+   Vector j, detj;
+   const QuadratureInterpolator *qi = L2.GetQuadratureInterpolator(ir);
+   qi->Mult(rho0, QuadratureInterpolator::VALUES, rho0Q, j, detj);
+   const auto W = ir.GetWeights().Read();
+   const auto R = Reshape(rho0Q.Read(), NQ, NE);
+   const auto J = Reshape(geom->J.Read(), NQ, dim, dim, NE);
+   const auto detJ = Reshape(geom->detJ.Read(), NQ, NE);
+   auto V = Reshape(qdata.rho0DetJ0w.Write(), NQ, NE);
+   Memory<double> &Jinv_m = qdata.Jac0inv.GetMemory();
+   const MemoryClass mc = Device::GetMemoryClass();
+   const int Ji_total_size = qdata.Jac0inv.TotalSize();
+   auto invJ = Reshape(Jinv_m.Write(mc, Ji_total_size), dim, dim, NQ, NE);
+   Vector vol(NE*NQ), one(NE*NQ);
+   auto A = Reshape(vol.Write(), NQ, NE);
+   auto O = Reshape(one.Write(), NQ, NE);
+   MFEM_ASSERT(dim==2 || dim==3, "");
+   if (dim==2)
+   {
+      MFEM_FORALL_2D(e, NE, Q1D, Q1D, 1,
+      {
+         MFEM_FOREACH_THREAD(qy,y,Q1D)
+         {
+            MFEM_FOREACH_THREAD(qx,x,Q1D)
+            {
+               const int q = qx + qy * Q1D;
+               const double J11 = J(q,0,0,e);
+               const double J12 = J(q,1,0,e);
+               const double J21 = J(q,0,1,e);
+               const double J22 = J(q,1,1,e);
+               const double det = detJ(q,e);
+               V(q,e) =  W[q] * R(q,e) * det;
+               const double r_idetJ = 1.0 / det;
+               invJ(0,0,q,e) =  J22 * r_idetJ;
+               invJ(1,0,q,e) = -J12 * r_idetJ;
+               invJ(0,1,q,e) = -J21 * r_idetJ;
+               invJ(1,1,q,e) =  J11 * r_idetJ;
+               A(q,e) = W[q] * det;
+               O(q,e) = 1.0;
+            }
+         }
+      });
+   }
+   else
+   {
+      MFEM_FORALL_3D(e, NE, Q1D, Q1D, Q1D,
+      {
+         MFEM_FOREACH_THREAD(qz,z,Q1D)
+         {
+            MFEM_FOREACH_THREAD(qy,y,Q1D)
+            {
+               MFEM_FOREACH_THREAD(qx,x,Q1D)
+               {
+                  const int q = qx + (qy + qz * Q1D) * Q1D;
+                  const double J11 = J(q,0,0,e), J12 = J(q,0,1,e), J13 = J(q,0,2,e);
+                  const double J21 = J(q,1,0,e), J22 = J(q,1,1,e), J23 = J(q,1,2,e);
+                  const double J31 = J(q,2,0,e), J32 = J(q,2,1,e), J33 = J(q,2,2,e);
+                  const double det = detJ(q,e);
+                  V(q,e) = W[q] * R(q,e) * det;
+                  const double r_idetJ = 1.0 / det;
+                  invJ(0,0,q,e) = r_idetJ * ((J22 * J33)-(J23 * J32));
+                  invJ(1,0,q,e) = r_idetJ * ((J32 * J13)-(J33 * J12));
+                  invJ(2,0,q,e) = r_idetJ * ((J12 * J23)-(J13 * J22));
+                  invJ(0,1,q,e) = r_idetJ * ((J23 * J31)-(J21 * J33));
+                  invJ(1,1,q,e) = r_idetJ * ((J33 * J11)-(J31 * J13));
+                  invJ(2,1,q,e) = r_idetJ * ((J13 * J21)-(J11 * J23));
+                  invJ(0,2,q,e) = r_idetJ * ((J21 * J32)-(J22 * J31));
+                  invJ(1,2,q,e) = r_idetJ * ((J31 * J12)-(J32 * J11));
+                  invJ(2,2,q,e) = r_idetJ * ((J11 * J22)-(J12 * J21));
+                  A(q,e) = W[q] * det;
+                  O(q,e) = 1.0;
+               }
+            }
+         }
+      });
+   }
+   qdata.rho0DetJ0w.HostRead();
+   volume = vol * one;
+}
+
 void DensityIntegrator::AssembleRHSElementVect(const FiniteElement &fe,
                                                ElementTransformation &Tr,
                                                Vector &elvect)
@@ -150,14 +317,16 @@ void ForceMult2D(const int NE,
                  const DenseTensor &sJit_,
                  const Vector &x, Vector &y)
 {
-   auto b = Reshape(B_.Read(), Q1D, L1D);
-   auto bt = Reshape(Bt_.Read(), D1D, Q1D);
-   auto gt = Reshape(Gt_.Read(), D1D, Q1D);
-   const double *StressJinvT = Read(sJit_.GetMemory(), Q1D*Q1D*NE*DIM*DIM);
-   auto sJit = Reshape(StressJinvT, Q1D, Q1D, NE, DIM, DIM);
-   auto energy = Reshape(x.Read(), L1D, L1D, NE);
    const double eps1 = std::numeric_limits<double>::epsilon();
    const double eps2 = eps1*eps1;
+
+   const auto b = Reshape(B_.Read(), Q1D, L1D);
+   const auto bt = Reshape(Bt_.Read(), D1D, Q1D);
+   const auto gt = Reshape(Gt_.Read(), D1D, Q1D);
+   const double *StressJinvT = Read(sJit_.GetMemory(), Q1D*Q1D*NE*DIM*DIM);
+   const auto sJit = Reshape(StressJinvT, Q1D, Q1D, NE, DIM, DIM);
+   const auto energy = Reshape(x.Read(), L1D, L1D, NE);
+
    auto velocity = Reshape(y.Write(), D1D, D1D, DIM, NE);
 
    MFEM_FORALL_2D(e, NE, Q1D, Q1D, 1,
@@ -301,14 +470,16 @@ void ForceMult3D(const int NE,
                  const DenseTensor &sJit_,
                  const Vector &x, Vector &y)
 {
-   auto b = Reshape(B_.Read(), Q1D, L1D);
-   auto bt = Reshape(Bt_.Read(), D1D, Q1D);
-   auto gt = Reshape(Gt_.Read(), D1D, Q1D);
-   const double *StressJinvT = Read(sJit_.GetMemory(), Q1D*Q1D*Q1D*NE*DIM*DIM);
-   auto sJit = Reshape(StressJinvT, Q1D, Q1D, Q1D, NE, DIM, DIM);
-   auto energy = Reshape(x.Read(), L1D, L1D, L1D, NE);
    const double eps1 = std::numeric_limits<double>::epsilon();
    const double eps2 = eps1*eps1;
+
+   const auto b = Reshape(B_.Read(), Q1D, L1D);
+   const auto bt = Reshape(Bt_.Read(), D1D, Q1D);
+   const auto gt = Reshape(Gt_.Read(), D1D, Q1D);
+   const double *StressJinvT = Read(sJit_.GetMemory(), Q1D*Q1D*Q1D*NE*DIM*DIM);
+   const auto sJit = Reshape(StressJinvT, Q1D, Q1D, Q1D, NE, DIM, DIM);
+   const auto energy = Reshape(x.Read(), L1D, L1D, L1D, NE);
+
    auto velocity = Reshape(y.Write(), D1D, D1D, D1D, DIM, NE);
 
    MFEM_FORALL_3D(e, NE, Q1D, Q1D, Q1D,
@@ -569,12 +740,13 @@ void ForceMultTranspose2D(const int NE,
                           const DenseTensor &sJit_,
                           const Vector &x, Vector &y)
 {
-   auto b = Reshape(B_.Read(), Q1D, D1D);
-   auto g = Reshape(G_.Read(), Q1D, D1D);
-   auto bt = Reshape(Bt_.Read(), L1D, Q1D);
+   const auto b = Reshape(B_.Read(), Q1D, D1D);
+   const auto g = Reshape(G_.Read(), Q1D, D1D);
+   const auto bt = Reshape(Bt_.Read(), L1D, Q1D);
    const double *StressJinvT = Read(sJit_.GetMemory(), Q1D*Q1D*NE*DIM*DIM);
-   auto sJit = Reshape(StressJinvT, Q1D, Q1D, NE, DIM, DIM);
-   auto velocity = Reshape(x.Read(), D1D, D1D, DIM, NE);
+   const auto sJit = Reshape(StressJinvT, Q1D, Q1D, NE, DIM, DIM);
+   const auto velocity = Reshape(x.Read(), D1D, D1D, DIM, NE);
+
    auto energy = Reshape(y.Write(), L1D, L1D, NE);
 
    MFEM_FORALL_2D(e, NE, Q1D, Q1D, NBZ,
@@ -718,12 +890,13 @@ void ForceMultTranspose3D(const int NE,
                           const Vector &v_,
                           Vector &e_)
 {
-   auto b = Reshape(B_.Read(), Q1D, D1D);
-   auto g = Reshape(G_.Read(), Q1D, D1D);
-   auto bt = Reshape(Bt_.Read(), L1D, Q1D);
+   const auto b = Reshape(B_.Read(), Q1D, D1D);
+   const auto g = Reshape(G_.Read(), Q1D, D1D);
+   const auto bt = Reshape(Bt_.Read(), L1D, Q1D);
    const double *StressJinvT = Read(sJit_.GetMemory(), Q1D*Q1D*Q1D*NE*DIM*DIM);
-   auto sJit = Reshape(StressJinvT, Q1D, Q1D, Q1D, NE, DIM, DIM);
-   auto velocity = Reshape(v_.Read(), D1D, D1D, D1D, DIM, NE);
+   const auto sJit = Reshape(StressJinvT, Q1D, Q1D, Q1D, NE, DIM, DIM);
+   const auto velocity = Reshape(v_.Read(), D1D, D1D, D1D, DIM, NE);
+
    auto energy = Reshape(e_.Write(), L1D, L1D, L1D, NE);
 
    MFEM_FORALL_3D(e, NE, Q1D, Q1D, Q1D,
diff --git a/laghos_assembly.hpp b/laghos_assembly.hpp
index 0699352e..ba577d52 100644
--- a/laghos_assembly.hpp
+++ b/laghos_assembly.hpp
@@ -18,8 +18,6 @@
 #define MFEM_LAGHOS_ASSEMBLY
 
 #include "mfem.hpp"
-#include "general/forall.hpp"
-#include "linalg/dtensor.hpp"
 
 namespace mfem
 {
@@ -55,12 +53,33 @@ struct QuadratureData
    // recomputed at every time step to achieve adaptive time stepping.
    double dt_est;
 
-   QuadratureData(int dim, int NE, int quads_per_el)
-      : Jac0inv(dim, dim, NE * quads_per_el),
-        stressJinvT(NE * quads_per_el, dim, dim),
-        rho0DetJ0w(NE * quads_per_el) { }
+   QuadratureData(int dim, int NE, int NQ)
+      : Jac0inv(dim, dim, NE * NQ),
+        stressJinvT(NE * NQ, dim, dim),
+        rho0DetJ0w(NE * NQ) { }
+
+   void Resize(int dim, int NE, int NQ)
+   {
+      Jac0inv.SetSize(dim, dim, NE * NQ);
+      stressJinvT.SetSize(NE * NQ, dim, dim);
+      rho0DetJ0w.SetSize(NE * NQ);
+   }
 };
 
+double ComputeVolumeIntegral(const int DIM, const int NE, const int NQ,
+                             const int Q1D, const int VDIM,
+                             const double ln_norm,
+                             const mfem::Vector& mass,
+                             const mfem::Vector& f);
+
+void Rho0DetJ0Vol(const int dim, const int NE,
+                  const IntegrationRule &ir,
+                  ParMesh *pmesh,
+                  ParFiniteElementSpace &L2,
+                  const ParGridFunction &rho0,
+                  QuadratureData &qdata,
+                  double &volume);
+
 // This class is used only for visualization. It assembles (rho, phi) in each
 // zone, which is used by LagrangianHydroOperator::ComputeDensity to do an L2
 // projection of the density.
diff --git a/laghos_solver.cpp b/laghos_solver.cpp
index fe9f9acd..3fa6e8b6 100644
--- a/laghos_solver.cpp
+++ b/laghos_solver.cpp
@@ -14,10 +14,11 @@
 // software, applications, hardware, advanced system engineering and early
 // testbed platforms, in support of the nation's exascale computing imperative.
 
-#include "general/forall.hpp"
+#include <iostream>
+#include <unordered_map>
 #include "laghos_solver.hpp"
+#include "general/forall.hpp"
 #include "linalg/kernels.hpp"
-#include <unordered_map>
 
 #ifdef MFEM_USE_MPI
 
@@ -29,6 +30,7 @@ namespace hydrodynamics
 
 void VisualizeField(socketstream &sock, const char *vishost, int visport,
                     ParGridFunction &gf, const char *title,
+                    const char *vis_keys,
                     int x, int y, int w, int h, bool vec)
 {
    gf.HostRead();
@@ -60,7 +62,8 @@ void VisualizeField(socketstream &sock, const char *vishost, int visport,
 
       if (myid == 0 && newly_opened)
       {
-         const char* keys = (gf.FESpace()->GetMesh()->Dimension() == 2)
+         const char* keys = vis_keys ? vis_keys :
+                            (gf.FESpace()->GetMesh()->Dimension() == 2)
                             ? "mAcRjl" : "mmaaAcl";
 
          sock << "window_title '" << title << "'\n"
@@ -80,14 +83,6 @@ void VisualizeField(socketstream &sock, const char *vishost, int visport,
    while (connection_failed);
 }
 
-static void Rho0DetJ0Vol(const int dim, const int NE,
-                         const IntegrationRule &ir,
-                         ParMesh *pmesh,
-                         ParFiniteElementSpace &L2,
-                         const ParGridFunction &rho0,
-                         QuadratureData &qdata,
-                         double &volume);
-
 LagrangianHydroOperator::LagrangianHydroOperator(const int size,
                                                  ParFiniteElementSpace &h1,
                                                  ParFiniteElementSpace &l2,
@@ -100,12 +95,15 @@ LagrangianHydroOperator::LagrangianHydroOperator(const int size,
                                                  const bool visc,
                                                  const bool vort,
                                                  const bool p_assembly,
+                                                 const bool amr,
                                                  const double cgt,
                                                  const int cgiter,
                                                  double ftz,
                                                  const int oq) :
    TimeDependentOperator(size),
-   H1(h1), L2(l2), H1c(H1.GetParMesh(), H1.FEColl(), 1),
+   H1(h1),
+   L2(l2),
+   H1c(H1.GetParMesh(), H1.FEColl(), 1),
    pmesh(H1.GetParMesh()),
    H1Vsize(H1.GetVSize()),
    H1TVSize(H1.TrueVSize()),
@@ -116,17 +114,26 @@ LagrangianHydroOperator::LagrangianHydroOperator(const int size,
    block_offsets(4),
    x_gf(&H1),
    ess_tdofs(ess_tdofs),
-   dim(pmesh->Dimension()),
    NE(pmesh->GetNE()),
+   dim(pmesh->Dimension()),
    l2dofs_cnt(L2.GetFE(0)->GetDof()),
    h1dofs_cnt(H1.GetFE(0)->GetDof()),
-   source_type(source), cfl(cfl),
+   source_type(source),
+   cfl(cfl),
    use_viscosity(visc),
    use_vorticity(vort),
    p_assembly(p_assembly),
-   cg_rel_tol(cgt), cg_max_iter(cgiter),ftz_tol(ftz),
+   amr(amr),
+   cg_rel_tol(cgt),
+   cg_max_iter(cgiter),
+   ftz_tol(ftz),
    gamma_gf(gamma_gf),
-   Mv(&H1), Mv_spmat_copy(),
+   rho0_gf(rho0_gf),
+   x0_gf(&H1),
+   rho0_coeff(rho0_coeff),
+   rho0_gf_coeff(&rho0_gf),
+   Mv(&H1),
+   Mv_spmat_copy(),
    Me(l2dofs_cnt, l2dofs_cnt, NE),
    Me_inv(l2dofs_cnt, l2dofs_cnt, NE),
    ir(IntRules.Get(pmesh->GetElementBaseGeometry(0),
@@ -136,7 +143,9 @@ LagrangianHydroOperator::LagrangianHydroOperator(const int size,
    qdata_is_current(false),
    forcemat_is_assembled(false),
    Force(&L2, &H1),
-   ForcePA(nullptr), VMassPA(nullptr), EMassPA(nullptr),
+   ForcePA(nullptr),
+   VMassPA(nullptr),
+   EMassPA(nullptr),
    VMassPA_Jprec(nullptr),
    CG_VMass(H1.GetParMesh()->GetComm()),
    CG_EMass(L2.GetParMesh()->GetComm()),
@@ -150,6 +159,8 @@ LagrangianHydroOperator::LagrangianHydroOperator(const int size,
    rhs_c_gf(&H1c),
    dvc_gf(&H1c)
 {
+   x0_gf = *pmesh->GetNodes();
+
    block_offsets[0] = 0;
    block_offsets[1] = block_offsets[0] + H1Vsize;
    block_offsets[2] = block_offsets[1] + H1Vsize;
@@ -292,6 +303,163 @@ LagrangianHydroOperator::~LagrangianHydroOperator()
    }
 }
 
+void LagrangianHydroOperator::AMRUpdate(const Vector &S, const bool quick)
+{
+   width = height = S.Size();
+   H1c.Update();
+   pmesh = H1.GetParMesh();
+   H1Vsize = H1.GetVSize();
+   H1TVSize = H1.TrueVSize();
+   H1GTVSize = H1.GlobalTrueVSize();
+   L2Vsize = L2.GetVSize();
+   L2TVSize = L2.TrueVSize();
+   L2GTVSize = L2.GlobalTrueVSize();
+   block_offsets[0] = 0;
+   block_offsets[1] = block_offsets[0] + H1Vsize;
+   block_offsets[2] = block_offsets[1] + H1Vsize;
+   block_offsets[3] = block_offsets[2] + L2Vsize;
+   NE = pmesh->GetNE();
+   l2dofs_cnt = (L2.GetFE(0)->GetDof());
+   h1dofs_cnt = (H1.GetFE(0)->GetDof());
+   rho0_gf.Update();
+   x0_gf.Update();
+   qdata.Resize(dim, NE, ir.GetNPoints());
+   qdata_is_current = false;
+   forcemat_is_assembled = false;
+   delete ForcePA; ForcePA = nullptr;
+   delete VMassPA; VMassPA = nullptr;
+   delete EMassPA; EMassPA = nullptr;
+   delete VMassPA_Jprec; VMassPA_Jprec = nullptr;
+   delete qupdate; qupdate = nullptr;
+   one.SetSize(L2Vsize);
+   one = 1.0;
+   rhs.SetSize(H1Vsize);
+   e_rhs.SetSize(L2Vsize);
+
+   if (quick) { return; }
+
+   // go back to initial mesh configuration temporarily
+   int own_nodes = 0;
+   GridFunction *x0 = &x0_gf;
+   pmesh->SwapNodes(x0, own_nodes);
+
+   if (p_assembly)
+   {
+      gamma_gf.Update();
+      pmesh->DeleteGeometricFactors();
+      qupdate = new QUpdate(dim, NE, Q1D, use_viscosity, use_vorticity, cfl,
+                            &timer, gamma_gf, ir, H1, L2);
+      ForcePA = new ForcePAOperator(qdata, H1, L2, ir);
+      VMassPA = new MassPAOperator(H1c, ir, rho0_coeff);
+      EMassPA = new MassPAOperator(L2, ir, rho0_coeff);
+      // Inside the above constructors for mass, there is reordering of the mesh
+      // nodes which is performed on the host. Since the mesh nodes are a
+      // subvector, so we need to sync with the rest of the base vector (which
+      // is assumed to be in the memory space used by the mfem::Device).
+      H1.GetParMesh()->GetNodes()->ReadWrite();
+      // Attributes 1/2/3 correspond to fixed-x/y/z boundaries, i.e.,
+      // we must enforce v_x/y/z = 0 for the velocity components.
+      const int bdr_attr_max = H1.GetMesh()->bdr_attributes.Max();
+      Array<int> ess_bdr(bdr_attr_max);
+      for (int c = 0; c < dim; c++)
+      {
+         ess_bdr = 0;
+         ess_bdr[c] = 1;
+         H1c.GetEssentialTrueDofs(ess_bdr, c_tdofs[c]);
+         c_tdofs[c].Read();
+      }
+      X.SetSize(H1c.GetTrueVSize());
+      B.SetSize(H1c.GetTrueVSize());
+      X.UseDevice(true);
+      B.UseDevice(true);
+      rhs.UseDevice(true);
+      e_rhs.UseDevice(true);
+
+      // Setup the preconditioner of the velocity mass operator.
+      // BC are handled by the VMassPA, so ess_tdofs here can be empty.
+      Array<int> empty;
+      VMassPA_Jprec = new OperatorJacobiSmoother(VMassPA->GetBF(), empty);
+      CG_VMass.SetPreconditioner(*VMassPA_Jprec);
+
+      CG_VMass.SetOperator(*VMassPA);
+      CG_VMass.SetRelTol(cg_rel_tol);
+      CG_VMass.SetAbsTol(0.0);
+      CG_VMass.SetMaxIter(cg_max_iter);
+      CG_VMass.SetPrintLevel(-1);
+
+      CG_EMass.SetOperator(*EMassPA);
+      CG_EMass.iterative_mode = false;
+      CG_EMass.SetRelTol(cg_rel_tol);
+      CG_EMass.SetAbsTol(0.0);
+      CG_EMass.SetMaxIter(cg_max_iter);
+      CG_EMass.SetPrintLevel(-1);
+   }
+   else
+   {
+      Force.Update();
+      Force.Assemble(0);
+      Force.Finalize(0);
+      Mv.Update();
+      Mv.Assemble();
+      Mv_spmat_copy = Mv.SpMat();
+      Me.SetSize(l2dofs_cnt, l2dofs_cnt, NE);
+      Me_inv.SetSize(l2dofs_cnt, l2dofs_cnt, NE);
+      MassIntegrator mi(rho0_coeff, &ir);
+      for (int e = 0; e < NE; e++)
+      {
+         DenseMatrixInverse inv(&Me(e));
+         const FiniteElement &fe = *L2.GetFE(e);
+         ElementTransformation &Tr = *L2.GetElementTransformation(e);
+         mi.AssembleElementMatrix(fe, Tr, Me(e));
+         inv.Factor();
+         inv.GetInverseMatrix(Me_inv(e));
+      }
+   }
+   // update 'rho0DetJ0' and 'Jac0inv' at all quadrature points
+   // TODO: remove code duplication
+   int Ne, ne = NE;
+   double Volume, vol = 0.0;
+   if (dim > 1 && p_assembly)
+   {
+      Rho0DetJ0Vol(dim, NE, ir, pmesh, L2, rho0_gf, qdata, vol);
+   }
+   else
+   {
+      const int NQ = ir.GetNPoints();
+      Vector rho_vals(NQ);
+      for (int e = 0; e < NE; e++)
+      {
+         rho0_gf.GetValues(e, ir, rho_vals);
+         ElementTransformation *T = H1.GetElementTransformation(e);
+         for (int q = 0; q < NQ; q++)
+         {
+            const IntegrationPoint &ip = ir.IntPoint(q);
+            T->SetIntPoint(&ip);
+            DenseMatrixInverse Jinv(T->Jacobian());
+            Jinv.GetInverseMatrix(qdata.Jac0inv(e*NQ + q));
+            const double rho0DetJ0 = T->Weight() * rho_vals(q);
+            qdata.rho0DetJ0w(e*NQ + q) = rho0DetJ0 * ir.IntPoint(q).weight;
+         }
+      }
+      for (int e = 0; e < NE; e++) { vol += pmesh->GetElementVolume(e); }
+   }
+   MPI_Allreduce(&vol, &Volume, 1, MPI_DOUBLE, MPI_SUM, pmesh->GetComm());
+   MPI_Allreduce(&ne, &Ne, 1, MPI_INT, MPI_SUM, pmesh->GetComm());
+   switch (pmesh->GetElementBaseGeometry(0))
+   {
+      case Geometry::SEGMENT: qdata.h0 = Volume / Ne; break;
+      case Geometry::SQUARE: qdata.h0 = sqrt(Volume / Ne); break;
+      case Geometry::TRIANGLE: qdata.h0 = sqrt(2.0 * Volume / Ne); break;
+      case Geometry::CUBE: qdata.h0 = pow(Volume / Ne, 1./3.); break;
+      case Geometry::TETRAHEDRON: qdata.h0 = pow(6.0 * Volume / Ne, 1./3.); break;
+      default: MFEM_ABORT("Unknown zone type!");
+   }
+   qdata.h0 /= (double) H1.GetOrder(0);
+
+   // swap back to deformed mesh configuration
+   pmesh->SwapNodes(x0, own_nodes);
+}
+
 void LagrangianHydroOperator::Mult(const Vector &S, Vector &dS_dt) const
 {
    // Make sure that the mesh positions correspond to the ones in S. This is
@@ -498,7 +666,7 @@ void LagrangianHydroOperator::ComputeDensity(ParGridFunction &rho) const
 {
    rho.SetSpace(&L2);
    DenseMatrix Mrho(l2dofs_cnt);
-   Vector rhs(l2dofs_cnt), rho_z(l2dofs_cnt);
+   Vector rhs_z(l2dofs_cnt), rho_z(l2dofs_cnt);
    Array<int> dofs(l2dofs_cnt);
    DenseMatrixInverse inv(&Mrho);
    MassIntegrator mi(&ir);
@@ -508,10 +676,10 @@ void LagrangianHydroOperator::ComputeDensity(ParGridFunction &rho) const
    {
       const FiniteElement &fe = *L2.GetFE(e);
       ElementTransformation &eltr = *L2.GetElementTransformation(e);
-      di.AssembleRHSElementVect(fe, eltr, rhs);
+      di.AssembleRHSElementVect(fe, eltr, rhs_z);
       mi.AssembleElementMatrix(fe, eltr, Mrho);
       inv.Factor();
-      inv.Mult(rhs, rho_z);
+      inv.Mult(rhs_z, rho_z);
       L2.GetElementDofs(e, dofs);
       rho.SetSubVector(dofs, rho_z);
    }
@@ -671,7 +839,6 @@ void LagrangianHydroOperator::PrintTimingData(bool IamRoot, int steps,
 
    if (IamRoot)
    {
-      using namespace std;
       // FOM = (FOM1 * T1 + FOM2 * T2 + FOM3 * T3) / (T1 + T2 + T3)
       const HYPRE_Int H1iter = p_assembly ? (timer.H1iter/dim) : timer.H1iter;
       const double FOM1 = 1e-6 * H1GTVSize * H1iter / T[0];
@@ -679,57 +846,57 @@ void LagrangianHydroOperator::PrintTimingData(bool IamRoot, int steps,
       const double FOM3 = 1e-6 * alldata[1] * ir.GetNPoints() / T[3];
       const double FOM = (FOM1 * T[0] + FOM2 * T[2] + FOM3 * T[3]) / T[4];
       const double FOM0 = 1e-6 * steps * (H1GTVSize + L2GTVSize) / T[4];
-      cout << endl;
-      cout << "CG (H1) total time: " << T[0] << endl;
-      cout << "CG (H1) rate (megadofs x cg_iterations / second): "
-           << FOM1 << endl;
-      cout << endl;
-      cout << "CG (L2) total time: " << T[1] << endl;
-      cout << "CG (L2) rate (megadofs x cg_iterations / second): "
-           << 1e-6 * alldata[0] / T[1] << endl;
-      cout << endl;
-      cout << "Forces total time: " << T[2] << endl;
-      cout << "Forces rate (megadofs x timesteps / second): "
-           << FOM2 << endl;
-      cout << endl;
-      cout << "UpdateQuadData total time: " << T[3] << endl;
-      cout << "UpdateQuadData rate (megaquads x timesteps / second): "
-           << FOM3 << endl;
-      cout << endl;
-      cout << "Major kernels total time (seconds): " << T[4] << endl;
-      cout << "Major kernels total rate (megadofs x time steps / second): "
-           << FOM << endl;
+      std::cout << std::endl;
+      std::cout << "CG (H1) total time: " << T[0] << std::endl;
+      std::cout << "CG (H1) rate (megadofs x cg_iterations / second): "
+                << FOM1 << std::endl;
+      std::cout << std::endl;
+      std::cout << "CG (L2) total time: " << T[1] << std::endl;
+      std::cout << "CG (L2) rate (megadofs x cg_iterations / second): "
+                << 1e-6 * alldata[0] / T[1] << std::endl;
+      std::cout << std::endl;
+      std::cout << "Forces total time: " << T[2] << std::endl;
+      std::cout << "Forces rate (megadofs x timesteps / second): "
+                << FOM2 << std::endl;
+      std::cout << std::endl;
+      std::cout << "UpdateQuadData total time: " << T[3] << std::endl;
+      std::cout << "UpdateQuadData rate (megaquads x timesteps / second): "
+                << FOM3 << std::endl;
+      std::cout << std::endl;
+      std::cout << "Major kernels total time (seconds): " << T[4] << std::endl;
+      std::cout << "Major kernels total rate (megadofs x time steps / second): "
+                << FOM << std::endl;
       if (!fom) { return; }
       const int QPT = ir.GetNPoints();
       const HYPRE_Int GNZones = alldata[2];
       const long ndofs = 2*H1GTVSize + L2GTVSize + QPT*GNZones;
-      cout << endl;
-      cout << "| Ranks " << "| Zones   "
-           << "| H1 dofs " << "| L2 dofs "
-           << "| QP "      << "| N dofs   "
-           << "| FOM0   "
-           << "| FOM1   " << "| T1   "
-           << "| FOM2   " << "| T2   "
-           << "| FOM3   " << "| T3   "
-           << "| FOM    " << "| TT   "
-           << "|" << endl;
-      cout << setprecision(3);
-      cout << "| " << setw(6) << H1.GetNRanks()
-           << "| " << setw(8) << GNZones
-           << "| " << setw(8) << H1GTVSize
-           << "| " << setw(8) << L2GTVSize
-           << "| " << setw(3) << QPT
-           << "| " << setw(9) << ndofs
-           << "| " << setw(7) << FOM0
-           << "| " << setw(7) << FOM1
-           << "| " << setw(5) << T[0]
-           << "| " << setw(7) << FOM2
-           << "| " << setw(5) << T[2]
-           << "| " << setw(7) << FOM3
-           << "| " << setw(5) << T[3]
-           << "| " << setw(7) << FOM
-           << "| " << setw(5) << T[4]
-           << "| " << endl;
+      std::cout << std::endl;
+      std::cout << "| Ranks " << "| Zones   "
+                << "| H1 dofs " << "| L2 dofs "
+                << "| QP "      << "| N dofs   "
+                << "| FOM0   "
+                << "| FOM1   " << "| T1   "
+                << "| FOM2   " << "| T2   "
+                << "| FOM3   " << "| T3   "
+                << "| FOM    " << "| TT   "
+                << "|" << std::endl;
+      std::cout << std::setprecision(3);
+      std::cout << "| " << std::setw(6) << H1.GetNRanks()
+                << "| " << std::setw(8) << GNZones
+                << "| " << std::setw(8) << H1GTVSize
+                << "| " << std::setw(8) << L2GTVSize
+                << "| " << std::setw(3) << QPT
+                << "| " << std::setw(9) << ndofs
+                << "| " << std::setw(7) << FOM0
+                << "| " << std::setw(7) << FOM1
+                << "| " << std::setw(5) << T[0]
+                << "| " << std::setw(7) << FOM2
+                << "| " << std::setw(5) << T[2]
+                << "| " << std::setw(7) << FOM3
+                << "| " << std::setw(5) << T[3]
+                << "| " << std::setw(7) << FOM
+                << "| " << std::setw(5) << T[4]
+                << "| " << std::endl;
    }
 }
 
@@ -749,10 +916,21 @@ void LagrangianHydroOperator::UpdateQuadratureData(const Vector &S) const
    qdata_is_current = true;
    forcemat_is_assembled = false;
 
-   if (dim > 1 && p_assembly) { return qupdate->UpdateQuadratureData(S, qdata); }
+   zone_max_visc.SetSize(NE);
+   zone_max_visc = 0.0;
+
+   zone_vgrad.SetSize(NE);
+   zone_vgrad = 0.0;
+
+   if (dim > 1 && p_assembly)
+   {
+      qupdate->UpdateQuadratureData(S, qdata, zone_max_visc, zone_vgrad);
+      return;
+   }
 
    // This code is only for the 1D/FA mode
    timer.sw_qdata.Start();
+   qdata.rho0DetJ0w.HostRead();
    const int nqp = ir.GetNPoints();
    ParGridFunction x, v, e;
    Vector* sptr = const_cast<Vector*>(&S);
@@ -829,7 +1007,7 @@ void LagrangianHydroOperator::UpdateQuadratureData(const Vector &S) const
                          p = p_b[z*nqp + q], sound_speed = cs_b[z*nqp + q];
             stress = 0.0;
             for (int d = 0; d < dim; d++) { stress(d, d) = -p; }
-            double visc_coeff = 0.0;
+            double visc_coeff = 0.0, det_v_grad = 0.0;
             if (use_viscosity)
             {
                // Compression-based length scale at the point. The first
@@ -847,6 +1025,7 @@ void LagrangianHydroOperator::UpdateQuadratureData(const Vector &S) const
                }
 
                sgrad_v.Symmetrize();
+               det_v_grad = sgrad_v.Det();
                double eig_val_data[3], eig_vec_data[9];
                if (dim==1)
                {
@@ -904,6 +1083,9 @@ void LagrangianHydroOperator::UpdateQuadratureData(const Vector &S) const
                      stressJiT(vd, gd);
                }
             }
+            // Track maximum artificial viscosity per zone
+            zone_max_visc(z_id) = std::max(visc_coeff, zone_max_visc(z_id));
+            zone_vgrad(z_id) = std::max(std::abs(det_v_grad), zone_vgrad(z_id));
          }
          ++z_id;
       }
@@ -999,6 +1181,8 @@ void QUpdateBody(const int NE, const int e,
                  const double* __restrict__ d_grad_v_ext,
                  const double* __restrict__ d_Jac0inv,
                  double *d_dt_est,
+                 double *d_el_max_visc,
+                 double *d_el_max_vgrad,
                  double *d_stressJinvT)
 {
    constexpr int DIM2 = DIM*DIM;
@@ -1018,7 +1202,7 @@ void QUpdateBody(const int NE, const int e,
    const double S = sqrt(gamma * (gamma - 1.0) * E);
    for (int k = 0; k < DIM2; k++) { stress[k] = 0.0; }
    for (int d = 0; d < DIM; d++) { stress[d*DIM+d] = -P; }
-   double visc_coeff = 0.0;
+   double visc_coeff = 0.0, det_v_grad = 0.0;
    if (use_viscosity)
    {
       // Compression-based length scale at the point. The first
@@ -1037,6 +1221,7 @@ void QUpdateBody(const int NE, const int e,
       }
 
       kernels::Symmetrize(DIM, sgrad_v);
+      det_v_grad = kernels::Det<DIM>(sgrad_v);
       if (DIM == 1)
       {
          eig_val_data[0] = sgrad_v[0];
@@ -1099,99 +1284,10 @@ void QUpdateBody(const int NE, const int e,
          d_stressJinvT[offset] = stressJiT[vd + gd*DIM];
       }
    }
-}
-
-static void Rho0DetJ0Vol(const int dim, const int NE,
-                         const IntegrationRule &ir,
-                         ParMesh *pmesh,
-                         ParFiniteElementSpace &L2,
-                         const ParGridFunction &rho0,
-                         QuadratureData &qdata,
-                         double &volume)
-{
-   const int NQ = ir.GetNPoints();
-   const int Q1D = IntRules.Get(Geometry::SEGMENT,ir.GetOrder()).GetNPoints();
-   const int flags = GeometricFactors::JACOBIANS|GeometricFactors::DETERMINANTS;
-   const GeometricFactors *geom = pmesh->GetGeometricFactors(ir, flags);
-   Vector rho0Q(NQ*NE);
-   rho0Q.UseDevice(true);
-   Vector j, detj;
-   const QuadratureInterpolator *qi = L2.GetQuadratureInterpolator(ir);
-   qi->Mult(rho0, QuadratureInterpolator::VALUES, rho0Q, j, detj);
-   const auto W = ir.GetWeights().Read();
-   const auto R = Reshape(rho0Q.Read(), NQ, NE);
-   const auto J = Reshape(geom->J.Read(), NQ, dim, dim, NE);
-   const auto detJ = Reshape(geom->detJ.Read(), NQ, NE);
-   auto V = Reshape(qdata.rho0DetJ0w.Write(), NQ, NE);
-   Memory<double> &Jinv_m = qdata.Jac0inv.GetMemory();
-   const MemoryClass mc = Device::GetMemoryClass();
-   const int Ji_total_size = qdata.Jac0inv.TotalSize();
-   auto invJ = Reshape(Jinv_m.Write(mc, Ji_total_size), dim, dim, NQ, NE);
-   Vector vol(NE*NQ), one(NE*NQ);
-   auto A = Reshape(vol.Write(), NQ, NE);
-   auto O = Reshape(one.Write(), NQ, NE);
-   MFEM_ASSERT(dim==2 || dim==3, "");
-   if (dim==2)
-   {
-      MFEM_FORALL_2D(e, NE, Q1D, Q1D, 1,
-      {
-         MFEM_FOREACH_THREAD(qy,y,Q1D)
-         {
-            MFEM_FOREACH_THREAD(qx,x,Q1D)
-            {
-               const int q = qx + qy * Q1D;
-               const double J11 = J(q,0,0,e);
-               const double J12 = J(q,1,0,e);
-               const double J21 = J(q,0,1,e);
-               const double J22 = J(q,1,1,e);
-               const double det = detJ(q,e);
-               V(q,e) =  W[q] * R(q,e) * det;
-               const double r_idetJ = 1.0 / det;
-               invJ(0,0,q,e) =  J22 * r_idetJ;
-               invJ(1,0,q,e) = -J12 * r_idetJ;
-               invJ(0,1,q,e) = -J21 * r_idetJ;
-               invJ(1,1,q,e) =  J11 * r_idetJ;
-               A(q,e) = W[q] * det;
-               O(q,e) = 1.0;
-            }
-         }
-      });
-   }
-   else
-   {
-      MFEM_FORALL_3D(e, NE, Q1D, Q1D, Q1D,
-      {
-         MFEM_FOREACH_THREAD(qz,z,Q1D)
-         {
-            MFEM_FOREACH_THREAD(qy,y,Q1D)
-            {
-               MFEM_FOREACH_THREAD(qx,x,Q1D)
-               {
-                  const int q = qx + (qy + qz * Q1D) * Q1D;
-                  const double J11 = J(q,0,0,e), J12 = J(q,0,1,e), J13 = J(q,0,2,e);
-                  const double J21 = J(q,1,0,e), J22 = J(q,1,1,e), J23 = J(q,1,2,e);
-                  const double J31 = J(q,2,0,e), J32 = J(q,2,1,e), J33 = J(q,2,2,e);
-                  const double det = detJ(q,e);
-                  V(q,e) = W[q] * R(q,e) * det;
-                  const double r_idetJ = 1.0 / det;
-                  invJ(0,0,q,e) = r_idetJ * ((J22 * J33)-(J23 * J32));
-                  invJ(1,0,q,e) = r_idetJ * ((J32 * J13)-(J33 * J12));
-                  invJ(2,0,q,e) = r_idetJ * ((J12 * J23)-(J13 * J22));
-                  invJ(0,1,q,e) = r_idetJ * ((J23 * J31)-(J21 * J33));
-                  invJ(1,1,q,e) = r_idetJ * ((J33 * J11)-(J31 * J13));
-                  invJ(2,1,q,e) = r_idetJ * ((J13 * J21)-(J11 * J23));
-                  invJ(0,2,q,e) = r_idetJ * ((J21 * J32)-(J22 * J31));
-                  invJ(1,2,q,e) = r_idetJ * ((J31 * J12)-(J32 * J11));
-                  invJ(2,2,q,e) = r_idetJ * ((J11 * J22)-(J12 * J21));
-                  A(q,e) = W[q] * det;
-                  O(q,e) = 1.0;
-               }
-            }
-         }
-      });
-   }
-   qdata.rho0DetJ0w.HostRead();
-   volume = vol * one;
+   // Track maximum artificial viscosity per zone
+   // /!\ Race condition, only used with the custom amr estimator
+   d_el_max_visc[e] = fmax(visc_coeff, d_el_max_visc[e]);
+   d_el_max_vgrad[e] = fmax(fabs(det_v_grad), d_el_max_vgrad[e]);
 }
 
 template<int DIM, int Q1D> static inline
@@ -1210,6 +1306,8 @@ void QKernel(const int NE, const int NQ,
              const Vector &grad_v_ext,
              const DenseTensor &Jac0inv,
              Vector &dt_est,
+             Vector &el_max_visc,
+             Vector &el_max_vgrad,
              DenseTensor &stressJinvT)
 {
    constexpr int DIM2 = DIM*DIM;
@@ -1221,6 +1319,8 @@ void QKernel(const int NE, const int NQ,
    const auto d_grad_v_ext = grad_v_ext.Read();
    const auto d_Jac0inv = Read(Jac0inv.GetMemory(), Jac0inv.TotalSize());
    auto d_dt_est = dt_est.ReadWrite();
+   auto d_max_visc = el_max_visc.ReadWrite();
+   auto d_max_vgrad = el_max_vgrad.ReadWrite();
    auto d_stressJinvT = Write(stressJinvT.GetMemory(), stressJinvT.TotalSize());
    if (DIM == 2)
    {
@@ -1245,7 +1345,7 @@ void QKernel(const int NE, const int NQ,
                                 compr_dir, Jpi, ph_dir, stressJiT,
                                 d_gamma, d_weights, d_Jacobians, d_rho0DetJ0w,
                                 d_e_quads, d_grad_v_ext, d_Jac0inv,
-                                d_dt_est, d_stressJinvT);
+                                d_dt_est, d_max_visc, d_max_vgrad, d_stressJinvT);
             }
          }
          MFEM_SYNC_THREAD;
@@ -1276,7 +1376,7 @@ void QKernel(const int NE, const int NQ,
                                    compr_dir, Jpi, ph_dir, stressJiT,
                                    d_gamma, d_weights, d_Jacobians, d_rho0DetJ0w,
                                    d_e_quads, d_grad_v_ext, d_Jac0inv,
-                                   d_dt_est, d_stressJinvT);
+                                   d_dt_est, d_max_visc, d_max_vgrad, d_stressJinvT);
                }
             }
          }
@@ -1285,7 +1385,10 @@ void QKernel(const int NE, const int NQ,
    }
 }
 
-void QUpdate::UpdateQuadratureData(const Vector &S, QuadratureData &qdata)
+void QUpdate::UpdateQuadratureData(const Vector &S,
+                                   QuadratureData &qdata,
+                                   Vector &zone_max_visc,
+                                   Vector &zone_max_vgrad)
 {
    timer->sw_qdata.Start();
    Vector* S_p = const_cast<Vector*>(&S);
@@ -1304,6 +1407,8 @@ void QUpdate::UpdateQuadratureData(const Vector &S, QuadratureData &qdata)
    q2->SetOutputLayout(QVectorLayout::byVDIM);
    q2->Values(e, q_e);
    q_dt_est = qdata.dt_est;
+   Vector &el_max_visc = zone_max_visc;
+   Vector &el_max_vgrad = zone_max_vgrad;
    const int id = (dim << 4) | Q1D;
    typedef void (*fQKernel)(const int NE, const int NQ,
                             const bool use_viscosity,
@@ -1315,7 +1420,10 @@ void QUpdate::UpdateQuadratureData(const Vector &S, QuadratureData &qdata)
                             const Vector &Jacobians, const Vector &rho0DetJ0w,
                             const Vector &e_quads, const Vector &grad_v_ext,
                             const DenseTensor &Jac0inv,
-                            Vector &dt_est, DenseTensor &stressJinvT);
+                            Vector &dt_est,
+                            Vector &el_max_visc,
+                            Vector &el_max_vgrad,
+                            DenseTensor &stressJinvT);
    static std::unordered_map<int, fQKernel> qupdate =
    {
       {0x24,&QKernel<2,4>}, {0x26,&QKernel<2,6>}, {0x28,&QKernel<2,8>},
@@ -1329,7 +1437,10 @@ void QUpdate::UpdateQuadratureData(const Vector &S, QuadratureData &qdata)
    qupdate[id](NE, NQ, use_viscosity, use_vorticity, qdata.h0, h1order,
                cfl, infinity, gamma_gf, ir.GetWeights(), q_dx,
                qdata.rho0DetJ0w, q_e, q_dv,
-               qdata.Jac0inv, q_dt_est, qdata.stressJinvT);
+               qdata.Jac0inv,
+               q_dt_est,
+               el_max_visc, el_max_vgrad,
+               qdata.stressJinvT);
    qdata.dt_est = q_dt_est.Min();
    timer->sw_qdata.Stop();
    timer->quad_tstep += NE;
diff --git a/laghos_solver.hpp b/laghos_solver.hpp
index 2bbd1c1c..66cd7e23 100644
--- a/laghos_solver.hpp
+++ b/laghos_solver.hpp
@@ -18,13 +18,220 @@
 #define MFEM_LAGHOS_SOLVER
 
 #include "mfem.hpp"
-#include "laghos_assembly.hpp"
+#include "laghos_assembly.hpp" // QuadratureData
 
 #ifdef MFEM_USE_MPI
 
 namespace mfem
 {
 
+// Choice for the problem setup, statically used in functions below.
+static int problem, dim;
+
+static double gamma_func(const Vector &x)
+{
+   switch (problem)
+   {
+      case 0: return 5.0 / 3.0;
+      case 1: return 1.4;
+      case 2: return 1.4;
+      case 3:
+         if (dim == 1) { return (x(0) > 0.5) ? 1.4 : 1.5; }
+         else { return (x(0) > 1.0 && x(1) <= 1.5) ? 1.4 : 1.5; }
+      case 4: return 5.0 / 3.0;
+      case 5: return 1.4;
+      case 6: return 1.4;
+      case 7: return 5.0 / 3.0;
+      default: MFEM_ABORT("Bad number given for problem id!"); return 0.0;
+   }
+}
+
+static double rho0(const Vector &x)
+{
+   switch (problem)
+   {
+      case 0: return 1.0;
+      case 1: return 1.0;
+      case 2: return (x(0) < 0.5) ? 1.0 : 0.1;
+      case 3: return (dim == 2) ? (x(0) > 1.0 && x(1) > 1.5) ? 0.125 : 1.0
+                        : x(0) > 1.0 && ((x(1) < 1.5 && x(2) < 1.5) ||
+                                         (x(1) > 1.5 && x(2) > 1.5)) ? 0.125 : 1.0;
+
+      case 4: return 1.0;
+      case 5:
+      {
+         if (x(0) >= 0.5 && x(1) >= 0.5) { return 0.5313; }
+         if (x(0) <  0.5 && x(1) <  0.5) { return 0.8; }
+         return 1.0;
+      }
+      case 6:
+      {
+         if (x(0) <  0.5 && x(1) >= 0.5) { return 2.0; }
+         if (x(0) >= 0.5 && x(1) <  0.5) { return 3.0; }
+         return 1.0;
+      }
+      case 7: return x(1) >= 0.0 ? 2.0 : 1.0;
+      default: MFEM_ABORT("Bad number given for problem id!"); return 0.0;
+   }
+}
+
+static double radius(double x, double y) { return sqrt(x*x + y*y); }
+
+inline double e0(const Vector &x)
+{
+   switch (problem)
+   {
+      case 0:
+      {
+         const double denom = 2.0 / 3.0;  // (5/3 - 1) * density.
+         double val;
+         if (x.Size() == 2)
+         {
+            val = 1.0 + (cos(2*M_PI*x(0)) + cos(2*M_PI*x(1))) / 4.0;
+         }
+         else
+         {
+            val = 100.0 + ((cos(2*M_PI*x(2)) + 2) *
+                           (cos(2*M_PI*x(0)) + cos(2*M_PI*x(1))) - 2) / 16.0;
+         }
+         return val/denom;
+      }
+      case 1: return 0.0; // This case in initialized in main().
+      case 2: return (x(0) < 0.5) ? 1.0 / rho0(x) / (gamma_func(x) - 1.0)
+                        : 0.1 / rho0(x) / (gamma_func(x) - 1.0);
+      case 3: return (x(0) > 1.0) ? 0.1 / rho0(x) / (gamma_func(x) - 1.0)
+                        : 1.0 / rho0(x) / (gamma_func(x) - 1.0);
+      case 4:
+      {
+         const double r = radius(x(0), x(1)), rsq = x(0) * x(0) + x(1) * x(1);
+         const double gamma = 5.0 / 3.0;
+         if (r < 0.2)
+         {
+            return (5.0 + 25.0 / 2.0 * rsq) / (gamma - 1.0);
+         }
+         else if (r < 0.4)
+         {
+            const double t1 = 9.0 - 4.0 * log(0.2) + 25.0 / 2.0 * rsq;
+            const double t2 = 20.0 * r - 4.0 * log(r);
+            return (t1 - t2) / (gamma - 1.0);
+         }
+         else { return (3.0 + 4.0 * log(2.0)) / (gamma - 1.0); }
+      }
+      case 5:
+      {
+         const double irg = 1.0 / rho0(x) / (gamma_func(x) - 1.0);
+         if (x(0) >= 0.5 && x(1) >= 0.5) { return 0.4 * irg; }
+         if (x(0) <  0.5 && x(1) >= 0.5) { return 1.0 * irg; }
+         if (x(0) <  0.5 && x(1) <  0.5) { return 1.0 * irg; }
+         if (x(0) >= 0.5 && x(1) <  0.5) { return 1.0 * irg; }
+         MFEM_ABORT("Error in problem 5!");
+         return 0.0;
+      }
+      case 6:
+      {
+         const double irg = 1.0 / rho0(x) / (gamma_func(x) - 1.0);
+         if (x(0) >= 0.5 && x(1) >= 0.5) { return 1.0 * irg; }
+         if (x(0) <  0.5 && x(1) >= 0.5) { return 1.0 * irg; }
+         if (x(0) <  0.5 && x(1) <  0.5) { return 1.0 * irg; }
+         if (x(0) >= 0.5 && x(1) <  0.5) { return 1.0 * irg; }
+         MFEM_ABORT("Error in problem 6!");
+         return 0.0;
+      }
+      case 7:
+      {
+         const double rho = rho0(x), gamma = gamma_func(x);
+         return (6.0 - rho * x(1)) / (gamma - 1.0) / rho;
+      }
+      default: MFEM_ABORT("Bad number given for problem id!"); return 0.0;
+   }
+}
+
+inline void v0(const Vector &x, Vector &v)
+{
+   const double atn =
+      dim!=1 ? pow((x(0)*(1.0-x(0))*4*x(1)*(1.0-x(1))*4.0),0.4) : 0.0;
+   switch (problem)
+   {
+      case 0:
+         v(0) =  sin(M_PI*x(0)) * cos(M_PI*x(1));
+         v(1) = -cos(M_PI*x(0)) * sin(M_PI*x(1));
+         if (x.Size() == 3)
+         {
+            v(0) *= cos(M_PI*x(2));
+            v(1) *= cos(M_PI*x(2));
+            v(2) = 0.0;
+         }
+         break;
+      case 1: v = 0.0; break;
+      case 2: v = 0.0; break;
+      case 3: v = 0.0; break;
+      case 4:
+      {
+         v = 0.0;
+         const double r = radius(x(0), x(1));
+         if (r < 0.2)
+         {
+            v(0) =  5.0 * x(1);
+            v(1) = -5.0 * x(0);
+         }
+         else if (r < 0.4)
+         {
+            v(0) =  2.0 * x(1) / r - 5.0 * x(1);
+            v(1) = -2.0 * x(0) / r + 5.0 * x(0);
+         }
+         else { }
+         break;
+      }
+      case 5:
+      {
+         v = 0.0;
+         if (x(0) >= 0.5 && x(1) >= 0.5) { v(0)=0.0*atn, v(1)=0.0*atn; return;}
+         if (x(0) <  0.5 && x(1) >= 0.5) { v(0)=0.7276*atn, v(1)=0.0*atn; return;}
+         if (x(0) <  0.5 && x(1) <  0.5) { v(0)=0.0*atn, v(1)=0.0*atn; return;}
+         if (x(0) >= 0.5 && x(1) <  0.5) { v(0)=0.0*atn, v(1)=0.7276*atn; return; }
+         MFEM_ABORT("Error in problem 5!");
+         return;
+      }
+      case 6:
+      {
+         v = 0.0;
+         if (x(0) >= 0.5 && x(1) >= 0.5) { v(0)=+0.75*atn, v(1)=-0.5*atn; return;}
+         if (x(0) <  0.5 && x(1) >= 0.5) { v(0)=+0.75*atn, v(1)=+0.5*atn; return;}
+         if (x(0) <  0.5 && x(1) <  0.5) { v(0)=-0.75*atn, v(1)=+0.5*atn; return;}
+         if (x(0) >= 0.5 && x(1) <  0.5) { v(0)=-0.75*atn, v(1)=-0.5*atn; return;}
+         MFEM_ABORT("Error in problem 6!");
+         return;
+      }
+      case 7:
+      {
+         v = 0.0;
+         v(1) = 0.02 * exp(-2*M_PI*x(1)*x(1)) * cos(2*M_PI*x(0));
+         break;
+      }
+      default: MFEM_ABORT("Bad number given for problem id!");
+   }
+}
+
+inline void GetZeroBCDofs(ParMesh *pmesh, ParFiniteElementSpace &H1,
+                          const int bdr_attr_max,
+                          Array<int> &ess_tdofs, Array<int> &ess_vdofs)
+{
+   ess_tdofs.SetSize(0);
+   ess_vdofs.SetSize(0);
+   Array<int> ess_bdr(bdr_attr_max), dofs_marker, dofs_list;
+   for (int d = 0; d < pmesh->Dimension(); d++)
+   {
+      // Attributes 1/2/3 correspond to fixed-x/y/z boundaries,
+      // i.e., we must enforce v_x/y/z = 0 for the velocity components.
+      ess_bdr = 0; ess_bdr[d] = 1;
+      H1.GetEssentialTrueDofs(ess_bdr, dofs_list, d);
+      ess_tdofs.Append(dofs_list);
+      H1.GetEssentialVDofs(ess_bdr, dofs_marker, d);
+      FiniteElementSpace::MarkerToList(dofs_marker, dofs_list);
+      ess_vdofs.Append(dofs_list);
+   }
+}
+
 namespace hydrodynamics
 {
 
@@ -33,6 +240,7 @@ namespace hydrodynamics
 /// its geometry.
 void VisualizeField(socketstream &sock, const char *vishost, int visport,
                     ParGridFunction &gf, const char *title,
+                    const char *keys = nullptr,
                     int x = 0, int y = 0, int w = 400, int h = 400,
                     bool vec = false);
 
@@ -55,42 +263,7 @@ struct TimingData
       L2dof(l2d), H1iter(0), L2iter(0), quad_tstep(0) { }
 };
 
-class QUpdate
-{
-private:
-   const int dim, vdim, NQ, NE, Q1D;
-   const bool use_viscosity, use_vorticity;
-   const double cfl;
-   TimingData *timer;
-   const IntegrationRule &ir;
-   ParFiniteElementSpace &H1, &L2;
-   const Operator *H1R;
-   Vector q_dt_est, q_e, e_vec, q_dx, q_dv;
-   const QuadratureInterpolator *q1,*q2;
-   const ParGridFunction &gamma_gf;
-public:
-   QUpdate(const int d, const int ne, const int q1d,
-           const bool visc, const bool vort,
-           const double cfl, TimingData *t,
-           const ParGridFunction &gamma_gf,
-           const IntegrationRule &ir,
-           ParFiniteElementSpace &h1, ParFiniteElementSpace &l2):
-      dim(d), vdim(h1.GetVDim()),
-      NQ(ir.GetNPoints()), NE(ne), Q1D(q1d),
-      use_viscosity(visc), use_vorticity(vort), cfl(cfl),
-      timer(t), ir(ir), H1(h1), L2(l2),
-      H1R(H1.GetElementRestriction(ElementDofOrdering::LEXICOGRAPHIC)),
-      q_dt_est(NE*NQ),
-      q_e(NE*NQ),
-      e_vec(NQ*NE*vdim),
-      q_dx(NQ*NE*vdim*vdim),
-      q_dv(NQ*NE*vdim*vdim),
-      q1(H1.GetQuadratureInterpolator(ir)),
-      q2(L2.GetQuadratureInterpolator(ir)),
-      gamma_gf(gamma_gf) { }
-
-   void UpdateQuadratureData(const Vector &S, QuadratureData &qdata);
-};
+class QUpdate;
 
 // Given a solutions state (x, v, e), this class performs all necessary
 // computations to evaluate the new slopes (dx_dt, dv_dt, de_dt).
@@ -101,23 +274,32 @@ class LagrangianHydroOperator : public TimeDependentOperator
    mutable ParFiniteElementSpace H1c;
    ParMesh *pmesh;
    // FE spaces local and global sizes
-   const int H1Vsize;
-   const int H1TVSize;
-   const HYPRE_Int H1GTVSize;
-   const int L2Vsize;
-   const int L2TVSize;
-   const HYPRE_Int L2GTVSize;
+   int H1Vsize;
+   int H1TVSize;
+   HYPRE_Int H1GTVSize;
+   int L2Vsize;
+   int L2TVSize;
+   HYPRE_Int L2GTVSize;
    Array<int> block_offsets;
    // Reference to the current mesh configuration.
    mutable ParGridFunction x_gf;
    const Array<int> &ess_tdofs;
-   const int dim, NE, l2dofs_cnt, h1dofs_cnt, source_type;
+   int NE;
+   const int dim;
+   int l2dofs_cnt, h1dofs_cnt;
+   const int source_type;
    const double cfl;
-   const bool use_viscosity, use_vorticity, p_assembly;
+   const bool use_viscosity, use_vorticity, p_assembly, amr;
    const double cg_rel_tol;
    const int cg_max_iter;
    const double ftz_tol;
-   const ParGridFunction &gamma_gf;
+   ParGridFunction &gamma_gf;
+
+   ParGridFunction rho0_gf;
+   ParGridFunction x0_gf; // copy of initial mesh position
+   Coefficient &rho0_coeff;
+   GridFunctionCoefficient rho0_gf_coeff; // TODO: remove when Mv update improved
+
    // Velocity mass matrix and local inverses of the energy mass matrices. These
    // are constant in time, due to the pointwise mass conservation property.
    mutable ParBilinearForm Mv;
@@ -147,6 +329,7 @@ class LagrangianHydroOperator : public TimeDependentOperator
    mutable Vector X, B, one, rhs, e_rhs;
    mutable ParGridFunction rhs_c_gf, dvc_gf;
    mutable Array<int> c_tdofs[3];
+   mutable Vector zone_max_visc, zone_vgrad;
 
    virtual void ComputeMaterialProperties(int nvalues, const double gamma[],
                                           const double rho[], const double e[],
@@ -173,6 +356,7 @@ class LagrangianHydroOperator : public TimeDependentOperator
                            const int source,
                            const double cfl,
                            const bool visc, const bool vort, const bool pa,
+                           const bool amr,
                            const double cgt, const int cgiter, double ftz_tol,
                            const int order_q);
    ~LagrangianHydroOperator();
@@ -202,6 +386,51 @@ class LagrangianHydroOperator : public TimeDependentOperator
    const Array<int> &GetBlockOffsets() const { return block_offsets; }
 
    void PrintTimingData(bool IamRoot, int steps, const bool fom) const;
+
+   void SetH0(double h0) { qdata.h0 = h0; }
+   double GetH0() const { return qdata.h0; }
+
+   Vector& GetZoneMaxVisc() { return zone_max_visc; }
+   Vector& GetZoneVGrad() { return zone_vgrad; }
+
+   void AMRUpdate(const Vector&, const bool quick);
+};
+
+class QUpdate
+{
+private:
+   const int dim, vdim, NQ, NE, Q1D;
+   const bool use_viscosity, use_vorticity;
+   const double cfl;
+   TimingData *timer;
+   const IntegrationRule &ir;
+   ParFiniteElementSpace &H1, &L2;
+   const Operator *H1R;
+   Vector q_dt_est, q_e, e_vec, q_dx, q_dv;
+   const QuadratureInterpolator *q1,*q2;
+   const ParGridFunction &gamma_gf;
+public:
+   QUpdate(const int d, const int ne, const int q1d,
+           const bool visc, const bool vort,
+           const double cfl, TimingData *t,
+           const ParGridFunction &gamma_gf,
+           const IntegrationRule &ir,
+           ParFiniteElementSpace &h1, ParFiniteElementSpace &l2):
+      dim(d), vdim(h1.GetVDim()),
+      NQ(ir.GetNPoints()), NE(ne), Q1D(q1d),
+      use_viscosity(visc), use_vorticity(vort), cfl(cfl),
+      timer(t), ir(ir), H1(h1), L2(l2),
+      H1R(H1.GetElementRestriction(ElementDofOrdering::LEXICOGRAPHIC)),
+      q_dt_est(NE*NQ),
+      q_e(NE*NQ),
+      e_vec(NQ*NE*vdim),
+      q_dx(NQ*NE*vdim*vdim),
+      q_dv(NQ*NE*vdim*vdim),
+      q1(H1.GetQuadratureInterpolator(ir)),
+      q2(L2.GetQuadratureInterpolator(ir)),
+      gamma_gf(gamma_gf) { }
+
+   void UpdateQuadratureData(const Vector&, QuadratureData&, Vector&, Vector&);
 };
 
 // TaylorCoefficient used in the 2D Taylor-Green problem.
diff --git a/makefile b/makefile
index a59f8443..07a9fae1 100644
--- a/makefile
+++ b/makefile
@@ -111,7 +111,7 @@ OBJECT_FILES = $(SOURCE_FILES:.cpp=.o)
 laghos: $(OBJECT_FILES) $(CONFIG_MK) $(MFEM_LIB_FILE)
 	$(MFEM_CXX) $(MFEM_LINK_FLAGS) -o laghos $(OBJECT_FILES) $(LIBS)
 
-all:;@$(MAKE) -j $(NPROC) laghos
+all: laghos
 
 $(OBJECT_FILES): $(HEADER_FILES) $(CONFIG_MK)