[Trilinos-Users] Muelu issue with exported Tpetra CrsMatrix
C.S. Natarajan
csnataraj at gmail.com
Thu Aug 14 12:00:11 MDT 2014
Hi,
I am running into a strange issue when building the MG hierarchy with
Muelu. This only happens for large problems. I am pasting a minimal example
that can reproduce the error. The example assembles a Laplace operator on a
structured grid with Q1 elements with Dirichlet boundary condition in the Z
direction and Neumann BC's in the X and Y direction. In the example there
are two global node maps. gmap and gmap2. gmap is used to build matrix A
which has an unequal distribution of nodes in each processor. gmap2 is used
to export matrix A from gmap to matrix B which contains approximately the
same number of nodes in each processor. For a problem of size 512^3, while
A builds fine, building the hierarchy with matrix B fails with a memory
corruption error that ends with a MPI_Reduce_scatter error. I am using the
public git version as of 08/11. The build is with intel C++ 14.0.0 and
support for long long int is enabled (do i also need to enable ilp64 from
mkl?)
I would really appreciate any input on what I might be doing wrong or
whether there is a bug in the Muelu library.
TIA,
C.S.N
#include <sstream>
//Epetra Includes
#include "Epetra_BlockMap.h"
#include "Epetra_Comm.h"
#include "Epetra_MpiComm.h"
#include "Epetra_VbrMatrix.h"
#include "Epetra_SerialDenseMatrix.h"
#include "Epetra_FEVector.h"
#include "Epetra_FEVbrMatrix.h"
// Intrepid includes
#include "Intrepid_ArrayTools.hpp"
#include "Intrepid_FunctionSpaceTools.hpp"
#include "Intrepid_FieldContainer.hpp"
#include "Intrepid_CellTools.hpp"
#include "Intrepid_HGRAD_HEX_C1_FEM.hpp"
#include "Intrepid_RealSpaceTools.hpp"
#include "Intrepid_DefaultCubatureFactory.hpp"
#include "Intrepid_Utils.hpp"
#include <Intrepid_HGRAD_QUAD_C1_FEM.hpp>
//Sacado Includes
#include "Sacado.hpp"
//Shards Includes
#include "Shards_CellTopology.hpp"
//Teuchos Includes
#include "Teuchos_Array.hpp"
#include "Teuchos_ArrayView.hpp"
#include "Teuchos_ArrayViewDecl.hpp"
#include "Teuchos_BLAS.hpp"
#include "Teuchos_CommandLineProcessor.hpp"
#include "Teuchos_DefaultMpiComm.hpp"
#include "Teuchos_FancyOStream.hpp"
#include "Teuchos_GlobalMPISession.hpp"
#include "Teuchos_oblackholestream.hpp"
#include "Teuchos_OrdinalTraits.hpp"
#include "Teuchos_RCP.hpp"
#include "Teuchos_ScalarTraits.hpp"
#include "Teuchos_SerialDenseMatrix.hpp"
#include "Teuchos_ArrayRCPDecl.hpp"
//Lone survivor from Tpetra
#include "Tpetra_DefaultPlatform.hpp"
#include "Tpetra_Version.hpp"
#include "Tpetra_BlockMap_decl.hpp"
#include "Tpetra_BlockMap.hpp"
#include "Tpetra_Vector.hpp"
#include "Tpetra_VbrMatrix.hpp"
#include "Tpetra_DistObject_decl.hpp"
#include "Tpetra_BlockMultiVector.hpp"
#include "Tpetra_BlockMultiVector_decl.hpp"
#include "Tpetra_MultiVector.hpp"
//Muelu includes
#include "MueLu.hpp"
#include "MueLu_FactoryManager.hpp"
#include "MueLu_DirectSolver.hpp"
#include "MueLu_Hierarchy.hpp"
#include "MueLu_SaPFactory.hpp"
#include "MueLu_GenericRFactory.hpp"
#include "MueLu_RAPFactory.hpp"
#include "MueLu_TentativePFactory.hpp"
#include "MueLu_Ifpack2Smoother.hpp"
#include "MueLu_SmootherFactory.hpp"
#include "MueLu_RigidBodyModeFactory.hpp"
#include <MueLu_UseDefaultTypes.hpp>
#include <MueLu_MLParameterListInterpreter.hpp>
#include "MueLu_Amesos2Smoother.hpp"
#include <BelosConfigDefs.hpp>
#include <BelosLinearProblem.hpp>
#include <BelosBlockCGSolMgr.hpp>
#include <BelosXpetraAdapter.hpp>
#include <BelosMueLuAdapter.hpp>
#include <MueLu_UseShortNames.hpp>
using namespace Intrepid;
typedef double ST;
typedef Teuchos::ScalarTraits<ST> SCT;
typedef SCT::magnitudeType MT;
typedef long long int LLT;
typedef Kokkos::DefaultNode::DefaultNodeType NO;
typedef Tpetra::CrsMatrix<ST, LO, LLT, NO, LMO> matrix_type;
typedef Tpetra::Map<LO, LLT, NO> map_type;
using Teuchos::RCP;
using Teuchos::rcp;
int main(int argc, char *argv[])
{
using Teuchos::Array;
Teuchos::oblackholestream nullStream;
Teuchos::GlobalMPISession mpiSession(&argc,&argv, NULL);
Epetra_MpiComm ecomm(MPI_COMM_WORLD);
Teuchos::RCP<const Teuchos::Comm<int> > comm =
Tpetra::DefaultPlatform::getDefaultPlatform().getComm();
size_t myRank = comm->getRank();
size_t nProc = comm->getSize();
std::ostream &out = (myRank == 0)?std::cout:nullStream;
Teuchos::RCP<Teuchos::FancyOStream> fout =
Teuchos::getFancyOStream(Teuchos::rcpFromRef (out));
Teuchos::CommandLineProcessor clp;
int GNEX = 64;
clp.setOption("NX", &GNEX, "Number of elements in X-direction.");
int GNEY = 64;
clp.setOption("NY", &GNEY, "Number of elements in Y-direction.");
int GNEZ = 64;
clp.setOption("NZ", &GNEZ, "Number of elements in Z-direction.");
long long grow = 128;
clp.setOption("grow", &grow, "Global row number");
std::string matrix("original");
clp.setOption("matrix", &matrix, "original = A, exported = b");
clp.recogniseAllOptions(true);
clp.throwExceptions(false);
Teuchos::CommandLineProcessor::EParseCommandLineReturn parseReturn
= clp.parse(argc, argv);
if(parseReturn ==
Teuchos::CommandLineProcessor::PARSE_HELP_PRINTED) return 0;
if(parseReturn != Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL)
return 1;
const int numGElements =
GNEX*GNEY*GNEZ;
const int GNNX = GNEX+1, GNNY = GNEY+1, GNNZ=
GNEZ+1;
const int numGNodes = GNNX*GNNY*GNNZ;
const int numGDof =
GNNX*GNNY*GNNZ;
out << "\t\t Number of global elements is : " << numGElements <<
"\n";
out << "\t\t Number of global nodes is : " << numGNodes << "\n";
Epetra_Map elemZMap(GNEZ, 0, ecomm);
long count = GNNZ/nProc;
long np; np = GNNZ - (nProc*count);
count += (myRank < np)?1:0;
long long int nnu =
count*GNNX*GNNY;
LO indexBase = 0;
Teuchos::RCP<const Tpetra::Map<LO, LLT> > gmap = Teuchos::rcp(new
Tpetra::Map<LO,LLT>(-1, nnu, indexBase, comm));
int numLNodes = gmap->getNodeNumElements();
int startOverlapZ = (myRank ==
0)?0:(gmap->getGlobalElement(0))-GNNX*GNNY;
int endOverlapZ = (myRank ==
nProc-1)?(gmap->getGlobalElement(0)+(count-1)*GNNX*GNNY):(gmap->getGlobalElement(0)+count*GNNX*GNNY);
startOverlapZ /= (GNNX*GNNY); endOverlapZ /=
(GNNX*GNNY);
int startX = 0; int endX = GNNX;
int startUY = (myRank == 0)?
startOverlapZ:(startOverlapZ+1);
// Unique map start index
int endUY = (myRank ==
nProc-1)?endOverlapZ:(endOverlapZ-1); //
Unique map end index
int numLElemZ = elemZMap.NumMyElements();
int numLElemX = GNEX, numLElemY = GNEY;
int numLElems = numLElemX*numLElemY*numLElemZ;
int startElemZ = elemZMap.GID(0);
int endElemZ = elemZMap.GID(numLElemZ-1);
int startNodeZ = startElemZ;
int endNodeZ = endElemZ+1;
typedef shards::CellTopology CellTopology;
CellTopology h8(shards::getCellTopologyData<shards::Hexahedron<8>
>() );
// Get dimensions
int numNodesPerElem = h8.getNodeCount();
int spaceDim = h8.getDimension();
FieldContainer<int> elemNode(numLElems, numNodesPerElem);
int lElem = 0;
for(int k = startElemZ; k <= endElemZ; k++)
for (int j = 0; j < GNEY; j++)
for (int i = 0; i < GNEX; i++)
{
elemNode(lElem,0) = (GNNX*GNNY)*k +
(GNNX)*j + i;
elemNode(lElem,1) = (GNNX*GNNY)*k +
(GNNX)*j + i + 1;
elemNode(lElem,2) = (GNNX*GNNY)*k +
(GNNX)*(j + 1) + i + 1;
elemNode(lElem,3) = (GNNX*GNNY)*k +
(GNNX)*(j + 1) + i;
elemNode(lElem,4) = (GNNX*GNNY)*(k+1) +
(GNNX)*j + i;
elemNode(lElem,5) = (GNNX*GNNY)*(k+1) +
(GNNX)*j + i + 1;
elemNode(lElem,6) = (GNNX*GNNY)*(k+1) +
(GNNX)*(j + 1) + i + 1;
elemNode(lElem,7) = (GNNX*GNNY)*(k+1) +
(GNNX)*(j + 1) + i;
lElem++;
}
double leftX = 0.0, rightX = 1.0;
double leftY = 0.0, rightY = 1.0;
double leftZ = 0.0, rightZ = 1.0;
hx = hy = hz = 1;
numLNodes = (numLElemZ+1)*(numLElemY+1)*(numLElemX+1);
FieldContainer<double> nodeCoord(numLNodes, spaceDim);
FieldContainer<int> nodeOnBoundary(numLNodes);
int inode = 0;
for (int k = startNodeZ; k <= endNodeZ; k++)
for (int j = 0; j < GNNY; j++)
for (int i = 0; i < GNNX; i++)
{
nodeCoord(inode,0) = leftX + (double)i*hx;
nodeCoord(inode,1) = leftY + (double)j*hy;
nodeCoord(inode,2) = leftZ +
(double)k*hz;
inode++;
}
DefaultCubatureFactory<double> cubFactory;
int cubDegree = 3;
Teuchos::RCP<Cubature<double> > hexCub = cubFactory.create(h8,
cubDegree);
int cubDim = hexCub->getDimension();
int numCubPoints = hexCub->getNumPoints();
FieldContainer<double> cubPoints(numCubPoints, cubDim);
FieldContainer<double> cubWeights(numCubPoints);
hexCub->getCubature(cubPoints, cubWeights);
Basis_HGRAD_HEX_C1_FEM<double, FieldContainer<double> >
hexHGradBasis;
int numFieldsG = hexHGradBasis.getCardinality();
FieldContainer<double> hexGVals(numFieldsG, numCubPoints);
FieldContainer<double> hexGrads(numFieldsG, numCubPoints, spaceDim);
hexHGradBasis.getValues(hexGVals, cubPoints, OPERATOR_VALUE);
hexHGradBasis.getValues(hexGrads, cubPoints, OPERATOR_GRAD);
typedef CellTools<double> CellTools;
typedef FunctionSpaceTools fst;
int numCells = 1;
FieldContainer<double> hexNodes(numCells, numNodesPerElem,
spaceDim);
FieldContainer<double> hexJacobian(numCells, numCubPoints,
spaceDim, spaceDim);
FieldContainer<double> hexJacobInv(numCells, numCubPoints,
spaceDim, spaceDim);
FieldContainer<double> hexJacobDet(numCells, numCubPoints);
FieldContainer<double> localStiffMatrix(numCells, numFieldsG,
numFieldsG);
FieldContainer<double> weightedMeasure(numCells, numCubPoints);
FieldContainer<double> hexGradsTransformed(numCells, numFieldsG,
numCubPoints, spaceDim);
FieldContainer<double> hexGradsTransformedWeighted(numCells,
numFieldsG, numCubPoints, spaceDim);
FieldContainer<double> rhsData(numCells, numCubPoints);
FieldContainer<double> localRHS(numCells, numFieldsG);
FieldContainer<double> hexGValsTransformed(numCells, numFieldsG,
numCubPoints);
FieldContainer<double> hexGValsTransformedWeighted(numCells,
numFieldsG, numCubPoints);
FieldContainer<double> physCubPoints(numCells, numCubPoints,
cubDim);
Teuchos::Array<ST> values(8);
Teuchos::Array<long long int> cols(8);
Teuchos::RCP<matrix_type> A = rcp(new matrix_type(gmap, 27));
long long int rowIndex = 0, colIndex = 0, ierr, ix;
for(int i = 0; i < lElem; i++)
{
for(int j = 0; j < numNodesPerElem; j++)
{
ix = (elemNode(i,j)%GNNX);
hexNodes(0,j,0) = ix*hx;
hexNodes(0,j,1) = ((elemNode(i,j)%(GNNX*GNNY) -
ix)/GNNX)*hy;
hexNodes(0,j,2) = (elemNode(i,j)/(GNNX*GNNY))*hz;
}
CellTools::setJacobian(hexJacobian, cubPoints, hexNodes,
h8);
CellTools::setJacobianInv(hexJacobInv, hexJacobian );
CellTools::setJacobianDet(hexJacobDet, hexJacobian );
fst::HGRADtransformGRAD<double>(hexGradsTransformed,
hexJacobInv, hexGrads);
fst::computeCellMeasure<double>(weightedMeasure,
hexJacobDet, cubWeights);
fst::multiplyMeasure<double>(hexGradsTransformedWeighted,
weightedMeasure, hexGradsTransformed);
fst::integrate<double>(localStiffMatrix,
hexGradsTransformed, hexGradsTransformedWeighted, COMP_BLAS);
for (int row = 0; row < numFieldsG; row++)
{
rowIndex = elemNode(i,row);
for (int col = 0; col < numFieldsG; col++)
{
cols[col] = elemNode(i,col);
values[col] = localStiffMatrix(0, row, col);
}
if ((hexNodes(0,row,2) == 0) || (hexNodes(0,row,2)
== GNEZ))
std::fill(values.begin(), values.end(), 0);
A->insertGlobalValues(rowIndex, cols, values);
}
}
LLT drow = 0, maxind = 0, down = GNNX*GNNY, top =
GNNX*GNNY*(GNNZ-1);
LO num_local_nodes;
maxind = gmap->getMaxGlobalIndex();
num_local_nodes = gmap->getNodeNumElements();
if ((drow < down) || (maxind >= top))
{
for(int i = 0; i < num_local_nodes; i++)
{
drow = gmap->getGlobalElement(i);
if ((drow < down) || (drow >= top))
A->replaceGlobalValues(drow,
Teuchos::tuple<long long int>(drow), Teuchos::tuple<ST>(1.0));
}
}
A->globalAssemble();
A->fillComplete();
RCP<Tpetra::Vector<SC, LO, LLT, NO> > x1 = rcp(new
Tpetra::Vector<SC, LO, LLT, NO> (A->getDomainMap ()));
RCP<Tpetra::Vector<SC, LO, LLT, NO> > b1 = rcp(new
Tpetra::Vector<SC, LO, LLT, NO> (A->getRangeMap ()));
x1->putScalar(1.0);
A->apply(*x1, *b1);
out << "Norm of b1 is " << b1->norm2() << std::endl;
Teuchos::RCP< map_type> gmap2 = rcp (new map_type (numGNodes,
indexBase, comm, Tpetra::GloballyDistributed));
Teuchos::RCP<matrix_type> B = Teuchos::rcp(new matrix_type (gmap2,
0));
typedef Tpetra::Export<LO, LLT, NO> export_type;
export_type exporter (gmap, gmap2);
B->doExport (*A, exporter, Tpetra::INSERT);
B->fillComplete();
RCP<Tpetra::Vector<SC, LO, LLT, NO> > x2 = rcp(new
Tpetra::Vector<SC, LO, LLT, NO> (B->getDomainMap ()));
RCP<Tpetra::Vector<SC, LO, LLT, NO> > b2 = rcp(new
Tpetra::Vector<SC, LO, LLT, NO> (B->getRangeMap ()));
x2->putScalar(1.0);
B->apply(*x2, *b2);
out << "Norm of b2 is " << b2->norm2() << std::endl;
Teuchos::Array<ST> crsvals;
size_t NumEntries = 0;
Teuchos::Array<LLT> indices;
NumEntries = B->getNumEntriesInGlobalRow(grow);
if (NumEntries != Teuchos::OrdinalTraits<size_t>::invalid())
{
std::cout << "outputting crs values " << std::endl;
indices.resize(NumEntries); crsvals.resize(NumEntries);
A->getGlobalRowCopy (grow, indices(), crsvals(),
NumEntries);
for(int i = 0; i < indices.size(); i++)
std::cout << indices[i] << " " << crsvals[i] <<
"\n";
}
Xpetra::UnderlyingLib lib = Xpetra::UseTpetra;
Teuchos::RCP<Xpetra::CrsMatrix<ST, LO, LLT, NO, LMO> > mueluM_;
if (matrix == "original")
mueluM_ = rcp(new Xpetra::TpetraCrsMatrix<ST, LO, LLT, NO,
LMO>(A));
else
mueluM_ = rcp(new Xpetra::TpetraCrsMatrix<ST, LO, LLT, NO,
LMO>(B));
Teuchos::RCP<Xpetra::Matrix <ST, LO, LLT, NO, LMO> > mueluA =
rcp(new Xpetra::CrsMatrixWrap<ST, LO, LLT, NO, LMO>(mueluM_));
Teuchos::RCP<MueLu::Hierarchy<ST, LO, LLT, NO, LMO> > H = rcp(new
MueLu::Hierarchy<ST, LO, LLT, NO, LMO> (mueluA));
out << "Begin setup of Heirarchy ..." << std::endl;
H->Setup();
out << "Finished setup of Hierarchy ..." << std::endl;
}
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://software.sandia.gov/pipermail/trilinos-users/attachments/20140814/c3f04d83/attachment.html>
More information about the Trilinos-Users
mailing list