[Trilinos-Users] [EXTERNAL] Muelu issue with exported Tpetra CrsMatrix

C.S. Natarajan csnataraj at gmail.com
Thu Aug 14 15:20:40 MDT 2014


Hi Andrey, thanks for taking a look. Yeah, 512^3 was the first wall I hit
(256^3 seemed to work fine), and I haven't run it through valgrind yet. The
output just before the corruption and the stack trace are below (compiled
with CXX flags -O0 -g).
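
For reference, a valgrind run on the smaller case would look roughly like
this (rank count, problem sizes, and the log-file path are placeholders):

    # one log file per MPI rank; "exported" selects matrix B
    mpirun -np 8 valgrind --track-origins=yes --num-callers=30 \
        --log-file=vg.%p.log \
        ./fem_3d --NX=256 --NY=256 --NZ=256 --matrix=exported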

Cheers,
C.S.N


 Level 3
  Prolongator smoothing (MueLu::SaPFactory)
   Build (MueLu::TentativePFactory)
    Build (MueLu::CoupledAggregationFactory)
     Build (MueLu::CoalesceDropFactory)
      lightweight wrap = 0
      CoalesceDropFactory::Build(): found blockdim=1 from strided maps.
offset=0
      Build (MueLu::AmalgamationFactory)
       AmalagamationFactory::Build(): found fullblocksize=1 and
stridedblocksize=1 from strided maps. offset=0
      CoalesceDropFactory::SetupAmalgamationData() # of amalgamated
blocks=421729
      CoalesceDropFactory: nodeMap 3364/421729 elements
      Detected 0 Dirichlet nodes
     Coarsen Uncoupled (MueLu::LocalAggregationAlgorithm)
      Ordering:                  Natural
      Min nodes per aggregate:   1
      Max nbrs already selected: 0
      Nodes aggregated = 390513 (421729)
      Total aggregates = 35554
     AggregateLeftovers (MueLu::LeftoverAggregationAlgorithm)
      Phase 1 - nodes aggregated = 420475
      Phase 1 - total aggregates = 35554
      Phase 3 - additional aggregates = 0
======= Backtrace: =========
/lib64/libc.so.6(+0x75358)[0x2b5438a4a358]
/lib64/libc.so.6(+0x7899d)[0x2b5438a4d99d]
/lib64/libc.so.6(__libc_malloc+0x77)[0x2b5438a4f3e7]
/usr/lib64/libstdc++.so.6(_Znwm+0x1d)[0x2b543857259d]
/gpfs07/vol0/BCF/RD/users/natac0/intrepid/bin/fem_3d(_ZN6Xpetra8toXpetraIixN13KokkosClassic10SerialNodeEEEKN7Teuchos3RCPIKNS_3MapIT_T0_T1_EEEERKNS4_IKN6Tpetra3MapIS6_S7_S8_EEEE+0x42)[0xd5097a]
/gpfs07/vol0/BCF/RD/users/natac0/intrepid/bin/fem_3d(_ZNK6Xpetra17TpetraMultiVectorIdixN13KokkosClassic10SerialNodeEE6getMapEv+0x65)[0xb748c9]
/gpfs07/vol0/BCF/RD/users/natac0/intrepid/bin/fem_3d(_ZNK5MueLu28CoupledAggregationCommHelperIixN13KokkosClassic10SerialNodeENS1_12AltSparseOpsIviS2_NS1_7details28AltSparseOpsDefaultAllocatorIiS2_EEEEE23ArbitrateAndCommunicateERN6Xpetra6VectorIdixS2_EERNSA_IiixS2_EEPSD_b+0xd7)[0xb46863]
/gpfs07/vol0/BCF/RD/users/natac0/intrepid/bin/fem_3d(_ZNK5MueLu28CoupledAggregationCommHelperIixN13KokkosClassic10SerialNodeENS1_12AltSparseOpsIviS2_NS1_7details28AltSparseOpsDefaultAllocatorIiS2_EEEEE23ArbitrateAndCommunicateERN6Xpetra6VectorIdixS2_EERNS_10AggregatesIixS2_S7_EEb+0x89)[0xb46785]
/gpfs07/vol0/BCF/RD/users/natac0/intrepid/bin/fem_3d(_ZNK5MueLu28LeftoverAggregationAlgorithmIixN13KokkosClassic10SerialNodeENS1_12AltSparseOpsIviS2_NS1_7details28AltSparseOpsDefaultAllocatorIiS2_EEEEE18AggregateLeftoversERKNS_9GraphBaseIixS2_S7_EERNS_10AggregatesIixS2_S7_EE+0x5e1c)[0xb42d80]
/gpfs07/vol0/BCF/RD/users/natac0/intrepid/bin/fem_3d(_ZNK5MueLu25CoupledAggregationFactoryIixN13KokkosClassic10SerialNodeENS1_12AltSparseOpsIviS2_NS1_7details28AltSparseOpsDefaultAllocatorIiS2_EEEEE5BuildERNS_5LevelE+0x60c)[0xb459ac]
/gpfs07/vol0/BCF/RD/users/natac0/intrepid/bin/fem_3d(_ZNK5MueLu22SingleLevelFactoryBase9CallBuildERNS_5LevelE+0x39)[0xad08e9]
/gpfs07/vol0/BCF/RD/users/natac0/intrepid/bin/fem_3d(_ZN5MueLu5Level3GetIN7Teuchos3RCPINS_10AggregatesIixN13KokkosClassic10SerialNodeENS5_12AltSparseOpsIviS6_NS5_7details28AltSparseOpsDefaultAllocatorIiS6_EEEEEEEEEERT_RKSsPKNS_11FactoryBaseE+0xbb6)[0xac201e]
/gpfs07/vol0/BCF/RD/users/natac0/intrepid/bin/fem_3d(_ZNK5MueLu7Factory3GetIN7Teuchos3RCPINS_10AggregatesIixN13KokkosClassic10SerialNodeENS5_12AltSparseOpsIviS6_NS5_7details28AltSparseOpsDefaultAllocatorIiS6_EEEEEEEEEET_RNS_5LevelERKSs+0x79)[0xacede5]
/gpfs07/vol0/BCF/RD/users/natac0/intrepid/bin/fem_3d(_ZNK5MueLu17TentativePFactoryIdixN13KokkosClassic10SerialNodeENS1_12AltSparseOpsIviS2_NS1_7details28AltSparseOpsDefaultAllocatorIiS2_EEEEE6BuildPERNS_5LevelESA_+0x28f)[0xb01feb]
/gpfs07/vol0/BCF/RD/users/natac0/intrepid/bin/fem_3d(_ZNK5MueLu17TentativePFactoryIdixN13KokkosClassic10SerialNodeENS1_12AltSparseOpsIviS2_NS1_7details28AltSparseOpsDefaultAllocatorIiS2_EEEEE5BuildERNS_5LevelESA_+0x45)[0xb01d55]
/gpfs07/vol0/BCF/RD/users/natac0/intrepid/bin/fem_3d(_ZNK5MueLu19TwoLevelFactoryBase9CallBuildERNS_5LevelE+0x3fa)[0xad5dd6]
/gpfs07/vol0/BCF/RD/users/natac0/intrepid/bin/fem_3d(_ZN5MueLu5Level3GetIN7Teuchos3RCPIN6Xpetra6MatrixIdixN13KokkosClassic10SerialNodeENS6_12AltSparseOpsIviS7_NS6_7details28AltSparseOpsDefaultAllocatorIiS7_EEEEEEEEEERT_RKSsPKNS_11FactoryBaseE+0xbb6)[0xabdf52]
/gpfs07/vol0/BCF/RD/users/natac0/intrepid/bin/fem_3d(_ZNK5MueLu10SaPFactoryIdixN13KokkosClassic10SerialNodeENS1_12AltSparseOpsIviS2_NS1_7details28AltSparseOpsDefaultAllocatorIiS2_EEEEE6BuildPERNS_5LevelESA_+0x5af)[0xb101ab]
/gpfs07/vol0/BCF/RD/users/natac0/intrepid/bin/fem_3d(_ZNK5MueLu10SaPFactoryIdixN13KokkosClassic10SerialNodeENS1_12AltSparseOpsIviS2_NS1_7details28AltSparseOpsDefaultAllocatorIiS2_EEEEE5BuildERNS_5LevelESA_+0x45)[0xb0fbf5]
/gpfs07/vol0/BCF/RD/users/natac0/intrepid/bin/fem_3d(_ZNK5MueLu19TwoLevelFactoryBase9CallBuildERNS_5LevelE+0x3fa)[0xad5dd6]
/gpfs07/vol0/BCF/RD/users/natac0/intrepid/bin/fem_3d(_ZN5MueLu5Level3GetIN7Teuchos3RCPIN6Xpetra6MatrixIdixN13KokkosClassic10SerialNodeENS6_12AltSparseOpsIviS7_NS6_7details28AltSparseOpsDefaultAllocatorIiS7_EEEEEEEEEERT_RKSsPKNS_11FactoryBaseE+0xbb6)[0xabdf52]
/gpfs07/vol0/BCF/RD/users/natac0/intrepid/bin/fem_3d(_ZNK5MueLu13TopRAPFactoryIdixN13KokkosClassic10SerialNodeENS1_12AltSparseOpsIviS2_NS1_7details28AltSparseOpsDefaultAllocatorIiS2_EEEEE5BuildERNS_5LevelESA_+0xd5)[0xaf0879]
/gpfs07/vol0/BCF/RD/users/natac0/intrepid/bin/fem_3d(_ZN5MueLu9HierarchyIdixN13KokkosClassic10SerialNodeENS1_12AltSparseOpsIviS2_NS1_7details28AltSparseOpsDefaultAllocatorIiS2_EEEEE5SetupEiN7Teuchos3PtrIKNS_18FactoryManagerBaseEEESD_SD_+0x1cc5)[0xada501]
/gpfs07/vol0/BCF/RD/users/natac0/intrepid/bin/fem_3d(_ZN5MueLu9HierarchyIdixN13KokkosClassic10SerialNodeENS1_12AltSparseOpsIviS2_NS1_7details28AltSparseOpsDefaultAllocatorIiS2_EEEEE5SetupERKNS_18FactoryManagerBaseEii+0x111b)[0xadbf2b]
/gpfs07/vol0/BCF/RD/users/natac0/intrepid/bin/fem_3d(main+0x49ee)[0x71436c]
/lib64/libc.so.6(__libc_start_main+0xe6)[0x2b54389f3c36]
/gpfs07/vol0/BCF/RD/users/natac0/intrepid/bin/fem_3d[0x70b189]



On Thu, Aug 14, 2014 at 3:25 PM, Andrey Prokopenko <aprokop at sandia.gov>
wrote:

> Hi Natarajan,
>
> I'm going to take a look at it. Is 512^3 the smallest reproducible size?
> Have you tried running a smaller problem through valgrind?
>
> Could you please also attach the output that appears just before the
> corruption? That would help us identify the responsible factory. It
> would also be great if you could include the stack trace.
>
> -Andrey
>
>
> On Thu 14 Aug 2014 12:00:11 PM MDT, C.S. Natarajan wrote:
>
>> Hi,
>>       I am running into a strange issue when building the MG hierarchy
>> with MueLu. This only happens for large problems. I am pasting a
>> minimal example that can reproduce the error. The example assembles a
>> Laplace operator on a structured grid with Q1 elements, with Dirichlet
>> boundary conditions in the Z direction and Neumann BCs in the X and Y
>> directions. The example uses two global node maps, gmap and gmap2.
>> gmap is used to build matrix A, which has an unequal distribution of
>> nodes across processors. gmap2 is used to export matrix A from gmap to
>> matrix B, which has approximately the same number of nodes on each
>> processor. For a problem of size 512^3, A builds fine, but building
>> the hierarchy with matrix B fails with a memory corruption error that
>> ends in an MPI_Reduce_scatter error. I am using the public git version
>> as of 08/11. The build uses Intel C++ 14.0.0 with support for long
>> long int enabled (do I also need to enable ILP64 in MKL?).
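>>
>> Stripped to its essentials, the map/export pattern I am describing is
>> the sketch below (the diagonal fill and per-rank row counts are
>> placeholders; the full reproducer with the actual Q1 assembly follows
>> after my signature):
>>
>> #include <Teuchos_GlobalMPISession.hpp>
>> #include <Teuchos_OrdinalTraits.hpp>
>> #include <Tpetra_DefaultPlatform.hpp>
>> #include <Tpetra_Map.hpp>
>> #include <Tpetra_CrsMatrix.hpp>
>> #include <Tpetra_Export.hpp>
>>
>> int main(int argc, char *argv[])
>> {
>>         Teuchos::GlobalMPISession mpiSession(&argc, &argv, NULL);
>>         Teuchos::RCP<const Teuchos::Comm<int> > comm =
>>                 Tpetra::DefaultPlatform::getDefaultPlatform().getComm();
>>
>>         typedef long long GO;
>>         typedef Tpetra::Map<int, GO>               map_t;
>>         typedef Tpetra::CrsMatrix<double, int, GO> mat_t;
>>
>>         // Uneven map: rank 0 owns more rows than the others
>>         // (placeholder counts; the real code owns whole Z-planes).
>>         const size_t numLocal = (comm->getRank() == 0) ? 200 : 100;
>>         const Tpetra::global_size_t INV =
>>                 Teuchos::OrdinalTraits<Tpetra::global_size_t>::invalid();
>>         Teuchos::RCP<const map_t> gmap =
>>                 Teuchos::rcp(new map_t(INV, numLocal, 0, comm));
>>
>>         // Assemble A on the uneven map (identity here, Q1 Laplacian below)
>>         Teuchos::RCP<mat_t> A = Teuchos::rcp(new mat_t(gmap, 1));
>>         for (size_t i = 0; i < gmap->getNodeNumElements(); ++i) {
>>                 const GO row = gmap->getGlobalElement(i);
>>                 A->insertGlobalValues(row, Teuchos::tuple<GO>(row),
>>                                       Teuchos::tuple<double>(1.0));
>>         }
>>         A->fillComplete();
>>
>>         // Uniform map over the same global IDs; export A onto it as B
>>         Teuchos::RCP<const map_t> gmap2 = Teuchos::rcp(new map_t(
>>                 gmap->getGlobalNumElements(), 0, comm,
>>                 Tpetra::GloballyDistributed));
>>         Teuchos::RCP<mat_t> B = Teuchos::rcp(new mat_t(gmap2, 0));
>>         Tpetra::Export<int, GO> exporter(gmap, gmap2);
>>         B->doExport(*A, exporter, Tpetra::INSERT);
>>         B->fillComplete();   // B is what gets handed to MueLu
>>
>>         return 0;
>> }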
>>
>> I would really appreciate any input on what I might be doing wrong or
>> whether there is a bug in the MueLu library.
>>
>>
>> TIA,
>> C.S.N
>>
>> #include <sstream>
>>
>> //Epetra Includes
>> #include "Epetra_BlockMap.h"
>> #include "Epetra_Comm.h"
>> #include "Epetra_MpiComm.h"
>> #include "Epetra_VbrMatrix.h"
>> #include "Epetra_SerialDenseMatrix.h"
>> #include "Epetra_FEVector.h"
>> #include "Epetra_FEVbrMatrix.h"
>>
>> // Intrepid includes
>> #include "Intrepid_ArrayTools.hpp"
>> #include "Intrepid_FunctionSpaceTools.hpp"
>> #include "Intrepid_FieldContainer.hpp"
>> #include "Intrepid_CellTools.hpp"
>> #include "Intrepid_HGRAD_HEX_C1_FEM.hpp"
>> #include "Intrepid_RealSpaceTools.hpp"
>> #include "Intrepid_DefaultCubatureFactory.hpp"
>> #include "Intrepid_Utils.hpp"
>> #include <Intrepid_HGRAD_QUAD_C1_FEM.hpp>
>>
>> //Sacado Includes
>> #include "Sacado.hpp"
>>
>> //Shards Includes
>> #include "Shards_CellTopology.hpp"
>>
>> //Teuchos Includes
>> #include "Teuchos_Array.hpp"
>> #include "Teuchos_ArrayView.hpp"
>> #include "Teuchos_ArrayViewDecl.hpp"
>> #include "Teuchos_BLAS.hpp"
>> #include "Teuchos_CommandLineProcessor.hpp"
>> #include "Teuchos_DefaultMpiComm.hpp"
>> #include "Teuchos_FancyOStream.hpp"
>> #include "Teuchos_GlobalMPISession.hpp"
>> #include "Teuchos_oblackholestream.hpp"
>> #include "Teuchos_OrdinalTraits.hpp"
>> #include "Teuchos_RCP.hpp"
>> #include "Teuchos_ScalarTraits.hpp"
>> #include "Teuchos_SerialDenseMatrix.hpp"
>> #include "Teuchos_ArrayRCPDecl.hpp"
>>
>> //Tpetra Includes
>> #include "Tpetra_DefaultPlatform.hpp"
>> #include "Tpetra_Version.hpp"
>> #include "Tpetra_BlockMap_decl.hpp"
>> #include "Tpetra_BlockMap.hpp"
>> #include "Tpetra_Vector.hpp"
>> #include "Tpetra_VbrMatrix.hpp"
>> #include "Tpetra_DistObject_decl.hpp"
>> #include "Tpetra_BlockMultiVector.hpp"
>> #include "Tpetra_BlockMultiVector_decl.hpp"
>> #include "Tpetra_MultiVector.hpp"
>>
>> //Muelu includes
>> #include "MueLu.hpp"
>> #include "MueLu_FactoryManager.hpp"
>> #include "MueLu_DirectSolver.hpp"
>> #include "MueLu_Hierarchy.hpp"
>> #include "MueLu_SaPFactory.hpp"
>> #include "MueLu_GenericRFactory.hpp"
>> #include "MueLu_RAPFactory.hpp"
>> #include "MueLu_TentativePFactory.hpp"
>> #include "MueLu_Ifpack2Smoother.hpp"
>> #include "MueLu_SmootherFactory.hpp"
>> #include "MueLu_RigidBodyModeFactory.hpp"
>> #include <MueLu_UseDefaultTypes.hpp>
>> #include <MueLu_MLParameterListInterpreter.hpp>
>> #include "MueLu_Amesos2Smoother.hpp"
>> #include <BelosConfigDefs.hpp>
>> #include <BelosLinearProblem.hpp>
>> #include <BelosBlockCGSolMgr.hpp>
>> #include <BelosXpetraAdapter.hpp>
>> #include <BelosMueLuAdapter.hpp>
>> #include <MueLu_UseShortNames.hpp>
>>
>>
>>
>>
>> using namespace Intrepid;
>> typedef double                            ST;
>> typedef Teuchos::ScalarTraits<ST>        SCT;
>> typedef SCT::magnitudeType                MT;
>> typedef long long int LLT;
>> typedef Kokkos::DefaultNode::DefaultNodeType NO;
>> typedef Tpetra::CrsMatrix<ST, LO, LLT, NO, LMO> matrix_type;
>> typedef Tpetra::Map<LO, LLT, NO> map_type;
>> using Teuchos::RCP;
>> using Teuchos::rcp;
>>
>> int main(int argc, char *argv[])
>> {
>>
>>         using Teuchos::Array;
>>         Teuchos::oblackholestream nullStream;
>>         Teuchos::GlobalMPISession mpiSession(&argc,&argv, NULL);
>>         Epetra_MpiComm ecomm(MPI_COMM_WORLD);
>>
>>         Teuchos::RCP<const Teuchos::Comm<int> > comm =
>> Tpetra::DefaultPlatform::getDefaultPlatform().getComm();
>>         size_t myRank = comm->getRank();
>>         size_t nProc = comm->getSize();
>>         std::ostream &out = (myRank == 0)?std::cout:nullStream;
>>         Teuchos::RCP<Teuchos::FancyOStream> fout =
>> Teuchos::getFancyOStream(Teuchos::rcpFromRef (out));
>>
>>         Teuchos::CommandLineProcessor clp;
>>
>>         int GNEX = 64;
>>         clp.setOption("NX", &GNEX, "Number of elements in X-direction.");
>>
>>         int GNEY = 64;
>>         clp.setOption("NY", &GNEY, "Number of elements in Y-direction.");
>>
>>         int GNEZ = 64;
>>         clp.setOption("NZ", &GNEZ, "Number of elements in Z-direction.");
>>
>>         long long grow = 128;
>>         clp.setOption("grow", &grow, "Global row number");
>>
>>         std::string matrix("original");
>>         clp.setOption("matrix", &matrix, "original = A, exported = b");
>>
>>         clp.recogniseAllOptions(true);
>>         clp.throwExceptions(false);
>>
>>         Teuchos::CommandLineProcessor::EParseCommandLineReturn
>> parseReturn = clp.parse(argc, argv);
>>         if(parseReturn ==
>> Teuchos::CommandLineProcessor::PARSE_HELP_PRINTED) return 0;
>>         if(parseReturn !=
>> Teuchos::CommandLineProcessor::PARSE_SUCCESSFUL) return 1;
>>
>>         const int numGElements = GNEX*GNEY*GNEZ;
>>         const int GNNX = GNEX+1, GNNY = GNEY+1, GNNZ= GNEZ+1;
>>         const int numGNodes    = GNNX*GNNY*GNNZ;
>>         const int numGDof      = GNNX*GNNY*GNNZ;
>>         out << "\t\t Number of global elements is : " << numGElements
>> << "\n";
>>         out << "\t\t Number of global nodes is    : " << numGNodes <<
>> "\n";
>>
>>         Epetra_Map elemZMap(GNEZ, 0, ecomm);
>>         long count = GNNZ/nProc;
>>         long np; np = GNNZ - (nProc*count);
>>         count += (myRank < np)?1:0;
>>
>>         long long int nnu = count*GNNX*GNNY;
>>         LO indexBase = 0;
>>         Teuchos::RCP<const Tpetra::Map<LO, LLT> > gmap =
>> Teuchos::rcp(new Tpetra::Map<LO,LLT>(-1, nnu, indexBase, comm));
>>         int numLNodes = gmap->getNodeNumElements();
>>
>>         int startOverlapZ = (myRank ==
>> 0)?0:(gmap->getGlobalElement(0))-GNNX*GNNY;
>>
>>         int endOverlapZ = (myRank ==
>> nProc-1)?(gmap->getGlobalElement(0)+(count-1)*GNNX*GNNY):(gmap->
>> getGlobalElement(0)+count*GNNX*GNNY);
>>
>>
>>         startOverlapZ /= (GNNX*GNNY); endOverlapZ /= (GNNX*GNNY);
>>
>>         int startX = 0; int endX = GNNX;
>>
>>         // Unique map start/end indices
>>         int startUY = (myRank == 0) ? startOverlapZ : (startOverlapZ+1);
>>         int endUY   = (myRank == nProc-1) ? endOverlapZ : (endOverlapZ-1);
>>
>>         int numLElemZ = elemZMap.NumMyElements();
>>         int numLElemX = GNEX, numLElemY = GNEY;
>>         int numLElems = numLElemX*numLElemY*numLElemZ;
>>         int startElemZ = elemZMap.GID(0);
>>         int endElemZ = elemZMap.GID(numLElemZ-1);
>>         int startNodeZ = startElemZ;
>>         int endNodeZ = endElemZ+1;
>>
>>         typedef shards::CellTopology    CellTopology;
>>         CellTopology
>> h8(shards::getCellTopologyData<shards::Hexahedron<8> >() );
>>         // Get dimensions
>>         int numNodesPerElem = h8.getNodeCount();
>>         int spaceDim = h8.getDimension();
>>
>>         FieldContainer<int> elemNode(numLElems, numNodesPerElem);
>>         int lElem = 0;
>>         for(int k = startElemZ; k <= endElemZ; k++)
>>                 for (int j = 0; j < GNEY; j++)
>>                         for (int i = 0; i < GNEX; i++)
>>                         {
>>                                 elemNode(lElem,0) = (GNNX*GNNY)*k +
>> (GNNX)*j + i;
>>                                 elemNode(lElem,1) = (GNNX*GNNY)*k +
>> (GNNX)*j + i + 1;
>>                                 elemNode(lElem,2) = (GNNX*GNNY)*k +
>> (GNNX)*(j + 1) + i + 1;
>>                                 elemNode(lElem,3) = (GNNX*GNNY)*k +
>> (GNNX)*(j + 1) + i;
>>                                 elemNode(lElem,4) = (GNNX*GNNY)*(k+1)
>> + (GNNX)*j + i;
>>                                 elemNode(lElem,5) = (GNNX*GNNY)*(k+1)
>> + (GNNX)*j + i + 1;
>>                                 elemNode(lElem,6) = (GNNX*GNNY)*(k+1)
>> + (GNNX)*(j + 1) + i + 1;
>>                                 elemNode(lElem,7) = (GNNX*GNNY)*(k+1)
>> + (GNNX)*(j + 1) + i;
>>                                 lElem++;
>>                         }
>>
>>         double leftX = 0.0, rightX = 1.0;
>>         double leftY = 0.0, rightY = 1.0;
>>         double leftZ = 0.0, rightZ = 1.0;
>>
>>         double hx = 1.0, hy = 1.0, hz = 1.0;  // uniform unit mesh spacing
>>         numLNodes = (numLElemZ+1)*(numLElemY+1)*(numLElemX+1);
>>         FieldContainer<double> nodeCoord(numLNodes, spaceDim);
>>         FieldContainer<int> nodeOnBoundary(numLNodes);
>>
>>         int inode = 0;
>>
>>         for (int k = startNodeZ; k <= endNodeZ; k++)
>>                 for (int j = 0; j < GNNY; j++)
>>                         for (int i = 0; i < GNNX; i++)
>>                         {
>>                                 nodeCoord(inode,0) = leftX + (double)i*hx;
>>                                 nodeCoord(inode,1) = leftY + (double)j*hy;
>>                                 nodeCoord(inode,2) = leftZ +
>> (double)k*hz;
>>                                 inode++;
>>                         }
>>
>>         DefaultCubatureFactory<double>  cubFactory;
>>         int cubDegree = 3;
>>         Teuchos::RCP<Cubature<double> > hexCub = cubFactory.create(h8,
>> cubDegree);
>>
>>         int cubDim       = hexCub->getDimension();
>>         int numCubPoints = hexCub->getNumPoints();
>>
>>         FieldContainer<double> cubPoints(numCubPoints, cubDim);
>>         FieldContainer<double> cubWeights(numCubPoints);
>>
>>         hexCub->getCubature(cubPoints, cubWeights);
>>
>>         Basis_HGRAD_HEX_C1_FEM<double, FieldContainer<double> >
>> hexHGradBasis;
>>         int numFieldsG = hexHGradBasis.getCardinality();
>>
>>         FieldContainer<double> hexGVals(numFieldsG, numCubPoints);
>>         FieldContainer<double> hexGrads(numFieldsG, numCubPoints,
>> spaceDim);
>>
>>
>>         hexHGradBasis.getValues(hexGVals, cubPoints, OPERATOR_VALUE);
>>         hexHGradBasis.getValues(hexGrads, cubPoints, OPERATOR_GRAD);
>>
>>
>>         typedef CellTools<double>  CellTools;
>>         typedef FunctionSpaceTools fst;
>>         int numCells = 1;
>>
>>         FieldContainer<double> hexNodes(numCells, numNodesPerElem,
>> spaceDim);
>>
>>         FieldContainer<double> hexJacobian(numCells, numCubPoints,
>> spaceDim, spaceDim);
>>         FieldContainer<double> hexJacobInv(numCells, numCubPoints,
>> spaceDim, spaceDim);
>>         FieldContainer<double> hexJacobDet(numCells, numCubPoints);
>>
>>         FieldContainer<double> localStiffMatrix(numCells, numFieldsG,
>> numFieldsG);
>>         FieldContainer<double> weightedMeasure(numCells, numCubPoints);
>>         FieldContainer<double> hexGradsTransformed(numCells,
>> numFieldsG, numCubPoints, spaceDim);
>>         FieldContainer<double> hexGradsTransformedWeighted(numCells,
>> numFieldsG, numCubPoints, spaceDim);
>>
>>         FieldContainer<double> rhsData(numCells, numCubPoints);
>>         FieldContainer<double> localRHS(numCells, numFieldsG);
>>         FieldContainer<double> hexGValsTransformed(numCells,
>> numFieldsG, numCubPoints);
>>         FieldContainer<double> hexGValsTransformedWeighted(numCells,
>> numFieldsG, numCubPoints);
>>         FieldContainer<double> physCubPoints(numCells, numCubPoints,
>> cubDim);
>>
>>         Teuchos::Array<ST> values(8);
>>         Teuchos::Array<long long int> cols(8);
>>         Teuchos::RCP<matrix_type> A = rcp(new matrix_type(gmap, 27));
>>
>>         long long int rowIndex = 0, colIndex = 0, ierr, ix;
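>>         // Element loop: build each local Q1 stiffness matrix and insert
>>         // it into A (rows on the Z boundary get zero values here)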
>>         for(int i = 0; i < lElem; i++)
>>         {
>>                 for(int j = 0; j < numNodesPerElem; j++)
>>                 {
>>
>>                         ix = (elemNode(i,j)%GNNX);
>>                         hexNodes(0,j,0) = ix*hx;
>>                         hexNodes(0,j,1) = ((elemNode(i,j)%(GNNX*GNNY)
>> - ix)/GNNX)*hy;
>>                         hexNodes(0,j,2) = (elemNode(i,j)/(GNNX*GNNY))*hz;
>>                 }
>>
>>                 CellTools::setJacobian(hexJacobian, cubPoints,
>> hexNodes, h8);
>>                 CellTools::setJacobianInv(hexJacobInv, hexJacobian );
>>                 CellTools::setJacobianDet(hexJacobDet, hexJacobian );
>>
>>                 fst::HGRADtransformGRAD<double>(hexGradsTransformed,
>> hexJacobInv, hexGrads);
>>
>>                 fst::computeCellMeasure<double>(weightedMeasure,
>> hexJacobDet, cubWeights);
>>
>>                 fst::multiplyMeasure<double>(hexGradsTransformedWeighted,
>> weightedMeasure, hexGradsTransformed);
>>
>>                 fst::integrate<double>(localStiffMatrix,
>> hexGradsTransformed, hexGradsTransformedWeighted, COMP_BLAS);
>>                 for (int row = 0; row < numFieldsG; row++)
>>                 {
>>                         rowIndex = elemNode(i,row);
>>                         for (int col = 0; col < numFieldsG; col++)
>>                         {
>>                                 cols[col] = elemNode(i,col);
>>                                 values[col] = localStiffMatrix(0, row,
>> col);
>>                         }
>>                         if ((hexNodes(0,row,2) == 0) ||
>> (hexNodes(0,row,2) == GNEZ))
>>                                 std::fill(values.begin(),
>> values.end(), 0);
>>                         A->insertGlobalValues(rowIndex, cols, values);
>>                 }
>>
>>         }
>>
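>>         // For rows in the bottom/top Z planes (Dirichlet nodes), whose
>>         // off-diagonal entries were zeroed during assembly, set the
>>         // diagonal to 1.0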
>>         LLT drow = 0, maxind = 0, down = GNNX*GNNY, top =
>> GNNX*GNNY*(GNNZ-1);
>>         LO num_local_nodes;
>>         maxind = gmap->getMaxGlobalIndex();
>>
>>         num_local_nodes = gmap->getNodeNumElements();
>>         if ((drow < down) || (maxind >= top))
>>         {
>>                 for(int i = 0; i < num_local_nodes; i++)
>>                 {
>>                         drow = gmap->getGlobalElement(i);
>>                         if ((drow < down) || (drow >= top))
>>                                 A->replaceGlobalValues(drow,
>> Teuchos::tuple<long long int>(drow), Teuchos::tuple<ST>(1.0));
>>                 }
>>         }
>>
>>         A->globalAssemble();
>>         A->fillComplete();
>>
>>         RCP<Tpetra::Vector<SC, LO, LLT, NO> > x1 = rcp(new
>> Tpetra::Vector<SC, LO, LLT, NO> (A->getDomainMap ()));
>>         RCP<Tpetra::Vector<SC, LO, LLT, NO> > b1 = rcp(new
>> Tpetra::Vector<SC, LO, LLT, NO> (A->getRangeMap ()));
>>         x1->putScalar(1.0);
>>         A->apply(*x1, *b1);
>>         out << "Norm of b1 is " << b1->norm2() << std::endl;
>>
>>
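>>         // Build the uniformly distributed map gmap2 and export A onto
>>         // it as matrix B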
>>         Teuchos::RCP< map_type> gmap2 = rcp (new map_type (numGNodes,
>> indexBase, comm, Tpetra::GloballyDistributed));
>>         Teuchos::RCP<matrix_type> B = Teuchos::rcp(new matrix_type
>> (gmap2, 0));
>>         typedef Tpetra::Export<LO, LLT, NO> export_type;
>>         export_type exporter (gmap, gmap2);
>>         B->doExport (*A, exporter, Tpetra::INSERT);
>>         B->fillComplete();
>>
>>         RCP<Tpetra::Vector<SC, LO, LLT, NO> > x2 = rcp(new
>> Tpetra::Vector<SC, LO, LLT, NO> (B->getDomainMap ()));
>>         RCP<Tpetra::Vector<SC, LO, LLT, NO> > b2 = rcp(new
>> Tpetra::Vector<SC, LO, LLT, NO> (B->getRangeMap ()));
>>         x2->putScalar(1.0);
>>         B->apply(*x2, *b2);
>>         out << "Norm of b2 is " << b2->norm2() << std::endl;
>>
>>         Teuchos::Array<ST> crsvals;
>>         size_t NumEntries = 0;
>>         Teuchos::Array<LLT> indices;
>>         NumEntries = B->getNumEntriesInGlobalRow(grow);
>>         if (NumEntries != Teuchos::OrdinalTraits<size_t>::invalid())
>>         {
>>                 std::cout << "outputting crs values " << std::endl;
>>                 indices.resize(NumEntries); crsvals.resize(NumEntries);
>>                 B->getGlobalRowCopy (grow, indices(), crsvals(),
>> NumEntries);
>>                 for(int i = 0; i < indices.size(); i++)
>>                         std::cout << indices[i] << " " << crsvals[i]
>> << "\n";
>>         }
>>
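>>         // Wrap A or B (chosen via --matrix) for Xpetra and set up the
>>         // MueLu hierarchy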
>>         Xpetra::UnderlyingLib lib = Xpetra::UseTpetra;
>>         Teuchos::RCP<Xpetra::CrsMatrix<ST, LO, LLT, NO, LMO> > mueluM_;
>>         if (matrix == "original")
>>                 mueluM_ = rcp(new Xpetra::TpetraCrsMatrix<ST, LO, LLT,
>> NO, LMO>(A));
>>         else
>>                 mueluM_ = rcp(new Xpetra::TpetraCrsMatrix<ST, LO, LLT,
>> NO, LMO>(B));
>>
>>         Teuchos::RCP<Xpetra::Matrix <ST, LO, LLT, NO, LMO> > mueluA =
>> rcp(new Xpetra::CrsMatrixWrap<ST, LO, LLT, NO, LMO>(mueluM_));
>>
>>         Teuchos::RCP<MueLu::Hierarchy<ST, LO, LLT, NO, LMO> > H =
>> rcp(new MueLu::Hierarchy<ST, LO, LLT, NO, LMO> (mueluA));
>>
>>         out << "Begin setup of Heirarchy ..." << std::endl;
>>
>>         H->Setup();
>>
>>         out << "Finished setup of Hierarchy ..." << std::endl;
>>
>> }
>>
>>
>