C++ AMP 矩阵分块
参考自:https://msdn.microsoft.com/en-us/library/hh873135.aspx
#include <iostream> #include <iomanip> #include <amp.h> using namespace concurrency; const int ROWS = 8; const int COLS = 9; // tileRow and tileColumn specify the tile that each thread is in. // globalRow and globalColum specify the location of the thread in the array_view. // localRow and localColumn specify the location of the thread relativie to the tile. struct Description { int value; int tileRow; int tileColumn; int globalRow; int globalColumn; int localRow; int localColumn; }; // A helper function for formatting the output. void SetConsoleColor(int color) { int colorValue = (color == 4) ? 4 : 2; SetConsoleTextAttribute(GetStdHandle(STD_OUTPUT_HANDLE), colorValue); } // A helper function for farmatting the output. void SetConsoleSize(int height, int width) { COORD coord; coord.X = width; coord.Y = height; SetConsoleScreenBufferSize(GetStdHandle(STD_OUTPUT_HANDLE), coord); SMALL_RECT *rect = new SMALL_RECT(); rect->Left = 0; rect->Top = 0; rect->Right = width; rect->Bottom = height; SetConsoleWindowInfo(GetStdHandle(STD_OUTPUT_HANDLE), true, rect); } // This method creates an 8 x 9 matrix of Description structures. In the // call to parallel_for_each, the structure is updated with tile,global, and local indices. void TilingDescription() { // Create 72(8x9) Description structures. std::vector<Description> descs; for (int i = 0; i < ROWS * COLS; i++) { Description d = { i, 0, 0, 0, 0, 0, 0 }; descs.push_back(d); } // Create an array_view from the Description structures. extent<2> matrix(ROWS, COLS); array_view<Description, 2> descriptions(matrix, descs); // Update each Description with the tile, global. and local indices. 
parallel_for_each(descriptions.extent.tile<2, 3>(), [=](tiled_index<2, 3> t_idx) restrict(amp) { descriptions[t_idx].globalRow = t_idx.global[0]; descriptions[t_idx].globalColumn = t_idx.global[1]; descriptions[t_idx].tileRow = t_idx.tile[0]; descriptions[t_idx].tileColumn = t_idx.tile[1]; descriptions[t_idx].localRow = t_idx.local[0]; descriptions[t_idx].localColumn = t_idx.local[1]; }); // Print out the Description structure for each element in the matrix. // Tiles are displayed in red and green to distinguish them from each other. SetConsoleSize(100, 150); for (int row = 0; row < ROWS; row++) { for (int column = 0; column < COLS; column++) { SetConsoleColor((descriptions(row, column).tileRow + descriptions(row, column).tileColumn) % 2); std::cout << "Value: " << std::setw(2) << descriptions(row, column).value << " "; } std::cout << "\n"; for (int column = 0; column < COLS; column++) { SetConsoleColor((descriptions(row, column).tileRow + descriptions(row, column).tileColumn) % 2); std::cout << "Tile: " << "(" << descriptions(row, column).tileRow << "," << descriptions(row, column).tileColumn << ") "; } std::cout << "\n"; for (int column = 0; column < COLS; column++) { SetConsoleColor((descriptions(row, column).tileRow + descriptions(row, column).tileColumn) % 2); std::cout << "Global: " << "(" << descriptions(row, column).globalRow << "," << descriptions(row, column).globalColumn << ")"; } std::cout << "\n"; for (int column = 0; column < COLS; column++) { SetConsoleColor((descriptions(row, column).tileRow + descriptions(row, column).tileColumn) % 2); std::cout << "Local: " << "(" << descriptions(row, column).localRow << "," << descriptions(row, column).localColumn << ")"; } std::cout << "\n"; std::cout << "\n"; } } #define SAMPLESIZE 2 #define MATRIXSIZE 8 void SamplingExample() { // Create data and array_view for the matrix. 
std::vector<float> rawData; for (int i = 0; i < MATRIXSIZE * MATRIXSIZE; i++) { rawData.push_back((float)i); } extent<2> dataExtent(MATRIXSIZE, MATRIXSIZE); array_view<float, 2> matrix(dataExtent, rawData); // Create the array for the averages. // There is one element in the output for each tile in the data. std::vector<float> outputData; int outputSize = MATRIXSIZE / SAMPLESIZE; for (int j = 0; j < outputSize * outputSize; j++) { outputData.push_back((float)0); } extent<2> outputExtent(MATRIXSIZE / SAMPLESIZE, MATRIXSIZE / SAMPLESIZE); array<float, 2> averages(outputExtent, outputData.begin(), outputData.end()); // Use tiles that are SAMPLESIZE x SAMPLESIZE // Find the average of the values in each tile. // The only reference-type variable you can pass into the parallel_fo_each_call // is a concurrency::array. parallel_for_each(matrix.extent.tile<SAMPLESIZE, SAMPLESIZE>(), [=, &averages](tiled_index<SAMPLESIZE, SAMPLESIZE> t_idx) restrict(amp) { // Copy the values of the tile into a tile-sized array. tile_static float tileValues[SAMPLESIZE][SAMPLESIZE]; tileValues[t_idx.local[0]][t_idx.local[1]] = matrix[t_idx]; // Wait for the tile-sized array to load before you calculate the average. t_idx.barrier.wait(); // IF you remove the if statement, then the calculation executes for every // thread in the tile, and makes the same assignemnt to averages each time. if (t_idx.local[0] == 0 && t_idx.local[1] == 0) { for (int trow = 0; trow < SAMPLESIZE; trow++) { for (int tcol = 0; tcol < SAMPLESIZE; tcol++) { averages(t_idx.tile[0], t_idx.tile[1]) += tileValues[trow][tcol]; } } averages(t_idx.tile[0], t_idx.tile[1]) /= (float)(SAMPLESIZE * SAMPLESIZE); } } ); // Print out the results. // You cannot access the values in aveages directly. You must copy them // vack to a CPU variable. 
outputData = averages; for (int row = 0; row < outputSize; row++) { for (int col = 0; col < outputSize; col++) { std::cout << outputData[row * outputSize + col] << " "; } std::cout << "\n"; } // Output for SAMPLESSIZE = 2 is: // 4.5 6.5 8.5 10.5 // 20.5 22.5 24.5 26.5 // 36.5 38.5 40.5 42.5 // 52.5 54.5 56.5 58.5 // Output for SAMPLESIZE = 4 is: // 13.5 17.5 // 45.5 49.5 } void main() { //TilingDescription(); SamplingExample(); char wait; std::cin >> wait; }
郑重声明:本站内容如果来自互联网及其他传播媒体,其版权均属原媒体及文章作者所有。转载目的在于传递更多信息及用于网络分享,并不代表本站赞同其观点和对其真实性负责,也不构成任何其他建议。