// Pasted from https://pastein.ru/t/kD
// (paste-site UI text: "copy the unique link to share")
// (paste-site UI text: "Loading data")
#include <cstdlib>
#include <ostream>
#include <iostream>
#include <iomanip>
#include <chrono>
#include <stdio.h>
#include <time.h>
#include <cuda.h>
#include "cuda_runtime.h"
#include <cuda_runtime_api.h>
#include <device_functions.h>
#include "device_launch_parameters.h"
using namespace std;
// CPU reference implementation of the column regrouping.
//
// Within every row, the element at column j is written to column
// j / 4 + (j % 4) * matrix_width / 4 of the output row: columns whose index
// has the same remainder modulo 4 end up in one contiguous quarter of the row.
// The mapping is a permutation only when matrix_width is a multiple of 4.
//
// first_matrix   - input, row-major, matrix_height * matrix_width ints
// second_matrix  - output, same layout and size as the input
// Returns the wall-clock time of the loop in milliseconds.
double transform_matrix_cpu(
    int* first_matrix,
    int* second_matrix,
    const int matrix_height,
    const int matrix_width) {
    const auto started = std::chrono::steady_clock::now();

    for (int row = 0; row < matrix_height; ++row) {
        for (int col = 0; col < matrix_width; ++col) {
            const int source_index = row * matrix_width + col;
            // Destination column: position inside the group plus the group's start.
            const int target_column = col / 4 + col % 4 * matrix_width / 4;
            second_matrix[row * matrix_width + target_column] = first_matrix[source_index];
        }
    }

    const auto finished = std::chrono::steady_clock::now();
    const std::chrono::duration<double, std::milli> elapsed_ms(finished - started);
    return elapsed_ms.count();
}
// GPU version of the column regrouping: every thread copies one element.
//
// Launch layout: 2D grid of 2D blocks; the x dimension indexes columns and the
// y dimension indexes rows. Threads that fall outside the matrix do nothing,
// so the grid may overshoot the matrix in either dimension.
//
// The element at (row, col) is written to column
// col / 4 + (col % 4) * matrix_width / 4 of the same row in second_matrix.
__global__ void kernelGpu(
    int* first_matrix,
    int* second_matrix,
    const int matrix_height,
    const int matrix_width) {
    const int col = blockIdx.x * blockDim.x + threadIdx.x;
    const int row = blockIdx.y * blockDim.y + threadIdx.y;

    if (col < matrix_width && row < matrix_height) {
        const int group = col % 4;
        const int target_column = col / 4 + group * matrix_width / 4;
        second_matrix[row * matrix_width + target_column] = first_matrix[row * matrix_width + col];
    }
}
// Runs the column regrouping on the GPU and copies the result back to the host.
//
// first_matrix   - host input, row-major, matrix_height * matrix_width ints
// second_matrix  - host output, same layout and size; overwritten on success
//
// Returns the kernel execution time in milliseconds as measured with CUDA
// events (host<->device copies are not included). Returns 0.0f without
// touching second_matrix when either dimension is not positive, and -1.0f
// after printing a diagnostic to stderr when any CUDA call fails.
//
// The regrouping is a permutation only when matrix_width is a multiple of 4;
// for other widths some output elements are never written by the kernel.
float transform_matrix_gpu(
    int* first_matrix,
    int* second_matrix,
    const int matrix_height,
    const int matrix_width) {
    // Nothing to do; this also avoids launching a grid with a zero dimension.
    if (matrix_height <= 0 || matrix_width <= 0)
        return 0.0f;

    // Compute the size in size_t so large matrices do not overflow int.
    const size_t matrix_bytes =
        static_cast<size_t>(matrix_height) * static_cast<size_t>(matrix_width) * sizeof(int);

    int* gpu_first_matrix = nullptr;
    int* gpu_second_matrix = nullptr;
    cudaEvent_t startTime = nullptr;
    cudaEvent_t stopTime = nullptr;
    float result_time = -1.0f;

    // Reports a failed CUDA call on stderr; returns true when the call succeeded.
    auto succeeded = [](cudaError_t status, const char* what) -> bool {
        if (status == cudaSuccess)
            return true;
        fprintf(stderr, "CUDA error in %s: %s\n", what, cudaGetErrorString(status));
        return false;
    };

    // Short-circuit evaluation stops at the first failing step.
    bool ok =
        succeeded(cudaMalloc((void**)&gpu_first_matrix, matrix_bytes), "cudaMalloc (input)") &&
        succeeded(cudaMalloc((void**)&gpu_second_matrix, matrix_bytes), "cudaMalloc (output)") &&
        succeeded(cudaMemcpy(gpu_first_matrix, first_matrix, matrix_bytes, cudaMemcpyHostToDevice),
                  "cudaMemcpy (host to device)") &&
        succeeded(cudaEventCreate(&startTime), "cudaEventCreate (start)") &&
        succeeded(cudaEventCreate(&stopTime), "cudaEventCreate (stop)");

    if (ok) {
        const dim3 block(32, 32);
        // kernelGpu maps x to columns and y to rows, so the grid must cover the
        // width in x and the height in y (rounded up to whole blocks).
        const dim3 grid(
            (matrix_width + block.x - 1) / block.x,
            (matrix_height + block.y - 1) / block.y);

        ok = succeeded(cudaEventRecord(startTime), "cudaEventRecord (start)");
        if (ok) {
            kernelGpu<<<grid, block>>>(
                gpu_first_matrix,
                gpu_second_matrix,
                matrix_height,
                matrix_width);
            ok =
                // Launch-configuration errors are only visible through cudaGetLastError.
                succeeded(cudaGetLastError(), "kernelGpu launch") &&
                succeeded(cudaEventRecord(stopTime), "cudaEventRecord (stop)") &&
                // Waiting on the stop event also surfaces errors raised while the kernel ran.
                succeeded(cudaEventSynchronize(stopTime), "cudaEventSynchronize") &&
                succeeded(cudaEventElapsedTime(&result_time, startTime, stopTime),
                          "cudaEventElapsedTime") &&
                succeeded(cudaMemcpy(second_matrix, gpu_second_matrix, matrix_bytes, cudaMemcpyDeviceToHost),
                          "cudaMemcpy (device to host)");
        }
    }

    if (!ok)
        result_time = -1.0f;

    // Release everything that was acquired, on success and on failure alike.
    if (startTime != nullptr)
        cudaEventDestroy(startTime);
    if (stopTime != nullptr)
        cudaEventDestroy(stopTime);
    cudaFree(gpu_first_matrix);
    cudaFree(gpu_second_matrix);

    return result_time;
}
// Element-wise equality check of two row-major height x width matrices.
// Returns false at the first differing element, true when no element differs
// (which includes the case where either dimension is not positive).
bool compare_matrix(int* first, int* second, int height, int width) {
    int row = 0;
    while (row < height) {
        int col = 0;
        while (col < width) {
            const int index = row * width + col;
            if (first[index] != second[index])
                return false;
            ++col;
        }
        ++row;
    }
    return true;
}
// Allocates a zero-filled buffer of height * width ints with calloc.
// Returns whatever calloc returns; the caller owns the buffer and releases it
// with free().
int* initialize_matrix(const int height, const int width) {
    void* zeroed_storage = calloc(height * width, sizeof(int));
    return static_cast<int*>(zeroed_storage);
}
// Fills a row-major height x width matrix with pseudo-random values in the
// range [1, 100], drawn from rand() in row-major order. The sequence depends
// on the current rand() seed; this function does not seed the generator.
void fill_random_matrix(int* matrix, int height, int width) {
    for (int row = 0; row < height; ++row)
        for (int col = 0; col < width; ++col)
            matrix[row * width + col] = rand() % 100 + 1;
}
// Prints the top-left corner of a row-major height x width matrix to cout:
// at most 16 rows and 16 columns, each value right-aligned in a 4-character
// field, one matrix row per output line.
void show_matrix(int* matrix, const int height, const int width) {
    const int preview_limit = 16;

    int rows_shown = height;
    if (rows_shown > preview_limit)
        rows_shown = preview_limit;

    int cols_shown = width;
    if (cols_shown > preview_limit)
        cols_shown = preview_limit;

    for (int row = 0; row < rows_shown; ++row) {
        int col = 0;
        while (col < cols_shown) {
            // The row stride is the full width, not the truncated preview width.
            std::cout << std::setw(4) << matrix[row * width + col];
            ++col;
        }
        std::cout << std::endl;
    }
}
int main() {
int matrix_height;
int matrix_width;
cout << "Matrix height: ";
cin >> matrix_height;
cout << "Matrix width: ";
cin >> matrix_width;
matrix_width = matrix_width - matrix_width % 4 + (matrix_width % 4 != 0 ? 4 : 0);
const auto first_matrix = initialize_matrix(matrix_height, matrix_width);
auto second_matrix = initialize_matrix(matrix_height, matrix_width);
auto third_matrix = initialize_matrix(matrix_height, matrix_width);
fill_random_matrix(first_matrix, matrix_height, matrix_width);
auto cpu_time = transform_matrix_cpu(
first_matrix,
second_matrix,
matrix_height,
matrix_width);
auto gpu_time = transform_matrix_gpu(
first_matrix,
third_matrix,
matrix_height,
matrix_width);
show_matrix(first_matrix, matrix_height, matrix_width);
cout << endl;
show_matrix(second_matrix, matrix_height, matrix_width);
cout << endl;
show_matrix(third_matrix, matrix_height, matrix_width);
cout << endl;
cout << "CPU Time: " << cpu_time << " ms." << endl;
cout << "GPU Time: " << gpu_time << " ms." << endl;
const string equal = compare_matrix(second_matrix, third_matrix, matrix_height, matrix_width) == 1 ? "true" : "false";
cout << "Compare CPU and GPU - " << equal << endl;
system("pause");
}