#include #include #ifdef __unix__ #include "CL/opencl.hpp" #endif #ifdef _WIN32 #include "CL/cl.hpp" #endif #include #include #include struct finfo { char *fname; std::uintmax_t fsize; size_t flength; }; std::vector read_file(char *fname) { auto fsize = std::filesystem::file_size(fname); if (fsize % sizeof(float) != 0) { std::cerr << fname << " size is not a multiple of float size" << std::endl; exit(1); } size_t flength = fsize / sizeof(float); std::ifstream file(fname, std::ios::binary); if (!file) { std::cerr << "Failed to open " << fname << std::endl; exit(1); } std::vector vec(flength); file.read(reinterpret_cast(vec.data()), fsize); if (!file) { std::cerr << "Failed to read " << fname << std::endl; exit(1); } return vec; } void write_file(std::vector vec, char *fname) { std::ofstream file(fname, std::ios::binary); if (!file) { std::cerr << "Failed to open " << fname << std::endl; exit(1); } file.write(reinterpret_cast(vec.data()), sizeof(float) * vec.size()); if (!file) { std::cerr << "Failed to write " << fname << std::endl; exit(1); } } int main(int argc, char *argv[]) { if (argc != 4) { std::cerr << "Two input and one output files must be provided:" << std::endl; std::cerr << "./filter input_file.bin filter_coefs.bin output_file.bin" << std::endl; exit(1); } // Исходные данные std::vector input_file = read_file(argv[1]); std::vector filter_coefs = read_file(argv[2]); size_t output_length = input_file.size() - filter_coefs.size() + 1; std::vector output_file(output_length); // Получаем платформы и устройства std::vector platforms; cl::Platform::get(&platforms); cl::Device device; for (const auto &platform : platforms) { std::vector devices; platform.getDevices(CL_DEVICE_TYPE_CPU, &devices); if (!devices.empty()) { device = devices.front(); break; } } // Создаем контекст и очередь команд cl::Context context(device); cl::CommandQueue queue(context, device); // Выделяем память на GPU cl::Buffer input_buffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(float) * input_file.size(), input_file.data()); cl::Buffer filter_buffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(float) * filter_coefs.size(), filter_coefs.data()); cl::Buffer output_buffer(context, CL_MEM_WRITE_ONLY, sizeof(float) * output_file.size()); // Компилируем ядро const char *kernel_code = R"( __kernel void convolve(__constant const float *input, __constant const float *filter, __global float *output, const int filter_length) { int id = get_global_id(0); float sum = 0.0f; for (int i = 0; i < filter_length; ++i) { const int input_pos = id - i + filter_length - 1; sum += input[input_pos] * filter[i]; } output[id] = sum; } )"; cl::Program program(context, kernel_code); program.build("-cl-std=CL1.2"); // Создаем ядро и устанавливаем аргументы cl::Kernel kernel(program, "convolve"); kernel.setArg(0, input_buffer); kernel.setArg(1, filter_buffer); kernel.setArg(2, output_buffer); kernel.setArg(3, static_cast(filter_coefs.size())); // Запускаем ядро queue.enqueueNDRangeKernel(kernel, cl::NullRange, cl::NDRange(output_length), cl::NullRange); // Копируем результат обратно на CPU queue.enqueueReadBuffer(output_buffer, CL_TRUE, 0, sizeof(float) * output_length, output_file.data()); // Выводим результат for (float c : output_file) { std::cout << c << " "; } write_file(output_file, argv[3]); return 0; }