本文共 4638 字,大约阅读时间需要 15 分钟。
参考:
#include "cuda_runtime.h"#include "device_functions.h"#include "cublas_v2.h"#include "device_launch_parameters.h"#include#include #include #include #include #include #include using namespace std;#include #include #include #include #include struct Point { float x; float y; float z; int cluster; int noise; //-1 noise};int eps = 2;//neighborhood radiusint min_nb = 3;Point host_sample[2500];int block_num, thread_num;float __device__ dev_euclidean_distance(const Point &src, const Point &dest) { float res = (src.x - dest.x) * (src.x - dest.x) + (src.y - dest.y) * (src.y - dest.y) + (src.z - dest.z) * (src.z - dest.z); return sqrt(res);}/*to get the total list*/void __global__ dev_region_query(Point* sample, int num, int* neighbors, int eps, int min_nb) { unsigned int tid = blockIdx.x * blockDim.x + threadIdx.x; unsigned int line,col,pointer = tid; unsigned int count; while (pointer < num * num) {//全场唯一id line = pointer / num; col = pointer % num; float radius; if (line <= col) { radius = dev_euclidean_distance(sample[line], sample[col]); if (radius < eps) { neighbors[pointer] = 1; } neighbors[col * num + line] = neighbors[pointer];//对角线 } pointer += blockDim.x * gridDim.x; } __syncthreads(); pointer = tid; while (pointer < num) { count = 0; line = pointer * num; for (int i = 0; i < num; i++) { if (pointer != i && neighbors[line+i]) {//除了p点外邻域元素个数 count++; } } if (count >= min_nb) { sample[pointer].noise++; } pointer += blockDim.x * gridDim.x; }}void host_algorithm_dbscan(Point* host_sample, int num) { /*sample*/ Point* cuda_sample; cudaMalloc((void**)&cuda_sample, num * sizeof(Point)); cudaMemcpy(cuda_sample, host_sample, num * sizeof(Point), cudaMemcpyHostToDevice); /*neighbor list*/ int *host_neighbor = new int[num*num](); int *dev_neighbor; cudaMalloc((void**)&dev_neighbor, num * num * sizeof(int)); dev_region_query << > > (cuda_sample, num, dev_neighbor, eps, min_nb); cudaMemcpy(host_sample, cuda_sample, num * sizeof(Point), cudaMemcpyDeviceToHost); cudaMemcpy(host_neighbor, dev_neighbor, num * num * sizeof(int), cudaMemcpyDeviceToHost); queue expand; int cur_cluster = 0; for (int i = 0; i < num; i++) { if (host_sample[i].noise >= 0 && host_sample[i].cluster < 1) { host_sample[i].cluster = ++cur_cluster; int src = i * num; for (int j = 0; j < num; j++) { if (host_neighbor[src + j]) { host_sample[j].cluster = cur_cluster; expand.push(j); } } while (!expand.empty()) {/*expand the cluster*/ if (host_sample[expand.front()].noise >= 0) { src = expand.front() * num; for (int j = 0; j < num; j++) { if (host_neighbor[src + j] && host_sample[j].cluster < 1) { host_sample[j].cluster = cur_cluster; expand.push(j); } } } expand.pop(); } } } cudaFree(cuda_sample);cudaFree(dev_neighbor);}int main(int argc, char* argv[]) { cv::Mat ori_image = cv::imread("result___.tiff", -1); cv::cvtColor(ori_image, ori_image, cv::COLOR_BGR2RGB); cv::Mat image_out; ori_image.convertTo(image_out, CV_32FC3); //cv::Size dsize = cv::Size(800, 600); //cv::Mat image_out = cv::Mat(dsize, CV_32FC3); //cv::resize(floatMat, image_out, dsize, 0, 0, cv::INTER_NEAREST); int m = image_out.rows; int n = image_out.cols; int o = image_out.channels(); if (o != 3) return 0; int bh = 50; int bw = 50; int tsize = bh*bw; clock_t starts, finishs; double duration; starts = clock(); ofstream fout; fout.open("result.txt"); cout << "------>TOTAL SAMPLE NUMB0->" << tsize << "<-----" << endl; cout << "------>BL0CK=10 & THREAD=100<-------- " << endl; block_num = 32; thread_num = 256; cout << "CALCULATING BY CUDA GTX965M......\n" << endl; cudaEvent_t start, end; cudaEventCreate(&start); cudaEventCreate(&end); cudaEventRecord(start, 0); vector typecc(tsize, 0); for (size_t i = 0; i < m; i += bh) { for (size_t j = 0; j < n; j += bw) { cv::Rect rect(j, i, bw, bh); cv::Mat roi = image_out(rect).clone(); roi = roi.t(); cv::Mat dst = roi.reshape(1, tsize); for (size_t k = 0; k < tsize; k++) { host_sample[k].x = dst.at (k, 0); host_sample[k].y = dst.at (k, 1); host_sample[k].z = dst.at (k, 2); host_sample[k].noise = -1; host_sample[k].cluster = -1; } host_algorithm_dbscan(host_sample, tsize); vector typecc(tsize, 0); vector typetemp(tsize, 0); for (size_t k = 0; k < tsize; k++) { typecc[k] = host_sample[k].cluster; typetemp[k] = host_sample[k].cluster; } sort(typecc.begin(), typecc.end()); typecc.erase(unique(typecc.begin(), typecc.end()), typecc.end()); } } cudaEventRecord(end, 0); cudaEventSynchronize(end); float time; cudaEventElapsedTime(&time, start, end); cout<<"time: "<< time <<"ms --device\n"< "< << endl; } fout.close(); system("pause"); return 0;}
转载地址:http://kzwlf.baihongyu.com/