I'm using opencv242 + VS2010 by a notebook.
I tried to do some simple test of the GPU block in OpenCV, but it showed the GPU is 100 times slower than CPU codes.
In this code, I just turn the color image to grayscale image, use the function of cvtColor
Here is my code, PART1 is CPU code(test cpu RGB2GRAY), PART2 is upload image to GPU, PART3 is GPU RGB2GRAY, PART4 is CPU RGB2GRAY again. There are 3 things makes me so wondering:
1 In my code, part1 is 0.3ms, while part4 (which is exactly same with part1) is 40ms!!!
2 The part2 which upload image to GPU is 6000ms!!!
3 Part3( GPU codes) is 11ms, it is so slow for this simple image!
#include "StdAfx.h"
#include <iostream>
#include "opencv2/opencv.hpp"
#include "opencv2/gpu/gpu.hpp"
#include "opencv2/gpu/gpumat.hpp"
#include "opencv2/core/core.hpp"
#include "opencv2/highgui/highgui.hpp"
#include <cuda.h>
#include <cuda_runtime_api.h>
#include <ctime>
#include <windows.h>
using namespace std;
using namespace cv;
using namespace cv::gpu;
int main()
{
LARGE_INTEGER freq;
LONGLONG QPart1,QPart6;
double dfMinus, dfFreq, dfTim;
QueryPerformanceFrequency(&freq);
dfFreq = (double)freq.QuadPart;
cout<<getCudaEnabledDeviceCount()<<endl;
Mat img_src = imread("d:\CUDA\train.png", 1);
// PART1 CPU code~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
// From color image to grayscale image.
QueryPerformanceCounter(&freq);
QPart1 = freq.QuadPart;
Mat img_gray;
cvtColor(img_src,img_gray,CV_BGR2GRAY);
QueryPerformanceCounter(&freq);
QPart6 = freq.QuadPart;
dfMinus = (double)(QPart6 - QPart1);
dfTim = 1000 * dfMinus / dfFreq;
printf("CPU RGB2GRAY running time is %.2f ms
",dfTim);
// PART2 GPU upload image~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
GpuMat gimg_src;
QueryPerformanceCounter(&freq);
QPart1 = freq.QuadPart;
gimg_src.upload(img_src);
QueryPerformanceCounter(&freq);
QPart6 = freq.QuadPart;
dfMinus = (double)(QPart6 - QPart1);
dfTim = 1000 * dfMinus / dfFreq;
printf("Read image running time is %.2f ms
",dfTim);
GpuMat dst1;
QueryPerformanceCounter(&freq);
QPart1 = freq.QuadPart;
/*dst.upload(src_host);*/
dst1.upload(imread("d:\CUDA\train.png", 1));
QueryPerformanceCounter(&freq);
QPart6 = freq.QuadPart;
dfMinus = (double)(QPart6 - QPart1);
dfTim = 1000 * dfMinus / dfFreq;
printf("Read image running time 2 is %.2f ms
",dfTim);
// PART3~ GPU code~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
// gpuimage From color image to grayscale image.
QueryPerformanceCounter(&freq);
QPart1 = freq.QuadPart;
GpuMat gimg_gray;
gpu::cvtColor(gimg_src,gimg_gray,CV_BGR2GRAY);
QueryPerformanceCounter(&freq);
QPart6 = freq.QuadPart;
dfMinus = (double)(QPart6 - QPart1);
dfTim = 1000 * dfMinus / dfFreq;
printf("GPU RGB2GRAY running time is %.2f ms
",dfTim);
// PART4~CPU code(again)~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
// gpuimage From color image to grayscale image.
QueryPerformanceCounter(&freq);
QPart1 = freq.QuadPart;
Mat img_gray2;
cvtColor(img_src,img_gray2,CV_BGR2GRAY);
BOOL i_test=QueryPerformanceCounter(&freq);
printf("%d
",i_test);
QPart6 = freq.QuadPart;
dfMinus = (double)(QPart6 - QPart1);
dfTim = 1000 * dfMinus / dfFreq;
printf("CPU RGB2GRAY running time is %.2f ms
",dfTim);
cvWaitKey();
getchar();
return 0;
}
See Question&Answers more detail:os