C# vs C++ 全局照明渲染性能比试

作者：网络转载发布时间：[ 2015/2/10 13:41:35 ] 推荐标签：C++ C# 软件开发

　　近有多篇讨论程序语言趋势的博文，其中谈及到C#的性能问题。本人之前未做过相关测试，自己的回覆流于理论猜测，所以花了点时间做个简单实验，比较C#和C++的性能。
　　实验内容
　　赵姐夫在此回覆认为，C#比C/C++慢，主要在于.Net平台的垃圾回收(garbage collection， GC)机制。若是计算密集型应用，C#和C++产生的原生代码，速度应该相差不大。我对此半信半疑。想到之前看过一个用99行C++代码实现的全局照明(global illumination， GI)渲染程序smallpt ，是纯计算密集的。而且在运算期间，若用C#实现，基本上连GC都可以不用。因此，把该99行代码移植至C#。
　　此渲染器的一些特点如下:
　　使用蒙地卡罗路径追踪(Monte Carlo path-tracing)来产生全局照明效果
　　支持三种双向反射分布函数(bidirectional reflectance distribution function， BRDF): 镜射(specular)、漫射(diffuse)和玻璃(即纯折射的介质)
　　从漫射光源产生柔和阴影(soft shadow)
　　使用2x2超采样(super-sampling)去实现反锯齿
　　使用OpenMP作并行运算，充份利用多核性能
　　当中的术语及技术，之后可能会于图形学博文系列里探讨。本文主要以性能为题。
　　C++版本
　　以下是C++版本代码，作了些许修改。修改地方加上了MILO注译。
#include <math.h>   // smallpt， a Path Tracer by Kevin Beason， 2008
#include <stdlib.h> // Make : g++ -O3 -fopenmp smallpt.cpp -o smallpt
#include <stdio.h> //        Remove "-fopenmp" for g++ version < 4.2
#include <time.h>     // MILO
#include "erand48.inc" // MILO
#define M_PI 3.141592653589793238462643 // MILO
struct Vec {        // Usage: time ./smallpt 5000 && xv image.ppm
double x， y， z;                  // position， also color (r，g，b)
Vec(double x_=0， double y_=0， double z_=0){ x=x_; y=y_; z=z_; }
Vec operator+(const Vec &b) const { return Vec(x+b.x，y+b.y，z+b.z); }
Vec operator-(const Vec &b) const { return Vec(x-b.x，y-b.y，z-b.z); }
Vec operator*(double b) const { return Vec(x*b，y*b，z*b); }
Vec mult(const Vec &b) const { return Vec(x*b.x，y*b.y，z*b.z); }
Vec& norm(){ return *this = *this * (1/sqrt(x*x+y*y+z*z)); }
double dot(const Vec &b) const { return x*b.x+y*b.y+z*b.z; } // cross:
Vec operator%(const Vec &b){return Vec(y*b.z-z*b.y，z*b.x-x*b.z，x*b.y-y*b.x);}
};
struct Ray { Vec o， d; Ray(const Vec &o_， const Vec &d_) : o(o_)， d(d_) {} };
enum Refl_t { DIFF， SPEC， REFR }; // material types， used in radiance()
struct Sphere {
double rad;       // radius
Vec p， e， c;      // position， emission， color
Refl_t refl;      // reflection type (DIFFuse， SPECular， REFRactive)
Sphere(double rad_， Vec p_， Vec e_， Vec c_， Refl_t refl_):
rad(rad_)， p(p_)， e(e_)， c(c_)， refl(refl_) {}
double intersect(const Ray &r) const { // returns distance， 0 if nohit
Vec op = p-r.o; // Solve t^2*d.d + 2*t*(o-p).d + (o-p).(o-p)-R^2 = 0
double t， eps=1e-4， b=op.dot(r.d)， det=b*b-op.dot(op)+rad*rad;
if (det<0) return 0; else det=sqrt(det);
return (t=b-det)>eps ? t : ((t=b+det)>eps ? t : 0);
}
};
Sphere spheres[] = {//Scene: radius， position， emission， color， material
Sphere(1e5， Vec( 1e5+1，40.8，81.6)， Vec()，Vec(.75，.25，.25)，DIFF)，//Left
Sphere(1e5， Vec(-1e5+99，40.8，81.6)，Vec()，Vec(.25，.25，.75)，DIFF)，//Rght
Sphere(1e5， Vec(50，40.8， 1e5)，     Vec()，Vec(.75，.75，.75)，DIFF)，//Back
Sphere(1e5， Vec(50，40.8，-1e5+170)， Vec()，Vec()，           DIFF)，//Frnt
Sphere(1e5， Vec(50， 1e5， 81.6)，    Vec()，Vec(.75，.75，.75)，DIFF)，//Botm
Sphere(1e5， Vec(50，-1e5+81.6，81.6)，Vec()，Vec(.75，.75，.75)，DIFF)，//Top
Sphere(16.5，Vec(27，16.5，47)，       Vec()，Vec(1，1，1)*.999， SPEC)，//Mirr
Sphere(16.5，Vec(73，16.5，78)，       Vec()，Vec(1，1，1)*.999， REFR)，//Glas
Sphere(600， Vec(50，681.6-.27，81.6)，Vec(12，12，12)， Vec()， DIFF) //Lite
};
inline double clamp(double x){ return x<0 ? 0 : x>1 ? 1 : x; }
inline int toInt(double x){ return int(pow(clamp(x)，1/2.2)*255+.5); }
inline bool intersect(const Ray &r， double &t， int &id){
double n=sizeof(spheres)/sizeof(Sphere)， d， inf=t=1e20;
for(int i=int(n);i--;) if((d=spheres[i].intersect(r))&&d<t){t=d;id=i;}
return t<inf;
}
Vec radiance(const Ray &r， int depth， unsigned short *Xi){
double t;                               // distance to intersection
int id=0;                               // id of intersected object
if (!intersect(r， t， id)) return Vec(); // if miss， return black
const Sphere &obj = spheres[id];        // the hit object
Vec x=r.o+r.d*t， n=(x-obj.p).norm()， nl=n.dot(r.d)<0?n:n*-1， f=obj.c;
double p = f.x>f.y && f.x>f.z ? f.x : f.y>f.z ? f.y : f.z; // max refl
if (++depth>5) if (erand48(Xi)<p) f=f*(1/p); else return obj.e; //R.R.
if (depth > 100) return obj.e; // MILO
if (obj.refl == DIFF){                  // Ideal DIFFUSE reflection
double r1=2*M_PI*erand48(Xi)， r2=erand48(Xi)， r2s=sqrt(r2);
Vec w=nl， u=((fabs(w.x)>.1?Vec(0，1):Vec(1))%w).norm()， v=w%u;
Vec d = (u*cos(r1)*r2s + v*sin(r1)*r2s + w*sqrt(1-r2)).norm();
return obj.e + f.mult(radiance(Ray(x，d)，depth，Xi));
} else if (obj.refl == SPEC)            // Ideal SPECULAR reflection
return obj.e + f.mult(radiance(Ray(x，r.d-n*2*n.dot(r.d))，depth，Xi));
Ray reflRay(x， r.d-n*2*n.dot(r.d));     // Ideal dielectric REFRACTION
bool into = n.dot(nl)>0;                // Ray from outside going in?
double nc=1， nt=1.5， nnt=into?nc/nt:nt/nc， ddn=r.d.dot(nl)， cos2t;
if ((cos2t=1-nnt*nnt*(1-ddn*ddn))<0)    // Total internal reflection
return obj.e + f.mult(radiance(reflRay，depth，Xi));
Vec tdir = (r.d*nnt - n*((into?1:-1)*(ddn*nnt+sqrt(cos2t)))).norm();
double a=nt-nc， b=nt+nc， R0=a*a/(b*b)， c = 1-(into?-ddn:tdir.dot(n));
double Re=R0+(1-R0)*c*c*c*c*c，Tr=1-Re，P=.25+.5*Re，RP=Re/P，TP=Tr/(1-P);
return obj.e + f.mult(depth>2 ? (erand48(Xi)<P ?   // Russian roulette
radiance(reflRay，depth，Xi)*RP:radiance(Ray(x，tdir)，depth，Xi)*TP) :
radiance(reflRay，depth，Xi)*Re+radiance(Ray(x，tdir)，depth，Xi)*Tr);
}
int main(int argc， char *argv[]){
clock_t start = clock(); // MILO
int w=512， h=512， samps = argc==2 ? atoi(argv[1])/4 : 250; // # samples
Ray cam(Vec(50，52，295.6)， Vec(0，-0.042612，-1).norm()); // cam pos， dir
Vec cx=Vec(w*.5135/h)， cy=(cx%cam.d).norm()*.5135， r， *c=new Vec[w*h];
#pragma omp parallel for schedule(dynamic， 1) private(r)       // OpenMP
for (int y=0; y<h; y++){                       // Loop over image rows
fprintf(stderr，" Rendering (%d spp) %5.2f%%"，samps*4，100.*y/(h-1));
unsigned short Xi[3]={0，0，y*y*y}; // MILO
for (unsigned short x=0; x<w; x++)   // Loop cols
for (int sy=0， i=(h-y-1)*w+x; sy<2; sy++)     // 2x2 subpixel rows
for (int sx=0; sx<2; sx++， r=Vec()){        // 2x2 subpixel cols
for (int s=0; s<samps; s++){
double r1=2*erand48(Xi)， dx=r1<1 ? sqrt(r1)-1: 1-sqrt(2-r1);
double r2=2*erand48(Xi)， dy=r2<1 ? sqrt(r2)-1: 1-sqrt(2-r2);
Vec d = cx*( ( (sx+.5 + dx)/2 + x)/w - .5) +
cy*( ( (sy+.5 + dy)/2 + y)/h - .5) + cam.d;
r = r + radiance(Ray(cam.o+d*140，d.norm())，0，Xi)*(1./samps);
} // Camera rays are pushed ^^^^^ forward to start in interior
c[i] = c[i] + Vec(clamp(r.x)，clamp(r.y)，clamp(r.z))*.25;
}
}
printf(" %f sec "， (float)(clock() - start)/CLOCKS_PER_SEC); // MILO
FILE *f = fopen("image.ppm"， "w");         // Write image to PPM file.
fprintf(f， "P3 %d %d %d "， w， h， 255);
for (int i=0; i<w*h; i++)
fprintf(f，"%d %d %d "， toInt(c[i].x)， toInt(c[i].y)， toInt(c[i].z));
}
　　由于Visual C++没有erand48()函数，便在网上找到一个PostreSQL的实现。此外，为了比较公平，分别测试使用和禁用OpenMP情况下的性能。
　　本人亦为了显示C++特有的能力，另外作一个版本，采用微软DirectX SDK中的C++ XNA数学库进行SIMD矢量加速(XNA Game Studio也有.Net用的XNA数学库，但本文并没有使用)。由于XNA数学库采用单精度浮点数，对这个特别场景(6面墙壁其实是由6个巨大的球体组成)有超出精度范围的问题。因此，我对这版本里的场境稍作修改。又因为erand48()函数是传回双精度的随机数，多次转换比较慢，此版本换了之前博文使用的LCG实现。