From owner-FreeBSD-users-jp@jp.freebsd.org  Sun Mar 10 10:45:23 1996
Received: by mail.jp.freebsd.org (8.7.3+2.6Wbeta5/8.7.3) id KAA20208
	Sun, 10 Mar 1996 10:45:23 +0900 (JST)
Received: by mail.jp.freebsd.org (8.7.3+2.6Wbeta5/8.7.3) with ESMTP id KAA20203
	for <FreeBSD-users-jp@jp.freebsd.org>; Sun, 10 Mar 1996 10:45:21 +0900 (JST)
From: Hiroshi Murakami <hiroshi@necom830.hpcl.titech.ac.jp>
Message-Id: <199603100137.KAA03540@necom830.hpcl.titech.ac.jp>
Received: by necom830.hpcl.titech.ac.jp (8.6.11/TM2.1)
	id KAA03540; Sun, 10 Mar 1996 10:36:59 +0859
To: FreeBSD-users-jp@jp.freebsd.org
Date: Sun, 10 Mar 96 10:36:58 JST
In-Reply-To: <9603051253.AA08927@n128>; from "Takashi Saeki" at Mar 5, 96 9:52 pm
X-Mailer: ELM [version 2.3 PL11]
Reply-To: FreeBSD-users-jp@jp.freebsd.org
X-Distribute: distribute [version 2.1 (Alpha) patchlevel=19]
X-Sequence: FreeBSD-users-jp 667
Subject: [FreeBSD-users-jp 667] a benchmark of memory-read bandwidth
Errors-To: owner-FreeBSD-users-jp@jp.freebsd.org
Sender: owner-FreeBSD-users-jp@jp.freebsd.org


$@$3$l$O(B, 
	$@MWAG?t(BN$@$NG\@:EY$NG[Ns$NFbMF$N9g7W$NCM$r5a$a$k(B
$@$3$H$K$h$j(B, $@G[Ns$rO"B3E*$KFI$_$@$7$GgS$a$F$$$/>l9g$N(B
$@%G!<%?E>Aw%l!<%H$r7W$k%Y%s%A%^!<%/$G$9(B. 
N $@$O(B 120*(2$@$N%Y%->h(B) $@$NCM$rA*$s$G$$$^$9(B.
N $@$NBg$-$5$K$h$C$F(B,$@G[Ns$,FbIt%-%c%C%7%e$K<}$^$k>l9g(B,
$@Fs<!%-%c%C%7%e$K<}$^$k>l9g(B,$@%-%c%C%7%e$+$i$O$_=P$F$7$^$&>l9g$N(B
$@$G(B,$@@-G=$KJQ2=$,$_$i$l$^$9(B. 

$@<j85$K$O(B Pentium 100MHz $@$N%7%9%F%`$H(B, $@K?=j$K(B Pentium Pro 150MHz
$@$N%^%7%s$,$"$k$@$1$G$9(B. $@$3$N%Y%s%A%^!<%/$G$O$b$m$K3F3,AX$N(B
$@%a%b%j!<$NFI$_$@$7@-G=$r$_$k$3$H$K$J$j$^$9$N$G(B, $@0[$J$k(B CPU $@$d(B
$@0[$J$k<~JU2sO)$N%7%9%F%`$N@-G=$K$O6=L#$,$"$j$^$9(B.

---- $@$A$g$C$H:$$kOC(B ---
Pentium Pro 150MHz $@$G$O(B, gcc-i2.6.3 $@$N@8@.$7$?%P%$%J%j$N<B9TG=N($,(B
gcc-i2.7.0 $@$N@8@.$7$?%P%$%J%j$KHf$Y$F(B,$@$+$J$j0-$/$J$C$F$$$^$9(B.
$@$=$3$G0lC6APJ}$N%3%s%Q%$%i$G%"%;%s%V%i=PNO$r=P$7$F(B, diff $@$K$h$j(B
$@Hf$Y$F$_$k$H(B, gcc-i2.6.3 $@$N=P$7$?%3!<%I$K(B
	.align 2,0x90 
$@$H$$$&9T$,(B5$@8D=jDxM>7W$KF~$C$F$$$k$@$1$,0c$&$H$$$&$3$H$,H=L@$7$^$7$?(B.
$@$3$l$O(B loop$@$N(B $@Ht@h%i%Y%k$r6v?tHVCO$K$9$k0Y$N(BNOP$@$G$O$J$$$G$7$g$&$+(B??
$@$=$l$@$1$N0c$$$G@-G=$,H>J,6a$/$KDc2<$9$k$H$$$&(BPentium Pro $@$K$O:$$C$?$b$N$G$9(B.


================ P5-100 $@$K$h$k7WB,Nc(B ========================

------ gcc-i2.6.3 $@$N@8@.$7$?%P%$%J%j$K$h$k<B9TNc(B ----
p5-100% s-i2.6.3.out
N=    120, size=   0.94KB, cpu= 5.20, load= 4.8e+08, MB/Sec= 739.1
N=    240, size=   1.88KB, cpu= 5.16, load= 4.8e+08, MB/Sec= 744.7
N=    480, size=   3.75KB, cpu= 5.05, load= 4.8e+08, MB/Sec= 759.7
N=    960, size=   7.50KB, cpu= 5.37, load= 4.8e+08, MB/Sec= 715.5
N=   1920, size=  15.00KB, cpu=25.54, load= 4.8e+08, MB/Sec= 150.4
N=   3840, size=  30.00KB, cpu=31.09, load= 4.8e+08, MB/Sec= 123.5
N=   7680, size=  60.00KB, cpu=32.48, load= 4.8e+08, MB/Sec= 118.2
N=  15360, size= 120.00KB, cpu=37.38, load= 4.8e+08, MB/Sec= 102.7
N=  30720, size= 240.00KB, cpu=37.94, load= 4.8e+08, MB/Sec= 101.2
N=  61440, size= 480.00KB, cpu=43.44, load= 4.8e+08, MB/Sec=  88.4
N= 122880, size= 960.00KB, cpu=46.62, load= 4.8e+08, MB/Sec=  82.4
N= 245760, size=1920.00KB, cpu=47.08, load= 4.8e+08, MB/Sec=  81.6
N= 491520, size=3840.00KB, cpu=47.09, load= 4.8e+08, MB/Sec=  81.5
N= 983040, size=7680.00KB, cpu=47.07, load= 4.8e+08, MB/Sec=  81.5
N=1966080, size=15360.00KB,cpu=47.16, load= 4.8e+08, MB/Sec=  81.4

------ gcc-i2.7.0 $@$N@8@.$7$?%P%$%J%j$K$h$k<B9TNc(B ----
p5-100% s-i2.7.0.out
N=    120, size=   0.94KB, cpu= 5.20, load= 4.8e+08, MB/Sec= 739.1
N=    240, size=   1.88KB, cpu= 5.15, load= 4.8e+08, MB/Sec= 745.9
N=    480, size=   3.75KB, cpu= 5.05, load= 4.8e+08, MB/Sec= 760.9
N=    960, size=   7.50KB, cpu= 5.36, load= 4.8e+08, MB/Sec= 716.5
N=   1920, size=  15.00KB, cpu=25.53, load= 4.8e+08, MB/Sec= 150.4
N=   3840, size=  30.00KB, cpu=25.54, load= 4.8e+08, MB/Sec= 150.4
N=   7680, size=  60.00KB, cpu=31.16, load= 4.8e+08, MB/Sec= 123.2
N=  15360, size= 120.00KB, cpu=33.22, load= 4.8e+08, MB/Sec= 115.6
N=  30720, size= 240.00KB, cpu=37.64, load= 4.8e+08, MB/Sec= 102.0
N=  61440, size= 480.00KB, cpu=43.48, load= 4.8e+08, MB/Sec=  88.3
N= 122880, size= 960.00KB, cpu=46.51, load= 4.8e+08, MB/Sec=  82.6
N= 245760, size=1920.00KB, cpu=47.09, load= 4.8e+08, MB/Sec=  81.5
N= 491520, size=3840.00KB, cpu=47.08, load= 4.8e+08, MB/Sec=  81.5
N= 983040, size=7680.00KB, cpu=47.03, load= 4.8e+08, MB/Sec=  81.6
N=1966080, size=15360.00KB,cpu=47.08, load= 4.8e+08, MB/Sec=  81.5

================ P6-150 $@$K$h$k7WB,Nc(B ========================

------ gcc-i2.6.3 $@$N@8@.$7$?%P%$%J%j$K$h$k<B9TNc(B ----
p6-150% s-i2.6.3.out
N=    120, size=   0.94KB, cpu= 5.66, load= 4.8e+08, MB/Sec= 678.9
N=    240, size=   1.88KB, cpu= 5.70, load= 4.8e+08, MB/Sec= 673.3
N=    480, size=   3.75KB, cpu= 5.70, load= 4.8e+08, MB/Sec= 674.2
N=    960, size=   7.50KB, cpu= 5.73, load= 4.8e+08, MB/Sec= 669.6
N=   1920, size=  15.00KB, cpu= 8.05, load= 4.8e+08, MB/Sec= 476.7
N=   3840, size=  30.00KB, cpu= 8.05, load= 4.8e+08, MB/Sec= 477.2
N=   7680, size=  60.00KB, cpu= 8.05, load= 4.8e+08, MB/Sec= 477.2
N=  15360, size= 120.00KB, cpu= 8.09, load= 4.8e+08, MB/Sec= 474.9
N=  30720, size= 240.00KB, cpu=17.79, load= 4.8e+08, MB/Sec= 215.9
N=  61440, size= 480.00KB, cpu=31.33, load= 4.8e+08, MB/Sec= 122.6
N= 122880, size= 960.00KB, cpu=33.43, load= 4.8e+08, MB/Sec= 114.9
N= 245760, size=1920.00KB, cpu=33.38, load= 4.8e+08, MB/Sec= 115.0
N= 491520, size=3840.00KB, cpu=33.40, load= 4.8e+08, MB/Sec= 114.9
N= 983040, size=7680.00KB, cpu=33.40, load= 4.8e+08, MB/Sec= 114.9
N=1966080, size=15360.00KB,cpu=33.07, load= 4.8e+08, MB/Sec= 116.0

------ gcc-i2.7.0 $@$N@8@.$7$?%P%$%J%j$K$h$k<B9TNc(B ----
p6-150% s-i2.7.0.out
N=    120, size=   0.94KB, cpu= 3.42, load= 4.8e+08, MB/Sec=1122.2
N=    240, size=   1.88KB, cpu= 3.41, load= 4.8e+08, MB/Sec=1127.3
N=    480, size=   3.75KB, cpu= 3.40, load= 4.8e+08, MB/Sec=1129.9
N=    960, size=   7.50KB, cpu= 3.45, load= 4.8e+08, MB/Sec=1114.6
N=   1920, size=  15.00KB, cpu= 8.05, load= 4.8e+08, MB/Sec= 476.7
N=   3840, size=  30.00KB, cpu= 8.04, load= 4.8e+08, MB/Sec= 477.7
N=   7680, size=  60.00KB, cpu= 8.04, load= 4.8e+08, MB/Sec= 477.7
N=  15360, size= 120.00KB, cpu= 8.05, load= 4.8e+08, MB/Sec= 477.2
N=  30720, size= 240.00KB, cpu=20.72, load= 4.8e+08, MB/Sec= 185.3
N=  61440, size= 480.00KB, cpu=33.40, load= 4.8e+08, MB/Sec= 115.0
N= 122880, size= 960.00KB, cpu=33.38, load= 4.8e+08, MB/Sec= 115.0
N= 245760, size=1920.00KB, cpu=33.40, load= 4.8e+08, MB/Sec= 115.0
N= 491520, size=3840.00KB, cpu=33.38, load= 4.8e+08, MB/Sec= 115.0
N= 983040, size=7680.00KB, cpu=33.38, load= 4.8e+08, MB/Sec= 115.0
N=1966080, size=15360.00KB,cpu=33.35, load= 4.8e+08, MB/Sec= 115.1


======================== $@<B:]$N%=!<%9%3!<%I(B ==================
/*
	This code performs well on data loads for P5.
	On P5, compile as:
	% gcc-i2.6.3 -O3 -mpentium thisfile.c
	% gcc-i2.7.0 -O3 -mpentium thisfile.c
*/

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <unistd.h>

/* RCS date stamp that main() prints in the banner line. */
const char *Version="$Date: 1995/12/28 11:44:15 $";

/* Size units, expressed as element or byte counts depending on the caller. */
#define KILO    1024
#define MEGA    (KILO*KILO)

/* Boundary, in bytes, to which main() rounds up the start of the work array. */
#define ALIGN   (256*KILO)

/* Element type of the work array being summed. */
#define REAL    double

/* Work array; main() points it at an ALIGN-aligned address inside its buffer. */
REAL *a;

/*
 * s-bench: measure sequential memory-read bandwidth.
 *
 * For N = 120, 240, 480, ... (doubling while N <= 2*MEGA) the first N
 * elements of the work array are summed repeatedly, so that every pass of
 * the outer loop performs roughly 480,000,000 element loads in total.  The
 * CPU time for those loads is taken with second() and reported both as
 * seconds and as MB/Sec (10^6 bytes per second).
 *
 * The work array is over-allocated by ALIGN-1 bytes and its start is rounded
 * up to an ALIGN boundary.  The pointer returned by malloc() is kept
 * separately because only that pointer may be handed to free().
 *
 * Returns 0 on success; exits with status 1 if the buffer cannot be
 * allocated.
 */
int main(void)
{
	/* Declared here so main() compiles ahead of the definitions below it. */
	float second(void);
	int print1addr(int check, void *a);

	void *raw;              /* pointer as returned by malloc(), for free() */
	int N, LOOP;
	int i, r;
	float t1, t2, load;
	REAL t;
	REAL s0, s1, s2, s3, s4;

	printf("s-bench: Version: %s\n", Version);

	raw = malloc(sizeof(REAL)*2*MEGA + ALIGN - 1);
	if (raw == NULL) {
		fprintf(stderr, "s-bench: cannot allocate the work array\n");
		exit(1);
	}
	/* Round up through uintptr_t so the address is not truncated. */
	a = (REAL *)(((uintptr_t)raw + ALIGN - 1) / ALIGN * ALIGN);
	print1addr(sizeof(REAL) == sizeof(double), a);

	for (N = 120; N <= 2*MEGA; N *= 2) {
		/* Keep the total number of loads per measurement about constant. */
		LOOP = 480000000/N;

		for (i = 0; i < N; i++) {
			a[i] = 1.0;
		}
		sleep(1);
		s0 = 0.0; s1 = 0.0; s2 = 0.0; s3 = 0.0; s4 = 0.0;

		t1 = second();
		for (r = 0; r < LOOP; r++) {
			/*
			 * N is always a multiple of 120, so this stride-120 loop
			 * touches a[0..N-1] exactly once per pass.  Successive adds
			 * rotate over five accumulators (presumably so that they
			 * do not depend on one another -- confirm before changing).
			 */
			for (i = 0; i < N; i += 120) {
				s0+=a[i+ 0];s1+=a[i+ 1];s2+=a[i+ 2];s3+=a[i+ 3];s4+=a[i+ 4];
				s0+=a[i+ 5];s1+=a[i+ 6];s2+=a[i+ 7];s3+=a[i+ 8];s4+=a[i+ 9];
				s0+=a[i+10];s1+=a[i+11];s2+=a[i+12];s3+=a[i+13];s4+=a[i+14];
				s0+=a[i+15];s1+=a[i+16];s2+=a[i+17];s3+=a[i+18];s4+=a[i+19];
				s0+=a[i+20];s1+=a[i+21];s2+=a[i+22];s3+=a[i+23];s4+=a[i+24];
				s0+=a[i+25];s1+=a[i+26];s2+=a[i+27];s3+=a[i+28];s4+=a[i+29];
				s0+=a[i+30];s1+=a[i+31];s2+=a[i+32];s3+=a[i+33];s4+=a[i+34];
				s0+=a[i+35];s1+=a[i+36];s2+=a[i+37];s3+=a[i+38];s4+=a[i+39];
				s0+=a[i+40];s1+=a[i+41];s2+=a[i+42];s3+=a[i+43];s4+=a[i+44];
				s0+=a[i+45];s1+=a[i+46];s2+=a[i+47];s3+=a[i+48];s4+=a[i+49];
				s0+=a[i+50];s1+=a[i+51];s2+=a[i+52];s3+=a[i+53];s4+=a[i+54];
				s0+=a[i+55];s1+=a[i+56];s2+=a[i+57];s3+=a[i+58];s4+=a[i+59];

				s0+=a[i+60];s1+=a[i+61];s2+=a[i+62];s3+=a[i+63];s4+=a[i+64];
				s0+=a[i+65];s1+=a[i+66];s2+=a[i+67];s3+=a[i+68];s4+=a[i+69];
				s0+=a[i+70];s1+=a[i+71];s2+=a[i+72];s3+=a[i+73];s4+=a[i+74];
				s0+=a[i+75];s1+=a[i+76];s2+=a[i+77];s3+=a[i+78];s4+=a[i+79];
				s0+=a[i+80];s1+=a[i+81];s2+=a[i+82];s3+=a[i+83];s4+=a[i+84];
				s0+=a[i+85];s1+=a[i+86];s2+=a[i+87];s3+=a[i+88];s4+=a[i+89];
				s0+=a[i+90];s1+=a[i+91];s2+=a[i+92];s3+=a[i+93];s4+=a[i+94];
				s0+=a[i+95];s1+=a[i+96];s2+=a[i+97];s3+=a[i+98];s4+=a[i+99];
				s0+=a[i+100];s1+=a[i+101];s2+=a[i+102];s3+=a[i+103];s4+=a[i+104];
				s0+=a[i+105];s1+=a[i+106];s2+=a[i+107];s3+=a[i+108];s4+=a[i+109];
				s0+=a[i+110];s1+=a[i+111];s2+=a[i+112];s3+=a[i+113];s4+=a[i+114];
				s0+=a[i+115];s1+=a[i+116];s2+=a[i+117];s3+=a[i+118];s4+=a[i+119];
			}
		}
		t2 = second();

		/* Number of element loads actually performed in the timed region. */
		load = N*LOOP;
		printf("N=%7d, size=%7.2fKB, cpu=%4.2f, load=%8.1e, MB/Sec=%6.1f\n",
			N, N*sizeof(REAL)/1024.0, t2-t1, load,
			load*sizeof(REAL)/(t2-t1)*1.0e-6
		);

		/* Printing the sum keeps the summation loop observable. */
		t = s0+s1+s2+s3+s4;
		printf("t=%f\n", t);
	}

	/* Release the block through the pointer malloc() returned, not `a`. */
	free(raw);
	a = NULL;
	return 0;
}

/*
 * Return the processor time used by the program so far, in seconds.
 *
 * The value comes from clock() scaled by CLOCKS_PER_SEC, so only the
 * difference between two calls is meaningful as an elapsed CPU time.
 * <time.h> must be included at file scope; a system header should not be
 * pulled in from inside a function body.
 */
float second(void)
{
	/* Divide in double so the only rounding is the final conversion. */
	return (float)((double)clock() / (double)CLOCKS_PER_SEC);
}

/*
 * Print the address `a` in octal and, if requested, warn when it is not a
 * multiple of 8.
 *
 * check: non-zero to enable the alignment warning
 * a:     address to report
 *
 * Returns 0 always.  The return type stays int so that callers which see
 * this function only through an implicit or unprototyped declaration remain
 * compatible.
 */
int print1addr(int check, void *a)
{
	/* Convert through uintptr_t so no address bits are lost. */
	uintptr_t addr = (uintptr_t)a;

	printf("Octal-address: vec-a:%8llo\n", (unsigned long long)addr);
	if (check && addr % 8 != 0) {
		printf("Warning! a not aligned.\n");
	}
	return 0;
}

========================== $@%3%s%Q%$%k4D6-(B  ===============================

% gcc-i2.6.3 -O3 -mpentium s-bench.5.c -o s-i2.6.3.out # gcc-i2.6.3 $@$N%P%$%J%j(B

% gcc-i2.7.0 -O3 -mpentium s-bench.5.c -o s-i2.7.0.out # gcc-i2.7.0 $@$N%P%$%J%j(B

