From owner-FreeBSD-tech-jp@jp.freebsd.org  Sat Dec  4 21:08:33 1999
Received: (from daemon@localhost)
	by castle.jp.freebsd.org (8.9.3+3.2W/8.7.3) id VAA65742;
	Sat, 4 Dec 1999 21:08:33 +0900 (JST)
	(envelope-from owner-FreeBSD-tech-jp@jp.FreeBSD.org)
Received: from jirass.chino.it.okayama-u.ac.jp (chino-gw.it.okayama-u.ac.jp [150.46.1.3])
	by castle.jp.freebsd.org (8.9.3+3.2W/8.7.3) with ESMTP id VAA65737
	for <FreeBSD-tech-jp@jp.freebsd.org>; Sat, 4 Dec 1999 21:08:33 +0900 (JST)
	(envelope-from kato@chino.it.okayama-u.ac.jp)
Received: from kanegon.chino.it.okayama-u.ac.jp.chino.it.okayama-u.ac.jp (kanegon [150.46.4.34])
	by jirass.chino.it.okayama-u.ac.jp (8.8.7/3.6W) with ESMTP id VAA11132;
	Sat, 4 Dec 1999 21:08:19 +0900 (JST)
Date: Sat, 04 Dec 1999 21:08:18 +0900
Message-ID: <14409.1074.351813.68187Y@kanegon.chino.it.okayama-u.ac.jp>
From: Takekazu KATO <kato@chino.it.okayama-u.ac.jp>
To: FreeBSD-tech-jp@jp.freebsd.org
In-Reply-To: In your message of "Tue, 30 Nov 1999 20:26:47 +0900"
	<14403.46199.390713.29321F@kanegon.chino.it.okayama-u.ac.jp>
References: <14403.8580.310491.72159A@kanegon.chino.it.okayama-u.ac.jp>
	<199911300146.KAA18944@afs.ntc.mita.keio.ac.jp>
	<14403.22780.635961.98255P@tkc.att.ne.jp>
	<14403.28875.928937.72159A@tkc.att.ne.jp>
	<14403.46199.390713.29321F@kanegon.chino.it.okayama-u.ac.jp>
User-Agent: Wanderlust/2.2.2 (You Could Be Mine) SEMI/1.13.6 (Komatsu) FLIM/1.13.2 (Kasanui) Emacs/20.3 (i386-unknown-freebsd2.2.5) MULE/4.0 (HANANOEN)
Organization: Okayama Univ.
MIME-Version: 1.0 (generated by SEMI 1.13.6 - "Komatsu")
Content-Type: multipart/mixed;
 boundary="Multipart_Sat_Dec__4_21:08:18_1999-1"
Reply-To: FreeBSD-tech-jp@jp.freebsd.org
Precedence: list
X-Distribute: distribute version 2.1 (Alpha) patchlevel 24e+990727
X-Sequence: FreeBSD-tech-jp 2642
Subject: [FreeBSD-tech-jp 2642] Re: SSE on FreeBSD
Errors-To: owner-FreeBSD-tech-jp@jp.freebsd.org
Sender: owner-FreeBSD-tech-jp@jp.freebsd.org
X-Originator: kato@chino.it.okayama-u.ac.jp

--Multipart_Sat_Dec__4_21:08:18_1999-1
Content-Type: text/plain; charset=ISO-2022-JP

$B2CF#!w2,;3Bg$G$9!%(B

$BESCf7P2a$H!$<ALd(B($B$H$$$&$+AjCL!)(B)$B$G$9!%(B

$B$H$j$"$($:!$(BCPUID.XMM $B$r%A%'%C%/$7$F(B CR4.OSFXSR $B$rM-8z$K$9$k$^$G$O$G$-(B
$B$^$7$?!%(B

$B$3$l$G!$%7%s%0%k%W%m%;%C%5$G!$(BSSE $B$r;H$&%W%m%;%9$,0l$D$@$1$J$iF0$/$h$&(B
$B$K$J$j$^$7$?!%(B
$B:G8e$K(B patch $B$rIU$1$F$*$-$^$9$N$G!$$*$+$7$JE@$,$"$l$P;XE&$7$F$$$?$@$1(B
$B$k$H9,$$$G$9!%(BFreeBSD-3.3 $B$KBP$9$k(B patch $B$G$9!%$3$N(B patch $B$rEv$F$F!$(B
options SSE $B$r2C$($F(B config, make $B$9$l$P$$$$$h$&$K$J$C$F$$$^$9!%(B


$B$^$:!$0JA0$K$bEj9F$7$?0J2<$N%3!<%I$,F0$/$3$H$,3NG'$G$-$^$7$?!%(B

(XMM $B%l%8%9%?$r;H$C$FC1$K%3%T!<$9$k$@$1$N%3!<%I$G$9!%(B)
--- sse_test.c ---
#include<stdio.h>

/*
 * SSE smoke test: copy the four floats of a[] into b[] by way of
 * %xmm0, then print b[].  The two SSE moves are emitted as raw opcode
 * bytes with .byte instead of being written as mnemonics.
 *
 * Returns 0.
 */
int main(void)
{
  float a[4]={0.0,1.0,2.0,3.0};
  float b[4];
  int i;
  asm(
      "movl %0,%%eax\n"
      /*
       * MOVUPS (%%eax),%%xmm0
       * The unaligned form (0F 10) is used because nothing guarantees
       * that a[] and b[] sit on a 16-byte boundary; the aligned form
       * MOVAPS (0F 28) would fault on a misaligned address.
       */
      ".byte 0x0f\n"
      ".byte 0x10\n"
      ".byte 0x00\n"

      "movl %1,%%eax\n"

      /* MOVUPS %%xmm0,(%%eax)  (0F 11, unaligned store) */
      ".byte 0x0f\n"
      ".byte 0x11\n"
      ".byte 0x00\n"
      :
      :"g"(a),"g"(b)
      /*
       * "memory": the asm reads a[] and writes b[] through the pointers
       * it is handed, which the operand list alone does not tell the
       * compiler.  Without it b[] could be treated as never written.
       */
      :"eax","memory"
      );
  for(i=0;i<4;i++)
    printf(" %f",b[i]);
  printf("\n");
  return 0;
}
-------

($B<B9T7k2L(B)
> ./sse_test
 0.000000 1.000000 2.000000 3.000000


$B$^$?!$$3$l$@$1$@$H$J$s$J$N$G!$8a8e$N$3!A$@$rF0$+$7$F%9%T!<%I$rHf3S$7$F(B
$B$_$^$7$?!%(B
4$BJ,(B30$BIC$[$I$N(B wav $B%U%!%$%k$r?4M}2;6A%b!<%IL58z$G!$(BSSE $B$r;H$o$J$$>l9g$H(B
$B;H$&>l9g$G$=$l$>$l(B encode $B$7$?7k2L$G$9!%(B
$B$A$J$_$K(B CPU $B$O(B PentiumIII 600MHz $B$G$9!%(B

(SSE $B$r;H$o$J$$>l9g(B)
> gogo -off sse -nopsy  hirose1.wav hirose1-1.mp3 
$B8a8e$N$3!A$@(B ver. 2.11 (Oct 22 1999)
Copyright (C) 1999 PEN@MarineCat and shigeo
          Special thanks to Keiichi SAKAI and URURI
MPEG 1, layer 3 $B%8%g%$%s%H%9%F%l%*(B
$BF~NO<~GH?t(B=44.1kHz $B=PNO<~GH?t(B=44.1kHz $B%S%C%H%l!<%H(B=128kbps
$BF~NO%U%!%$%k(B `hirose1.wav'
$B=PNO%U%!%$%k(B `hirose1-1.mp3'
{  10367/  10368} 100.0% (x 0.07)  re:[00:00:00] to:[00:00:18]
$B%(%s%3!<%I=*N;(B
$B7P2a;~4V(B=  18.32sec


(SSE $B$r;H$C$?>l9g(B)
>gogo -nopsy  hirose1.wav hirose1-2.mp3
$B8a8e$N$3!A$@(B ver. 2.11 (Oct 22 1999)
Copyright (C) 1999 PEN@MarineCat and shigeo
          Special thanks to Keiichi SAKAI and URURI
MPEG 1, layer 3 $B%8%g%$%s%H%9%F%l%*(B
$BF~NO<~GH?t(B=44.1kHz $B=PNO<~GH?t(B=44.1kHz $B%S%C%H%l!<%H(B=128kbps
$BF~NO%U%!%$%k(B `hirose1.wav'
$B=PNO%U%!%$%k(B `hirose1-2.mp3'
{  10367/  10368} 100.0% (x 0.04)  re:[00:00:00] to:[00:00:11]
$B%(%s%3!<%I=*N;(B
$B7P2a;~4V(B=  11.73sec

18.32sec -> 11.73sec $B$G(B 1.56 $BG\$/$i$$$N=hM}B.EY$K$J$j$^$7$?!%(B



$B$"$H$O!$%l%8%9%?$NB`Hr!$I|5l$G$9$,!$4pK\E*$K$O(B fnsave/frstor $B$r(B
fxsave/fxrstor $B$KJQ99$9$l$P$$$$$H$$$&$N$OJ,$C$?$s$G$9$,!$$$$/$D$+LdBj(B
$B$,$"$j$^$9!%(B

1. fxsave/fxrstor $B$,(B as $B$G%5%]!<%H$5$l$F$$$J$$!%(B

2. $BB`Hr$9$kNN0h$N9=B$!$%5%$%:$,JQ$k$N$G!$(B/usr/include/machine/pcb.h $B$G(B
   $BDj5A$5$l$F$$$k(B pcb$B9=B$BN$N%a%s%P$G$"$k(B(save87$B9=B$BN$N(B)pcb_savefpu$B$K(B
   $BBe$o$k?7$?$JB`HrNN0h$,I,MW$H$J$k!%$G!$$3$l$rJQ99$9$k$H$J$k$H!$$+$J(B
   $B$j1F6A$,Bg$-$/$J$j$=$&!%(B


1. $B$K4X$7$F$O!$$^$?(B .byte $B$G%^%7%s%3!<%I$rD>$K=q$/$H$7$F!$(B2. $B$K4X$7$F(B
$BG:$s$G$$$^$9!%$G$-$k$@$1>/$J$$JQ99$GF0$+$9$$$$J}K!$O$J$$$G$7$g$&$+!)(B
# $B$C$F$$$&$+$I$3$^$G1F6A$9$k$N$+$$$^$$$AM}2r$7$-$l$F$$$J$$!%(B

struct pcb $B$H$+(B struct save87 $B$H$+>!<j$KJQ99$7$A$c$C$F$$$$$s$G$7$g$&$+!)(B
$BB>$K$J$K$+1F6A$7$^$;$s$+!)(B

---
Takekazu KATO
Intelligent System Lab.,
Dept. of Information Technology, Fac. of Engineering, Okayama University.
mailto:kato@chino.it.okayama-u.ac.jp
http://www.chino.it.okayama-u.ac.jp/~kato/


--Multipart_Sat_Dec__4_21:08:18_1999-1
Content-Type: application/octet-stream; type=patch
Content-Disposition: attachment; filename="SSE.diff"
Content-Transfer-Encoding: 7bit

diff -cur i386.orig/conf/options.i386 i386/conf/options.i386
--- i386.orig/conf/options.i386	Sat Dec  4 19:50:10 1999
+++ i386/conf/options.i386	Sat Dec  4 19:58:36 1999
@@ -7,6 +7,7 @@
 GPL_MATH_EMULATE	opt_math_emulate.h
 PMAP_SHPGPERPROC	opt_pmap.h
 VM86			opt_vm86.h
+SSE                     opt_sse.h
 
 IBCS2			opt_dontuse.h
 COMPAT_LINUX		opt_dontuse.h
diff -cur i386.orig/i386/identcpu.c i386/i386/identcpu.c
--- i386.orig/i386/identcpu.c	Sat Dec  4 19:50:04 1999
+++ i386/i386/identcpu.c	Sat Dec  4 19:58:29 1999
@@ -577,7 +577,7 @@
 			"\027<b22>"
 			"\030MMX"
 			"\031FXSR"
-			"\032<b25>"
+			"\032XMM"
 			"\033<b26>"
 			"\034<b27>"
 			"\035<b28>"
diff -cur i386.orig/i386/locore.s i386/i386/locore.s
--- i386.orig/i386/locore.s	Sat Dec  4 19:50:04 1999
+++ i386/i386/locore.s	Sat Dec  4 19:58:29 1999
@@ -49,6 +49,7 @@
 #include "opt_nfsroot.h"
 #include "opt_userconfig.h"
 #include "opt_vm86.h"
+#include "opt_sse.h"
 
 #include <sys/syscall.h>
 #include <sys/reboot.h>
@@ -332,6 +333,18 @@
 	movl	%eax, %cr4
 1:
 #endif /* VM86 */
+
+#ifdef SSE
+/*
+ * If the CPU has support for SSE, turn it on.
+ */ 
+	testl	$CPUID_XMM, R(_cpu_feature)
+	jz	1f
+	movl	%cr4, %eax
+	orl	$CR4_OSFXSR, %eax
+	movl	%eax, %cr4
+1:
+#endif
 
 #ifdef BDE_DEBUGGER
 /*
diff -cur i386.orig/i386/mpboot.s i386/i386/mpboot.s
--- i386.orig/i386/mpboot.s	Sat Dec  4 19:50:03 1999
+++ i386/i386/mpboot.s	Sat Dec  4 19:58:30 1999
@@ -35,6 +35,7 @@
  */
 
 #include "opt_vm86.h"
+#include "opt_sse.h"
 
 #include <machine/asmacros.h>		/* miscellaneous asm macros */
 #include <machine/apic.h>
@@ -102,6 +103,18 @@
 	jz	1f
 	movl	%cr4, %eax
 	orl	$CR4_VME, %eax
+	movl	%eax, %cr4
+1:
+#endif
+
+#ifdef SSE
+/*
+ * If the CPU has support for SSE, turn it on.
+ */ 
+	testl	$CPUID_XMM, _cpu_feature
+	jz	1f
+	movl	%cr4, %eax
+	orl	$CR4_OSFXSR, %eax
 	movl	%eax, %cr4
 1:
 #endif
diff -cur i386.orig/include/specialreg.h i386/include/specialreg.h
--- i386.orig/include/specialreg.h	Sat Dec  4 19:50:15 1999
+++ i386/include/specialreg.h	Sat Dec  4 19:58:43 1999
@@ -71,6 +71,8 @@
 #define	CR4_MCE	0x00000040	/* Machine check enable */
 #define	CR4_PGE	0x00000080	/* Page global enable */
 #define	CR4_PCE	0x00000100	/* Performance monitoring counter enable */
+#define CR4_OSFXSR     0x00000200 /* SSE support */
+#define CR4_OSXMMEXCPT 0x00000400 /* SSE exception */
 
 /*
  * CPUID instruction features register
@@ -92,6 +94,8 @@
 #define	CPUID_MCA	0x4000
 #define	CPUID_CMOV	0x8000
 
+#define CPUID_FXSR  0x01000000
+#define CPUID_XMM   0x02000000
 /*
  * Model-specific registers for the i386 family
  */

--Multipart_Sat_Dec__4_21:08:18_1999-1--
