diff --git a/trunk/fcl/CMakeLists.txt b/trunk/fcl/CMakeLists.txt
index 5ea53d32586b098d5e47c0cb18d7db218553b271..1e64b34acda84fa8f46b3f65d4d575f0da2bf09c 100644
--- a/trunk/fcl/CMakeLists.txt
+++ b/trunk/fcl/CMakeLists.txt
@@ -37,7 +37,7 @@ link_directories(${CCD_LIBRARY_DIRS})
 
 add_definitions(-DUSE_SVMLIGHT=0)
 
-add_library(${PROJECT_NAME} SHARED src/AABB.cpp src/OBB.cpp src/RSS.cpp src/kIOS.cpp src/vec_3f.cpp src/traversal_node_base.cpp src/traversal_node_bvhs.cpp src/intersect.cpp src/motion.cpp src/BV_fitter.cpp src/BV_splitter.cpp src/BVH_model.cpp src/BVH_utility.cpp src/transform.cpp src/simple_setup.cpp src/geometric_shapes.cpp src/geometric_shapes_utility.cpp src/geometric_shapes_intersect.cpp src/collision_node.cpp src/traversal_recurse.cpp src/broad_phase_collision.cpp src/collision.cpp src/collision_func_matrix.cpp src/interval_tree.cpp src/conservative_advancement.cpp src/matrix_3f.cpp src/interval.cpp src/interval_vector.cpp src/interval_matrix.cpp src/taylor_model.cpp src/taylor_vector.cpp src/taylor_matrix.cpp src/simd_intersect.cpp src/distance_func_matrix.cpp src/distance.cpp)
+add_library(${PROJECT_NAME} SHARED src/AABB.cpp src/OBB.cpp src/RSS.cpp src/kIOS.cpp src/vec_3f.cpp src/traversal_node_base.cpp src/traversal_node_bvhs.cpp src/intersect.cpp src/motion.cpp src/BV_fitter.cpp src/BV_splitter.cpp src/BVH_model.cpp src/BVH_utility.cpp src/transform.cpp src/simple_setup.cpp src/geometric_shapes.cpp src/geometric_shapes_utility.cpp src/geometric_shapes_intersect.cpp src/collision_node.cpp src/traversal_recurse.cpp src/broad_phase_collision.cpp src/collision.cpp src/collision_func_matrix.cpp src/interval_tree.cpp src/conservative_advancement.cpp src/matrix_3f.cpp src/interval.cpp src/interval_vector.cpp src/interval_matrix.cpp src/taylor_model.cpp src/taylor_vector.cpp src/taylor_matrix.cpp src/distance_func_matrix.cpp src/distance.cpp)
 
 target_link_libraries(${PROJECT_NAME} ${FLANN_LIBRARIES} ${CCD_LIBRARIES})
 
diff --git a/trunk/fcl/include/fcl/broad_phase_collision.h b/trunk/fcl/include/fcl/broad_phase_collision.h
index f9a424d5810618543ffdfda4618d403ec30d845b..a848866b2987c349c99f2e7f6f73b4b05df49576 100644
--- a/trunk/fcl/include/fcl/broad_phase_collision.h
+++ b/trunk/fcl/include/fcl/broad_phase_collision.h
@@ -339,6 +339,8 @@ void SpatialHashingCollisionManager<HashTable>::getObjects(std::vector<Collision
 template<typename HashTable>
 void SpatialHashingCollisionManager<HashTable>::collide(CollisionObject* obj, void* cdata, CollisionCallBack callback) const
 {
+  if(size() == 0) return;
+
   const AABB& obj_aabb = obj->getAABB();
   AABB overlap_aabb;
 
@@ -372,6 +374,8 @@ void SpatialHashingCollisionManager<HashTable>::collide(CollisionObject* obj, vo
 template<typename HashTable>
 void SpatialHashingCollisionManager<HashTable>::distance(CollisionObject* obj, void* cdata_, DistanceCallBack callback) const
 {
+  if(size() == 0) return;
+
   DistanceData* cdata = static_cast<DistanceData*>(cdata_);
   Vec3f delta = (obj->getAABB().max_ - obj->getAABB().min_) * 0.5;
   AABB aabb = obj->getAABB();
@@ -456,6 +460,8 @@ void SpatialHashingCollisionManager<HashTable>::distance(CollisionObject* obj, v
 template<typename HashTable>
 void SpatialHashingCollisionManager<HashTable>::collide(void* cdata, CollisionCallBack callback) const
 {
+  if(size() == 0) return;
+
   std::list<CollisionObject*>::const_iterator it1;
   for(it1 = objs.begin(); it1 != objs.end(); ++it1)
   {
diff --git a/trunk/fcl/include/fcl/simd_intersect.h b/trunk/fcl/include/fcl/simd_intersect.h
index fce7d65758f2ce14e54ef890e18dfe6d6e6c16fc..79c90c9186b582b3b81dafbd38636c6fd25a9f04 100644
--- a/trunk/fcl/include/fcl/simd_intersect.h
+++ b/trunk/fcl/include/fcl/simd_intersect.h
@@ -34,43 +34,208 @@
 
 /** \author Jia Pan */
 
-#ifndef FCL_SSE_INTERSECT
-#define FCL_SSE_INTERSECT
+#ifndef FCL_MULTIPLE_INTERSECT
+#define FCL_MULTIPLE_INTERSECT
 
 #include "fcl/vec_3f.h"
 
+#include <xmmintrin.h>
+#include <pmmintrin.h>
+
+
 namespace fcl
 {
 
-/** \brief four spheres versus four spheres SIMD test */
-bool four_spheres_intersect(const Vec3f& o1, BVH_REAL r1,
-                            const Vec3f& o2, BVH_REAL r2,
-                            const Vec3f& o3, BVH_REAL r3,
-                            const Vec3f& o4, BVH_REAL r4,
-                            const Vec3f& o5, BVH_REAL r5,
-                            const Vec3f& o6, BVH_REAL r6,
-                            const Vec3f& o7, BVH_REAL r7,
-                            const Vec3f& o8, BVH_REAL r8);
 
-/** \brief four spheres versus four AABBs SIMD test */
-bool four_spheres_four_AABBs_intersect(const Vec3f& o1, BVH_REAL r1,
+static inline __m128 sse_four_spheres_intersect(const Vec3f& o1, BVH_REAL r1,
+                                                const Vec3f& o2, BVH_REAL r2,
+                                                const Vec3f& o3, BVH_REAL r3,
+                                                const Vec3f& o4, BVH_REAL r4,
+                                                const Vec3f& o5, BVH_REAL r5,
+                                                const Vec3f& o6, BVH_REAL r6,
+                                                const Vec3f& o7, BVH_REAL r7,
+                                                const Vec3f& o8, BVH_REAL r8)
+{
+  __m128 PX, PY, PZ, PR, QX, QY, QZ, QR;
+  PX = _mm_set_ps(o1[0], o2[0], o3[0], o4[0]);
+  PY = _mm_set_ps(o1[1], o2[1], o3[1], o4[1]);
+  PZ = _mm_set_ps(o1[2], o2[2], o3[2], o4[2]);
+  PR = _mm_set_ps(r1, r2, r3, r4);
+  QX = _mm_set_ps(o5[0], o6[0], o7[0], o8[0]);
+  QY = _mm_set_ps(o5[1], o6[1], o7[1], o8[1]);
+  QZ = _mm_set_ps(o5[2], o6[2], o7[2], o8[2]);
+  QR = _mm_set_ps(r5, r6, r7, r8);
+
+  __m128 T1 = _mm_sub_ps(PX, QX);
+  __m128 T2 = _mm_sub_ps(PY, QY);
+  __m128 T3 = _mm_sub_ps(PZ, QZ);
+  __m128 T4 = _mm_add_ps(PR, QR);
+  T1 = _mm_mul_ps(T1, T1);
+  T2 = _mm_mul_ps(T2, T2);
+  T3 = _mm_mul_ps(T3, T3);
+  T4 = _mm_mul_ps(T4, T4);
+  T1 = _mm_add_ps(T1, T2);
+  T2 = _mm_sub_ps(R2, T3);
+  return _mm_cmple_ps(T1, T2);
+}
+
+
+static inline __m128 sse_four_spheres_four_AABBs_intersect(const Vec3f& o1, BVH_REAL r1,
+                                                           const Vec3f& o2, BVH_REAL r2,
+                                                           const Vec3f& o3, BVH_REAL r3,
+                                                           const Vec3f& o4, BVH_REAL r4,
+                                                           const Vec3f& min1, const Vec3f& max1,
+                                                           const Vec3f& min2, const Vec3f& max2,
+                                                           const Vec3f& min3, const Vec3f& max3,
+                                                           const Vec3f& min4, const Vec3f& max4)
+{
+  __m128 MINX, MINY, MINZ, MAXX, MAXX, MAXY, MAXZ, SX, SY, SZ, SR;
+  MINX = _mm_set_ps(min1[0], min2[0], min3[0], min4[0]);
+  MAXX = _mm_set_ps(max1[0], max2[0], max3[0], max4[0]);
+  MINY = _mm_set_ps(min1[1], min2[1], min3[1], min4[1]);
+  MAXY = _mm_set_ps(max1[1], max2[1], max3[1], max4[1]);
+  MINZ = _mm_set_ps(min1[2], min2[2], min3[2], min4[2]);
+  MAXZ = _mm_set_ps(max1[2], max2[2], max3[2], max4[2]);
+  SX = _mm_set_ps(o1[0], o2[0], o3[0], o4[0]);
+  SY = _mm_set_ps(o1[1], o2[1], o3[1], o4[1]);
+  SZ = _mm_set_ps(o1[2], o2[2], o3[2], o4[2]);
+  SR = _mm_set_ps(r1, r2, r3, r4);
+
+
+  __m128 TX = _mm_max_ps(SX, MINX);
+  __m128 TY = _mm_max_ps(SY, MINY);
+  __m128 TZ = _mm_max_ps(SZ, MINZ);
+  TX = _mm_min_ps(TX, MAXX);
+  TY = _mm_min_ps(TY, MAXY);
+  TZ = _mm_min_ps(TZ, MAXZ);
+  __m128 DX = _mm_sub_ps(SX, TX);
+  __m128 DY = _mm_sub_ps(SY, TY);
+  __m128 DZ = _mm_sub_ps(SZ, TZ);
+  __m128 R2 = _mm_mul_ps(SR, SR);
+  DX = _mm_mul_ps(DX, DX);
+  DY = _mm_mul_ps(DY, DY);
+  DZ = _mm_mul_ps(DZ, DZ);
+  __m128 T1 = _mm_add_ps(DX, DY);
+  __m128 T2 = _mm_sub_ps(R2, DZ);
+  return _mm_cmple_ps(T1, T2);  
+}
+
+
+static inline __m128 sse_four_AABBs_intersect(const Vec3f& min1, const Vec3f& max1,
+                                              const Vec3f& min2, const Vec3f& max2,
+                                              const Vec3f& min3, const Vec3f& max3,
+                                              const Vec3f& min4, const Vec3f& max4,
+                                              const Vec3f& min5, const Vec3f& max5,
+                                              const Vec3f& min6, const Vec3f& max6,
+                                              const Vec3f& min7, const Vec3f& max7,
+                                              const Vec3f& min8, const Vec3f& max8)
+{
+  __m128 MIN1X, MIN1Y, MIN1Z, MAX1X, MAX1Y, MAX1Z, MIN2X, MIN2Y, MIN2Z, MAX2X, MAX2Y, MAX2Z;
+  MIN1X = _mm_set_ps(min1[0], min2[0], min3[0], min4[0]);
+  MAX1X = _mm_set_ps(max1[0], max2[0], max3[0], max4[0]);
+  MIN1Y = _mm_set_ps(min1[1], min2[1], min3[1], min4[1]);
+  MAX1Y = _mm_set_ps(max1[1], max2[1], max3[1], max4[1]);
+  MIN1Z = _mm_set_ps(min1[2], min2[2], min3[2], min4[2]);
+  MAX1Z = _mm_set_ps(max1[2], max2[2], max3[2], max4[2]);
+  MIN2X = _mm_set_ps(min5[0], min6[0], min7[0], min8[0]);
+  MAX2X = _mm_set_ps(max5[0], max6[0], max7[0], max8[0]);
+  MIN2Y = _mm_set_ps(min5[1], min6[1], min7[1], min8[1]);
+  MAX2Y = _mm_set_ps(max5[1], max6[1], max7[1], max8[1]);
+  MIN2Z = _mm_set_ps(min5[2], min6[2], min7[2], min8[2]);
+  MAX2Z = _mm_set_ps(max5[2], max6[2], max7[2], max8[2]);
+
+  __m128 AX = _mm_max_ps(MIN1X, MIN2X);
+  __m128 BX = _mm_min_ps(MAX1X, MAX2X);
+  __m128 AY = _mm_max_ps(MIN1Y, MIN2Y);
+  __m128 BY = _mm_min_ps(MAX1Y, MAX2Y);
+  __m128 AZ = _mm_max_ps(MIN1Z, MIN2Z);
+  __m128 BZ = _mm_min_ps(MAX1Z, MAX2Z);
+  __m128 T1 = _mm_cmple_ps(AX, BX);
+  __m128 T2 = _mm_cmple_ps(AY, BY);
+  __m128 T3 = _mm_cmple_ps(AZ, BZ);
+  __m128 T4 = _mm_and_ps(T1, T2);
+  return _mm_and_ps(T3, T4);
+}
+
+static bool four_spheres_intersect_and(const Vec3f& o1, BVH_REAL r1,
                                        const Vec3f& o2, BVH_REAL r2,
                                        const Vec3f& o3, BVH_REAL r3,
                                        const Vec3f& o4, BVH_REAL r4,
-                                       const Vec3f& min1, const Vec3f& max1,
-                                       const Vec3f& min2, const Vec3f& max2,
-                                       const Vec3f& min3, const Vec3f& max3,
-                                       const Vec3f& min4, const Vec3f& max4);
+                                       const Vec3f& o5, BVH_REAL r5,
+                                       const Vec3f& o6, BVH_REAL r6,
+                                       const Vec3f& o7, BVH_REAL r7,
+                                       const Vec3f& o8, BVH_REAL r8)
+{
+  __m128 CMP = four_spheres_intersect(o1, r1, o2, r2, o3, r3, o4, r4, o5, r5, o6, r6, o7, r7, o8, r8);
+  return _mm_movemask_ps(CMP) == 15.f;
+}
+
+static bool four_spheres_intersect_or(const Vec3f& o1, BVH_REAL r1,
+                                      const Vec3f& o2, BVH_REAL r2,
+                                      const Vec3f& o3, BVH_REAL r3,
+                                      const Vec3f& o4, BVH_REAL r4,
+                                      const Vec3f& o5, BVH_REAL r5,
+                                      const Vec3f& o6, BVH_REAL r6,
+                                      const Vec3f& o7, BVH_REAL r7,
+                                      const Vec3f& o8, BVH_REAL r8)
+{
+  __m128 CMP = four_spheres_intersect(o1, r1, o2, r2, o3, r3, o4, r4, o5, r5, o6, r6, o7, r7, o8, r8);
+  return __mm_movemask_ps(CMP) > 0;
+}
+
+/** \brief four spheres versus four AABBs SIMD test */
+static bool four_spheres_four_AABBs_intersect_and(const Vec3f& o1, BVH_REAL r1,
+                                                  const Vec3f& o2, BVH_REAL r2,
+                                                  const Vec3f& o3, BVH_REAL r3,
+                                                  const Vec3f& o4, BVH_REAL r4,
+                                                  const Vec3f& min1, const Vec3f& max1,
+                                                  const Vec3f& min2, const Vec3f& max2,
+                                                  const Vec3f& min3, const Vec3f& max3,
+                                                  const Vec3f& min4, const Vec3f& max4)
+{
+  __m128 CMP = four_spheres_four_AABBs_intersect(o1, r1, o2, r2, o3, r3, o4, r4, min1, max1, min2, max2, min3, max3, min4, max4);
+  return _mm_movemask_ps(CMP) == 15.f;
+}
+
+static bool four_spheres_four_AABBs_intersect_or(const Vec3f& o1, BVH_REAL r1,
+                                                 const Vec3f& o2, BVH_REAL r2,
+                                                 const Vec3f& o3, BVH_REAL r3,
+                                                 const Vec3f& o4, BVH_REAL r4,
+                                                 const Vec3f& min1, const Vec3f& max1,
+                                                 const Vec3f& min2, const Vec3f& max2,
+                                                 const Vec3f& min3, const Vec3f& max3,
+                                                 const Vec3f& min4, const Vec3f& max4)
+{
+  __m128 CMP = four_spheres_four_AABBs_intersect(o1, r1, o2, r2, o3, r3, o4, r4, min1, max1, min2, max2, min3, max3, min4, max4);
+  return _mm_movemask_ps(CMP) > 0;
+}
 
 /** \brief four AABBs versus four AABBs SIMD test */
-bool four_AABBs_intersect(const Vec3f& min1, const Vec3f& max1,
-                          const Vec3f& min2, const Vec3f& max2,
-                          const Vec3f& min3, const Vec3f& max3,
-                          const Vec3f& min4, const Vec3f& max4,
-                          const Vec3f& min5, const Vec3f& max5,
-                          const Vec3f& min6, const Vec3f& max6,
-                          const Vec3f& min7, const Vec3f& max7,
-                          const Vec3f& min8, const Vec3f& max8);
+static bool four_AABBs_intersect_and(const Vec3f& min1, const Vec3f& max1,
+                                     const Vec3f& min2, const Vec3f& max2,
+                                     const Vec3f& min3, const Vec3f& max3,
+                                     const Vec3f& min4, const Vec3f& max4,
+                                     const Vec3f& min5, const Vec3f& max5,
+                                     const Vec3f& min6, const Vec3f& max6,
+                                     const Vec3f& min7, const Vec3f& max7,
+                                     const Vec3f& min8, const Vec3f& max8)
+{
+  __m128 CMP = four_AABBs_intersect(min1, max1, min2, max2, min3, max3, min4, max4, min5, max5, min6, max6, min7, max7, min8, max8);
+  return _mm_movemask_ps(CMP) == 15.f;
+}
+
+static bool four_AABBs_intersect_or(const Vec3f& min1, const Vec3f& max1,
+                                    const Vec3f& min2, const Vec3f& max2,
+                                    const Vec3f& min3, const Vec3f& max3,
+                                    const Vec3f& min4, const Vec3f& max4,
+                                    const Vec3f& min5, const Vec3f& max5,
+                                    const Vec3f& min6, const Vec3f& max6,
+                                    const Vec3f& min7, const Vec3f& max7,
+                                    const Vec3f& min8, const Vec3f& max8)
+{
+  __m128 CMP = four_AABBs_intersect(min1, max1, min2, max2, min3, max3, min4, max4, min5, max5, min6, max6, min7, max7, min8, max8);
+  return _mm_movemask_ps(CMP) > 0;
+}
 
 }
 
diff --git a/trunk/fcl/src/broad_phase_collision.cpp b/trunk/fcl/src/broad_phase_collision.cpp
index 7d4ae568f5cc578bf9e0326a1557e2e1d5951bf9..3362505935fa4951028c22d5574a88a8c2121f63 100644
--- a/trunk/fcl/src/broad_phase_collision.cpp
+++ b/trunk/fcl/src/broad_phase_collision.cpp
@@ -115,6 +115,8 @@ void NaiveCollisionManager::getObjects(std::vector<CollisionObject*>& objs_) con
 
 void NaiveCollisionManager::collide(CollisionObject* obj, void* cdata, CollisionCallBack callback) const
 {
+  if(size() == 0) return;
+
   std::list<CollisionObject*>::const_iterator it;
   for(it = objs.begin(); it != objs.end(); ++it)
   {
@@ -125,6 +127,8 @@ void NaiveCollisionManager::collide(CollisionObject* obj, void* cdata, Collision
 
 void NaiveCollisionManager::distance(CollisionObject* obj, void* cdata, DistanceCallBack callback) const
 {
+  if(size() == 0) return;
+
   std::list<CollisionObject*>::const_iterator it;
   for(it = objs.begin(); it != objs.end(); ++it)
   {
@@ -135,6 +139,8 @@ void NaiveCollisionManager::distance(CollisionObject* obj, void* cdata, Distance
 
 void NaiveCollisionManager::collide(void* cdata, CollisionCallBack callback) const
 {
+  if(size() == 0) return;
+
   std::list<CollisionObject*>::const_iterator it1;
   std::list<CollisionObject*>::const_iterator it2;
   for(it1 = objs.begin(); it1 != objs.end(); ++it1)
@@ -303,6 +309,8 @@ void SSaPCollisionManager::checkColl(std::vector<CollisionObject*>::const_iterat
 
 void SSaPCollisionManager::collide(CollisionObject* obj, void* cdata, CollisionCallBack callback) const
 {
+  if(size() == 0) return;
+
   static const unsigned int CUTOFF = 100;
 
   DummyCollisionObject dummyHigh(AABB(obj->getAABB().max_));
@@ -358,6 +366,8 @@ void SSaPCollisionManager::checkDis(std::vector<CollisionObject*>::const_iterato
 
 void SSaPCollisionManager::distance(CollisionObject* obj, void* cdata_, DistanceCallBack callback) const
 {
+  if(size() == 0) return;
+
   static const unsigned int CUTOFF = 100;
 
   DistanceData* cdata = static_cast<DistanceData*>(cdata_);
@@ -379,9 +389,7 @@ void SSaPCollisionManager::distance(CollisionObject* obj, void* cdata_, Distance
   while(1)
   {
     old_min_distance = cdata->min_distance;
-    AABB dummy_aabb = AABB(dummy_vector);
-    DummyCollisionObject dummyHigh(dummy_aabb);
-    // DummyCollisionObject dummyHigh(AABB(dummy_vector));
+    DummyCollisionObject dummyHigh((AABB(dummy_vector)));
 
     pos_end1 = std::upper_bound(pos_start1, objs_x.end(), &dummyHigh, SortByXLow());
     unsigned int d1 = pos_end1 - pos_start1;
@@ -456,7 +464,7 @@ void SSaPCollisionManager::distance(CollisionObject* obj, void* cdata_, Distance
 
 void SSaPCollisionManager::collide(void* cdata, CollisionCallBack callback) const
 {
-  if (size() < 1)
+  if (size() == 0)
     return;
   
   // simple sweep and prune method
@@ -855,6 +863,8 @@ void SaPCollisionManager::getObjects(std::vector<CollisionObject*>& objs) const
 
 void SaPCollisionManager::collide(CollisionObject* obj, void* cdata, CollisionCallBack callback) const
 {
+  if(size() == 0) return;
+
   for(std::list<SaPAABB*>::const_iterator it = AABB_arr.begin(); it != AABB_arr.end(); ++it)
   {
     if((*it)->obj->getAABB().overlap(obj->getAABB()))
@@ -867,6 +877,8 @@ void SaPCollisionManager::collide(CollisionObject* obj, void* cdata, CollisionCa
 
 void SaPCollisionManager::distance(CollisionObject* obj, void* cdata_, DistanceCallBack callback) const
 {
+  if(size() == 0) return;
+
   DistanceData* cdata = static_cast<DistanceData*>(cdata_);
   for(std::list<SaPAABB*>::const_iterator it = AABB_arr.begin(); it != AABB_arr.end(); ++it)
   {
@@ -880,6 +892,8 @@ void SaPCollisionManager::distance(CollisionObject* obj, void* cdata_, DistanceC
 
 void SaPCollisionManager::collide(void* cdata, CollisionCallBack callback) const
 {
+  if(size() == 0) return;
+
   for(std::list<SaPPair>::const_iterator it = overlap_pairs.begin(); it != overlap_pairs.end(); ++it)
   {
     CollisionObject* obj1 = it->obj1;
@@ -1097,6 +1111,8 @@ void IntervalTreeCollisionManager::getObjects(std::vector<CollisionObject*>& obj
 
 void IntervalTreeCollisionManager::collide(CollisionObject* obj, void* cdata, CollisionCallBack callback) const
 {
+  if(size() == 0) return;
+
   static const unsigned int CUTOFF = 100;
 
   std::deque<SimpleInterval*> results0, results1, results2;
@@ -1137,6 +1153,8 @@ void IntervalTreeCollisionManager::collide(CollisionObject* obj, void* cdata, Co
 
 void IntervalTreeCollisionManager::distance(CollisionObject* obj, void* cdata_, DistanceCallBack callback) const
 {
+  if(size() == 0) return;
+
   static const unsigned int CUTOFF = 100;
 
   DistanceData* cdata = static_cast<DistanceData*>(cdata_);
@@ -1230,6 +1248,8 @@ void IntervalTreeCollisionManager::distance(CollisionObject* obj, void* cdata_,
 
 void IntervalTreeCollisionManager::collide(void* cdata, CollisionCallBack callback) const
 {
+  if(size() == 0) return;
+
   std::set<CollisionObject*> active;
   std::set<std::pair<CollisionObject*, CollisionObject*> > overlap;
   unsigned int n = endpoints[0].size();
diff --git a/trunk/fcl/src/simd_intersect.cpp b/trunk/fcl/src/simd_intersect.cpp
deleted file mode 100644
index c27e3e000fd503e1080dbea2fca7b87935b63a3c..0000000000000000000000000000000000000000
--- a/trunk/fcl/src/simd_intersect.cpp
+++ /dev/null
@@ -1,406 +0,0 @@
-#include "fcl/simd_intersect.h"
-
-namespace fcl
-{
-
-#if COLLISION_USE_SSE
-#include <xmmintrin.h>
-#include <pmmintrin.h>
-
-static inline __m128 sse_four_spheres_intersect(const Vec3f& o1, BVH_REAL r1,
-                                                const Vec3f& o2, BVH_REAL r2,
-                                                const Vec3f& o3, BVH_REAL r3,
-                                                const Vec3f& o4, BVH_REAL r4,
-                                                const Vec3f& o5, BVH_REAL r5,
-                                                const Vec3f& o6, BVH_REAL r6,
-                                                const Vec3f& o7, BVH_REAL r7,
-                                                const Vec3f& o8, BVH_REAL r8)
-{
-  __m128 PX, PY, PZ, PR, QX, QY, QZ, QR;
-  PX = _mm_set_ps(o1[0], o2[0], o3[0], o4[0]);
-  PY = _mm_set_ps(o1[1], o2[1], o3[1], o4[1]);
-  PZ = _mm_set_ps(o1[2], o2[2], o3[2], o4[2]);
-  PR = _mm_set_ps(r1, r2, r3, r4);
-  QX = _mm_set_ps(o5[0], o6[0], o7[0], o8[0]);
-  QY = _mm_set_ps(o5[1], o6[1], o7[1], o8[1]);
-  QZ = _mm_set_ps(o5[2], o6[2], o7[2], o8[2]);
-  QR = _mm_set_ps(r5, r6, r7, r8);
-
-  __m128 T1 = _mm_sub_ps(PX, QX);
-  __m128 T2 = _mm_sub_ps(PY, QY);
-  __m128 T3 = _mm_sub_ps(PZ, QZ);
-  __m128 T4 = _mm_add_ps(PR, QR);
-  T1 = _mm_mul_ps(T1, T1);
-  T2 = _mm_mul_ps(T2, T2);
-  T3 = _mm_mul_ps(T3, T3);
-  T4 = _mm_mul_ps(T4, T4);
-  T1 = _mm_add_ps(T1, T2);
-  T2 = _mm_sub_ps(R2, T3);
-  return _mm_cmple_ps(T1, T2);
-}
-
-
-static inline __m128 sse_four_spheres_four_AABBs_intersect(const Vec3f& o1, BVH_REAL r1,
-                                                           const Vec3f& o2, BVH_REAL r2,
-                                                           const Vec3f& o3, BVH_REAL r3,
-                                                           const Vec3f& o4, BVH_REAL r4,
-                                                           const Vec3f& min1, const Vec3f& max1,
-                                                           const Vec3f& min2, const Vec3f& max2,
-                                                           const Vec3f& min3, const Vec3f& max3,
-                                                           const Vec3f& min4, const Vec3f& max4)
-{
-  __m128 MINX, MINY, MINZ, MAXX, MAXX, MAXY, MAXZ, SX, SY, SZ, SR;
-  MINX = _mm_set_ps(min1[0], min2[0], min3[0], min4[0]);
-  MAXX = _mm_set_ps(max1[0], max2[0], max3[0], max4[0]);
-  MINY = _mm_set_ps(min1[1], min2[1], min3[1], min4[1]);
-  MAXY = _mm_set_ps(max1[1], max2[1], max3[1], max4[1]);
-  MINZ = _mm_set_ps(min1[2], min2[2], min3[2], min4[2]);
-  MAXZ = _mm_set_ps(max1[2], max2[2], max3[2], max4[2]);
-  SX = _mm_set_ps(o1[0], o2[0], o3[0], o4[0]);
-  SY = _mm_set_ps(o1[1], o2[1], o3[1], o4[1]);
-  SZ = _mm_set_ps(o1[2], o2[2], o3[2], o4[2]);
-  SR = _mm_set_ps(r1, r2, r3, r4);
-
-
-  __m128 TX = _mm_max_ps(SX, MINX);
-  __m128 TY = _mm_max_ps(SY, MINY);
-  __m128 TZ = _mm_max_ps(SZ, MINZ);
-  TX = _mm_min_ps(TX, MAXX);
-  TY = _mm_min_ps(TY, MAXY);
-  TZ = _mm_min_ps(TZ, MAXZ);
-  __m128 DX = _mm_sub_ps(SX, TX);
-  __m128 DY = _mm_sub_ps(SY, TY);
-  __m128 DZ = _mm_sub_ps(SZ, TZ);
-  __m128 R2 = _mm_mul_ps(SR, SR);
-  DX = _mm_mul_ps(DX, DX);
-  DY = _mm_mul_ps(DY, DY);
-  DZ = _mm_mul_ps(DZ, DZ);
-  __m128 T1 = _mm_add_ps(DX, DY);
-  __m128 T2 = _mm_sub_ps(R2, DZ);
-  return _mm_cmple_ps(T1, T2);  
-}
-
-
-static inline __m128 sse_four_AABBs_intersect(const Vec3f& min1, const Vec3f& max1,
-                                              const Vec3f& min2, const Vec3f& max2,
-                                              const Vec3f& min3, const Vec3f& max3,
-                                              const Vec3f& min4, const Vec3f& max4,
-                                              const Vec3f& min5, const Vec3f& max5,
-                                              const Vec3f& min6, const Vec3f& max6,
-                                              const Vec3f& min7, const Vec3f& max7,
-                                              const Vec3f& min8, const Vec3f& max8)
-{
-  __m128 MIN1X, MIN1Y, MIN1Z, MAX1X, MAX1Y, MAX1Z, MIN2X, MIN2Y, MIN2Z, MAX2X, MAX2Y, MAX2Z;
-  MIN1X = _mm_set_ps(min1[0], min2[0], min3[0], min4[0]);
-  MAX1X = _mm_set_ps(max1[0], max2[0], max3[0], max4[0]);
-  MIN1Y = _mm_set_ps(min1[1], min2[1], min3[1], min4[1]);
-  MAX1Y = _mm_set_ps(max1[1], max2[1], max3[1], max4[1]);
-  MIN1Z = _mm_set_ps(min1[2], min2[2], min3[2], min4[2]);
-  MAX1Z = _mm_set_ps(max1[2], max2[2], max3[2], max4[2]);
-  MIN2X = _mm_set_ps(min5[0], min6[0], min7[0], min8[0]);
-  MAX2X = _mm_set_ps(max5[0], max6[0], max7[0], max8[0]);
-  MIN2Y = _mm_set_ps(min5[1], min6[1], min7[1], min8[1]);
-  MAX2Y = _mm_set_ps(max5[1], max6[1], max7[1], max8[1]);
-  MIN2Z = _mm_set_ps(min5[2], min6[2], min7[2], min8[2]);
-  MAX2Z = _mm_set_ps(max5[2], max6[2], max7[2], max8[2]);
-
-  __m128 AX = _mm_max_ps(MIN1X, MIN2X);
-  __m128 BX = _mm_min_ps(MAX1X, MAX2X);
-  __m128 AY = _mm_max_ps(MIN1Y, MIN2Y);
-  __m128 BY = _mm_min_ps(MAX1Y, MAX2Y);
-  __m128 AZ = _mm_max_ps(MIN1Z, MIN2Z);
-  __m128 BZ = _mm_min_ps(MAX1Z, MAX2Z);
-  __m128 T1 = _mm_cmple_ps(AX, BX);
-  __m128 T2 = _mm_cmple_ps(AY, BY);
-  __m128 T3 = _mm_cmple_ps(AZ, BZ);
-  __m128 T4 = _mm_and_ps(T1, T2);
-  return _mm_and_ps(T3, T4);
-}
-
-bool four_spheres_intersect_and(const Vec3f& o1, BVH_REAL r1,
-                                const Vec3f& o2, BVH_REAL r2,
-                                const Vec3f& o3, BVH_REAL r3,
-                                const Vec3f& o4, BVH_REAL r4,
-                                const Vec3f& o5, BVH_REAL r5,
-                                const Vec3f& o6, BVH_REAL r6,
-                                const Vec3f& o7, BVH_REAL r7,
-                                const Vec3f& o8, BVH_REAL r8)
-{
-  __m128 CMP = four_spheres_intersect(o1, r1, o2, r2, o3, r3, o4, r4, o5, r5, o6, r6, o7, r7, o8, r8);
-  return _mm_movemask_ps(CMP) == 15.f;
-}
-
-bool four_spheres_intersect_or(const Vec3f& o1, BVH_REAL r1,
-                               const Vec3f& o2, BVH_REAL r2,
-                               const Vec3f& o3, BVH_REAL r3,
-                               const Vec3f& o4, BVH_REAL r4,
-                               const Vec3f& o5, BVH_REAL r5,
-                               const Vec3f& o6, BVH_REAL r6,
-                               const Vec3f& o7, BVH_REAL r7,
-                               const Vec3f& o8, BVH_REAL r8)
-{
-  __m128 CMP = four_spheres_intersect(o1, r1, o2, r2, o3, r3, o4, r4, o5, r5, o6, r6, o7, r7, o8, r8);
-  return __mm_movemask_ps(CMP) > 0;
-}
-
-/** \brief four spheres versus four AABBs SIMD test */
-bool four_spheres_four_AABBs_intersect_and(const Vec3f& o1, BVH_REAL r1,
-                                           const Vec3f& o2, BVH_REAL r2,
-                                           const Vec3f& o3, BVH_REAL r3,
-                                           const Vec3f& o4, BVH_REAL r4,
-                                           const Vec3f& min1, const Vec3f& max1,
-                                           const Vec3f& min2, const Vec3f& max2,
-                                           const Vec3f& min3, const Vec3f& max3,
-                                           const Vec3f& min4, const Vec3f& max4)
-{
-  __m128 CMP = four_spheres_four_AABBs_intersect(o1, r1, o2, r2, o3, r3, o4, r4, min1, max1, min2, max2, min3, max3, min4, max4);
-  return _mm_movemask_ps(CMP) == 15.f;
-}
-
-bool four_spheres_four_AABBs_intersect_or(const Vec3f& o1, BVH_REAL r1,
-                                          const Vec3f& o2, BVH_REAL r2,
-                                          const Vec3f& o3, BVH_REAL r3,
-                                          const Vec3f& o4, BVH_REAL r4,
-                                          const Vec3f& min1, const Vec3f& max1,
-                                          const Vec3f& min2, const Vec3f& max2,
-                                          const Vec3f& min3, const Vec3f& max3,
-                                          const Vec3f& min4, const Vec3f& max4)
-{
-  __m128 CMP = four_spheres_four_AABBs_intersect(o1, r1, o2, r2, o3, r3, o4, r4, min1, max1, min2, max2, min3, max3, min4, max4);
-  return _mm_movemask_ps(CMP) > 0;
-}
-
-/** \brief four AABBs versus four AABBs SIMD test */
-bool four_AABBs_intersect_and(const Vec3f& min1, const Vec3f& max1,
-                              const Vec3f& min2, const Vec3f& max2,
-                              const Vec3f& min3, const Vec3f& max3,
-                              const Vec3f& min4, const Vec3f& max4,
-                              const Vec3f& min5, const Vec3f& max5,
-                              const Vec3f& min6, const Vec3f& max6,
-                              const Vec3f& min7, const Vec3f& max7,
-                              const Vec3f& min8, const Vec3f& max8)
-{
-  __m128 CMP = four_AABBs_intersect(min1, max1, min2, max2, min3, max3, min4, max4, min5, max5, min6, max6, min7, max7, min8, max8);
-  return _mm_movemask_ps(CMP) == 15.f;
-}
-
-bool four_AABBs_intersect_or(const Vec3f& min1, const Vec3f& max1,
-                             const Vec3f& min2, const Vec3f& max2,
-                             const Vec3f& min3, const Vec3f& max3,
-                             const Vec3f& min4, const Vec3f& max4,
-                             const Vec3f& min5, const Vec3f& max5,
-                             const Vec3f& min6, const Vec3f& max6,
-                             const Vec3f& min7, const Vec3f& max7,
-                             const Vec3f& min8, const Vec3f& max8)
-{
-  __m128 CMP = four_AABBs_intersect(min1, max1, min2, max2, min3, max3, min4, max4, min5, max5, min6, max6, min7, max7, min8, max8);
-  return _mm_movemask_ps(CMP) > 0;
-}
-
-#else
-
-bool four_spheres_intersect_and(const Vec3f& o1, BVH_REAL r1,
-                                const Vec3f& o2, BVH_REAL r2,
-                                const Vec3f& o3, BVH_REAL r3,
-                                const Vec3f& o4, BVH_REAL r4,
-                                const Vec3f& o5, BVH_REAL r5,
-                                const Vec3f& o6, BVH_REAL r6,
-                                const Vec3f& o7, BVH_REAL r7,
-                                const Vec3f& o8, BVH_REAL r8)
-{
-  BVH_REAL r;
-
-  r = r1 + r5;
-  if((o1 - o5).sqrLength() > r * r) return false;
-  r = r2 + r6;
-  if((o2 - o6).sqrLength() > r * r) return false;
-  r = r3 + r7;
-  if((o3 - o7).sqrLength() > r * r) return false;
-  r = r4 + r8;
-  if((o4 - o8).sqrLength() > r * r) return false;
-
-  return true;
-}
-
-bool four_spheres_intersect_or(const Vec3f& o1, BVH_REAL r1,
-                               const Vec3f& o2, BVH_REAL r2,
-                               const Vec3f& o3, BVH_REAL r3,
-                               const Vec3f& o4, BVH_REAL r4,
-                               const Vec3f& o5, BVH_REAL r5,
-                               const Vec3f& o6, BVH_REAL r6,
-                               const Vec3f& o7, BVH_REAL r7,
-                               const Vec3f& o8, BVH_REAL r8)
-{
-  BVH_REAL r;
-
-  r = r1 + r5;
-  if((o1 - o5).sqrLength() < r * r) return true;
-  r = r2 + r6;
-  if((o2 - o6).sqrLength() < r * r) return true;
-  r = r3 + r7;
-  if((o3 - o7).sqrLength() < r * r) return true;
-  r = r4 + r8;
-  if((o4 - o8).sqrLength() < r * r) return true;
-
-  return false;
-}
-
-
-bool four_spheres_four_AABBs_intersect_and(const Vec3f& o1, BVH_REAL r1,
-                                           const Vec3f& o2, BVH_REAL r2,
-                                           const Vec3f& o3, BVH_REAL r3,
-                                           const Vec3f& o4, BVH_REAL r4,
-                                           const Vec3f& min1, const Vec3f& max1,
-                                           const Vec3f& min2, const Vec3f& max2,
-                                           const Vec3f& min3, const Vec3f& max3,
-                                           const Vec3f& min4, const Vec3f& max4)
-{
-  Vec3f o_new;
-  o_new = max(min(o1, max1), min1);
-  if((o_new - o1).sqrLength() > r1 * r1)
-    return false;
-  
-  o_new = max(min(o2, max2), min2);
-  if((o_new - o2).sqrLength() > r2 * r2)
-    return false;
-  
-  o_new = max(min(o3, max3), min3);
-  if((o_new - o3).sqrLength() > r3 * r3)
-    return false;
-  
-  o_new = max(min(o4, max4), min4);
-  if((o_new - o4).sqrLength() > r4 * r4)
-    return false;
-
-  return true;
-}
-
-bool four_spheres_four_AABBs_intersect_or(const Vec3f& o1, BVH_REAL r1,
-                                          const Vec3f& o2, BVH_REAL r2,
-                                          const Vec3f& o3, BVH_REAL r3,
-                                          const Vec3f& o4, BVH_REAL r4,
-                                          const Vec3f& min1, const Vec3f& max1,
-                                          const Vec3f& min2, const Vec3f& max2,
-                                          const Vec3f& min3, const Vec3f& max3,
-                                          const Vec3f& min4, const Vec3f& max4)
-{
-  Vec3f o_new;
-  o_new = max(min(o1, max1), min1);
-  if((o_new - o1).sqrLength() < r1 * r1)
-    return true;
-
-  o_new = max(min(o2, max2), min2);
-  if((o_new - o2).sqrLength() < r2 * r2)
-    return true;
-
-  o_new = max(min(o3, max3), min3);
-  if((o_new - o3).sqrLength() < r3 * r3)
-    return true;
-
-  o_new = max(min(o4, max4), min4);
-  if((o_new - o4).sqrLength() < r4 * r4)
-    return true;
-
-  return false;
-}
-
-
-bool four_AABBs_intersect_and(const Vec3f& min1, const Vec3f& max1,
-                              const Vec3f& min2, const Vec3f& max2,
-                              const Vec3f& min3, const Vec3f& max3,
-                              const Vec3f& min4, const Vec3f& max4,
-                              const Vec3f& min5, const Vec3f& max5,
-                              const Vec3f& min6, const Vec3f& max6,
-                              const Vec3f& min7, const Vec3f& max7,
-                              const Vec3f& min8, const Vec3f& max8)
-{
-  if(min1[0] > max5[0]) return false;
-  if(min1[1] > max5[1]) return false;
-  if(min1[2] > max5[2]) return false;
-
-  if(max1[0] < min5[0]) return false;
-  if(max1[1] < min5[1]) return false;
-  if(max1[2] < min5[2]) return false;
-
-  if(min2[0] > max6[0]) return false;
-  if(min2[1] > max6[1]) return false;
-  if(min2[2] > max6[2]) return false;
-
-  if(max2[0] < min6[0]) return false;
-  if(max2[1] < min6[1]) return false;
-  if(max2[2] < min6[2]) return false;
- 
-  if(min3[0] > max7[0]) return false;
-  if(min3[1] > max7[1]) return false;
-  if(min3[2] > max7[2]) return false;
-
-  if(max3[0] < min7[0]) return false;
-  if(max3[1] < min7[1]) return false;
-  if(max3[2] < min7[2]) return false;
-
-  if(min4[0] > max8[0]) return false;
-  if(min4[1] > max8[1]) return false;
-  if(min4[2] > max8[2]) return false;
-
-  if(max4[0] < min8[0]) return false;
-  if(max4[1] < min8[1]) return false;
-  if(max4[2] < min8[2]) return false;
-
-  return true;
-}
-
-bool four_AABBs_intersect_or(const Vec3f& min1, const Vec3f& max1,
-                             const Vec3f& min2, const Vec3f& max2,
-                             const Vec3f& min3, const Vec3f& max3,
-                             const Vec3f& min4, const Vec3f& max4,
-                             const Vec3f& min5, const Vec3f& max5,
-                             const Vec3f& min6, const Vec3f& max6,
-                             const Vec3f& min7, const Vec3f& max7,
-                             const Vec3f& min8, const Vec3f& max8)
-{
-  if(min1[0] > max5[0]) goto second;
-  if(min1[1] > max5[1]) goto second;
-  if(min1[2] > max5[2]) goto second;
-
-  if(max1[0] < min5[0]) goto second;
-  if(max1[1] < min5[1]) goto second;
-  if(max1[2] < min5[2]) goto second;
-
-  return true;
-
- second:
-  if(min2[0] > max6[0]) goto third;
-  if(min2[1] > max6[1]) goto third;
-  if(min2[2] > max6[2]) goto third;
-
-  if(max2[0] < min6[0]) goto third;
-  if(max2[1] < min6[1]) goto third;
-  if(max2[2] < min6[2]) goto third;
-
-  return true;
-
- third:
-  if(min3[0] > max7[0]) goto fourth;
-  if(min3[1] > max7[1]) goto fourth;
-  if(min3[2] > max7[2]) goto fourth;
-
-  if(max3[0] < min7[0]) goto fourth;
-  if(max3[1] < min7[1]) goto fourth;
-  if(max3[2] < min7[2]) goto fourth;
-
- fourth:
-  if(min4[0] > max8[0]) return false;
-  if(min4[1] > max8[1]) return false;
-  if(min4[2] > max8[2]) return false;
-
-  if(max4[0] < min8[0]) return false;
-  if(max4[1] < min8[1]) return false;
-  if(max4[2] < min8[2]) return false;
-
-  return true;
-}
-
-#endif
-
-
-}