diff --git a/CMakeLists.txt b/CMakeLists.txt
index bc580e7f027f9c1eb9b5296f6080a4276bb918b5..aff3da82302e70a73b76c5c19294631db608ad76 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -97,10 +97,8 @@ ADD_REQUIRED_DEPENDENCY("assimp >= 2.0")
 if(ASSIMP_FOUND)
   if (NOT ${ASSIMP_VERSION} VERSION_LESS "2.0.1150")
     add_definitions(-DHPP_FCL_USE_ASSIMP_UNIFIED_HEADER_NAMES)
-    SET(HPP_FCL_USE_ASSIMP_UNIFIED_HEADER_NAMES TRUE)
     message(STATUS "Assimp version has unified headers")
   else()
-    SET(HPP_FCL_USE_ASSIMP_UNIFIED_HEADER_NAMES FALSE)
     message(STATUS "Assimp version does not have unified headers")
   endif()
 endif()
@@ -123,7 +121,6 @@ SET(${PROJECT_NAME}_HEADERS
   include/hpp/fcl/shape/geometric_shapes.h
   include/hpp/fcl/distance_func_matrix.h
   include/hpp/fcl/collision.h
-  include/hpp/fcl/collision_node.h
   include/hpp/fcl/collision_func_matrix.h
   include/hpp/fcl/distance.h
   include/hpp/fcl/math/matrix_3f.h
@@ -162,10 +159,6 @@ endif ()
 
 pkg_config_append_libs("hpp-fcl")
 PKG_CONFIG_APPEND_BOOST_LIBS(thread date_time filesystem system)
-IF(HPP_FCL_USE_ASSIMP_UNIFIED_HEADER_NAMES)
-  # FCL_USE_ASSIMP_UNIFIED_HEADER_NAMES kept for backard compatibility reasons.
-  PKG_CONFIG_APPEND_CFLAGS("-DFCL_USE_ASSIMP_UNIFIED_HEADER_NAMES -DHPP_FCL_USE_ASSIMP_UNIFIED_HEADER_NAMES")
-ENDIF(HPP_FCL_USE_ASSIMP_UNIFIED_HEADER_NAMES)
 IF(HPP_FCL_HAVE_OCTOMAP)
   # FCL_HAVE_OCTOMAP kept for backward compatibility reasons.
   PKG_CONFIG_APPEND_CFLAGS(
diff --git a/doc/gjk.py b/doc/gjk.py
new file mode 100644
index 0000000000000000000000000000000000000000..a7f172af619a2fa05774a5134cf8b874cb18589d
--- /dev/null
+++ b/doc/gjk.py
@@ -0,0 +1,343 @@
+#!/usr/bin/env python3
+import pdb
+import sys
+
+# ABC = AB^AC
+# (ABC^AJ).a = (j.c - j.b) a.a + (j.a - j.c) b.a + (j.b - j.a) c.a, for j = b or c
+
+segment_fmt = "{j}a_aa"
+plane_fmt = ""
+edge_fmt = "{j}a * {b}a_{c}a + {j}{b} * {c}a_aa - {j}{c} * {b}a_aa"
+
+# These checks must be negative and not positive, as in the cheat sheet.
+# They are the same as in the cheat sheet, except that we consider (...).dot(A) instead of (...).dot(-A)
+plane_tests = [ "C.dot (a_cross_b)", "D.dot(a_cross_c)", "-D.dot(a_cross_b)" ]
+checks =  plane_tests \
+        + [ edge_fmt.format (**{'j':j,'b':"b",'c':"c"}) for j in [ "b", "c" ] ] \
+        + [ edge_fmt.format (**{'j':j,'b':"c",'c':"d"}) for j in [ "c", "d" ] ] \
+        + [ edge_fmt.format (**{'j':j,'b':"d",'c':"b"}) for j in [ "d", "b" ] ] \
+        + [ segment_fmt.format(**{'j':j}) for j in [ "b", "c", "d" ] ]
+checks_hr = [ "ABC.AO >= 0", "ACD.AO >= 0", "ADB.AO >= 0" ] \
+        + [ "(ABC ^ {}).AO >= 0".format(n) for n in [ "AB", "AC" ] ] \
+        + [ "(ACD ^ {}).AO >= 0".format(n) for n in [ "AC", "AD" ] ] \
+        + [ "(ADB ^ {}).AO >= 0".format(n) for n in [ "AD", "AB" ] ] \
+        + [ "AB.AO >= 0", "AC.AO >= 0", "AD.AO >= 0" ]
+
+# weights of the checks.
+weights = [ 2, ] * 3 + [ 3, ] * 6 + [ 1, ] * 3
+
+# Segment tests first, because they have lower weight.
+#tests = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, ]
+tests = [9, 10, 11, 0, 1, 2, 3, 4, 5, 6, 7, 8, ]
+assert len(tests) == len(checks)
+assert sorted(tests) == list(range(len(tests)))
+
+regions = [ "ABC", "ACD", "ADB", "AB", "AC", "AD", "A", "Inside", ]
+cases = list(range(len(regions)))
+
+# The following 3 lists refer to table doc/GJK_tetrahedra_boolean_table.ods
+
+# A check ID is (+/- (index+1)) where a minus sign encodes a NOT operation
+# and index refers to an index in list checks.
+
+# definitions is a list of list of check IDs to be ANDed.
+# For instance, a0.a3.!a4 -> [ 1, 4, -5]
+definitions = [
+        [  1,  4,- 5 ],
+        [  2,  6,- 7 ],
+        [  3,  8,- 9 ],
+        [- 4,  9, 10 ],
+        [- 6,  5, 11 ],
+        [- 8,  7, 12 ],
+        [-10,-11,-12 ],
+        [- 1,- 2,- 3 ],
+        ]
+# conditions is a list of (list of (list of check IDs to be ANDed) to be ORed).
+conditions = [
+        [],
+        [],
+        [],
+        [],
+        [],
+        [],
+        [],
+        [ ], #[ [10, 11, 12], ], # I don't think this is always true...
+        ]
+# rejections is a list of (list of (list of check IDs to be ANDed) to be ORed).
+rejections = [
+        [ [  2,  6,  7], [  3,-  8,-  9], ],
+        [ [  3,  8,  9], [  1,-  4,-  5], ],
+        [ [  1,  4,  5], [  2,-  6,-  7], ],
+        [ [- 1,- 3], ],
+        [ [- 2,- 1], ],
+        [ [- 3,- 2], ],
+        [ [  4,- 5], [  6,- 7], [  8,- 9], ],
+        [],
+        ]
+
+implications = [
+        [ [  4,  5, 10, ], [ 11],],
+        [ [  6,  7, 11, ], [ 12],],
+        [ [  8,  9, 12, ], [ 10],],
+
+        [ [- 4,- 5, 11, ], [ 10],],
+        [ [- 6,- 7, 12, ], [ 11],],
+        [ [- 8,- 9, 10, ], [ 12],],
+
+        [ [  1,  4,  5,  6], [- 7] ],
+        [ [  2,  6,  9,  8], [- 9] ],
+        [ [  3,  8,  9,  4], [- 5] ],
+
+        [ [- 4,  5,  10,], [- 11] ],
+        [ [  4,- 5,- 10,], [  11] ],
+        [ [- 6,  7,  11,], [- 12] ],
+        [ [  6,- 7,- 11,], [  12] ],
+        [ [- 8,  9,  12,], [- 10] ],
+        [ [  8,- 9,- 12,], [  10] ],
+
+        [ [- 4,  9,- 10,], [- 11,- 12] ],
+        [ [- 6,  5,- 11,], [- 12,- 10] ],
+        [ [- 8,  7,- 12,], [- 10,- 11] ],
+        ]
+
+def set_test_values (current_tests, test_values, itest, value):
+    def satisfies (values, indices):
+        for k in indices:
+            if k > 0 and values[ k-1] != True : return False
+            if k < 0 and values[-k-1] != False: return False
+        return True
+
+    remaining_tests = list(current_tests)
+    next_test_values = list(test_values)
+
+    remaining_tests.remove (itest)
+    next_test_values[itest] = value
+    rerun = True
+    while rerun:
+        rerun = False
+        for impl in implications:
+            if satisfies(next_test_values, impl[0]):
+                for id in impl[1]:
+                    k = (id - 1) if id > 0 else (-id-1)
+                    if k in remaining_tests:
+                        next_test_values[k] = (id > 0)
+                        remaining_tests.remove(k)
+                        rerun = True
+                    else:
+                        if next_test_values[k] != (id > 0):
+                            raise ValueError ("Absurd case")
+    return remaining_tests, next_test_values
+
+def apply_test_values (cases, test_values):
+    def canSatisfy (values, indices):
+        for k in indices:
+            if k > 0 and values[ k-1] == False: return False
+            if k < 0 and values[-k-1] == True : return False
+        return True
+    def satisfies (values, indices):
+        for k in indices:
+            if k > 0 and values[ k-1] != True : return False
+            if k < 0 and values[-k-1] != False: return False
+        return True
+
+    # Check all cases.
+    left_cases = []
+    for case in cases:
+        defi = definitions[case]
+        conds = conditions[case]
+        rejs = rejections[case]
+        if satisfies (test_values, defi):
+            # A definition is True, stop recursion
+            return [ case ]
+        if not canSatisfy (test_values, defi):
+            continue
+        for cond in conds:
+            if satisfies (test_values, cond):
+                # A condition is True, stop recursion
+                return [ case ]
+        append = True
+        for rej in rejs:
+            if satisfies (test_values, rej):
+                # A rejection is True, discard this case
+                append = False
+                break
+        if append: left_cases.append (case)
+    return left_cases
+
+def max_number_of_tests (current_tests, cases, test_values = [None,]*len(tests), prevBestScore = float('inf'), prevScore = 0):
+    for test in current_tests:
+        assert test_values[test] == None, "Test " +str(test)+ " already performed"
+
+    left_cases = apply_test_values (cases, test_values)
+
+    if len(left_cases) == 1:
+        return prevScore, { 'case': left_cases[0], }
+    elif len(left_cases) == 0:
+        return prevScore, { 'case': None, 'comments': [ "applied " + str(test_values), "to " + ", ".join([regions[c] for c in cases ]) ] }
+
+    assert len(current_tests) > 0, "No more test but " + str(left_cases) + " remains"
+
+    currentBestScore = prevBestScore
+    bestScore = float('inf')
+    bestOrder = [None, None]
+    for i, test in enumerate(current_tests):
+        assert bestScore >= currentBestScore
+
+        currentScore = prevScore + len(left_cases) * weights[test]
+        #currentScore = prevScore + weights[test]
+        if currentScore > currentBestScore: # Cannot do better -> stop
+            continue
+
+        try:
+            remaining_tests, next_test_values = set_test_values (current_tests, test_values, test, True)
+        except ValueError:
+            remaining_tests = None
+
+        if remaining_tests is not None:
+            # Do not put this in try catch as I do not want other ValueError to be understood as an infeasible branch.
+            score_if_t, order_if_t = max_number_of_tests (remaining_tests, left_cases, next_test_values, currentBestScore, currentScore)
+            if score_if_t >= currentBestScore: # True didn't do better -> stop
+                continue
+        else:
+            score_if_t, order_if_t = prevScore, None
+
+        try:
+            remaining_tests, next_test_values = set_test_values (current_tests, test_values, test, False)
+        except ValueError:
+            remaining_tests = None
+
+        if remaining_tests is not None:
+            # Do not put this in try catch as I do not want other ValueError to be understood as an infeasible branch.
+            score_if_f, order_if_f = max_number_of_tests (remaining_tests, left_cases, next_test_values, currentBestScore, currentScore)
+        else:
+            score_if_f, order_if_f = prevScore, None
+
+        currentScore = max(score_if_t, score_if_f)
+        if currentScore < bestScore:
+            if currentScore < currentBestScore:
+                bestScore = currentScore
+                bestOrder = { 'test': test, 'true': order_if_t, 'false': order_if_f }
+                #pdb.set_trace()
+                currentBestScore = currentScore
+                if len(tests) == len(current_tests):
+                    print ("New best score: {}".format(currentBestScore))
+
+    return bestScore, bestOrder
+
+def printComments (order, indent, file):
+    if 'comments' in order:
+        for comment in order['comments']:
+            print (indent + "// " + comment, file=file)
+
+def printOrder (order, indent = "", start=True,file=sys.stdout):
+    if start:
+        print ("bool GJK::projectTetrahedraOrigin(const Simplex& current, Simplex& next)", file=file)
+        print ("{", file=file)
+        print (indent+"// The code of this function was generated using doc/gjk.py", file=file)
+        print (indent+"const int a = 3, b = 2, c = 1, d = 0;", file=file)
+        for l in "abcd":
+            print (indent+"const Vec3f& {} (current.vertex[{}]->w);".format(l.upper(),l), file=file)
+        print (indent+"const FCL_REAL aa = A.squaredNorm();".format(l), file=file)
+        for l in "dcb":
+            for m in "abcd":
+                if m <= l:
+                    print (indent+"const FCL_REAL {0}{1}    = {2}.dot({3});".format(l,m,l.upper(),m.upper()), file=file)
+                else:
+                    print (indent+"const FCL_REAL& {0}{1}    = {1}{0};".format(l,m), file=file)
+            print (indent+"const FCL_REAL {0}a_aa = {0}a - aa;".format(l), file=file)
+        for l0,l1 in zip("bcd","cdb"):
+            print (indent+"const FCL_REAL {0}a_{1}a = {0}a - {1}a;".format(l0,l1), file=file)
+        for l in "bc":
+            print (indent+"const Vec3f a_cross_{0} = A.cross({1});".format(l,l.upper()), file=file)
+        print("", file=file)
+        print(       "#define REGION_INSIDE()               "+indent+"\\", file=file)
+        print(indent+"  ray.setZero();                      \\", file=file)
+        print(indent+"  next.vertex[0] = current.vertex[d]; \\", file=file)
+        print(indent+"  next.vertex[1] = current.vertex[c]; \\", file=file)
+        print(indent+"  next.vertex[2] = current.vertex[b]; \\", file=file)
+        print(indent+"  next.vertex[3] = current.vertex[a]; \\", file=file)
+        print(indent+"  next.rank=4;                        \\", file=file)
+        print(indent+"  return true;", file=file)
+        print("", file=file)
+
+    if 'case' in order:
+        case = order['case']
+        if case is None:
+            print (indent + "// There are no case corresponding to this set of tests.", file=file)
+            printComments (order, indent, file)
+            print (indent + "assert(false);", file=file)
+            return
+        region = regions[case]
+        print (indent + "// Region " + region, file=file)
+        printComments (order, indent, file)
+        toFree = ['b', 'c', 'd']
+        if region == "Inside":
+            print (indent + "REGION_INSIDE()", file=file)
+            toFree = []
+        elif region == 'A':
+            print (indent + "originToPoint (current, a, A, next, ray);", file=file)
+        elif len(region)==2:
+            a = region[0]
+            B = region[1]
+            print (indent + "originToSegment (current, a, {b}, A, {B}, {B}-A, -{b}a_aa, next, ray);".format(
+                **{ 'b':B.lower(), 'B':B,} ), file=file)
+            toFree.remove(B.lower())
+        elif len(region)==3:
+            B = region[1]
+            C = region[2]
+            test = plane_tests[['ABC','ACD','ADB'].index(region)]
+            if test.startswith('-'): test = test[1:]
+            else:                    test = '-'+test
+            print (indent + "originToTriangle (current, a, {b}, {c}, ({B}-A).cross({C}-A), {t}, next, ray);".format(
+                **{'b':B.lower(), 'c':C.lower(), 'B':B, 'C':C, 't':test}), file=file)
+            toFree.remove(B.lower())
+            toFree.remove(C.lower())
+        else:
+            assert False, "Unknown region " + region
+        for pt in toFree:
+            print (indent + "free_v[nfree++] = current.vertex[{}];".format(pt), file=file)
+    else:
+        assert "test" in order and 'true' in order and 'false' in order
+        check    = checks[order['test']]
+        check_hr = checks_hr[order['test']]
+        printComments (order, indent, file)
+        if order['true'] is None:
+            if order['false'] is None:
+                print (indent + """assert(false && "Case {} should never happen.");""".format(check_hr))
+            else:
+                print (indent + "assert(!({} <= 0)); // Not {}".format(check, check_hr), file=file)
+                printOrder (order['false'], indent=indent, start=False, file=file)
+        elif order['false'] is None:
+            print (indent + "assert({} <= 0); // {}".format(check, check_hr), file=file)
+            printOrder (order['true'], indent=indent, start=False, file=file)
+        else:
+            print (indent + "if ({} <= 0) {{ // if {}".format(check, check_hr), file=file)
+            printOrder (order['true'], indent=indent+"  ", start=False, file=file)
+            print (indent + "}} else {{ // not {}".format(check_hr), file=file)
+            printOrder (order['false'], indent=indent+"  ", start=False, file=file)
+            print (indent + "}} // end of {}".format(check_hr), file=file)
+
+    if start:
+        print ("",file=file)
+        print ("#undef REGION_INSIDE", file=file)
+        print (indent+"return false;", file=file)
+        print ("}", file=file)
+
+def unit_tests ():
+    # a4, a5, a10, a11, a12
+    cases = list(range(len(regions)))
+    pdb.set_trace()
+    left_cases = apply_test_values (cases,
+            test_values=[None,None,None,True,True,None,None,None,None,True,True,True])
+    assert len(left_cases) > 1
+
+#unit_tests()
+
+score, order = max_number_of_tests (tests, cases)
+
+print(score)
+printOrder(order, indent="  ")
+
+# TODO add weights such that:
+# - it is preferred to have all the use of one check in one branch.
+#   idea: ponderate by the number of remaining tests.
diff --git a/include/hpp/fcl/BV/AABB.h b/include/hpp/fcl/BV/AABB.h
index dab21bd47fd6c23987b5cac2f6bdec0034be72ae..c720f37a616f53625392fdc1ba04dde4cf9eb069 100644
--- a/include/hpp/fcl/BV/AABB.h
+++ b/include/hpp/fcl/BV/AABB.h
@@ -96,12 +96,8 @@ public:
   }    
 
   /// Not implemented
-  inline bool overlap(const AABB& other, const CollisionRequest&,
-                      FCL_REAL& sqrDistLowerBound) const
-  {
-    sqrDistLowerBound = sqrt (-1);
-    return overlap (other);
-  }
+  bool overlap(const AABB& other, const CollisionRequest& request,
+               FCL_REAL& sqrDistLowerBound) const;
 
    /// @brief Check whether the AABB contains another AABB
   inline bool contain(const AABB& other) const
diff --git a/include/hpp/fcl/BV/OBBRSS.h b/include/hpp/fcl/BV/OBBRSS.h
index dbd7ff1f6d2a1f5d8a0cfb16b89fb9cf0bd81eb0..74fb3bffaaa60a8600bd8475594605d20f6d60f7 100644
--- a/include/hpp/fcl/BV/OBBRSS.h
+++ b/include/hpp/fcl/BV/OBBRSS.h
@@ -148,18 +148,27 @@ public:
 };
 
 /// @brief Check collision between two OBBRSS, b1 is in configuration (R0, T0) and b2 is in indentity
-bool overlap(const Matrix3f& R0, const Vec3f& T0, const OBBRSS& b1, const OBBRSS& b2);
+inline bool overlap(const Matrix3f& R0, const Vec3f& T0, const OBBRSS& b1, const OBBRSS& b2)
+{
+  return overlap(R0, T0, b1.obb, b2.obb);
+}
 
 /// Check collision between two OBBRSS
 /// \param  b1 first OBBRSS in configuration (R0, T0)
 /// \param  b2 second OBBRSS in identity position
 /// \retval squared lower bound on the distance if OBBRSS do not overlap.
-bool overlap(const Matrix3f& R0, const Vec3f& T0, const OBBRSS& b1,
-             const OBBRSS& b2, const CollisionRequest& request,
-	     FCL_REAL& sqrDistLowerBound);
+inline bool overlap(const Matrix3f& R0, const Vec3f& T0, const OBBRSS& b1,
+                    const OBBRSS& b2, const CollisionRequest& request,
+	            FCL_REAL& sqrDistLowerBound)
+{
+  return overlap(R0, T0, b1.obb, b2.obb, request, sqrDistLowerBound);
+}
 
 /// @brief Computate distance between two OBBRSS, b1 is in configuation (R0, T0) and b2 is in indentity; P and Q, is not NULL, returns the nearest points
-FCL_REAL distance(const Matrix3f& R0, const Vec3f& T0, const OBBRSS& b1, const OBBRSS& b2, Vec3f* P = NULL, Vec3f* Q = NULL);
+inline FCL_REAL distance(const Matrix3f& R0, const Vec3f& T0, const OBBRSS& b1, const OBBRSS& b2, Vec3f* P = NULL, Vec3f* Q = NULL)
+{
+  return distance(R0, T0, b1.rss, b2.rss, P, Q);
+}
 
 }
 
diff --git a/include/hpp/fcl/BVH/BVH_model.h b/include/hpp/fcl/BVH/BVH_model.h
index 031942f1e15fdabef3d6979383d6b9bd4be98192..2a7be848e4d625c65c71e6270d153afcbcbb64e3 100644
--- a/include/hpp/fcl/BVH/BVH_model.h
+++ b/include/hpp/fcl/BVH/BVH_model.h
@@ -54,8 +54,7 @@ namespace fcl
 
 /// @brief A class describing the bounding hierarchy of a mesh model or a point cloud model (which is viewed as a degraded version of mesh)
 template<typename BV>
-class BVHModel : public CollisionGeometry,
-                 private boost::noncopyable
+class BVHModel : public CollisionGeometry
 {
 
 public:
@@ -132,12 +131,14 @@ public:
   /// @brief Access the bv giving the its index
   const BVNode<BV>& getBV(int id) const
   {
+    assert (id < num_bvs);
     return bvs[id];
   }
 
   /// @brief Access the bv giving the its index
   BVNode<BV>& getBV(int id)
   {
+    assert (id < num_bvs);
     return bvs[id];
   }
 
diff --git a/include/hpp/fcl/BVH/BV_fitter.h b/include/hpp/fcl/BVH/BV_fitter.h
index 8a1585343e57b2329512c1223073730737dae615..97a27f9da198a6d88890e6278224c31a32e6b693 100644
--- a/include/hpp/fcl/BVH/BV_fitter.h
+++ b/include/hpp/fcl/BVH/BV_fitter.h
@@ -41,6 +41,7 @@
 #include <hpp/fcl/BVH/BVH_internal.h>
 #include <hpp/fcl/BV/kIOS.h>
 #include <hpp/fcl/BV/OBBRSS.h>
+#include <hpp/fcl/BV/AABB.h>
 #include <iostream>
 
 namespace hpp
@@ -70,6 +71,8 @@ void fit<kIOS>(Vec3f* ps, int n, kIOS& bv);
 template<>
 void fit<OBBRSS>(Vec3f* ps, int n, OBBRSS& bv);
 
+template<>
+void fit<AABB>(Vec3f* ps, int n, AABB& bv);
 
 /// @brief Interface for fitting a bv given the triangles or points inside it.
 template<typename BV>
@@ -91,11 +94,11 @@ public:
 
 /// @brief The class for the default algorithm fitting a bounding volume to a set of points
 template<typename BV>
-class BVFitter : public BVFitterBase<BV>
+class BVFitterTpl : public BVFitterBase<BV>
 {
 public:
   /// @brief default deconstructor
-  virtual ~BVFitter() {}
+  virtual ~BVFitterTpl() {}
 
   /// @brief Prepare the geometry primitive data for fitting
   void set(Vec3f* vertices_, Triangle* tri_indices_, BVHModelType type_)
@@ -115,6 +118,28 @@ public:
     type = type_;
   }
 
+  /// @brief Clear the geometry primitive data
+  void clear()
+  {
+    vertices = NULL;
+    prev_vertices = NULL;
+    tri_indices = NULL;
+    type = BVH_MODEL_UNKNOWN;
+  }
+
+protected:
+
+  Vec3f* vertices;
+  Vec3f* prev_vertices;
+  Triangle* tri_indices;
+  BVHModelType type;
+};
+
+/// @brief The class for the default algorithm fitting a bounding volume to a set of points
+template<typename BV>
+class BVFitter : public BVFitterTpl<BV>
+{
+public:
   /// @brief Compute a bounding volume that fits a set of primitives (points or triangles).
   /// The primitive data was set by set function and primitive_indices is the primitive index relative to the data
   BV fit(unsigned int* primitive_indices, int num_primitives)
@@ -154,200 +179,61 @@ public:
     return bv;
   }
 
-  /// @brief Clear the geometry primitive data
-  void clear()
-  {
-    vertices = NULL;
-    prev_vertices = NULL;
-    tri_indices = NULL;
-    type = BVH_MODEL_UNKNOWN;
-  }
-
-private:
-
-  Vec3f* vertices;
-  Vec3f* prev_vertices;
-  Triangle* tri_indices;
-  BVHModelType type;
+protected:
+  using BVFitterTpl<BV>::vertices;
+  using BVFitterTpl<BV>::prev_vertices;
+  using BVFitterTpl<BV>::tri_indices;
+  using BVFitterTpl<BV>::type;
 };
 
-
 /// @brief Specification of BVFitter for OBB bounding volume
 template<>
-class BVFitter<OBB> : public BVFitterBase<OBB>
+class BVFitter<OBB> : public BVFitterTpl<OBB>
 {
 public:
-  /// @brief Prepare the geometry primitive data for fitting
-  void set(Vec3f* vertices_, Triangle* tri_indices_, BVHModelType type_)
-  {
-    vertices = vertices_;
-    prev_vertices = NULL;
-    tri_indices = tri_indices_;
-    type = type_;
-  }
-
-  /// @brief Prepare the geometry primitive data for fitting, for deformable mesh
-  void set(Vec3f* vertices_, Vec3f* prev_vertices_, Triangle* tri_indices_, BVHModelType type_)
-  {
-    vertices = vertices_;
-    prev_vertices = prev_vertices_;
-    tri_indices = tri_indices_;
-    type = type_;
-  }
-
   /// @brief Compute a bounding volume that fits a set of primitives (points or triangles).
   /// The primitive data was set by set function and primitive_indices is the primitive index relative to the data.
   OBB fit(unsigned int* primitive_indices, int num_primitives);
-
-  /// brief Clear the geometry primitive data
-  void clear()
-  {
-    vertices = NULL;
-    prev_vertices = NULL;
-    tri_indices = NULL;
-    type = BVH_MODEL_UNKNOWN;
-  }
-
-private:
-
-  Vec3f* vertices;
-  Vec3f* prev_vertices;
-  Triangle* tri_indices;
-  BVHModelType type;
 };
 
-
 /// @brief Specification of BVFitter for RSS bounding volume
 template<>
-class BVFitter<RSS> : public BVFitterBase<RSS>
+class BVFitter<RSS> : public BVFitterTpl<RSS>
 {
 public:
-  /// brief Prepare the geometry primitive data for fitting
-  void set(Vec3f* vertices_, Triangle* tri_indices_, BVHModelType type_)
-  {
-    vertices = vertices_;
-    prev_vertices = NULL;
-    tri_indices = tri_indices_;
-    type = type_;
-  }
-
-  /// @brief Prepare the geometry primitive data for fitting, for deformable mesh
-  void set(Vec3f* vertices_, Vec3f* prev_vertices_, Triangle* tri_indices_, BVHModelType type_)
-  {
-    vertices = vertices_;
-    prev_vertices = prev_vertices_;
-    tri_indices = tri_indices_;
-    type = type_;
-  }
-
   /// @brief Compute a bounding volume that fits a set of primitives (points or triangles).
   /// The primitive data was set by set function and primitive_indices is the primitive index relative to the data.
   RSS fit(unsigned int* primitive_indices, int num_primitives);
-
-  /// @brief Clear the geometry primitive data
-  void clear()
-  {
-    vertices = NULL;
-    prev_vertices = NULL;
-    tri_indices = NULL;
-    type = BVH_MODEL_UNKNOWN;
-  }
-
-private:
-
-  Vec3f* vertices;
-  Vec3f* prev_vertices;
-  Triangle* tri_indices;
-  BVHModelType type;
 };
 
-
 /// @brief Specification of BVFitter for kIOS bounding volume
 template<>
-class BVFitter<kIOS> : public BVFitterBase<kIOS>
+class BVFitter<kIOS> : public BVFitterTpl<kIOS>
 {
 public:
-  /// @brief Prepare the geometry primitive data for fitting
-  void set(Vec3f* vertices_, Triangle* tri_indices_, BVHModelType type_)
-  {
-    vertices = vertices_;
-    prev_vertices = NULL;
-    tri_indices = tri_indices_;
-    type = type_;
-  }
-
-  /// @brief Prepare the geometry primitive data for fitting
-  void set(Vec3f* vertices_, Vec3f* prev_vertices_, Triangle* tri_indices_, BVHModelType type_)
-  {
-    vertices = vertices_;
-    prev_vertices = prev_vertices_;
-    tri_indices = tri_indices_;
-    type = type_;
-  }
-
   /// @brief Compute a bounding volume that fits a set of primitives (points or triangles).
   /// The primitive data was set by set function and primitive_indices is the primitive index relative to the data.
   kIOS fit(unsigned int* primitive_indices, int num_primitives);
-
-  /// @brief Clear the geometry primitive data
-  void clear()
-  {
-    vertices = NULL;
-    prev_vertices = NULL;
-    tri_indices = NULL;
-    type = BVH_MODEL_UNKNOWN;
-  }
-
-private:
-  Vec3f* vertices;
-  Vec3f* prev_vertices;
-  Triangle* tri_indices;
-  BVHModelType type;
 };
 
-
 /// @brief Specification of BVFitter for OBBRSS bounding volume
 template<>
-class BVFitter<OBBRSS> : public BVFitterBase<OBBRSS>
+class BVFitter<OBBRSS> : public BVFitterTpl<OBBRSS>
 {
 public:
-  /// @brief Prepare the geometry primitive data for fitting
-  void set(Vec3f* vertices_, Triangle* tri_indices_, BVHModelType type_)
-  {
-    vertices = vertices_;
-    prev_vertices = NULL;
-    tri_indices = tri_indices_;
-    type = type_;
-  }
-
-  /// @brief Prepare the geometry primitive data for fitting
-  void set(Vec3f* vertices_, Vec3f* prev_vertices_, Triangle* tri_indices_, BVHModelType type_)
-  {
-    vertices = vertices_;
-    prev_vertices = prev_vertices_;
-    tri_indices = tri_indices_;
-    type = type_;
-  }
-
   /// @brief Compute a bounding volume that fits a set of primitives (points or triangles).
   /// The primitive data was set by set function and primitive_indices is the primitive index relative to the data.
   OBBRSS fit(unsigned int* primitive_indices, int num_primitives);
+};
 
-  /// @brief Clear the geometry primitive data
-  void clear()
-  {
-    vertices = NULL;
-    prev_vertices = NULL;
-    tri_indices = NULL;
-    type = BVH_MODEL_UNKNOWN;
-  }
-
-private:
-
-  Vec3f* vertices;
-  Vec3f* prev_vertices;
-  Triangle* tri_indices;
-  BVHModelType type;
+/// @brief Specification of BVFitter for AABB bounding volume
+template<>
+class BVFitter<AABB> : public BVFitterTpl<AABB>
+{
+public:
+  /// @brief Compute a bounding volume that fits a set of primitives (points or triangles).
+  /// The primitive data was set by set function and primitive_indices is the primitive index relative to the data.
+  AABB fit(unsigned int* primitive_indices, int num_primitives);
 };
 
 }
diff --git a/include/hpp/fcl/collision_data.h b/include/hpp/fcl/collision_data.h
index 64dd24ff505fabbfc34271b1f56cfa6c99cd2087..ded180506448b294d203d18664626df094d95b79 100644
--- a/include/hpp/fcl/collision_data.h
+++ b/include/hpp/fcl/collision_data.h
@@ -259,7 +259,7 @@ public:
   }
 
   /// @brief get all the contacts
-  void getContacts(std::vector<Contact>& contacts_)
+  void getContacts(std::vector<Contact>& contacts_) const
   {
     contacts_.resize(contacts.size());
     std::copy(contacts.begin(), contacts.end(), contacts_.begin());
diff --git a/include/hpp/fcl/fwd.hh b/include/hpp/fcl/fwd.hh
index 45fc60eceb5bc2e808476d4e72b2843cb045db57..7c5a301667330a84ee1eb7c9c195e2a57ee65b37 100644
--- a/include/hpp/fcl/fwd.hh
+++ b/include/hpp/fcl/fwd.hh
@@ -37,6 +37,8 @@
 #ifndef HPP_FCL_FWD_HH
 #define HPP_FCL_FWD_HH
 
+#include <boost/shared_ptr.hpp>
+
 namespace hpp {
 namespace fcl {
   class CollisionObject;
diff --git a/include/hpp/fcl/mesh_loader/assimp.h b/include/hpp/fcl/mesh_loader/assimp.h
index 3abcf0aecd0426183862a6f1855649ca9d7c85bc..758a3b8a39c40697b00b7d51bb84eeb131698a58 100644
--- a/include/hpp/fcl/mesh_loader/assimp.h
+++ b/include/hpp/fcl/mesh_loader/assimp.h
@@ -37,36 +37,34 @@
 #ifndef HPP_FCL_MESH_LOADER_ASSIMP_H
 #define HPP_FCL_MESH_LOADER_ASSIMP_H
 
-#ifdef HPP_FCL_USE_ASSIMP_UNIFIED_HEADER_NAMES
-  #include <assimp/DefaultLogger.hpp>
-  #include <assimp/IOStream.hpp>
-  #include <assimp/IOSystem.hpp>
-  #include <assimp/scene.h>
-  #include <assimp/Importer.hpp>
-  #include <assimp/postprocess.h>
-#else
-  #include <assimp/DefaultLogger.h>
-  #include <assimp/assimp.hpp>
-  #include <assimp/IOStream.h>
-  #include <assimp/IOSystem.h>
-  #include <assimp/aiScene.h>
-  #include <assimp/aiPostProcess.h>
-#endif
-
 #include <hpp/fcl/BV/OBBRSS.h>
 #include <hpp/fcl/BVH/BVH_model.h>
 
+class aiScene;
+
 namespace hpp
 {
 namespace fcl
 {
 
+namespace internal
+{
+
 struct TriangleAndVertices
 {
   std::vector <fcl::Vec3f> vertices_;
   std::vector <fcl::Triangle> triangles_;
 };
 
+struct Loader {
+  Loader ();
+  ~Loader ();
+
+  void load (const std::string& resource_path);
+
+  aiScene* scene;
+};
+
 /**
  * @brief      Recursive procedure for building a mesh
  *
@@ -76,31 +74,26 @@ struct TriangleAndVertices
  * @param[in]  vertices_offset Current number of vertices in the model
  * @param      tv              Triangles and Vertices of the mesh submodels
  */
-unsigned buildMesh (const fcl::Vec3f & scale,
-                    const aiScene* scene,
-                    const aiNode* node,
-                    unsigned vertices_offset,
-                    TriangleAndVertices & tv);
+void buildMesh (const fcl::Vec3f & scale,
+                const aiScene* scene,
+                unsigned vertices_offset,
+                TriangleAndVertices & tv);
 
 /**
  * @brief      Convert an assimp scene to a mesh
  *
- * @param[in]  name   File (ressource) transformed into an assimp scene in loa
  * @param[in]  scale  Scale to apply when reading the ressource
  * @param[in]  scene  Pointer to the assimp scene
  * @param[out] mesh  The mesh that must be built
  */
 template<class BoundingVolume>   
-inline void meshFromAssimpScene(const std::string & name,
+inline void meshFromAssimpScene(
                          const fcl::Vec3f & scale,
                          const aiScene* scene,
                          const boost::shared_ptr < BVHModel<BoundingVolume> > & mesh)
 {
   TriangleAndVertices tv;
   
-  if (!scene->HasMeshes())
-    throw std::invalid_argument (std::string ("No meshes found in file ")+name);
-  
   int res = mesh->beginModel ();
   
   if (res != fcl::BVH_OK)
@@ -110,13 +103,14 @@ inline void meshFromAssimpScene(const std::string & name,
     throw std::runtime_error (error.str ());
   }
     
-  buildMesh (scale, scene, scene->mRootNode, 
-      (unsigned) mesh->num_vertices, tv);
+  buildMesh (scale, scene, (unsigned) mesh->num_vertices, tv);
   mesh->addSubModel (tv.vertices_, tv.triangles_);
     
   mesh->endModel ();
 }
 
+} // namespace internal
+
 /**
  * @brief      Read a mesh file and convert it to a polyhedral mesh
  *
@@ -129,46 +123,13 @@ inline void loadPolyhedronFromResource (const std::string & resource_path,
                                  const fcl::Vec3f & scale,
                                  const boost::shared_ptr < BVHModel<BoundingVolume> > & polyhedron)
 {
-  Assimp::Importer importer;
-  // set list of ignored parameters (parameters used for rendering)
-  importer.SetPropertyInteger(AI_CONFIG_PP_RVC_FLAGS,
-      aiComponent_TANGENTS_AND_BITANGENTS|
-      aiComponent_COLORS |
-      aiComponent_BONEWEIGHTS |
-      aiComponent_ANIMATIONS |
-      aiComponent_LIGHTS |
-      aiComponent_CAMERAS|
-      aiComponent_TEXTURES |
-      aiComponent_TEXCOORDS |
-      aiComponent_MATERIALS |
-      aiComponent_NORMALS
-      );
-
-  const aiScene* scene = importer.ReadFile(resource_path.c_str(),
-      aiProcess_SortByPType |
-      aiProcess_Triangulate |
-      aiProcess_RemoveComponent |
-      aiProcess_ImproveCacheLocality |
-      // TODO: I (Joseph Mirabel) have no idea whether degenerated triangles are
-      // properly handled. Enabling aiProcess_FindDegenerates would throw an
-      // exception when that happens. Is it too conservative ?
-      // aiProcess_FindDegenerates |
-      aiProcess_JoinIdenticalVertices
-      );
-
-  if (!scene)
-  {
-    const std::string exception_message (std::string ("Could not load resource ") + resource_path + std::string("\n") +
-                                         importer.GetErrorString () + std::string("\n") +
-                                         "Hint: the mesh directory may be wrong.");
-    throw std::invalid_argument(exception_message);
-  }
-  
-  meshFromAssimpScene (resource_path, scale, scene, polyhedron);
-}
+  internal::Loader scene;
+  scene.load (resource_path);
 
+  internal::meshFromAssimpScene (scale, scene.scene, polyhedron);
 }
 
+} // namespace fcl
 } // namespace hpp
 
-#endif // FCL_MESH_LOADER_ASSIMP_H
+#endif // HPP_FCL_MESH_LOADER_ASSIMP_H
diff --git a/include/hpp/fcl/narrowphase/gjk.h b/include/hpp/fcl/narrowphase/gjk.h
index 4748e3e087efa12a8238d0df311eec54ec7b544c..46ecc26eab558cef5b35fe4ef43e709c8e956338 100644
--- a/include/hpp/fcl/narrowphase/gjk.h
+++ b/include/hpp/fcl/narrowphase/gjk.h
@@ -50,53 +50,61 @@ namespace details
 {
 
 /// @brief the support function for shape
-Vec3f getSupport(const ShapeBase* shape, const Vec3f& dir); 
+Vec3f getSupport(const ShapeBase* shape, const Vec3f& dir, bool dirIsNormalized); 
 
 /// @brief Minkowski difference class of two shapes
 ///
+/// \todo template this by the two shapes. The triangle / triangle case can be
+///       easily optimized computing once the triangle shapes[1] into frame0
+///
 /// \note The Minkowski difference is expressed in the frame of the first shape.
 struct MinkowskiDiff
 {
   /// @brief points to two shapes
   const ShapeBase* shapes[2];
 
-  /// @brief rotation from shape0 to shape1
-  Matrix3f toshape1;
+  /// @brief rotation from shape1 to shape0
+  /// such that \f$ p_in_0 = oR1 * p_in_1 + ot1 \f$.
+  Matrix3f oR1;
+
+  /// @brief translation from shape1 to shape0
+  /// such that \f$ p_in_0 = oR1 * p_in_1 + ot1 \f$.
+  Vec3f ot1;
+
+  typedef void (*GetSupportFunction) (const MinkowskiDiff& minkowskiDiff,
+      const Vec3f& dir, bool dirIsNormalized, Vec3f& support0, Vec3f& support1);
+  GetSupportFunction getSupportFunc;
+
+  MinkowskiDiff() : getSupportFunc (NULL) {}
 
-  /// @brief transform from shape1 to shape0 
-  Transform3f toshape0;
+  /// Set the two shapes,
+  /// assuming the relative transformation between them is identity.
+  void set (const ShapeBase* shape0, const ShapeBase* shape1);
 
-  MinkowskiDiff() { }
+  /// Set the two shapes, with a relative transformation.
+  void set (const ShapeBase* shape0, const ShapeBase* shape1,
+      const Transform3f& tf0, const Transform3f& tf1);
 
   /// @brief support function for shape0
-  inline Vec3f support0(const Vec3f& d) const
+  inline Vec3f support0(const Vec3f& d, bool dIsNormalized) const
   {
-    return getSupport(shapes[0], d);
+    return getSupport(shapes[0], d, dIsNormalized);
   }
-  
+
   /// @brief support function for shape1
-  inline Vec3f support1(const Vec3f& d) const
+  inline Vec3f support1(const Vec3f& d, bool dIsNormalized) const
   {
-    return toshape0.transform(getSupport(shapes[1], toshape1 * d));
+    return oR1 * getSupport(shapes[1], oR1.transpose() * d, dIsNormalized) + ot1;
   }
 
   /// @brief support function for the pair of shapes
-  inline Vec3f support(const Vec3f& d) const
-  {
-    return support0(d) - support1(-d);
-  }
-
-  /// @brief support function for the d-th shape (d = 0 or 1)
-  inline Vec3f support(const Vec3f& d, size_t index) const
+  inline void support(const Vec3f& d, bool dIsNormalized, Vec3f& supp0, Vec3f& supp1) const
   {
-    if(index)
-      return support1(d);
-    else
-      return support0(d);
+    assert(getSupportFunc != NULL);
+    getSupportFunc(*this, d, dIsNormalized, supp0, supp1);
   }
 };
 
-
 /// @brief class for GJK algorithm
 ///
 /// \note The computations are performed in the frame of the first shape.
@@ -104,29 +112,25 @@ struct GJK
 {
   struct SimplexV
   {
-    /// @brief support direction
-    Vec3f d; 
-    /// @brieg support vector (i.e., the furthest point on the shape along the support direction)
+    /// @brief support vector for shape 0 and 1.
+    Vec3f w0, w1; 
+    /// @brief support vector (i.e., the furthest point on the shape along the support direction)
     Vec3f w;
-
-    SimplexV () : d(Vec3f::Zero()), w(Vec3f::Zero()) {}
   };
 
   struct Simplex
   {
     /// @brief simplex vertex
     SimplexV* vertex[4];
-    /// @brief weight 
-    FCL_REAL coefficient[4];
     /// @brief size of simplex (number of vertices)
-    size_t rank;
+    short rank;
 
-    Simplex() : rank(0) {}
+    Simplex() {}
   };
 
   enum Status {Valid, Inside, Failed};
 
-  MinkowskiDiff shape;
+  MinkowskiDiff const* shape;
   Vec3f ray;
   FCL_REAL distance;
   Simplex simplices[2];
@@ -141,16 +145,14 @@ struct GJK
   void initialize();
 
   /// @brief GJK algorithm, given the initial value guess
-  Status evaluate(const MinkowskiDiff& shape_, const Vec3f& guess);
+  Status evaluate(const MinkowskiDiff& shape, const Vec3f& guess);
 
   /// @brief apply the support function along a direction, the result is return in sv
-  void getSupport(const Vec3f& d, SimplexV& sv) const;
-
-  /// @brief discard one vertex from the simplex
-  void removeVertex(Simplex& simplex);
-
-  /// @brief append one vertex to the simplex
-  void appendVertex(Simplex& simplex, const Vec3f& v);
+  inline void getSupport(const Vec3f& d, bool dIsNormalized, SimplexV& sv) const
+  {
+    shape->support(d, dIsNormalized, sv.w0, sv.w1);
+    sv.w.noalias() = sv.w0 - sv.w1;
+  }
 
   /// @brief whether the simplex enclose the origin
   bool encloseOrigin();
@@ -161,20 +163,48 @@ struct GJK
     return simplex;
   }
 
+  /// Get the closest points on each object.
+  /// \return true on success
+  static bool getClosestPoints (const Simplex& simplex, Vec3f& w0, Vec3f& w1);
+
   /// @brief get the guess from current simplex
   Vec3f getGuessFromSimplex() const;
 
+  /// @brief Distance threshold for early break.
+  /// GJK stops when it proved the distance is more than this threshold.
+  /// \note The closest points will be erroneous in this case.
+  ///       If you want the closest points, set this to infinity (the default).
+  void setDistanceEarlyBreak (const FCL_REAL& dup)
+  {
+    distance_upper_bound = dup;
+  }
+
 private:
   SimplexV store_v[4];
   SimplexV* free_v[4];
-  size_t nfree;
-  size_t current;
+  short nfree;
+  short current;
   Simplex* simplex;
   Status status;
 
   unsigned int max_iterations;
   FCL_REAL tolerance;
+  FCL_REAL distance_upper_bound;
+
+  /// @brief discard one vertex from the simplex
+  inline void removeVertex(Simplex& simplex);
+
+  /// @brief append one vertex to the simplex
+  inline void appendVertex(Simplex& simplex, const Vec3f& v, bool isNormalized = false);
+
+  /// @brief Project origin (0) onto line a-b
+  bool projectLineOrigin(const Simplex& current, Simplex& next);
+
+  /// @brief Project origin (0) onto triangle a-b-c
+  bool projectTriangleOrigin(const Simplex& current, Simplex& next);
 
+  /// @brief Project origin (0) onto tetrahedran a-b-c-d
+  bool projectTetrahedraOrigin(const Simplex& current, Simplex& next);
 };
 
 
diff --git a/include/hpp/fcl/narrowphase/narrowphase.h b/include/hpp/fcl/narrowphase/narrowphase.h
index 6ecb77acbec15acd4e63dfc516066c27f74151ee..cea51139e33588431f71dbccf3e8c6c69fd329d6 100644
--- a/include/hpp/fcl/narrowphase/narrowphase.h
+++ b/include/hpp/fcl/narrowphase/narrowphase.h
@@ -62,10 +62,7 @@ namespace fcl
       if(enable_cached_guess) guess = cached_guess;
     
       details::MinkowskiDiff shape;
-      shape.shapes[0] = &s1;
-      shape.shapes[1] = &s2;
-      shape.toshape1 = tf2.getRotation().transpose() * tf1.getRotation();
-      shape.toshape0 = tf1.inverseTimes(tf2);
+      shape.set (&s1, &s2, tf1, tf2);
   
       details::GJK gjk((unsigned int )gjk_max_iterations, gjk_tolerance);
       details::GJK::Status gjk_status = gjk.evaluate(shape, -guess);
@@ -79,12 +76,8 @@ namespace fcl
             details::EPA::Status epa_status = epa.evaluate(gjk, -guess);
             if(epa_status != details::EPA::Failed)
               {
-                Vec3f w0 (Vec3f::Zero());
-                for(size_t i = 0; i < epa.result.rank; ++i)
-                  {
-                    w0 += shape.support(epa.result.vertex[i]->d, 0) *
-                      epa.result.coefficient[i];
-                  }
+                Vec3f w0, w1;
+                details::GJK::getClosestPoints (epa.result, w0, w1);
                 if(penetration_depth) *penetration_depth = -epa.depth;
                 if(normal) *normal = tf2.getRotation() * epa.normal;
                 if(contact_points) *contact_points = tf1.transform(w0 - epa.normal*(epa.depth *0.5));
@@ -109,16 +102,18 @@ namespace fcl
      Vec3f& p1, Vec3f& p2, Vec3f& normal) const
     {
       bool col;
-      TriangleP tri(P1, P2, P3);
+      // Express everything in frame 1
+      const Transform3f tf_1M2 (tf1.inverseTimes(tf2));
+      TriangleP tri(
+          tf_1M2.transform (P1),
+          tf_1M2.transform (P2),
+          tf_1M2.transform (P3));
 
       Vec3f guess(1, 0, 0);
       if(enable_cached_guess) guess = cached_guess;
 
       details::MinkowskiDiff shape;
-      shape.shapes[0] = &s;
-      shape.shapes[1] = &tri;
-      shape.toshape1 = tf2.getRotation().transpose() * tf1.getRotation();
-      shape.toshape0 = tf1.inverseTimes(tf2);
+      shape.set (&s, &tri);
   
       details::GJK gjk((unsigned int )gjk_max_iterations, gjk_tolerance);
       details::GJK::Status gjk_status = gjk.evaluate(shape, -guess);
@@ -132,12 +127,8 @@ namespace fcl
             details::EPA epa(epa_max_face_num, epa_max_vertex_num, epa_max_iterations, epa_tolerance);
             details::EPA::Status epa_status = epa.evaluate(gjk, -guess);
             assert (epa_status != details::EPA::Failed); (void) epa_status;
-            Vec3f w0 (Vec3f::Zero());
-            for(size_t i = 0; i < epa.result.rank; ++i)
-              {
-                w0 += shape.support(epa.result.vertex[i]->d, 0) *
-                  epa.result.coefficient[i];
-              }
+            Vec3f w0, w1;
+            details::GJK::getClosestPoints (epa.result, w0, w1);
             distance = -epa.depth;
             normal = -epa.normal;
             p1 = p2 = tf1.transform(w0 - epa.normal*(epa.depth *0.5));
@@ -148,16 +139,15 @@ namespace fcl
         case details::GJK::Failed:
           {
             col = false;
-            Vec3f w0 (Vec3f::Zero()), w1 (Vec3f::Zero());
-            for(size_t i = 0; i < gjk.getSimplex()->rank; ++i)
-              {
-                FCL_REAL p = gjk.getSimplex()->coefficient[i];
-                w0 += shape.support(gjk.getSimplex()->vertex[i]->d, 0) * p;
-                w1 += shape.support(-gjk.getSimplex()->vertex[i]->d, 1) * p;
-              }
-            distance = (w0 - w1).norm();
-            p1 = tf1.transform (w0);
-            p2 = tf1.transform (w1);
+
+            details::GJK::getClosestPoints (*gjk.getSimplex(), p1, p2);
+            // TODO On degenerated case, the closest point may be wrong
+            // (i.e. an object face normal is colinear to gjk.ray
+            // assert (distance == (w0 - w1).norm());
+            distance = gjk.distance;
+
+            p1 = tf1.transform (p1);
+            p2 = tf1.transform (p2);
             assert (distance > 0);
           }
           break;
@@ -183,10 +173,7 @@ namespace fcl
       if(enable_cached_guess) guess = cached_guess;
 
       details::MinkowskiDiff shape;
-      shape.shapes[0] = &s1;
-      shape.shapes[1] = &s2;
-      shape.toshape1 = tf2.getRotation().transpose() * tf1.getRotation();
-      shape.toshape0 = tf1.inverseTimes(tf2);
+      shape.set (&s1, &s2, tf1, tf2);
 
       details::GJK gjk((unsigned int) gjk_max_iterations, gjk_tolerance);
       details::GJK::Status gjk_status = gjk.evaluate(shape, -guess);
@@ -196,13 +183,8 @@ namespace fcl
       {
         // TODO: understand why GJK fails between cylinder and box
         assert (distance * distance < sqrt (eps));
-        Vec3f w0 (Vec3f::Zero()), w1 (Vec3f::Zero());
-        for(size_t i = 0; i < gjk.getSimplex()->rank; ++i)
-        {
-          FCL_REAL p = gjk.getSimplex()->coefficient[i];
-          w0 += shape.support(gjk.getSimplex()->vertex[i]->d, 0) * p;
-          w1 += shape.support(-gjk.getSimplex()->vertex[i]->d, 1) * p;
-        }
+        Vec3f w0, w1;
+        details::GJK::getClosestPoints (*gjk.getSimplex(), w0, w1);
         distance = 0;
         p1 = p2 = tf1.transform (.5* (w0 + w1));
         normal = Vec3f (0,0,0);
@@ -210,18 +192,14 @@ namespace fcl
       }
       else if(gjk_status == details::GJK::Valid)
         {
-          Vec3f w0 (Vec3f::Zero()), w1 (Vec3f::Zero());
-          for(size_t i = 0; i < gjk.getSimplex()->rank; ++i)
-            {
-              FCL_REAL p = gjk.getSimplex()->coefficient[i];
-              w0 += shape.support(gjk.getSimplex()->vertex[i]->d, 0) * p;
-              w1 += shape.support(-gjk.getSimplex()->vertex[i]->d, 1) * p;
-            }
-
-          distance = (w0 - w1).norm();
-
-          p1 = tf1.transform (w0);
-          p2 = tf1.transform (w1);
+          details::GJK::getClosestPoints (*gjk.getSimplex(), p1, p2);
+          // TODO On degenerated case, the closest point may be wrong
+          // (i.e. an object face normal is colinear to gjk.ray
+          // assert (distance == (w0 - w1).norm());
+          distance = gjk.distance;
+
+          p1 = tf1.transform (p1);
+          p2 = tf1.transform (p2);
           return true;
         }
       else
@@ -234,12 +212,8 @@ namespace fcl
               details::EPA::Status epa_status = epa.evaluate(gjk, -guess);
               if(epa_status != details::EPA::Failed)
                 {
-                  Vec3f w0 (Vec3f::Zero());
-                  for(size_t i = 0; i < epa.result.rank; ++i)
-                    {
-                      w0 += shape.support(epa.result.vertex[i]->d, 0) *
-                        epa.result.coefficient[i];
-                    }
+                  Vec3f w0, w1;
+                  details::GJK::getClosestPoints (epa.result, w0, w1);
                   assert (epa.depth >= -eps);
                   distance = std::min (0., -epa.depth);
                   normal = tf2.getRotation() * epa.normal;
@@ -248,17 +222,11 @@ namespace fcl
             }
           else
             {
-              Vec3f w0 (Vec3f::Zero()), w1 (Vec3f::Zero());
-              for(size_t i = 0; i < gjk.getSimplex()->rank; ++i)
-                {
-                  FCL_REAL p = gjk.getSimplex()->coefficient[i];
-                  w0 += shape.support(gjk.getSimplex()->vertex[i]->d, 0) * p;
-                  w1 += shape.support(-gjk.getSimplex()->vertex[i]->d, 1) * p;
-                }
+              details::GJK::getClosestPoints (*gjk.getSimplex(), p1, p2);
               distance = 0;
 
-              p1 = tf1.transform (w0);
-              p2 = tf1.transform (w1);
+              p1 = tf1.transform (p1);
+              p2 = tf1.transform (p2);
             }
           return false;
         }
diff --git a/include/hpp/fcl/shape/geometric_shape_to_BVH_model.h b/include/hpp/fcl/shape/geometric_shape_to_BVH_model.h
index f50309a01012cdbf2c9bf2158c7e2f917ead3549..03172e7dd7c1da10428f699c186f148061d240cb 100644
--- a/include/hpp/fcl/shape/geometric_shape_to_BVH_model.h
+++ b/include/hpp/fcl/shape/geometric_shape_to_BVH_model.h
@@ -52,30 +52,30 @@ namespace fcl
 template<typename BV>
 void generateBVHModel(BVHModel<BV>& model, const Box& shape, const Transform3f& pose)
 {
-  double a = shape.side[0];
-  double b = shape.side[1];
-  double c = shape.side[2];
-  std::vector<Vec3f> points(8);
-  std::vector<Triangle> tri_indices(12);
-  points[0] << 0.5 * a, -0.5 * b, 0.5 * c;
-  points[1] << 0.5 * a, 0.5 * b, 0.5 * c;
-  points[2] << -0.5 * a, 0.5 * b, 0.5 * c;
-  points[3] << -0.5 * a, -0.5 * b, 0.5 * c;
-  points[4] << 0.5 * a, -0.5 * b, -0.5 * c;
-  points[5] << 0.5 * a, 0.5 * b, -0.5 * c;
-  points[6] << -0.5 * a, 0.5 * b, -0.5 * c;
-  points[7] << -0.5 * a, -0.5 * b, -0.5 * c;
-
-  tri_indices[0].set(0, 4, 1);
-  tri_indices[1].set(1, 4, 5);
-  tri_indices[2].set(2, 6, 3);
-  tri_indices[3].set(3, 6, 7);
-  tri_indices[4].set(3, 0, 2);
-  tri_indices[5].set(2, 0, 1);
-  tri_indices[6].set(6, 5, 7);
-  tri_indices[7].set(7, 5, 4);
-  tri_indices[8].set(1, 5, 2);
-  tri_indices[9].set(2, 5, 6);
+  double a = shape.halfSide[0];
+  double b = shape.halfSide[1];
+  double c = shape.halfSide[2];
+  Vec3f points[8];
+  Triangle tri_indices[12];
+  points[0] = Vec3f ( a, -b,  c);
+  points[1] = Vec3f ( a,  b,  c);
+  points[2] = Vec3f (-a,  b,  c);
+  points[3] = Vec3f (-a, -b,  c);
+  points[4] = Vec3f ( a, -b, -c);
+  points[5] = Vec3f ( a,  b, -c);
+  points[6] = Vec3f (-a,  b, -c);
+  points[7] = Vec3f (-a, -b, -c);
+
+  tri_indices[ 0].set(0, 4, 1);
+  tri_indices[ 1].set(1, 4, 5);
+  tri_indices[ 2].set(2, 6, 3);
+  tri_indices[ 3].set(3, 6, 7);
+  tri_indices[ 4].set(3, 0, 2);
+  tri_indices[ 5].set(2, 0, 1);
+  tri_indices[ 6].set(6, 5, 7);
+  tri_indices[ 7].set(7, 5, 4);
+  tri_indices[ 8].set(1, 5, 2);
+  tri_indices[ 9].set(2, 5, 6);
   tri_indices[10].set(3, 7, 0);
   tri_indices[11].set(0, 7, 4);
 
diff --git a/include/hpp/fcl/shape/geometric_shapes.h b/include/hpp/fcl/shape/geometric_shapes.h
index 586fa82645b3a74d4f88960b7d1a8048b5b5639a..7d00796f5b3b2861c9cb6f07c099cf2de0cb979d 100644
--- a/include/hpp/fcl/shape/geometric_shapes.h
+++ b/include/hpp/fcl/shape/geometric_shapes.h
@@ -56,6 +56,8 @@ class ShapeBase : public CollisionGeometry
 public:
   ShapeBase() {}
 
+  virtual ~ShapeBase () {};
+
   /// @brief Get object type: a geometric shape
   OBJECT_TYPE getObjectType() const { return OT_GEOM; }
 };
@@ -80,18 +82,18 @@ public:
 class Box : public ShapeBase
 {
 public:
-  Box(FCL_REAL x, FCL_REAL y, FCL_REAL z) : ShapeBase(), side(x, y, z)
+  Box(FCL_REAL x, FCL_REAL y, FCL_REAL z) : ShapeBase(), halfSide(x/2, y/2, z/2)
   {
   }
 
-  Box(const Vec3f& side_) : ShapeBase(), side(side_) 
+  Box(const Vec3f& side_) : ShapeBase(), halfSide(side_/2) 
   {
   }
 
   Box() {}
 
-  /// @brief box side length
-  Vec3f side;
+  /// @brief box side half-length
+  Vec3f halfSide;
 
   /// @brief Compute AABB
   void computeLocalAABB();
@@ -101,18 +103,14 @@ public:
 
   FCL_REAL computeVolume() const
   {
-    return side[0] * side[1] * side[2];
+    return 8*halfSide.prod();
   }
 
   Matrix3f computeMomentofInertia() const
   {
     FCL_REAL V = computeVolume();
-    FCL_REAL a2 = side[0] * side[0] * V;
-    FCL_REAL b2 = side[1] * side[1] * V;
-    FCL_REAL c2 = side[2] * side[2] * V;
-    return (Matrix3f() << (b2 + c2) / 12, 0, 0,
-                          0, (a2 + c2) / 12, 0,
-                          0, 0, (a2 + b2) / 12).finished();
+    Vec3f s (halfSide.cwiseAbs2() * V);
+    return (Vec3f (s[1] + s[2], s[0] + s[2], s[0] + s[1]) / 3).asDiagonal();
   }
 };
 
diff --git a/include/hpp/fcl/traversal/traversal_node_setup.h b/include/hpp/fcl/traversal/traversal_node_setup.h
index 041b3fa68731525070797d48661ee7cde48d35b5..0912c231d15a567b242911d9e8ee60780ff5fc87 100644
--- a/include/hpp/fcl/traversal/traversal_node_setup.h
+++ b/include/hpp/fcl/traversal/traversal_node_setup.h
@@ -548,7 +548,7 @@ bool initialize(ShapeMeshCollisionTraversalNodeOBBRSS<S, NarrowPhaseSolver>& nod
 
 /// @brief Initialize traversal node for collision between two meshes, given the current transforms
 template<typename BV>
-bool initialize(MeshCollisionTraversalNode<BV>& node,
+bool initialize(MeshCollisionTraversalNode<BV, RelativeTransformationIsIdentity>& node,
                 BVHModel<BV>& model1, Transform3f& tf1,
                 BVHModel<BV>& model2, Transform3f& tf2,
                 CollisionResult& result,
@@ -607,31 +607,33 @@ bool initialize(MeshCollisionTraversalNode<BV>& node,
   return true;
 }
 
+template<typename BV>
+bool initialize(MeshCollisionTraversalNode<BV, 0>& node,
+                const BVHModel<BV>& model1, const Transform3f& tf1,
+                const BVHModel<BV>& model2, const Transform3f& tf2,
+                CollisionResult& result)
+{
+  if(model1.getModelType() != BVH_MODEL_TRIANGLES || model2.getModelType() != BVH_MODEL_TRIANGLES)
+    return false;
 
-/// @brief Initialize traversal node for collision between two meshes, specialized for OBB type
-bool initialize(MeshCollisionTraversalNodeOBB& node,
-                const BVHModel<OBB>& model1, const Transform3f& tf1,
-                const BVHModel<OBB>& model2, const Transform3f& tf2,
-                CollisionResult& result);
+  node.vertices1 = model1.vertices;
+  node.vertices2 = model2.vertices;
 
-/// @brief Initialize traversal node for collision between two meshes, specialized for RSS type
-bool initialize(MeshCollisionTraversalNodeRSS& node,
-                const BVHModel<RSS>& model1, const Transform3f& tf1,
-                const BVHModel<RSS>& model2, const Transform3f& tf2,
-                CollisionResult& result);
+  node.tri_indices1 = model1.tri_indices;
+  node.tri_indices2 = model2.tri_indices;
 
-/// @brief Initialize traversal node for collision between two meshes, specialized for OBBRSS type
-bool initialize(MeshCollisionTraversalNodeOBBRSS& node,
-                const BVHModel<OBBRSS>& model1, const Transform3f& tf1,
-                const BVHModel<OBBRSS>& model2, const Transform3f& tf2,
-                CollisionResult& result);
+  node.model1 = &model1;
+  node.tf1 = tf1;
+  node.model2 = &model2;
+  node.tf2 = tf2;
 
-/// @brief Initialize traversal node for collision between two meshes, specialized for kIOS type
-bool initialize(MeshCollisionTraversalNodekIOS& node,
-                const BVHModel<kIOS>& model1, const Transform3f& tf1,
-                const BVHModel<kIOS>& model2, const Transform3f& tf2,
-                CollisionResult& result);
+  node.result = &result;
 
+  node.RT.R = tf1.getRotation().transpose() * tf2.getRotation();
+  node.RT.T = tf1.getRotation().transpose() * (tf2.getTranslation() - tf1.getTranslation());
+
+  return true;
+}
 
 /// @brief Initialize traversal node for distance between two geometric shapes
 template<typename S1, typename S2, typename NarrowPhaseSolver>
diff --git a/include/hpp/fcl/traversal/traversal_recurse.h b/include/hpp/fcl/traversal/traversal_recurse.h
index f50bd75e2bbb05a5b214f6edefd50e614556d8ea..b93fd55af9da9cc6b2f35bce84de79d8c84424a1 100644
--- a/include/hpp/fcl/traversal/traversal_recurse.h
+++ b/include/hpp/fcl/traversal/traversal_recurse.h
@@ -56,11 +56,8 @@ namespace fcl
 void collisionRecurse(CollisionTraversalNodeBase* node, int b1, int b2,
 		      BVHFrontList* front_list, FCL_REAL& sqrDistLowerBound);
 
-/// @brief Recurse function for collision, specialized for OBB type
-void collisionRecurse(MeshCollisionTraversalNodeOBB* node, int b1, int b2, const Matrix3f& R, const Vec3f& T, BVHFrontList* front_list);
-
-/// @brief Recurse function for collision, specialized for RSS type
-void collisionRecurse(MeshCollisionTraversalNodeRSS* node, int b1, int b2, const Matrix3f& R, const Vec3f& T, BVHFrontList* front_list);
+void collisionNonRecurse(CollisionTraversalNodeBase* node,
+		         BVHFrontList* front_list, FCL_REAL& sqrDistLowerBound);
 
 /// @brief Recurse function for distance
 void distanceRecurse(DistanceTraversalNodeBase* node, int b1, int b2, BVHFrontList* front_list);
diff --git a/src/BV/AABB.cpp b/src/BV/AABB.cpp
index 782e0bc16b778e3a31ad3b92acdd95a1671679a6..e9829aae6756b48c1f554579b8b68dcc37312f44 100644
--- a/src/BV/AABB.cpp
+++ b/src/BV/AABB.cpp
@@ -38,7 +38,7 @@
 #include <hpp/fcl/BV/AABB.h>
 
 #include <limits>
-#include <iostream>
+#include <hpp/fcl/collision_data.h>
 
 namespace hpp
 {
@@ -50,6 +50,21 @@ AABB::AABB() : min_(Vec3f::Constant(std::numeric_limits<FCL_REAL>::max())),
 {
 }
 
+bool AABB::overlap(const AABB& other, const CollisionRequest& request,
+                    FCL_REAL& sqrDistLowerBound) const
+{
+  const FCL_REAL breakDistance (request.break_distance + request.security_margin);
+  const FCL_REAL breakDistance2 = breakDistance * breakDistance;
+
+  sqrDistLowerBound = (min_ - other.max_).array().max(0).matrix().squaredNorm();
+  if(sqrDistLowerBound > breakDistance2) return false;
+
+  sqrDistLowerBound = (other.min_ - max_).array().max(0).matrix().squaredNorm();
+  if(sqrDistLowerBound > breakDistance2) return false;
+
+  return true;
+}
+
 FCL_REAL AABB::distance(const AABB& other, Vec3f* P, Vec3f* Q) const
 {
   FCL_REAL result = 0;
diff --git a/src/BV/OBB.cpp b/src/BV/OBB.cpp
index 9c32f4297f7e655366e9d1cfbb5c949ff0ba3845..1106c965d68b9e89559aa80a3cdbc39e99878530 100644
--- a/src/BV/OBB.cpp
+++ b/src/BV/OBB.cpp
@@ -295,6 +295,71 @@ bool obbDisjoint(const Matrix3f& B, const Vec3f& T, const Vec3f& a, const Vec3f&
 
 }
 
+namespace internal
+{
+  inline FCL_REAL obbDisjoint_check_A_axis (
+      const Vec3f& T, const Vec3f& a, const Vec3f& b, const Matrix3f& Bf)
+  {
+    // |T| - |B| * b - a
+    Vec3f AABB_corner (T.cwiseAbs () - a);
+    AABB_corner.noalias() -= Bf.col(0) * b[0];
+    AABB_corner.noalias() -= Bf.col(1) * b[1];
+    AABB_corner.noalias() -= Bf.col(2) * b[2];
+    return AABB_corner.array().max(0).matrix().squaredNorm ();
+  }
+
+  inline FCL_REAL obbDisjoint_check_B_axis (
+      const Matrix3f& B, const Vec3f& T, const Vec3f& a, const Vec3f& b, const Matrix3f& Bf)
+  {
+    // Bf = |B|
+    // | B^T T| - Bf^T * a - b
+    register FCL_REAL s, t = 0;
+    s = std::abs(B.col(0).dot(T)) - Bf.col(0).dot(a) - b[0];
+    if (s > 0) t += s*s;
+    s = std::abs(B.col(1).dot(T)) - Bf.col(1).dot(a) - b[1];
+    if (s > 0) t += s*s;
+    s = std::abs(B.col(2).dot(T)) - Bf.col(2).dot(a) - b[2];
+    if (s > 0) t += s*s;
+    return t;
+  }
+
+  template <int ib, int jb = (ib+1)%3, int kb = (ib+2)%3 >
+  struct obbDisjoint_check_Ai_cross_Bi
+  {
+    static inline bool run (int ia, int ja, int ka,
+        const Matrix3f& B, const Vec3f& T, const Vec3f& a, const Vec3f& b,
+        const Matrix3f& Bf,
+        const FCL_REAL& breakDistance2, FCL_REAL& squaredLowerBoundDistance)
+    {
+      const FCL_REAL s = T[ka] * B(ja, ib) - T[ja] * B(ka, ib);
+
+      const FCL_REAL diff = fabs(s) - (a[ja] * Bf(ka, ib) + a[ka] * Bf(ja, ib) +
+                                       b[jb] * Bf(ia, kb) + b[kb] * Bf(ia, jb));
+      // We need to divide by the norm || Aia x Bib ||
+      // As ||Aia|| = ||Bib|| = 1, (Aia | Bib)^2  = cosine^2
+      if (diff > 0) {
+        FCL_REAL sinus2 = 1 - Bf (ia,ib) * Bf (ia,ib);
+        if (sinus2 > 1e-6) {
+          squaredLowerBoundDistance = diff * diff / sinus2;
+          if (squaredLowerBoundDistance > breakDistance2) {
+            return true;
+          }
+        }
+        /* // or
+           FCL_REAL sinus2 = 1 - Bf (ia,ib) * Bf (ia,ib);
+           squaredLowerBoundDistance = diff * diff;
+           if (squaredLowerBoundDistance > breakDistance2 * sinus2) {
+           squaredLowerBoundDistance /= sinus2;
+           return true;
+           }
+        // */
+      }
+      return false;
+    }
+  };
+}
+
+
 // B, T orientation and position of 2nd OBB in frame of 1st OBB,
 // a extent of 1st OBB,
 // b extent of 2nd OBB.
@@ -302,193 +367,39 @@ bool obbDisjoint(const Matrix3f& B, const Vec3f& T, const Vec3f& a, const Vec3f&
 // This function tests whether bounding boxes should be broken down.
 //
 bool obbDisjointAndLowerBoundDistance (const Matrix3f& B, const Vec3f& T,
-				       const Vec3f& a, const Vec3f& b,
+                                       const Vec3f& a, const Vec3f& b,
                                        const CollisionRequest& request,
-				       FCL_REAL& squaredLowerBoundDistance)
+                                       FCL_REAL& squaredLowerBoundDistance)
 {
-  FCL_REAL t, s;
-  FCL_REAL diff;
-  FCL_REAL breakDistance (request.break_distance + request.security_margin);
-  FCL_REAL breakDistance2 = breakDistance * breakDistance;
+  const FCL_REAL breakDistance (request.break_distance + request.security_margin);
+  const FCL_REAL breakDistance2 = breakDistance * breakDistance;
 
-  // Matrix3f Bf = abs(B);
-  // Bf += reps;
   Matrix3f Bf (B.cwiseAbs());
 
   // Corner of b axis aligned bounding box the closest to the origin
-  Vec3f AABB_corner (T.cwiseAbs () - Bf * b);
-  Vec3f diff3 (AABB_corner - a);
-  diff3 = diff3.cwiseMax (Vec3f::Zero());
-
-  //for (Vec3f::Index i=0; i<3; ++i) diff3 [i] = std::max (0, diff3 [i]);
-  squaredLowerBoundDistance = diff3.squaredNorm ();
+  squaredLowerBoundDistance = internal::obbDisjoint_check_A_axis (T, a, b, Bf);
   if (squaredLowerBoundDistance > breakDistance2)
     return true;
 
-  AABB_corner = (B.transpose () * T).cwiseAbs ()  - Bf.transpose () * a;
-  // diff3 = | B^T T| - b - Bf^T a
-  diff3 = AABB_corner - b;
-  diff3 = diff3.cwiseMax (Vec3f::Zero());
-  squaredLowerBoundDistance = diff3.squaredNorm ();
-
+  squaredLowerBoundDistance = internal::obbDisjoint_check_B_axis (B, T, a, b, Bf);
   if (squaredLowerBoundDistance > breakDistance2)
     return true;
 
-  // A0 x B0
-  s = T[2] * B(1, 0) - T[1] * B(2, 0);
-  t = ((s < 0.0) ? -s : s); assert (t == fabs (s));
-
-  FCL_REAL sinus2;
-  diff = t - (a[1] * Bf(2, 0) + a[2] * Bf(1, 0) +
-	      b[1] * Bf(0, 2) + b[2] * Bf(0, 1));
-  // We need to divide by the norm || A0 x B0 ||
-  // As ||A0|| = ||B0|| = 1,
-  //              2            2
-  // || A0 x B0 ||  + (A0 | B0)  = 1
-  if (diff > 0) {
-    sinus2 = 1 - Bf (0,0) * Bf (0,0);
-    if (sinus2 > 1e-6) {
-      squaredLowerBoundDistance = diff * diff / sinus2;
-      if (squaredLowerBoundDistance > breakDistance2) {
-	return true;
-      }
-    }
-  }
-
-  // A0 x B1
-  s = T[2] * B(1, 1) - T[1] * B(2, 1);
-  t = ((s < 0.0) ? -s : s); assert (t == fabs (s));
-
-  diff = t - (a[1] * Bf(2, 1) + a[2] * Bf(1, 1) +
-	      b[0] * Bf(0, 2) + b[2] * Bf(0, 0));
-  if (diff > 0) {
-    sinus2 = 1 - Bf (0,1) * Bf (0,1);
-    if (sinus2 > 1e-6) {
-      squaredLowerBoundDistance = diff * diff / sinus2;
-      if (squaredLowerBoundDistance > breakDistance2) {
-	return true;
-      }
-    }
-  }
-
-  // A0 x B2
-  s = T[2] * B(1, 2) - T[1] * B(2, 2);
-  t = ((s < 0.0) ? -s : s); assert (t == fabs (s));
-
-  diff = t - (a[1] * Bf(2, 2) + a[2] * Bf(1, 2) +
-	      b[0] * Bf(0, 1) + b[1] * Bf(0, 0));
-  if (diff > 0) {
-    sinus2 = 1 - Bf (0,2) * Bf (0,2);
-    if (sinus2 > 1e-6) {
-      squaredLowerBoundDistance = diff * diff / sinus2;
-      if (squaredLowerBoundDistance > breakDistance2) {
-	return true;
-      }
-    }
-  }
-
-  // A1 x B0
-  s = T[0] * B(2, 0) - T[2] * B(0, 0);
-  t = ((s < 0.0) ? -s : s); assert (t == fabs (s));
-
-  diff = t - (a[0] * Bf(2, 0) + a[2] * Bf(0, 0) +
-	      b[1] * Bf(1, 2) + b[2] * Bf(1, 1));
-  if (diff > 0) {
-    sinus2 = 1 - Bf (1,0) * Bf (1,0);
-    if (sinus2 > 1e-6) {
-      squaredLowerBoundDistance = diff * diff / sinus2;
-      if (squaredLowerBoundDistance > breakDistance2) {
-	return true;
-      }
-    }
-  }
-
-  // A1 x B1
-  s = T[0] * B(2, 1) - T[2] * B(0, 1);
-  t = ((s < 0.0) ? -s : s); assert (t == fabs (s));
-
-  diff = t - (a[0] * Bf(2, 1) + a[2] * Bf(0, 1) +
-	      b[0] * Bf(1, 2) + b[2] * Bf(1, 0));
-  if (diff > 0) {
-    sinus2 = 1 - Bf (1,1) * Bf (1,1);
-    if (sinus2 > 1e-6) {
-      squaredLowerBoundDistance = diff * diff / sinus2;
-      if (squaredLowerBoundDistance > breakDistance2) {
-	return true;
-      }
-    }
-  }
-
-  // A1 x B2
-  s = T[0] * B(2, 2) - T[2] * B(0, 2);
-  t = ((s < 0.0) ? -s : s); assert (t == fabs (s));
-
-  diff = t - (a[0] * Bf(2, 2) + a[2] * Bf(0, 2) +
-	      b[0] * Bf(1, 1) + b[1] * Bf(1, 0));
-  if (diff > 0) {
-    sinus2 = 1 - Bf (1,2) * Bf (1,2);
-    if (sinus2 > 1e-6) {
-      squaredLowerBoundDistance = diff * diff / sinus2;
-      if (squaredLowerBoundDistance > breakDistance2) {
-	return true;
-      }
-    }
-  }
-
-  // A2 x B0
-  s = T[1] * B(0, 0) - T[0] * B(1, 0);
-  t = ((s < 0.0) ? -s : s); assert (t == fabs (s));
-
-  diff = t - (a[0] * Bf(1, 0) + a[1] * Bf(0, 0) +
-	      b[1] * Bf(2, 2) + b[2] * Bf(2, 1));
-  if (diff > 0) {
-    sinus2 = 1 - Bf (2,0) * Bf (2,0);
-    if (sinus2 > 1e-6) {
-      squaredLowerBoundDistance = diff * diff / sinus2;
-      if (squaredLowerBoundDistance > breakDistance2) {
-	return true;
-      }
-    }
-  }
-
-  // A2 x B1
-  s = T[1] * B(0, 1) - T[0] * B(1, 1);
-  t = ((s < 0.0) ? -s : s); assert (t == fabs (s));
-
-  diff = t - (a[0] * Bf(1, 1) + a[1] * Bf(0, 1) +
-	      b[0] * Bf(2, 2) + b[2] * Bf(2, 0));
-  if (diff > 0) {
-    sinus2 = 1 - Bf (2,1) * Bf (2,1);
-    if (sinus2 > 1e-6) {
-      squaredLowerBoundDistance = diff * diff / sinus2;
-      if (squaredLowerBoundDistance > breakDistance2) {
-	return true;
-      }
-    }
-  }
-
-  // A2 x B2
-  s = T[1] * B(0, 2) - T[0] * B(1, 2);
-  t = ((s < 0.0) ? -s : s); assert (t == fabs (s));
-
-  diff = t - (a[0] * Bf(1, 2) + a[1] * Bf(0, 2) +
-	      b[0] * Bf(2, 1) + b[1] * Bf(2, 0));
-  if (diff > 0) {
-    sinus2 = 1 - Bf (2,2) * Bf (2,2);
-    if (sinus2 > 1e-6) {
-      squaredLowerBoundDistance = diff * diff / sinus2;
-      if (squaredLowerBoundDistance > breakDistance2) {
-	return true;
-      }
-    }
+  // Ai x Bj
+  int ja = 1, ka = 2;
+  for (int ia = 0; ia < 3; ++ia) {
+    if (internal::obbDisjoint_check_Ai_cross_Bi<0>::run (ia, ja, ka,
+          B, T, a, b, Bf, breakDistance2, squaredLowerBoundDistance)) return true;
+    if (internal::obbDisjoint_check_Ai_cross_Bi<1>::run (ia, ja, ka,
+          B, T, a, b, Bf, breakDistance2, squaredLowerBoundDistance)) return true;
+    if (internal::obbDisjoint_check_Ai_cross_Bi<2>::run (ia, ja, ka,
+          B, T, a, b, Bf, breakDistance2, squaredLowerBoundDistance)) return true;
+    ja = ka; ka = ia;
   }
 
   return false;
-
 }
 
-
-
 bool OBB::overlap(const OBB& other) const
 {
   /// compute what transform [R,T] that takes us from cs1 to cs2.
diff --git a/src/BV/OBBRSS.cpp b/src/BV/OBBRSS.cpp
index 5c3c7f830a1363b6cc7286f68815ac620fee8d94..648c1c054145d409635e51cceb9817a0b53de7b2 100644
--- a/src/BV/OBBRSS.cpp
+++ b/src/BV/OBBRSS.cpp
@@ -42,23 +42,6 @@ namespace hpp
 namespace fcl
 {
 
-bool overlap(const Matrix3f& R0, const Vec3f& T0, const OBBRSS& b1, const OBBRSS& b2)
-{
-  return overlap(R0, T0, b1.obb, b2.obb);
-}
-
-bool overlap(const Matrix3f& R0, const Vec3f& T0, const OBBRSS& b1,
-             const OBBRSS& b2, const CollisionRequest& request,
-	     FCL_REAL& sqrDistLowerBound)
-{
-  return overlap(R0, T0, b1.obb, b2.obb, request, sqrDistLowerBound);
-}
-
-FCL_REAL distance(const Matrix3f& R0, const Vec3f& T0, const OBBRSS& b1, const OBBRSS& b2, Vec3f* P, Vec3f* Q)
-{
-  return distance(R0, T0, b1.rss, b2.rss, P, Q);
-}
-
 OBBRSS translate(const OBBRSS& bv, const Vec3f& t)
 {
   OBBRSS res(bv);
diff --git a/src/BVH/BVH_model.cpp b/src/BVH/BVH_model.cpp
index 58ab55200a01cdbc29b3fc03ff554a8264ca1316..7fabab7b61ed40161b12597878c6ff0b8cd7053f 100644
--- a/src/BVH/BVH_model.cpp
+++ b/src/BVH/BVH_model.cpp
@@ -47,7 +47,6 @@ namespace fcl
 
 template<typename BV>
 BVHModel<BV>::BVHModel(const BVHModel<BV>& other) : CollisionGeometry(other),
-                                                    boost::noncopyable(),
                                                     num_tris(other.num_tris),
                                                     num_vertices(other.num_vertices),
                                                     build_state(other.build_state),
diff --git a/src/BVH/BV_fitter.cpp b/src/BVH/BV_fitter.cpp
index 3b6015b6286dfa58a2f7c8a0920295a5374fe932..3ef215079e236f369b65522293f76fbc1eaf4c91 100644
--- a/src/BVH/BV_fitter.cpp
+++ b/src/BVH/BV_fitter.cpp
@@ -434,7 +434,6 @@ void fit(Vec3f* ps, int n, OBB& bv)
   }
 }
 
-
 template<>
 void fit(Vec3f* ps, int n, RSS& bv)
 {
@@ -492,6 +491,16 @@ void fit(Vec3f* ps, int n, OBBRSS& bv)
   }
 }
 
+template<>
+void fit(Vec3f* ps, int n, AABB& bv)
+{
+  if (n <= 0) return;
+  bv = AABB (ps[0]);
+  for(int i = 1; i < n; ++i)
+  {
+    bv += ps[i];
+  }
+}
 
 OBB BVFitter<OBB>::fit(unsigned int* primitive_indices, int num_primitives)
 {
@@ -567,7 +576,6 @@ RSS BVFitter<RSS>::fit(unsigned int* primitive_indices, int num_primitives)
   return bv;
 }
 
-
 kIOS BVFitter<kIOS>::fit(unsigned int* primitive_indices, int num_primitives)
 {
   kIOS bv;
@@ -638,6 +646,47 @@ kIOS BVFitter<kIOS>::fit(unsigned int* primitive_indices, int num_primitives)
   return bv;
 }
 
+AABB BVFitter<AABB>::fit(unsigned int* primitive_indices, int num_primitives)
+{
+  AABB bv;
+  if (num_primitives == 0) return bv;
+
+  if(type == BVH_MODEL_TRIANGLES)             /// The primitive is triangle
+  {
+    Triangle t0 = tri_indices[primitive_indices[0]];
+    bv = AABB (vertices[t0[0]]);
+
+    for(int i = 0; i < num_primitives; ++i)
+    {
+      Triangle t = tri_indices[primitive_indices[i]];
+      bv += vertices[t[0]];
+      bv += vertices[t[1]];
+      bv += vertices[t[2]];
+
+      if(prev_vertices)                      /// can fitting both current and previous frame
+      {
+        bv += prev_vertices[t[0]];
+        bv += prev_vertices[t[1]];
+        bv += prev_vertices[t[2]];
+      }
+    }
+    return bv;
+  }
+  else if(type == BVH_MODEL_POINTCLOUD)       /// The primitive is point
+  {
+    bv = AABB (vertices[primitive_indices[0]]);
+    for(int i = 0; i < num_primitives; ++i)
+    {
+      bv += vertices[primitive_indices[i]];
+
+      if(prev_vertices)                       /// can fitting both current and previous frame
+      {
+        bv += prev_vertices[primitive_indices[i]];
+      }
+    }
+  }
+  return bv;
+}
 
 }
 
diff --git a/src/collision_func_matrix.cpp b/src/collision_func_matrix.cpp
index 6e4e3dbcadc4b2ed82107643bc53a4fc200eccb9..69b01a281455a764e4fedbdb3414c52405b22313 100644
--- a/src/collision_func_matrix.cpp
+++ b/src/collision_func_matrix.cpp
@@ -39,7 +39,7 @@
 #include <hpp/fcl/collision_func_matrix.h>
 
 #include <hpp/fcl/traversal/traversal_node_setup.h>
-#include <hpp/fcl/collision_node.h>
+#include <../src/collision_node.h>
 #include <hpp/fcl/narrowphase/narrowphase.h>
 #include "distance_func_matrix.h"
 
diff --git a/src/collision_node.cpp b/src/collision_node.cpp
index ad936342337342ba1076485ddca314d4d2709188..c69457030e0ea3d387bcb42cfea06a2ba9822c72 100644
--- a/src/collision_node.cpp
+++ b/src/collision_node.cpp
@@ -36,7 +36,7 @@
 /** \author Jia Pan */
 
 
-#include <hpp/fcl/collision_node.h>
+#include <../src/collision_node.h>
 #include <hpp/fcl/traversal/traversal_recurse.h>
 
 namespace hpp
@@ -46,7 +46,8 @@ namespace fcl
 
 void collide(CollisionTraversalNodeBase* node,
              const CollisionRequest& request, CollisionResult& result,
-	     BVHFrontList* front_list)
+	     BVHFrontList* front_list,
+             bool recursive)
 {
   if(front_list && front_list->size() > 0)
   {
@@ -55,7 +56,10 @@ void collide(CollisionTraversalNodeBase* node,
   else
   {
     FCL_REAL sqrDistLowerBound=0;
-    collisionRecurse(node, 0, 0, front_list, sqrDistLowerBound);
+    if (recursive)
+      collisionRecurse(node, 0, 0, front_list, sqrDistLowerBound);
+    else
+      collisionNonRecurse(node, front_list, sqrDistLowerBound);
     result.distance_lower_bound = sqrt (sqrDistLowerBound);
   }
 }
diff --git a/include/hpp/fcl/collision_node.h b/src/collision_node.h
similarity index 97%
rename from include/hpp/fcl/collision_node.h
rename to src/collision_node.h
index 23fd309de6da0d0281da458463727c00e40d367b..2f17fec589163edb1a7e65451d322a511113909e 100644
--- a/include/hpp/fcl/collision_node.h
+++ b/src/collision_node.h
@@ -62,7 +62,8 @@ namespace fcl
   void collide(CollisionTraversalNodeBase* node,
                const CollisionRequest& request,
                CollisionResult& result,
-	       BVHFrontList* front_list = NULL);
+	       BVHFrontList* front_list = NULL,
+               bool recursive = true);
 
 /// @brief distance computation on distance traversal node; can use front list to accelerate
 void distance(DistanceTraversalNodeBase* node, BVHFrontList* front_list = NULL, int qsize = 2);
diff --git a/src/distance_func_matrix.cpp b/src/distance_func_matrix.cpp
index c2573f3a741919f83bc8cddc4612590f42c455e7..a2637a7a82f42f2b2eb0e5164b94dc6e79e5990f 100644
--- a/src/distance_func_matrix.cpp
+++ b/src/distance_func_matrix.cpp
@@ -37,7 +37,7 @@
 
 #include <hpp/fcl/distance_func_matrix.h>
 
-#include <hpp/fcl/collision_node.h>
+#include <../src/collision_node.h>
 #include <hpp/fcl/traversal/traversal_node_setup.h>
 #include <hpp/fcl/narrowphase/narrowphase.h>
 
diff --git a/src/mesh_loader/assimp.cpp b/src/mesh_loader/assimp.cpp
index 7e58853a73f8bdf86b1202e1e08b62cb933f3ca8..26d157d8c876bb309b131430209f47442a73f6c2 100644
--- a/src/mesh_loader/assimp.cpp
+++ b/src/mesh_loader/assimp.cpp
@@ -34,16 +34,94 @@
 
 #include <hpp/fcl/mesh_loader/assimp.h>
 
+#ifdef HPP_FCL_USE_ASSIMP_UNIFIED_HEADER_NAMES
+  #include <assimp/DefaultLogger.hpp>
+  #include <assimp/IOStream.hpp>
+  #include <assimp/IOSystem.hpp>
+  #include <assimp/Importer.hpp>
+  #include <assimp/postprocess.h>
+  #include <assimp/scene.h>
+#else
+  #include <assimp/DefaultLogger.h>
+  #include <assimp/assimp.hpp>
+  #include <assimp/IOStream.h>
+  #include <assimp/IOSystem.h>
+  #include <assimp/aiPostProcess.h>
+  #include <assimp/aiScene.h>
+#endif
+
 namespace hpp
 {
 namespace fcl
 {
   
-unsigned buildMesh (const fcl::Vec3f & scale,
-                    const aiScene* scene,
-                    const aiNode* node,
-                    unsigned vertices_offset,
-                    TriangleAndVertices & tv)
+namespace internal
+{
+
+Loader::Loader () : scene (NULL) {}
+
+Loader::~Loader ()
+{
+ if (scene) delete scene;
+}
+
+void Loader::load (const std::string & resource_path)
+{
+  Assimp::Importer importer;
+  // set list of ignored parameters (parameters used for rendering)
+  importer.SetPropertyInteger(AI_CONFIG_PP_RVC_FLAGS,
+      aiComponent_TANGENTS_AND_BITANGENTS|
+      aiComponent_COLORS |
+      aiComponent_BONEWEIGHTS |
+      aiComponent_ANIMATIONS |
+      aiComponent_LIGHTS |
+      aiComponent_CAMERAS|
+      aiComponent_TEXTURES |
+      aiComponent_TEXCOORDS |
+      aiComponent_MATERIALS |
+      aiComponent_NORMALS
+      );
+
+  importer.ReadFile(resource_path.c_str(),
+      aiProcess_SortByPType |
+      aiProcess_Triangulate |
+      aiProcess_RemoveComponent |
+      aiProcess_ImproveCacheLocality |
+      // TODO: I (Joseph Mirabel) have no idea whether degenerated triangles are
+      // properly handled. Enabling aiProcess_FindDegenerates would throw an
+      // exception when that happens. Is it too conservative ?
+      // aiProcess_FindDegenerates |
+      aiProcess_JoinIdenticalVertices
+      );
+
+  scene = importer.GetOrphanedScene();
+  if (!scene)
+  {
+    const std::string exception_message (std::string ("Could not load resource ") + resource_path + std::string("\n") +
+                                         importer.GetErrorString () + std::string("\n") +
+                                         "Hint: the mesh directory may be wrong.");
+    throw std::invalid_argument(exception_message);
+  }
+
+  if (!scene->HasMeshes())
+    throw std::invalid_argument (std::string ("No meshes found in file ")+resource_path);
+}
+
+/**
+ * @brief      Recursive procedure for building a mesh
+ *
+ * @param[in]  scale           Scale to apply when reading the ressource
+ * @param[in]  scene           Pointer to the assimp scene
+ * @param[in]  node            Current node of the scene
+ * @param[in]  vertices_offset Current number of vertices in the model
+ * @param      tv              Triangles and Vertices of the mesh submodels
+ */
+unsigned recurseBuildMesh (
+    const fcl::Vec3f & scale,
+    const aiScene* scene,
+    const aiNode* node,
+    unsigned vertices_offset,
+    TriangleAndVertices & tv)
 {
   if (!node) return 0;
   
@@ -100,12 +178,20 @@ unsigned buildMesh (const fcl::Vec3f & scale,
   
   for (uint32_t i=0; i < node->mNumChildren; ++i)
   {
-    nbVertices += buildMesh(scale, scene, node->mChildren[i], nbVertices, tv);
+    nbVertices += recurseBuildMesh(scale, scene, node->mChildren[i], nbVertices, tv);
   }
 
   return nbVertices;
 }
 
+void buildMesh (const fcl::Vec3f & scale,
+                    const aiScene* scene,
+                    unsigned vertices_offset,
+                    TriangleAndVertices & tv)
+{
+  recurseBuildMesh (scale, scene, scene->mRootNode, vertices_offset, tv);
 }
 
+} // namespace internal
+} // namespace fcl
 } // namespace hpp
diff --git a/src/narrowphase/details.h b/src/narrowphase/details.h
index ee654943ab14cf258533fecef8366f6ba74f66f2..b8cfeb0f863fda002107c3d11304efb72589646b 100644
--- a/src/narrowphase/details.h
+++ b/src/narrowphase/details.h
@@ -38,7 +38,6 @@
 #ifndef HPP_FCL_SRC_NARROWPHASE_DETAILS_H
 # define HPP_FCL_SRC_NARROWPHASE_DETAILS_H
 
-#include <hpp/fcl/collision_node.h>
 #include <hpp/fcl/traversal/traversal_node_setup.h>
 #include <hpp/fcl/narrowphase/narrowphase.h>
 
@@ -126,7 +125,7 @@ namespace fcl {
       FCL_REAL norm (normal.norm());
       dist = norm - s1.radius - s2.radius;
 
-      FCL_REAL eps (std::numeric_limits<FCL_REAL>::epsilon());
+      static const FCL_REAL eps (std::numeric_limits<FCL_REAL>::epsilon());
       if (norm > eps) {
         normal.normalize();
       } else {
@@ -147,7 +146,7 @@ namespace fcl {
        const Cylinder& s2, const Transform3f& tf2,
        FCL_REAL& dist, Vec3f& p1, Vec3f& p2, Vec3f& normal)
     {
-      FCL_REAL eps (sqrt (std::numeric_limits <FCL_REAL>::epsilon ()));
+      static const FCL_REAL eps (sqrt (std::numeric_limits <FCL_REAL>::epsilon ()));
       FCL_REAL r1 (s1.radius);
       FCL_REAL r2 (s2.radius);
       FCL_REAL lz2 (.5*s2.lz);
@@ -860,8 +859,8 @@ namespace fcl {
 
 
 
-    inline int boxBox2(const Vec3f& side1, const Matrix3f& R1, const Vec3f& T1,
-                       const Vec3f& side2, const Matrix3f& R2, const Vec3f& T2,
+    inline int boxBox2(const Vec3f& halfSide1, const Matrix3f& R1, const Vec3f& T1,
+                       const Vec3f& halfSide2, const Matrix3f& R2, const Vec3f& T2,
                        Vec3f& normal, FCL_REAL* depth, int* return_code,
                        int maxc, std::vector<ContactPoint>& contacts)
     {
@@ -874,8 +873,8 @@ namespace fcl {
       Vec3f pp (R1.transpose() * p); // get pp = p relative to body 1
 
       // get side lengths / 2
-      Vec3f A (side1 * 0.5);
-      Vec3f B (side2 * 0.5);
+      const Vec3f& A (halfSide1);
+      const Vec3f& B (halfSide2);
 
       // Rij is R1'*R2, i.e. the relative rotation between R1 and R2
       Matrix3f R (R1.transpose() * R2);
@@ -979,7 +978,7 @@ namespace fcl {
       Q.array() += fudge2;
 
       Vec3f n;
-      FCL_REAL eps = std::numeric_limits<FCL_REAL>::epsilon();
+      static const FCL_REAL eps = std::numeric_limits<FCL_REAL>::epsilon();
 
       // separating axis = u1 x (v1,v2,v3)
       tmp = pp[2] * R(1, 0) - pp[1] * R(2, 0);
@@ -1443,8 +1442,8 @@ namespace fcl {
       int return_code;
       Vec3f normal;
       FCL_REAL depth;
-      /* int cnum = */ boxBox2(s1.side, tf1.getRotation(), tf1.getTranslation(),
-                               s2.side, tf2.getRotation(), tf2.getTranslation(),
+      /* int cnum = */ boxBox2(s1.halfSide, tf1.getRotation(), tf1.getTranslation(),
+                               s2.halfSide, tf2.getRotation(), tf2.getTranslation(),
                                normal, &depth, &return_code,
                                4, contacts);
 
@@ -1518,11 +1517,9 @@ namespace fcl {
       const Matrix3f& R = tf1.getRotation();
       const Vec3f& T = tf1.getTranslation();
 
-      Vec3f Q = R.transpose() * new_s2.n;
-      Vec3f A(Q[0] * s1.side[0], Q[1] * s1.side[1], Q[2] * s1.side[2]);
-      Vec3f B = A.cwiseAbs();
+      Vec3f Q (R.transpose() * new_s2.n);
 
-      FCL_REAL depth = 0.5 * (B[0] + B[1] + B[2]) - new_s2.signedDistance(T);
+      FCL_REAL depth = Q.cwiseProduct(s1.halfSide).lpNorm<1>() - new_s2.signedDistance(T);
       return (depth >= 0);
     }
 
@@ -1538,27 +1535,17 @@ namespace fcl {
       const Vec3f& T = tf1.getTranslation();
 
       // Q: plane normal expressed in box frame
-      Vec3f Q = R.transpose() * new_s2.n;
+      const Vec3f Q (R.transpose() * new_s2.n);
       // A: scalar products of each side with normal
-      Vec3f A(Q[0] * s1.side[0], Q[1] * s1.side[1], Q[2] * s1.side[2]);
-      Vec3f B = A.cwiseAbs();
+      const Vec3f A (Q.cwiseProduct(s1.halfSide));
 
-      distance = new_s2.signedDistance(T) - 0.5 * (B[0] + B[1] + B[2]);
+      distance = new_s2.signedDistance(T) - A.lpNorm<1>();
       if(distance > 0) {
-        p1 = T;
-        for (Vec3f::Index i=0; i<3; ++i) {
-          p1 -= A [i] > 0 ? (-.5 * s1.side [i] * R.col (i)) :
-            (.5 * s1.side [i] * R.col (i));
-        }
-        p2 = p1 - distance * new_s2.n;
+        p1.noalias() = T + R * (A.array() > 0).select (s1.halfSide, - s1.halfSide);
+        p2.noalias() = p1 - distance * new_s2.n;
         return false;
       }
 
-      Vec3f axis[3];
-      axis[0] = R.col(0);
-      axis[1] = R.col(1);
-      axis[2] = R.col(2);
-
       /// find deepest point
       Vec3f p(T);
       int sign = 0;
@@ -1566,25 +1553,21 @@ namespace fcl {
       if(std::abs(Q[0] - 1) < halfspaceIntersectTolerance<FCL_REAL>() || std::abs(Q[0] + 1) < halfspaceIntersectTolerance<FCL_REAL>())
         {
           sign = (A[0] > 0) ? -1 : 1;
-          p += axis[0] * (0.5 * s1.side[0] * sign);
+          p += R.col(0) * (s1.halfSide[0] * sign);
         }
       else if(std::abs(Q[1] - 1) < halfspaceIntersectTolerance<FCL_REAL>() || std::abs(Q[1] + 1) < halfspaceIntersectTolerance<FCL_REAL>())
         {
           sign = (A[1] > 0) ? -1 : 1;
-          p += axis[1] * (0.5 * s1.side[1] * sign);
+          p += R.col(1) * (s1.halfSide[1] * sign);
         }
       else if(std::abs(Q[2] - 1) < halfspaceIntersectTolerance<FCL_REAL>() || std::abs(Q[2] + 1) < halfspaceIntersectTolerance<FCL_REAL>())
         {
           sign = (A[2] > 0) ? -1 : 1;
-          p += axis[2] * (0.5 * s1.side[2] * sign);
+          p += R.col(2) * (s1.halfSide[2] * sign);
         }
       else
         {
-          for(std::size_t i = 0; i < 3; ++i)
-            {
-              sign = (A[i] > 0) ? -1 : 1;
-              p += axis[i] * (0.5 * s1.side[i] * sign);
-            }
+          p.noalias() += R * (A.array() > 0).select (-s1.halfSide, s1.halfSide);
         }
 
       /// compute the contact point from the deepest point
@@ -2040,33 +2023,32 @@ namespace fcl {
     //                            Vec3f* contact_points,
     //                            FCL_REAL* penetration_depth, Vec3f* normal)
     {
-      FCL_REAL eps (sqrt (std::numeric_limits<FCL_REAL>::epsilon()));
+      static const FCL_REAL eps (sqrt (std::numeric_limits<FCL_REAL>::epsilon()));
       Plane new_s2 = transform(s2, tf2);
 
       const Matrix3f& R = tf1.getRotation();
       const Vec3f& T = tf1.getTranslation();
 
-      Vec3f Q = R.transpose() * new_s2.n;
-      Vec3f A(Q[0] * s1.side[0], Q[1] * s1.side[1], Q[2] * s1.side[2]);
-      Vec3f B = A.cwiseAbs();
+      const Vec3f Q (R.transpose() * new_s2.n);
+      const Vec3f A (Q.cwiseProduct(s1.halfSide));
 
       FCL_REAL signed_dist = new_s2.signedDistance(T);
-      distance = std::abs(signed_dist) - 0.5 * (B[0] + B[1] + B[2]);
+      distance = std::abs(signed_dist) - A.lpNorm<1>();
       if(distance > 0) {
         // Is the box above or below the plane
-        int sign = signed_dist > 0 ? 1 : -1;
+        const bool positive = signed_dist > 0;
         // Set p1 at the center of the box
         p1 = T;
         for (Vec3f::Index i=0; i<3; ++i) {
           // scalar product between box axis and plane normal
-          FCL_REAL alpha (R.col (i).dot (new_s2.n));
-          if (sign * alpha > eps) {
-            p1 -= .5 * R.col (i) * s1.side [i];
-          } else if (sign * alpha < -eps) {
-            p1 += .5 * R.col (i) * s1.side [i];
+          FCL_REAL alpha ((positive ? 1 : -1 ) * R.col (i).dot (new_s2.n));
+          if (alpha > eps) {
+            p1 -= R.col (i) * s1.halfSide [i];
+          } else if (alpha < -eps) {
+            p1 += R.col (i) * s1.halfSide [i];
           }
         }
-        p2 = p1 - sign * distance * new_s2.n;
+        p2 = p1 - ( positive ? distance : -distance) * new_s2.n;
         assert (new_s2.distance (p2) < 3 *eps);
         return false;
       }
@@ -2087,32 +2069,25 @@ namespace fcl {
       if(std::abs(Q[0] - 1) < planeIntersectTolerance<FCL_REAL>() ||
          std::abs(Q[0] + 1) < planeIntersectTolerance<FCL_REAL>())
         {
-          int sign2 = (A[0] > 0) ? -1 : 1;
-          sign2 *= sign;
-          p += axis[0] * (0.5 * s1.side[0] * sign2);
+          int sign2 = (A[0] > 0) ? -sign : sign;
+          p += R.col(0) * (s1.halfSide[0] * sign2);
         }
       else if(std::abs(Q[1] - 1) < planeIntersectTolerance<FCL_REAL>() ||
               std::abs(Q[1] + 1) < planeIntersectTolerance<FCL_REAL>())
         {
-          int sign2 = (A[1] > 0) ? -1 : 1;
-          sign2 *= sign;
-          p += axis[1] * (0.5 * s1.side[1] * sign2);
+          int sign2 = (A[1] > 0) ? -sign : sign;
+          p += R.col(1) * (s1.halfSide[1] * sign2);
         }
       else if(std::abs(Q[2] - 1) < planeIntersectTolerance<FCL_REAL>() ||
               std::abs(Q[2] + 1) < planeIntersectTolerance<FCL_REAL>())
         {
-          int sign2 = (A[2] > 0) ? -1 : 1;
-          sign2 *= sign;
-          p += axis[2] * (0.5 * s1.side[2] * sign2);
+          int sign2 = (A[2] > 0) ? -sign : sign;
+          p += R.col(2) * (s1.halfSide[2] * sign2);
         }
       else
         {
-          for(std::size_t i = 0; i < 3; ++i)
-            {
-              int sign2 = (A[i] > 0) ? -1 : 1;
-              sign2 *= sign;
-              p += axis[i] * (0.5 * s1.side[i] * sign2);
-            }
+          Vec3f tmp(sign * R * s1.halfSide);
+          p += (A.array() > 0).select (- tmp, tmp);
         }
 
       // compute the contact point by project the deepest point onto the plane
@@ -2143,7 +2118,7 @@ namespace fcl {
       bool inside = true;
       for (int i = 0; i < 3; ++i) {
         bool iinside = false;
-        FCL_REAL dist = Rb.col(i).dot(d), side_2 = b.side(i) / 2;
+        FCL_REAL dist = Rb.col(i).dot(d), side_2 = b.halfSide(i);
         if      (dist < -side_2) dist = -side_2; // outside
         else if (dist >  side_2) dist =  side_2; // outside
         else iinside = true;                     // inside
@@ -2651,6 +2626,21 @@ namespace fcl {
 
       return true;
     }
+
+    /// See the prototype below
+    inline FCL_REAL computePenetration
+      (const Vec3f& P1, const Vec3f& P2, const Vec3f& P3,
+       const Vec3f& Q1, const Vec3f& Q2, const Vec3f& Q3,
+       Vec3f& normal)
+    {
+      Vec3f u ((P2-P1).cross (P3-P1));
+      normal = u.normalized ();
+      FCL_REAL depth1 ((P1-Q1).dot (normal));
+      FCL_REAL depth2 ((P1-Q2).dot (normal));
+      FCL_REAL depth3 ((P1-Q3).dot (normal));
+      return std::max (depth1, std::max (depth2, depth3));
+    }
+
     // Compute penetration distance and normal of two triangles in collision
     // Normal is normal of triangle 1 (P1, P2, P3), penetration depth is the
     // minimal distance (Q1, Q2, Q3) should be translated along the normal so
@@ -2669,14 +2659,8 @@ namespace fcl {
       Vec3f globalQ1 (tf2.transform (Q1));
       Vec3f globalQ2 (tf2.transform (Q2));
       Vec3f globalQ3 (tf2.transform (Q3));
-      Vec3f u ((globalP2-globalP1).cross (globalP3-globalP1));
-      normal = u.normalized ();
-      FCL_REAL depth;
-      FCL_REAL depth1 ((globalP1-globalQ1).dot (normal));
-      FCL_REAL depth2 ((globalP1-globalQ2).dot (normal));
-      FCL_REAL depth3 ((globalP1-globalQ3).dot (normal));
-      depth = std::max (depth1, std::max (depth2, depth3));
-      return depth;
+      return computePenetration (globalP1, globalP2, globalP3,
+          globalQ1, globalQ2, globalQ3, normal);
     }
   } // details
 } // namespace fcl
diff --git a/src/narrowphase/gjk.cpp b/src/narrowphase/gjk.cpp
index f6e54cb258bb20eec0c7e8e9c22d3dd4e6ed7912..cb002f446719af855bd3938c6b8d5a1e4e64511f 100644
--- a/src/narrowphase/gjk.cpp
+++ b/src/narrowphase/gjk.cpp
@@ -47,159 +47,361 @@ namespace fcl
 namespace details
 {
 
-Vec3f getSupport(const ShapeBase* shape, const Vec3f& dir)
+struct shape_traits_base
 {
-  FCL_REAL eps (sqrt(std::numeric_limits<FCL_REAL>::epsilon()));
-  switch(shape->getNodeType())
+  enum { NeedNormalizedDir = true
+  };
+};
+
+template <typename Shape> struct shape_traits : shape_traits_base {};
+
+template <> struct shape_traits<TriangleP> : shape_traits_base
+{
+  enum { NeedNormalizedDir = false
+  };
+};
+
+template <> struct shape_traits<Box> : shape_traits_base
+{
+  enum { NeedNormalizedDir = false
+  };
+};
+
+void getShapeSupport(const TriangleP* triangle, const Vec3f& dir, Vec3f& support)
+{
+  FCL_REAL dota = dir.dot(triangle->a);
+  FCL_REAL dotb = dir.dot(triangle->b);
+  FCL_REAL dotc = dir.dot(triangle->c);
+  if(dota > dotb)
   {
-  case GEOM_TRIANGLE:
+    if(dotc > dota)
+      support = triangle->c;
+    else
+      support = triangle->a;
+  }
+  else
+  {
+    if(dotc > dotb)
+      support = triangle->c;
+    else
+      support = triangle->b;
+  }
+}
+
+inline void getShapeSupport(const Box* box, const Vec3f& dir, Vec3f& support)
+{
+  support.noalias() = (dir.array() > 0).select(box->halfSide, -box->halfSide);
+}
+
+inline void getShapeSupport(const Sphere* sphere, const Vec3f& dir, Vec3f& support)
+{
+  support = dir * sphere->radius;
+}
+
+inline void getShapeSupport(const Capsule* capsule, const Vec3f& dir, Vec3f& support)
+{
+  support = capsule->radius * dir;
+  if (dir[2] > 0) support[2] += capsule->lz / 2;
+  else            support[2] -= capsule->lz / 2;
+}
+
+void getShapeSupport(const Cone* cone, const Vec3f& dir, Vec3f& support)
+{
+  // TODO (Joseph Mirabel)
+  // this assumes that the cone radius is, for -0.5 < z < 0.5:
+  // (lz/2 - z) * radius / lz
+  //
+  // I did not change the code myself. However, I think it should be revised.
+  // 1. It can be optimized.
+  // 2. I am not sure this is what conePlaneIntersect and coneHalfspaceIntersect
+  //    assumes...
+  FCL_REAL zdist = dir[0] * dir[0] + dir[1] * dir[1];
+  FCL_REAL len = zdist + dir[2] * dir[2];
+  zdist = std::sqrt(zdist);
+  len = std::sqrt(len);
+  FCL_REAL half_h = cone->lz * 0.5;
+  FCL_REAL radius = cone->radius;
+
+  FCL_REAL sin_a = radius / std::sqrt(radius * radius + 4 * half_h * half_h);
+
+  if(dir[2] > len * sin_a)
+    support = Vec3f(0, 0, half_h);
+  else if(zdist > 0)
+  {
+    FCL_REAL rad = radius / zdist;
+    support = Vec3f(rad * dir[0], rad * dir[1], -half_h);
+  }
+  else
+    support = Vec3f(0, 0, -half_h);
+}
+
+void getShapeSupport(const Cylinder* cylinder, const Vec3f& dir, Vec3f& support)
+{
+  static const FCL_REAL eps (sqrt(std::numeric_limits<FCL_REAL>::epsilon()));
+  FCL_REAL zdist = std::sqrt(dir[0] * dir[0] + dir[1] * dir[1]);
+  FCL_REAL half_h = cylinder->lz * 0.5;
+  if(zdist == 0.0)
+    support = Vec3f(0, 0, (dir[2]>0)? half_h:-half_h);
+  else {
+    FCL_REAL d = cylinder->radius / zdist;
+    FCL_REAL z (0.);
+    if (dir [2] > eps) z = half_h;
+    else if (dir [2] < -eps) z = -half_h;
+    support << d * dir.head<2>(), z;
+  }
+  assert (fabs (support [0] * dir [1] - support [1] * dir [0]) < eps);
+}
+
+void getShapeSupport(const Convex* convex, const Vec3f& dir, Vec3f& support)
+{
+  FCL_REAL maxdot = - std::numeric_limits<FCL_REAL>::max();
+  Vec3f* curp = convex->points;
+  for(int i = 0; i < convex->num_points; ++i, curp+=1)
+  {
+    FCL_REAL dot = dir.dot(*curp);
+    if(dot > maxdot)
     {
-      const TriangleP* triangle = static_cast<const TriangleP*>(shape);
-      FCL_REAL dota = dir.dot(triangle->a);
-      FCL_REAL dotb = dir.dot(triangle->b);
-      FCL_REAL dotc = dir.dot(triangle->c);
-      if(dota > dotb)
-      {
-        if(dotc > dota)
-          return triangle->c;
-        else
-          return triangle->a;
-      }
-      else
-      {
-        if(dotc > dotb)
-          return triangle->c;
-        else
-          return triangle->b;
-      }
+      support = *curp;
+      maxdot = dot;
     }
+  }
+}
+
+#define CALL_GET_SHAPE_SUPPORT(ShapeType)                                      \
+  getShapeSupport (static_cast<const ShapeType*>(shape),                       \
+      (shape_traits<ShapeType>::NeedNormalizedDir && !dirIsNormalized)         \
+      ? dir.normalized() : dir,                                                \
+      support)
+
+Vec3f getSupport(const ShapeBase* shape, const Vec3f& dir, bool dirIsNormalized)
+{
+  Vec3f support;
+  switch(shape->getNodeType())
+  {
+  case GEOM_TRIANGLE:
+    CALL_GET_SHAPE_SUPPORT(TriangleP);
     break;
   case GEOM_BOX:
-    {
-      const Box* box = static_cast<const Box*>(shape);
-      Vec3f res((dir[0]>0)?(box->side[0]/2):(-box->side[0]/2),
-                (dir[1]>0)?(box->side[1]/2):(-box->side[1]/2),
-                (dir[2]>0)?(box->side[2]/2):(-box->side[2]/2));
-      return res;
-    }
+    CALL_GET_SHAPE_SUPPORT(Box);
     break;
   case GEOM_SPHERE:
-    {
-      const Sphere* sphere = static_cast<const Sphere*>(shape);
-      return dir * sphere->radius;
-    }
+    CALL_GET_SHAPE_SUPPORT(Sphere);
     break;
   case GEOM_CAPSULE:
-    {
-      const Capsule* capsule = static_cast<const Capsule*>(shape);
-      FCL_REAL half_h = capsule->lz * 0.5;
-      Vec3f pos1(0, 0, half_h);
-      Vec3f pos2(0, 0, -half_h);
-      Vec3f v = dir * capsule->radius;
-      pos1 += v;
-      pos2 += v;
-      if(dir.dot(pos1) > dir.dot(pos2))
-        return pos1;
-      else return pos2;
-    }
+    CALL_GET_SHAPE_SUPPORT(Capsule);
     break;
   case GEOM_CONE:
-    {
-      const Cone* cone = static_cast<const Cone*>(shape);
-      FCL_REAL zdist = dir[0] * dir[0] + dir[1] * dir[1];
-      FCL_REAL len = zdist + dir[2] * dir[2];
-      zdist = std::sqrt(zdist);
-      len = std::sqrt(len);
-      FCL_REAL half_h = cone->lz * 0.5;
-      FCL_REAL radius = cone->radius;
-
-      FCL_REAL sin_a = radius / std::sqrt(radius * radius + 4 * half_h * half_h);
-
-      if(dir[2] > len * sin_a)
-        return Vec3f(0, 0, half_h);
-      else if(zdist > 0)
-      {
-        FCL_REAL rad = radius / zdist;
-        return Vec3f(rad * dir[0], rad * dir[1], -half_h);
-      }
-      else
-        return Vec3f(0, 0, -half_h);
-    }
+    CALL_GET_SHAPE_SUPPORT(Cone);
     break;
   case GEOM_CYLINDER:
-    {
-      const Cylinder* cylinder = static_cast<const Cylinder*>(shape);
-      FCL_REAL zdist = std::sqrt(dir[0] * dir[0] + dir[1] * dir[1]);
-      FCL_REAL half_h = cylinder->lz * 0.5;
-      Vec3f res;
-      if(zdist == 0.0)
-      {
-        res =  Vec3f(0, 0, (dir[2]>0)? half_h:-half_h);
-      }
-      else
-      {
-        FCL_REAL d = cylinder->radius / zdist;
-        FCL_REAL z (0.);
-        if (dir [2] > eps) z = half_h;
-        else if (dir [2] < -eps) z = -half_h;
-        res =  Vec3f(d * dir[0], d * dir[1], z);
-      }
-      assert (fabs (res [0] * dir [1] - res [1] * dir [0]) < eps);
-      return res;
-    }
+    CALL_GET_SHAPE_SUPPORT(Cylinder);
     break;
   case GEOM_CONVEX:
-    {
-      const Convex* convex = static_cast<const Convex*>(shape);
-      FCL_REAL maxdot = - std::numeric_limits<FCL_REAL>::max();
-      Vec3f* curp = convex->points;
-      Vec3f bestv(0,0,0);
-      for(int i = 0; i < convex->num_points; ++i, curp+=1)
-      {
-        FCL_REAL dot = dir.dot(*curp);
-        if(dot > maxdot)
-        {
-          bestv = *curp;
-          maxdot = dot;
-        }
-      }
-      return bestv;
-    }
+    CALL_GET_SHAPE_SUPPORT(Convex);
     break;
   case GEOM_PLANE:
-	break;
+  case GEOM_HALFSPACE:
   default:
+    support.setZero();
     ; // nothing
   }
 
-  return Vec3f(0, 0, 0);
+  return support;
+}
+
+#undef CALL_GET_SHAPE_SUPPORT
+
+template <typename Shape0, typename Shape1, bool TransformIsIdentity>
+void getSupportTpl (const Shape0* s0, const Shape1* s1,
+    const Matrix3f& oR1, const Vec3f& ot1,
+    const Vec3f& dir, Vec3f& support0, Vec3f& support1)
+{
+  getShapeSupport (s0, dir, support0);
+  if (TransformIsIdentity)
+    getShapeSupport (s1, - dir, support1);
+  else {
+    getShapeSupport (s1, - oR1.transpose() * dir, support1);
+    support1 = oR1 * support1 + ot1;
+  }
+}
+
+template <typename Shape0, typename Shape1, bool TransformIsIdentity>
+void getSupportFuncTpl (const MinkowskiDiff& md,
+    const Vec3f& dir, bool dirIsNormalized, Vec3f& support0, Vec3f& support1)
+{
+  enum { NeedNormalizedDir =
+    bool ( (bool)shape_traits<Shape0>::NeedNormalizedDir
+        || (bool)shape_traits<Shape1>::NeedNormalizedDir)
+  };
+  getSupportTpl<Shape0, Shape1, TransformIsIdentity> (
+      static_cast <const Shape0*>(md.shapes[0]),
+      static_cast <const Shape1*>(md.shapes[1]),
+      md.oR1, md.ot1,
+      (NeedNormalizedDir && !dirIsNormalized) ? dir.normalized() : dir,
+      support0, support1);
+}
+
+template <typename Shape0>
+MinkowskiDiff::GetSupportFunction makeGetSupportFunction1 (const ShapeBase* s1, bool identity)
+{
+  switch(s1->getNodeType())
+  {
+  case GEOM_TRIANGLE:
+    if (identity) return getSupportFuncTpl<Shape0, TriangleP, true >;
+    else          return getSupportFuncTpl<Shape0, TriangleP, false>;
+  case GEOM_BOX:
+    if (identity) return getSupportFuncTpl<Shape0, Box, true >;
+    else          return getSupportFuncTpl<Shape0, Box, false>;
+  case GEOM_SPHERE:
+    if (identity) return getSupportFuncTpl<Shape0, Sphere, true >;
+    else          return getSupportFuncTpl<Shape0, Sphere, false>;
+  case GEOM_CAPSULE:
+    if (identity) return getSupportFuncTpl<Shape0, Capsule, true >;
+    else          return getSupportFuncTpl<Shape0, Capsule, false>;
+  case GEOM_CONE:
+    if (identity) return getSupportFuncTpl<Shape0, Cone, true >;
+    else          return getSupportFuncTpl<Shape0, Cone, false>;
+  case GEOM_CYLINDER:
+    if (identity) return getSupportFuncTpl<Shape0, Cylinder, true >;
+    else          return getSupportFuncTpl<Shape0, Cylinder, false>;
+  case GEOM_CONVEX:
+    if (identity) return getSupportFuncTpl<Shape0, Convex, true >;
+    else          return getSupportFuncTpl<Shape0, Convex, false>;
+  default:
+    throw std::logic_error ("Unsupported geometric shape");
+  }
+}
+
+MinkowskiDiff::GetSupportFunction makeGetSupportFunction0 (const ShapeBase* s0, const ShapeBase* s1, bool identity)
+{
+  switch(s0->getNodeType())
+  {
+  case GEOM_TRIANGLE:
+    return makeGetSupportFunction1<TriangleP> (s1, identity);
+    break;
+  case GEOM_BOX:
+    return makeGetSupportFunction1<Box> (s1, identity);
+    break;
+  case GEOM_SPHERE:
+    return makeGetSupportFunction1<Sphere> (s1, identity);
+    break;
+  case GEOM_CAPSULE:
+    return makeGetSupportFunction1<Capsule> (s1, identity);
+    break;
+  case GEOM_CONE:
+    return makeGetSupportFunction1<Cone> (s1, identity);
+    break;
+  case GEOM_CYLINDER:
+    return makeGetSupportFunction1<Cylinder> (s1, identity);
+    break;
+  case GEOM_CONVEX:
+    return makeGetSupportFunction1<Convex> (s1, identity);
+    break;
+  default:
+    throw std::logic_error ("Unsupported geometric shape");
+  }
+}
+
+void MinkowskiDiff::set (const ShapeBase* shape0, const ShapeBase* shape1,
+    const Transform3f& tf0, const Transform3f& tf1)
+{
+  shapes[0] = shape0;
+  shapes[1] = shape1;
+
+  oR1 = tf0.getRotation().transpose() * tf1.getRotation();
+  ot1 = tf0.getRotation().transpose() * (tf1.getTranslation() - tf0.getTranslation());
+
+  bool identity = (oR1.isIdentity() && ot1.isZero());
+
+  getSupportFunc = makeGetSupportFunction0 (shape0, shape1, identity);
+}
+
+void MinkowskiDiff::set (const ShapeBase* shape0, const ShapeBase* shape1)
+{
+  shapes[0] = shape0;
+  shapes[1] = shape1;
+
+  oR1.setIdentity();
+  ot1.setZero();
+
+  getSupportFunc = makeGetSupportFunction0 (shape0, shape1, true);
 }
 
 void GJK::initialize()
 {
-  ray = Vec3f::Zero();
   nfree = 0;
   status = Failed;
-  current = 0;
-  distance = 0.0;
+  distance_upper_bound = std::numeric_limits<FCL_REAL>::max();
   simplex = NULL;
 }
 
-
 Vec3f GJK::getGuessFromSimplex() const
 {
   return ray;
 }
 
+bool GJK::getClosestPoints (const Simplex& simplex, Vec3f& w0, Vec3f& w1)
+{
+  SimplexV* const* vs = simplex.vertex;
+
+  for (short i = 0; i < simplex.rank; ++i) {
+    assert (vs[i]->w.isApprox (vs[i]->w0 - vs[i]->w1));
+  }
+
+  Project::ProjectResult projection;
+  switch (simplex.rank) {
+    case 1:
+      w0 = vs[0]->w0;
+      w1 = vs[0]->w1;
+      return true;
+    case 2:
+      {
+        const Vec3f& a  = vs[0]->w, a0 = vs[0]->w0, a1 = vs[0]->w1,
+                     b  = vs[1]->w, b0 = vs[1]->w0, b1 = vs[1]->w1;
+        FCL_REAL la, lb;
+        Vec3f N (b - a);
+        la = N.dot(-a);
+        if (la <= 0) {
+          w0 = a0;
+          w1 = a1;
+        } else {
+          lb = N.squaredNorm();
+          if (la > lb) {
+            w0 = b0;
+            w1 = b1;
+          } else {
+            lb = la / lb;
+            la = 1 - lb;
+            w0 = la * a0 + lb * b0;
+            w1 = la * a1 + lb * b1;
+          }
+        }
+      }
+      return true;
+    case 3:
+      // TODO avoid the reprojection
+      projection = Project::projectTriangleOrigin   (vs[0]->w, vs[1]->w, vs[2]->w);
+      break;
+    case 4: // We are in collision.
+      projection = Project::projectTetrahedraOrigin (vs[0]->w, vs[1]->w, vs[2]->w, vs[3]->w);
+      break;
+    default:
+      throw std::logic_error ("The simplex rank must be in [ 1, 4 ]");
+  }
+  w0.setZero();
+  w1.setZero();
+  for (short i = 0; i < simplex.rank; ++i) {
+    w0 += projection.parameterization[i] * vs[i]->w0;
+    w1 += projection.parameterization[i] * vs[i]->w1;
+  }
+  return true;
+}
 
 GJK::Status GJK::evaluate(const MinkowskiDiff& shape_, const Vec3f& guess)
 {
   size_t iterations = 0;
   FCL_REAL alpha = 0;
-  Vec3f lastw[4];
-  size_t clastw = 0;
-  Project::ProjectResult project_res;
-    
+
   free_v[0] = &store_v[0];
   free_v[1] = &store_v[1];
   free_v[2] = &store_v[2];
@@ -208,21 +410,18 @@ GJK::Status GJK::evaluate(const MinkowskiDiff& shape_, const Vec3f& guess)
   nfree = 4;
   current = 0;
   status = Valid;
-  shape = shape_;
+  shape = &shape_;
   distance = 0.0;
   simplices[0].rank = 0;
   ray = guess;
 
-  // appendVertex(simplices[0], (ray.squaredNorm() > 0) ? -ray : Vec3f(1, 0, 0));
   if (ray.squaredNorm() > 0) appendVertex(simplices[0], -ray);
-  else                       appendVertex(simplices[0], Vec3f(1, 0, 0));
-  simplices[0].coefficient[0] = 1;
+  else                       appendVertex(simplices[0], Vec3f(1, 0, 0), true);
   ray = simplices[0].vertex[0]->w;
-  lastw[0] = lastw[1] = lastw[2] = lastw[3] = ray; // cache previous support points, the new support point will compare with it to avoid too close support points
 
   do
   {
-    size_t next = 1 - current;
+    short next = (short)(1 - current);
     Simplex& curr_simplex = simplices[current];
     Simplex& next_simplex = simplices[next];
 
@@ -231,65 +430,52 @@ GJK::Status GJK::evaluate(const MinkowskiDiff& shape_, const Vec3f& guess)
     if(rl < tolerance) // mean origin is near the face of original simplex, return touch
     {
       status = Inside;
+      distance = 0; // rl ?
       break;
     }
 
     appendVertex(curr_simplex, -ray); // see below, ray points away from origin
 
-    // check B: when the new support point is close to previous support points, stop (as the new simplex is degenerated)
+    // check removed (by ?): when the new support point is close to previous support points, stop (as the new simplex is degenerated)
     const Vec3f& w = curr_simplex.vertex[curr_simplex.rank - 1]->w;
-    lastw[clastw = (clastw+1)&3] = w;
 
-    // check C: when the new support point is close to the sub-simplex where the ray point lies, stop (as the new simplex again is degenerated)
+    // check B: no collision if omega > 0
     FCL_REAL omega = ray.dot(w) / rl;
+    if (omega > distance_upper_bound)
+    {
+      distance = omega;
+      break;
+    }
+
+    // check C: when the new support point is close to the sub-simplex where the ray point lies, stop (as the new simplex again is degenerated)
     alpha = std::max(alpha, omega);
     if((rl - alpha) - tolerance * rl <= 0)
     {
       removeVertex(simplices[current]);
+      distance = rl;
       break;
     }
 
+    // This has been rewritten thanks to the excellent video:
+    // https://youtu.be/Qupqu1xe7Io
+    bool inside;
     switch(curr_simplex.rank)
     {
     case 2:
-      project_res = Project::projectLineOrigin(curr_simplex.vertex[0]->w,
-                                               curr_simplex.vertex[1]->w);
+      inside = projectLineOrigin (curr_simplex, next_simplex);
       break;
     case 3:
-      project_res = Project::projectTriangleOrigin(curr_simplex.vertex[0]->w,
-                                                   curr_simplex.vertex[1]->w,
-                                                   curr_simplex.vertex[2]->w); break;
+      inside = projectTriangleOrigin (curr_simplex, next_simplex);
+      break;
     case 4:
-      project_res = Project::projectTetrahedraOrigin(curr_simplex.vertex[0]->w,
-                                                     curr_simplex.vertex[1]->w,
-                                                     curr_simplex.vertex[2]->w,
-                                                     curr_simplex.vertex[3]->w);
+      inside = projectTetrahedraOrigin (curr_simplex, next_simplex);
       break;
     }
-    if(project_res.sqr_distance >= 0)
-    {
-      next_simplex.rank = 0;
-      ray = Vec3f(0,0,0);
-      current = next;
-      for(size_t i = 0; i < curr_simplex.rank; ++i)
-      {
-        if(project_res.encode & (1 << i))
-        {
-          next_simplex.vertex[next_simplex.rank] = curr_simplex.vertex[i];
-          // weights[i]
-          next_simplex.coefficient[next_simplex.rank++] =
-            project_res.parameterization[i];
-          // weights[i]
-          ray += curr_simplex.vertex[i]->w * project_res.parameterization[i];
-        }
-        else
-          free_v[nfree++] = curr_simplex.vertex[i];
-      }
-      if(project_res.encode == 15) status = Inside; // the origin is within the 4-simplex, collision
-    }
-    else
-    {
-      removeVertex(simplices[current]);
+    assert (nfree+next_simplex.rank == 4);
+    current = next;
+    if(inside) {
+      status = Inside;
+      distance = 0;
       break;
     }
 
@@ -298,33 +484,18 @@ GJK::Status GJK::evaluate(const MinkowskiDiff& shape_, const Vec3f& guess)
   } while(status == Valid);
 
   simplex = &simplices[current];
-  switch(status)
-  {
-  case Valid: distance = ray.norm(); break;
-  case Inside: distance = 0; break;
-  default:
-    distance = sqrt (project_res.sqr_distance);
-    break;
-  }
   return status;
 }
 
-void GJK::getSupport(const Vec3f& d, SimplexV& sv) const
-{
-  sv.d.noalias() = d.normalized();
-  sv.w.noalias() = shape.support(sv.d);
-}
-
-void GJK::removeVertex(Simplex& simplex)
+inline void GJK::removeVertex(Simplex& simplex)
 {
   free_v[nfree++] = simplex.vertex[--simplex.rank];
 }
 
-void GJK::appendVertex(Simplex& simplex, const Vec3f& v)
+inline void GJK::appendVertex(Simplex& simplex, const Vec3f& v, bool isNormalized)
 {
-  simplex.coefficient[simplex.rank] = 0; // initial weight 0
   simplex.vertex[simplex.rank] = free_v[--nfree]; // set the memory
-  getSupport(v, *simplex.vertex[simplex.rank++]);
+  getSupport (v, isNormalized, *simplex.vertex[simplex.rank++]);
 }
 
 bool GJK::encloseOrigin()
@@ -337,10 +508,10 @@ bool GJK::encloseOrigin()
       {
         Vec3f axis(Vec3f::Zero());
         axis[i] = 1;
-        appendVertex(*simplex, axis);
+        appendVertex(*simplex, axis, true);
         if(encloseOrigin()) return true;
         removeVertex(*simplex);
-        appendVertex(*simplex, -axis);
+        appendVertex(*simplex, -axis, true);
         if(encloseOrigin()) return true;
         removeVertex(*simplex);
       }
@@ -394,6 +565,547 @@ bool GJK::encloseOrigin()
   return false;
 }
 
+inline void originToPoint (
+    const GJK::Simplex& current, int a,
+    const Vec3f& A,
+    GJK::Simplex& next,
+    Vec3f& ray)
+{
+    // A is the closest to the origin
+    ray = A;
+    next.vertex[0] = current.vertex[a];
+    next.rank = 1;
+}
+
+inline void originToSegment (
+    const GJK::Simplex& current, int a, int b,
+    const Vec3f& A, const Vec3f& B,
+    const Vec3f& AB,
+    const FCL_REAL& ABdotAO,
+    GJK::Simplex& next,
+    Vec3f& ray)
+{
+    // ray = - ( AB ^ AO ) ^ AB = (AB.B) A + (-AB.A) B
+    ray = AB.dot(B) * A + ABdotAO * B;
+
+    next.vertex[0] = current.vertex[b];
+    next.vertex[1] = current.vertex[a];
+    next.rank = 2;
+
+    // To ensure backward compatibility
+    ray /= AB.squaredNorm();
+}
+
+inline void originToTriangle (
+    const GJK::Simplex& current,
+    int a, int b, int c,
+    const Vec3f& ABC,
+    const FCL_REAL& ABCdotAO,
+    GJK::Simplex& next,
+    Vec3f& ray)
+{
+  bool aboveTri (ABCdotAO >= 0);
+  ray = ABC;
+
+  if (aboveTri) {
+    next.vertex[0] = current.vertex[c];
+    next.vertex[1] = current.vertex[b];
+  } else {
+    next.vertex[0] = current.vertex[b];
+    next.vertex[1] = current.vertex[c];
+  }
+  next.vertex[2] = current.vertex[a];
+  next.rank = 3;
+
+  // To ensure backward compatibility
+  ray *= -ABCdotAO / ABC.squaredNorm();
+}
+
+bool GJK::projectLineOrigin(const Simplex& current, Simplex& next)
+{
+  const int a = 1, b = 0;
+  // A is the last point we added.
+  const Vec3f& A = current.vertex[a]->w;
+  const Vec3f& B = current.vertex[b]->w;
+
+  const Vec3f AB = B - A;
+  const FCL_REAL d = AB.dot(-A);
+  assert (d <= AB.squaredNorm());
+
+  if (d <= 0) {
+    // A is the closest to the origin
+    originToPoint (current, a, A, next, ray);
+    free_v[nfree++] = current.vertex[b];
+  } else
+    originToSegment (current, a, b, A, B, AB, d, next, ray);
+  return false;
+}
+
+bool GJK::projectTriangleOrigin(const Simplex& current, Simplex& next)
+{
+  const int a = 2, b = 1, c = 0;
+  // A is the last point we added.
+  const Vec3f& A = current.vertex[a]->w,
+               B = current.vertex[b]->w,
+               C = current.vertex[c]->w;
+
+  const Vec3f AB = B - A,
+              AC = C - A,
+              ABC = AB.cross(AC);
+
+  FCL_REAL edgeAC2o = ABC.cross(AC).dot (-A);
+  if (edgeAC2o >= 0) {
+    FCL_REAL towardsC = AC.dot (-A);
+    if (towardsC >= 0) { // Region 1
+      originToSegment (current, a, c, A, C, AC, towardsC, next, ray);
+      free_v[nfree++] = current.vertex[b];
+    } else { // Region 4 or 5
+      FCL_REAL towardsB = AB.dot(-A);
+      if (towardsB < 0) { // Region 5
+        // A is the closest to the origin
+        originToPoint (current, a, A, next, ray);
+        free_v[nfree++] = current.vertex[b];
+      } else // Region 4
+        originToSegment (current, a, b, A, B, AB, towardsB, next, ray);
+      free_v[nfree++] = current.vertex[c];
+    }
+  } else {
+    FCL_REAL edgeAB2o = AB.cross(ABC).dot (-A);
+    if (edgeAB2o >= 0) { // Region 4 or 5
+      FCL_REAL towardsB = AB.dot(-A);
+      if (towardsB < 0) { // Region 5
+        // A is the closest to the origin
+        originToPoint (current, a, A, next, ray);
+        free_v[nfree++] = current.vertex[b];
+      } else // Region 4
+        originToSegment (current, a, b, A, B, AB, towardsB, next, ray);
+      free_v[nfree++] = current.vertex[c];
+    } else {
+      originToTriangle (current, a, b, c, ABC, ABC.dot(-A), next, ray);
+    }
+  }
+  return false;
+}
+
+bool GJK::projectTetrahedraOrigin(const Simplex& current, Simplex& next)
+{
+  // The code of this function was generated using doc/gjk.py
+  const int a = 3, b = 2, c = 1, d = 0;
+  const Vec3f& A (current.vertex[a]->w);
+  const Vec3f& B (current.vertex[b]->w);
+  const Vec3f& C (current.vertex[c]->w);
+  const Vec3f& D (current.vertex[d]->w);
+  const FCL_REAL aa = A.squaredNorm();
+  const FCL_REAL da    = D.dot(A);
+  const FCL_REAL db    = D.dot(B);
+  const FCL_REAL dc    = D.dot(C);
+  const FCL_REAL dd    = D.dot(D);
+  const FCL_REAL da_aa = da - aa;
+  const FCL_REAL ca    = C.dot(A);
+  const FCL_REAL cb    = C.dot(B);
+  const FCL_REAL cc    = C.dot(C);
+  const FCL_REAL& cd    = dc;
+  const FCL_REAL ca_aa = ca - aa;
+  const FCL_REAL ba    = B.dot(A);
+  const FCL_REAL bb    = B.dot(B);
+  const FCL_REAL& bc    = cb;
+  const FCL_REAL& bd    = db;
+  const FCL_REAL ba_aa = ba - aa;
+  const FCL_REAL ba_ca = ba - ca;
+  const FCL_REAL ca_da = ca - da;
+  const FCL_REAL da_ba = da - ba;
+  const Vec3f a_cross_b = A.cross(B);
+  const Vec3f a_cross_c = A.cross(C);
+
+#define REGION_INSIDE()                 \
+    ray.setZero();                      \
+    next.vertex[0] = current.vertex[d]; \
+    next.vertex[1] = current.vertex[c]; \
+    next.vertex[2] = current.vertex[b]; \
+    next.vertex[3] = current.vertex[a]; \
+    next.rank=4;                        \
+    return true;
+
+  if (ba_aa <= 0) { // if AB.AO >= 0
+    if (-D.dot(a_cross_b) <= 0) { // if ADB.AO >= 0
+      if (ba * da_ba + bd * ba_aa - bb * da_aa <= 0) { // if (ADB ^ AB).AO >= 0
+        if (da_aa <= 0) { // if AD.AO >= 0
+          assert(da * da_ba + dd * ba_aa - db * da_aa <= 0); // (ADB ^ AD).AO >= 0
+          if (ba * ba_ca + bb * ca_aa - bc * ba_aa <= 0) { // if (ABC ^ AB).AO >= 0
+            // Region ABC
+            originToTriangle (current, a, b, c, (B-A).cross(C-A), -C.dot (a_cross_b), next, ray);
+            free_v[nfree++] = current.vertex[d];
+          } else { // not (ABC ^ AB).AO >= 0
+            // Region AB
+            originToSegment (current, a, b, A, B, B-A, -ba_aa, next, ray);
+            free_v[nfree++] = current.vertex[c];
+            free_v[nfree++] = current.vertex[d];
+          } // end of (ABC ^ AB).AO >= 0
+        } else { // not AD.AO >= 0
+          if (ba * ba_ca + bb * ca_aa - bc * ba_aa <= 0) { // if (ABC ^ AB).AO >= 0
+            if (ca * ba_ca + cb * ca_aa - cc * ba_aa <= 0) { // if (ABC ^ AC).AO >= 0
+              if (ca * ca_da + cc * da_aa - cd * ca_aa <= 0) { // if (ACD ^ AC).AO >= 0
+                // Region ACD
+                originToTriangle (current, a, c, d, (C-A).cross(D-A), -D.dot(a_cross_c), next, ray);
+                free_v[nfree++] = current.vertex[b];
+              } else { // not (ACD ^ AC).AO >= 0
+                // Region AC
+                originToSegment (current, a, c, A, C, C-A, -ca_aa, next, ray);
+                free_v[nfree++] = current.vertex[b];
+                free_v[nfree++] = current.vertex[d];
+              } // end of (ACD ^ AC).AO >= 0
+            } else { // not (ABC ^ AC).AO >= 0
+              if (C.dot (a_cross_b) <= 0) { // if ABC.AO >= 0
+                // Region ABC
+                originToTriangle (current, a, b, c, (B-A).cross(C-A), -C.dot (a_cross_b), next, ray);
+                free_v[nfree++] = current.vertex[d];
+              } else { // not ABC.AO >= 0
+                // Region ACD
+                originToTriangle (current, a, c, d, (C-A).cross(D-A), -D.dot(a_cross_c), next, ray);
+                free_v[nfree++] = current.vertex[b];
+              } // end of ABC.AO >= 0
+            } // end of (ABC ^ AC).AO >= 0
+          } else { // not (ABC ^ AB).AO >= 0
+            // Region AB
+            originToSegment (current, a, b, A, B, B-A, -ba_aa, next, ray);
+            free_v[nfree++] = current.vertex[c];
+            free_v[nfree++] = current.vertex[d];
+          } // end of (ABC ^ AB).AO >= 0
+        } // end of AD.AO >= 0
+      } else { // not (ADB ^ AB).AO >= 0
+        if (da * da_ba + dd * ba_aa - db * da_aa <= 0) { // if (ADB ^ AD).AO >= 0
+          // Region ADB
+          originToTriangle (current, a, d, b, (D-A).cross(B-A), D.dot(a_cross_b), next, ray);
+          free_v[nfree++] = current.vertex[c];
+        } else { // not (ADB ^ AD).AO >= 0
+          if (ca * ca_da + cc * da_aa - cd * ca_aa <= 0) { // if (ACD ^ AC).AO >= 0
+            if (da * ca_da + dc * da_aa - dd * ca_aa <= 0) { // if (ACD ^ AD).AO >= 0
+              // Region AD
+              originToSegment (current, a, d, A, D, D-A, -da_aa, next, ray);
+              free_v[nfree++] = current.vertex[b];
+              free_v[nfree++] = current.vertex[c];
+            } else { // not (ACD ^ AD).AO >= 0
+              // Region ACD
+              originToTriangle (current, a, c, d, (C-A).cross(D-A), -D.dot(a_cross_c), next, ray);
+              free_v[nfree++] = current.vertex[b];
+            } // end of (ACD ^ AD).AO >= 0
+          } else { // not (ACD ^ AC).AO >= 0
+            if (da * ca_da + dc * da_aa - dd * ca_aa <= 0) { // if (ACD ^ AD).AO >= 0
+              // Region AD
+              originToSegment (current, a, d, A, D, D-A, -da_aa, next, ray);
+              free_v[nfree++] = current.vertex[b];
+              free_v[nfree++] = current.vertex[c];
+            } else { // not (ACD ^ AD).AO >= 0
+              // Region AC
+              originToSegment (current, a, c, A, C, C-A, -ca_aa, next, ray);
+              free_v[nfree++] = current.vertex[b];
+              free_v[nfree++] = current.vertex[d];
+            } // end of (ACD ^ AD).AO >= 0
+          } // end of (ACD ^ AC).AO >= 0
+        } // end of (ADB ^ AD).AO >= 0
+      } // end of (ADB ^ AB).AO >= 0
+    } else { // not ADB.AO >= 0
+      if (C.dot (a_cross_b) <= 0) { // if ABC.AO >= 0
+        if (ba * ba_ca + bb * ca_aa - bc * ba_aa <= 0) { // if (ABC ^ AB).AO >= 0
+          if (ca * ba_ca + cb * ca_aa - cc * ba_aa <= 0) { // if (ABC ^ AC).AO >= 0
+            if (ca * ca_da + cc * da_aa - cd * ca_aa <= 0) { // if (ACD ^ AC).AO >= 0
+              // Region ACD
+              originToTriangle (current, a, c, d, (C-A).cross(D-A), -D.dot(a_cross_c), next, ray);
+              free_v[nfree++] = current.vertex[b];
+            } else { // not (ACD ^ AC).AO >= 0
+              // Region AC
+              originToSegment (current, a, c, A, C, C-A, -ca_aa, next, ray);
+              free_v[nfree++] = current.vertex[b];
+              free_v[nfree++] = current.vertex[d];
+            } // end of (ACD ^ AC).AO >= 0
+          } else { // not (ABC ^ AC).AO >= 0
+            // Region ABC
+            originToTriangle (current, a, b, c, (B-A).cross(C-A), -C.dot (a_cross_b), next, ray);
+            free_v[nfree++] = current.vertex[d];
+          } // end of (ABC ^ AC).AO >= 0
+        } else { // not (ABC ^ AB).AO >= 0
+          if (ca_aa <= 0) { // if AC.AO >= 0
+            assert(!(ca * ba_ca + cb * ca_aa - cc * ba_aa <= 0)); // Not (ABC ^ AC).AO >= 0
+            if (ba * da_ba + bd * ba_aa - bb * da_aa <= 0) { // if (ADB ^ AB).AO >= 0
+              // Region AB
+              originToSegment (current, a, b, A, B, B-A, -ba_aa, next, ray);
+              free_v[nfree++] = current.vertex[c];
+              free_v[nfree++] = current.vertex[d];
+            } else { // not (ADB ^ AB).AO >= 0
+              // Region AD
+              originToSegment (current, a, d, A, D, D-A, -da_aa, next, ray);
+              free_v[nfree++] = current.vertex[b];
+              free_v[nfree++] = current.vertex[c];
+            } // end of (ADB ^ AB).AO >= 0
+          } else { // not AC.AO >= 0
+            if (da * ca_da + dc * da_aa - dd * ca_aa <= 0) { // if (ACD ^ AD).AO >= 0
+              if (ba * da_ba + bd * ba_aa - bb * da_aa <= 0) { // if (ADB ^ AB).AO >= 0
+                // Region AB
+                originToSegment (current, a, b, A, B, B-A, -ba_aa, next, ray);
+                free_v[nfree++] = current.vertex[c];
+                free_v[nfree++] = current.vertex[d];
+              } else { // not (ADB ^ AB).AO >= 0
+                // Region AD
+                originToSegment (current, a, d, A, D, D-A, -da_aa, next, ray);
+                free_v[nfree++] = current.vertex[b];
+                free_v[nfree++] = current.vertex[c];
+              } // end of (ADB ^ AB).AO >= 0
+            } else { // not (ACD ^ AD).AO >= 0
+              if (ba * da_ba + bd * ba_aa - bb * da_aa <= 0) { // if (ADB ^ AB).AO >= 0
+                // Region AB
+                originToSegment (current, a, b, A, B, B-A, -ba_aa, next, ray);
+                free_v[nfree++] = current.vertex[c];
+                free_v[nfree++] = current.vertex[d];
+              } else { // not (ADB ^ AB).AO >= 0
+                // Region ACD
+                originToTriangle (current, a, c, d, (C-A).cross(D-A), -D.dot(a_cross_c), next, ray);
+                free_v[nfree++] = current.vertex[b];
+              } // end of (ADB ^ AB).AO >= 0
+            } // end of (ACD ^ AD).AO >= 0
+          } // end of AC.AO >= 0
+        } // end of (ABC ^ AB).AO >= 0
+      } else { // not ABC.AO >= 0
+        if (D.dot(a_cross_c) <= 0) { // if ACD.AO >= 0
+          if (ca * ca_da + cc * da_aa - cd * ca_aa <= 0) { // if (ACD ^ AC).AO >= 0
+            if (da * ca_da + dc * da_aa - dd * ca_aa <= 0) { // if (ACD ^ AD).AO >= 0
+              // Region AD
+              originToSegment (current, a, d, A, D, D-A, -da_aa, next, ray);
+              free_v[nfree++] = current.vertex[b];
+              free_v[nfree++] = current.vertex[c];
+            } else { // not (ACD ^ AD).AO >= 0
+              // Region ACD
+              originToTriangle (current, a, c, d, (C-A).cross(D-A), -D.dot(a_cross_c), next, ray);
+              free_v[nfree++] = current.vertex[b];
+            } // end of (ACD ^ AD).AO >= 0
+          } else { // not (ACD ^ AC).AO >= 0
+            if (ca_aa <= 0) { // if AC.AO >= 0
+              if (ca * ba_ca + cb * ca_aa - cc * ba_aa <= 0) { // if (ABC ^ AC).AO >= 0
+                // Region AC
+                originToSegment (current, a, c, A, C, C-A, -ca_aa, next, ray);
+                free_v[nfree++] = current.vertex[b];
+                free_v[nfree++] = current.vertex[d];
+              } else { // not (ABC ^ AC).AO >= 0
+                // Region AD
+                originToSegment (current, a, d, A, D, D-A, -da_aa, next, ray);
+                free_v[nfree++] = current.vertex[b];
+                free_v[nfree++] = current.vertex[c];
+              } // end of (ABC ^ AC).AO >= 0
+            } else { // not AC.AO >= 0
+              // Region AD
+              originToSegment (current, a, d, A, D, D-A, -da_aa, next, ray);
+              free_v[nfree++] = current.vertex[b];
+              free_v[nfree++] = current.vertex[c];
+            } // end of AC.AO >= 0
+          } // end of (ACD ^ AC).AO >= 0
+        } else { // not ACD.AO >= 0
+          // Region Inside
+          REGION_INSIDE()
+        } // end of ACD.AO >= 0
+      } // end of ABC.AO >= 0
+    } // end of ADB.AO >= 0
+  } else { // not AB.AO >= 0
+    if (ca_aa <= 0) { // if AC.AO >= 0
+      if (D.dot(a_cross_c) <= 0) { // if ACD.AO >= 0
+        if (da_aa <= 0) { // if AD.AO >= 0
+          if (ca * ca_da + cc * da_aa - cd * ca_aa <= 0) { // if (ACD ^ AC).AO >= 0
+            if (da * ca_da + dc * da_aa - dd * ca_aa <= 0) { // if (ACD ^ AD).AO >= 0
+              if (da * da_ba + dd * ba_aa - db * da_aa <= 0) { // if (ADB ^ AD).AO >= 0
+                // Region ADB
+                originToTriangle (current, a, d, b, (D-A).cross(B-A), D.dot(a_cross_b), next, ray);
+                free_v[nfree++] = current.vertex[c];
+              } else { // not (ADB ^ AD).AO >= 0
+                // Region AD
+                originToSegment (current, a, d, A, D, D-A, -da_aa, next, ray);
+                free_v[nfree++] = current.vertex[b];
+                free_v[nfree++] = current.vertex[c];
+              } // end of (ADB ^ AD).AO >= 0
+            } else { // not (ACD ^ AD).AO >= 0
+              // Region ACD
+              originToTriangle (current, a, c, d, (C-A).cross(D-A), -D.dot(a_cross_c), next, ray);
+              free_v[nfree++] = current.vertex[b];
+            } // end of (ACD ^ AD).AO >= 0
+          } else { // not (ACD ^ AC).AO >= 0
+            assert(!(da * ca_da + dc * da_aa - dd * ca_aa <= 0)); // Not (ACD ^ AD).AO >= 0
+            if (ca * ba_ca + cb * ca_aa - cc * ba_aa <= 0) { // if (ABC ^ AC).AO >= 0
+              // Region AC
+              originToSegment (current, a, c, A, C, C-A, -ca_aa, next, ray);
+              free_v[nfree++] = current.vertex[b];
+              free_v[nfree++] = current.vertex[d];
+            } else { // not (ABC ^ AC).AO >= 0
+              // Region ABC
+              originToTriangle (current, a, b, c, (B-A).cross(C-A), -C.dot (a_cross_b), next, ray);
+              free_v[nfree++] = current.vertex[d];
+            } // end of (ABC ^ AC).AO >= 0
+          } // end of (ACD ^ AC).AO >= 0
+        } else { // not AD.AO >= 0
+          if (ca * ba_ca + cb * ca_aa - cc * ba_aa <= 0) { // if (ABC ^ AC).AO >= 0
+            if (ca * ca_da + cc * da_aa - cd * ca_aa <= 0) { // if (ACD ^ AC).AO >= 0
+              assert(!(da * ca_da + dc * da_aa - dd * ca_aa <= 0)); // Not (ACD ^ AD).AO >= 0
+              // Region ACD
+              originToTriangle (current, a, c, d, (C-A).cross(D-A), -D.dot(a_cross_c), next, ray);
+              free_v[nfree++] = current.vertex[b];
+            } else { // not (ACD ^ AC).AO >= 0
+              // Region AC
+              originToSegment (current, a, c, A, C, C-A, -ca_aa, next, ray);
+              free_v[nfree++] = current.vertex[b];
+              free_v[nfree++] = current.vertex[d];
+            } // end of (ACD ^ AC).AO >= 0
+          } else { // not (ABC ^ AC).AO >= 0
+            if (C.dot (a_cross_b) <= 0) { // if ABC.AO >= 0
+              assert(ba * ba_ca + bb * ca_aa - bc * ba_aa <= 0); // (ABC ^ AB).AO >= 0
+              // Region ABC
+              originToTriangle (current, a, b, c, (B-A).cross(C-A), -C.dot (a_cross_b), next, ray);
+              free_v[nfree++] = current.vertex[d];
+            } else { // not ABC.AO >= 0
+              if (ca * ca_da + cc * da_aa - cd * ca_aa <= 0) { // if (ACD ^ AC).AO >= 0
+                assert(!(da * ca_da + dc * da_aa - dd * ca_aa <= 0)); // Not (ACD ^ AD).AO >= 0
+                // Region ACD
+                originToTriangle (current, a, c, d, (C-A).cross(D-A), -D.dot(a_cross_c), next, ray);
+                free_v[nfree++] = current.vertex[b];
+              } else { // not (ACD ^ AC).AO >= 0
+                // Region ADB
+                originToTriangle (current, a, d, b, (D-A).cross(B-A), D.dot(a_cross_b), next, ray);
+                free_v[nfree++] = current.vertex[c];
+              } // end of (ACD ^ AC).AO >= 0
+            } // end of ABC.AO >= 0
+          } // end of (ABC ^ AC).AO >= 0
+        } // end of AD.AO >= 0
+      } else { // not ACD.AO >= 0
+        if (C.dot (a_cross_b) <= 0) { // if ABC.AO >= 0
+          if (ca * ba_ca + cb * ca_aa - cc * ba_aa <= 0) { // if (ABC ^ AC).AO >= 0
+            if (ca * ca_da + cc * da_aa - cd * ca_aa <= 0) { // if (ACD ^ AC).AO >= 0
+              if (da * da_ba + dd * ba_aa - db * da_aa <= 0) { // if (ADB ^ AD).AO >= 0
+                // Region ADB
+                originToTriangle (current, a, d, b, (D-A).cross(B-A), D.dot(a_cross_b), next, ray);
+                free_v[nfree++] = current.vertex[c];
+              } else { // not (ADB ^ AD).AO >= 0
+                // Region AD
+                originToSegment (current, a, d, A, D, D-A, -da_aa, next, ray);
+                free_v[nfree++] = current.vertex[b];
+                free_v[nfree++] = current.vertex[c];
+              } // end of (ADB ^ AD).AO >= 0
+            } else { // not (ACD ^ AC).AO >= 0
+              // Region AC
+              originToSegment (current, a, c, A, C, C-A, -ca_aa, next, ray);
+              free_v[nfree++] = current.vertex[b];
+              free_v[nfree++] = current.vertex[d];
+            } // end of (ACD ^ AC).AO >= 0
+          } else { // not (ABC ^ AC).AO >= 0
+            assert(ba * ba_ca + bb * ca_aa - bc * ba_aa <= 0); // (ABC ^ AB).AO >= 0
+            // Region ABC
+            originToTriangle (current, a, b, c, (B-A).cross(C-A), -C.dot (a_cross_b), next, ray);
+            free_v[nfree++] = current.vertex[d];
+          } // end of (ABC ^ AC).AO >= 0
+        } else { // not ABC.AO >= 0
+          if (-D.dot(a_cross_b) <= 0) { // if ADB.AO >= 0
+            if (da * da_ba + dd * ba_aa - db * da_aa <= 0) { // if (ADB ^ AD).AO >= 0
+              // Region ADB
+              originToTriangle (current, a, d, b, (D-A).cross(B-A), D.dot(a_cross_b), next, ray);
+              free_v[nfree++] = current.vertex[c];
+            } else { // not (ADB ^ AD).AO >= 0
+              // Region AD
+              originToSegment (current, a, d, A, D, D-A, -da_aa, next, ray);
+              free_v[nfree++] = current.vertex[b];
+              free_v[nfree++] = current.vertex[c];
+            } // end of (ADB ^ AD).AO >= 0
+          } else { // not ADB.AO >= 0
+            // Region Inside
+            REGION_INSIDE()
+          } // end of ADB.AO >= 0
+        } // end of ABC.AO >= 0
+      } // end of ACD.AO >= 0
+    } else { // not AC.AO >= 0
+      if (da_aa <= 0) { // if AD.AO >= 0
+        if (C.dot (a_cross_b) <= 0) { // if ABC.AO >= 0
+          if (da * ca_da + dc * da_aa - dd * ca_aa <= 0) { // if (ACD ^ AD).AO >= 0
+            if (ba * ba_ca + bb * ca_aa - bc * ba_aa <= 0) { // if (ABC ^ AB).AO >= 0
+              assert(ca * ba_ca + cb * ca_aa - cc * ba_aa <= 0); // (ABC ^ AC).AO >= 0
+              // Region AD
+              originToSegment (current, a, d, A, D, D-A, -da_aa, next, ray);
+              free_v[nfree++] = current.vertex[b];
+              free_v[nfree++] = current.vertex[c];
+            } else { // not (ABC ^ AB).AO >= 0
+              if (da * da_ba + dd * ba_aa - db * da_aa <= 0) { // if (ADB ^ AD).AO >= 0
+                // Region ADB
+                originToTriangle (current, a, d, b, (D-A).cross(B-A), D.dot(a_cross_b), next, ray);
+                free_v[nfree++] = current.vertex[c];
+              } else { // not (ADB ^ AD).AO >= 0
+                // Region AD
+                originToSegment (current, a, d, A, D, D-A, -da_aa, next, ray);
+                free_v[nfree++] = current.vertex[b];
+                free_v[nfree++] = current.vertex[c];
+              } // end of (ADB ^ AD).AO >= 0
+            } // end of (ABC ^ AB).AO >= 0
+          } else { // not (ACD ^ AD).AO >= 0
+            if (D.dot(a_cross_c) <= 0) { // if ACD.AO >= 0
+              assert(ca * ca_da + cc * da_aa - cd * ca_aa <= 0); // (ACD ^ AC).AO >= 0
+              // Region ACD
+              originToTriangle (current, a, c, d, (C-A).cross(D-A), -D.dot(a_cross_c), next, ray);
+              free_v[nfree++] = current.vertex[b];
+            } else { // not ACD.AO >= 0
+              if (ba * ba_ca + bb * ca_aa - bc * ba_aa <= 0) { // if (ABC ^ AB).AO >= 0
+                assert(ca * ba_ca + cb * ca_aa - cc * ba_aa <= 0); // (ABC ^ AC).AO >= 0
+                // There are no case corresponding to this set of tests.
+                // applied [True, False, None, True, True, None, False, None, None, False, False, True]
+                // to ABC, ADB
+                assert(false);
+              } else { // not (ABC ^ AB).AO >= 0
+                // Region ADB
+                originToTriangle (current, a, d, b, (D-A).cross(B-A), D.dot(a_cross_b), next, ray);
+                free_v[nfree++] = current.vertex[c];
+              } // end of (ABC ^ AB).AO >= 0
+            } // end of ACD.AO >= 0
+          } // end of (ACD ^ AD).AO >= 0
+        } else { // not ABC.AO >= 0
+          if (D.dot(a_cross_c) <= 0) { // if ACD.AO >= 0
+            if (da * ca_da + dc * da_aa - dd * ca_aa <= 0) { // if (ACD ^ AD).AO >= 0
+              if (da * da_ba + dd * ba_aa - db * da_aa <= 0) { // if (ADB ^ AD).AO >= 0
+                // Region ADB
+                originToTriangle (current, a, d, b, (D-A).cross(B-A), D.dot(a_cross_b), next, ray);
+                free_v[nfree++] = current.vertex[c];
+              } else { // not (ADB ^ AD).AO >= 0
+                // Region AD
+                originToSegment (current, a, d, A, D, D-A, -da_aa, next, ray);
+                free_v[nfree++] = current.vertex[b];
+                free_v[nfree++] = current.vertex[c];
+              } // end of (ADB ^ AD).AO >= 0
+            } else { // not (ACD ^ AD).AO >= 0
+              assert(ca * ca_da + cc * da_aa - cd * ca_aa <= 0); // (ACD ^ AC).AO >= 0
+              // Region ACD
+              originToTriangle (current, a, c, d, (C-A).cross(D-A), -D.dot(a_cross_c), next, ray);
+              free_v[nfree++] = current.vertex[b];
+            } // end of (ACD ^ AD).AO >= 0
+          } else { // not ACD.AO >= 0
+            if (-D.dot(a_cross_b) <= 0) { // if ADB.AO >= 0
+              if (da * da_ba + dd * ba_aa - db * da_aa <= 0) { // if (ADB ^ AD).AO >= 0
+                // Region ADB
+                originToTriangle (current, a, d, b, (D-A).cross(B-A), D.dot(a_cross_b), next, ray);
+                free_v[nfree++] = current.vertex[c];
+              } else { // not (ADB ^ AD).AO >= 0
+                // Region AD
+                originToSegment (current, a, d, A, D, D-A, -da_aa, next, ray);
+                free_v[nfree++] = current.vertex[b];
+                free_v[nfree++] = current.vertex[c];
+              } // end of (ADB ^ AD).AO >= 0
+            } else { // not ADB.AO >= 0
+              // Region Inside
+              REGION_INSIDE()
+            } // end of ADB.AO >= 0
+          } // end of ACD.AO >= 0
+        } // end of ABC.AO >= 0
+      } else { // not AD.AO >= 0
+        // Region A
+        originToPoint (current, a, A, next, ray);
+        free_v[nfree++] = current.vertex[b];
+        free_v[nfree++] = current.vertex[c];
+        free_v[nfree++] = current.vertex[d];
+      } // end of AD.AO >= 0
+    } // end of AC.AO >= 0
+  } // end of AB.AO >= 0
+
+#undef REGION_INSIDE
+  return false;
+}
 
 void EPA::initialize()
 {
@@ -516,10 +1228,6 @@ EPA::Status EPA::evaluate(GJK& gjk, const Vec3f& guess)
       SimplexV* tmp = simplex.vertex[0];
       simplex.vertex[0] = simplex.vertex[1];
       simplex.vertex[1] = tmp;
-
-      FCL_REAL tmpv = simplex.coefficient[0];
-      simplex.coefficient[0] = simplex.coefficient[1];
-      simplex.coefficient[1] = tmpv;
     }
 
     SimplexF* tetrahedron[] = {newFace(simplex.vertex[0], simplex.vertex[1],
@@ -555,7 +1263,8 @@ EPA::Status EPA::evaluate(GJK& gjk, const Vec3f& guess)
           SimplexV* w = &sv_store[nextsv++];
           bool valid = true;
           best->pass = ++pass;
-          gjk.getSupport(best->n, *w);
+          // At the moment, SimplexF.n is always normalized. This could be revised in the future...
+          gjk.getSupport(best->n, true, *w);
           FCL_REAL wdist = best->n.dot(w->w) - best->d;
           if(wdist > tolerance)
           {
@@ -590,25 +1299,12 @@ EPA::Status EPA::evaluate(GJK& gjk, const Vec3f& guess)
         }
       }
 
-      Vec3f projection = outer.n * outer.d;
       normal = outer.n;
       depth = outer.d;
       result.rank = 3;
       result.vertex[0] = outer.vertex[0];
       result.vertex[1] = outer.vertex[1];
       result.vertex[2] = outer.vertex[2];
-      result.coefficient[0] = ((outer.vertex[1]->w - projection).cross
-                               (outer.vertex[2]->w - projection)).norm();
-      result.coefficient[1] = ((outer.vertex[2]->w - projection).cross
-                               (outer.vertex[0]->w - projection)).norm();
-      result.coefficient[2] = ((outer.vertex[0]->w - projection).cross
-                               (outer.vertex[1]->w - projection)).norm();
-
-      FCL_REAL sum = result.coefficient[0] + result.coefficient[1] +
-        result.coefficient[2];
-      result.coefficient[0] /= sum;
-      result.coefficient[1] /= sum;
-      result.coefficient[2] /= sum;
       return status;
     }
   }
@@ -621,7 +1317,6 @@ EPA::Status EPA::evaluate(GJK& gjk, const Vec3f& guess)
   depth = 0;
   result.rank = 1;
   result.vertex[0] = simplex.vertex[0];
-  result.coefficient[0] = 1;
   return status;
 }
 
diff --git a/src/narrowphase/narrowphase.cpp b/src/narrowphase/narrowphase.cpp
index 2f956c9bf3048e76ab498a131423f119842c85c1..619c77c95f2c458e191e5b9c00ed9770815a0726 100644
--- a/src/narrowphase/narrowphase.cpp
+++ b/src/narrowphase/narrowphase.cpp
@@ -526,9 +526,9 @@ bool GJKSolver::shapeTriangleInteraction
 // +------------+-----+--------+---------+------+----------+-------+------------+----------+
 // |            | box | sphere | capsule | cone | cylinder | plane | half-space | triangle |
 // +------------+-----+--------+---------+------+----------+-------+------------+----------+
-// | box        |     |        |         |      |          |       |            |          |
+// | box        |     |   O    |         |      |          |       |            |          |
 // +------------+-----+--------+---------+------+----------+-------+------------+----------+
-// | sphere     |/////|   O    |    O    |      |          |       |            |    O     |
+// | sphere     |/////|   O    |    O    |      |          |       |            |          |
 // +------------+-----+--------+---------+------+----------+-------+------------+----------+
 // | capsule    |/////|////////|    O    |      |          |       |            |          |
 // +------------+-----+--------+---------+------+----------+-------+------------+----------+
@@ -625,53 +625,44 @@ bool GJKSolver::shapeDistance<Capsule, Capsule>
      const TriangleP& s2, const Transform3f& tf2,
      FCL_REAL& dist, Vec3f& p1, Vec3f& p2, Vec3f& normal) const
   {
-    Vec3f guess(1, 0, 0);
+    const TriangleP
+      t1 (tf1.transform(s1.a), tf1.transform(s1.b), tf1.transform(s1.c)),
+      t2 (tf2.transform(s2.a), tf2.transform(s2.b), tf2.transform(s2.c));
+
+    Vec3f guess;
     if(enable_cached_guess) guess = cached_guess;
+    else guess = (t1.a + t1.b + t1.c - t2.a - t2.b - t2.c) / 3;
     bool enable_penetration (true);
 
     details::MinkowskiDiff shape;
-    shape.shapes[0] = &s1;
-    shape.shapes[1] = &s2;
-    shape.toshape1 = tf2.getRotation().transpose() * tf1.getRotation();
-    shape.toshape0 = tf1.inverseTimes(tf2);
-
-    const Vec3f& P1 (s1.a);
-    const Vec3f& P2 (s1.b);
-    const Vec3f& P3 (s1.c);
-    const Vec3f& Q1 (s2.a);
-    const Vec3f& Q2 (s2.b);
-    const Vec3f& Q3 (s2.c);
+    shape.set (&t1, &t2);
 
     details::GJK gjk((unsigned int) gjk_max_iterations, gjk_tolerance);
     details::GJK::Status gjk_status = gjk.evaluate(shape, -guess);
     if(enable_cached_guess) cached_guess = gjk.getGuessFromSimplex();
 
-    Vec3f w0 (Vec3f::Zero()), w1 (Vec3f::Zero());
-    for(size_t i = 0; i < gjk.getSimplex()->rank; ++i)
-    {
-      FCL_REAL p = gjk.getSimplex()->coefficient[i];
-      w0 += shape.support(gjk.getSimplex()->vertex[i]->d, 0) * p;
-      w1 += shape.support(-gjk.getSimplex()->vertex[i]->d, 1) * p;
-    }
+    details::GJK::getClosestPoints (*gjk.getSimplex(), p1, p2);
+
     if((gjk_status == details::GJK::Valid) ||
        (gjk_status == details::GJK::Failed))
     {
-      dist = (w0 - w1).norm();
-      p1 = tf1.transform (w0);
-      p2 = tf1.transform (w1);
+      // TODO On degenerated case, the closest point may be wrong
+      // (i.e. an object face normal is colinear to gjk.ray
+      // assert (dist == (w0 - w1).norm());
+      dist = gjk.distance;
 
       return true;
     }
     else if (gjk_status == details::GJK::Inside)
     {
-      p1 = tf1.transform (w0);
-      p2 = tf1.transform (w1);
-
       if (enable_penetration) {
         FCL_REAL penetrationDepth = details::computePenetration
-          (P1, P2, P3, Q1, Q2, Q3, tf1, tf2, normal);
+          (t1.a, t1.b, t1.c, t2.a, t2.b, t2.c, normal);
         dist = -penetrationDepth;
         assert (dist <= 1e-6);
+        // GJK says Inside when below GJK.tolerance. So non intersecting
+        // triangle may trigger "Inside" and have no penetration.
+        return penetrationDepth < 0;
       }
       dist = 0;
       return false;
diff --git a/src/shape/geometric_shapes_utility.cpp b/src/shape/geometric_shapes_utility.cpp
index f3a1b0b7f0a0a8549c6b97c0edc8d19739a4bdc2..d0f1825c670082c990a0ec7265209852e1e7cad1 100644
--- a/src/shape/geometric_shapes_utility.cpp
+++ b/src/shape/geometric_shapes_utility.cpp
@@ -51,16 +51,16 @@ namespace details
 std::vector<Vec3f> getBoundVertices(const Box& box, const Transform3f& tf)
 {
   std::vector<Vec3f> result(8);
-  FCL_REAL a = box.side[0] / 2;
-  FCL_REAL b = box.side[1] / 2;
-  FCL_REAL c = box.side[2] / 2;
-  result[0] = tf.transform(Vec3f(a, b, c));
-  result[1] = tf.transform(Vec3f(a, b, -c));
-  result[2] = tf.transform(Vec3f(a, -b, c));
-  result[3] = tf.transform(Vec3f(a, -b, -c));
-  result[4] = tf.transform(Vec3f(-a, b, c));
-  result[5] = tf.transform(Vec3f(-a, b, -c));
-  result[6] = tf.transform(Vec3f(-a, -b, c));
+  FCL_REAL a = box.halfSide[0];
+  FCL_REAL b = box.halfSide[1];
+  FCL_REAL c = box.halfSide[2];
+  result[0] = tf.transform(Vec3f( a,  b,  c));
+  result[1] = tf.transform(Vec3f( a,  b, -c));
+  result[2] = tf.transform(Vec3f( a, -b,  c));
+  result[3] = tf.transform(Vec3f( a, -b, -c));
+  result[4] = tf.transform(Vec3f(-a,  b,  c));
+  result[5] = tf.transform(Vec3f(-a,  b, -c));
+  result[6] = tf.transform(Vec3f(-a, -b,  c));
   result[7] = tf.transform(Vec3f(-a, -b, -c));
   
   return result;
@@ -256,7 +256,7 @@ void computeBV<AABB, Box>(const Box& s, const Transform3f& tf, AABB& bv)
   const Matrix3f& R = tf.getRotation();
   const Vec3f& T = tf.getTranslation();
 
-  Vec3f v_delta (0.5 * R.cwiseAbs() * s.side);
+  Vec3f v_delta (R.cwiseAbs() * s.halfSide);
   bv.max_ = T + v_delta;
   bv.min_ = T - v_delta;
 }
@@ -406,9 +406,9 @@ void computeBV<OBB, Box>(const Box& s, const Transform3f& tf, OBB& bv)
   const Matrix3f& R = tf.getRotation();
   const Vec3f& T = tf.getTranslation();
 
-  bv.To.noalias() = T;
-  bv.axes.noalias() = R;
-  bv.extent = s.side * (FCL_REAL)0.5;
+  bv.To = T;
+  bv.axes = R;
+  bv.extent = s.halfSide;
 }
 
 template<>
diff --git a/src/traversal/traversal_node_setup.cpp b/src/traversal/traversal_node_setup.cpp
index fc816420323c31e4d594469f163608f45ebe1c52..9f236985d0501ee4f93748369c0c870ac4b31f92 100644
--- a/src/traversal/traversal_node_setup.cpp
+++ b/src/traversal/traversal_node_setup.cpp
@@ -44,74 +44,6 @@ namespace hpp
 namespace fcl
 {
 
-
-namespace details
-{
-template<typename BV, typename OrientedNode>
-static inline bool setupMeshCollisionOrientedNode(OrientedNode& node,
-                                                  const BVHModel<BV>& model1, const Transform3f& tf1,
-                                                  const BVHModel<BV>& model2, const Transform3f& tf2,
-                                                  CollisionResult& result)
-{
-  if(model1.getModelType() != BVH_MODEL_TRIANGLES || model2.getModelType() != BVH_MODEL_TRIANGLES)
-    return false;
-
-  node.vertices1 = model1.vertices;
-  node.vertices2 = model2.vertices;
-
-  node.tri_indices1 = model1.tri_indices;
-  node.tri_indices2 = model2.tri_indices;
-
-  node.model1 = &model1;
-  node.tf1 = tf1;
-  node.model2 = &model2;
-  node.tf2 = tf2;
-
-  node.result = &result;
-
-  relativeTransform(tf1.getRotation(), tf1.getTranslation(), tf2.getRotation(), tf2.getTranslation(), node.RT.R, node.RT.T);
-
-  return true;
-}
-
-}
-
-
-bool initialize(MeshCollisionTraversalNodeOBB& node,
-                const BVHModel<OBB>& model1, const Transform3f& tf1,
-                const BVHModel<OBB>& model2, const Transform3f& tf2,
-                CollisionResult& result)
-{
-  return details::setupMeshCollisionOrientedNode(node, model1, tf1, model2, tf2, result);
-}
-
-
-bool initialize(MeshCollisionTraversalNodeRSS& node,
-                const BVHModel<RSS>& model1, const Transform3f& tf1,
-                const BVHModel<RSS>& model2, const Transform3f& tf2,
-                CollisionResult& result)
-{
-  return details::setupMeshCollisionOrientedNode(node, model1, tf1, model2, tf2, result);
-}
-
-
-bool initialize(MeshCollisionTraversalNodekIOS& node,
-                const BVHModel<kIOS>& model1, const Transform3f& tf1,
-                const BVHModel<kIOS>& model2, const Transform3f& tf2,
-                CollisionResult& result)
-{
-  return details::setupMeshCollisionOrientedNode(node, model1, tf1, model2, tf2, result);
-}
-
-bool initialize(MeshCollisionTraversalNodeOBBRSS& node,
-                const BVHModel<OBBRSS>& model1, const Transform3f& tf1,
-                const BVHModel<OBBRSS>& model2, const Transform3f& tf2,
-                CollisionResult& result)
-{
-  return details::setupMeshCollisionOrientedNode(node, model1, tf1, model2, tf2, result);
-}
-
-
 namespace details
 {
 template<typename BV, typename OrientedNode>
diff --git a/src/traversal/traversal_recurse.cpp b/src/traversal/traversal_recurse.cpp
index 050c76f0695e4e7a04f27cba1b45b3d32eb6eb60..d041c158c787f9db447323eec11fb5d60f02e3db 100644
--- a/src/traversal/traversal_recurse.cpp
+++ b/src/traversal/traversal_recurse.cpp
@@ -38,6 +38,8 @@
 
 #include <hpp/fcl/traversal/traversal_recurse.h>
 
+#include <vector>
+
 namespace hpp
 {
 namespace fcl
@@ -89,18 +91,70 @@ void collisionRecurse(CollisionTraversalNodeBase* node, int b1, int b2,
   }
 }
 
-  void collisionRecurse(MeshCollisionTraversalNodeOBB* /*node*/, int /*b1*/,
-                        int /*b2*/, const Matrix3f& /*R*/, const Vec3f& /*T*/,
-                        BVHFrontList* /*front_list*/)
+void collisionNonRecurse(CollisionTraversalNodeBase* node,
+		         BVHFrontList* front_list, FCL_REAL& sqrDistLowerBound)
 {
-  throw std::runtime_error ("Not implemented.");
-}
+  typedef std::pair<int, int> BVPair_t;
+  //typedef std::stack<BVPair_t, std::vector<BVPair_t> > Stack_t;
+  typedef std::vector<BVPair_t> Stack_t;
+
+  Stack_t pairs;
+  pairs.reserve (1000);
+  sqrDistLowerBound = std::numeric_limits<FCL_REAL>::infinity();
+  FCL_REAL sdlb = std::numeric_limits<FCL_REAL>::infinity();
+
+  pairs.push_back (BVPair_t (0, 0));
+
+  while (!pairs.empty()) {
+    int a = pairs.back().first,
+        b = pairs.back().second;
+    pairs.pop_back();
+
+    bool la = node->isFirstNodeLeaf(a);
+    bool lb = node->isSecondNodeLeaf(b);
+
+    // Leaf / Leaf case
+    if (la && lb) {
+      updateFrontList(front_list, a, b);
+
+      // TODO should we test the BVs ?
+      //if(node->BVTesting(a, b, sdlb)) {
+        //if (sdlb < sqrDistLowerBound) sqrDistLowerBound = sdlb;
+        //continue;
+      //}
+      node->leafTesting(a, b, sdlb);
+      if (sdlb < sqrDistLowerBound) sqrDistLowerBound = sdlb;
+      if (node->canStop() && !front_list) return;
+      continue;
+    }
 
-  void collisionRecurse(MeshCollisionTraversalNodeRSS* /*node*/, int /*b1*/,
-                        int /*b2*/, const Matrix3f& /*R*/, const Vec3f& /*T*/,
-                        BVHFrontList* /*front_list*/)
-{
-  throw std::runtime_error ("Not implemented.");
+    // TODO shouldn't we test the leaf triangle against BV is la != lb
+    // if (la && !lb) { // leaf triangle 1 against BV 2
+    // } else if (!la && lb) { // BV 1 against leaf triangle 2
+    // }
+
+    // Check the BV
+    if(node->BVTesting(a, b, sdlb)) {
+      if (sdlb < sqrDistLowerBound) sqrDistLowerBound = sdlb;
+      updateFrontList(front_list, a, b);
+      continue;
+    }
+
+    if(node->firstOverSecond(a, b))
+    {
+      int c1 = node->getFirstLeftChild(a);
+      int c2 = node->getFirstRightChild(a);
+      pairs.push_back (BVPair_t (c2, b));
+      pairs.push_back (BVPair_t (c1, b));
+    }
+    else
+    {
+      int c1 = node->getSecondLeftChild(b);
+      int c2 = node->getSecondRightChild(b);
+      pairs.push_back (BVPair_t (a, c2));
+      pairs.push_back (BVPair_t (a, c1));
+    }
+  }
 }
 
 /** Recurse function for self collision
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
index 88ae18961a525dfdac3b4727a41c883faf384d17..e0d1165370ac0af9986bd20ee114832587fef02c 100644
--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@@ -21,36 +21,35 @@ include_directories(${CMAKE_CURRENT_BINARY_DIR})
 include_directories(SYSTEM ${Boost_INCLUDE_DIRS})
 
 
-add_fcl_test(test_fcl_math test_fcl_math.cpp)
-
-add_fcl_test(test_fcl_collision test_fcl_collision.cpp test_fcl_utility.cpp)
-add_fcl_test(test_fcl_distance test_fcl_distance.cpp test_fcl_utility.cpp)
-add_fcl_test(test_fcl_distance_lower_bound test_fcl_distance_lower_bound.cpp
-  test_fcl_utility.cpp)
-add_fcl_test(test_fcl_geometric_shapes test_fcl_geometric_shapes.cpp test_fcl_utility.cpp)
-#add_fcl_test(test_fcl_broadphase test_fcl_broadphase.cpp test_fcl_utility.cpp)
-#add_fcl_test(test_fcl_shape_mesh_consistency test_fcl_shape_mesh_consistency.cpp test_fcl_utility.cpp)
-add_fcl_test(test_fcl_frontlist test_fcl_frontlist.cpp test_fcl_utility.cpp)
-#add_fcl_test(test_fcl_math test_fcl_math.cpp test_fcl_utility.cpp)
-
-# add_fcl_test(test_fcl_sphere_capsule test_fcl_sphere_capsule.cpp)
-add_fcl_test(test_fcl_capsule_capsule test_fcl_capsule_capsule.cpp test_fcl_utility.cpp)
-add_fcl_test(test_fcl_box_box_distance test_fcl_box_box_distance.cpp test_fcl_utility.cpp)
-add_fcl_test(test_fcl_simple test_fcl_simple.cpp)
-add_fcl_test(test_fcl_capsule_box_1 test_fcl_capsule_box_1.cpp test_fcl_utility.cpp)
-add_fcl_test(test_fcl_capsule_box_2 test_fcl_capsule_box_2.cpp test_fcl_utility.cpp)
-#add_fcl_test(test_fcl_obb test_fcl_obb.cpp)
-
-add_fcl_test(test_fcl_bvh_models test_fcl_bvh_models.cpp test_fcl_utility.cpp)
-
-add_fcl_test(test_fcl_profiling test_fcl_profiling.cpp test_fcl_utility.cpp)
-PKG_CONFIG_USE_DEPENDENCY(test_fcl_profiling assimp)
-
-add_fcl_test(test_fcl_gjk test_fcl_gjk.cpp)
+add_fcl_test(math math.cpp)
+
+add_fcl_test(collision collision.cpp utility.cpp)
+add_fcl_test(distance distance.cpp utility.cpp)
+add_fcl_test(distance_lower_bound distance_lower_bound.cpp utility.cpp)
+add_fcl_test(geometric_shapes geometric_shapes.cpp utility.cpp)
+#add_fcl_test(broadphase broadphase.cpp utility.cpp)
+#add_fcl_test(shape_mesh_consistency shape_mesh_consistency.cpp utility.cpp)
+add_fcl_test(frontlist frontlist.cpp utility.cpp)
+#add_fcl_test(math math.cpp utility.cpp)
+
+# add_fcl_test(sphere_capsule sphere_capsule.cpp)
+add_fcl_test(capsule_capsule capsule_capsule.cpp utility.cpp)
+add_fcl_test(box_box_distance box_box_distance.cpp utility.cpp)
+add_fcl_test(simple simple.cpp)
+add_fcl_test(capsule_box_1 capsule_box_1.cpp utility.cpp)
+add_fcl_test(capsule_box_2 capsule_box_2.cpp utility.cpp)
+add_fcl_test(obb obb.cpp)
+
+add_fcl_test(bvh_models bvh_models.cpp utility.cpp)
+
+add_fcl_test(profiling profiling.cpp utility.cpp)
+PKG_CONFIG_USE_DEPENDENCY(profiling assimp)
+
+add_fcl_test(gjk gjk.cpp)
 if(HPP_FCL_HAVE_OCTOMAP)
-  add_fcl_test(test_fcl_octree test_fcl_octree.cpp test_fcl_utility.cpp)
+  add_fcl_test(octree octree.cpp utility.cpp)
 endif(HPP_FCL_HAVE_OCTOMAP)
 
 ## Benchmark
-add_executable(test-benchmark benchmark.cpp test_fcl_utility.cpp)
+add_executable(test-benchmark benchmark.cpp utility.cpp)
 target_link_libraries(test-benchmark hpp-fcl ${Boost_LIBRARIES})
diff --git a/test/benchmark.cpp b/test/benchmark.cpp
index 28b718c5df33032ccd153c1b949ac7583ec518d4..f545b7df91511b5b976176993beb8b068482d7ad 100644
--- a/test/benchmark.cpp
+++ b/test/benchmark.cpp
@@ -16,8 +16,8 @@
 
 #include <hpp/fcl/traversal/traversal_node_setup.h>
 #include <hpp/fcl/traversal/traversal_node_bvhs.h>
-#include <hpp/fcl/collision_node.h>
-#include "test_fcl_utility.h"
+#include <../src/collision_node.h>
+#include "utility.h"
 #include "fcl_resources/config.h"
 #include <boost/filesystem.hpp>
 
diff --git a/test/benchmark/test_fcl_gjk.output b/test/benchmark/test_fcl_gjk.output
index aaf9b5434d9259016a2fa21b57aee3875c5e5543..f20f15d5373150ab41cd103d3ca74c2b9639a87f 100644
--- a/test/benchmark/test_fcl_gjk.output
+++ b/test/benchmark/test_fcl_gjk.output
@@ -1,4 +1,4 @@
-Result after running test_fcl_gjk in Release mode.
+Result after running gjk in Release mode.
 
 nCol = 2840
 nDiff = 0
diff --git a/test/test_fcl_box_box_distance.cpp b/test/box_box_distance.cpp
similarity index 99%
rename from test/test_fcl_box_box_distance.cpp
rename to test/box_box_distance.cpp
index 0613a2edf7ce26abf7f5ebbeb788bf1740e2fa59..df295a9fadb4ac9aff7eba9f9175fe9611137140 100644
--- a/test/test_fcl_box_box_distance.cpp
+++ b/test/box_box_distance.cpp
@@ -48,7 +48,7 @@
 #include <hpp/fcl/collision_object.h>
 #include <hpp/fcl/shape/geometric_shapes.h>
 
-#include "test_fcl_utility.h"
+#include "utility.h"
 
 typedef boost::shared_ptr <hpp::fcl::CollisionGeometry> CollisionGeometryPtr_t;
 
diff --git a/test/test_fcl_broadphase.cpp b/test/broadphase.cpp
similarity index 99%
rename from test/test_fcl_broadphase.cpp
rename to test/broadphase.cpp
index 320b3f2a0ff341309386a314bd83ce21ba0d1e7b..a7a1676f327350421f984225444e925f1e1924c5 100644
--- a/test/test_fcl_broadphase.cpp
+++ b/test/broadphase.cpp
@@ -45,7 +45,7 @@
 #include <hpp/fcl/broadphase/broadphase.h>
 #include <hpp/fcl/shape/geometric_shape_to_BVH_model.h>
 #include <hpp/fcl/math/transform.h>
-#include "test_fcl_utility.h"
+#include "utility.h"
 
 #if USE_GOOGLEHASH
 #include <sparsehash/sparse_hash_map>
diff --git a/test/test_fcl_bvh_models.cpp b/test/bvh_models.cpp
similarity index 87%
rename from test/test_fcl_bvh_models.cpp
rename to test/bvh_models.cpp
index 82c012862ae731b8eae75edbf01b9752f13d074a..b3bacfb85ba494e3bb0271019a70104a435d1bd5 100644
--- a/test/test_fcl_bvh_models.cpp
+++ b/test/bvh_models.cpp
@@ -50,7 +50,7 @@
 #include "hpp/fcl/shape/geometric_shapes.h"
 #include <hpp/fcl/mesh_loader/assimp.h>
 #include <hpp/fcl/mesh_loader/loader.h>
-#include "test_fcl_utility.h"
+#include "utility.h"
 #include <iostream>
 
 using namespace hpp::fcl;
@@ -72,18 +72,18 @@ void testBVHModelPointCloud()
   }
 
   Box box;
-  double a = box.side[0];
-  double b = box.side[1];
-  double c = box.side[2];
+  double a = box.halfSide[0];
+  double b = box.halfSide[1];
+  double c = box.halfSide[2];
   std::vector<Vec3f> points(8);
-  points[0] << 0.5 * a, -0.5 * b, 0.5 * c;
-  points[1] << 0.5 * a, 0.5 * b, 0.5 * c;
-  points[2] << -0.5 * a, 0.5 * b, 0.5 * c;
-  points[3] << -0.5 * a, -0.5 * b, 0.5 * c;
-  points[4] << 0.5 * a, -0.5 * b, -0.5 * c;
-  points[5] << 0.5 * a, 0.5 * b, -0.5 * c;
-  points[6] << -0.5 * a, 0.5 * b, -0.5 * c;
-  points[7] << -0.5 * a, -0.5 * b, -0.5 * c;
+  points[0] <<  a, -b,  c;
+  points[1] <<  a,  b,  c;
+  points[2] << -a,  b,  c;
+  points[3] << -a, -b,  c;
+  points[4] <<  a, -b, -c;
+  points[5] <<  a,  b, -c;
+  points[6] << -a,  b, -c;
+  points[7] << -a, -b, -c;
 
   int result;
 
@@ -113,19 +113,19 @@ void testBVHModelTriangles()
   Box box(1,1,1);
   AABB aabb (Vec3f(-1,0,-1), Vec3f(1,1,1));
 
-  double a = box.side[0];
-  double b = box.side[1];
-  double c = box.side[2];
+  double a = box.halfSide[0];
+  double b = box.halfSide[1];
+  double c = box.halfSide[2];
   std::vector<Vec3f> points(8);
   std::vector<Triangle> tri_indices(12);
-  points[0] << 0.5 * a, -0.5 * b, 0.5 * c;
-  points[1] << 0.5 * a, 0.5 * b, 0.5 * c;
-  points[2] << -0.5 * a, 0.5 * b, 0.5 * c;
-  points[3] << -0.5 * a, -0.5 * b, 0.5 * c;
-  points[4] << 0.5 * a, -0.5 * b, -0.5 * c;
-  points[5] << 0.5 * a, 0.5 * b, -0.5 * c;
-  points[6] << -0.5 * a, 0.5 * b, -0.5 * c;
-  points[7] << -0.5 * a, -0.5 * b, -0.5 * c;
+  points[0] <<  a, -b,  c;
+  points[1] <<  a,  b,  c;
+  points[2] << -a,  b,  c;
+  points[3] << -a, -b,  c;
+  points[4] <<  a, -b, -c;
+  points[5] <<  a,  b, -c;
+  points[6] << -a,  b, -c;
+  points[7] << -a, -b, -c;
 
   tri_indices[0].set(0, 4, 1);
   tri_indices[1].set(1, 4, 5);
@@ -202,19 +202,19 @@ void testBVHModelSubModel()
   boost::shared_ptr<BVHModel<BV> > model(new BVHModel<BV>);
   Box box;
 
-  double a = box.side[0];
-  double b = box.side[1];
-  double c = box.side[2];
+  double a = box.halfSide[0];
+  double b = box.halfSide[1];
+  double c = box.halfSide[2];
   std::vector<Vec3f> points(8);
   std::vector<Triangle> tri_indices(12);
-  points[0] << 0.5 * a, -0.5 * b, 0.5 * c;
-  points[1] << 0.5 * a, 0.5 * b, 0.5 * c;
-  points[2] << -0.5 * a, 0.5 * b, 0.5 * c;
-  points[3] << -0.5 * a, -0.5 * b, 0.5 * c;
-  points[4] << 0.5 * a, -0.5 * b, -0.5 * c;
-  points[5] << 0.5 * a, 0.5 * b, -0.5 * c;
-  points[6] << -0.5 * a, 0.5 * b, -0.5 * c;
-  points[7] << -0.5 * a, -0.5 * b, -0.5 * c;
+  points[0] <<  a, -b,  c;
+  points[1] <<  a,  b,  c;
+  points[2] << -a,  b,  c;
+  points[3] << -a, -b,  c;
+  points[4] <<  a, -b, -c;
+  points[5] <<  a,  b, -c;
+  points[6] << -a,  b, -c;
+  points[7] << -a, -b, -c;
 
   tri_indices[0].set(0, 4, 1);
   tri_indices[1].set(1, 4, 5);
diff --git a/test/test_fcl_capsule_box_1.cpp b/test/capsule_box_1.cpp
similarity index 98%
rename from test/test_fcl_capsule_box_1.cpp
rename to test/capsule_box_1.cpp
index 8987672a545c2678c6e8c76d6b31081fd31a06e3..9f034b4f62f39edbf70a521e7b3ad77131f272f1 100644
--- a/test/test_fcl_capsule_box_1.cpp
+++ b/test/capsule_box_1.cpp
@@ -49,7 +49,7 @@
 #include <hpp/fcl/collision_object.h>
 #include <hpp/fcl/shape/geometric_shapes.h>
 
-#include "test_fcl_utility.h"
+#include "utility.h"
 
 BOOST_AUTO_TEST_CASE(distance_capsule_box)
 {
@@ -78,7 +78,7 @@ BOOST_AUTO_TEST_CASE(distance_capsule_box)
   BOOST_CHECK_CLOSE (o1 [0], 1.0, 1e-1);
   CHECK_CLOSE_TO_0 (o1 [1], 1e-1);
   BOOST_CHECK_CLOSE (o2 [0], 0.5, 1e-1);
-  CHECK_CLOSE_TO_0 (o1 [1], 1e-1);
+  CHECK_CLOSE_TO_0 (o2 [1], 1e-1);
 
   // Move capsule above box
   tf1 = hpp::fcl::Transform3f (hpp::fcl::Vec3f (0., 0., 8.));
diff --git a/test/test_fcl_capsule_box_2.cpp b/test/capsule_box_2.cpp
similarity index 99%
rename from test/test_fcl_capsule_box_2.cpp
rename to test/capsule_box_2.cpp
index f87b6f60b35bf91522ca6191fe8a7290310b04e8..8a20117ab404ec0d648811b71ef71231968e1ad7 100644
--- a/test/test_fcl_capsule_box_2.cpp
+++ b/test/capsule_box_2.cpp
@@ -42,7 +42,7 @@
 
 #define CHECK_CLOSE_TO_0(x, eps) BOOST_CHECK_CLOSE ((x + 1.0), (1.0), (eps))
 
-#include "test_fcl_utility.h"
+#include "utility.h"
 
 #include <cmath>
 #include <hpp/fcl/distance.h>
diff --git a/test/test_fcl_capsule_capsule.cpp b/test/capsule_capsule.cpp
similarity index 99%
rename from test/test_fcl_capsule_capsule.cpp
rename to test/capsule_capsule.cpp
index 9d13e9b0793d5926a31b15d68c5fe23bf1889e00..ee2a063aab2098e2ebdbe2c62da3419fd0743aa6 100644
--- a/test/test_fcl_capsule_capsule.cpp
+++ b/test/capsule_capsule.cpp
@@ -49,7 +49,7 @@
 #include <hpp/fcl/collision_object.h>
 #include <hpp/fcl/shape/geometric_shapes.h>
 
-#include "test_fcl_utility.h"
+#include "utility.h"
 
 using namespace hpp::fcl;
 typedef boost::shared_ptr <CollisionGeometry> CollisionGeometryPtr_t;
diff --git a/test/collision-bench.py b/test/collision-bench.py
new file mode 100644
index 0000000000000000000000000000000000000000..3497c83a26441f8f3474c2a753a01496a04864a7
--- /dev/null
+++ b/test/collision-bench.py
@@ -0,0 +1,76 @@
+import matplotlib.pyplot as plt
+import csv, sys, numpy as np
+from math import sqrt
+
+filename = sys.argv[1] 
+
+with open(filename, 'r') as file:
+    reader = csv.reader (file, strict = True)
+    fieldnames = None
+    #fieldnames = reader.fieldnames
+    converters = (str, int, int, int, float, lambda x: float(x)*1e-3)
+
+    for row in reader:
+        if fieldnames is None:
+            fieldnames = [ n.strip() for n in row]
+            values = []
+            continue
+
+        values.append ( [ c(v) for v, c in zip(row, converters) ] )
+
+request1 = values[:int(len(values)/2)]
+request2 = values[int(len(values)/2):]
+
+Ntransforms = 1
+while values[0][0:3] == values[Ntransforms][0:3]:
+    Ntransforms += 1
+
+splitMethods = ['avg', 'med', 'cen']
+type = ["o", "or", "r", ]
+BVs = sorted (list (set ([ v[0] for v in request1[::Ntransforms] ])))
+xvals = [ BVs.index(v[0]) + len(BVs)*v[2] + 3*len(BVs)*v[1] for v in request1[::Ntransforms] ]
+cases = [ v[0] + " " + type[v[1]] + " " + splitMethods[v[2]] for v in request1[::Ntransforms] ]
+
+idx_reorder = sorted (list(range(len(xvals))), key=lambda i: xvals[i])
+def reorder (l): return [ l[i] for i in idx_reorder ]
+
+xvals_s = reorder (xvals)
+cases_s = reorder (cases)
+
+onlyLB = True
+# Time
+plt.figure(0)
+for i in range(Ntransforms):
+    if not onlyLB:
+        plt.plot(xvals_s, reorder([ v[5] for v in request1[i::Ntransforms] ]) , '-.o', label=str(i))
+    plt.plot(xvals_s, reorder([ v[5] for v in request2[i::Ntransforms] ]) , ':+',  label=str(i)+"+lb")
+
+plt.legend()
+plt.xticks(ticks=xvals_s, labels=cases_s, rotation=90)
+plt.ylabel('Time (us)')
+plt.yscale('log')
+
+# Time
+plt.figure(2)
+for k in range (0, len(request1), Ntransforms):
+    if not onlyLB:
+        plt.plot([ xvals[int(k/Ntransforms)], ], sum([ v[5] for v in request1[k:k+Ntransforms] ])/Ntransforms)
+    plt.plot([ xvals[int(k/Ntransforms)], ], sum([ v[5] for v in request2[k:k+Ntransforms] ])/Ntransforms)
+
+plt.plot(xvals_s, reorder ([ sum([ v[5] for v in request2[k:k+Ntransforms] ])/Ntransforms for k in range (0, len(request1), Ntransforms) ]))
+
+plt.xticks(ticks=xvals_s, labels=cases_s, rotation=90)
+plt.ylabel('Time (us)')
+plt.yscale('log')
+
+# Distance lower bound
+plt.figure(1)
+for i in range(Ntransforms):
+    if request2[i][3] > 0: continue
+    plt.plot(xvals_s, reorder([ v[4] for v in request2[i::Ntransforms] ]) , ':+',  label=str(i)+"+lb")
+
+plt.legend()
+plt.ylabel('Distance lower bound')
+plt.xticks(ticks=xvals_s, labels=cases_s, rotation=90)
+
+plt.show()
diff --git a/test/collision.cpp b/test/collision.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..d6c0c911dad67d6c78efe1cf4b37166fea12b029
--- /dev/null
+++ b/test/collision.cpp
@@ -0,0 +1,605 @@
+/*
+ * Software License Agreement (BSD License)
+ *
+ *  Copyright (c) 2011-2014, Willow Garage, Inc.
+ *  Copyright (c) 2014-2015, Open Source Robotics Foundation
+ *  All rights reserved.
+ *
+ *  Redistribution and use in source and binary forms, with or without
+ *  modification, are permitted provided that the following conditions
+ *  are met:
+ *
+ *   * Redistributions of source code must retain the above copyright
+ *     notice, this list of conditions and the following disclaimer.
+ *   * Redistributions in binary form must reproduce the above
+ *     copyright notice, this list of conditions and the following
+ *     disclaimer in the documentation and/or other materials provided
+ *     with the distribution.
+ *   * Neither the name of Open Source Robotics Foundation nor the names of its
+ *     contributors may be used to endorse or promote products derived
+ *     from this software without specific prior written permission.
+ *
+ *  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ *  FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ *  COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ *  INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ *  BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ *  LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ *  CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ *  LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ *  ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ *  POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/** \author Joseph Mirabel */
+
+#define BOOST_CHRONO_VERSION 2
+#include <boost/chrono/chrono.hpp>
+#include <boost/chrono/chrono_io.hpp>
+
+#define BOOST_TEST_MODULE FCL_COLLISION
+#define BOOST_TEST_DYN_LINK
+#include <boost/test/unit_test.hpp>
+#include <boost/utility/binary.hpp>
+
+#include <boost/assign/list_of.hpp>
+
+#include <hpp/fcl/traversal/traversal_node_bvhs.h>
+#include <hpp/fcl/traversal/traversal_node_setup.h>
+#include <../src/collision_node.h>
+#include <hpp/fcl/collision.h>
+#include <hpp/fcl/BV/BV.h>
+#include <hpp/fcl/shape/geometric_shapes.h>
+#include <hpp/fcl/narrowphase/narrowphase.h>
+#include <hpp/fcl/mesh_loader/assimp.h>
+#include "utility.h"
+#include "fcl_resources/config.h"
+#include <boost/filesystem.hpp>
+
+using namespace hpp::fcl;
+
+int num_max_contacts = std::numeric_limits<int>::max();
+
+BOOST_AUTO_TEST_CASE(OBB_Box_test)
+{
+  FCL_REAL r_extents[] = {-1000, -1000, -1000, 1000, 1000, 1000};
+  std::vector<Transform3f> rotate_transform;
+  generateRandomTransforms(r_extents, rotate_transform, 1);
+  
+  AABB aabb1;
+  aabb1.min_ = Vec3f(-600, -600, -600);
+  aabb1.max_ = Vec3f(600, 600, 600);
+
+  OBB obb1;
+  convertBV(aabb1, rotate_transform[0], obb1);
+  Box box1;
+  Transform3f box1_tf;
+  constructBox(aabb1, rotate_transform[0], box1, box1_tf);
+
+  FCL_REAL extents[] = {-1000, -1000, -1000, 1000, 1000, 1000};
+  std::size_t n = 1000;
+
+  std::vector<Transform3f> transforms;
+  generateRandomTransforms(extents, transforms, n);
+
+  for(std::size_t i = 0; i < transforms.size(); ++i)
+  {
+    AABB aabb;
+    aabb.min_ = aabb1.min_ * 0.5;
+    aabb.max_ = aabb1.max_ * 0.5;    
+
+    OBB obb2;
+    convertBV(aabb, transforms[i], obb2);
+    
+    Box box2;
+    Transform3f box2_tf;
+    constructBox(aabb, transforms[i], box2, box2_tf);
+
+    GJKSolver solver;
+
+    bool overlap_obb = obb1.overlap(obb2);
+    bool overlap_box = solver.shapeIntersect(box1, box1_tf, box2, box2_tf, NULL, NULL, NULL);
+    
+    BOOST_CHECK(overlap_obb == overlap_box);
+  }
+}
+
+BOOST_AUTO_TEST_CASE(OBB_shape_test)
+{
+  FCL_REAL r_extents[] = {-1000, -1000, -1000, 1000, 1000, 1000};
+  std::vector<Transform3f> rotate_transform;
+  generateRandomTransforms(r_extents, rotate_transform, 1);
+  
+  AABB aabb1;
+  aabb1.min_ = Vec3f(-600, -600, -600);
+  aabb1.max_ = Vec3f(600, 600, 600);
+
+  OBB obb1;
+  convertBV(aabb1, rotate_transform[0], obb1);
+  Box box1;
+  Transform3f box1_tf;
+  constructBox(aabb1, rotate_transform[0], box1, box1_tf);
+
+  FCL_REAL extents[] = {-1000, -1000, -1000, 1000, 1000, 1000};
+  std::size_t n = 1000;
+
+  std::vector<Transform3f> transforms;
+  generateRandomTransforms(extents, transforms, n);
+
+  for(std::size_t i = 0; i < transforms.size(); ++i)
+  {
+    FCL_REAL len = (aabb1.max_[0] - aabb1.min_[0]) * 0.5;
+    OBB obb2;
+    GJKSolver solver;
+ 
+    {  
+      Sphere sphere(len);
+      computeBV(sphere, transforms[i], obb2);
+ 
+      bool overlap_obb = obb1.overlap(obb2);
+      bool overlap_sphere = solver.shapeIntersect(box1, box1_tf, sphere, transforms[i], NULL, NULL, NULL);
+      BOOST_CHECK(overlap_obb >= overlap_sphere);
+    }
+
+    {
+      Capsule capsule(len, 2 * len);
+      computeBV(capsule, transforms[i], obb2);
+      
+      bool overlap_obb = obb1.overlap(obb2);
+      bool overlap_capsule = solver.shapeIntersect(box1, box1_tf, capsule, transforms[i], NULL, NULL, NULL);
+      BOOST_CHECK(overlap_obb >= overlap_capsule);
+    }
+
+    {
+      Cone cone(len, 2 * len);
+      computeBV(cone, transforms[i], obb2);
+      
+      bool overlap_obb = obb1.overlap(obb2);
+      bool overlap_cone = solver.shapeIntersect(box1, box1_tf, cone, transforms[i], NULL, NULL, NULL);
+      BOOST_CHECK(overlap_obb >= overlap_cone);
+    }
+
+    {
+      Cylinder cylinder(len, 2 * len);
+      computeBV(cylinder, transforms[i], obb2);
+      
+      bool overlap_obb = obb1.overlap(obb2);
+      bool overlap_cylinder = solver.shapeIntersect(box1, box1_tf, cylinder, transforms[i], NULL, NULL, NULL);
+      BOOST_CHECK(overlap_obb >= overlap_cylinder);
+    }
+  }
+}
+
+BOOST_AUTO_TEST_CASE(OBB_AABB_test)
+{
+  FCL_REAL extents[] = {-1000, -1000, -1000, 1000, 1000, 1000};
+  std::size_t n = 1000;
+
+  std::vector<Transform3f> transforms;
+  generateRandomTransforms(extents, transforms, n);
+
+  AABB aabb1;
+  aabb1.min_ = Vec3f(-600, -600, -600);
+  aabb1.max_ = Vec3f(600, 600, 600);
+  
+  OBB obb1;
+  convertBV(aabb1, Transform3f(), obb1);
+  
+  for(std::size_t i = 0; i < transforms.size(); ++i)
+  {
+    AABB aabb;
+    aabb.min_ = aabb1.min_ * 0.5;
+    aabb.max_ = aabb1.max_ * 0.5;    
+
+    AABB aabb2 = translate(aabb, transforms[i].getTranslation());
+    
+    OBB obb2;
+    convertBV(aabb, Transform3f(transforms[i].getTranslation()), obb2);
+
+    bool overlap_aabb = aabb1.overlap(aabb2);
+    bool overlap_obb = obb1.overlap(obb2);
+    if(overlap_aabb != overlap_obb)
+    {
+      std::cout << aabb1.min_ << " " << aabb1.max_ << std::endl;
+      std::cout << aabb2.min_ << " " << aabb2.max_ << std::endl;
+      std::cout << obb1.To << " " << obb1.extent << " " << obb1.axes << std::endl;
+      std::cout << obb2.To << " " << obb2.extent << " " << obb2.axes << std::endl;
+    }
+
+    BOOST_CHECK(overlap_aabb == overlap_obb);
+  }
+  std::cout << std::endl;
+}
+
+std::ostream* bench_stream = NULL;
+bool bs_nl = true;
+bool bs_hp = false;
+#define BENCHMARK(stream) if (bench_stream!=NULL) { *bench_stream << (bs_nl ? "" : ", ") << stream; bs_nl = false; }
+#define BENCHMARK_HEADER(stream) if (!bs_hp) { BENCHMARK(stream) }
+#define BENCHMARK_NEXT() if (bench_stream!=NULL && !bs_nl) { *bench_stream << '\n';  bs_nl = true; bs_hp = true; }
+
+typedef std::vector<Contact> Contacts_t;
+typedef boost::mpl::vector<OBB, RSS, AABB, KDOP<24>, KDOP<18>, KDOP<16>, kIOS, OBBRSS> BVs_t;
+std::vector<SplitMethodType> splitMethods = boost::assign::list_of (SPLIT_METHOD_MEAN)(SPLIT_METHOD_MEDIAN)(SPLIT_METHOD_BV_CENTER);
+
+typedef boost::chrono::high_resolution_clock clock_type;
+typedef clock_type::duration duration_type;
+
+#define BV_STR_SPECIALIZATION(bv) \
+  template <> const char* str< bv > () { return #bv; }
+template <typename BV> const char* str();
+BV_STR_SPECIALIZATION(AABB)
+BV_STR_SPECIALIZATION(OBB)
+BV_STR_SPECIALIZATION(RSS)
+BV_STR_SPECIALIZATION(KDOP<24>)
+BV_STR_SPECIALIZATION(KDOP<18>)
+BV_STR_SPECIALIZATION(KDOP<16>)
+BV_STR_SPECIALIZATION(kIOS)
+BV_STR_SPECIALIZATION(OBBRSS)
+
+template <typename T> struct wrap {};
+
+struct base_traits
+{
+  enum { IS_IMPLEMENTED = true
+  };
+};
+
+enum {
+  Oriented = true,
+  NonOriented = false,
+  Recursive = true,
+  NonRecursive = false
+};
+
+template<typename BV, bool Oriented, bool recursive>
+struct traits : base_traits
+{};
+
+template<size_t N, bool recursive>
+struct traits<KDOP<N>, Oriented, recursive> : base_traits
+{
+  enum { IS_IMPLEMENTED = false
+  };
+};
+
+struct mesh_mesh_run_test
+{
+  mesh_mesh_run_test (const std::vector<Transform3f>& _transforms,
+      const CollisionRequest _request
+      ) :
+    transforms (_transforms),
+    request (_request),
+    enable_statistics (false),
+    benchmark (false),
+    isInit (false),
+    indent (0)
+  {}
+
+  const std::vector<Transform3f>& transforms;
+  const CollisionRequest request;
+  bool enable_statistics, benchmark;
+  std::vector<Contacts_t> contacts;
+  std::vector<Contacts_t> contacts_ref;
+  bool isInit;
+
+  int indent;
+
+  const char* getindent()
+  {
+    assert (indent < 9);
+    static const char* t[] = { "", "\t", "\t\t", "\t\t\t", "\t\t\t\t", "\t\t\t\t\t",
+      "\t\t\t\t\t\t", "\t\t\t\t\t\t\t", "\t\t\t\t\t\t\t\t" };
+    return t[indent];
+  }
+
+  template<typename BV>
+  void query (
+      const std::vector<Transform3f>& transforms,
+      SplitMethodType splitMethod,
+      const CollisionRequest request,
+      std::vector<Contacts_t>& contacts
+      )
+  {
+    BENCHMARK_HEADER("BV");
+    BENCHMARK_HEADER("oriented");
+    BENCHMARK_HEADER("Split method");
+    if (enable_statistics) {
+      BENCHMARK_HEADER("num_bv_tests");
+      BENCHMARK_HEADER("num_leaf_tests");
+    }
+    BENCHMARK_HEADER("numContacts");
+    BENCHMARK_HEADER("distance_lower_bound");
+    BENCHMARK_HEADER("time");
+    BENCHMARK_NEXT();
+
+    typedef BVHModel<BV> BVH_t;
+    typedef boost::shared_ptr<BVH_t> BVHPtr_t;
+
+    BVHPtr_t model1 (new BVH_t), model2 (new BVH_t);
+    model1->bv_splitter.reset(new BVSplitter<BV>(splitMethod));
+    model2->bv_splitter.reset(new BVSplitter<BV>(splitMethod));
+
+    loadPolyhedronFromResource (TEST_RESOURCES_DIR "/env.obj", Vec3f::Ones(), model1);
+    loadPolyhedronFromResource (TEST_RESOURCES_DIR "/rob.obj", Vec3f::Ones(), model2);
+
+    clock_type::time_point start, end;
+    const Transform3f tf2;
+    const std::size_t N = transforms.size();
+
+    contacts.resize (3*N);
+
+    if (traits<BV, Oriented, Recursive>::IS_IMPLEMENTED)
+    {
+      BOOST_TEST_MESSAGE (getindent() << "BV: " << str<BV>() << " oriented");
+      ++indent;
+
+      for(std::size_t i = 0; i < transforms.size(); ++i)
+      {
+        start = clock_type::now();
+        const Transform3f& tf1 = transforms[i];
+
+        CollisionResult local_result;
+        MeshCollisionTraversalNode<BV, 0> node (request);
+        node.enable_statistics = enable_statistics;
+
+        bool success = initialize (node,
+            *model1, tf1, *model2, tf2,
+            local_result);
+        BOOST_REQUIRE (success);
+
+        collide(&node, request, local_result);
+
+        end = clock_type::now();
+
+        BENCHMARK(str<BV>());
+        BENCHMARK(1);
+        BENCHMARK(splitMethod);
+        if (enable_statistics) {
+          BOOST_TEST_MESSAGE (getindent() << "statistics: " << node.num_bv_tests << " " << node.num_leaf_tests);
+          BOOST_TEST_MESSAGE (getindent() << "nb contacts: " << local_result.numContacts());
+          BENCHMARK(node.num_bv_tests);
+          BENCHMARK(node.num_leaf_tests);
+        }
+        BENCHMARK(local_result.numContacts());
+        BENCHMARK(local_result.distance_lower_bound);
+        BENCHMARK((end - start).count());
+        BENCHMARK_NEXT();
+
+        if(local_result.numContacts() > 0)
+        {
+          local_result.getContacts(contacts[i]);
+          std::sort(contacts[i].begin(), contacts[i].end());
+        }
+      }
+      --indent;
+    }
+
+    if (traits<BV, NonOriented, Recursive>::IS_IMPLEMENTED)
+    {
+      BOOST_TEST_MESSAGE (getindent() << "BV: " << str<BV>());
+      ++indent;
+
+      for(std::size_t i = 0; i < transforms.size(); ++i)
+      {
+        start = clock_type::now();
+        const Transform3f tf1 = transforms[i];
+
+        CollisionResult local_result;
+        MeshCollisionTraversalNode<BV, RelativeTransformationIsIdentity> node (request);
+        node.enable_statistics = enable_statistics;
+
+        BVH_t* model1_tmp = new BVH_t(*model1);
+        Transform3f tf1_tmp = tf1;
+        BVH_t* model2_tmp = new BVH_t(*model2);
+        Transform3f tf2_tmp = tf2;
+
+        bool success = initialize (node,
+            *model1_tmp, tf1_tmp, *model2_tmp, tf2_tmp,
+            local_result, true, true);
+        BOOST_REQUIRE (success);
+
+        collide(&node, request, local_result);
+        delete model1_tmp;
+        delete model2_tmp;
+
+        end = clock_type::now();
+        BENCHMARK(str<BV>());
+        BENCHMARK(2);
+        BENCHMARK(splitMethod);
+        if (enable_statistics) {
+          BOOST_TEST_MESSAGE (getindent() << "statistics: " << node.num_bv_tests << " " << node.num_leaf_tests);
+          BOOST_TEST_MESSAGE (getindent() << "nb contacts: " << local_result.numContacts());
+          BENCHMARK(node.num_bv_tests);
+          BENCHMARK(node.num_leaf_tests);
+        }
+        BENCHMARK(local_result.numContacts());
+        BENCHMARK(local_result.distance_lower_bound);
+        BENCHMARK((end - start).count());
+        BENCHMARK_NEXT();
+
+        if(local_result.numContacts() > 0)
+        {
+          local_result.getContacts(contacts[i+N]);
+          std::sort(contacts[i+N].begin(), contacts[i+N].end());
+        }
+      }
+      --indent;
+    }
+
+    if (traits<BV, Oriented, NonRecursive>::IS_IMPLEMENTED)
+    {
+      BOOST_TEST_MESSAGE (getindent() << "BV: " << str<BV>() << " oriented non-recursive");
+      ++indent;
+
+      for(std::size_t i = 0; i < transforms.size(); ++i)
+      {
+        start = clock_type::now();
+        const Transform3f tf1 = transforms[i];
+
+        CollisionResult local_result;
+        MeshCollisionTraversalNode<BV, 0> node (request);
+        node.enable_statistics = enable_statistics;
+
+        bool success = initialize (node,
+            *model1, tf1, *model2, tf2,
+            local_result);
+        BOOST_REQUIRE (success);
+
+        collide(&node, request, local_result, NULL, false);
+
+        end = clock_type::now();
+        BENCHMARK(str<BV>());
+        BENCHMARK(0);
+        BENCHMARK(splitMethod);
+        if (enable_statistics) {
+          BOOST_TEST_MESSAGE (getindent() << "statistics: " << node.num_bv_tests << " " << node.num_leaf_tests);
+          BOOST_TEST_MESSAGE (getindent() << "nb contacts: " << local_result.numContacts());
+          BENCHMARK(node.num_bv_tests);
+          BENCHMARK(node.num_leaf_tests);
+        }
+        BENCHMARK(local_result.numContacts());
+        BENCHMARK(local_result.distance_lower_bound);
+        BENCHMARK((end - start).count());
+        BENCHMARK_NEXT();
+
+        if(local_result.numContacts() > 0)
+        {
+          local_result.getContacts(contacts[i+2*N]);
+          std::sort(contacts[i+2*N].begin(), contacts[i+2*N].end());
+        }
+      }
+      --indent;
+    }
+  }
+
+  template<typename BV>
+  void check ()
+  {
+    if (benchmark) return;
+    const std::size_t N = transforms.size();
+
+    BOOST_REQUIRE_EQUAL(contacts.size(), 3*N);
+    BOOST_REQUIRE_EQUAL(contacts.size(), contacts_ref.size());
+
+    if (traits<BV, Oriented, Recursive>::IS_IMPLEMENTED) {
+      for(std::size_t i = 0; i < N; ++i) {
+        BOOST_CHECK_EQUAL(contacts_ref[i].size(), contacts[i].size());
+        for(std::size_t j = 0; j < contacts[i].size(); ++j) {
+          BOOST_CHECK_EQUAL(contacts_ref[i][j].b1, contacts[i][j].b1);
+          BOOST_CHECK_EQUAL(contacts_ref[i][j].b2, contacts[i][j].b2);
+        }
+      }
+    }
+    if (traits<BV, NonOriented, Recursive>::IS_IMPLEMENTED) {
+      for(std::size_t i = N; i < 2*N; ++i) {
+        BOOST_CHECK_EQUAL(contacts_ref[i].size(), contacts[i].size());
+        for(std::size_t j = 0; j < contacts[i].size(); ++j) {
+          BOOST_CHECK_EQUAL(contacts_ref[i][j].b1, contacts[i][j].b1);
+          BOOST_CHECK_EQUAL(contacts_ref[i][j].b2, contacts[i][j].b2);
+        }
+      }
+    }
+    if (traits<BV, Oriented, NonRecursive>::IS_IMPLEMENTED) {
+      for(std::size_t i = 2*N; i < 3*N; ++i) {
+        BOOST_CHECK_EQUAL(contacts_ref[i].size(), contacts[i].size());
+        for(std::size_t j = 0; j < contacts[i].size(); ++j) {
+          BOOST_CHECK_EQUAL(contacts_ref[i][j].b1, contacts[i][j].b1);
+          BOOST_CHECK_EQUAL(contacts_ref[i][j].b2, contacts[i][j].b2);
+        }
+      }
+    }
+  }
+
+  template<typename BV>
+  void operator() (wrap<BV>)
+  {
+    for (std::size_t i = 0; i < splitMethods.size(); ++i)
+    {
+      BOOST_TEST_MESSAGE (getindent() << "splitMethod: " << splitMethods[i]);
+      ++indent;
+      query <BV> (transforms, splitMethods[i], request, (isInit ? contacts : contacts_ref));
+      if (isInit) check<BV> ();
+      isInit = true;
+      --indent;
+    }
+  }
+};
+
+// This test
+//  1. load two objects "env.obj" and "rob.obj" from directory
+//     fcl_resources,
+//  2. generates n random transformation and for each of them denote tf,
+//    2.1 performs a collision test where object 1 is in pose tf. All
+//        the contacts are stored in vector global_pairs.
+//    2.2 performs a series of collision tests with the same object and
+//        the same poses using various methods and various types of bounding
+//        volumes. Each time the contacts are stored in vector global_pairs_now.
+//
+// The methods used to test collision are
+//  - collide_Test that calls function collide with tf for object1 pose and
+//      identity for the second object pose,
+//  - collide_Test2 that moves all vertices of object1 in pose tf and that
+//      calls function collide with identity for both object poses,
+//
+BOOST_AUTO_TEST_CASE(mesh_mesh)
+{
+  std::vector<Transform3f> transforms;
+  FCL_REAL extents[] = {-3000, -3000, 0, 3000, 3000, 3000};
+  std::size_t n = 10;
+
+  generateRandomTransforms(extents, transforms, n);
+
+  Eigen::IOFormat f (Eigen::FullPrecision, 0, ", ", ",", "", "", "(", ")");
+  for(std::size_t i = 0; i < transforms.size(); ++i)
+  {
+    BOOST_TEST_MESSAGE("q" << i << "="
+        << transforms [i].getTranslation  ()          .format (f) << "+"
+        << transforms [i].getQuatRotation ().coeffs ().format (f));
+  }
+
+  // Request all contacts and check that all methods give the same result.
+  mesh_mesh_run_test runner (transforms, CollisionRequest (CONTACT, num_max_contacts));
+  runner.enable_statistics = true;
+  boost::mpl::for_each<BVs_t, wrap<boost::mpl::placeholders::_1> > (runner);
+}
+
+BOOST_AUTO_TEST_CASE(mesh_mesh_benchmark)
+{
+  std::vector<Transform3f> transforms;
+  FCL_REAL extents[] = {-3000, -3000, 0, 3000, 3000, 3000};
+  std::size_t n = 10;
+
+  generateRandomTransforms(extents, transforms, n);
+
+  Eigen::IOFormat f (Eigen::FullPrecision, 0, ", ", ",", "", "", "(", ")");
+  for(std::size_t i = 0; i < transforms.size(); ++i)
+  {
+    BOOST_TEST_MESSAGE("q" << i << "="
+        << transforms [i].getTranslation  ()          .format (f) << "+"
+        << transforms [i].getQuatRotation ().coeffs ().format (f));
+  }
+
+  // Request all contacts and check that all methods give the same result.
+  typedef boost::mpl::vector<OBB, RSS, AABB, KDOP<24>, KDOP<18>, KDOP<16>, kIOS, OBBRSS> BVs_t;
+
+  std::ofstream ofs ("./collision.benchmark.csv", std::ofstream::out);
+  bench_stream = &ofs;
+
+  // without lower bound.
+  mesh_mesh_run_test runner1 (transforms, CollisionRequest ());
+  runner1.enable_statistics = false;
+  runner1.benchmark = true;
+  boost::mpl::for_each<BVs_t, wrap<boost::mpl::placeholders::_1> > (runner1);
+
+  // with lower bound.
+  mesh_mesh_run_test runner2 (transforms, CollisionRequest (DISTANCE_LOWER_BOUND, 1));
+  runner2.enable_statistics = false;
+  runner2.benchmark = true;
+  boost::mpl::for_each<BVs_t, wrap<boost::mpl::placeholders::_1> > (runner2);
+
+  bench_stream = NULL;
+  ofs.close();
+}
diff --git a/test/test_fcl_collision.py b/test/collision.py
similarity index 100%
rename from test/test_fcl_collision.py
rename to test/collision.py
diff --git a/test/test_fcl_distance.cpp b/test/distance.cpp
similarity index 99%
rename from test/test_fcl_distance.cpp
rename to test/distance.cpp
index 35eb58408d061fbafbed6f6f8ff60ad9ffebfae8..94922236c5f892456d757fba37542df0cd81e436 100644
--- a/test/test_fcl_distance.cpp
+++ b/test/distance.cpp
@@ -42,8 +42,8 @@
 
 #include <hpp/fcl/traversal/traversal_node_bvhs.h>
 #include <hpp/fcl/traversal/traversal_node_setup.h>
-#include <hpp/fcl/collision_node.h>
-#include "test_fcl_utility.h"
+#include <../src/collision_node.h>
+#include "utility.h"
 #include <boost/timer.hpp>
 #include "fcl_resources/config.h"
 #include <boost/filesystem.hpp>
@@ -447,7 +447,7 @@ bool collide_Test_OBB(const Transform3f& tf,
   bool success (initialize(node, (const BVHModel<OBB>&)m1, tf,
                            (const BVHModel<OBB>&)m2, Transform3f(),
                            local_result));
-  assert (success);
+  BOOST_REQUIRE (success);
 
   node.enable_statistics = verbose;
 
diff --git a/test/test_fcl_distance_lower_bound.cpp b/test/distance_lower_bound.cpp
similarity index 99%
rename from test/test_fcl_distance_lower_bound.cpp
rename to test/distance_lower_bound.cpp
index ee825ba65ff603fac9c0b29617e24256035e5887..0d5a0645d9060ee6e44f756a0dbece6e4ef5df1f 100644
--- a/test/test_fcl_distance_lower_bound.cpp
+++ b/test/distance_lower_bound.cpp
@@ -45,7 +45,7 @@
 #include <hpp/fcl/narrowphase/narrowphase.h>
 #include <hpp/fcl/collision.h>
 #include <hpp/fcl/distance.h>
-# include "test_fcl_utility.h"
+# include "utility.h"
 # include "fcl_resources/config.h"
 
 using hpp::fcl::Transform3f;
diff --git a/test/test_fcl_distance_lower_bound.py b/test/distance_lower_bound.py
similarity index 100%
rename from test/test_fcl_distance_lower_bound.py
rename to test/distance_lower_bound.py
diff --git a/test/test_fcl_eigen.cpp b/test/eigen.cpp
similarity index 100%
rename from test/test_fcl_eigen.cpp
rename to test/eigen.cpp
diff --git a/test/test_fcl_frontlist.cpp b/test/frontlist.cpp
similarity index 98%
rename from test/test_fcl_frontlist.cpp
rename to test/frontlist.cpp
index 36a5c312423916e04fb09c648c74f81baac85d1e..799b0c4a1c241d283b929450addc4bf9c2f43202 100644
--- a/test/test_fcl_frontlist.cpp
+++ b/test/frontlist.cpp
@@ -43,8 +43,8 @@
 
 #include <hpp/fcl/traversal/traversal_node_bvhs.h>
 #include <hpp/fcl/traversal/traversal_node_setup.h>
-#include <hpp/fcl/collision_node.h>
-#include "test_fcl_utility.h"
+#include <../src/collision_node.h>
+#include "utility.h"
 
 #include "fcl_resources/config.h"
 #include <boost/filesystem.hpp>
@@ -233,7 +233,7 @@ bool collide_front_list_Test(const Transform3f& tf1, const Transform3f& tf2,
   MeshCollisionTraversalNode<BV> node (request);
 
   bool success = initialize <BV>(node, m1, pose1, m2, pose2, local_result);
-  assert (success);
+  BOOST_REQUIRE (success);
 
   node.enable_statistics = verbose;
 
@@ -297,7 +297,7 @@ bool collide_front_list_Test_Oriented(const Transform3f& tf1, const Transform3f&
 
   bool success = initialize (node, (const BVHModel<BV>&)m1, pose1,
                              (const BVHModel<BV>&)m2, pose2, local_result);
-  assert (success);
+  BOOST_REQUIRE (success);
 
   node.enable_statistics = verbose;
 
@@ -310,7 +310,7 @@ bool collide_front_list_Test_Oriented(const Transform3f& tf1, const Transform3f&
   pose1 = tf2;
   success = initialize (node, (const BVHModel<BV>&)m1, pose1,
                         (const BVHModel<BV>&)m2, pose2, local_result);
-  assert (success);
+  BOOST_REQUIRE (success);
 
   local_result.clear();
   collide(&node, request, local_result, &front_list);
@@ -347,7 +347,7 @@ bool collide_Test(const Transform3f& tf,
   MeshCollisionTraversalNode<BV> node (request);
 
   bool success = initialize <BV>(node, m1, pose1, m2, pose2, local_result);
-  assert (success);
+  BOOST_REQUIRE (success);
 
   node.enable_statistics = verbose;
 
diff --git a/test/test_fcl_geometric_shapes.cpp b/test/geometric_shapes.cpp
similarity index 99%
rename from test/test_fcl_geometric_shapes.cpp
rename to test/geometric_shapes.cpp
index b6a773fee090eba557347b06919917a39ed2f718..e65aad57e4ad143f95d317f53a33a03e07c9e7e4 100644
--- a/test/test_fcl_geometric_shapes.cpp
+++ b/test/geometric_shapes.cpp
@@ -44,7 +44,7 @@
 #include <hpp/fcl/narrowphase/narrowphase.h>
 #include <hpp/fcl/collision.h>
 #include <hpp/fcl/distance.h>
-#include "test_fcl_utility.h"
+#include "utility.h"
 #include <iostream>
 #include <hpp/fcl/math/tools.h>
 
@@ -59,6 +59,12 @@ GJKSolver solver2;
 Eigen::IOFormat fmt(Eigen::StreamPrecision, Eigen::DontAlignCols, ", ", ", ", "", "", "", "");
 Eigen::IOFormat pyfmt(Eigen::StreamPrecision, Eigen::DontAlignCols, ", ", ", ", "", "", "[", "]");
 
+typedef Eigen::AngleAxis<FCL_REAL> AngleAxis;
+//static const Vec3f UnitX (1, 0, 0);
+//static const Vec3f UnitY (0, 1, 0);
+static const Vec3f UnitZ (0, 0, 1);
+
+
 namespace hpp {
 namespace fcl {
 std::ostream& operator<< (std::ostream& os, const Transform3f& tf)
@@ -261,7 +267,7 @@ template <typename Sa, typename Sb> void compareShapeDistance (
       << resB.nearest_points[1].format(pyfmt) << '\n'
       );
   // TODO in one case, there is a mismatch between the distances and I cannot say
-  // which one is correct. To visualize the case, use script test/test_fcl_geometric_shapes.py
+  // which one is correct. To visualize the case, use script test/geometric_shapes.py
   BOOST_WARN_CLOSE(resA.min_distance, resB.min_distance, tol);
   //BOOST_CHECK_CLOSE(resA.min_distance, resB.min_distance, tol);
 
@@ -297,9 +303,7 @@ BOOST_AUTO_TEST_CASE (shapeIntersection_cylinderbox)
                        (p2Loc [0] * p2Loc [0] + p2Loc [1] * p2Loc [1]
                         <= s1.radius));
   Vec3f p1Loc (tf2.inverse().transform (p1));
-  bool p1_in_box ((fabs (p1Loc [0]) <= .5 * s2.side [0]) &&
-                  (fabs (p1Loc [1]) <= .5 * s2.side [1]) &&
-                  (fabs (p1Loc [2]) <= .5 * s2.side [2]));
+  bool p1_in_box = (p1Loc.array().abs() <= s2.halfSide.array()).all();
   std::cout << "p2 in cylinder = (" << p2Loc.transpose () << ")" << std::endl;
   std::cout << "p1 in box = (" << p1Loc.transpose () << ")" << std::endl;
 
@@ -316,9 +320,7 @@ BOOST_AUTO_TEST_CASE (shapeIntersection_cylinderbox)
     (p2Loc [0] * p2Loc [0] + p2Loc [1] * p2Loc [1]
      <= s1.radius);
   p1Loc = tf2.inverse().transform (p1);
-  p1_in_box = (fabs (p1Loc [0]) <= .5 * s2.side [0]) &&
-    (fabs (p1Loc [1]) <= .5 * s2.side [1]) &&
-    (fabs (p1Loc [2]) <= .5 * s2.side [2]);
+  p1_in_box = (p1Loc.array().abs() <= s2.halfSide.array()).all();
 
   std::cout << "p2 in cylinder = (" << p2Loc.transpose () << ")" << std::endl;
   std::cout << "p1 in box = (" << p1.transpose () << ")" << std::endl;
@@ -441,9 +443,7 @@ void testBoxBoxContactPoints(const Matrix3f& R)
 
   for (int i = 0; i < 8; ++i)
   {
-    vertices[i][0] *= 0.5 * s2.side[0];
-    vertices[i][1] *= 0.5 * s2.side[1];
-    vertices[i][2] *= 0.5 * s2.side[2];
+    vertices[i].array() *= s2.halfSide.array();
   }
 
   Transform3f tf1 = Transform3f(Vec3f(0, 0, -50));
@@ -484,7 +484,7 @@ BOOST_AUTO_TEST_CASE(shapeIntersection_boxbox)
   Vec3f normal;
 
   Quaternion3f q;
-  q = Eigen::AngleAxis<double>((FCL_REAL)3.140 / 6, Vec3f(0, 0, 1));
+  q = AngleAxis((FCL_REAL)3.140 / 6, UnitZ);
 
   tf1 = Transform3f();
   tf2 = Transform3f();
@@ -3060,7 +3060,7 @@ BOOST_AUTO_TEST_CASE(shapeIntersectionGJK_boxbox)
   Vec3f normal;
 
   Quaternion3f q;
-  q = Eigen::AngleAxis<double>((FCL_REAL)3.140 / 6, Vec3f(0, 0, 1));
+  q = AngleAxis((FCL_REAL)3.140 / 6, UnitZ);
 
   tf1 = Transform3f();
   tf2 = Transform3f();
diff --git a/test/test_fcl_geometric_shapes.py b/test/geometric_shapes.py
similarity index 100%
rename from test/test_fcl_geometric_shapes.py
rename to test/geometric_shapes.py
diff --git a/test/gjk-geometric-tools-benchmark b/test/gjk-geometric-tools-benchmark
index 4104d7511f78c3d9f22b4b16fe63395dcacdd5e5..7e522128798f30f7b695f22cf95f674eae03e7bc 100644
--- a/test/gjk-geometric-tools-benchmark
+++ b/test/gjk-geometric-tools-benchmark
@@ -1,4 +1,4 @@
-Result after running test_fcl_gjk.cpp in Release mode.
+Result after running gjk.cpp in Release mode.
 
 nCol = 2831
 nDiff = 41
diff --git a/test/test_fcl_gjk.cpp b/test/gjk.cpp
similarity index 94%
rename from test/test_fcl_gjk.cpp
rename to test/gjk.cpp
index 3791410fed34e1360dee7b4bd54af5ed816ab254..9ae852de0587454b24d7a968fe1532b7fd9bcc69 100644
--- a/test/test_fcl_gjk.cpp
+++ b/test/gjk.cpp
@@ -269,7 +269,7 @@ BOOST_AUTO_TEST_CASE(distance_triangle_triangle_1)
       }
     }
   }
-  std::cerr << "nCol = " << nCol << std::endl;
+  std::cerr << "nCol / nTotal = " << nCol << " / " << N << std::endl;
   std::cerr << "nDiff = " << nDiff << std::endl;
   // statistics
   clock_t totalTimeGjkColl = 0;
@@ -281,10 +281,10 @@ BOOST_AUTO_TEST_CASE(distance_triangle_triangle_1)
       totalTimeGjkNoColl += results [i].timeGjk;
     }
   }
-  std::cerr << "Total time gjk: " << totalTimeGjkNoColl + totalTimeGjkColl
-            << std::endl;
+  std::cerr << "Total / average time gjk: " << totalTimeGjkNoColl + totalTimeGjkColl
+            << ", " << FCL_REAL(totalTimeGjkNoColl + totalTimeGjkColl) / FCL_REAL(CLOCKS_PER_SEC*N) << "s" << std::endl;
   std::cerr << "-- Collisions -------------------------" << std::endl;
-  std::cerr << "Total time gjk: " << totalTimeGjkColl << std::endl;
+  std::cerr << "Total / average time gjk: " << totalTimeGjkColl << ", " << FCL_REAL(totalTimeGjkColl) / FCL_REAL(CLOCKS_PER_SEC*nCol) << "s" << std::endl;
   std::cerr << "-- No collisions -------------------------" << std::endl;
-  std::cerr << "Total time gjk: " << totalTimeGjkNoColl << std::endl;
+  std::cerr << "Total / average time gjk: " << totalTimeGjkNoColl << ", " << FCL_REAL(totalTimeGjkNoColl) / FCL_REAL(CLOCKS_PER_SEC*(N-nCol)) << "s" << std::endl;
 }
diff --git a/test/test_fcl_gjk.py b/test/gjk.py
similarity index 100%
rename from test/test_fcl_gjk.py
rename to test/gjk.py
diff --git a/test/test_fcl_math.cpp b/test/math.cpp
similarity index 100%
rename from test/test_fcl_math.cpp
rename to test/math.cpp
diff --git a/test/obb.cpp b/test/obb.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..9438596796f27b66d406c9192dabb5cd37c3343f
--- /dev/null
+++ b/test/obb.cpp
@@ -0,0 +1,1371 @@
+/*
+ * Software License Agreement (BSD License)
+ *
+ *  Copyright (c) 2014-2016, CNRS-LAAS
+ *  Author: Florent Lamiraux
+ *  All rights reserved.
+ *
+ *  Redistribution and use in source and binary forms, with or without
+ *  modification, are permitted provided that the following conditions
+ *  are met:
+ *
+ *   * Redistributions of source code must retain the above copyright
+ *     notice, this list of conditions and the following disclaimer.
+ *   * Redistributions in binary form must reproduce the above
+ *     copyright notice, this list of conditions and the following
+ *     disclaimer in the documentation and/or other materials provided
+ *     with the distribution.
+ *   * Neither the name of CNRS nor the names of its
+ *     contributors may be used to endorse or promote products derived
+ *     from this software without specific prior written permission.
+ *
+ *  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ *  FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ *  COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ *  INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ *  BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ *  LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ *  CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ *  LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ *  ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ *  POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <iostream>
+#include <fstream>
+#include <sstream>
+
+#define BOOST_CHRONO_VERSION 2
+#include <boost/chrono/chrono.hpp>
+#include <boost/chrono/chrono_io.hpp>
+
+#include <hpp/fcl/narrowphase/narrowphase.h>
+
+#include "../src/BV/OBB.h"
+#include "../src/distance_func_matrix.h"
+
+using namespace hpp::fcl;
+
+int getNumCPUs()
+{
+  int NumCPUs = 0;
+  int MaxID = -1;
+  std::ifstream f("/proc/cpuinfo");
+  if (!f.is_open()) {
+    std::cerr << "failed to open /proc/cpuinfo\n";
+    return -1;
+  }
+  const std::string Key = "processor";
+  std::string ln;
+  while (std::getline(f, ln)) {
+    if (ln.empty()) continue;
+    size_t SplitIdx = ln.find(':');
+    std::string value;
+    if (SplitIdx != std::string::npos) value = ln.substr(SplitIdx + 1);
+    if (ln.size() >= Key.size() && ln.compare(0, Key.size(), Key) == 0) {
+      NumCPUs++;
+      if (!value.empty()) {
+        int CurID = (int) strtol(value.c_str(), NULL, 10);
+        MaxID = std::max(CurID, MaxID);
+      }
+    }
+  }
+  if (f.bad()) {
+    std::cerr << "Failure reading /proc/cpuinfo\n";
+    return -1;
+  }
+  if (!f.eof()) {
+    std::cerr << "Failed to read to end of /proc/cpuinfo\n";
+    return -1;
+  }
+  f.close();
+
+  if ((MaxID + 1) != NumCPUs) {
+    fprintf(stderr,
+            "CPU ID assignments in /proc/cpuinfo seem messed up."
+            " This is usually caused by a bad BIOS.\n");
+  }
+  return NumCPUs;
+}
+
+bool checkCpuScalingEnabled()
+{
+  int num_cpus = getNumCPUs ();
+
+  // We don't have a valid CPU count, so don't even bother.
+  if (num_cpus <= 0) return false;
+  // On Linux, the CPUfreq subsystem exposes CPU information as files on the
+  // local file system. If reading the exported files fails, then we may not be
+  // running on Linux, so we silently ignore all the read errors.
+  std::string res;
+  for (int cpu = 0; cpu < num_cpus; ++cpu) {
+    std::ostringstream oss;
+    oss << "/sys/devices/system/cpu/cpu" << cpu << "/cpufreq/scaling_governor";
+    std::string governor_file = oss.str();
+    std::ifstream f(governor_file.c_str());
+    if (!f.is_open()) return false;
+    f >> res;
+    if (!f.good()) return false;
+    if (res != "performance") return true;
+  }
+  return false;
+}
+
+void randomOBBs (
+    Vec3f& a, Vec3f& b, FCL_REAL extentNorm)
+{
+  // Extent norm is between 0 and extentNorm on each axis
+  //a = (Vec3f::Ones()+Vec3f::Random()) * extentNorm / (2*sqrt(3));
+  //b = (Vec3f::Ones()+Vec3f::Random()) * extentNorm / (2*sqrt(3));
+
+  a = extentNorm * Vec3f::Random().cwiseAbs().normalized();
+  b = extentNorm * Vec3f::Random().cwiseAbs().normalized();
+}
+
+void randomTransform (
+    Matrix3f& B, Vec3f& T,
+    const Vec3f& a, const Vec3f& b, const FCL_REAL extentNorm)
+{
+  // TODO Should we scale T to a and b norm ?
+  (void) a;
+  (void) b;
+  (void) extentNorm;
+
+  FCL_REAL N = a.norm() + b.norm();
+  // A translation of norm N ensures there is no collision.
+  // Set translation to be between 0 and 2 * N;
+  T = ( Vec3f::Random() / sqrt(3) ) * 1.5 * N;
+  //T.setZero();
+
+  Quaternion3f q;
+  q.coeffs().setRandom();
+  q.normalize();
+  B = q;
+}
+
+#define NB_METHODS 7
+#define MANUAL_PRODUCT 1
+
+#if MANUAL_PRODUCT
+# define PRODUCT(M33,v3) ( M33.col(0) * v3[0] + M33.col(1) * v3[1] + M33.col(2) * v3[2] )
+#else
+# define PRODUCT(M33,v3) (M33*v3)
+#endif
+
+typedef boost::chrono::high_resolution_clock clock_type;
+typedef clock_type::duration duration_type;
+
+const char* sep = ",\t"; 
+const FCL_REAL eps = 1e-10;
+
+const Eigen::IOFormat py_fmt(Eigen::FullPrecision,
+    0,
+    ", ",       // Coeff separator
+    ",\n",      // Row separator
+    "(",        // row prefix
+    ",)",       // row suffix
+    "( ",       // mat prefix
+    ", )"       // mat suffix
+    );
+
+namespace obbDisjoint_impls
+{
+  /// \return true if OBB are disjoint.
+  bool distance (const Matrix3f& B, const Vec3f& T, const Vec3f& a, const Vec3f& b, FCL_REAL& distance)
+  {
+    GJKSolver gjk;
+    Box ba (2*a), bb (2*b);
+    Transform3f tfa, tfb (B, T);
+
+    Vec3f p1, p2, normal;
+    return gjk.shapeDistance (ba, tfa, bb, tfb, distance, p1, p2, normal);
+  }
+
+  inline FCL_REAL _computeDistanceForCase1 (
+      const Vec3f& T, const Vec3f& a, const Vec3f& b, const Matrix3f& Bf)
+  {
+    Vec3f AABB_corner;
+     /* This seems to be slower
+    AABB_corner.noalias() = T.cwiseAbs () - a;
+    AABB_corner.noalias() -= PRODUCT(Bf,b);
+    /*/
+#if MANUAL_PRODUCT
+    AABB_corner.noalias() = T.cwiseAbs () - a;
+    AABB_corner.noalias() -= Bf.col(0) * b[0];
+    AABB_corner.noalias() -= Bf.col(1) * b[1];
+    AABB_corner.noalias() -= Bf.col(2) * b[2];
+#else
+    AABB_corner.noalias() = T.cwiseAbs () - Bf * b - a;
+#endif
+    // */
+    return AABB_corner.array().max(0).matrix().squaredNorm ();
+  }
+
+  inline FCL_REAL _computeDistanceForCase2 (
+      const Matrix3f& B, const Vec3f& T, const Vec3f& a, const Vec3f& b, const Matrix3f& Bf)
+  {
+    /*
+    Vec3f AABB_corner(PRODUCT(B.transpose(), T).cwiseAbs() - b);
+    AABB_corner.noalias() -= PRODUCT(Bf.transpose(), a);
+    return AABB_corner.array().max(0).matrix().squaredNorm ();
+    /*/
+#if MANUAL_PRODUCT
+    register FCL_REAL s, t = 0;
+    s = std::abs(B.col(0).dot(T)) - Bf.col(0).dot(a) - b[0];
+    if (s > 0) t += s*s;
+    s = std::abs(B.col(1).dot(T)) - Bf.col(1).dot(a) - b[1];
+    if (s > 0) t += s*s;
+    s = std::abs(B.col(2).dot(T)) - Bf.col(2).dot(a) - b[2];
+    if (s > 0) t += s*s;
+    return t;
+#else
+    Vec3f AABB_corner((B.transpose () * T).cwiseAbs ()  - Bf.transpose () * a - b);
+    return AABB_corner.array().max(0).matrix().squaredNorm ();
+#endif
+    // */
+  }
+
+  int separatingAxisId (const Matrix3f& B, const Vec3f& T, const Vec3f& a, const Vec3f& b, const FCL_REAL& breakDistance2, FCL_REAL& squaredLowerBoundDistance)
+  {
+    int id = 0;
+
+    Matrix3f Bf (B.cwiseAbs());
+
+    // Corner of b axis aligned bounding box the closest to the origin
+    squaredLowerBoundDistance = _computeDistanceForCase1 (T, a, b, Bf);
+    if (squaredLowerBoundDistance > breakDistance2)
+      return id;
+    ++id;
+
+    // | B^T T| - b - Bf^T a
+    squaredLowerBoundDistance = _computeDistanceForCase2 (B, T, a, b, Bf);
+    if (squaredLowerBoundDistance > breakDistance2)
+      return id;
+    ++id;
+
+    int ja = 1, ka = 2, jb = 1, kb = 2;
+    for (int ia = 0; ia < 3; ++ia) {
+      for (int ib = 0; ib < 3; ++ib) {
+        const FCL_REAL s = T[ka] * B(ja, ib) - T[ja] * B(ka, ib);
+
+        const FCL_REAL diff = fabs(s) - (a[ja] * Bf(ka, ib) + a[ka] * Bf(ja, ib) +
+            b[jb] * Bf(ia, kb) + b[kb] * Bf(ia, jb));
+        // We need to divide by the norm || Aia x Bib ||
+        // As ||Aia|| = ||Bib|| = 1, (Aia | Bib)^2  = cosine^2
+        if (diff > 0) {
+          FCL_REAL sinus2 = 1 - Bf (ia,ib) * Bf (ia,ib);
+          if (sinus2 > 1e-6) {
+            squaredLowerBoundDistance = diff * diff / sinus2;
+            if (squaredLowerBoundDistance > breakDistance2) {
+              return id;
+            }
+          }
+          /* // or
+             FCL_REAL sinus2 = 1 - Bf (ia,ib) * Bf (ia,ib);
+             squaredLowerBoundDistance = diff * diff;
+             if (squaredLowerBoundDistance > breakDistance2 * sinus2) {
+             squaredLowerBoundDistance /= sinus2;
+             return true;
+             }
+          // */
+        }
+        ++id;
+
+        jb = kb; kb = ib;
+      }
+      ja = ka; ka = ia;
+    }
+
+    return id;
+  }
+
+  // ------------------------ 0 --------------------------------------
+  bool withRuntimeLoop (const Matrix3f& B, const Vec3f& T, const Vec3f& a, const Vec3f& b, const FCL_REAL& breakDistance2, FCL_REAL& squaredLowerBoundDistance)
+  {
+    Matrix3f Bf (B.cwiseAbs());
+
+    // Corner of b axis aligned bounding box the closest to the origin
+    squaredLowerBoundDistance = _computeDistanceForCase1 (T, a, b, Bf);
+    if (squaredLowerBoundDistance > breakDistance2)
+      return true;
+
+    // | B^T T| - b - Bf^T a
+    squaredLowerBoundDistance = _computeDistanceForCase2 (B, T, a, b, Bf);
+    if (squaredLowerBoundDistance > breakDistance2)
+      return true;
+
+    int ja = 1, ka = 2, jb = 1, kb = 2;
+    for (int ia = 0; ia < 3; ++ia) {
+      for (int ib = 0; ib < 3; ++ib) {
+        const FCL_REAL s = T[ka] * B(ja, ib) - T[ja] * B(ka, ib);
+
+        const FCL_REAL diff = fabs(s) - (a[ja] * Bf(ka, ib) + a[ka] * Bf(ja, ib) +
+            b[jb] * Bf(ia, kb) + b[kb] * Bf(ia, jb));
+        // We need to divide by the norm || Aia x Bib ||
+        // As ||Aia|| = ||Bib|| = 1, (Aia | Bib)^2  = cosine^2
+        if (diff > 0) {
+          FCL_REAL sinus2 = 1 - Bf (ia,ib) * Bf (ia,ib);
+          if (sinus2 > 1e-6) {
+            squaredLowerBoundDistance = diff * diff / sinus2;
+            if (squaredLowerBoundDistance > breakDistance2) {
+              return true;
+            }
+          }
+          /* // or
+             FCL_REAL sinus2 = 1 - Bf (ia,ib) * Bf (ia,ib);
+             squaredLowerBoundDistance = diff * diff;
+             if (squaredLowerBoundDistance > breakDistance2 * sinus2) {
+             squaredLowerBoundDistance /= sinus2;
+             return true;
+             }
+          // */
+        }
+
+        jb = kb; kb = ib;
+      }
+      ja = ka; ka = ia;
+    }
+
+    return false;
+
+  }
+
+  // ------------------------ 1 --------------------------------------
+  bool withManualLoopUnrolling_1 (const Matrix3f& B, const Vec3f& T, const Vec3f& a, const Vec3f& b, const FCL_REAL& breakDistance2, FCL_REAL& squaredLowerBoundDistance)
+  {
+    FCL_REAL t, s;
+    FCL_REAL diff;
+
+    // Matrix3f Bf = abs(B);
+    // Bf += reps;
+    Matrix3f Bf (B.cwiseAbs());
+
+    // Corner of b axis aligned bounding box the closest to the origin
+    Vec3f AABB_corner (T.cwiseAbs () - Bf * b);
+    Vec3f diff3 (AABB_corner - a);
+    diff3 = diff3.cwiseMax (Vec3f::Zero());
+
+    //for (Vec3f::Index i=0; i<3; ++i) diff3 [i] = std::max (0, diff3 [i]);
+    squaredLowerBoundDistance = diff3.squaredNorm ();
+    if (squaredLowerBoundDistance > breakDistance2)
+      return true;
+
+    AABB_corner = (B.transpose () * T).cwiseAbs ()  - Bf.transpose () * a;
+    // diff3 = | B^T T| - b - Bf^T a
+    diff3 = AABB_corner - b;
+    diff3 = diff3.cwiseMax (Vec3f::Zero());
+    squaredLowerBoundDistance = diff3.squaredNorm ();
+
+    if (squaredLowerBoundDistance > breakDistance2)
+      return true;
+
+    // A0 x B0
+    s = T[2] * B(1, 0) - T[1] * B(2, 0);
+    t = ((s < 0.0) ? -s : s); assert (t == fabs (s));
+
+    FCL_REAL sinus2;
+    diff = t - (a[1] * Bf(2, 0) + a[2] * Bf(1, 0) +
+        b[1] * Bf(0, 2) + b[2] * Bf(0, 1));
+    // We need to divide by the norm || A0 x B0 ||
+    // As ||A0|| = ||B0|| = 1,
+    //              2            2
+    // || A0 x B0 ||  + (A0 | B0)  = 1
+    if (diff > 0) {
+      sinus2 = 1 - Bf (0,0) * Bf (0,0);
+      if (sinus2 > 1e-6) {
+        squaredLowerBoundDistance = diff * diff / sinus2;
+        if (squaredLowerBoundDistance > breakDistance2) {
+          return true;
+        }
+      }
+    }
+
+    // A0 x B1
+    s = T[2] * B(1, 1) - T[1] * B(2, 1);
+    t = ((s < 0.0) ? -s : s); assert (t == fabs (s));
+
+    diff = t - (a[1] * Bf(2, 1) + a[2] * Bf(1, 1) +
+        b[0] * Bf(0, 2) + b[2] * Bf(0, 0));
+    if (diff > 0) {
+      sinus2 = 1 - Bf (0,1) * Bf (0,1);
+      if (sinus2 > 1e-6) {
+        squaredLowerBoundDistance = diff * diff / sinus2;
+        if (squaredLowerBoundDistance > breakDistance2) {
+          return true;
+        }
+      }
+    }
+
+    // A0 x B2
+    s = T[2] * B(1, 2) - T[1] * B(2, 2);
+    t = ((s < 0.0) ? -s : s); assert (t == fabs (s));
+
+    diff = t - (a[1] * Bf(2, 2) + a[2] * Bf(1, 2) +
+        b[0] * Bf(0, 1) + b[1] * Bf(0, 0));
+    if (diff > 0) {
+      sinus2 = 1 - Bf (0,2) * Bf (0,2);
+      if (sinus2 > 1e-6) {
+        squaredLowerBoundDistance = diff * diff / sinus2;
+        if (squaredLowerBoundDistance > breakDistance2) {
+          return true;
+        }
+      }
+    }
+
+    // A1 x B0
+    s = T[0] * B(2, 0) - T[2] * B(0, 0);
+    t = ((s < 0.0) ? -s : s); assert (t == fabs (s));
+
+    diff = t - (a[0] * Bf(2, 0) + a[2] * Bf(0, 0) +
+        b[1] * Bf(1, 2) + b[2] * Bf(1, 1));
+    if (diff > 0) {
+      sinus2 = 1 - Bf (1,0) * Bf (1,0);
+      if (sinus2 > 1e-6) {
+        squaredLowerBoundDistance = diff * diff / sinus2;
+        if (squaredLowerBoundDistance > breakDistance2) {
+          return true;
+        }
+      }
+    }
+
+    // A1 x B1
+    s = T[0] * B(2, 1) - T[2] * B(0, 1);
+    t = ((s < 0.0) ? -s : s); assert (t == fabs (s));
+
+    diff = t - (a[0] * Bf(2, 1) + a[2] * Bf(0, 1) +
+        b[0] * Bf(1, 2) + b[2] * Bf(1, 0));
+    if (diff > 0) {
+      sinus2 = 1 - Bf (1,1) * Bf (1,1);
+      if (sinus2 > 1e-6) {
+        squaredLowerBoundDistance = diff * diff / sinus2;
+        if (squaredLowerBoundDistance > breakDistance2) {
+          return true;
+        }
+      }
+    }
+
+    // A1 x B2
+    s = T[0] * B(2, 2) - T[2] * B(0, 2);
+    t = ((s < 0.0) ? -s : s); assert (t == fabs (s));
+
+    diff = t - (a[0] * Bf(2, 2) + a[2] * Bf(0, 2) +
+        b[0] * Bf(1, 1) + b[1] * Bf(1, 0));
+    if (diff > 0) {
+      sinus2 = 1 - Bf (1,2) * Bf (1,2);
+      if (sinus2 > 1e-6) {
+        squaredLowerBoundDistance = diff * diff / sinus2;
+        if (squaredLowerBoundDistance > breakDistance2) {
+          return true;
+        }
+      }
+    }
+
+    // A2 x B0
+    s = T[1] * B(0, 0) - T[0] * B(1, 0);
+    t = ((s < 0.0) ? -s : s); assert (t == fabs (s));
+
+    diff = t - (a[0] * Bf(1, 0) + a[1] * Bf(0, 0) +
+        b[1] * Bf(2, 2) + b[2] * Bf(2, 1));
+    if (diff > 0) {
+      sinus2 = 1 - Bf (2,0) * Bf (2,0);
+      if (sinus2 > 1e-6) {
+        squaredLowerBoundDistance = diff * diff / sinus2;
+        if (squaredLowerBoundDistance > breakDistance2) {
+          return true;
+        }
+      }
+    }
+
+    // A2 x B1
+    s = T[1] * B(0, 1) - T[0] * B(1, 1);
+    t = ((s < 0.0) ? -s : s); assert (t == fabs (s));
+
+    diff = t - (a[0] * Bf(1, 1) + a[1] * Bf(0, 1) +
+        b[0] * Bf(2, 2) + b[2] * Bf(2, 0));
+    if (diff > 0) {
+      sinus2 = 1 - Bf (2,1) * Bf (2,1);
+      if (sinus2 > 1e-6) {
+        squaredLowerBoundDistance = diff * diff / sinus2;
+        if (squaredLowerBoundDistance > breakDistance2) {
+          return true;
+        }
+      }
+    }
+
+    // A2 x B2
+    s = T[1] * B(0, 2) - T[0] * B(1, 2);
+    t = ((s < 0.0) ? -s : s); assert (t == fabs (s));
+
+    diff = t - (a[0] * Bf(1, 2) + a[1] * Bf(0, 2) +
+        b[0] * Bf(2, 1) + b[1] * Bf(2, 0));
+    if (diff > 0) {
+      sinus2 = 1 - Bf (2,2) * Bf (2,2);
+      if (sinus2 > 1e-6) {
+        squaredLowerBoundDistance = diff * diff / sinus2;
+        if (squaredLowerBoundDistance > breakDistance2) {
+          return true;
+        }
+      }
+    }
+
+    return false;
+
+  }
+
+  // ------------------------ 2 --------------------------------------
+  bool withManualLoopUnrolling_2 (const Matrix3f& B, const Vec3f& T, const Vec3f& a, const Vec3f& b, const FCL_REAL& breakDistance2, FCL_REAL& squaredLowerBoundDistance)
+  {
+    Matrix3f Bf (B.cwiseAbs());
+
+    // Corner of b axis aligned bounding box the closest to the origin
+    squaredLowerBoundDistance = _computeDistanceForCase1 (T, a, b, Bf);
+    if (squaredLowerBoundDistance > breakDistance2)
+      return true;
+
+    squaredLowerBoundDistance = _computeDistanceForCase2 (B, T, a, b, Bf);
+    if (squaredLowerBoundDistance > breakDistance2)
+      return true;
+
+    // A0 x B0
+    FCL_REAL t, s;
+    s = T[2] * B(1, 0) - T[1] * B(2, 0);
+    t = ((s < 0.0) ? -s : s); assert (t == fabs (s));
+
+    FCL_REAL sinus2;
+    FCL_REAL diff;
+    diff = t - (a[1] * Bf(2, 0) + a[2] * Bf(1, 0) +
+        b[1] * Bf(0, 2) + b[2] * Bf(0, 1));
+    // We need to divide by the norm || A0 x B0 ||
+    // As ||A0|| = ||B0|| = 1,
+    //              2            2
+    // || A0 x B0 ||  + (A0 | B0)  = 1
+    if (diff > 0) {
+      sinus2 = 1 - Bf (0,0) * Bf (0,0);
+      if (sinus2 > 1e-6) {
+        squaredLowerBoundDistance = diff * diff / sinus2;
+        if (squaredLowerBoundDistance > breakDistance2) {
+          return true;
+        }
+      }
+    }
+
+    // A0 x B1
+    s = T[2] * B(1, 1) - T[1] * B(2, 1);
+    t = ((s < 0.0) ? -s : s); assert (t == fabs (s));
+
+    diff = t - (a[1] * Bf(2, 1) + a[2] * Bf(1, 1) +
+        b[0] * Bf(0, 2) + b[2] * Bf(0, 0));
+    if (diff > 0) {
+      sinus2 = 1 - Bf (0,1) * Bf (0,1);
+      if (sinus2 > 1e-6) {
+        squaredLowerBoundDistance = diff * diff / sinus2;
+        if (squaredLowerBoundDistance > breakDistance2) {
+          return true;
+        }
+      }
+    }
+
+    // A0 x B2
+    s = T[2] * B(1, 2) - T[1] * B(2, 2);
+    t = ((s < 0.0) ? -s : s); assert (t == fabs (s));
+
+    diff = t - (a[1] * Bf(2, 2) + a[2] * Bf(1, 2) +
+        b[0] * Bf(0, 1) + b[1] * Bf(0, 0));
+    if (diff > 0) {
+      sinus2 = 1 - Bf (0,2) * Bf (0,2);
+      if (sinus2 > 1e-6) {
+        squaredLowerBoundDistance = diff * diff / sinus2;
+        if (squaredLowerBoundDistance > breakDistance2) {
+          return true;
+        }
+      }
+    }
+
+    // A1 x B0
+    s = T[0] * B(2, 0) - T[2] * B(0, 0);
+    t = ((s < 0.0) ? -s : s); assert (t == fabs (s));
+
+    diff = t - (a[0] * Bf(2, 0) + a[2] * Bf(0, 0) +
+        b[1] * Bf(1, 2) + b[2] * Bf(1, 1));
+    if (diff > 0) {
+      sinus2 = 1 - Bf (1,0) * Bf (1,0);
+      if (sinus2 > 1e-6) {
+        squaredLowerBoundDistance = diff * diff / sinus2;
+        if (squaredLowerBoundDistance > breakDistance2) {
+          return true;
+        }
+      }
+    }
+
+    // A1 x B1
+    s = T[0] * B(2, 1) - T[2] * B(0, 1);
+    t = ((s < 0.0) ? -s : s); assert (t == fabs (s));
+
+    diff = t - (a[0] * Bf(2, 1) + a[2] * Bf(0, 1) +
+        b[0] * Bf(1, 2) + b[2] * Bf(1, 0));
+    if (diff > 0) {
+      sinus2 = 1 - Bf (1,1) * Bf (1,1);
+      if (sinus2 > 1e-6) {
+        squaredLowerBoundDistance = diff * diff / sinus2;
+        if (squaredLowerBoundDistance > breakDistance2) {
+          return true;
+        }
+      }
+    }
+
+    // A1 x B2
+    s = T[0] * B(2, 2) - T[2] * B(0, 2);
+    t = ((s < 0.0) ? -s : s); assert (t == fabs (s));
+
+    diff = t - (a[0] * Bf(2, 2) + a[2] * Bf(0, 2) +
+        b[0] * Bf(1, 1) + b[1] * Bf(1, 0));
+    if (diff > 0) {
+      sinus2 = 1 - Bf (1,2) * Bf (1,2);
+      if (sinus2 > 1e-6) {
+        squaredLowerBoundDistance = diff * diff / sinus2;
+        if (squaredLowerBoundDistance > breakDistance2) {
+          return true;
+        }
+      }
+    }
+
+    // A2 x B0
+    s = T[1] * B(0, 0) - T[0] * B(1, 0);
+    t = ((s < 0.0) ? -s : s); assert (t == fabs (s));
+
+    diff = t - (a[0] * Bf(1, 0) + a[1] * Bf(0, 0) +
+        b[1] * Bf(2, 2) + b[2] * Bf(2, 1));
+    if (diff > 0) {
+      sinus2 = 1 - Bf (2,0) * Bf (2,0);
+      if (sinus2 > 1e-6) {
+        squaredLowerBoundDistance = diff * diff / sinus2;
+        if (squaredLowerBoundDistance > breakDistance2) {
+          return true;
+        }
+      }
+    }
+
+    // A2 x B1
+    s = T[1] * B(0, 1) - T[0] * B(1, 1);
+    t = ((s < 0.0) ? -s : s); assert (t == fabs (s));
+
+    diff = t - (a[0] * Bf(1, 1) + a[1] * Bf(0, 1) +
+        b[0] * Bf(2, 2) + b[2] * Bf(2, 0));
+    if (diff > 0) {
+      sinus2 = 1 - Bf (2,1) * Bf (2,1);
+      if (sinus2 > 1e-6) {
+        squaredLowerBoundDistance = diff * diff / sinus2;
+        if (squaredLowerBoundDistance > breakDistance2) {
+          return true;
+        }
+      }
+    }
+
+    // A2 x B2
+    s = T[1] * B(0, 2) - T[0] * B(1, 2);
+    t = ((s < 0.0) ? -s : s); assert (t == fabs (s));
+
+    diff = t - (a[0] * Bf(1, 2) + a[1] * Bf(0, 2) +
+        b[0] * Bf(2, 1) + b[1] * Bf(2, 0));
+    if (diff > 0) {
+      sinus2 = 1 - Bf (2,2) * Bf (2,2);
+      if (sinus2 > 1e-6) {
+        squaredLowerBoundDistance = diff * diff / sinus2;
+        if (squaredLowerBoundDistance > breakDistance2) {
+          return true;
+        }
+      }
+    }
+
+    return false;
+
+  }
+
+  // ------------------------ 3 --------------------------------------
+  template <int ia, int ib,
+    int ja = (ia+1)%3, int ka = (ia+2)%3,
+    int jb = (ib+1)%3, int kb = (ib+2)%3 >
+  struct loop_case_1 {
+    static inline bool run (
+        const Matrix3f& B, const Vec3f& T, const Vec3f& a, const Vec3f& b,
+        const Matrix3f& Bf,
+        const FCL_REAL& breakDistance2, FCL_REAL& squaredLowerBoundDistance)
+    {
+      const FCL_REAL s = T[ka] * B(ja, ib) - T[ja] * B(ka, ib);
+
+      const FCL_REAL diff = fabs(s) - (a[ja] * Bf(ka, ib) + a[ka] * Bf(ja, ib) +
+                                       b[jb] * Bf(ia, kb) + b[kb] * Bf(ia, jb));
+      // We need to divide by the norm || Aia x Bib ||
+      // As ||Aia|| = ||Bib|| = 1, (Aia | Bib)^2  = cosine^2
+      if (diff > 0) {
+        FCL_REAL sinus2 = 1 - Bf (ia,ib) * Bf (ia,ib);
+        if (sinus2 > 1e-6) {
+          squaredLowerBoundDistance = diff * diff / sinus2;
+          if (squaredLowerBoundDistance > breakDistance2) {
+            return true;
+          }
+        }
+        /* // or
+           FCL_REAL sinus2 = 1 - Bf (ia,ib) * Bf (ia,ib);
+           squaredLowerBoundDistance = diff * diff;
+           if (squaredLowerBoundDistance > breakDistance2 * sinus2) {
+           squaredLowerBoundDistance /= sinus2;
+           return true;
+           }
+        // */
+      }
+      return false;
+    }
+  };
+
+  bool withTemplateLoopUnrolling_1 (const Matrix3f& B, const Vec3f& T, const Vec3f& a, const Vec3f& b, const FCL_REAL& breakDistance2, FCL_REAL& squaredLowerBoundDistance)
+  {
+    Matrix3f Bf (B.cwiseAbs());
+
+    // Corner of b axis aligned bounding box the closest to the origin
+    squaredLowerBoundDistance = _computeDistanceForCase1 (T, a, b, Bf);
+    if (squaredLowerBoundDistance > breakDistance2)
+      return true;
+
+    squaredLowerBoundDistance = _computeDistanceForCase2 (B, T, a, b, Bf);
+    if (squaredLowerBoundDistance > breakDistance2)
+      return true;
+
+    // Ai x Bj
+    if (loop_case_1<0,0>::run (B, T, a, b, Bf, breakDistance2, squaredLowerBoundDistance)) return true;
+    if (loop_case_1<0,1>::run (B, T, a, b, Bf, breakDistance2, squaredLowerBoundDistance)) return true;
+    if (loop_case_1<0,2>::run (B, T, a, b, Bf, breakDistance2, squaredLowerBoundDistance)) return true;
+    if (loop_case_1<1,0>::run (B, T, a, b, Bf, breakDistance2, squaredLowerBoundDistance)) return true;
+    if (loop_case_1<1,1>::run (B, T, a, b, Bf, breakDistance2, squaredLowerBoundDistance)) return true;
+    if (loop_case_1<1,2>::run (B, T, a, b, Bf, breakDistance2, squaredLowerBoundDistance)) return true;
+    if (loop_case_1<2,0>::run (B, T, a, b, Bf, breakDistance2, squaredLowerBoundDistance)) return true;
+    if (loop_case_1<2,1>::run (B, T, a, b, Bf, breakDistance2, squaredLowerBoundDistance)) return true;
+    if (loop_case_1<2,2>::run (B, T, a, b, Bf, breakDistance2, squaredLowerBoundDistance)) return true;
+
+    return false;
+  }
+
+  // ------------------------ 4 --------------------------------------
+
+  template <int ib, int jb = (ib+1)%3, int kb = (ib+2)%3 >
+  struct loop_case_2 
+  {
+    static inline bool run (int ia, int ja, int ka,
+        const Matrix3f& B, const Vec3f& T, const Vec3f& a, const Vec3f& b,
+        const Matrix3f& Bf,
+        const FCL_REAL& breakDistance2, FCL_REAL& squaredLowerBoundDistance)
+    {
+      const FCL_REAL s = T[ka] * B(ja, ib) - T[ja] * B(ka, ib);
+
+      const FCL_REAL diff = fabs(s) - (a[ja] * Bf(ka, ib) + a[ka] * Bf(ja, ib) +
+                                       b[jb] * Bf(ia, kb) + b[kb] * Bf(ia, jb));
+      // We need to divide by the norm || Aia x Bib ||
+      // As ||Aia|| = ||Bib|| = 1, (Aia | Bib)^2  = cosine^2
+      if (diff > 0) {
+        FCL_REAL sinus2 = 1 - Bf (ia,ib) * Bf (ia,ib);
+        if (sinus2 > 1e-6) {
+          squaredLowerBoundDistance = diff * diff / sinus2;
+          if (squaredLowerBoundDistance > breakDistance2) {
+            return true;
+          }
+        }
+        /* // or
+           FCL_REAL sinus2 = 1 - Bf (ia,ib) * Bf (ia,ib);
+           squaredLowerBoundDistance = diff * diff;
+           if (squaredLowerBoundDistance > breakDistance2 * sinus2) {
+           squaredLowerBoundDistance /= sinus2;
+           return true;
+           }
+        // */
+      }
+      return false;
+    }
+  };
+
+  bool withPartialTemplateLoopUnrolling_1 (const Matrix3f& B, const Vec3f& T, const Vec3f& a, const Vec3f& b, const FCL_REAL& breakDistance2, FCL_REAL& squaredLowerBoundDistance)
+  {
+    Matrix3f Bf (B.cwiseAbs());
+
+    // Corner of b axis aligned bounding box the closest to the origin
+    squaredLowerBoundDistance = _computeDistanceForCase1 (T, a, b, Bf);
+    if (squaredLowerBoundDistance > breakDistance2)
+      return true;
+
+    squaredLowerBoundDistance = _computeDistanceForCase2 (B, T, a, b, Bf);
+    if (squaredLowerBoundDistance > breakDistance2)
+      return true;
+
+    // Ai x Bj
+    int ja = 1, ka = 2;
+    for (int ia = 0; ia < 3; ++ia) {
+      if (loop_case_2<0>::run (ia, ja, ka,
+            B, T, a, b, Bf, breakDistance2, squaredLowerBoundDistance)) return true;
+      if (loop_case_2<1>::run (ia, ja, ka,
+            B, T, a, b, Bf, breakDistance2, squaredLowerBoundDistance)) return true;
+      if (loop_case_2<2>::run (ia, ja, ka,
+            B, T, a, b, Bf, breakDistance2, squaredLowerBoundDistance)) return true;
+      ja = ka; ka = ia;
+    }
+
+    return false;
+  }
+
+  // ------------------------ 5 --------------------------------------
+  bool originalWithLowerBound (const Matrix3f& B, const Vec3f& T, const Vec3f& a, const Vec3f& b, const FCL_REAL& breakDistance2, FCL_REAL& squaredLowerBoundDistance)
+  {
+    register FCL_REAL t, s;
+    FCL_REAL diff;
+
+    Matrix3f Bf (B.cwiseAbs());
+    squaredLowerBoundDistance = 0;
+
+    // if any of these tests are one-sided, then the polyhedra are disjoint
+
+    // A1 x A2 = A0
+    t = ((T[0] < 0.0) ? -T[0] : T[0]);
+
+    diff = t - (a[0] + Bf.row(0).dot(b));
+    if (diff > 0) {
+      squaredLowerBoundDistance = diff*diff;
+    }
+
+    // A2 x A0 = A1
+    t = ((T[1] < 0.0) ? -T[1] : T[1]);
+
+    diff = t - (a[1] + Bf.row(1).dot(b));
+    if (diff > 0) {
+      squaredLowerBoundDistance += diff*diff;
+    }
+
+    // A0 x A1 = A2
+    t =((T[2] < 0.0) ? -T[2] : T[2]);
+
+    diff = t - (a[2] + Bf.row(2).dot(b));
+    if (diff > 0) {
+      squaredLowerBoundDistance += diff*diff;
+    }
+
+    if (squaredLowerBoundDistance > breakDistance2)
+      return true;
+
+    squaredLowerBoundDistance = 0;
+
+    // B1 x B2 = B0
+    s =  B.col(0).dot(T);
+    t = ((s < 0.0) ? -s : s);
+
+    diff = t - (b[0] + Bf.col(0).dot(a));
+    if (diff > 0) {
+      squaredLowerBoundDistance += diff*diff;
+    }
+
+    // B2 x B0 = B1
+    s = B.col(1).dot(T);
+    t = ((s < 0.0) ? -s : s);
+
+    diff = t - (b[1] + Bf.col(1).dot(a));
+    if (diff > 0) {
+      squaredLowerBoundDistance += diff*diff;
+    }
+
+    // B0 x B1 = B2
+    s = B.col(2).dot(T);
+    t = ((s < 0.0) ? -s : s);
+
+    diff = t - (b[2] + Bf.col(2).dot(a));
+    if (diff > 0) {
+      squaredLowerBoundDistance += diff*diff;
+    }
+
+    if (squaredLowerBoundDistance > breakDistance2)
+      return true;
+
+    // A0 x B0
+    s = T[2] * B(1, 0) - T[1] * B(2, 0);
+    t = ((s < 0.0) ? -s : s);
+
+    FCL_REAL sinus2;
+    diff = t - (a[1] * Bf(2, 0) + a[2] * Bf(1, 0) +
+        b[1] * Bf(0, 2) + b[2] * Bf(0, 1));
+    // We need to divide by the norm || A0 x B0 ||
+    // As ||A0|| = ||B0|| = 1,
+    //              2            2
+    // || A0 x B0 ||  + (A0 | B0)  = 1
+    if (diff > 0) {
+      sinus2 = 1 - Bf (0,0) * Bf (0,0);
+      if (sinus2 > 1e-6) {
+        squaredLowerBoundDistance = diff * diff / sinus2;
+        if (squaredLowerBoundDistance > breakDistance2) {
+          return true;
+        }
+      }
+    }
+
+    // A0 x B1
+    s = T[2] * B(1, 1) - T[1] * B(2, 1);
+    t = ((s < 0.0) ? -s : s);
+
+    diff = t - (a[1] * Bf(2, 1) + a[2] * Bf(1, 1) +
+        b[0] * Bf(0, 2) + b[2] * Bf(0, 0));
+    if (diff > 0) {
+      sinus2 = 1 - Bf (0,1) * Bf (0,1);
+      if (sinus2 > 1e-6) {
+        squaredLowerBoundDistance = diff * diff / sinus2;
+        if (squaredLowerBoundDistance > breakDistance2) {
+          return true;
+        }
+      }
+    }
+
+    // A0 x B2
+    s = T[2] * B(1, 2) - T[1] * B(2, 2);
+    t = ((s < 0.0) ? -s : s);
+
+    diff = t - (a[1] * Bf(2, 2) + a[2] * Bf(1, 2) +
+        b[0] * Bf(0, 1) + b[1] * Bf(0, 0));
+    if (diff > 0) {
+      sinus2 = 1 - Bf (0,2) * Bf (0,2);
+      if (sinus2 > 1e-6) {
+        squaredLowerBoundDistance = diff * diff / sinus2;
+        if (squaredLowerBoundDistance > breakDistance2) {
+          return true;
+        }
+      }
+    }
+
+    // A1 x B0
+    s = T[0] * B(2, 0) - T[2] * B(0, 0);
+    t = ((s < 0.0) ? -s : s);
+
+    diff = t - (a[0] * Bf(2, 0) + a[2] * Bf(0, 0) +
+        b[1] * Bf(1, 2) + b[2] * Bf(1, 1));
+    if (diff > 0) {
+      sinus2 = 1 - Bf (1,0) * Bf (1,0);
+      if (sinus2 > 1e-6) {
+        squaredLowerBoundDistance = diff * diff / sinus2;
+        if (squaredLowerBoundDistance > breakDistance2) {
+          return true;
+        }
+      }
+    }
+
+    // A1 x B1
+    s = T[0] * B(2, 1) - T[2] * B(0, 1);
+    t = ((s < 0.0) ? -s : s);
+
+    diff = t - (a[0] * Bf(2, 1) + a[2] * Bf(0, 1) +
+        b[0] * Bf(1, 2) + b[2] * Bf(1, 0));
+    if (diff > 0) {
+      sinus2 = 1 - Bf (1,1) * Bf (1,1);
+      if (sinus2 > 1e-6) {
+        squaredLowerBoundDistance = diff * diff / sinus2;
+        if (squaredLowerBoundDistance > breakDistance2) {
+          return true;
+        }
+      }
+    }
+
+    // A1 x B2
+    s = T[0] * B(2, 2) - T[2] * B(0, 2);
+    t = ((s < 0.0) ? -s : s);
+
+    diff = t - (a[0] * Bf(2, 2) + a[2] * Bf(0, 2) +
+        b[0] * Bf(1, 1) + b[1] * Bf(1, 0));
+    if (diff > 0) {
+      sinus2 = 1 - Bf (1,2) * Bf (1,2);
+      if (sinus2 > 1e-6) {
+        squaredLowerBoundDistance = diff * diff / sinus2;
+        if (squaredLowerBoundDistance > breakDistance2) {
+          return true;
+        }
+      }
+    }
+
+    // A2 x B0
+    s = T[1] * B(0, 0) - T[0] * B(1, 0);
+    t = ((s < 0.0) ? -s : s);
+
+    diff = t - (a[0] * Bf(1, 0) + a[1] * Bf(0, 0) +
+        b[1] * Bf(2, 2) + b[2] * Bf(2, 1));
+    if (diff > 0) {
+      sinus2 = 1 - Bf (2,0) * Bf (2,0);
+      if (sinus2 > 1e-6) {
+        squaredLowerBoundDistance = diff * diff / sinus2;
+        if (squaredLowerBoundDistance > breakDistance2) {
+          return true;
+        }
+      }
+    }
+
+    // A2 x B1
+    s = T[1] * B(0, 1) - T[0] * B(1, 1);
+    t = ((s < 0.0) ? -s : s);
+
+    diff = t - (a[0] * Bf(1, 1) + a[1] * Bf(0, 1) +
+        b[0] * Bf(2, 2) + b[2] * Bf(2, 0));
+    if (diff > 0) {
+      sinus2 = 1 - Bf (2,1) * Bf (2,1);
+      if (sinus2 > 1e-6) {
+        squaredLowerBoundDistance = diff * diff / sinus2;
+        if (squaredLowerBoundDistance > breakDistance2) {
+          return true;
+        }
+      }
+    }
+
+    // A2 x B2
+    s = T[1] * B(0, 2) - T[0] * B(1, 2);
+    t = ((s < 0.0) ? -s : s);
+
+    diff = t - (a[0] * Bf(1, 2) + a[1] * Bf(0, 2) +
+        b[0] * Bf(2, 1) + b[1] * Bf(2, 0));
+    if (diff > 0) {
+      sinus2 = 1 - Bf (2,2) * Bf (2,2);
+      if (sinus2 > 1e-6) {
+        squaredLowerBoundDistance = diff * diff / sinus2;
+        if (squaredLowerBoundDistance > breakDistance2) {
+          return true;
+        }
+      }
+    }
+
+    return false;
+
+  }
+
+  // ------------------------ 6 --------------------------------------
+  bool originalWithNoLowerBound (const Matrix3f& B, const Vec3f& T, const Vec3f& a, const Vec3f& b, const FCL_REAL& , FCL_REAL& squaredLowerBoundDistance)
+  {
+    register FCL_REAL t, s;
+    const FCL_REAL reps = 1e-6;
+
+    squaredLowerBoundDistance = 0;
+
+    Matrix3f Bf (B.array().abs() + reps);
+    // Bf += reps;
+
+    // if any of these tests are one-sided, then the polyhedra are disjoint
+
+    // A1 x A2 = A0
+    t = ((T[0] < 0.0) ? -T[0] : T[0]);
+
+    // if(t > (a[0] + Bf.dotX(b)))
+    if(t > (a[0] + Bf.row(0).dot(b)))
+      return true;
+
+    // B1 x B2 = B0
+    // s =  B.transposeDotX(T);
+    s =  B.col(0).dot(T);
+    t = ((s < 0.0) ? -s : s);
+
+    // if(t > (b[0] + Bf.transposeDotX(a)))
+    if(t > (b[0] + Bf.col(0).dot(a)))
+      return true;
+
+    // A2 x A0 = A1
+    t = ((T[1] < 0.0) ? -T[1] : T[1]);
+
+    // if(t > (a[1] + Bf.dotY(b)))
+    if(t > (a[1] + Bf.row(1).dot(b)))
+      return true;
+
+    // A0 x A1 = A2
+    t =((T[2] < 0.0) ? -T[2] : T[2]);
+
+    // if(t > (a[2] + Bf.dotZ(b)))
+    if(t > (a[2] + Bf.row(2).dot(b)))
+      return true;
+
+    // B2 x B0 = B1
+    // s = B.transposeDotY(T);
+    s = B.col(1).dot(T);
+    t = ((s < 0.0) ? -s : s);
+
+    // if(t > (b[1] + Bf.transposeDotY(a)))
+    if(t > (b[1] + Bf.col(1).dot(a)))
+      return true;
+
+    // B0 x B1 = B2
+    // s = B.transposeDotZ(T);
+    s = B.col(2).dot(T);
+    t = ((s < 0.0) ? -s : s);
+
+    // if(t > (b[2] + Bf.transposeDotZ(a)))
+    if(t > (b[2] + Bf.col(2).dot(a)))
+      return true;
+
+    // A0 x B0
+    s = T[2] * B(1, 0) - T[1] * B(2, 0);
+    t = ((s < 0.0) ? -s : s);
+
+    if(t > (a[1] * Bf(2, 0) + a[2] * Bf(1, 0) +
+          b[1] * Bf(0, 2) + b[2] * Bf(0, 1)))
+      return true;
+
+    // A0 x B1
+    s = T[2] * B(1, 1) - T[1] * B(2, 1);
+    t = ((s < 0.0) ? -s : s);
+
+    if(t > (a[1] * Bf(2, 1) + a[2] * Bf(1, 1) +
+          b[0] * Bf(0, 2) + b[2] * Bf(0, 0)))
+      return true;
+
+    // A0 x B2
+    s = T[2] * B(1, 2) - T[1] * B(2, 2);
+    t = ((s < 0.0) ? -s : s);
+
+    if(t > (a[1] * Bf(2, 2) + a[2] * Bf(1, 2) +
+          b[0] * Bf(0, 1) + b[1] * Bf(0, 0)))
+      return true;
+
+    // A1 x B0
+    s = T[0] * B(2, 0) - T[2] * B(0, 0);
+    t = ((s < 0.0) ? -s : s);
+
+    if(t > (a[0] * Bf(2, 0) + a[2] * Bf(0, 0) +
+          b[1] * Bf(1, 2) + b[2] * Bf(1, 1)))
+      return true;
+
+    // A1 x B1
+    s = T[0] * B(2, 1) - T[2] * B(0, 1);
+    t = ((s < 0.0) ? -s : s);
+
+    if(t > (a[0] * Bf(2, 1) + a[2] * Bf(0, 1) +
+          b[0] * Bf(1, 2) + b[2] * Bf(1, 0)))
+      return true;
+
+    // A1 x B2
+    s = T[0] * B(2, 2) - T[2] * B(0, 2);
+    t = ((s < 0.0) ? -s : s);
+
+    if(t > (a[0] * Bf(2, 2) + a[2] * Bf(0, 2) +
+          b[0] * Bf(1, 1) + b[1] * Bf(1, 0)))
+      return true;
+
+    // A2 x B0
+    s = T[1] * B(0, 0) - T[0] * B(1, 0);
+    t = ((s < 0.0) ? -s : s);
+
+    if(t > (a[0] * Bf(1, 0) + a[1] * Bf(0, 0) +
+          b[1] * Bf(2, 2) + b[2] * Bf(2, 1)))
+      return true;
+
+    // A2 x B1
+    s = T[1] * B(0, 1) - T[0] * B(1, 1);
+    t = ((s < 0.0) ? -s : s);
+
+    if(t > (a[0] * Bf(1, 1) + a[1] * Bf(0, 1) +
+          b[0] * Bf(2, 2) + b[2] * Bf(2, 0)))
+      return true;
+
+    // A2 x B2
+    s = T[1] * B(0, 2) - T[0] * B(1, 2);
+    t = ((s < 0.0) ? -s : s);
+
+    if(t > (a[0] * Bf(1, 2) + a[1] * Bf(0, 2) +
+          b[0] * Bf(2, 1) + b[1] * Bf(2, 0)))
+      return true;
+
+    return false;
+
+  }
+}
+
+struct BenchmarkResult
+{
+  /// The test ID:
+  /// - 0-10 identifies a separating axes.
+  /// - 11 means no separatins axes could be found. (distance should be <= 0)
+  int ifId;
+  FCL_REAL distance;
+  FCL_REAL squaredLowerBoundDistance;
+  duration_type duration[NB_METHODS];
+  bool failure;
+
+  static std::ostream& headers (std::ostream& os)
+  {
+    duration_type dummy (1);
+    std::string unit = " (" + boost::chrono::duration_units_default<>().get_unit (boost::chrono::duration_style::symbol, dummy) + ")";
+    os << boost::chrono::symbol_format
+      << "separating axis"
+      << sep << "distance lower bound"
+      << sep << "distance"
+      << sep << "failure"
+      << sep << "Runtime Loop" << unit
+      << sep << "Manual Loop Unrolling 1" << unit
+      << sep << "Manual Loop Unrolling 2" << unit
+      << sep << "Template Unrolling" << unit
+      << sep << "Partial Template Unrolling" << unit
+      << sep << "Original (LowerBound)" << unit
+      << sep << "Original (NoLowerBound)" << unit
+      ;
+    return os;
+  }
+
+  std::ostream& print (std::ostream& os) const
+  {
+    os << ifId
+      << sep << std::sqrt(squaredLowerBoundDistance)
+      << sep << distance
+      << sep << failure;
+    for (int i = 0; i < NB_METHODS; ++i) os << sep << duration[i].count();
+    return os;
+  }
+};
+
+std::ostream& operator<< (std::ostream& os, const BenchmarkResult& br)
+{
+  return br.print (os);
+}
+
+BenchmarkResult benchmark_obb_case (
+    const Matrix3f& B, const Vec3f& T,
+    const Vec3f& a, const Vec3f& b,
+    const CollisionRequest& request,
+    std::size_t N)
+{
+  const FCL_REAL breakDistance (request.break_distance + request.security_margin);
+  const FCL_REAL breakDistance2 = breakDistance * breakDistance;
+
+  BenchmarkResult result;
+  // First determine which axis provide the answer
+  result.ifId = obbDisjoint_impls::separatingAxisId (B, T, a, b, breakDistance2,
+      result.squaredLowerBoundDistance);
+  bool disjoint = obbDisjoint_impls::distance (B, T, a, b, result.distance);
+  assert (0 <= result.ifId && result.ifId <= 11);
+
+  // Sanity check
+  result.failure = true;
+  bool overlap = (result.ifId == 11); 
+  FCL_REAL dist_thr = request.break_distance + request.security_margin;
+  if (       !overlap && result.distance <= 0) {
+    std::cerr << "Failure: negative distance for disjoint OBBs.";
+  } else if (!overlap && result.squaredLowerBoundDistance < 0) {
+    std::cerr << "Failure: negative distance lower bound.";
+  } else if (!overlap && eps < std::sqrt (result.squaredLowerBoundDistance) - result.distance) {
+    std::cerr << "Failure: distance is inferior to its lower bound (diff = " << 
+      std::sqrt (result.squaredLowerBoundDistance) - result.distance << ").";
+  } else if (overlap != !disjoint && result.distance >= dist_thr - eps) {
+    std::cerr << "Failure: overlapping test and distance query mismatch.";
+  } else if (overlap              && result.distance >= dist_thr - eps) {
+    std::cerr << "Failure: positive distance for overlapping OBBs.";
+  } else {
+    result.failure = false;
+  }
+  if (result.failure) {
+    std::cerr
+      << "\nR = " << Quaternion3f(B).coeffs().transpose().format (py_fmt)
+      << "\nT = " << T.transpose().format (py_fmt)
+      << "\na = " << a.transpose().format (py_fmt)
+      << "\nb = " << b.transpose().format (py_fmt)
+      << "\nresult = " << result
+      << '\n' << std::endl;
+  }
+
+  // Compute time
+  FCL_REAL tmp;
+  clock_type::time_point start, end;
+
+  // ------------------------ 0 --------------------------------------
+  start = clock_type::now();
+  for (std::size_t i = 0; i < N; ++i)
+    obbDisjoint_impls::withRuntimeLoop (B, T, a, b, breakDistance2, tmp);
+  end = clock_type::now();
+  result.duration[0] = (end - start) / N;
+
+  // ------------------------ 1 --------------------------------------
+  start = clock_type::now();
+  for (std::size_t i = 0; i < N; ++i)
+    obbDisjoint_impls::withManualLoopUnrolling_1 (B, T, a, b, breakDistance2, tmp);
+  end = clock_type::now();
+  result.duration[1] = (end - start) / N;
+
+  // ------------------------ 2 --------------------------------------
+  start = clock_type::now();
+  for (std::size_t i = 0; i < N; ++i)
+    obbDisjoint_impls::withManualLoopUnrolling_2 (B, T, a, b, breakDistance2, tmp);
+  end = clock_type::now();
+  result.duration[2] = (end - start) / N;
+
+  // ------------------------ 3 --------------------------------------
+  start = clock_type::now();
+  for (std::size_t i = 0; i < N; ++i)
+    obbDisjoint_impls::withTemplateLoopUnrolling_1 (B, T, a, b, breakDistance2, tmp);
+  end = clock_type::now();
+  result.duration[3] = (end - start) / N;
+
+  // ------------------------ 4 --------------------------------------
+  start = clock_type::now();
+  for (std::size_t i = 0; i < N; ++i)
+    obbDisjoint_impls::withPartialTemplateLoopUnrolling_1 (B, T, a, b, breakDistance2, tmp);
+  end = clock_type::now();
+  result.duration[4] = (end - start) / N;
+
+  // ------------------------ 5 --------------------------------------
+  start = clock_type::now();
+  for (std::size_t i = 0; i < N; ++i)
+    obbDisjoint_impls::originalWithLowerBound (B, T, a, b, breakDistance2, tmp);
+  end = clock_type::now();
+  result.duration[5] = (end - start) / N;
+
+  // ------------------------ 6 --------------------------------------
+  start = clock_type::now();
+  // The 2 last arguments are unused.
+  for (std::size_t i = 0; i < N; ++i)
+    obbDisjoint_impls::originalWithNoLowerBound (B, T, a, b, breakDistance2, tmp);
+  end = clock_type::now();
+  result.duration[6] = (end - start) / N;
+
+  return result;
+}
+
+std::size_t obb_overlap_and_lower_bound_distance()
+{
+  std::size_t nbFailure = 0;
+
+  // Create two OBBs axis
+  Vec3f a, b;
+  Matrix3f B;
+  Vec3f T;
+  CollisionRequest request;
+
+  static const size_t nbRandomOBB       = 100;
+  static const size_t nbTransformPerOBB = 100;
+  static const size_t nbRunForTimeMeas  = 1000;
+  static const FCL_REAL extentNorm = 1.;
+
+  std::ostream& output = std::cout;
+  output << BenchmarkResult::headers << '\n';
+  
+  BenchmarkResult result;
+  for (std::size_t iobb = 0; iobb < nbRandomOBB; ++iobb) {
+    randomOBBs (a, b, extentNorm);
+    for (std::size_t itf = 0; itf < nbTransformPerOBB; ++itf) {
+      randomTransform (B, T, a, b, extentNorm);
+      result = benchmark_obb_case (B, T, a, b, request, nbRunForTimeMeas);
+      output << result << '\n';
+      if (result.failure) nbFailure++;
+    }
+  }
+  return nbFailure;
+}
+
+int main ()
+{
+  bool cpuScalingEnabled = checkCpuScalingEnabled();
+  if (cpuScalingEnabled)
+    std::cerr <<
+      "CPU scaling is enabled."
+      "\n\tThe benchmark real time measurements may be noisy and will incur extra overhead."
+      "\n\tUse the following commands to turn on and off."
+      "\n\t\tsudo cpufreq-set --governor performance"
+      "\n\t\tsudo cpufreq-set --governor powersave"
+      "\n"
+      ;
+
+  std::size_t nbFailure = obb_overlap_and_lower_bound_distance();
+  if (nbFailure > INT_MAX) return INT_MAX;
+  return (int)nbFailure;
+}
diff --git a/test/obb.py b/test/obb.py
new file mode 100644
index 0000000000000000000000000000000000000000..1f9141cd74306a72d64ac3229b4f2800e67d9bf1
--- /dev/null
+++ b/test/obb.py
@@ -0,0 +1,110 @@
+import matplotlib.pyplot as plt
+import csv, sys, numpy as np
+from math import sqrt
+
+filename = sys.argv[1] 
+
+with open(filename, 'r') as file:
+    reader = csv.reader (file, strict = True)
+    fieldnames = None
+    #fieldnames = reader.fieldnames
+    for row in reader:
+        if fieldnames is None:
+            fieldnames = [ n.strip() for n in row]
+            values = [ [] for _ in fieldnames ]
+            continue
+
+        values[0].append (int(row[0]))
+        for i, v in enumerate(row[1:]):
+            values[i+1].append (float(v))
+
+# Compute mean and variance for each values, for each separating axis
+means = [ [ 0., ] * 12 for _ in fieldnames[4:] ]
+stddevs = [ [ 0., ] * 12 for _ in fieldnames[4:] ]
+nb_occurence = [ 0, ] * 12
+
+for i, id in enumerate(values[0]):
+    nb_occurence[id] += 1
+
+for i, id in enumerate(values[0]):
+    for k, n in enumerate(fieldnames[4:]):
+        v = values[k+4][i]
+        means  [k][id] += v / nb_occurence[id]
+        stddevs[k][id] += v * v / nb_occurence[id]
+
+for k, n in enumerate(fieldnames[4:]):
+    for id in range(12):
+        #means  [k][id] /= nb_occurence[id]
+        #stddevs[k][id] = sqrt (stddevs[k][id]) / nb_occurence[id] - means[k][id])
+        stddevs[k][id] = sqrt (stddevs[k][id] - means[k][id]*means[k][id])
+
+subplots = False
+Nrows = 1
+Ncols = 3
+iplot = 1
+time_vs_sep_axis = True
+nb_occ_sep_axis = False
+avg_time_vs_impl = True
+
+if time_vs_sep_axis:
+    if subplots: plt.subplot (Nrows, Ncols, iplot)
+    else:        plt.figure (iplot)
+    plt.title ("Time, with std dev, versus separating axis")
+    for k, n in enumerate(fieldnames[4:]):
+        #plt.errorbar ([ np.linspace(0, 11, 12) + shift for shift in np.linspace (-0.2, 0.2, ) ], means[k], stddevs[k], label=n)
+        plt.errorbar (np.linspace(0, 11, 12), means[k], stddevs[k], label=n)
+        # plt.errorbar (np.linspace(0, 11, 12), means[k],  [ [ 0 ] * len(stddevs[k]), stddevs[k] ], label=n)
+    plt.xlim([-0.5,11.5])
+    plt.ylabel('Time (ns)')
+    plt.xlabel('Separating axis')
+    plt.legend(loc='upper left')
+
+    axx = plt.gca().twinx()
+    axx.hist (values[0], bins=[ i-0.5 for i in range(13) ], bottom=-0.5, cumulative=True,
+            rwidth=0.5, fill=False, label='Cumulative occurence')
+    axx.set_ylabel('Nb occurence of a separating axis.')
+    plt.legend(loc='lower right')
+
+iplot += 1
+if nb_occ_sep_axis:
+    if subplots: plt.subplot (Nrows, Ncols, iplot)
+    else:        plt.figure (iplot)
+    plt.title ("Nb of occurence per separating axis")
+    plt.hist (values[0], bins=[ i-0.5 for i in range(13) ])
+    plt.ylabel('Nb occurence')
+    plt.xlabel('Separating axis')
+    dlb_id = 1
+    d_id = 2
+    #plt.title ("Time, with std dev, versus distance")
+    #for k, n in enumerate(fieldnames[4:]):
+        #plt.plot (values[dlb_id], values[k+4], '.', label=n)
+
+iplot += 1
+if avg_time_vs_impl:
+    if subplots: plt.subplot (Nrows, Ncols, iplot)
+    else:        plt.figure (iplot)
+    plt.title ("Average time versus the implementation")
+    #plt.boxplot(values[4:], labels=fieldnames[4:], showmeans=True)
+    _mins = np.min (values[4:], axis=1)
+    _maxs = np.max (values[4:], axis=1)
+    _means = np.mean (values[4:], axis=1)
+    _stddev = np.std (values[4:], axis=1)
+    _sorted = sorted ( zip(fieldnames[4:], _means, _stddev, _mins, _maxs), key=lambda x: x[1])
+    plt.errorbar(
+            [ f for f,m,s,l,u in _sorted],
+            [ m for f,m,s,l,u in _sorted],
+            [ s for f,m,s,l,u in _sorted],
+            fmt='go', linestyle='', label='mean and std deviation')
+    plt.plot (
+            [ f for f,m,s,l,u in _sorted],
+            [ l for f,m,s,l,u in _sorted],
+            'b+', ls='', label='min')
+    plt.plot (
+            [ f for f,m,s,l,u in _sorted],
+            [ u for f,m,s,l,u in _sorted],
+            'r+', ls='', label='max')
+    plt.ylabel('Time (ns)')
+    plt.xticks(rotation=20)
+    plt.legend()
+
+plt.show()
diff --git a/test/test_fcl_octree.cpp b/test/octree.cpp
similarity index 99%
rename from test/test_fcl_octree.cpp
rename to test/octree.cpp
index 40951efe67872bcf486440898a3242d554e4015e..65507fa0aff8223ac63846b4baa37f1dfb806bc8 100644
--- a/test/test_fcl_octree.cpp
+++ b/test/octree.cpp
@@ -43,7 +43,7 @@
 #include <hpp/fcl/collision.h>
 #include <hpp/fcl/distance.h>
 #include <hpp/fcl/shape/geometric_shapes.h>
-#include "test_fcl_utility.h"
+#include "utility.h"
 #include "fcl_resources/config.h"
 #include <boost/filesystem.hpp>
 
diff --git a/test/test_fcl_octree.py b/test/octree.py
similarity index 100%
rename from test/test_fcl_octree.py
rename to test/octree.py
diff --git a/test/test_fcl_profiling.cpp b/test/profiling.cpp
similarity index 97%
rename from test/test_fcl_profiling.cpp
rename to test/profiling.cpp
index 7625f3d214a8e499662103ca2469b8b931f890e1..6bfcfab8de5d28d4386f1d2147b9a6b63cd8647b 100644
--- a/test/test_fcl_profiling.cpp
+++ b/test/profiling.cpp
@@ -26,7 +26,7 @@
 #include <hpp/fcl/collision_func_matrix.h>
 #include <hpp/fcl/narrowphase/narrowphase.h>
 #include <hpp/fcl/mesh_loader/assimp.h>
-#include "test_fcl_utility.h"
+#include "utility.h"
 #include "fcl_resources/config.h"
 
 using namespace hpp::fcl;
@@ -253,8 +253,8 @@ int main(int argc, char** argv)
     geoms.push_back(Geometry ("Capsule"   , new Capsule   (2, 1)                                          ));
     geoms.push_back(Geometry ("Cone"      , new Cone      (2, 1)                                          ));
     geoms.push_back(Geometry ("Cylinder"  , new Cylinder  (2, 1)                                          ));
-    geoms.push_back(Geometry ("Plane"     , new Plane     (Vec3f(1,1,1).normalized(), 0)                  ));
-    geoms.push_back(Geometry ("Halfspace" , new Halfspace (Vec3f(1,1,1).normalized(), 0)                  ));
+    //geoms.push_back(Geometry ("Plane"     , new Plane     (Vec3f(1,1,1).normalized(), 0)                  ));
+    //geoms.push_back(Geometry ("Halfspace" , new Halfspace (Vec3f(1,1,1).normalized(), 0)                  ));
     // not implemented // geoms.push_back(Geometry ("TriangleP" , new TriangleP (Vec3f(0,1,0), Vec3f(0,0,1), Vec3f(-1,0,0))     ));
 
     geoms.push_back(Geometry ("rob BVHModel<OBB>"   , objToGeom<OBB>   ((path / "rob.obj").string())));
diff --git a/test/test_fcl_shape_mesh_consistency.cpp b/test/shape_mesh_consistency.cpp
similarity index 99%
rename from test/test_fcl_shape_mesh_consistency.cpp
rename to test/shape_mesh_consistency.cpp
index f2471bf25d43bdc4659288a6a128b79f78c29041..226f15898e91ddccbabef13a2804f26f9e653920 100644
--- a/test/test_fcl_shape_mesh_consistency.cpp
+++ b/test/shape_mesh_consistency.cpp
@@ -44,7 +44,7 @@
 #include <hpp/fcl/shape/geometric_shape_to_BVH_model.h>
 #include <hpp/fcl/distance.h>
 #include <hpp/fcl/collision.h>
-#include "test_fcl_utility.h"
+#include "utility.h"
 
 
 using namespace hpp::fcl;
diff --git a/test/test_fcl_simple.cpp b/test/simple.cpp
similarity index 100%
rename from test/test_fcl_simple.cpp
rename to test/simple.cpp
diff --git a/test/test_fcl_sphere_capsule.cpp b/test/sphere_capsule.cpp
similarity index 100%
rename from test/test_fcl_sphere_capsule.cpp
rename to test/sphere_capsule.cpp
diff --git a/test/test_fcl_collision.cpp b/test/test_fcl_collision.cpp
deleted file mode 100644
index c3b9000b6b4959ffdc2a382fe0ca726098ae4236..0000000000000000000000000000000000000000
--- a/test/test_fcl_collision.cpp
+++ /dev/null
@@ -1,1054 +0,0 @@
-/*
- * Software License Agreement (BSD License)
- *
- *  Copyright (c) 2011-2014, Willow Garage, Inc.
- *  Copyright (c) 2014-2015, Open Source Robotics Foundation
- *  All rights reserved.
- *
- *  Redistribution and use in source and binary forms, with or without
- *  modification, are permitted provided that the following conditions
- *  are met:
- *
- *   * Redistributions of source code must retain the above copyright
- *     notice, this list of conditions and the following disclaimer.
- *   * Redistributions in binary form must reproduce the above
- *     copyright notice, this list of conditions and the following
- *     disclaimer in the documentation and/or other materials provided
- *     with the distribution.
- *   * Neither the name of Open Source Robotics Foundation nor the names of its
- *     contributors may be used to endorse or promote products derived
- *     from this software without specific prior written permission.
- *
- *  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- *  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- *  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
- *  FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
- *  COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
- *  INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
- *  BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
- *  LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
- *  CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- *  LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
- *  ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- *  POSSIBILITY OF SUCH DAMAGE.
- */
-
-/** \author Jia Pan */
-
-#define BOOST_TEST_MODULE FCL_COLLISION
-#define BOOST_TEST_DYN_LINK
-#include <boost/test/unit_test.hpp>
-#include <boost/utility/binary.hpp>
-
-#include <hpp/fcl/traversal/traversal_node_bvhs.h>
-#include <hpp/fcl/traversal/traversal_node_setup.h>
-#include <hpp/fcl/collision_node.h>
-#include <hpp/fcl/collision.h>
-#include <hpp/fcl/BV/BV.h>
-#include <hpp/fcl/shape/geometric_shapes.h>
-#include <hpp/fcl/narrowphase/narrowphase.h>
-#include "test_fcl_utility.h"
-#include "fcl_resources/config.h"
-#include <boost/filesystem.hpp>
-
-using namespace hpp::fcl;
-
-template<typename BV>
-bool collide_Test(const Transform3f& tf,
-                  const std::vector<Vec3f>& vertices1, const std::vector<Triangle>& triangles1,
-                  const std::vector<Vec3f>& vertices2, const std::vector<Triangle>& triangles2, SplitMethodType split_method, bool verbose = true);
-
-template<typename BV>
-bool collide_Test2(const Transform3f& tf,
-                   const std::vector<Vec3f>& vertices1, const std::vector<Triangle>& triangles1,
-                   const std::vector<Vec3f>& vertices2, const std::vector<Triangle>& triangles2, SplitMethodType split_method, bool verbose = true);
-
-template<typename BV, typename TraversalNode>
-bool collide_Test_Oriented(const Transform3f& tf,
-                           const std::vector<Vec3f>& vertices1, const std::vector<Triangle>& triangles1,
-                           const std::vector<Vec3f>& vertices2, const std::vector<Triangle>& triangles2, SplitMethodType split_method, bool verbose = true);
-
-
-template<typename BV>
-bool test_collide_func(const Transform3f& tf,
-                       const std::vector<Vec3f>& vertices1, const std::vector<Triangle>& triangles1,
-                       const std::vector<Vec3f>& vertices2, const std::vector<Triangle>& triangles2, SplitMethodType split_method);
-
-int num_max_contacts = std::numeric_limits<int>::max();
-
-std::vector<Contact> global_pairs;
-std::vector<Contact> global_pairs_now;
-
-BOOST_AUTO_TEST_CASE(OBB_Box_test)
-{
-  FCL_REAL r_extents[] = {-1000, -1000, -1000, 1000, 1000, 1000};
-  std::vector<Transform3f> rotate_transform;
-  generateRandomTransforms(r_extents, rotate_transform, 1);
-  
-  AABB aabb1;
-  aabb1.min_ = Vec3f(-600, -600, -600);
-  aabb1.max_ = Vec3f(600, 600, 600);
-
-  OBB obb1;
-  convertBV(aabb1, rotate_transform[0], obb1);
-  Box box1;
-  Transform3f box1_tf;
-  constructBox(aabb1, rotate_transform[0], box1, box1_tf);
-
-  FCL_REAL extents[] = {-1000, -1000, -1000, 1000, 1000, 1000};
-  std::size_t n = 1000;
-
-  std::vector<Transform3f> transforms;
-  generateRandomTransforms(extents, transforms, n);
-
-  for(std::size_t i = 0; i < transforms.size(); ++i)
-  {
-    AABB aabb;
-    aabb.min_ = aabb1.min_ * 0.5;
-    aabb.max_ = aabb1.max_ * 0.5;    
-
-    OBB obb2;
-    convertBV(aabb, transforms[i], obb2);
-    
-    Box box2;
-    Transform3f box2_tf;
-    constructBox(aabb, transforms[i], box2, box2_tf);
-
-    GJKSolver solver;
-
-    bool overlap_obb = obb1.overlap(obb2);
-    bool overlap_box = solver.shapeIntersect(box1, box1_tf, box2, box2_tf, NULL, NULL, NULL);
-    
-    BOOST_CHECK(overlap_obb == overlap_box);
-  }
-}
-
-BOOST_AUTO_TEST_CASE(OBB_shape_test)
-{
-  FCL_REAL r_extents[] = {-1000, -1000, -1000, 1000, 1000, 1000};
-  std::vector<Transform3f> rotate_transform;
-  generateRandomTransforms(r_extents, rotate_transform, 1);
-  
-  AABB aabb1;
-  aabb1.min_ = Vec3f(-600, -600, -600);
-  aabb1.max_ = Vec3f(600, 600, 600);
-
-  OBB obb1;
-  convertBV(aabb1, rotate_transform[0], obb1);
-  Box box1;
-  Transform3f box1_tf;
-  constructBox(aabb1, rotate_transform[0], box1, box1_tf);
-
-  FCL_REAL extents[] = {-1000, -1000, -1000, 1000, 1000, 1000};
-  std::size_t n = 1000;
-
-  std::vector<Transform3f> transforms;
-  generateRandomTransforms(extents, transforms, n);
-
-  for(std::size_t i = 0; i < transforms.size(); ++i)
-  {
-    FCL_REAL len = (aabb1.max_[0] - aabb1.min_[0]) * 0.5;
-    OBB obb2;
-    GJKSolver solver;
- 
-    {  
-      Sphere sphere(len);
-      computeBV(sphere, transforms[i], obb2);
- 
-      bool overlap_obb = obb1.overlap(obb2);
-      bool overlap_sphere = solver.shapeIntersect(box1, box1_tf, sphere, transforms[i], NULL, NULL, NULL);
-      BOOST_CHECK(overlap_obb >= overlap_sphere);
-    }
-
-    {
-      Capsule capsule(len, 2 * len);
-      computeBV(capsule, transforms[i], obb2);
-      
-      bool overlap_obb = obb1.overlap(obb2);
-      bool overlap_capsule = solver.shapeIntersect(box1, box1_tf, capsule, transforms[i], NULL, NULL, NULL);
-      BOOST_CHECK(overlap_obb >= overlap_capsule);
-    }
-
-    {
-      Cone cone(len, 2 * len);
-      computeBV(cone, transforms[i], obb2);
-      
-      bool overlap_obb = obb1.overlap(obb2);
-      bool overlap_cone = solver.shapeIntersect(box1, box1_tf, cone, transforms[i], NULL, NULL, NULL);
-      BOOST_CHECK(overlap_obb >= overlap_cone);
-    }
-
-    {
-      Cylinder cylinder(len, 2 * len);
-      computeBV(cylinder, transforms[i], obb2);
-      
-      bool overlap_obb = obb1.overlap(obb2);
-      bool overlap_cylinder = solver.shapeIntersect(box1, box1_tf, cylinder, transforms[i], NULL, NULL, NULL);
-      BOOST_CHECK(overlap_obb >= overlap_cylinder);
-    }
-  }
-}
-
-BOOST_AUTO_TEST_CASE(OBB_AABB_test)
-{
-  FCL_REAL extents[] = {-1000, -1000, -1000, 1000, 1000, 1000};
-  std::size_t n = 1000;
-
-  std::vector<Transform3f> transforms;
-  generateRandomTransforms(extents, transforms, n);
-
-  AABB aabb1;
-  aabb1.min_ = Vec3f(-600, -600, -600);
-  aabb1.max_ = Vec3f(600, 600, 600);
-  
-  OBB obb1;
-  convertBV(aabb1, Transform3f(), obb1);
-  
-  for(std::size_t i = 0; i < transforms.size(); ++i)
-  {
-    AABB aabb;
-    aabb.min_ = aabb1.min_ * 0.5;
-    aabb.max_ = aabb1.max_ * 0.5;    
-
-    AABB aabb2 = translate(aabb, transforms[i].getTranslation());
-    
-    OBB obb2;
-    convertBV(aabb, Transform3f(transforms[i].getTranslation()), obb2);
-
-    bool overlap_aabb = aabb1.overlap(aabb2);
-    bool overlap_obb = obb1.overlap(obb2);
-    if(overlap_aabb != overlap_obb)
-    {
-      std::cout << aabb1.min_ << " " << aabb1.max_ << std::endl;
-      std::cout << aabb2.min_ << " " << aabb2.max_ << std::endl;
-      std::cout << obb1.To << " " << obb1.extent << " " << obb1.axes << std::endl;
-      std::cout << obb2.To << " " << obb2.extent << " " << obb2.axes << std::endl;
-    }
-
-    BOOST_CHECK(overlap_aabb == overlap_obb);
-  }
-  std::cout << std::endl;
-}
-
-// This test
-//  1. load two objects "env.obj" and "rob.obj" from directory
-//     fcl_resources,
-//  2. generates n random transformation and for each of them denote tf,
-//    2.1 performs a collision test where object 1 is in pose tf. All
-//        the contacts are stored in vector global_pairs.
-//    2.2 performs a series of collision tests with the same object and
-//        the same poses using various methods and various types of bounding
-//        volumes. Each time the contacts are stored in vector global_pairs_now.
-//
-// The methods used to test collision are
-//  - collide_Test that calls function collide with tf for object1 pose and
-//      identity for the second object pose,
-//  - collide_Test2 that moves all vertices of object1 in pose tf and that
-//      calls function collide with identity for both object poses,
-//
-BOOST_AUTO_TEST_CASE(mesh_mesh)
-{
-  std::vector<Vec3f> p1, p2;
-  std::vector<Triangle> t1, t2;
-  boost::filesystem::path path(TEST_RESOURCES_DIR);
-  
-  loadOBJFile((path / "env.obj").string().c_str(), p1, t1);
-  loadOBJFile((path / "rob.obj").string().c_str(), p2, t2);
-
-  std::vector<Transform3f> transforms;
-  FCL_REAL extents[] = {-3000, -3000, 0, 3000, 3000, 3000};
-  std::size_t n = 10;
-  bool verbose = false;
-
-  generateRandomTransforms(extents, transforms, n);
-
-  // collision
-  for(std::size_t i = 0; i < transforms.size(); ++i)
-  {
-    Eigen::IOFormat f (Eigen::FullPrecision, 0, ", ", ",", "", "", "(", ")");
-
-    std::cerr
-      << "q1=" << transforms [i].getTranslation ().format (f) << "+" <<
-      transforms [i].getQuatRotation ().coeffs ().format (f) << std::endl;
-    global_pairs.clear();
-    global_pairs_now.clear();
-
-    // First test: constacts are stored in vector global_pairs
-    collide_Test<OBB>(transforms[i], p1, t1, p2, t2, SPLIT_METHOD_MEAN, verbose);
-
-    collide_Test<OBB>(transforms[i], p1, t1, p2, t2, SPLIT_METHOD_BV_CENTER, verbose);
-    assert (global_pairs.size() == global_pairs_now.size());
-    for(std::size_t j = 0; j < global_pairs.size(); ++j)
-    {
-      BOOST_CHECK(global_pairs[j].b1 == global_pairs_now[j].b1);
-      BOOST_CHECK(global_pairs[j].b2 == global_pairs_now[j].b2);
-    }
-
-    collide_Test<OBB>(transforms[i], p1, t1, p2, t2, SPLIT_METHOD_MEDIAN, verbose);
-    assert (global_pairs.size() == global_pairs_now.size());
-    for(std::size_t j = 0; j < global_pairs.size(); ++j)
-    {
-      BOOST_CHECK(global_pairs[j].b1 == global_pairs_now[j].b1);
-      BOOST_CHECK(global_pairs[j].b2 == global_pairs_now[j].b2);
-    }
-
-    collide_Test<RSS>(transforms[i], p1, t1, p2, t2, SPLIT_METHOD_MEAN, verbose);
-    assert (global_pairs.size() == global_pairs_now.size());
-    for(std::size_t j = 0; j < global_pairs.size(); ++j)
-    {
-      BOOST_CHECK(global_pairs[j].b1 == global_pairs_now[j].b1);
-      BOOST_CHECK(global_pairs[j].b2 == global_pairs_now[j].b2);
-    }
-
-    collide_Test<RSS>(transforms[i], p1, t1, p2, t2, SPLIT_METHOD_BV_CENTER, verbose);
-    assert (global_pairs.size() == global_pairs_now.size());
-    for(std::size_t j = 0; j < global_pairs.size(); ++j)
-    {
-      BOOST_CHECK(global_pairs[j].b1 == global_pairs_now[j].b1);
-      BOOST_CHECK(global_pairs[j].b2 == global_pairs_now[j].b2);
-    }
-
-    collide_Test<RSS>(transforms[i], p1, t1, p2, t2, SPLIT_METHOD_MEDIAN, verbose);
-    assert (global_pairs.size() == global_pairs_now.size());
-    for(std::size_t j = 0; j < global_pairs.size(); ++j)
-    {
-      BOOST_CHECK(global_pairs[j].b1 == global_pairs_now[j].b1);
-      BOOST_CHECK(global_pairs[j].b2 == global_pairs_now[j].b2);
-    }
-
-    collide_Test<AABB>(transforms[i], p1, t1, p2, t2, SPLIT_METHOD_MEAN, verbose);
-    assert (global_pairs.size() == global_pairs_now.size());
-    for(std::size_t j = 0; j < global_pairs.size(); ++j)
-    {
-      BOOST_CHECK(global_pairs[j].b1 == global_pairs_now[j].b1);
-      BOOST_CHECK(global_pairs[j].b2 == global_pairs_now[j].b2);
-    }
-
-    collide_Test<AABB>(transforms[i], p1, t1, p2, t2, SPLIT_METHOD_BV_CENTER, verbose);
-    assert (global_pairs.size() == global_pairs_now.size());
-    for(std::size_t j = 0; j < global_pairs.size(); ++j)
-    {
-      BOOST_CHECK(global_pairs[j].b1 == global_pairs_now[j].b1);
-      BOOST_CHECK(global_pairs[j].b2 == global_pairs_now[j].b2);
-    }
-
-    collide_Test<AABB>(transforms[i], p1, t1, p2, t2, SPLIT_METHOD_MEDIAN, verbose);
-    assert (global_pairs.size() == global_pairs_now.size());
-    for(std::size_t j = 0; j < global_pairs.size(); ++j)
-    {
-      BOOST_CHECK(global_pairs[j].b1 == global_pairs_now[j].b1);
-      BOOST_CHECK(global_pairs[j].b2 == global_pairs_now[j].b2);
-    }
-
-    collide_Test<KDOP<24> >(transforms[i], p1, t1, p2, t2, SPLIT_METHOD_MEAN, verbose);
-    assert (global_pairs.size() == global_pairs_now.size());
-    for(std::size_t j = 0; j < global_pairs.size(); ++j)
-    {
-      BOOST_CHECK(global_pairs[j].b1 == global_pairs_now[j].b1);
-      BOOST_CHECK(global_pairs[j].b2 == global_pairs_now[j].b2);
-    }
-
-    collide_Test<KDOP<24> >(transforms[i], p1, t1, p2, t2, SPLIT_METHOD_BV_CENTER, verbose);
-    assert (global_pairs.size() == global_pairs_now.size());
-    for(std::size_t j = 0; j < global_pairs.size(); ++j)
-    {
-      BOOST_CHECK(global_pairs[j].b1 == global_pairs_now[j].b1);
-      BOOST_CHECK(global_pairs[j].b2 == global_pairs_now[j].b2);
-    }
-
-    collide_Test<KDOP<24> >(transforms[i], p1, t1, p2, t2, SPLIT_METHOD_MEDIAN, verbose);
-    assert (global_pairs.size() == global_pairs_now.size());
-    for(std::size_t j = 0; j < global_pairs.size(); ++j)
-    {
-      BOOST_CHECK(global_pairs[j].b1 == global_pairs_now[j].b1);
-      BOOST_CHECK(global_pairs[j].b2 == global_pairs_now[j].b2);
-    }
-
-    collide_Test<KDOP<18> >(transforms[i], p1, t1, p2, t2, SPLIT_METHOD_MEAN, verbose);
-    assert (global_pairs.size() == global_pairs_now.size());
-    for(std::size_t j = 0; j < global_pairs.size(); ++j)
-    {
-      BOOST_CHECK(global_pairs[j].b1 == global_pairs_now[j].b1);
-      BOOST_CHECK(global_pairs[j].b2 == global_pairs_now[j].b2);
-    }
-
-    collide_Test<KDOP<18> >(transforms[i], p1, t1, p2, t2, SPLIT_METHOD_BV_CENTER, verbose);
-    assert (global_pairs.size() == global_pairs_now.size());
-    for(std::size_t j = 0; j < global_pairs.size(); ++j)
-    {
-      BOOST_CHECK(global_pairs[j].b1 == global_pairs_now[j].b1);
-      BOOST_CHECK(global_pairs[j].b2 == global_pairs_now[j].b2);
-    }
-
-    collide_Test<KDOP<18> >(transforms[i], p1, t1, p2, t2, SPLIT_METHOD_MEDIAN, verbose);
-    assert (global_pairs.size() == global_pairs_now.size());
-    for(std::size_t j = 0; j < global_pairs.size(); ++j)
-    {
-      BOOST_CHECK(global_pairs[j].b1 == global_pairs_now[j].b1);
-      BOOST_CHECK(global_pairs[j].b2 == global_pairs_now[j].b2);
-    }
-
-    collide_Test<KDOP<16> >(transforms[i], p1, t1, p2, t2, SPLIT_METHOD_MEAN, verbose);
-    assert (global_pairs.size() == global_pairs_now.size());
-    for(std::size_t j = 0; j < global_pairs.size(); ++j)
-    {
-      BOOST_CHECK(global_pairs[j].b1 == global_pairs_now[j].b1);
-      BOOST_CHECK(global_pairs[j].b2 == global_pairs_now[j].b2);
-    }
-
-    collide_Test<KDOP<16> >(transforms[i], p1, t1, p2, t2, SPLIT_METHOD_BV_CENTER, verbose);
-    assert (global_pairs.size() == global_pairs_now.size());
-    for(std::size_t j = 0; j < global_pairs.size(); ++j)
-    {
-      BOOST_CHECK(global_pairs[j].b1 == global_pairs_now[j].b1);
-      BOOST_CHECK(global_pairs[j].b2 == global_pairs_now[j].b2);
-    }
-
-    collide_Test<KDOP<16> >(transforms[i], p1, t1, p2, t2, SPLIT_METHOD_MEDIAN, verbose);
-    assert (global_pairs.size() == global_pairs_now.size());
-    for(std::size_t j = 0; j < global_pairs.size(); ++j)
-    {
-      BOOST_CHECK(global_pairs[j].b1 == global_pairs_now[j].b1);
-      BOOST_CHECK(global_pairs[j].b2 == global_pairs_now[j].b2);
-    }
-
-    collide_Test2<OBB>(transforms[i], p1, t1, p2, t2, SPLIT_METHOD_MEAN, verbose);
-    assert (global_pairs.size() == global_pairs_now.size());
-    for(std::size_t j = 0; j < global_pairs.size(); ++j)
-    {
-      BOOST_CHECK(global_pairs[j].b1 == global_pairs_now[j].b1);
-      BOOST_CHECK(global_pairs[j].b2 == global_pairs_now[j].b2);
-    }
-
-    collide_Test2<OBB>(transforms[i], p1, t1, p2, t2, SPLIT_METHOD_BV_CENTER, verbose);
-    assert (global_pairs.size() == global_pairs_now.size());
-    for(std::size_t j = 0; j < global_pairs.size(); ++j)
-    {
-      BOOST_CHECK(global_pairs[j].b1 == global_pairs_now[j].b1);
-      BOOST_CHECK(global_pairs[j].b2 == global_pairs_now[j].b2);
-    }
-
-    collide_Test2<OBB>(transforms[i], p1, t1, p2, t2, SPLIT_METHOD_MEDIAN, verbose);
-    assert (global_pairs.size() == global_pairs_now.size());
-    for(std::size_t j = 0; j < global_pairs.size(); ++j)
-    {
-      BOOST_CHECK(global_pairs[j].b1 == global_pairs_now[j].b1);
-      BOOST_CHECK(global_pairs[j].b2 == global_pairs_now[j].b2);
-    }
-
-    collide_Test2<RSS>(transforms[i], p1, t1, p2, t2, SPLIT_METHOD_MEAN, verbose);
-    assert (global_pairs.size() == global_pairs_now.size());
-    for(std::size_t j = 0; j < global_pairs.size(); ++j)
-    {
-      BOOST_CHECK(global_pairs[j].b1 == global_pairs_now[j].b1);
-      BOOST_CHECK(global_pairs[j].b2 == global_pairs_now[j].b2);
-    }
-
-    collide_Test2<RSS>(transforms[i], p1, t1, p2, t2, SPLIT_METHOD_BV_CENTER, verbose);
-    assert (global_pairs.size() == global_pairs_now.size());
-    for(std::size_t j = 0; j < global_pairs.size(); ++j)
-    {
-      BOOST_CHECK(global_pairs[j].b1 == global_pairs_now[j].b1);
-      BOOST_CHECK(global_pairs[j].b2 == global_pairs_now[j].b2);
-    }
-
-    collide_Test2<RSS>(transforms[i], p1, t1, p2, t2, SPLIT_METHOD_MEDIAN, verbose);
-    assert (global_pairs.size() == global_pairs_now.size());
-    for(std::size_t j = 0; j < global_pairs.size(); ++j)
-    {
-      BOOST_CHECK(global_pairs[j].b1 == global_pairs_now[j].b1);
-      BOOST_CHECK(global_pairs[j].b2 == global_pairs_now[j].b2);
-    }
-
-    collide_Test2<AABB>(transforms[i], p1, t1, p2, t2, SPLIT_METHOD_MEAN, verbose);
-    assert (global_pairs.size() == global_pairs_now.size());
-    for(std::size_t j = 0; j < global_pairs.size(); ++j)
-    {
-      BOOST_CHECK(global_pairs[j].b1 == global_pairs_now[j].b1);
-      BOOST_CHECK(global_pairs[j].b2 == global_pairs_now[j].b2);
-    }
-
-    collide_Test2<AABB>(transforms[i], p1, t1, p2, t2, SPLIT_METHOD_BV_CENTER, verbose);
-    assert (global_pairs.size() == global_pairs_now.size());
-    for(std::size_t j = 0; j < global_pairs.size(); ++j)
-    {
-      BOOST_CHECK(global_pairs[j].b1 == global_pairs_now[j].b1);
-      BOOST_CHECK(global_pairs[j].b2 == global_pairs_now[j].b2);
-    }
-
-    collide_Test2<AABB>(transforms[i], p1, t1, p2, t2, SPLIT_METHOD_MEDIAN, verbose);
-    assert (global_pairs.size() == global_pairs_now.size());
-    for(std::size_t j = 0; j < global_pairs.size(); ++j)
-    {
-      BOOST_CHECK(global_pairs[j].b1 == global_pairs_now[j].b1);
-      BOOST_CHECK(global_pairs[j].b2 == global_pairs_now[j].b2);
-    }
-
-    collide_Test2<KDOP<24> >(transforms[i], p1, t1, p2, t2, SPLIT_METHOD_MEAN, verbose);
-    assert (global_pairs.size() == global_pairs_now.size());
-    for(std::size_t j = 0; j < global_pairs.size(); ++j)
-    {
-      BOOST_CHECK(global_pairs[j].b1 == global_pairs_now[j].b1);
-      BOOST_CHECK(global_pairs[j].b2 == global_pairs_now[j].b2);
-    }
-
-    collide_Test2<KDOP<24> >(transforms[i], p1, t1, p2, t2, SPLIT_METHOD_BV_CENTER, verbose);
-    assert (global_pairs.size() == global_pairs_now.size());
-    for(std::size_t j = 0; j < global_pairs.size(); ++j)
-    {
-      BOOST_CHECK(global_pairs[j].b1 == global_pairs_now[j].b1);
-      BOOST_CHECK(global_pairs[j].b2 == global_pairs_now[j].b2);
-    }
-
-    collide_Test2<KDOP<24> >(transforms[i], p1, t1, p2, t2, SPLIT_METHOD_MEDIAN, verbose);
-    assert (global_pairs.size() == global_pairs_now.size());
-    for(std::size_t j = 0; j < global_pairs.size(); ++j)
-    {
-      BOOST_CHECK(global_pairs[j].b1 == global_pairs_now[j].b1);
-      BOOST_CHECK(global_pairs[j].b2 == global_pairs_now[j].b2);
-    }
-
-    collide_Test2<KDOP<18> >(transforms[i], p1, t1, p2, t2, SPLIT_METHOD_MEAN, verbose);
-    assert (global_pairs.size() == global_pairs_now.size());
-    for(std::size_t j = 0; j < global_pairs.size(); ++j)
-    {
-      BOOST_CHECK(global_pairs[j].b1 == global_pairs_now[j].b1);
-      BOOST_CHECK(global_pairs[j].b2 == global_pairs_now[j].b2);
-    }
-
-    collide_Test2<KDOP<18> >(transforms[i], p1, t1, p2, t2, SPLIT_METHOD_BV_CENTER, verbose);
-    assert (global_pairs.size() == global_pairs_now.size());
-    for(std::size_t j = 0; j < global_pairs.size(); ++j)
-    {
-      BOOST_CHECK(global_pairs[j].b1 == global_pairs_now[j].b1);
-      BOOST_CHECK(global_pairs[j].b2 == global_pairs_now[j].b2);
-    }
-
-    collide_Test2<KDOP<18> >(transforms[i], p1, t1, p2, t2, SPLIT_METHOD_MEDIAN, verbose);
-    assert (global_pairs.size() == global_pairs_now.size());
-    for(std::size_t j = 0; j < global_pairs.size(); ++j)
-    {
-      BOOST_CHECK(global_pairs[j].b1 == global_pairs_now[j].b1);
-      BOOST_CHECK(global_pairs[j].b2 == global_pairs_now[j].b2);
-    }
-
-    collide_Test2<KDOP<16> >(transforms[i], p1, t1, p2, t2, SPLIT_METHOD_MEAN, verbose);
-    assert (global_pairs.size() == global_pairs_now.size());
-    for(std::size_t j = 0; j < global_pairs.size(); ++j)
-    {
-      BOOST_CHECK(global_pairs[j].b1 == global_pairs_now[j].b1);
-      BOOST_CHECK(global_pairs[j].b2 == global_pairs_now[j].b2);
-    }
-
-    collide_Test2<KDOP<16> >(transforms[i], p1, t1, p2, t2, SPLIT_METHOD_BV_CENTER, verbose);
-    assert (global_pairs.size() == global_pairs_now.size());
-    for(std::size_t j = 0; j < global_pairs.size(); ++j)
-    {
-      BOOST_CHECK(global_pairs[j].b1 == global_pairs_now[j].b1);
-      BOOST_CHECK(global_pairs[j].b2 == global_pairs_now[j].b2);
-    }
-
-    collide_Test2<KDOP<16> >(transforms[i], p1, t1, p2, t2, SPLIT_METHOD_MEDIAN, verbose);
-    assert (global_pairs.size() == global_pairs_now.size());
-    for(std::size_t j = 0; j < global_pairs.size(); ++j)
-    {
-      BOOST_CHECK(global_pairs[j].b1 == global_pairs_now[j].b1);
-      BOOST_CHECK(global_pairs[j].b2 == global_pairs_now[j].b2);
-    }
-
-    collide_Test_Oriented<OBB, MeshCollisionTraversalNodeOBB>(transforms[i], p1, t1, p2, t2, SPLIT_METHOD_MEAN, verbose);
-
-    assert (global_pairs.size() == global_pairs_now.size());
-    for(std::size_t j = 0; j < global_pairs.size(); ++j)
-    {
-      BOOST_CHECK(global_pairs[j].b1 == global_pairs_now[j].b1);
-      BOOST_CHECK(global_pairs[j].b2 == global_pairs_now[j].b2);
-    }
-
-    collide_Test_Oriented<OBB, MeshCollisionTraversalNodeOBB>(transforms[i], p1, t1, p2, t2, SPLIT_METHOD_BV_CENTER, verbose);
-
-    assert (global_pairs.size() == global_pairs_now.size());
-    for(std::size_t j = 0; j < global_pairs.size(); ++j)
-    {
-      BOOST_CHECK(global_pairs[j].b1 == global_pairs_now[j].b1);
-      BOOST_CHECK(global_pairs[j].b2 == global_pairs_now[j].b2);
-    }
-
-    collide_Test_Oriented<OBB, MeshCollisionTraversalNodeOBB>(transforms[i], p1, t1, p2, t2, SPLIT_METHOD_MEDIAN, verbose);
-    assert (global_pairs.size() == global_pairs_now.size());
-    for(std::size_t j = 0; j < global_pairs.size(); ++j)
-    {
-      BOOST_CHECK(global_pairs[j].b1 == global_pairs_now[j].b1);
-      BOOST_CHECK(global_pairs[j].b2 == global_pairs_now[j].b2);
-    }
-
-    collide_Test_Oriented<RSS, MeshCollisionTraversalNodeRSS>(transforms[i], p1, t1, p2, t2, SPLIT_METHOD_MEAN, verbose);
-    assert (global_pairs.size() == global_pairs_now.size());
-    for(std::size_t j = 0; j < global_pairs.size(); ++j)
-    {
-      BOOST_CHECK(global_pairs[j].b1 == global_pairs_now[j].b1);
-      BOOST_CHECK(global_pairs[j].b2 == global_pairs_now[j].b2);
-    }
-
-    collide_Test_Oriented<RSS, MeshCollisionTraversalNodeRSS>(transforms[i], p1, t1, p2, t2, SPLIT_METHOD_BV_CENTER, verbose);
-    assert (global_pairs.size() == global_pairs_now.size());
-    for(std::size_t j = 0; j < global_pairs.size(); ++j)
-    {
-      BOOST_CHECK(global_pairs[j].b1 == global_pairs_now[j].b1);
-      BOOST_CHECK(global_pairs[j].b2 == global_pairs_now[j].b2);
-    }
-
-    collide_Test_Oriented<RSS, MeshCollisionTraversalNodeRSS>(transforms[i], p1, t1, p2, t2, SPLIT_METHOD_MEDIAN, verbose);
-    assert (global_pairs.size() == global_pairs_now.size());
-    for(std::size_t j = 0; j < global_pairs.size(); ++j)
-    {
-      BOOST_CHECK(global_pairs[j].b1 == global_pairs_now[j].b1);
-      BOOST_CHECK(global_pairs[j].b2 == global_pairs_now[j].b2);
-    }
-
-    test_collide_func<RSS>(transforms[i], p1, t1, p2, t2, SPLIT_METHOD_MEDIAN);
-    assert (global_pairs.size() == global_pairs_now.size());
-    for(std::size_t j = 0; j < global_pairs.size(); ++j)
-    {
-      BOOST_CHECK(global_pairs[j].b1 == global_pairs_now[j].b1);
-      BOOST_CHECK(global_pairs[j].b2 == global_pairs_now[j].b2);
-    }
-
-    test_collide_func<OBB>(transforms[i], p1, t1, p2, t2, SPLIT_METHOD_MEDIAN);
-    assert (global_pairs.size() == global_pairs_now.size());
-    for(std::size_t j = 0; j < global_pairs.size(); ++j)
-    {
-      BOOST_CHECK(global_pairs[j].b1 == global_pairs_now[j].b1);
-      BOOST_CHECK(global_pairs[j].b2 == global_pairs_now[j].b2);
-    }
-
-    test_collide_func<AABB>(transforms[i], p1, t1, p2, t2, SPLIT_METHOD_MEDIAN);
-    assert (global_pairs.size() == global_pairs_now.size());
-    for(std::size_t j = 0; j < global_pairs.size(); ++j)
-    {
-      BOOST_CHECK(global_pairs[j].b1 == global_pairs_now[j].b1);
-      BOOST_CHECK(global_pairs[j].b2 == global_pairs_now[j].b2);
-    }
-
-    
-    collide_Test<kIOS>(transforms[i], p1, t1, p2, t2, SPLIT_METHOD_MEAN, verbose);
-    assert (global_pairs.size() == global_pairs_now.size());
-    for(std::size_t j = 0; j < global_pairs.size(); ++j)
-    {
-      BOOST_CHECK(global_pairs[j].b1 == global_pairs_now[j].b1);
-      BOOST_CHECK(global_pairs[j].b2 == global_pairs_now[j].b2);
-    }
-
-    collide_Test<kIOS>(transforms[i], p1, t1, p2, t2, SPLIT_METHOD_MEDIAN, verbose);
-    assert (global_pairs.size() == global_pairs_now.size());
-    for(std::size_t j = 0; j < global_pairs.size(); ++j)
-    {
-      BOOST_CHECK(global_pairs[j].b1 == global_pairs_now[j].b1);
-      BOOST_CHECK(global_pairs[j].b2 == global_pairs_now[j].b2);
-    }
-
-    collide_Test<kIOS>(transforms[i], p1, t1, p2, t2, SPLIT_METHOD_BV_CENTER, verbose);
-    assert (global_pairs.size() == global_pairs_now.size());
-    for(std::size_t j = 0; j < global_pairs.size(); ++j)
-    {
-      BOOST_CHECK(global_pairs[j].b1 == global_pairs_now[j].b1);
-      BOOST_CHECK(global_pairs[j].b2 == global_pairs_now[j].b2);
-    }
-
-    collide_Test2<kIOS>(transforms[i], p1, t1, p2, t2, SPLIT_METHOD_MEAN, verbose);
-    assert (global_pairs.size() == global_pairs_now.size());
-    for(std::size_t j = 0; j < global_pairs.size(); ++j)
-    {
-      BOOST_CHECK(global_pairs[j].b1 == global_pairs_now[j].b1);
-      BOOST_CHECK(global_pairs[j].b2 == global_pairs_now[j].b2);
-    }
-
-    collide_Test2<kIOS>(transforms[i], p1, t1, p2, t2, SPLIT_METHOD_MEDIAN, verbose);
-    assert (global_pairs.size() == global_pairs_now.size());
-    for(std::size_t j = 0; j < global_pairs.size(); ++j)
-    {
-      BOOST_CHECK(global_pairs[j].b1 == global_pairs_now[j].b1);
-      BOOST_CHECK(global_pairs[j].b2 == global_pairs_now[j].b2);
-    }
-
-    collide_Test2<kIOS>(transforms[i], p1, t1, p2, t2, SPLIT_METHOD_BV_CENTER, verbose);
-    assert (global_pairs.size() == global_pairs_now.size());
-    for(std::size_t j = 0; j < global_pairs.size(); ++j)
-    {
-      BOOST_CHECK(global_pairs[j].b1 == global_pairs_now[j].b1);
-      BOOST_CHECK(global_pairs[j].b2 == global_pairs_now[j].b2);
-    }
-    
-    collide_Test_Oriented<kIOS, MeshCollisionTraversalNodekIOS>(transforms[i], p1, t1, p2, t2, SPLIT_METHOD_MEAN, verbose);
-    assert (global_pairs.size() == global_pairs_now.size());
-    for(std::size_t j = 0; j < global_pairs.size(); ++j)
-    {
-      BOOST_CHECK(global_pairs[j].b1 == global_pairs_now[j].b1);
-      BOOST_CHECK(global_pairs[j].b2 == global_pairs_now[j].b2);
-    }
-
-    collide_Test_Oriented<kIOS, MeshCollisionTraversalNodekIOS>(transforms[i], p1, t1, p2, t2, SPLIT_METHOD_MEDIAN, verbose);
-    assert (global_pairs.size() == global_pairs_now.size());
-    for(std::size_t j = 0; j < global_pairs.size(); ++j)
-    {
-      BOOST_CHECK(global_pairs[j].b1 == global_pairs_now[j].b1);
-      BOOST_CHECK(global_pairs[j].b2 == global_pairs_now[j].b2);
-    }
-
-    collide_Test_Oriented<kIOS, MeshCollisionTraversalNodekIOS>(transforms[i], p1, t1, p2, t2, SPLIT_METHOD_BV_CENTER, verbose);
-    assert (global_pairs.size() == global_pairs_now.size());
-    for(std::size_t j = 0; j < global_pairs.size(); ++j)
-    {
-      BOOST_CHECK(global_pairs[j].b1 == global_pairs_now[j].b1);
-      BOOST_CHECK(global_pairs[j].b2 == global_pairs_now[j].b2);
-    }
-
-    test_collide_func<kIOS>(transforms[i], p1, t1, p2, t2, SPLIT_METHOD_MEAN);
-    assert (global_pairs.size() == global_pairs_now.size());
-    for(std::size_t j = 0; j < global_pairs.size(); ++j)
-    {
-      BOOST_CHECK(global_pairs[j].b1 == global_pairs_now[j].b1);
-      BOOST_CHECK(global_pairs[j].b2 == global_pairs_now[j].b2);
-    }
-
-    test_collide_func<kIOS>(transforms[i], p1, t1, p2, t2, SPLIT_METHOD_MEDIAN);
-    assert (global_pairs.size() == global_pairs_now.size());
-    for(std::size_t j = 0; j < global_pairs.size(); ++j)
-    {
-      BOOST_CHECK(global_pairs[j].b1 == global_pairs_now[j].b1);
-      BOOST_CHECK(global_pairs[j].b2 == global_pairs_now[j].b2);
-    }
-
-    test_collide_func<kIOS>(transforms[i], p1, t1, p2, t2, SPLIT_METHOD_BV_CENTER);
-    assert (global_pairs.size() == global_pairs_now.size());
-    for(std::size_t j = 0; j < global_pairs.size(); ++j)
-    {
-      BOOST_CHECK(global_pairs[j].b1 == global_pairs_now[j].b1);
-      BOOST_CHECK(global_pairs[j].b2 == global_pairs_now[j].b2);
-    }
-
-    collide_Test<OBBRSS>(transforms[i], p1, t1, p2, t2, SPLIT_METHOD_MEAN, verbose);
-    assert (global_pairs.size() == global_pairs_now.size());
-    for(std::size_t j = 0; j < global_pairs.size(); ++j)
-    {
-      BOOST_CHECK(global_pairs[j].b1 == global_pairs_now[j].b1);
-      BOOST_CHECK(global_pairs[j].b2 == global_pairs_now[j].b2);
-    }
-
-    collide_Test<OBBRSS>(transforms[i], p1, t1, p2, t2, SPLIT_METHOD_MEDIAN, verbose);
-    assert (global_pairs.size() == global_pairs_now.size());
-    for(std::size_t j = 0; j < global_pairs.size(); ++j)
-    {
-      BOOST_CHECK(global_pairs[j].b1 == global_pairs_now[j].b1);
-      BOOST_CHECK(global_pairs[j].b2 == global_pairs_now[j].b2);
-    }
-
-    collide_Test<OBBRSS>(transforms[i], p1, t1, p2, t2, SPLIT_METHOD_BV_CENTER, verbose);
-    assert (global_pairs.size() == global_pairs_now.size());
-    for(std::size_t j = 0; j < global_pairs.size(); ++j)
-    {
-      BOOST_CHECK(global_pairs[j].b1 == global_pairs_now[j].b1);
-      BOOST_CHECK(global_pairs[j].b2 == global_pairs_now[j].b2);
-    }
-
-    collide_Test2<OBBRSS>(transforms[i], p1, t1, p2, t2, SPLIT_METHOD_MEAN, verbose);
-    assert (global_pairs.size() == global_pairs_now.size());
-    for(std::size_t j = 0; j < global_pairs.size(); ++j)
-    {
-      BOOST_CHECK(global_pairs[j].b1 == global_pairs_now[j].b1);
-      BOOST_CHECK(global_pairs[j].b2 == global_pairs_now[j].b2);
-    }
-
-    collide_Test2<OBBRSS>(transforms[i], p1, t1, p2, t2, SPLIT_METHOD_MEDIAN, verbose);
-    assert (global_pairs.size() == global_pairs_now.size());
-    for(std::size_t j = 0; j < global_pairs.size(); ++j)
-    {
-      BOOST_CHECK(global_pairs[j].b1 == global_pairs_now[j].b1);
-      BOOST_CHECK(global_pairs[j].b2 == global_pairs_now[j].b2);
-    }
-
-    collide_Test2<OBBRSS>(transforms[i], p1, t1, p2, t2, SPLIT_METHOD_BV_CENTER, verbose);
-    assert (global_pairs.size() == global_pairs_now.size());
-    for(std::size_t j = 0; j < global_pairs.size(); ++j)
-    {
-      BOOST_CHECK(global_pairs[j].b1 == global_pairs_now[j].b1);
-      BOOST_CHECK(global_pairs[j].b2 == global_pairs_now[j].b2);
-    }
-    
-    collide_Test_Oriented<OBBRSS, MeshCollisionTraversalNodeOBBRSS>(transforms[i], p1, t1, p2, t2, SPLIT_METHOD_MEAN, verbose);
-    assert (global_pairs.size() == global_pairs_now.size());
-    for(std::size_t j = 0; j < global_pairs.size(); ++j)
-    {
-      BOOST_CHECK(global_pairs[j].b1 == global_pairs_now[j].b1);
-      BOOST_CHECK(global_pairs[j].b2 == global_pairs_now[j].b2);
-    }
-
-    collide_Test_Oriented<OBBRSS, MeshCollisionTraversalNodeOBBRSS>(transforms[i], p1, t1, p2, t2, SPLIT_METHOD_MEDIAN, verbose);
-    assert (global_pairs.size() == global_pairs_now.size());
-    for(std::size_t j = 0; j < global_pairs.size(); ++j)
-    {
-      BOOST_CHECK(global_pairs[j].b1 == global_pairs_now[j].b1);
-      BOOST_CHECK(global_pairs[j].b2 == global_pairs_now[j].b2);
-    }
-
-    collide_Test_Oriented<OBBRSS, MeshCollisionTraversalNodeOBBRSS>(transforms[i], p1, t1, p2, t2, SPLIT_METHOD_BV_CENTER, verbose);
-    assert (global_pairs.size() == global_pairs_now.size());
-    for(std::size_t j = 0; j < global_pairs.size(); ++j)
-    {
-      BOOST_CHECK(global_pairs[j].b1 == global_pairs_now[j].b1);
-      BOOST_CHECK(global_pairs[j].b2 == global_pairs_now[j].b2);
-    }
-
-    test_collide_func<OBBRSS>(transforms[i], p1, t1, p2, t2, SPLIT_METHOD_MEAN);
-    assert (global_pairs.size() == global_pairs_now.size());
-    for(std::size_t j = 0; j < global_pairs.size(); ++j)
-    {
-      BOOST_CHECK(global_pairs[j].b1 == global_pairs_now[j].b1);
-      BOOST_CHECK(global_pairs[j].b2 == global_pairs_now[j].b2);
-    }
-
-    test_collide_func<OBBRSS>(transforms[i], p1, t1, p2, t2, SPLIT_METHOD_MEDIAN);
-    assert (global_pairs.size() == global_pairs_now.size());
-    for(std::size_t j = 0; j < global_pairs.size(); ++j)
-    {
-      BOOST_CHECK(global_pairs[j].b1 == global_pairs_now[j].b1);
-      BOOST_CHECK(global_pairs[j].b2 == global_pairs_now[j].b2);
-    }
-
-    test_collide_func<OBBRSS>(transforms[i], p1, t1, p2, t2, SPLIT_METHOD_BV_CENTER);
-    assert (global_pairs.size() == global_pairs_now.size());
-    for(std::size_t j = 0; j < global_pairs.size(); ++j)
-    {
-      BOOST_CHECK(global_pairs[j].b1 == global_pairs_now[j].b1);
-      BOOST_CHECK(global_pairs[j].b2 == global_pairs_now[j].b2);
-    }
-  }
-}
-
-
-template<typename BV>
-bool collide_Test2(const Transform3f& tf,
-                   const std::vector<Vec3f>& vertices1, const std::vector<Triangle>& triangles1,
-                   const std::vector<Vec3f>& vertices2, const std::vector<Triangle>& triangles2, SplitMethodType split_method, bool verbose)
-{
-  BVHModel<BV> m1;
-  BVHModel<BV> m2;
-  m1.bv_splitter.reset(new BVSplitter<BV>(split_method));
-  m2.bv_splitter.reset(new BVSplitter<BV>(split_method));
-
-  std::vector<Vec3f> vertices1_new(vertices1.size());
-  for(unsigned int i = 0; i < vertices1_new.size(); ++i)
-  {
-    vertices1_new[i] = tf.transform(vertices1[i]);
-  }
-
-
-  m1.beginModel();
-  m1.addSubModel(vertices1_new, triangles1);
-  m1.endModel();
-
-  m2.beginModel();
-  m2.addSubModel(vertices2, triangles2);
-  m2.endModel();
-
-  Transform3f pose1, pose2;
-
-  CollisionResult local_result;
-  CollisionRequest request (CONTACT, num_max_contacts);
-  MeshCollisionTraversalNode<BV> node (request);
-
-  bool success = initialize <BV> (node, m1, pose1, m2, pose2, local_result);
-  assert (success);
-
-  node.enable_statistics = verbose;
-
-  collide(&node, request, local_result);
-
-  if(local_result.numContacts() > 0)
-  {
-    if(global_pairs.size() == 0)
-    {
-      local_result.getContacts(global_pairs);
-      std::sort(global_pairs.begin(), global_pairs.end());
-    }
-    else
-    {
-      local_result.getContacts(global_pairs_now);
-      std::sort(global_pairs_now.begin(), global_pairs_now.end());
-    }
-
-
-    if(verbose)
-      std::cout << "in collision " << local_result.numContacts() << ": " << std::endl;
-    if(verbose) std::cout << node.num_bv_tests << " " << node.num_leaf_tests << std::endl;
-    return true;
-  }
-  else
-  {
-    if(verbose) std::cout << "collision free " << std::endl;
-    if(verbose) std::cout << node.num_bv_tests << " " << node.num_leaf_tests << std::endl;
-    return false;
-  }
-}
-
-// Test collision between two sets of triangles
-// first object is in pose tf, second object is in pose identity.
-template<typename BV>
-bool collide_Test(const Transform3f& tf,
-                  const std::vector<Vec3f>& vertices1, const std::vector<Triangle>& triangles1,
-                  const std::vector<Vec3f>& vertices2, const std::vector<Triangle>& triangles2, SplitMethodType split_method, bool verbose)
-{
-  BVHModel<BV> m1;
-  BVHModel<BV> m2;
-  m1.bv_splitter.reset(new BVSplitter<BV>(split_method));
-  m2.bv_splitter.reset(new BVSplitter<BV>(split_method));
-
-  m1.beginModel();
-  m1.addSubModel(vertices1, triangles1);
-  m1.endModel();
-
-  m2.beginModel();
-  m2.addSubModel(vertices2, triangles2);
-  m2.endModel();
-
-  Transform3f pose1(tf), pose2;
-
-  CollisionResult local_result;
-  CollisionRequest request (CONTACT, num_max_contacts);
-  MeshCollisionTraversalNode<BV> node (request);
-
-  bool success = initialize <BV> (node, m1, pose1, m2, pose2, local_result);
-  assert (success);
-
-  node.enable_statistics = verbose;
-
-  collide(&node, request, local_result);
-
-  if(local_result.numContacts() > 0)
-  {
-    if(global_pairs.size() == 0)
-    {
-      local_result.getContacts(global_pairs);
-      std::sort(global_pairs.begin(), global_pairs.end());
-    }
-    else
-    {
-      local_result.getContacts(global_pairs_now);
-      std::sort(global_pairs_now.begin(), global_pairs_now.end());
-    }
-
-    if(verbose)
-      std::cout << "in collision " << local_result.numContacts() << ": " << std::endl;
-    if(verbose) std::cout << node.num_bv_tests << " " << node.num_leaf_tests << std::endl;
-    return true;
-  }
-  else
-  {
-    if(verbose) std::cout << "collision free " << std::endl;
-    if(verbose) std::cout << node.num_bv_tests << " " << node.num_leaf_tests << std::endl;
-    return false;
-  }
-}
-
-// This function is the same as collide_Test, except that the type of traversal
-// node is chosen via template argument.
-template<typename BV, typename TraversalNode>
-bool collide_Test_Oriented(const Transform3f& tf,
-                           const std::vector<Vec3f>& vertices1, const std::vector<Triangle>& triangles1,
-                           const std::vector<Vec3f>& vertices2, const std::vector<Triangle>& triangles2, SplitMethodType split_method, bool verbose)
-{
-  BVHModel<BV> m1;
-  BVHModel<BV> m2;
-  m1.bv_splitter.reset(new BVSplitter<BV>(split_method));
-  m2.bv_splitter.reset(new BVSplitter<BV>(split_method));
-
-  m1.beginModel();
-  m1.addSubModel(vertices1, triangles1);
-  m1.endModel();
-
-  m2.beginModel();
-  m2.addSubModel(vertices2, triangles2);
-  m2.endModel();
-
-  Transform3f pose1(tf), pose2;
-
-  CollisionResult local_result;
-  CollisionRequest request (CONTACT, num_max_contacts);
-  TraversalNode node (request);
-  bool success = initialize (node, (const BVHModel<BV>&)m1, pose1,
-                             (const BVHModel<BV>&)m2, pose2, local_result);
-  assert (success);
-
-  node.enable_statistics = verbose;
-
-  collide(&node, request, local_result);
-
-  if(local_result.numContacts() > 0)
-  {
-    if(global_pairs.size() == 0)
-    {
-      local_result.getContacts(global_pairs);
-      std::sort(global_pairs.begin(), global_pairs.end());
-    }
-    else
-    {
-      local_result.getContacts(global_pairs_now);
-      std::sort(global_pairs_now.begin(), global_pairs_now.end());
-    }
-
-    if(verbose)
-      std::cout << "in collision " << local_result.numContacts() << ": " << std::endl;
-    if(verbose) std::cout << node.num_bv_tests << " " << node.num_leaf_tests << std::endl;
-    return true;
-  }
-  else
-  {
-    if(verbose) std::cout << "collision free " << std::endl;
-    if(verbose) std::cout << node.num_bv_tests << " " << node.num_leaf_tests << std::endl;
-    return false;
-  }
-}
-
-
-template<typename BV>
-bool test_collide_func(const Transform3f& tf,
-                       const std::vector<Vec3f>& vertices1, const std::vector<Triangle>& triangles1,
-                       const std::vector<Vec3f>& vertices2, const std::vector<Triangle>& triangles2, SplitMethodType split_method)
-{
-  BVHModel<BV> m1;
-  BVHModel<BV> m2;
-  m1.bv_splitter.reset(new BVSplitter<BV>(split_method));
-  m2.bv_splitter.reset(new BVSplitter<BV>(split_method));
-
-  m1.beginModel();
-  m1.addSubModel(vertices1, triangles1);
-  m1.endModel();
-
-  m2.beginModel();
-  m2.addSubModel(vertices2, triangles2);
-  m2.endModel();
-
-  Transform3f pose1(tf), pose2;
-
-  std::vector<Contact> contacts;
-
-  CollisionRequest request (CONTACT, num_max_contacts);
-  CollisionResult result;
-  int num_contacts = collide(&m1, pose1, &m2, pose2, 
-                             request, result);
-	
-  result.getContacts(contacts);
-
-  global_pairs_now.resize(num_contacts);
-
-  for(int i = 0; i < num_contacts; ++i)
-  {
-    global_pairs_now[i].b1 = contacts[i].b1;
-    global_pairs_now[i].b2 = contacts[i].b2;
-  }
-
-  std::sort(global_pairs_now.begin(), global_pairs_now.end());
-
-  if(num_contacts > 0) return true;
-  else return false;
-}
diff --git a/test/test_fcl_obb.cpp b/test/test_fcl_obb.cpp
deleted file mode 100644
index 279d08b66ffe3aac075389197ff2145ecd1ed231..0000000000000000000000000000000000000000
--- a/test/test_fcl_obb.cpp
+++ /dev/null
@@ -1,198 +0,0 @@
-/*
- * Software License Agreement (BSD License)
- *
- *  Copyright (c) 2014-2016, CNRS-LAAS
- *  Author: Florent Lamiraux
- *  All rights reserved.
- *
- *  Redistribution and use in source and binary forms, with or without
- *  modification, are permitted provided that the following conditions
- *  are met:
- *
- *   * Redistributions of source code must retain the above copyright
- *     notice, this list of conditions and the following disclaimer.
- *   * Redistributions in binary form must reproduce the above
- *     copyright notice, this list of conditions and the following
- *     disclaimer in the documentation and/or other materials provided
- *     with the distribution.
- *   * Neither the name of CNRS nor the names of its
- *     contributors may be used to endorse or promote products derived
- *     from this software without specific prior written permission.
- *
- *  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- *  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- *  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
- *  FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
- *  COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
- *  INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
- *  BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
- *  LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
- *  CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- *  LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
- *  ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- *  POSSIBILITY OF SUCH DAMAGE.
- */
-
-#define BOOST_TEST_MODULE FCL_DISTANCE_OBB
-#define BOOST_TEST_DYN_LINK
-#include <boost/test/unit_test.hpp>
-#include <boost/utility/binary.hpp>
-
-#define CHECK_CLOSE_TO_0(x, eps) BOOST_CHECK_CLOSE ((x + 1.0), (1.0), (eps))
-
-#include <boost/test/included/unit_test.hpp>
-#include <hpp/fcl/narrowphase/narrowphase.h>
-
-#include "../src/BV/OBB.h"
-#include "../src/distance_func_matrix.h"
-
-using hpp::fcl::FCL_REAL;
-
-struct Sample
-{
-  hpp::fcl::Matrix3f R;
-  hpp::fcl::Vec3f T;
-  hpp::fcl::Vec3f extent1;
-  hpp::fcl::Vec3f extent2;
-};
-
-struct Result
-{
-  bool overlap;
-  FCL_REAL distance;
-};
-
-BOOST_AUTO_TEST_CASE(obb_overlap)
-{
-  static const size_t nbSamples = 10000;
-  Sample sample [nbSamples];
-  FCL_REAL range = 2;
-  FCL_REAL sumDistance = 0, sumDistanceLowerBound = 0;
-  for (std::size_t i=0; i<nbSamples; ++i) {
-    // sample unit quaternion
-    FCL_REAL u1 = (FCL_REAL) rand() / RAND_MAX;
-    FCL_REAL u2 = (FCL_REAL) rand() / RAND_MAX;
-    FCL_REAL u3 = (FCL_REAL) rand() / RAND_MAX;
-
-    FCL_REAL q1 = sqrt (1-u1)*sin(2*M_PI*u2);
-    FCL_REAL q2 = sqrt (1-u1)*cos(2*M_PI*u2);
-    FCL_REAL q3 = sqrt (u1) * sin(2*M_PI*u3);
-    FCL_REAL q4 = sqrt (u1) * cos(2*M_PI*u3);
-    hpp::fcl::Quaternion3f (q1, q2, q3, q4).toRotation (sample [i].R);
-
-    // sample translation
-    sample [i].T = hpp::fcl::Vec3f ((FCL_REAL) range * rand() / RAND_MAX,
-			       (FCL_REAL) range * rand() / RAND_MAX,
-			       (FCL_REAL) range * rand() / RAND_MAX);
-
-    // sample extents
-    sample [i].extent1 = hpp::fcl::Vec3f ((FCL_REAL) rand() / RAND_MAX,
-				     (FCL_REAL) rand() / RAND_MAX,
-				     (FCL_REAL) rand() / RAND_MAX);
-
-    sample [i].extent2 = hpp::fcl::Vec3f ((FCL_REAL) rand() / RAND_MAX,
-				     (FCL_REAL) rand() / RAND_MAX,
-				     (FCL_REAL) rand() / RAND_MAX);
-  }
-
-  // Compute result for each function
-  Result resultDistance [nbSamples];
-  Result resultDisjoint [nbSamples];
-  Result resultDisjointAndLowerBound [nbSamples];
-
-  hpp::fcl::Transform3f tf1;
-  hpp::fcl::Transform3f tf2;
-  hpp::fcl::Box box1 (0, 0, 0);
-  hpp::fcl::Box box2 (0, 0, 0);
-  hpp::fcl::DistanceRequest request (false, 0, 0, hpp::fcl::GST_INDEP);
-  hpp::fcl::DistanceResult result;
-  FCL_REAL distance;
-  FCL_REAL squaredDistance;
-  timeval t0, t1;
-  hpp::fcl::GJKSolver gjkSolver;
-
-  // ShapeShapeDistance
-  gettimeofday (&t0, NULL);
-  for (std::size_t i=0; i<nbSamples; ++i) {
-    box1.side = 2*sample [i].extent1;
-    box2.side = 2*sample [i].extent2;
-    tf2.setTransform (sample [i].R, sample [i].T);
-    resultDistance [i].distance =
-      hpp::fcl::ShapeShapeDistance<hpp::fcl::Box, hpp::fcl::Box, hpp::fcl::GJKSolver>
-      (&box1, tf1, &box2, tf2, &gjkSolver, request, result);
-    resultDistance [i].overlap = (resultDistance [i].distance < 0);
-    if (resultDistance [i].distance < 0) {
-      resultDistance [i].distance = 0;
-    }
-    result.clear ();
-  }    
-  gettimeofday (&t1, NULL);
-  double t = (t1.tv_sec - t0.tv_sec) + 1e-6*(t1.tv_usec - t0.tv_usec + 0.);
-  std::cout << "Time for " << nbSamples
-	    << " calls to ShapeShapeDistance (in seconds): "
-	    << t << std::endl;
-
-  // obbDisjointAndLowerBoundDistance
-  gettimeofday (&t0, NULL);
-  for (std::size_t i=0; i<nbSamples; ++i) {
-    resultDisjointAndLowerBound [i].overlap =
-      !obbDisjointAndLowerBoundDistance (sample [i].R, sample [i].T,
-					 sample [i].extent1,
-					 sample [i].extent2,
-					 squaredDistance);
-    resultDisjointAndLowerBound [i].distance = sqrt (squaredDistance);
-  }    
-  gettimeofday (&t1, NULL);
-  t = (t1.tv_sec - t0.tv_sec) + 1e-6*(t1.tv_usec - t0.tv_usec + 0.);
-  std::cout << "Time for " << nbSamples
-	    << " calls to obbDisjointAndLowerBoundDistance"
-	    << " (in seconds): "
-	    << t << std::endl;
-
-  gettimeofday (&t0, NULL);
-  for (std::size_t i=0; i<nbSamples; ++i) {
-    resultDisjoint [i].overlap =
-      !obbDisjoint (sample [i].R, sample [i].T,
-		    sample [i].extent1,
-		    sample [i].extent2);
-    resultDisjoint [i].distance = 0;
-  }    
-  gettimeofday (&t1, NULL);
-  t = (t1.tv_sec - t0.tv_sec) + 1e-6*(t1.tv_usec - t0.tv_usec + 0.);
-  std::cout << "Time for " << nbSamples << " calls to obbDisjoint"
-	    << " (in seconds): "
-	    << t << std::endl;
-
-  // Test correctness of results
-  bool res1, res2, res3;
-  for (std::size_t i=0; i<nbSamples; ++i) {
-    res1 = resultDisjoint [i].overlap;
-    res2 = resultDisjointAndLowerBound [i].overlap;
-    res3 = resultDistance [i].overlap;
-    sumDistanceLowerBound += resultDisjointAndLowerBound [i].distance;
-    sumDistance += resultDistance [i].distance;
-
-#if 0
-    std::cout << "ShapeShapeDistance:    overlap: "
-	      << resultDistance [i].overlap
-	      << ", distance: " << resultDistance [i].distance << std::endl;
-    std::cout << "disjoint:              overlap: "
-	      << resultDisjoint [i].overlap
-	      << ", distance: " << resultDisjoint [i].distance << std::endl;
-    std::cout << "disjointAndLowerBound: overlap: "
-	      << resultDisjointAndLowerBound [i].overlap
-	      << ", distance: " << resultDisjointAndLowerBound [i].distance
-	      << std::endl << std::endl;
-#endif
-    BOOST_CHECK (res1 == res2);
-    BOOST_CHECK (res1 == res3);
-    BOOST_CHECK ((!res1 && resultDisjointAndLowerBound [i].distance > 0) ||
-		 (res1 && resultDisjointAndLowerBound [i].distance == 0));
-    BOOST_CHECK (resultDisjointAndLowerBound [i].distance <=
-		 resultDistance [i].distance);
-  }
-  std::cout << "Sum distances ShapeShapeDistance:               "
-	    << sumDistance << std::endl;
-  std::cout << "Sum distances obbDisjointAndLowerBoundDistance: "
-	    << sumDistanceLowerBound << std::endl;
-}
diff --git a/test/test_fcl_utility.cpp b/test/utility.cpp
similarity index 99%
rename from test/test_fcl_utility.cpp
rename to test/utility.cpp
index 9a22a9dffeac0c854e64a551ed151c1529a71e98..f5708d948c5b9970b56a7c38b9be2a4b0ca1b048 100644
--- a/test/test_fcl_utility.cpp
+++ b/test/utility.cpp
@@ -1,4 +1,4 @@
-#include "test_fcl_utility.h"
+#include "utility.h"
 #include <hpp/fcl/collision.h>
 #include <hpp/fcl/distance.h>
 #include <cstdio>
diff --git a/test/test_fcl_utility.h b/test/utility.h
similarity index 100%
rename from test/test_fcl_utility.h
rename to test/utility.h