Merge pull request #284 from Loop3D/fix/speed-up

lachlangrose · web-flow · commit 28f25a6c3d0f · 2026-04-14T21:04:26.000+09:30
Fix/speed up
diff --git a/LoopStructural/interpolators/_discrete_interpolator.py b/LoopStructural/interpolators/_discrete_interpolator.py
@@ -65,6 +65,7 @@ def __init__(self, support, data={}, c=None, up_to_date=False):
         self.apply_scaling_matrix = True
         self.add_ridge_regulatisation = True
         self.ridge_factor = 1e-8
+
     def set_nelements(self, nelements: int) -> int:
         return self.support.set_nelements(nelements)
 
@@ -247,11 +248,11 @@ def add_constraints_to_least_squares(self, A, B, idc, w=1.0, name="undefined"):
 
         rows = np.tile(rows, (A.shape[-1], 1)).T
         self.constraints[name] = {
-            'matrix': sparse.coo_matrix(
+            "matrix": sparse.coo_matrix(
                 (A.flatten(), (rows.flatten(), idc.flatten())), shape=(n_rows, self.dof)
             ).tocsc(),
-            'b': B.flatten(),
-            'w': w,
+            "b": B.flatten(),
+            "w": w,
         }
 
     @abstractmethod
@@ -305,7 +306,7 @@ def add_inequality_constraints_to_matrix(
         rows = np.tile(rows, (A.shape[-1], 1)).T
 
         self.ineq_constraints[name] = {
-            'matrix': sparse.coo_matrix(
+            "matrix": sparse.coo_matrix(
                 (A.flatten(), (rows.flatten(), idc.flatten())), shape=(rows.shape[0], self.dof)
             ).tocsc(),
             "bounds": bounds,
@@ -320,7 +321,7 @@ def add_value_inequality_constraints(self, w: float = 1.0):
             rows = np.tile(rows, (a.shape[-1], 1)).T
             a = a[inside]
             cols = self.support.elements[element[inside]]
-            self.add_inequality_constraints_to_matrix(a, points[:, 3:5], cols, 'inequality_value')
+            self.add_inequality_constraints_to_matrix(a, points[:, 3:5], cols, "inequality_value")
 
     def add_inequality_pairs_constraints(
         self,
@@ -354,11 +355,11 @@ def add_inequality_pairs_constraints(
                 lower_interpolation = self.support.get_element_for_location(lower_points)
                 if (~upper_interpolation[3]).sum() > 0:
                     logger.warning(
-                        f'Upper points not in mesh {upper_points[~upper_interpolation[3]]}'
+                        f"Upper points not in mesh {upper_points[~upper_interpolation[3]]}"
                     )
                 if (~lower_interpolation[3]).sum() > 0:
                     logger.warning(
-                        f'Lower points not in mesh {lower_points[~lower_interpolation[3]]}'
+                        f"Lower points not in mesh {lower_points[~lower_interpolation[3]]}"
                     )
                 ij = np.array(
                     [
@@ -392,7 +393,7 @@ def add_inequality_pairs_constraints(
                 bounds[:, 1] = upper_bound
 
                 self.add_inequality_constraints_to_matrix(
-                    a, bounds, cols, f'inequality_pairs_{pair[0]}_{pair[1]}'
+                    a, bounds, cols, f"inequality_pairs_{pair[0]}_{pair[1]}"
                 )
 
     def add_inequality_feature(
@@ -506,13 +507,14 @@ def build_matrix(self):
         for c in self.constraints.values():
             if len(c["w"]) == 0:
                 continue
-            mats.append(c['matrix'].multiply(c['w'][:, None]))
-            bs.append(c['b'] * c['w'])
+            mats.append(c["matrix"].multiply(c["w"][:, None]))
+            bs.append(c["b"] * c["w"])
         A = sparse.vstack(mats)
         logger.info(f"Interpolation matrix is {A.shape[0]} x {A.shape[1]}")
 
         B = np.hstack(bs)
         return A, B
+
     def compute_column_scaling_matrix(self, A: sparse.csr_matrix) -> sparse.dia_matrix:
         """Compute column scaling matrix S for matrix A so that A @ S has columns with unit norm.
 
@@ -576,8 +578,8 @@ def build_inequality_matrix(self):
         mats = []
         bounds = []
         for c in self.ineq_constraints.values():
-            mats.append(c['matrix'])
-            bounds.append(c['bounds'])
+            mats.append(c["matrix"])
+            bounds.append(c["bounds"])
         if len(mats) == 0:
             return sparse.csr_matrix((0, self.dof), dtype=float), np.zeros((0, 3))
         Q = sparse.vstack(mats)
@@ -623,40 +625,40 @@ def solve_system(
 
         Q, bounds = self.build_inequality_matrix()
         if callable(solver):
-            logger.warning('Using custom solver')
+            logger.warning("Using custom solver")
             self.c = solver(A.tocsr(), b)
             self.up_to_date = True
         elif isinstance(solver, str) or solver is None:
-            if solver not in ['cg', 'lsmr', 'admm']:
+            if solver not in ["cg", "lsmr", "admm"]:
                 logger.warning(
-                    f'Unknown solver {solver} using cg. \n Available solvers are cg and lsmr or a custom solver as a callable function'
+                    f"Unknown solver {solver} using cg. \n Available solvers are cg and lsmr or a custom solver as a callable function"
                 )
-                solver = 'cg'
-        if solver == 'cg':
+                solver = "cg"
+        if solver == "cg":
             logger.info("Solving using cg")
-            if 'atol' not in solver_kwargs or 'rtol' not in solver_kwargs:
+            if "atol" not in solver_kwargs or "rtol" not in solver_kwargs:
                 if tol is not None:
-                    solver_kwargs['atol'] = tol
+                    solver_kwargs["atol"] = tol
 
             logger.info(f"Solver kwargs: {solver_kwargs}")
 
             res = sparse.linalg.cg(A.T @ A, A.T @ b, **solver_kwargs)
             if res[1] > 0:
                 logger.warning(
-                    f'CG reached iteration limit ({res[1]})and did not converge, check input data. Setting solution to last iteration'
+                    f"CG reached iteration limit ({res[1]})and did not converge, check input data. Setting solution to last iteration"
                 )
             self.c = res[0]
             self.up_to_date = True
 
-        elif solver == 'lsmr':
+        elif solver == "lsmr":
             logger.info("Solving using lsmr")
             # if 'atol' not in solver_kwargs:
             #     if tol is not None:
             #         solver_kwargs['atol'] = tol
-            if 'btol' not in solver_kwargs:
+            if "btol" not in solver_kwargs:
                 if tol is not None:
-                    solver_kwargs['btol'] = tol
-                    solver_kwargs['atol'] = 0.
+                    solver_kwargs["btol"] = tol
+                    solver_kwargs["atol"] = 0.0
                     logger.info(f"Setting lsmr btol to {tol}")
             logger.info(f"Solver kwargs: {solver_kwargs}")
             res = sparse.linalg.lsmr(A, b, **solver_kwargs)
@@ -674,31 +676,31 @@ def solve_system(
                 self.c = res[0]
             self.up_to_date = True
 
-        elif solver == 'admm':
+        elif solver == "admm":
             logger.info("Solving using admm")
 
-            if 'x0' in solver_kwargs:
-                x0 = solver_kwargs['x0'](self.support)
+            if "x0" in solver_kwargs:
+                x0 = solver_kwargs["x0"](self.support)
             else:
                 x0 = np.zeros(A.shape[1])
-            solver_kwargs.pop('x0', None)
+            solver_kwargs.pop("x0", None)
             if Q is None:
                 logger.warning("No inequality constraints, using lsmr")
-                return self.solve_system('lsmr', solver_kwargs)
+                return self.solve_system("lsmr", solver_kwargs=solver_kwargs)
 
             try:
                 from loopsolver import admm_solve
 
                 try:
-                    linsys_solver = solver_kwargs.pop('linsys_solver', 'lsmr')
+                    linsys_solver = solver_kwargs.pop("linsys_solver", "lsmr")
                     res = admm_solve(
                         A,
                         b,
                         Q,
                         bounds,
                         x0=x0,
-                        admm_weight=solver_kwargs.pop('admm_weight', 0.01),
-                        nmajor=solver_kwargs.pop('nmajor', 200),
+                        admm_weight=solver_kwargs.pop("admm_weight", 0.01),
+                        nmajor=solver_kwargs.pop("nmajor", 200),
                         linsys_solver_kwargs=solver_kwargs,
                         linsys_solver=linsys_solver,
                     )
@@ -756,12 +758,7 @@ def evaluate_value(self, locations: np.ndarray) -> np.ndarray:
         """
         self.update()
         evaluation_points = np.array(locations)
-        evaluated = np.zeros(evaluation_points.shape[0])
-        mask = np.any(evaluation_points == np.nan, axis=1)
-
-        if evaluation_points[~mask, :].shape[0] > 0:
-            evaluated[~mask] = self.support.evaluate_value(evaluation_points[~mask], self.c)
-        return evaluated
+        return self.support.evaluate_value(evaluation_points, self.c)
 
     def evaluate_gradient(self, locations: np.ndarray) -> np.ndarray:
         """
@@ -792,4 +789,4 @@ def to_dict(self):
     def vtk(self):
         if self.up_to_date is False:
             self.update()
-        return self.support.vtk({'c': self.c})
+        return self.support.vtk({"c": self.c})
diff --git a/LoopStructural/interpolators/supports/_3d_base_structured.py b/LoopStructural/interpolators/supports/_3d_base_structured.py
@@ -285,10 +285,8 @@ def position_to_cell_global_index(self, pos):
 
     def inside(self, pos):
         # check whether point is inside box
-        inside = np.ones(pos.shape[0]).astype(bool)
-        for i in range(3):
-            inside *= pos[:, i] > self.origin[None, i]
-            inside *= pos[:, i] < self.maximum[None, i]
+        pos = self.check_position(pos)
+        inside = np.all((pos > self.origin) & (pos < self.maximum), axis=1)
         return inside
 
     def check_position(self, pos: np.ndarray) -> np.ndarray:
@@ -306,11 +304,21 @@ def check_position(self, pos: np.ndarray) -> np.ndarray:
         [type]
             [description]
         """
-        pos = np.array(pos)
+        if not isinstance(pos, np.ndarray):
+            try:
+                pos = np.array(pos, dtype=float)
+            except Exception as e:
+                logger.error(
+                    f"Position array should be a numpy array or list of points, not {type(pos)}"
+                )
+                raise ValueError(
+                    f"Position array should be a numpy array or list of points, not {type(pos)}"
+                ) from e
+
         if len(pos.shape) == 1:
             pos = np.array([pos])
         if len(pos.shape) != 2:
-            print("Position array needs to be a list of points or a point")
+            logger.error("Position array needs to be a list of points or a point")
             raise ValueError("Position array needs to be a list of points or a point")
         return pos
 
@@ -379,20 +387,24 @@ def position_to_cell_corners(self, pos):
         ----------
         pos : np.array
             (N, 3) array of xyz coordinates representing the positions of N points.
-    
+
         Returns
         -------
         globalidx : np.array
-            (N, 8) array of global indices corresponding to the 8 corner nodes of the cell 
-            each point lies in. If a point lies outside the support, its corresponding entry 
+            (N, 8) array of global indices corresponding to the 8 corner nodes of the cell
+            each point lies in. If a point lies outside the support, its corresponding entry
             will be set to -1.
         inside : np.array
             (N,) boolean array indicating whether each point is inside the support domain.
         """
         cell_indexes, inside = self.position_to_cell_index(pos)
-        corner_indexes = self.cell_corner_indexes(cell_indexes)
-        globalidx = self.global_node_indices(corner_indexes)
-        # if global index is not inside the support set to -1
+        nx, ny = self.nsteps[0], self.nsteps[1]
+        offsets = np.array(
+            [0, 1, nx, nx + 1, nx * ny, nx * ny + 1, nx * ny + nx, nx * ny + nx + 1],
+            dtype=np.intp,
+        )
+        g = cell_indexes[:, 0] + nx * cell_indexes[:, 1] + nx * ny * cell_indexes[:, 2]
+        globalidx = g[:, None] + offsets[None, :]  # (N, 8)
         globalidx[~inside] = -1
         return globalidx, inside
 
diff --git a/LoopStructural/modelling/features/_lambda_geological_feature.py b/LoopStructural/modelling/features/_lambda_geological_feature.py
@@ -66,8 +66,25 @@ def evaluate_value(self, pos: np.ndarray, ignore_regions=False) -> np.ndarray:
         v = np.zeros((pos.shape[0]))
         v[:] = np.nan
 
-        mask = self._calculate_mask(pos, ignore_regions=ignore_regions)
-        pos = self._apply_faults(pos)
+        # Precompute each fault's scalar value (gx = fault.__getitem__(0).evaluate_value)
+        # once and reuse for both the region mask check and fault application.
+        # FaultSegment.evaluate_value(pos) == self.__getitem__(0).evaluate_value(pos) == gx.
+        # apply_to_points also needs gx to determine the displacement mask — passing it
+        # avoids the duplicate evaluation on the np.copy of pos.
+        precomputed_gx = {id(f): f.evaluate_value(pos) for f in self.faults}
+
+        # Region mask: pass precomputed gx so PositiveRegion/NegativeRegion skip re-evaluation
+        mask = np.ones(pos.shape[0], dtype=bool)
+        if not ignore_regions:
+            for r in self.regions:
+                pre = precomputed_gx.get(id(getattr(r, 'feature', None)))
+                mask = np.logical_and(mask, r(pos, precomputed_val=pre))
+
+        # Apply faults: pass precomputed gx so apply_to_points skips one evaluate_value call
+        if self.faults_enabled:
+            for f in self.faults:
+                pos = f.apply_to_points(pos, precomputed_gx=precomputed_gx.get(id(f)))
+
         if self.function is not None:
             v[mask] = self.function(pos[mask,:])
         return v
diff --git a/LoopStructural/modelling/features/fault/_fault_segment.py b/LoopStructural/modelling/features/fault/_fault_segment.py
@@ -311,13 +311,14 @@ def evaluate_displacement(self, points):
             d[mask] = self.faultfunction(gx[mask] + self.fault_offset, gy[mask], gz[mask])
         return d * self.displacement
 
-    def apply_to_points(self, points, reverse=False):
+    def apply_to_points(self, points, reverse=False, precomputed_gx=None):
         """
         Unfault the array of points
 
         Parameters
         ----------
         points - numpy array Nx3
+        precomputed_gx - optional pre-evaluated gx values (same points, avoids duplicate evaluation)
 
         Returns
         -------
@@ -328,10 +329,12 @@ def apply_to_points(self, points, reverse=False):
         newp = np.copy(points).astype(float)
         # evaluate fault function for all points
         # then define mask for only points affected by fault
-        gx = None
-        gy = None
-        gz = None
-        if use_threads:
+        # gx may be supplied by caller to avoid re-evaluation (precomputed from region check)
+        if precomputed_gx is not None:
+            gx = precomputed_gx
+            gy = self.__getitem__(1).evaluate_value(newp)
+            gz = self.__getitem__(2).evaluate_value(newp)
+        elif use_threads:
             with ThreadPoolExecutor(max_workers=8) as executor:
                 # all of these operations should be
                 # independent so just run as different threads
@@ -361,27 +364,32 @@ def apply_to_points(self, points, reverse=False):
             d *= -1.0
         # calculate the fault frame for the evaluation points
         for _i in range(steps):
-            gx = None
-            gy = None
-            gz = None
             g = None
-            if use_threads:
+            if _i == 0:
+                # Reuse gx/gy/gz from the initial full-array evaluation above — newp[mask] hasn't
+                # moved yet on the first iteration, so values are identical.
+                gx_m = gx[mask]
+                gy_m = gy[mask]
+                gz_m = gz[mask]
+                g = self.__getitem__(1).evaluate_gradient(newp[mask, :], ignore_regions=True)
+            elif use_threads:
                 with ThreadPoolExecutor(max_workers=8) as executor:
                     # all of these operations should be
                     # independent so just run as different threads
                     gx_future = executor.submit(self.__getitem__(0).evaluate_value, newp[mask, :])
                     g_future = executor.submit(self.__getitem__(1).evaluate_gradient, newp[mask, :])
                     gy_future = executor.submit(self.__getitem__(1).evaluate_value, newp[mask, :])
                     gz_future = executor.submit(self.__getitem__(2).evaluate_value, newp[mask, :])
-                    gx = gx_future.result()
+                    gx_m = gx_future.result()
                     g = g_future.result()
-                    gy = gy_future.result()
-                    gz = gz_future.result()
+                    gy_m = gy_future.result()
+                    gz_m = gz_future.result()
             else:
-                gx = self.__getitem__(0).evaluate_value(newp[mask, :])
-                gy = self.__getitem__(1).evaluate_value(newp[mask, :])
-                gz = self.__getitem__(2).evaluate_value(newp[mask, :])
+                gx_m = self.__getitem__(0).evaluate_value(newp[mask, :])
+                gy_m = self.__getitem__(1).evaluate_value(newp[mask, :])
+                gz_m = self.__getitem__(2).evaluate_value(newp[mask, :])
                 g = self.__getitem__(1).evaluate_gradient(newp[mask, :], ignore_regions=True)
+            gx, gy, gz = gx_m, gy_m, gz_m  # alias for block below
             # # get the fault frame val/grad for the points
             # determine displacement magnitude, for constant displacement
             # hanging wall should be > 0
diff --git a/LoopStructural/utils/regions.py b/LoopStructural/utils/regions.py