Source code for paref.express.info

import numpy as np
import scipy as sp
from scipy.stats import qmc

from paref.blackbox_functions.design_space.bounds import Bounds
from paref.interfaces.moo_algorithms.blackbox_function import BlackboxFunction
from paref.moo_algorithms.minimizer.differential_evolution_minimizer import DifferentialEvolutionMinimizer
from paref.moo_algorithms.minimizer.surrogates.gpr import GPR
from paref.moo_algorithms.stopping_criteria.max_iterations_reached import MaxIterationsReached
from paref.pareto_reflection_sequences.multi_dimensional.find_1_pareto_points_for_all_components_sequence import \
    Find1ParetoPointsForAllComponentsSequence
from paref.pareto_reflection_sequences.multi_dimensional.find_edge_points_sequence import FindEdgePointsSequence
from paref.pareto_reflections.find_maximal_pareto_point import FindMaximalParetoPoint
from paref.pareto_reflections.minimize_weighted_norm_to_utopia import MinimizeWeightedNormToUtopia

from tabulate import tabulate



[docs]
class GprBbf(BlackboxFunction):
    """Approximate a blackbox function with a Gaussian process regression (GPR) in the BlackboxFunction interface

    """

    def __init__(self, bbf: BlackboxFunction, training_iter=2000, learning_rate=0.05):
        self._bbf = bbf
        self._evaluations = bbf.evaluations.copy()
        self._gpr = GPR(training_iter=training_iter, learning_rate=learning_rate)
        self._gpr.train(train_x=bbf.x, train_y=bbf.y)

    def __call__(self, x: np.ndarray) -> np.ndarray:
        return self._gpr(x)

    @property
    def dimension_design_space(self) -> int:
        return self._bbf.dimension_design_space

    @property
    def dimension_target_space(self) -> int:
        return self._bbf.dimension_target_space

    @property
    def design_space(self):
        return self._bbf.design_space




[docs]
class Info:
    """Obtain relevant information about the Pareto front of a blackbox function and model fitness

        * topology:
            the shape of your Pareto front (Info.topology)
        * suggestion_pareto_points:
            suggestions for Pareto points to evaluate, how and why (Info.suggestion_pareto_points)
        * minima:
            the estimated minima of each component (Info.minima)
        * model fitness:
            how well the model approximates the bbf, how to improve it and
            how certain its estimation is (Info.model_fitness)


    .. warning::

        Paref's Info class is still under development.
        If you run into any problems, errors or have suggestions how to make it *user-friendlier*, please contact me
        or open an issue on GitHub. Many thanks!

    """

    def __init__(self, blackbox_function: BlackboxFunction, training_iter=2000, learning_rate=0.05):
        """

        Parameters
        ----------
        blackbox_function : BlackboxFunction
            underlying blackbox function
        training_iter : int default 2000
            number of training iterations for the underlying approximate of the bbf (GPR)
        learning_rate : float default 0.01
            learning rate for the underlying approximate of the bbf (GPR)
        """
        if not isinstance(blackbox_function.design_space, Bounds):
            raise ValueError('Design space property of blackbox function must be an instance of Bounds!')
        self._blackbox_function = blackbox_function
        self._minimizer = DifferentialEvolutionMinimizer()
        self._training_iter = training_iter
        self._learning_rate = learning_rate
        self.update()


[docs]
    def update(self):
        """Update the information about the Pareto front

        Call this method whenever you have evaluated the blackbox function at some design.
        """
        if len(self._blackbox_function.y) == 0:
            raise ValueError('You must evaluate the blackbox function at least once before obtaining information!')

        print('Obtaining information about the approximate Pareto front...')
        self._surrogate = GprBbf(self._blackbox_function, self._training_iter, self._learning_rate)

        # mean of std
        self.percent_mean_std = np.mean([self._surrogate._gpr.std(x) / self._surrogate._gpr(x) * 100 for x in qmc.scale(
            qmc.LatinHypercube(d=self._blackbox_function.dimension_design_space).random(
                n=1000),
            self._blackbox_function.design_space.lower_bounds,
            self._blackbox_function.design_space.upper_bounds,
        )], axis=0)

        # Search for minima in components
        minima_sequence = Find1ParetoPointsForAllComponentsSequence()

        self._minimizer.apply_to_sequence(blackbox_function=self._surrogate,
                                          sequence_pareto_reflections=minima_sequence,
                                          stopping_criteria=MaxIterationsReached(
                                              max_iterations=self._blackbox_function.dimension_target_space))
        self._minima_pareto_points = minima_sequence.best_fits(self._surrogate.y)
        self._minima_pareto_points_std = [self._surrogate._gpr.std(x) for i, x in
                                          enumerate(
                                              self._surrogate.x[-self._blackbox_function.dimension_target_space:])]
        self._minima = np.min(minima_sequence.best_fits(self._surrogate.y), axis=0)
        self._minima_std = [self._surrogate._gpr.std(x)[i] for i, x in
                            enumerate(self._surrogate.x[-self._blackbox_function.dimension_target_space:])]

        # Search for maximal Pareto point
        maximal_pareto_point_reflection = FindMaximalParetoPoint(blackbox_function=self._surrogate)
        self._minimizer.apply_to_sequence(blackbox_function=self._surrogate,
                                          sequence_pareto_reflections=maximal_pareto_point_reflection,
                                          stopping_criteria=MaxIterationsReached(max_iterations=1))
        self.maximal_pareto_point = maximal_pareto_point_reflection.best_fits(self._surrogate.y)[-1]
        self.maximal_pareto_point_std = self._surrogate._gpr.std(self._surrogate.x[-1])

        # Search for closest to theoretical global optimum
        global_optimum_pareto_point_reflection = (
            MinimizeWeightedNormToUtopia(utopia_point=self._minima,
                                         potency=2,
                                         scalar=np.ones(
                                             self._surrogate.dimension_target_space)))
        self._minimizer.apply_to_sequence(blackbox_function=self._surrogate,
                                          sequence_pareto_reflections=global_optimum_pareto_point_reflection,
                                          stopping_criteria=MaxIterationsReached(max_iterations=1))
        self.global_optimum_pareto_point = maximal_pareto_point_reflection.best_fits(self._surrogate.y)[-1]
        self.global_optimum_pareto_point_std = self._surrogate._gpr.std(self._surrogate.x[-1])

        # Search for edge points
        if self._blackbox_function.dimension_target_space > 2:
            edge_points_sequence = FindEdgePointsSequence()
            self._minimizer.apply_to_sequence(blackbox_function=self._surrogate,
                                              sequence_pareto_reflections=edge_points_sequence,
                                              stopping_criteria=MaxIterationsReached(
                                                  max_iterations=self._blackbox_function.dimension_target_space))
            self.edge_points = minima_sequence.best_fits(self._surrogate.y)

        else:
            self.edge_points = self._minima_pareto_points

        # Concave/convex
        basis = sp.linalg.orth((self.edge_points - self.edge_points[0]).T).T

        self.dimension_pf = len(basis)  # approximation of dimension of Pareto front
        projected_point = np.sum(
            np.array([np.dot(self.maximal_pareto_point - self.edge_points[0], basis_vector) * basis_vector
                      for basis_vector in basis]), axis=0) + self.edge_points[0]
        min_edges = np.min(self.edge_points, axis=0)

        # TODO: this is prone to small values!
        self._concave_degree = np.linalg.norm(
            self.maximal_pareto_point - min_edges) / np.linalg.norm(
            projected_point - min_edges)

        if self._concave_degree < 0.5:
            self.concave_degree_description = 'Very convex'

        elif self._concave_degree < 0.97:
            self.concave_degree_description = 'Convex'

        elif self._concave_degree < 1.03:
            self.concave_degree_description = 'Linear'

        elif self._concave_degree < 1.3:
            self.concave_degree_description = 'Concave'

        else:
            self.concave_degree_description = 'Very concave'

        # is there a trade-off
        # TODO: global optimum check might fail: testing on real use-cases
        self.global_optimum = False
        if self.dimension_pf == 0:
            self.global_optimum = True

        print("""Done! You can access the following information about your Pareto front:
        * model fitness: how well the model approximates the bbf,
            how to improve it and how certain its estimation is (Info.model_fitness)
        * topology:
            topological information of your Pareto front (Info.topology)
        * suggestion:
            for Pareto points to evaluate, how and why (Info.suggestion_pareto_points)
        * minima:
            the estimated minima of each component (Info.minima)
        """)


    @property
    def minima(self):
        """Estimated minimum of each component
        """

        print(tabulate({'Component': range(len(self._minima)),
                        'Minimum': self._minima,
                        'Std': self._minima_std}, headers='keys'))

    @property
    def topology(self):
        """Obtain topological information about the Pareto front

        Is there are global optimum?
        Is the Pareto front rather convex or concave or linear?
        Of which dimension is the Pareto front?
        """

        print(
            tabulate({'(Almost) Global optimum': [self.global_optimum],
                      'Shape': [self.concave_degree_description],
                      'Dimension Pareto front': [self.dimension_pf]},
                     headers='keys'))

    @property
    def suggestion_pareto_points(self):
        """Suggestions for Pareto points to evaluate, how and why

        The suggestion mainly focuses on the approximate shape of the Pareto front and its implication
        for the trade-offs between the components.
        """
        if self.global_optimum:
            print("""
            There is probably an (almost) global optimum.
            This means your target objectives are not conflicting
            and any Pareto point will be that global optimum.
            You can use any MOO algorithm provided in Paref to find that point.
            """)

        elif self._concave_degree < 0.7:
            print("""Your objectives appear to be conflicting, so there are real trade-offs.
            Since your Pareto front appears to be highly convex, you may be able to achieve large improvements
            in one component while suffering relatively small losses in others.
            The best trade-off I've found that minimises all components simultaneously is
            """)
            print(tabulate({'Target values': [self.global_optimum_pareto_point.tolist()],
                            'Std': [self.global_optimum_pareto_point_std.tolist()],
                            'Dominates x% of evaluations': [
                                self._better_than_evaluations(self.global_optimum_pareto_point)]},
                           headers='keys'))
            print(f"""This Pareto optimum is closest to the theoretical global optimum:
            {self._minima}
            """)
            print("""
            You can use the search_for_best_real_trade_off algorithm provided by the paref.express.express_search
            module to find that Pareto point.
            """)

        elif self._concave_degree < 0.97:
            print("""
            Your objectives appear to be conflicting, so there are real trade-offs.
            As your Pareto front appears to be convex, you may be able to achieve improvements in one
            component while suffering a smaller loss in others.
            A Pareto point that represents a real trade-off in all components I've found is
            """)
            print(tabulate({'Target values': [self.maximal_pareto_point.tolist()],
                            'Std': [self.maximal_pareto_point_std.tolist()],
                            'Dominates x% of evaluations': [self._better_than_evaluations(self.maximal_pareto_point)]},
                           headers='keys'))
            print("""
            You can use the search_for_best_real_trade_off algorithm provided by the paref.express.express_search
            module to find that Pareto point.
            """)

        elif self._concave_degree < 1.03:
            print("""
            Your objectives appear to be conflicting, so there are real trade-offs.
            Your Pareto front appears to be (almost) a plane.
            This means that improvements in one component will result in
            almost equal losses in other components.
            Accordingly, all Pareto points will be (almost) equally good and it is up to you to
            decide which components are more relevant.
            I suggest you use the priority_search algorithm provided by the paref.express.express_search module
            to take into account your preference for certain components.
            """)

        elif self._concave_degree < 1.3:
            print("""
            Your objectives appear to be conflicting, so there are real trade-offs.
            Your Pareto front appears to be concave.
            This means that any improvement in one component will result in a greater loss in the other components.
            Accordingly, I suggest that you focus on a single component by using
            the search_for_minima or priority_search algorithm provided by the paref.express.express_search module
            to take into account your preference for certain components.
            Here are the estimated Pareto points that minimise a component
            """)

            print(tabulate(
                [[i, minima.tolist(), self._minima_pareto_points_std[i].tolist(), self._better_than_evaluations(minima)]
                 for i, minima in enumerate(self._minima_pareto_points)],
                headers=['Component', 'Target values', 'Std', 'Dominates x% of evaluations']))

            suggested_point = self._minima_pareto_points[
                np.argmax([self._better_than_evaluations(minima) for minima in self._minima_pareto_points])]
            print(f"""
            I propose
            {suggested_point}
            because it dominates the most evaluations ({self._better_than_evaluations(suggested_point)}%).

            Nevertheless, here is a Pareto point representing a real trade-off in all components
            """)
            print(tabulate({'Target values': [self.maximal_pareto_point.tolist()],
                            'Std': [self.maximal_pareto_point_std.tolist()],
                            'Dominates x% of evaluations': [self._better_than_evaluations(self.maximal_pareto_point)]},
                           headers='keys'))

        else:
            print("""
            Your Pareto front appears to be very concave.
            This means that any little improvement in one component will result in a significant
            loss in the other components.
            Accordingly, I suggest that you focus on a single component and
            choose the Pareto point that minimises that component.
            You can use the search_for_minima algorithm provided by the paref.express.express_search module
            to find those Pareto points.

            Here are the estimated Pareto points that minimise a component
            """)
            print(tabulate([[i,
                             minima.tolist(),
                             self._minima_pareto_points_std[i].tolist(),
                             self._better_than_evaluations(minima)] for i, minima in
                            enumerate(self._minima_pareto_points)
                            ], headers=['Component', 'Target values', 'Std', 'Dominates x% of evaluations']))
            print(f"""
            I propose
            {self._minima_pareto_points[
                np.argmax([self._better_than_evaluations(minima) for minima in self._minima_pareto_points])]}
            because it dominates the most evaluations.
            """)

    @property
    def model_fitness(self):
        """Information about the model fitness

        How well does the model approximate the blackbox function?
        Are the training iterations sufficient, i.e. did the training convergence?
        How uncertain is the model?
        How many evaluations are needed to obtain good results?
        """
        # TODO: include cross validation
        losses = np.array([hp['loss'] for hp in self._surrogate._gpr.info])
        losses_last = np.array([hp['loss'][int(self._training_iter * 0.9):] for hp in self._surrogate._gpr.info])

        model_convergence = (np.max(losses_last, axis=1) - np.min(losses_last, axis=1)) / (
                np.max(losses, axis=1) - np.min(losses, axis=1)) * 9

        # TODO: this might be prone to errors. Try a relative convergence criterion instead.
        if np.all(model_convergence < 0.05):
            print(
                """
                The model has converged. The training iterations seem to be sufficient.
                """
            )
        else:
            print("""
                The model has not converged. I suggest you increase the number of training iterations.
                """)

        if np.any(self.percent_mean_std > 10):
            print("""
                The uncertainty of your model seems to be high.
                Accordingly, you might need more evaluations to obtain good results.
                I suggest you allow each algorithm the maximal number of evaluations
                by using the max_iterations stopping criterion.
                """)
        else:
            print("""
                The uncertainty of your model seems to be low.
                Accordingly, you might only need few evaluations to obtain good results.
                I suggest you allow each algorithm the minimal number of evaluations
                by using the convergence_reached stopping criterion.
                """)

        print(tabulate({'Component': range(self._blackbox_function.dimension_target_space),
                        f'Average uncertainty (%) at {self._training_iter} training iterations':
                            self.percent_mean_std.tolist()},
                       headers='keys'))
        self._surrogate._gpr.plot_loss()

    def _better_than_evaluations(self, y):
        i = 0
        for eval in self._blackbox_function.y:
            if np.all(eval >= y):
                i += 1

        return i / len(self._blackbox_function.y) * 100  # in percent

    @property
    def model(self) -> GPR:
        """The underlying model of the blackbox function

        Returns
        -------
        GPR
            underlying surrogate (GPR) of the blackbox function
        """
        return self._surrogate._gpr