diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 9b7c3d7b..5d590bc0 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -6,7 +6,7 @@ name: test on: push: - branches: [ "main", "v2.0.0"] + branches: [ "main", "2.1.0", "optimization"] pull_request: branches: [ "main"] @@ -21,7 +21,7 @@ jobs: steps: - uses: actions/checkout@v4 - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v4 + uses: actions/setup-python@v5 with: python-version: ${{ matrix.python-version }} - name: Install dependencies diff --git a/.gitignore b/.gitignore index 450f9041..d706da28 100644 --- a/.gitignore +++ b/.gitignore @@ -191,4 +191,11 @@ cython_debug/ #.idea/ # VSCode -.vscode/* \ No newline at end of file +.vscode/* + +# Coverage report +coverage +tests/.coverage + +# performance results +tests/performance/results diff --git a/matid/classification/classifications.py b/matid/classification/classifications.py index ea97c585..de50315b 100644 --- a/matid/classification/classifications.py +++ b/matid/classification/classifications.py @@ -50,27 +50,9 @@ def basis_indices(self): @property def outliers(self): - return self.region.get_outliers() - - @property - def interstitials(self): - return self.region.get_interstitials() - - @property - def adsorbates(self): - return self.region.get_adsorbates() - - @property - def substitutions(self): - return self.region.get_substitutions() - - @property - def vacancies(self): - return self.region.get_vacancies() - - @property - def unknowns(self): - return self.region.get_unknowns() + all = set(list(range(len(self.atoms)))) + region = self.region.get_basis_indices() + return list(all - region) @property def prototype_cell(self): diff --git a/matid/classification/classifier.py b/matid/classification/classifier.py index 0518a1d2..33c19d03 100644 --- a/matid/classification/classifier.py +++ b/matid/classification/classifier.py @@ -267,26 +267,18 @@ class that represents a classification. n_region_conn = np.sum(region_conn) region_is_periodic = n_region_conn == 2 - # This might be unnecessary because the connectivity of the - # unit cell is already checked. - clusters = best_region.get_clusters() - basis_indices = set(list(best_region.get_basis_indices())) - split = True - for cluster in clusters: - if basis_indices.issubset(cluster): - split = False - # Check that the found region covers enough of the entire # system. If not, then the region alone cannot be used to # classify the entire structure. This happens e.g. when one # 2D sheet is found from a 2D heterostructure, or a local # pattern is found inside a structure. + basis_indices = set(list(best_region.get_basis_indices())) n_atoms = len(system) n_basis_atoms = len(basis_indices) coverage = n_basis_atoms / n_atoms covered = coverage >= self.min_coverage - if not split and covered and region_is_periodic: + if covered and region_is_periodic: if best_region.is_2d: classification = Material2D(input_system, best_region) else: @@ -329,14 +321,12 @@ def cross_validate_region( cell_size_tol=self.cell_size_tol, max_2d_cell_height=self.max_2d_cell_height, max_2d_single_cell_size=self.max_2d_single_cell_size, - chem_similarity_threshold=self.chem_similarity_threshold, ) region = periodicfinder.get_region( system, index, size, tol, - self.abs_delaunay_threshold, self.bond_threshold, distances=distances, ) diff --git a/matid/clustering/sbc.py b/matid/clustering/sbc.py index f9bbbf13..26a62c1b 100644 --- a/matid/clustering/sbc.py +++ b/matid/clustering/sbc.py @@ -117,9 +117,7 @@ def get_clusters( distances = matid.geometry.get_distances(system_copy, radii) # Iteratively search for new clusters until whole system is covered - periodic_finder = PeriodicFinder( - angle_tol=angle_tol, chem_similarity_threshold=0 - ) + periodic_finder = PeriodicFinder(angle_tol=angle_tol) indices = set(list(range(len(system_copy)))) clusters = [] while len(indices) != 0: @@ -325,12 +323,18 @@ def _clean_clusters(self, clusters, bond_threshold): Returns: List of Clusters where any outlier atoms have been removed. """ + clusters_cleaned = [] for cluster in clusters: - dbscan_clusters = matid.geometry.get_clusters( - cluster._get_distance_matrix_radii_mic(), - bond_threshold, - min_samples=1, - ) + # If the cluster cleaning fails, the cluster is not reported + try: + dbscan_clusters = matid.geometry.get_clusters( + cluster._get_distance_matrix_radii_mic(), + bond_threshold, + min_samples=1, + ) + except Exception: + continue largest_indices = max(dbscan_clusters, key=lambda x: len(x)) cluster.indices = np.array(cluster.indices)[largest_indices].tolist() - return clusters + clusters_cleaned.append(cluster) + return clusters_cleaned diff --git a/matid/core/distances.py b/matid/core/distances.py index 3447a337..808e9465 100644 --- a/matid/core/distances.py +++ b/matid/core/distances.py @@ -7,14 +7,12 @@ def __init__( self, disp_tensor_mic, disp_factors, - disp_tensor_finite, dist_matrix_mic, dist_matrix_radii_mic, cell_list=None, ): self.disp_tensor_mic = disp_tensor_mic self.disp_factors = disp_factors - self.disp_tensor_finite = disp_tensor_finite self.dist_matrix_mic = dist_matrix_mic self.dist_matrix_radii_mic = dist_matrix_radii_mic self.cell_list = cell_list diff --git a/matid/core/linkedunits.py b/matid/core/linkedunits.py index 313dc89c..cd1dfd60 100644 --- a/matid/core/linkedunits.py +++ b/matid/core/linkedunits.py @@ -1,11 +1,7 @@ -from collections import defaultdict, OrderedDict import numpy as np from ase import Atoms -from ase.data import covalent_radii -from ase.io import write -import matid.geometry from matid.data import constants import networkx as nx @@ -24,11 +20,6 @@ def __init__( system, cell, is_2d, - dist_matrix_radii_pbc, - disp_tensor_finite, - delaunay_threshold=constants.DELAUNAY_THRESHOLD, - chem_similarity_threshold=constants.CHEM_SIMILARITY_THRESHOLD, - bond_threshold=constants.BOND_THRESHOLD, ): """ Args: @@ -37,31 +28,14 @@ def __init__( cell(ase.Atoms): The prototype cell that is used in finding this region. is_2d(boolean): Whether this system represents a 2D-material or not. - delaunay_threshold(float): The maximum allowed size of a tetrahedra - edge in the Delaunay triangulation of the region.. """ self.system = system self.cell = cell self.is_2d = is_2d - self.delaunay_threshold = delaunay_threshold - self.chem_similarity_threshold = chem_similarity_threshold - self.bond_threshold = bond_threshold - self.dist_matrix_radii_pbc = dist_matrix_radii_pbc - self.disp_tensor_finite = disp_tensor_finite self._search_graph = nx.MultiDiGraph() - self._wrapped_moves = [] self._index_cell_map = {} self._used_points = set() - self._decomposition = None - self._inside_indices = None - self._outside_indices = None - self._adsorbates = None - self._substitutions = None - self._vacancies = None - self._clusters = None self._basis_indices = None - self._basis_environments = None - self._translations = None self._pos_tol = None dict.__init__(self) @@ -101,67 +75,6 @@ def recreate_valid(self): return recreated_system - def get_basis_atom_neighbourhood(self): - """For each atom in the basis calculates the chemical neighbourhood. - The chemical neighbourhood consists of a list of atomic numbers that - are closer than a certain threshold value when the covalent radii is - taken into account. - - Args: - - Returns: - """ - if self._basis_environments is None: - # Multiply the system to get the entire neighbourhood. - cell = self.cell - max_radii = covalent_radii[cell.get_atomic_numbers()].max() - cutoff = max_radii + self.bond_threshold - if self.is_2d: - pbc = [True, True, False] - else: - pbc = [True, True, True] - factors = matid.geometry.get_neighbour_cells(cell.get_cell(), cutoff, pbc) - tvecs = np.dot(factors, cell.get_cell()) - - # Find the factor corresponding to the original cell - for i_factor, factor in enumerate(factors): - if tuple(factor) == (0, 0, 0): - tvecs_reduced = np.delete(tvecs, i_factor, axis=0) - break - - pos = cell.get_positions() - disp = matid.geometry.get_displacement_tensor(pos, cell.get_cell()) - - env_list = [] - for i in range(len(cell)): - i_env = self.get_chemical_environment( - cell, i, disp, tvecs, tvecs_reduced - ) - env_list.append(i_env) - self._basis_environments = env_list - - return self._basis_environments - - def get_chem_env_translations(self): - """Used to calculate the translations that are used in calculating the - chemical enviroments. - """ - if self._translations is None: - cell = self.system.get_cell() - num = self.system.get_atomic_numbers() - max_radii = covalent_radii[num].max() - cutoff = max_radii + self.bond_threshold - factors = matid.geometry.get_neighbour_cells(cell, cutoff, True) - translations = np.dot(factors, cell) - - # Find and remove the factor corresponding to the original cell - for i_factor, factor in enumerate(factors): - if tuple(factor) == (0, 0, 0): - translations_reduced = np.delete(translations, i_factor, axis=0) - break - self._translations = (translations, translations_reduced) - return self._translations - def get_basis_indices(self): """Returns the indices of the atoms that were found to belong to a unit cell basis in the LinkedUnits in this collection as a single list. @@ -171,322 +84,14 @@ def get_basis_indices(self): to this collection of LinkedUnits. """ if self._basis_indices is None: - # The chemical similarity check is completely skipped if threshold is zero - if self.chem_similarity_threshold == 0: - indices = set() - for unit in self.values(): - for index in unit.basis_indices: - if index is not None: - indices.add(index) - self._basis_indices = indices - else: - translations, translations_reduced = self.get_chem_env_translations() - - # For each atom in the basis check the chemical environment - neighbour_map = self.get_basis_atom_neighbourhood() - - indices = set() - for unit in self.values(): - # Compare the chemical environment near this atom to the one - # that is present in the prototype cell. If these - # neighbourhoods are too different, then the atom is not - # counted as being a part of the region. - for i_index, index in enumerate(unit.basis_indices): - if index is not None: - real_environment = self.get_chemical_environment( - self.system, - index, - self.disp_tensor_finite, - translations, - translations_reduced, - ) - ideal_environment = neighbour_map[i_index] - chem_similarity = self.get_chemical_similarity( - ideal_environment, real_environment - ) - if chem_similarity >= self.chem_similarity_threshold: - indices.add(index) - - # Ensure that all the basis atoms belong to the same cluster. - # clusters = self.get_clusters() - self._basis_indices = np.array(list(indices)) - - return self._basis_indices - - def get_outliers(self): - """Returns the indices of atoms that were not found to belong to the - basis. - """ - basis_indices = set(self.get_basis_indices()) - all_indices = set(self.get_all_indices()) - additional_indices = np.array(list(all_indices - basis_indices)) - - return additional_indices - - def get_chemical_environment( - self, system, index, disp_tensor_finite, translations, translations_reduced - ): - """Get the chemical environment around an atom. The chemical - environment is quantified simply by the number of different species - around a certain distance when the covalent radii have been considered. - """ - # Multiply the system to get the entire neighbourhood. - num = system.get_atomic_numbers() - n_atoms = len(system) - seed_num = num[index] - - neighbours = defaultdict(lambda: 0) - for j in range(n_atoms): - j_num = num[j] - ij_disp = disp_tensor_finite[index, j, :] - - if index == j: - trans = translations_reduced - else: - trans = translations - - D_trans = trans + ij_disp - D_trans_len = np.linalg.norm(D_trans, axis=1) - ij_radii = covalent_radii[seed_num] + covalent_radii[j_num] - ij_n_neigh = np.sum(D_trans_len - ij_radii <= self.bond_threshold) - - neighbours[j_num] += ij_n_neigh - - return neighbours - - def get_chemical_similarity(self, ideal_env, real_env): - """Returns a metric that quantifies the similarity between two chemical - environments. Here the metric is defined simply by the number - neighbours that are found to be same as in the ideal environmen within - a certain radius. - """ - max_score = sum(ideal_env.values()) - - score = 0 - for ideal_key, ideal_value in ideal_env.items(): - real_value = real_env.get(ideal_key) - if real_value is not None: - score += min(real_value, ideal_value) - - return score / max_score - - def get_interstitials(self): - """Get the indices of interstitial atoms in the original system.""" - inside_indices, _ = self.get_inside_and_outside_indices() - inside_indices = set(inside_indices) - substitutions = self.get_substitutions() - subst_indices = set() - for subst in substitutions: - subst_indices.add(subst.index) - interstitials = inside_indices - subst_indices - - return np.array(list(interstitials)) - - def get_clusters(self): - """Used to cluster the system in order to distinguish chemisorbed - adsorbates from the surface. - """ - if self._clusters is None: - clusters = matid.geometry.get_clusters( - self.dist_matrix_radii_pbc, self.bond_threshold - ) - clusters = [set(list(x)) for x in clusters] - self._clusters = clusters - - return self._clusters - - def get_adsorbates(self): - """Get the indices of the adsorbate atoms in the region. - - All atoms that are outside the tesselation, and are either not part of - the elements present in the surface or further away than a certain - threshold, are labeled as adsorbate atoms. - - This function does not differentiate between different adsorbate - molecules. - - Returns: - np.ndarray: Indices of the adsorbates in the original system. - """ - if self._adsorbates is None: - _, outside_indices = self.get_inside_and_outside_indices() - basis_elements = set(self.cell.get_atomic_numbers()) - - # The substitutions have to be removed explicitly from the ouside - # atoms because sometimes they are outside the tesselation. - substitutions = self.get_substitutions() - outside_indices = set(outside_indices) - substitutions = set([x.index for x in substitutions]) - outside_indices -= substitutions - outside_indices = np.array(list(outside_indices)) - - if len(outside_indices) != 0: - basis_elements = set(basis_elements) - adsorbates = [] - for index in outside_indices: - adsorbates.append(index) - adsorbates = np.array(adsorbates) - else: - adsorbates = np.array([]) - - self._adsorbates = adsorbates - - return self._adsorbates - - def get_substitutions(self): - """Get the substitutions in the region.""" - if self._substitutions is None: - # Gather all substitutions - # all_substitutions = [] - # for cell in self.values(): - # subst = cell.substitutions - # if len(subst) != 0: - # all_substitutions.extend(subst) - - # The substitutions are validate based on their chemical - # environment and position in the triangulation. - neighbour_map = self.get_basis_atom_neighbourhood() - valid_subst = [] - # _, outside_indices = self.get_inside_and_outside_indices() - # outside_set = set(outside_indices) - translations, translations_reduced = self.get_chem_env_translations() - + indices = set() for unit in self.values(): - # Compare the chemical environment near this atom to the one - # that is present in the prototype cell. If these - # neighbourhoods are too different, then the atom is not - # counted as being a part of the region. - if unit.substitutions is not None: - for i_index, subst in enumerate(unit.substitutions): - if subst is not None: - subst_index = subst.index - - # Otherwise check the chemical similarity - real_environment = self.get_chemical_environment( - self.system, - subst_index, - self.disp_tensor_finite, - translations, - translations_reduced, - ) - ideal_environment = neighbour_map[i_index] - chem_similarity = self.get_chemical_similarity( - ideal_environment, real_environment - ) - if chem_similarity >= self.chem_similarity_threshold: - valid_subst.append(subst) - self._substitutions = valid_subst - - # In 2D materials all substitutions in the cell are valid - # substitutions - # if self.is_2d: - # self._substitutions = all_substitutions - # else: - # # In surfaces the substitutions have to be validate by whether they - # # are inside the tesselation or not - # inside_indices, _ = self.get_inside_and_outside_indices() - # inside_set = set(inside_indices) + for index in unit.basis_indices: + if index is not None: + indices.add(index) + self._basis_indices = indices - # # Find substitutions that are inside the tesselation - # valid_subst = [] - # for subst in all_substitutions: - # subst_index = subst.index - # if subst_index in inside_set: - # valid_subst.append(subst) - # self._substitutions = valid_subst - - return self._substitutions - - def get_vacancies(self): - """Get the vacancies in the region. - - Returns: - ASE.Atoms: An atoms object representing the atoms that are missing. - The Atoms object has the same properties as the original system. - """ - if self._vacancies is None: - # Gather all vacancies - all_vacancies = [] - for cell in self.values(): - vacancies = cell.vacancies - if len(vacancies) != 0: - all_vacancies.extend(vacancies) - - # For purely 2D systems all missing atoms in the basis are vacancies - if self.is_2d: - self._vacancies = all_vacancies - else: - # Get the tesselation - tesselation = self.get_tetrahedra_decomposition() - - # Find substitutions that are inside the tesselation - valid_vacancies = [] - for vacancy in all_vacancies: - vac_pos = vacancy.position - simplex = tesselation.find_simplex(vac_pos) - if simplex is not None: - valid_vacancies.append(vacancy) - self._vacancies = valid_vacancies - - return self._vacancies - - def get_tetrahedra_decomposition(self): - """Get the tetrahedra decomposition for this region.""" - if self._decomposition is None: - # Get the positions of basis atoms - basis_indices = self.get_basis_indices() - valid_sys = self.system[basis_indices] - - # Perform tetrahedra decomposition - self._decomposition = matid.geometry.get_tetrahedra_decomposition( - valid_sys, self.delaunay_threshold - ) - - return self._decomposition - - def get_all_indices(self): - """Get all the indices that are present in the full system.""" - return set(range(len(self.system))) - - def get_unknowns(self): - """Returns indices of the atoms that are in the outliers but are not - recognized as any specialized group. - """ - outliers = set(self.get_outliers()) - adsorbates = set(self.get_adsorbates()) - interstitials = set(self.get_interstitials()) - substitutions = set([x.index for x in self.get_substitutions()]) - - return outliers - adsorbates - interstitials - substitutions - - def get_inside_and_outside_indices(self): - """Get the indices of atoms that are inside and outside the tetrahedra - tesselation. - - Returns: - (np.ndarray, np.ndarray): Indices of atoms that are inside and - outside the tesselation. The inside indices are in the first array. - """ - if self._inside_indices is None and self._outside_indices is None: - invalid_indices = self.get_outliers() - invalid_pos = self.system - inside_indices = [] - outside_indices = [] - - if len(invalid_indices) != 0: - invalid_pos = self.system.get_positions()[invalid_indices] - tesselation = self.get_tetrahedra_decomposition() - for i, pos in zip(invalid_indices, invalid_pos): - simplex_index = tesselation.find_simplex(pos) - if simplex_index is None: - outside_indices.append(i) - else: - inside_indices.append(i) - - self._inside_indices = np.array(inside_indices) - self._outside_indices = np.array(outside_indices) - - return self._inside_indices, self._outside_indices + return self._basis_indices def get_connected_directions(self): """During the tracking of the region the information about searches @@ -499,13 +104,14 @@ def get_connected_directions(self): node_edges = G.in_edges(node, data=True) dir_to_remove = set() for direction in directions: + dir_vector = dir_vectors[direction] positive = False negative = False for edge in node_edges: multiplier = edge[2]["multiplier"] - if np.array_equal(multiplier, dir_vectors[direction]): + if np.array_equal(multiplier, dir_vector): positive = True - if np.array_equal(multiplier, -dir_vectors[direction]): + if np.array_equal(multiplier, -dir_vector): negative = True if positive and negative: break diff --git a/matid/core/periodicfinder.py b/matid/core/periodicfinder.py index 19f01af3..d6e68125 100644 --- a/matid/core/periodicfinder.py +++ b/matid/core/periodicfinder.py @@ -39,7 +39,6 @@ def __init__( cell_size_tol=constants.CELL_SIZE_TOL, max_2d_cell_height=constants.MAX_2D_CELL_HEIGHT, max_2d_single_cell_size=constants.MAX_SINGLE_CELL_SIZE, - chem_similarity_threshold=constants.CHEM_SIMILARITY_THRESHOLD, ): """ Args: @@ -54,7 +53,6 @@ def __init__( self.cell_size_tol = cell_size_tol self.max_2d_cell_height = max_2d_cell_height self.max_2d_single_cell_size = (max_2d_single_cell_size,) - self.chem_similarity_threshold = chem_similarity_threshold def get_region( self, @@ -62,7 +60,6 @@ def get_region( seed_index, max_cell_size, pos_tol, - delaunay_threshold=None, bond_threshold=None, overlap_threshold=-0.1, distances: Distances = None, @@ -91,8 +88,6 @@ def get_region( linkedunitcollection or None: A LinkedUnitCollection object representing the region or None if no region could be identified. """ - if delaunay_threshold is None: - delaunay_threshold = constants.DELAUNAY_THRESHOLD if bond_threshold is None: bond_threshold = constants.BOND_THRESHOLD @@ -101,7 +96,6 @@ def get_region( distances = matid.geometry.get_distances(system) self.disp_tensor_mic = distances.disp_tensor_mic - self.disp_tensor_finite = distances.disp_tensor_finite self.disp_factors = distances.disp_factors self.dist_matrix_radii_mic = distances.dist_matrix_radii_mic @@ -112,13 +106,13 @@ def get_region( # system is extended using the position tolerance and the celllist # cutoff is at most the size of the position tolerance, but not too # small to not take too much time/memory to create. - # self.cell_list = matid.geometry.get_cell_list( - # system.get_positions(), - # system.get_cell(), - # system.get_pbc(), - # max_cell_size, - # max(pos_tol, 1), - # ) + self.cell_list = matid.geometry.get_cell_list( + system.get_positions(), + system.get_cell(), + system.get_pbc(), + pos_tol, + max(pos_tol, 1), + ) self.pos_tol = pos_tol self.max_cell_size = max_cell_size @@ -151,8 +145,6 @@ def get_region( unit_collection = self._find_periodic_region( system, dim == 2, - delaunay_threshold, - bond_threshold, seed_index, proto_cell, offset, @@ -255,11 +247,19 @@ def _find_proto_cell( add_pos = neighbour_pos + span sub_pos = neighbour_pos - span - add_indices, _, _, add_factors = matid.geometry.get_matches_old( - system, add_pos, neighbour_num, self.pos_tol + add_indices, _, _, add_factors = matid.geometry.get_matches( + system, + self.cell_list, + add_pos, + neighbour_num, + self.pos_tol, ) - sub_indices, _, _, sub_factors = matid.geometry.get_matches_old( - system, sub_pos, neighbour_num, self.pos_tol + sub_indices, _, _, sub_factors = matid.geometry.get_matches( + system, + self.cell_list, + sub_pos, + neighbour_num, + self.pos_tol, ) n_metric = 0 @@ -275,21 +275,17 @@ def _find_proto_cell( if i_add is not None: n_metric += 1 dest_factor = origin_factor + i_add_factor - i_adj_list[ - (neighbour_indices[i_neigh], tuple(origin_factor)) - ].append((i_add, tuple(dest_factor))) - i_adj_list_add[ - (neighbour_indices[i_neigh], tuple(origin_factor)) - ].append((i_add, tuple(dest_factor))) + add_tuple = (i_add, tuple(dest_factor)) + add_key = (neighbour_indices[i_neigh], tuple(origin_factor)) + i_adj_list[add_key].append(add_tuple) + i_adj_list_add[add_key].append(add_tuple) if i_sub is not None: n_metric += 1 dest_factor = origin_factor + i_sub_factor - i_adj_list[ - (neighbour_indices[i_neigh], tuple(origin_factor)) - ].append((i_sub, tuple(dest_factor))) - i_adj_list_sub[ - (neighbour_indices[i_neigh], tuple(origin_factor)) - ].append((i_sub, tuple(dest_factor))) + sub_tuple = (i_sub, tuple(dest_factor)) + sub_key = (neighbour_indices[i_neigh], tuple(origin_factor)) + i_adj_list[sub_key].append(sub_tuple) + i_adj_list_sub[sub_key].append(sub_tuple) metric[i_span] = n_metric adjacency_lists.append(i_adj_list) @@ -321,18 +317,13 @@ def _find_proto_cell( i_factor[i_per_span] = 1 for i_neigh, neigh_factor in neighbour_nodes: neigh_tuple = tuple(neigh_factor) - per_adjacency_list[(i_neigh, neigh_tuple)].append( - (i_neigh, tuple(neigh_factor + i_factor)) - ) - per_adjacency_list[(i_neigh, neigh_tuple)].append( - (i_neigh, tuple(neigh_factor - i_factor)) - ) - per_adjacency_list_add[(i_neigh, neigh_tuple)].append( - (i_neigh, tuple(neigh_factor + i_factor)) - ) - per_adjacency_list_sub[(i_neigh, neigh_tuple)].append( - (i_neigh, tuple(neigh_factor - i_factor)) - ) + key = (i_neigh, neigh_tuple) + value_add = (i_neigh, tuple(neigh_factor + i_factor)) + value_sub = (i_neigh, tuple(neigh_factor - i_factor)) + per_adjacency_list[key].append(value_add) + per_adjacency_list[key].append(value_sub) + per_adjacency_list_add[key].append(value_add) + per_adjacency_list_sub[key].append(value_sub) adjacency_lists.append(per_adjacency_list) adjacency_lists_add.append(per_adjacency_list_add) @@ -723,6 +714,7 @@ def _find_proto_cell_3d( # Find the seed positions copies that are within the neighbourhood orig_cell = system.get_cell() + positions = system.get_positions() # Find the cells in which the copies of the seed atom are at the # origin. Here we are reusing information from the displacement tensor @@ -756,12 +748,9 @@ def _find_proto_cell_3d( a_correction = np.dot( (-np.array(node_factor) + np.array(i_factor)), orig_cell ) - a = ( - self.disp_tensor_finite[a_final_neighbour, node_index, :] - + a_correction - ) + displacement = positions[a_final_neighbour] - positions[node_index] + a = displacement + a_correction a *= multiplier - else: a = best_spans[i_basis, :] @@ -912,6 +901,7 @@ def _find_proto_cell_2d( seed_group_index(int): A new index of the seed atom in the cell. """ orig_cell = system.get_cell() + positions = system.get_positions() # We need to make the third basis vector, In 2D systems the maximum # thickness of the system is defined by max_cell_size. @@ -953,11 +943,8 @@ def _find_proto_cell_2d( a_correction = np.dot( (-np.array(node_factor) + np.array(i_factor)), orig_cell ) - a = ( - multiplier - * self.disp_tensor_finite[a_final_neighbour, node_index, :] - + a_correction - ) + displacement = positions[a_final_neighbour] - positions[node_index] + a = multiplier * displacement + a_correction else: a = best_spans[i_basis, :] cells[i_node, i_basis, :] = a @@ -985,9 +972,17 @@ def _find_proto_cell_2d( if i_seed in index_cell_map: i_indices, i_pos, i_factors = index_cell_map[i_seed] else: - i_indices, i_pos, i_factors = matid.geometry.get_positions_within_basis( - system, cell, search_coord, pos_tol, pbc=system.get_pbc() - ) + # If there is a problem in using the given cell to retrieve + # positions (e.g. cell is singular), we don't report a prototype + # cell + try: + i_indices, i_pos, i_factors = ( + matid.geometry.get_positions_within_basis( + system, cell, search_coord, pos_tol, pbc=system.get_pbc() + ) + ) + except Exception: + return None, None, None index_cell_map[i_seed] = (i_indices, i_pos, i_factors) # Add the seed node factor @@ -1248,8 +1243,6 @@ def _find_periodic_region( self, system, is_2d, - tesselation_distance, - bond_threshold, seed_index, unit_cell, seed_position, @@ -1294,11 +1287,6 @@ def _find_periodic_region( system, unit_cell, is_2d, - self.dist_matrix_radii_mic, - self.disp_tensor_finite, - tesselation_distance, - self.chem_similarity_threshold, - bond_threshold, ) multipliers = self._get_multipliers(periodic_indices) @@ -1415,6 +1403,7 @@ def _find_region_rec( cell_pos = unit_cell.get_scaled_positions(wrap=False) except Exception: return + cell_num = unit_cell.get_atomic_numbers() old_basis = unit_cell.get_cell() @@ -1432,8 +1421,8 @@ def _find_region_rec( test_pos = matid.geometry.to_cartesian(orig_cell, test_pos) # Find the atoms that match the positions in the original basis - matches, substitutions, vacancies, _ = matid.geometry.get_matches_old( - system, test_pos, cell_num, self.pos_tol + matches, substitutions, vacancies, _ = matid.geometry.get_matches( + system, self.cell_list, test_pos, cell_num, self.pos_tol ) # Associate the matched atoms to this cell @@ -1512,13 +1501,18 @@ def _find_region_rec( ) collection[cell_index] = new_unit - # Save the updated cell shape for the new cells in the queue - new_sys = Atoms( - cell=new_cell, - scaled_positions=cell_pos, - symbols=cell_num, - pbc=unit_cell.get_pbc(), - ) + # Save the updated cell shape for the new cells in the queue. If the + # found system is invalid, the result is ignored. + try: + new_sys = Atoms( + cell=new_cell, + scaled_positions=cell_pos, + symbols=cell_num, + pbc=unit_cell.get_pbc(), + ) + except Exception: + return + cells = len(new_seed_pos) * [new_sys] # Add the found neighbours to a queue @@ -1582,8 +1576,6 @@ def _find_new_seeds_and_cell( return new_cell, new_seed_indices, new_seed_pos, new_cell_indices else: used_points.add(seed_index) - - orig_cell = system.get_cell() orig_pos = system.get_positions() # Filter out cells that have already been searched @@ -1602,37 +1594,32 @@ def _find_new_seeds_and_cell( # Find out the atoms that match the seed_guesses in the original # system seed_guesses = seed_pos + dislocations - matches, _, _, factors = matid.geometry.get_matches_old( + matches, displacements = matid.geometry.get_matches_simple( system, + self.cell_list, seed_guesses, len(dislocations) * [seed_atomic_number], self.pos_tol, ) - for match, factor, seed_guess, multiplier, disloc, test_cell_index in zip( + for ( + match, + seed_guess, + multiplier, + disloc, + test_cell_index, + displacement, + ) in zip( matches, - factors, seed_guesses, multipliers, dislocations, test_cell_indices, + displacements, ): multiplier_tuple = tuple(multiplier) - # Save the position corresponding to a seed atom or a guess for - # it. If a match was found that is not the original seed, use - # it's position to update the cell. If the matched index is the - # same as the original seed, check the factors array to decide - # whether to use the guess or not. - if match is not None: - if match != seed_index: - i_seed_pos = orig_pos[match] - else: - if (factor == 0).all(): - i_seed_pos = seed_guess - else: - i_seed_pos = orig_pos[match] - else: - i_seed_pos = seed_guess + # Save the position corresponding to a seed atom or a guess for it. + i_seed_pos = seed_guess if match is None else orig_pos[match] # Check if this index has already been used as a seed. The # used_seed_indices is needed so that the same atom cannot @@ -1670,21 +1657,17 @@ def _find_new_seeds_and_cell( if match is not None: used_indices.add(match) - # Store the cell basis vector + # Update the cell basis vector based on the found match. TODO: + # This displacement correction may have unwanted effects in + # noisy systems. for i in range(3): basis_mult = [0, 0, 0] basis_mult[i] = 1 basis_mult = tuple(basis_mult) if multiplier_tuple == basis_mult: - if match is None: - i_basis = disloc - else: - temp = i_seed_pos + np.dot(factor, orig_cell) - i_basis = temp - seed_pos + i_basis = disloc + if match: + i_basis -= np.array(displacement) new_cell[i, :] = i_basis - # TODO: Calculate the average cell for this seed atom. The average cell - # is then used in the next phase of the search for the neighbouring - # cells. - return new_cell, new_seed_indices, new_seed_pos, new_cell_indices diff --git a/matid/geometry/geometry.py b/matid/geometry/geometry.py index 140fd3c4..4f3f64a8 100644 --- a/matid/geometry/geometry.py +++ b/matid/geometry/geometry.py @@ -558,75 +558,6 @@ def get_wrapped_positions(scaled_pos, precision=1e-5): return scaled_pos -def get_displacement_tensor_old( - pos1, - pos2, - cell=None, - pbc=None, - mic=False, - cutoff=None, - return_factors=False, - return_distances=False, -): - """Given an array of positions, calculates the 3D displacement tensor - between the positions. - - The displacement tensor is a matrix where the entry A[i, j, :] is the - vector pos1[i] - pos2[j], i.e. the vector from pos2 to pos1 - - Args: - pos1(np.ndarray): 2D array of positions - pos2(np.ndarray): 2D array of positions - cell(np.ndarray): Cell for taking into account the periodicity - pbc(boolean or a list of booleans): Periodicity of the axes - mic(boolean): Whether to return the displacement to the nearest - periodic copy - - Returns: - np.ndarray: 3D displacement tensor - (optional) np.ndarray: The indices of the periodic copies in which the - minimum image was found - (optional) np.ndarray: The lengths of the displacement vectors - """ - # Make 1D into 2D - shape1 = pos1.shape - shape2 = pos2.shape - if len(shape1) == 1: - n_cols1 = len(pos1) - pos1 = np.reshape(pos1, (-1, n_cols1)) - if len(shape2) == 1: - n_cols2 = len(pos2) - pos2 = np.reshape(pos2, (-1, n_cols2)) - - # Calculate the pairwise distance vectors - disp_tensor = pos1[:, None, :] - pos2[None, :, :] - tensor_shape = np.array(disp_tensor.shape) - - # If minimum image convention is used, get the corrected vectors - if mic: - flattened_disp = np.reshape(disp_tensor, (-1, 3)) - D, D_len, factors = find_mic(flattened_disp, cell, pbc, cutoff) - disp_tensor = np.reshape(D, tensor_shape) - if return_factors: - factors = np.reshape(factors, tensor_shape) - if return_distances: - lengths = np.reshape(D_len, (tensor_shape[0], tensor_shape[1])) - else: - if return_factors: - factors = np.zeros((tensor_shape[0], tensor_shape[1], 3)) - if return_distances: - lengths = np.linalg.norm(disp_tensor, axis=2) - - if return_factors and return_distances: - return disp_tensor, factors, lengths - elif return_factors: - return disp_tensor, factors - elif return_distances: - return disp_tensor, lengths - else: - return disp_tensor - - def get_displacement_tensor( positions, cell=None, @@ -979,112 +910,6 @@ def get_positions_within_basis( return indices, cell_pos, factors -def get_matches_old(system, positions, numbers, tolerance, mic=True): - """Given a system and a list of cartesian positions and atomic numbers, - returns a list of indices for the atoms corresponding to the given - positions with some tolerance. - - Args: - system(ASE.Atoms): System where to search the positions - positions(np.ndarray): Positions to match in the system. - tolerance(float): Maximum allowed distance for each vector that - is allowed for a match in position. - mic(boolean): Whether to find the minimum image copy. - - Returns: - np.ndarray: indices of matched atoms - list: list of substitutions - list: list of vacancies - np.ndarray: for each searched position, an integer array representing - the number of the periodic copy where the match was found. - """ - orig_num = system.get_atomic_numbers() - orig_pos = system.get_positions() - cell = system.get_cell() - pbc = system.get_pbc() - pbc = expand_pbc(pbc) - scaled_pos2 = to_scaled(cell, positions, wrap=False) - - _, factors, dist_matrix = get_displacement_tensor_old( - positions, - orig_pos, - cell, - pbc, - mic=mic, - cutoff=tolerance, - return_factors=True, - return_distances=True, - ) - - # Find the closest atom within the tolerance and with the required atomic - # number, or if not found, get the closest atom that is within the - # tolerance - best_matches = [] - best_substitutions = [] - for i_atom, i in enumerate(dist_matrix): - near_mask = i <= tolerance - element_mask = orig_num == numbers[i_atom] - combined_mask = near_mask & element_mask - possible_indices = np.where(combined_mask)[0] - if len(possible_indices) != 0: - min_dist_index = np.argmin(i[combined_mask]) - best_index = possible_indices[min_dist_index] - best_matches.append(best_index) - best_substitutions.append(None) - elif near_mask.any(): - best_matches.append(None) - near_indices = np.where(near_mask)[0] - nearest_index = np.argmin(i[near_mask]) - best_substitutions.append(near_indices[nearest_index]) - else: - best_matches.append(None) - best_substitutions.append(None) - - matches = [] - substitutions = [] - vacancies = [] - copy_indices = np.zeros((len(positions), 3), dtype=int) - - for i, (i_match, i_subst) in enumerate(zip(best_matches, best_substitutions)): - match = None - copy = None - subst = None - b_num = numbers[i] - - if i_match is not None: - ind = i_match - match = ind - - # If a match was found the factor is reported based on the - # displacement tensor - i_move = factors[i][ind] - copy = i_move - elif i_subst is not None: - ind = i_subst - a_num = orig_num[ind] - - # Wrap the substitute position - subst_pos_cart = orig_pos[ind] - subst = Substitution(ind, subst_pos_cart, b_num, a_num) - - # If a match was found the factor is reported based on the - # displacement tensor - i_move = factors[i][ind] - copy = i_move - else: - vacancies.append(Atom(b_num, position=positions[i])) - - # If no match was found, the factor is reported from the scaled - # positions - copy = np.floor(scaled_pos2[i]) - - substitutions.append(subst) - matches.append(match) - copy_indices[i] = copy - - return matches, substitutions, vacancies, copy_indices - - def get_matches(system, cell_list, positions, numbers, tolerance): """Given a system and a list of cartesian positions and atomic numbers, returns a list of indices for the atoms corresponding to the given @@ -1117,6 +942,7 @@ def get_matches(system, cell_list, positions, numbers, tolerance): match = None substitution = None copy_index = None + displacement = None cell_list_result = cell_list.get_neighbours_for_position( position[0], position[1], position[2] ) @@ -1151,6 +977,49 @@ def get_matches(system, cell_list, positions, numbers, tolerance): return matches, substitutions, vacancies, copy_indices +def get_matches_simple(system, cell_list, positions, numbers, tolerance): + """Given a system and a list of cartesian positions and atomic numbers, + returns a list of indices for the atoms corresponding to the given + positions with some tolerance. + Args: + system(ASE.Atoms): System where to search the positions + cell_list(CellList): The cell list for an appropriately extended version + of the system. + positions(np.ndarray): Positions to match in the system. + tolerance(float): Maximum allowed distance for matching. + Returns: + list: list of matched atoms or None is nothing was matched. + """ + atomic_numbers = system.get_atomic_numbers() + cell = system.get_cell() + pbc = system.get_pbc() + matches = [] + displacements = [] + wrapped_positions = ase.geometry.wrap_positions(positions, cell, pbc) + + for wrapped_position, atomic_number in zip(wrapped_positions, numbers): + match = None + displacement = None + cell_list_result = cell_list.get_neighbours_for_position( + wrapped_position[0], wrapped_position[1], wrapped_position[2] + ) + indices = cell_list_result.indices_original + if len(indices) > 0: + distances = cell_list_result.distances + min_distance_index = np.argmin(distances) + closest_distance = distances[min_distance_index] + closest_index = indices[min_distance_index] + if closest_distance <= tolerance: + closest_atomic_number = atomic_numbers[closest_index] + if closest_atomic_number == atomic_number: + match = closest_index + displacement = cell_list_result.displacements[min_distance_index] + matches.append(match) + displacements.append(displacement) + + return matches, displacements + + def get_cell_list(positions, cell, pbc, extension, cutoff): """Given a system and a cutoff value, returns a cell list object. @@ -1574,15 +1443,15 @@ def get_distances(system: Atoms, radii="covalent") -> Distances: pbc = system.get_pbc() atomic_numbers = system.get_atomic_numbers() radii = get_radii(radii, atomic_numbers) - disp_tensor_finite = get_displacement_tensor(pos) if pbc.any(): - disp_tensor_mic, disp_factors = get_displacement_tensor( - pos, cell, pbc, return_factors=True + disp_tensor_mic, disp_factors, dist_matrix_mic = get_displacement_tensor( + pos, cell, pbc, return_factors=True, return_distances=True ) else: - disp_tensor_mic = disp_tensor_finite - disp_factors = np.zeros(disp_tensor_finite.shape) - dist_matrix_mic = np.linalg.norm(disp_tensor_mic, axis=2) + disp_tensor_mic, dist_matrix_mic = get_displacement_tensor( + pos, return_distances=True + ) + disp_factors = np.zeros(disp_tensor_mic.shape) # Calculate the distance matrix where the periodicity and the covalent # radii have been taken into account @@ -1593,7 +1462,6 @@ def get_distances(system: Atoms, radii="covalent") -> Distances: return Distances( disp_tensor_mic, disp_factors, - disp_tensor_finite, dist_matrix_mic, dist_matrix_radii_mic, ) diff --git a/matid/utils/symmetry_info_generation/query_wyckoff_sets.py b/matid/utils/symmetry_info_generation/query_wyckoff_sets.py index ab6972ab..09bbee05 100644 --- a/matid/utils/symmetry_info_generation/query_wyckoff_sets.py +++ b/matid/utils/symmetry_info_generation/query_wyckoff_sets.py @@ -22,7 +22,6 @@ import numpy as np import pickle from fractions import Fraction -import urllib.request from bs4 import BeautifulSoup import requests @@ -264,7 +263,7 @@ def extract_wyckoff_sets(soup, settings): # Raise exception if match not found for a space group number if space_group > i_spg: - raise + raise ValueError(f"No match for space group") if space_group == i_spg: spglib_defaults[space_group] = [hall_number, sub_hall, hm, settings] diff --git a/pyproject.toml b/pyproject.toml index e35c0136..61396010 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,14 +4,14 @@ build-backend = "setuptools.build_meta" [project] name = 'matid' -version = '2.0.1' +version = '2.1.0' description = 'MatID is a Python package for identifying and analyzing atomistic systems based on their structure.' readme = "README.md" authors = [{ name = "Lauri Himanen" }] license = { file = "LICENSE" } requires-python = ">=3.8" dependencies = [ - "numpy", + "numpy<2.0.0", "ase", "scipy", "spglib>=1.15.0", @@ -90,4 +90,4 @@ indent-style = "space" skip-magic-trailing-comma = false # Like Black, automatically detect the appropriate line ending. -line-ending = "auto" \ No newline at end of file +line-ending = "auto" diff --git a/tests/classificationtests.py b/tests/classificationtests.py index 8db8bb0f..01db6c32 100644 --- a/tests/classificationtests.py +++ b/tests/classificationtests.py @@ -9,16 +9,15 @@ from ase import Atoms from ase.build import bcc100, molecule -from ase.visualize import view import ase.build from ase.build import nanotube import ase.lattice.hexagonal from ase.lattice.compounds import Zincblende -from ase.lattice.cubic import SimpleCubicFactory from ase.data import covalent_radii import ase.io -from networkx import draw_networkx -import matplotlib.pyplot as mpl +# from ase.visualize import view +# from networkx import draw_networkx +# import matplotlib.pyplot as mpl from matid.classification import ( Classifier, @@ -379,7 +378,6 @@ def test_random(self): symbols=n_atoms * ["C"], pbc=(1, 1, 1), ) - # view(system) finder = PeriodicFinder() region = finder.get_region( @@ -407,7 +405,6 @@ def test_nanocluster(self): # Get the index of the atom that is closest to center of mass cm = system.get_center_of_mass() seed_index = np.argmin(np.linalg.norm(pos - cm, axis=1)) - # view(system) # Find the region with periodicity finder = PeriodicFinder() @@ -418,54 +415,8 @@ def test_nanocluster(self): max_cell_size=4, ) - # view(region.cell) - # No defects or unknown atoms - adsorbates = region.get_adsorbates() - interstitials = region.get_interstitials() - substitutions = region.get_substitutions() - vacancies = region.get_vacancies() - self.assertEqual(len(adsorbates), 0) - self.assertEqual(len(interstitials), 0) - self.assertEqual(len(substitutions), 0) - self.assertEqual(len(vacancies), 0) - - # def test_optimized_nanocluster(self): - # """Test the periodicity finder on a DFT-optimized nanocluster. This - # test does not yet pass because the full cluster is not detected - # correctly. - # """ - # system = ase.io.read("./data/cu55.xyz") - # system.set_cell([20, 20, 20]) - # system.set_pbc(True) - # system.center() - - # # Get the index of the atom that is closest to center of mass - # cm = system.get_center_of_mass() - # pos = system.get_positions() - # seed_index = np.argmin(np.linalg.norm(pos-cm, axis=1)) - # view(system) - - # # Find the region with periodicity - # finder = PeriodicFinder() - # region = finder.get_region(system, seed_index, 4, 2.75) - # # print(region) - - # rec = region.recreate_valid() - # view(rec) - # # view(rec.unit_cell) - - # # No defects or unknown atoms - # adsorbates = region.get_adsorbates() - # interstitials = region.get_interstitials() - # substitutions = region.get_substitutions() - # vacancies = region.get_vacancies() - # unknowns = region.get_unknowns() - # self.assertEqual(len(interstitials), 0) - # self.assertEqual(len(substitutions), 0) - # self.assertEqual(len(vacancies), 0) - # self.assertEqual(len(adsorbates), 0) - # self.assertEqual(len(unknowns), 0) + self.assertEqual(set(range(len(system))), set(region.get_basis_indices())) class DelaunayTests(unittest.TestCase): @@ -476,7 +427,6 @@ class DelaunayTests(unittest.TestCase): def test_surface(self): system = bcc100("Fe", size=(5, 5, 3), vacuum=8) - # view(system) decomposition = matid.geometry.get_tetrahedra_decomposition( system, DelaunayTests.delaunay_threshold ) @@ -502,7 +452,6 @@ def test_2d(self): formula="MoS2", kind="2H", a=3.18, thickness=3.19, size=(2, 2, 1), vacuum=8 ) system.set_pbc(True) - # view(system) decomposition = matid.geometry.get_tetrahedra_decomposition( system, DelaunayTests.delaunay_threshold @@ -514,13 +463,13 @@ def test_2d(self): test_pos = np.array([2, 2, 10.5]) self.assertNotEqual(decomposition.find_simplex(test_pos), None) - # # Atoms at the edges should belong to the surface + # Atoms at the edges should belong to the surface test_pos = np.array([0, 4, 10]) self.assertNotEqual(decomposition.find_simplex(test_pos), None) test_pos = np.array([5, 1, 10]) self.assertNotEqual(decomposition.find_simplex(test_pos), None) - # # Atoms outside + # Atoms outside test_pos = np.array([2, 2, 11.2]) self.assertEqual(decomposition.find_simplex(test_pos), None) test_pos = np.array([0, 0, 7.9]) @@ -594,7 +543,6 @@ def test_unknown_molecule(self): system.set_cell([[gap, 0, 0], [0, gap, 0], [0, 0, gap]]) system.set_pbc([True, True, True]) system.center() - # view(system) classifier = Classifier() clas = classifier.classify(system) self.assertIsInstance(clas, Class0D) @@ -686,7 +634,6 @@ def test_molecule_network(self): system = ase.io.read( "./data/R6JuJXj20goPQ0vv6aAVYpNyuwGgN+P_PaYo5EiiPChgUe9B6JnTX6BcOwt.xyz" ) - # view(system) classifier = Classifier(max_cell_size=20, max_2d_cell_height=20) classification = classifier.classify(system) @@ -701,7 +648,6 @@ def test_small_cell_defect(self): system = Material2DTests.graphene.repeat([3, 3, 1]) del system[8] system.set_pbc([True, True, False]) - # view(system) classifier = Classifier(max_cell_size=20) classification = classifier.classify(system) @@ -717,7 +663,6 @@ def test_small_cell_adsorption(self): system.set_pbc([True, True, True]) adsorbate = ase.Atom(position=[2, 2, 11], symbol="H") system += adsorbate - # view(system) classifier = Classifier(max_cell_size=20) classification = classifier.classify(system) @@ -733,23 +678,15 @@ def test_small_2d_cell_vacuum_direction_included(self): system = ase.io.read( "./data/RJv-r5Vwf6ypWElBSq_hTCOaxEU89+PgZTqAjcn_4hHS3fozZkAI0Jxtdas.xyz" ) - # view(system) classifier = Classifier() classification = classifier.classify(system) self.assertIsInstance(classification, Material2D) # No defects or unknown atoms - adsorbates = classification.adsorbates - interstitials = classification.interstitials - substitutions = classification.substitutions - vacancies = classification.vacancies - unknowns = classification.unknowns - self.assertEqual(len(interstitials), 0) - self.assertEqual(len(substitutions), 0) - self.assertEqual(len(vacancies), 0) - self.assertEqual(len(adsorbates), 0) - self.assertEqual(len(unknowns), 0) + self.assertEqual( + set(range(len(system))), set(classification.region.get_basis_indices()) + ) def test_vacuum_in_2d_unit_cell(self): """Structure where a 2D unit cell is found, but it has a vacuum gap. @@ -775,23 +712,15 @@ def test_graphene_sheets_close(self): system.set_cell(old_cell) system.center() system.set_pbc([True, True, True]) - # view(system) classifier = Classifier(max_cell_size=12) classification = classifier.classify(system) self.assertEqual(type(classification), Material2D) # No defects or unknown atoms - adsorbates = classification.adsorbates - interstitials = classification.interstitials - substitutions = classification.substitutions - vacancies = classification.vacancies - unknowns = classification.unknowns - self.assertEqual(len(interstitials), 0) - self.assertEqual(len(substitutions), 0) - self.assertEqual(len(vacancies), 0) - self.assertEqual(len(adsorbates), 0) - self.assertEqual(len(unknowns), 0) + self.assertEqual( + set(range(len(system))), set(classification.region.get_basis_indices()) + ) def test_too_big_single_cell(self): """Test that with when only the simulation cell itself is the found @@ -804,8 +733,6 @@ def test_too_big_single_cell(self): rng = RandomState(8) matid.geometry.make_random_displacement(system, 2, rng) - # view(system) - classifier = Classifier() classification = classifier.classify(system) self.assertIsInstance(classification, Class2D) @@ -836,37 +763,22 @@ def test_layered_2d(self): classification = classifier.classify(system) self.assertEqual(type(classification), Material2D) - # Boron nitrate adsorbate - adsorbates = classification.adsorbates - interstitials = classification.interstitials - substitutions = classification.substitutions - vacancies = classification.vacancies - unknowns = classification.unknowns - self.assertEqual(len(interstitials), 0) - self.assertEqual(len(substitutions), 0) - self.assertEqual(len(vacancies), 0) - self.assertEqual(len(adsorbates), 4) - self.assertEqual(len(unknowns), 0) - self.assertEqual(set(adsorbates), set([8, 9, 10, 11])) + # Adsorbate not part of the region + self.assertEqual( + set(range(len(system))).difference(set([8, 9, 10, 11])), + set(classification.region.get_basis_indices()), + ) def test_graphene_primitive(self): system = Material2DTests.graphene - # view(system) classifier = Classifier() classification = classifier.classify(system) self.assertIsInstance(classification, Material2D) # No defects or unknown atoms - adsorbates = classification.adsorbates - interstitials = classification.interstitials - substitutions = classification.substitutions - vacancies = classification.vacancies - unknowns = classification.unknowns - self.assertEqual(len(interstitials), 0) - self.assertEqual(len(substitutions), 0) - self.assertEqual(len(vacancies), 0) - self.assertEqual(len(adsorbates), 0) - self.assertEqual(len(unknowns), 0) + self.assertEqual( + set(range(len(system))), set(classification.region.get_basis_indices()) + ) def test_graphene_rectangular(self): system = Atoms( @@ -885,24 +797,14 @@ def test_graphene_rectangular(self): pbc=True, ) system = system.repeat([2, 1, 2]) - # view(system) classifier = Classifier() classification = classifier.classify(system) self.assertIsInstance(classification, Material2D) # Pristine - basis = classification.basis_indices - adsorbates = classification.adsorbates - interstitials = classification.interstitials - substitutions = classification.substitutions - vacancies = classification.vacancies - unknowns = classification.unknowns - self.assertEqual(len(interstitials), 0) - self.assertEqual(len(substitutions), 0) - self.assertEqual(len(unknowns), 0) - self.assertEqual(len(vacancies), 0) - self.assertEqual(len(adsorbates), 0) - self.assertEqual(set(basis), set(range(len(system)))) + self.assertEqual( + set(range(len(system))), set(classification.region.get_basis_indices()) + ) def test_2d_z_smaller_than_rmax(self): """Test that 2D systems that have an interlayer spacing smaller than @@ -931,7 +833,6 @@ def test_2d_z_smaller_than_rmax(self): ) system.center() system = system.repeat([2, 1, 2]) - # view(system) classifier = Classifier(max_cell_size=r_max) classification = classifier.classify(system) @@ -944,16 +845,9 @@ def test_graphene_supercell(self): self.assertIsInstance(classification, Material2D) # No defects or unknown atoms - adsorbates = classification.adsorbates - interstitials = classification.interstitials - substitutions = classification.substitutions - vacancies = classification.vacancies - unknowns = classification.unknowns - self.assertEqual(len(interstitials), 0) - self.assertEqual(len(substitutions), 0) - self.assertEqual(len(vacancies), 0) - self.assertEqual(len(adsorbates), 0) - self.assertEqual(len(unknowns), 0) + self.assertEqual( + set(range(len(system))), set(classification.region.get_basis_indices()) + ) def test_graphene_partial_pbc(self): system = Material2DTests.graphene.copy() @@ -963,70 +857,38 @@ def test_graphene_partial_pbc(self): self.assertIsInstance(classification, Material2D) # No defects or unknown atoms - adsorbates = classification.adsorbates - interstitials = classification.interstitials - substitutions = classification.substitutions - vacancies = classification.vacancies - unknowns = classification.unknowns - self.assertEqual(len(interstitials), 0) - self.assertEqual(len(substitutions), 0) - self.assertEqual(len(vacancies), 0) - self.assertEqual(len(adsorbates), 0) - self.assertEqual(len(unknowns), 0) + self.assertEqual( + set(range(len(system))), set(classification.region.get_basis_indices()) + ) def test_graphene_missing_atom(self): """Test graphene with a vacancy defect.""" system = Material2DTests.graphene.repeat([5, 5, 1]) del system[24] - # view(system) system.set_pbc([True, True, False]) classifier = Classifier() classification = classifier.classify(system) self.assertIsInstance(classification, Material2D) - # One vacancy - adsorbates = classification.adsorbates - interstitials = classification.interstitials - substitutions = classification.substitutions - vacancies = classification.vacancies - unknowns = classification.unknowns - self.assertEqual(len(interstitials), 0) - self.assertEqual(len(substitutions), 0) - self.assertEqual(len(vacancies), 1) - self.assertEqual(len(adsorbates), 0) - self.assertEqual(len(unknowns), 0) + # No defects or unknown atoms + self.assertEqual( + set(range(len(system))), set(classification.region.get_basis_indices()) + ) def test_graphene_substitution(self): """Test graphene with a substitution defect.""" system = Material2DTests.graphene.repeat([5, 5, 1]) system[0].number = 7 - # view(system) system.set_pbc([True, True, False]) classifier = Classifier() classification = classifier.classify(system) self.assertIsInstance(classification, Material2D) # One substitution - adsorbates = classification.adsorbates - interstitials = classification.interstitials - substitutions = classification.substitutions - vacancies = classification.vacancies - unknowns = classification.unknowns - - self.assertEqual(len(interstitials), 0) - self.assertEqual(len(substitutions), 1) - self.assertEqual(len(vacancies), 0) - self.assertEqual(len(adsorbates), 0) - self.assertEqual(len(unknowns), 0) - - # Check substitution info - subst = substitutions[0] - index = subst.index - orig_num = subst.original_element - subst_num = subst.substitutional_element - self.assertEqual(index, 0) - self.assertEqual(orig_num, 6) - self.assertEqual(subst_num, 7) + self.assertEqual( + set(range(len(system))).difference(set([0])), + set(classification.region.get_basis_indices()), + ) def test_graphene_missing_atom_exciting(self): """Test a more realistic graphene with a vacancy defect from the @@ -1151,31 +1013,15 @@ def test_graphene_missing_atom_exciting(self): cell=1e10 * cell, pbc=pbc, ) - # view(system) classifier = Classifier() classification = classifier.classify(system) self.assertIsInstance(classification, Material2D) - # view(classification.region.recreate_valid()) - - # One vacancy - adsorbates = classification.adsorbates - interstitials = classification.interstitials - substitutions = classification.substitutions - vacancies = classification.vacancies - unknowns = classification.unknowns - self.assertEqual(len(interstitials), 0) - self.assertEqual(len(substitutions), 0) - self.assertEqual(len(vacancies), 1) - self.assertEqual(len(adsorbates), 0) - self.assertEqual(len(unknowns), 0) - - # Check vacancy position - vac_atom = vacancies[0] - vac_symbol = vac_atom.symbol - vac_pos = vac_atom.position - self.assertEqual(vac_symbol, "C") - self.assertTrue(np.allclose(vac_pos, [0.7123, 11.0639, 0], atol=0.05)) + + # No defects or unknown atoms + self.assertEqual( + set(range(len(system))), set(classification.region.get_basis_indices()) + ) def test_graphene_shaken(self): """Test graphene that has randomly oriented but uniform length @@ -1191,19 +1037,9 @@ def test_graphene_shaken(self): self.assertIsInstance(classification, Material2D) # Pristine - adsorbates = classification.adsorbates - interstitials = classification.interstitials - substitutions = classification.substitutions - vacancies = classification.vacancies - unknowns = classification.unknowns - if len(vacancies) != 0: - view(system) - view(classification.region.cell) - self.assertEqual(len(interstitials), 0) - self.assertEqual(len(substitutions), 0) - self.assertEqual(len(vacancies), 0) - self.assertEqual(len(adsorbates), 0) - self.assertEqual(len(unknowns), 0) + self.assertEqual( + set(range(len(system))), set(classification.region.get_basis_indices()) + ) def test_chemisorption(self): """Test the adsorption where there is sufficient distance between the @@ -1213,25 +1049,16 @@ def test_chemisorption(self): system = ase.io.read( "./data/RloVGNkMhI83gtwzF5DmftT6fM31d+PKxGoPkNrvdpZrlLS-V14MszJ-57L.xyz" ) - # view(system) classifier = Classifier() classification = classifier.classify(system) self.assertIsInstance(classification, Material2D) - # No defects or unknown atoms, one adsorbate cluster - adsorbates = classification.adsorbates - interstitials = classification.interstitials - substitutions = classification.substitutions - vacancies = classification.vacancies - unknowns = classification.unknowns - - self.assertEqual(len(interstitials), 0) - self.assertEqual(len(substitutions), 0) - self.assertEqual(len(vacancies), 0) - self.assertEqual(len(unknowns), 0) - self.assertEqual(len(adsorbates), 24) - self.assertTrue(np.array_equal(adsorbates, np.arange(50, 74))) + # Adsorbates + self.assertEqual( + set(range(len(system))).difference(set(np.arange(50, 74))), + set(classification.region.get_basis_indices()), + ) def test_curved_2d(self): """Curved 2D-material""" @@ -1254,46 +1081,30 @@ def test_mos2_pristine_supercell(self): formula="MoS2", kind="2H", a=3.18, thickness=3.19, size=(5, 5, 1), vacuum=8 ) system.set_pbc(True) - # view(system) classifier = Classifier() classification = classifier.classify(system) self.assertIsInstance(classification, Material2D) # Pristine - adsorbates = classification.adsorbates - interstitials = classification.interstitials - substitutions = classification.substitutions - vacancies = classification.vacancies - unknowns = classification.unknowns - self.assertEqual(len(interstitials), 0) - self.assertEqual(len(substitutions), 0) - self.assertEqual(len(vacancies), 0) - self.assertEqual(len(adsorbates), 0) - self.assertEqual(len(unknowns), 0) + self.assertEqual( + set(range(len(system))), set(classification.region.get_basis_indices()) + ) def test_mos2_pristine_primitive(self): system = ase.build.mx2( formula="MoS2", kind="2H", a=3.18, thickness=3.19, size=(1, 1, 1), vacuum=8 ) system.set_pbc(True) - # view(system) classifier = Classifier() classification = classifier.classify(system) self.assertIsInstance(classification, Material2D) # Pristine - adsorbates = classification.adsorbates - interstitials = classification.interstitials - substitutions = classification.substitutions - vacancies = classification.vacancies - unknowns = classification.unknowns - self.assertEqual(len(interstitials), 0) - self.assertEqual(len(substitutions), 0) - self.assertEqual(len(vacancies), 0) - self.assertEqual(len(adsorbates), 0) - self.assertEqual(len(unknowns), 0) + self.assertEqual( + set(range(len(system))), set(classification.region.get_basis_indices()) + ) def test_mos2_substitution(self): system = ase.build.mx2( @@ -1305,23 +1116,15 @@ def test_mos2_substitution(self): symbols[25] = 6 system.set_atomic_numbers(symbols) - # view(system) - classifier = Classifier() classification = classifier.classify(system) self.assertIsInstance(classification, Material2D) # One substitution - adsorbates = classification.adsorbates - interstitials = classification.interstitials - substitutions = classification.substitutions - vacancies = classification.vacancies - unknowns = classification.unknowns - self.assertEqual(len(interstitials), 0) - self.assertEqual(len(vacancies), 0) - self.assertEqual(len(adsorbates), 0) - self.assertEqual(len(unknowns), 0) - self.assertEqual(len(substitutions), 1) + self.assertEqual( + set(range(len(system))).difference(set([25])), + set(classification.region.get_basis_indices()), + ) def test_mos2_vacancy(self): system = ase.build.mx2( @@ -1330,23 +1133,15 @@ def test_mos2_vacancy(self): system.set_pbc(True) del system[25] - # view(system) classifier = Classifier() classification = classifier.classify(system) self.assertIsInstance(classification, Material2D) - # One vacancy - adsorbates = classification.adsorbates - interstitials = classification.interstitials - substitutions = classification.substitutions - vacancies = classification.vacancies - unknowns = classification.unknowns - self.assertEqual(len(interstitials), 0) - self.assertEqual(len(adsorbates), 0) - self.assertEqual(len(substitutions), 0) - self.assertEqual(len(unknowns), 0) - self.assertEqual(len(vacancies), 1) + # Pristine + self.assertEqual( + set(range(len(system))), set(classification.region.get_basis_indices()) + ) def test_mos2_adsorption(self): """Test adsorption on mos2 surface.""" @@ -1359,24 +1154,15 @@ def test_mos2_adsorption(self): ads.translate([4.9, 5.5, 13]) system += ads - # view(system) - classifier = Classifier() classification = classifier.classify(system) self.assertIsInstance(classification, Material2D) - # One adsorbate - adsorbates = classification.adsorbates - interstitials = classification.interstitials - substitutions = classification.substitutions - vacancies = classification.vacancies - unknowns = classification.unknowns - self.assertEqual(len(interstitials), 0) - self.assertEqual(len(substitutions), 0) - self.assertEqual(len(unknowns), 0) - self.assertEqual(len(vacancies), 0) - self.assertEqual(len(adsorbates), 12) - self.assertTrue(np.array_equal(adsorbates, range(75, 87))) + # Adsorbates + self.assertEqual( + set(range(len(system))).difference(set(range(75, 87))), + set(classification.region.get_basis_indices()), + ) def test_2d_split(self): """A simple 2D system where the system has been split by the cell @@ -1393,24 +1179,14 @@ def test_2d_split(self): ), pbc=True, ) - # view(system) classifier = Classifier() classification = classifier.classify(system) self.assertIsInstance(classification, Material2D) # Pristine - basis = classification.basis_indices - adsorbates = classification.adsorbates - interstitials = classification.interstitials - substitutions = classification.substitutions - vacancies = classification.vacancies - unknowns = classification.unknowns - self.assertEqual(len(interstitials), 0) - self.assertEqual(len(substitutions), 0) - self.assertEqual(len(unknowns), 0) - self.assertEqual(len(vacancies), 0) - self.assertEqual(len(adsorbates), 0) - self.assertEqual(set(basis), set(range(len(system)))) + self.assertEqual( + set(range(len(system))), set(classification.region.get_basis_indices()) + ) def test_boron_nitride(self): system = Atoms( @@ -1427,24 +1203,14 @@ def test_boron_nitride(self): ), pbc=True, ) - # view(system) classifier = Classifier() classification = classifier.classify(system) self.assertIsInstance(classification, Material2D) # Pristine - basis = classification.basis_indices - adsorbates = classification.adsorbates - interstitials = classification.interstitials - substitutions = classification.substitutions - vacancies = classification.vacancies - unknowns = classification.unknowns - self.assertEqual(len(interstitials), 0) - self.assertEqual(len(substitutions), 0) - self.assertEqual(len(unknowns), 0) - self.assertEqual(len(vacancies), 0) - self.assertEqual(len(adsorbates), 0) - self.assertEqual(set(basis), set(range(len(system)))) + self.assertEqual( + set(range(len(system))), set(classification.region.get_basis_indices()) + ) def test_fluorographene(self): system = Atoms( @@ -1476,16 +1242,9 @@ def test_fluorographene(self): self.assertIsInstance(classification, Material2D) # Pristine - adsorbates = classification.adsorbates - interstitials = classification.interstitials - substitutions = classification.substitutions - vacancies = classification.vacancies - unknowns = classification.unknowns - self.assertEqual(len(interstitials), 0) - self.assertEqual(len(substitutions), 0) - self.assertEqual(len(vacancies), 0) - self.assertEqual(len(adsorbates), 0) - self.assertEqual(len(unknowns), 0) + self.assertEqual( + set(range(len(system))), set(classification.region.get_basis_indices()) + ) class Class3DTests(unittest.TestCase): @@ -1572,20 +1331,6 @@ def test_too_sparse(self): class SurfaceTests(unittest.TestCase): """Tests for detecting and analyzing surfaces.""" - # This test case is disabled due to skipping the \omega_c criterion in v2.0.0. - # def test_adsorbate_pattern(self): - # """Here the adsorbate will easily get included in the basis if the - # values for \omega_v and \omega_c are not suitable. - # """ - # system = ase.io.read( - # "./structures/RmlNIfj-YIQ14UBYjtAHtXcAEXZif+PIkKcrxeOf997qnQ_hWRXLdMsmpAf.xyz" - # ) - # # view(system) - # classifier = Classifier() - # classification = classifier.classify(system) - # self.assertEqual(type(classification), Surface) - # self.assertTrue(np.array_equal(classification.outliers, [24, 25, 26])) - def test_not_enough_repetitions(self): """In this system there is not enough repetitions of the cell in a third direction. One can with visual inspection guess the cell, but the @@ -1594,7 +1339,6 @@ def test_not_enough_repetitions(self): system = ase.io.read( "./data/Rhn-EWQQN8Z-lbmZwoWPyrGiM9Isx+PbYDgCBSwbq3nxONqWaq03HYUn8_V.xyz" ) - # view(system) classifier = Classifier() classification = classifier.classify(system) self.assertEqual(type(classification), Class2D) @@ -1607,21 +1351,17 @@ def test_incorrect_cell(self): system = ase.io.read( "./data/Rq0LUBXa6rZ-mddbQUZJXOIVAIg-J+Pm73-Kx5CWtuIHzLTr5R-Nir2el0i.xyz" ) - # view(system) classifier = Classifier() classification = classifier.classify(system) self.assertEqual(type(classification), Surface) - # print(classification.outliers) def test_thin_surface(self): """A realistic surface with only two layers.""" system = ase.io.read( "./data/RmlNIfj-YIQ14UBYjtAHtXcAEXZif+PYu3zrqdlNhhs9tII2lnvJ3Gj7tai.xyz" ) - # view(system) classifier = Classifier() classification = classifier.classify(system) - # cell = classification.region.cell self.assertEqual(type(classification), Surface) self.assertTrue(np.array_equal(classification.outliers, [24, 25, 26])) @@ -1632,7 +1372,6 @@ def test_mo_incorrect_3(self): system = ase.io.read( "./data/RmlNIfj-YIQ14UBYjtAHtXcAEXZif+PmZsb-Uf3AIGQyTBZDg4ZgxXaq5UB.xyz" ) - # view(system) classifier = Classifier() classification = classifier.classify(system) self.assertEqual(type(classification), Surface) @@ -1662,7 +1401,6 @@ def test_2d_motif_in_surface_hard(self): old_cell[2, :] = np.array([0, 0, 4]) system.set_cell(old_cell) system.center() - # view(system) # Should be classified as Class2D because the 2D motif that is detected # is not really periodic in the found basis vector directions (does not @@ -1706,89 +1444,12 @@ def test_2d_motif_in_surface_easy(self): old_cell[2, :] = np.array([0, 0, 8]) system.set_cell(old_cell) system.center() - # view(system) # Should be classified as Class2D because the coverage is too small classifier = Classifier(max_cell_size=4) classification = classifier.classify(system) self.assertEqual(type(classification), Class2D) - def test_surface_difficult_basis_atoms(self): - """This is a surface where the atoms on top of the surface will get - easily classified as adsorbates if the chemical environment detection - is not tuned correctly. - """ - system = ase.io.read( - "./data/RzQh5XijWuXsNZiRSxeOlPFUY_9Gl+PY5NRLMRYyQXsYmBN9hMcT-FftquP.xyz" - ) - # view(system) - - # With a little higher chemical similarity threshold the whole surface - # is not detected - classifier = Classifier(chem_similarity_threshold=0.45) - classification = classifier.classify(system) - self.assertIsInstance(classification, Surface) - - # Has outliers with these settings - outliers = classification.outliers - self.assertTrue(len(outliers) != 0) - - # With a little lower chemical similarity threshold the whole surface - # is again detected - classifier = Classifier(chem_similarity_threshold=0.40) - classification = classifier.classify(system) - self.assertIsInstance(classification, Surface) - - # Has no outliers with these settings - outliers = classification.outliers - self.assertTrue(len(outliers) == 0) - - # def test_surface_with_one_cell_but_periodic_backbone(self): - # """This is a surface that ultimately has only one repetition of the - # underlying unit cell in the simulation cell. Normally it would not get - # classified, but because it has a periodic backbone of Barium atoms, - # they are identified as the unit cell and everything inside is - # identified as outliers. Such systems still pose a challenge to the - # algorithm. - # """ - # system = ase.io.read("./data/Rhn-EWQQN8Z-lbmZwoWPyrGiM9Isx+PbYDgCBSwbq3nxONqWaq03HYUn8_V.xyz") - # view(system) - - # classifier = Classifier() - # classification = classifier.classify(system) - # self.assertIsInstance(classification, Surface) - - # # No outliers - # outliers = classification.outliers - # self.assertEqual(len(outliers), 0) - - def test_adsorbate_detection_via_neighbourhood(self): - """Test that adsorbates that are in a basis atom position, but do not - exhibit the correct chemical neighbourhood are identified. - """ - system = ase.io.read( - "./data/ROHGEranIWm-gnS6jhQaLZRORWDKx+Pbsl6Hlb_C1aXadFiJ58UCUek5a8x.xyz" - ) - # view(system) - - classifier = Classifier() - classification = classifier.classify(system) - self.assertIsInstance(classification, Surface) - - # Only adsorbates - adsorbates = classification.adsorbates - interstitials = classification.interstitials - substitutions = classification.substitutions - vacancies = classification.vacancies - unknowns = classification.unknowns - - self.assertEqual(len(interstitials), 0) - self.assertEqual(len(substitutions), 0) - self.assertEqual(len(vacancies), 0) - self.assertEqual(len(adsorbates), 18) - self.assertEqual(len(unknowns), 0) - self.assertTrue(np.array_equal(adsorbates, np.arange(0, 18))) - def test_surface_wrong_cm(self): """Test that the seed atom is correctly chosen near the center of mass even if the structure is cut. @@ -1799,25 +1460,16 @@ def test_surface_wrong_cm(self): system.set_pbc([True, True, True]) system.translate([0, 0, 10]) system.wrap() - # view(system) classifier = Classifier() classification = classifier.classify(system) - # view(classification.region.recreate_valid()) - # view(classification.region.cell) self.assertIsInstance(classification, Surface) # One adsorbate - adsorbates = classification.adsorbates - interstitials = classification.interstitials - substitutions = classification.substitutions - vacancies = classification.vacancies - unknowns = classification.unknowns - self.assertEqual(len(interstitials), 0) - self.assertEqual(len(substitutions), 0) - self.assertEqual(len(vacancies), 0) - self.assertEqual(len(adsorbates), 1) - self.assertEqual(len(unknowns), 0) + self.assertEqual( + set(range(len(system))).difference(set([36])), + set(classification.region.get_basis_indices()), + ) def test_search_beyond_limits(self): """In this system the found unit cell cannot be used to seach the whole @@ -1832,17 +1484,11 @@ def test_search_beyond_limits(self): classification = classifier.classify(system) self.assertIsInstance(classification, Surface) - # Only adsorbates - adsorbates = classification.adsorbates - interstitials = classification.interstitials - substitutions = classification.substitutions - vacancies = classification.vacancies - unknowns = classification.unknowns - self.assertEqual(len(vacancies), 0) - self.assertEqual(len(substitutions), 0) - self.assertEqual(len(adsorbates), 14) - self.assertEqual(len(unknowns), 0) - self.assertEqual(len(interstitials), 0) + # Adsorbates + self.assertEqual( + set(range(len(system))).difference(set(range(0, 14))), + set(classification.region.get_basis_indices()), + ) def test_ordered_adsorbates(self): """Test surface where on top there are adsorbates with high @@ -1852,50 +1498,31 @@ def test_ordered_adsorbates(self): system = ase.io.read( "./data/RDtJ5cTyLBPt4PA182VbCzoCxf5Js+P8Wnwz4dfyea6UAD0WEBadXv83wyf.xyz" ) - # view(system) classifier = Classifier() classification = classifier.classify(system) self.assertIsInstance(classification, Surface) - # Only adsorbates - adsorbates = classification.adsorbates - interstitials = classification.interstitials - substitutions = classification.substitutions - vacancies = classification.vacancies - unknowns = classification.unknowns - - self.assertEqual(len(interstitials), 0) - self.assertEqual(len(substitutions), 0) - self.assertEqual(len(vacancies), 0) - self.assertEqual(len(adsorbates), 13) - self.assertEqual(len(unknowns), 0) - self.assertTrue(np.array_equal(adsorbates, np.arange(0, 13))) + # Adsorbates + self.assertEqual( + set(range(len(system))).difference(set(range(0, 13))), + set(classification.region.get_basis_indices()), + ) def test_surface_with_one_basis_vector_as_span(self): system = ase.io.read( "./data/RDtJ5cTyLBPt4PA182VbCzoCxf5Js+PFw_-OtcPJ5og8XMItaAAFYhQUaY6.xyz" ) - # view(system) classifier = Classifier() classification = classifier.classify(system) self.assertIsInstance(classification, Surface) - # view(classification.region.recreate_valid()) - - # Only adsorbates - adsorbates = classification.adsorbates - interstitials = classification.interstitials - substitutions = classification.substitutions - vacancies = classification.vacancies - unknowns = classification.unknowns - self.assertEqual(len(interstitials), 0) - self.assertEqual(len(substitutions), 0) - self.assertEqual(len(vacancies), 0) - self.assertEqual(len(adsorbates), 6) - self.assertEqual(len(unknowns), 0) - self.assertTrue(np.array_equal(adsorbates, np.arange(0, 6))) + # Adsorbates + self.assertEqual( + set(range(len(system))).difference(set(range(0, 6))), + set(classification.region.get_basis_indices()), + ) def test_cut_surface(self): """Test a surface that has been cut by the cell boundary. Should still @@ -1904,24 +1531,16 @@ def test_cut_surface(self): system = ase.io.read( "./data/RscdVKibS4pD0O_Yo1CSwkznfiL1c+PCvflj-qTkfRcUaCISfn8fm-2oaVW.xyz" ) - # view(system) classifier = Classifier() classification = classifier.classify(system) self.assertIsInstance(classification, Surface) # Pristine - adsorbates = classification.adsorbates - interstitials = classification.interstitials - substitutions = classification.substitutions - vacancies = classification.vacancies - unknowns = classification.unknowns - - self.assertEqual(len(interstitials), 0) - self.assertEqual(len(substitutions), 0) - self.assertEqual(len(vacancies), 0) - self.assertEqual(len(adsorbates), 0) - self.assertEqual(len(unknowns), 0) + self.assertEqual( + set(range(len(system))), + set(classification.region.get_basis_indices()), + ) def test_zinc_blende(self): system = Zincblende(symbol=["Au", "Fe"], latticeconstant=5) @@ -1930,7 +1549,6 @@ def test_zinc_blende(self): cell[2, :] *= 3 system.set_cell(cell) system.center() - # view(system) classifier = Classifier() classification = classifier.classify(system) @@ -1942,17 +1560,11 @@ def test_zinc_blende(self): space_group = analyzer.get_space_group_number() self.assertEqual(space_group, 216) - # No defects or unknown atoms - adsorbates = classification.adsorbates - interstitials = classification.interstitials - substitutions = classification.substitutions - vacancies = classification.vacancies - unknowns = classification.unknowns - self.assertEqual(len(interstitials), 0) - self.assertEqual(len(substitutions), 0) - self.assertEqual(len(vacancies), 0) - self.assertEqual(len(adsorbates), 0) - self.assertEqual(len(unknowns), 0) + # Pristine + self.assertEqual( + set(range(len(system))), + set(classification.region.get_basis_indices()), + ) def test_thin_complex_surface(self): """Test for a complex thin surface with adsorbate. This surface has @@ -1961,63 +1573,40 @@ def test_thin_complex_surface(self): system = ase.io.read( "./data/RmlNIfj-YIQ14UBYjtAHtXcAEXZif+Pkl2CiGU9KP0uluTY8M3PeGEb4OS_.xyz" ) - # view(system) classifier = Classifier(pos_tol=0.75) classification = classifier.classify(system) self.assertEqual(type(classification), Surface) - # CO2 adsorbate - adsorbates = classification.adsorbates - interstitials = classification.interstitials - substitutions = classification.substitutions - vacancies = classification.vacancies - unknowns = classification.unknowns - - self.assertEqual(len(interstitials), 0) - self.assertEqual(len(substitutions), 0) - self.assertEqual(len(vacancies), 0) - self.assertEqual(len(unknowns), 0) - self.assertEqual(len(adsorbates), 3) - self.assertTrue(np.array_equal(adsorbates, np.array([24, 25, 26]))) + # Adsorbates + self.assertEqual( + set(range(len(system))).difference(set([24, 25, 26])), + set(classification.region.get_basis_indices()), + ) def test_bcc_pristine_small_surface(self): system = bcc100("Fe", size=(1, 1, 3), vacuum=8) - # view(system) classifier = Classifier() classification = classifier.classify(system) self.assertIsInstance(classification, Surface) - # No defects or unknown atoms - adsorbates = classification.adsorbates - interstitials = classification.interstitials - substitutions = classification.substitutions - vacancies = classification.vacancies - unknowns = classification.unknowns - self.assertEqual(len(interstitials), 0) - self.assertEqual(len(substitutions), 0) - self.assertEqual(len(vacancies), 0) - self.assertEqual(len(adsorbates), 0) - self.assertEqual(len(unknowns), 0) + # Pristine + self.assertEqual( + set(range(len(system))), + set(classification.region.get_basis_indices()), + ) def test_bcc_pristine_big_surface(self): system = bcc100("Fe", size=(5, 5, 3), vacuum=8) - # view(system) classifier = Classifier() classification = classifier.classify(system) self.assertIsInstance(classification, Surface) - # No defects or unknown atoms - adsorbates = classification.adsorbates - interstitials = classification.interstitials - substitutions = classification.substitutions - vacancies = classification.vacancies - unknowns = classification.unknowns - self.assertEqual(len(interstitials), 0) - self.assertEqual(len(substitutions), 0) - self.assertEqual(len(vacancies), 0) - self.assertEqual(len(adsorbates), 0) - self.assertEqual(len(unknowns), 0) + # Pristine + self.assertEqual( + set(range(len(system))), + set(classification.region.get_basis_indices()), + ) def test_bcc_substitution(self): """Surface with substitutional point defect.""" @@ -2027,61 +1616,34 @@ def test_bcc_substitution(self): sub_element = 20 labels[sub_index] = sub_element system.set_atomic_numbers(labels) - # view(system) # Classified as surface classifier = Classifier() classification = classifier.classify(system) self.assertIsInstance(classification, Surface) - # One substitutional defect - adsorbates = classification.adsorbates - interstitials = classification.interstitials - substitutions = classification.substitutions - vacancies = classification.vacancies - unknowns = classification.unknowns - self.assertEqual(len(interstitials), 0) - self.assertEqual(len(vacancies), 0) - self.assertEqual(len(adsorbates), 0) - self.assertEqual(len(unknowns), 0) - self.assertEqual(len(substitutions), 1) - subst = substitutions[0] - self.assertEqual(subst.index, sub_index) - self.assertEqual(subst.original_element, 26) - self.assertEqual(subst.substitutional_element, sub_element) + # One defect + self.assertEqual( + set(range(len(system))).difference(set([42])), + set(classification.region.get_basis_indices()), + ) def test_bcc_vacancy(self): """Surface with vacancy point defect.""" system = bcc100("Fe", size=(5, 5, 3), vacuum=8) vac_index = 42 - - # Get the vacancy atom - vac_true = ase.Atom( - system[vac_index].symbol, - system[vac_index].position, - ) del system[vac_index] - # view(system) # Classified as surface classifier = Classifier() classification = classifier.classify(system) self.assertIsInstance(classification, Surface) - # One vacancy - adsorbates = classification.adsorbates - interstitials = classification.interstitials - substitutions = classification.substitutions - vacancies = classification.vacancies - unknowns = classification.unknowns - self.assertEqual(len(interstitials), 0) - self.assertEqual(len(adsorbates), 0) - self.assertEqual(len(unknowns), 0) - self.assertEqual(len(substitutions), 0) - self.assertEqual(len(vacancies), 1) - vac_found = vacancies[0] - self.assertTrue(np.allclose(vac_true.position, vac_found.position)) - self.assertEqual(vac_true.symbol, vac_found.symbol) + # Pristine + self.assertEqual( + set(range(len(system))), + set(classification.region.get_basis_indices()), + ) def test_bcc_interstitional(self): """Surface with interstitional atom.""" @@ -2094,56 +1656,36 @@ def test_bcc_interstitional(self): ) system += interstitional - # view(system) - # Classified as surface classifier = Classifier() classification = classifier.classify(system) self.assertIsInstance(classification, Surface) - # One interstitional - adsorbates = classification.adsorbates - interstitials = classification.interstitials - substitutions = classification.substitutions - vacancies = classification.vacancies - unknowns = classification.unknowns - self.assertEqual(len(vacancies), 0) - self.assertEqual(len(substitutions), 0) - self.assertEqual(len(adsorbates), 0) - self.assertEqual(len(unknowns), 0) - self.assertEqual(len(interstitials), 1) - int_found = interstitials[0] - self.assertEqual(int_found, 125) + # Pristine + self.assertEqual( + set(range(len(system))).difference(set([125])), + set(classification.region.get_basis_indices()), + ) def test_bcc_dislocated_big_surface(self): system = bcc100("Fe", size=(5, 5, 3), vacuum=8) # Run multiple times with random displacements rng = RandomState(47) - # for i in range(4): - # disloc = rng.rand(len(system), 3) for i in range(10): i_sys = system.copy() matid.geometry.make_random_displacement(system, 0.04, rng) - # view(system) # Classified as surface - # classifier = Classifier(pos_tol=0.75) classifier = Classifier() classification = classifier.classify(i_sys) self.assertIsInstance(classification, Surface) - # No defects or unknown atoms - adsorbates = classification.adsorbates - interstitials = classification.interstitials - substitutions = classification.substitutions - vacancies = classification.vacancies - unknowns = classification.unknowns - self.assertEqual(len(interstitials), 0) - self.assertEqual(len(substitutions), 0) - self.assertEqual(len(vacancies), 0) - self.assertEqual(len(adsorbates), 0) - self.assertEqual(len(unknowns), 0) + # Pristine + self.assertEqual( + set(range(len(system))), + set(classification.region.get_basis_indices()), + ) def test_curved_surface(self): # Create an Fe 100 surface as an ASE Atoms object @@ -2155,24 +1697,17 @@ def test_curved_surface(self): pos = atom.position distortion_z = 0.5 * np.sin(pos[0] / cell_width * 2.0 * np.pi) pos += np.array((0, 0, distortion_z)) - # view(system) # Classified as surface classifier = Classifier() classification = classifier.classify(system) self.assertIsInstance(classification, Surface) - # No defects or unknown atoms - adsorbates = classification.adsorbates - interstitials = classification.interstitials - substitutions = classification.substitutions - vacancies = classification.vacancies - unknowns = classification.unknowns - self.assertEqual(len(interstitials), 0) - self.assertEqual(len(substitutions), 0) - self.assertEqual(len(vacancies), 0) - self.assertEqual(len(adsorbates), 0) - self.assertEqual(len(unknowns), 0) + # Pristine + self.assertEqual( + set(range(len(system))), + set(classification.region.get_basis_indices()), + ) def test_surface_ads(self): """Test a surface with an adsorbate.""" @@ -2184,115 +1719,85 @@ def test_surface_ads(self): h2o.rotate(180, [1, 0, 0]) h2o.translate([7.2, 7.2, 13.5]) system += h2o - # view(system) classifier = Classifier() classification = classifier.classify(system) self.assertIsInstance(classification, Surface) - # No defects or unknown atoms, one adsorbate cluster - adsorbates = classification.adsorbates - interstitials = classification.interstitials - substitutions = classification.substitutions - vacancies = classification.vacancies - unknowns = classification.unknowns - - self.assertEqual(len(interstitials), 0) - self.assertEqual(len(substitutions), 0) - self.assertEqual(len(vacancies), 0) - self.assertEqual(len(unknowns), 0) - self.assertEqual(len(adsorbates), 3) - self.assertTrue(np.array_equal(adsorbates, np.array([100, 101, 102]))) - - def test_nacl(self): - """Test the detection for an imperfect NaCl surface with adsorbate and - defects. - """ - - # Create the system - class NaClFactory(SimpleCubicFactory): - "A factory for creating NaCl (B1, Rocksalt) lattices." - - bravais_basis = [ - [0, 0, 0], - [0, 0, 0.5], - [0, 0.5, 0], - [0, 0.5, 0.5], - [0.5, 0, 0], - [0.5, 0, 0.5], - [0.5, 0.5, 0], - [0.5, 0.5, 0.5], - ] - element_basis = (0, 1, 1, 0, 1, 0, 0, 1) + # Adsorbates + self.assertEqual( + set(range(len(system))).difference(set([100, 101, 102])), + set(classification.region.get_basis_indices()), + ) - nacl = NaClFactory() - nacl = nacl(symbol=["Na", "Cl"], latticeconstant=5.64) - nacl = nacl.repeat((4, 4, 2)) - cell = nacl.get_cell() - cell[2, :] *= 3 - nacl.set_cell(cell) - nacl.center() + def test_surface_difficult_basis_atoms(self): + """This is a surface where the atoms on top of the surface will get + easily classified as adsorbates if the chemical environment detection + is not tuned correctly. + """ + system = ase.io.read( + "./data/RzQh5XijWuXsNZiRSxeOlPFUY_9Gl+PY5NRLMRYyQXsYmBN9hMcT-FftquP.xyz" + ) + classifier = Classifier() + classification = classifier.classify(system) + self.assertIsInstance(classification, Surface) - # Add vacancy - vac_index = 17 - vac_true = ase.Atom( - nacl[vac_index].symbol, - nacl[vac_index].position, + # Pristine + self.assertEqual( + set(range(len(system))), + set(classification.region.get_basis_indices()), ) - del nacl[vac_index] - # Shake the atoms - # rng = RandomState(8) - # matid.geometry.make_random_displacement(nacl, 0.4, rng) + # def test_surface_with_one_cell_but_periodic_backbone(self): + # """This is a surface that ultimately has only one repetition of the + # underlying unit cell in the simulation cell. Normally it would not get + # classified, but because it has a periodic backbone of Barium atoms, + # they are identified as the unit cell and everything inside is + # identified as outliers. Such systems still pose a challenge to the + # algorithm. + # """ + # system = ase.io.read("./data/Rhn-EWQQN8Z-lbmZwoWPyrGiM9Isx+PbYDgCBSwbq3nxONqWaq03HYUn8_V.xyz") - # Add adsorbate - h2o = molecule("H2O") - h2o.rotate(-45, [0, 0, 1]) - h2o.translate([11.5, 11.5, 22.5]) - nacl += h2o + # classifier = Classifier() + # classification = classifier.classify(system) + # self.assertIsInstance(classification, Surface) - # Add substitution - symbols = nacl.get_atomic_numbers() - subst_num = 39 - subst_atomic_num = 19 - symbols[subst_num] = subst_atomic_num - nacl.set_atomic_numbers(symbols) + # # No outliers + # outliers = classification.outliers + # self.assertEqual(len(outliers), 0) - # view(nacl) + # This test case is disabled due to skipping the \omega_c criterion in v2. + # def test_adsorbate_pattern(self): + # """Here the adsorbate will easily get included in the basis if the + # values for \omega_v and \omega_c are not suitable. + # """ + # system = ase.io.read( + # "./structures/RmlNIfj-YIQ14UBYjtAHtXcAEXZif+PIkKcrxeOf997qnQ_hWRXLdMsmpAf.xyz" + # ) + # classifier = Classifier() + # classification = classifier.classify(system) + # self.assertEqual(type(classification), Surface) + # self.assertTrue(np.array_equal(classification.outliers, [24, 25, 26])) - classifier = Classifier() - classification = classifier.classify(nacl) - self.assertIsInstance(classification, Surface) + # This test is disabled in v2 as the chemical similarity check is completely + # removed. + # def test_adsorbate_detection_via_neighbourhood(self): + # """Test that adsorbates that are in a basis atom position, but do not + # exhibit the correct chemical neighbourhood are identified. + # """ + # system = ase.io.read( + # "./data/ROHGEranIWm-gnS6jhQaLZRORWDKx+Pbsl6Hlb_C1aXadFiJ58UCUek5a8x.xyz" + # ) + + # classifier = Classifier() + # classification = classifier.classify(system) + # self.assertIsInstance(classification, Surface) - # Detect adsorbate - adsorbates = classification.adsorbates - # print(adsorbates) - self.assertEqual(len(adsorbates), 3) - self.assertTrue(np.array_equal(adsorbates, np.array([256, 257, 255]))) - - # Detect vacancy - vacancies = classification.vacancies - self.assertEqual(len(vacancies), 1) - vac_found = vacancies[0] - vacancy_disp = np.linalg.norm(vac_true.position - vac_found.position) - self.assertTrue(vacancy_disp <= 1) - self.assertEqual(vac_true.symbol, vac_found.symbol) - - # Detect substitution - substitutions = classification.substitutions - self.assertEqual(len(substitutions), 1) - found_subst = substitutions[0] - self.assertEqual(found_subst.index, subst_num) - self.assertEqual(found_subst.original_element, 11) - self.assertEqual(found_subst.substitutional_element, subst_atomic_num) - - # No unknown atoms - unknowns = classification.unknowns - self.assertEqual(len(unknowns), 0) - - # No interstitials - interstitials = classification.interstitials - self.assertEqual(len(interstitials), 0) + # # Adsorbates + # self.assertEqual( + # set(range(len(system))).difference(set(range(0, 18))), + # set(classification.region.get_basis_indices()), + # ) class SearchGraphTests(unittest.TestCase): @@ -2311,11 +1816,9 @@ def test_non_orthogonal_cell_1(self): ) symbols = np.array(["Sr"]) system = Atoms(scaled_positions=pos, cell=cell, symbols=symbols, pbc=True) - # view(system) finder = PeriodicFinder() region = finder.get_region(system, 0, 5, 0.7) - # view(region.cell) G = region._search_graph # draw_networkx(G) @@ -2329,6 +1832,34 @@ def test_non_orthogonal_cell_1(self): periodicity = region.get_connected_directions() self.assertTrue(np.array_equal(periodicity, [True, True, False])) + def test_surface(self): + system = Atoms( + positions=[ + [0, 0, 8], + [2, 0, 12], + ], + cell=[4, 4, 20], + symbols=["Sr", "Sr"], + pbc=True, + ) + + finder = PeriodicFinder() + region = finder.get_region(system, 0, 5, 0.7) + + G = region._search_graph + # for node1, node2, data in G.edges(data=True): + # print(node1, node2, data) + + # Check that the correct graph is created + self.assertEqual(len(G.nodes), 2) + self.assertEqual( + len(G.edges), 8 + 8 + 9 + 9 - 1 + ) # 8+8 horizontally, 9+9 vertically, minus 1 shared edge + + # Check graph periodicity + periodicity = region.get_connected_directions() + self.assertTrue(np.array_equal(periodicity, [False, True, True])) + def test_non_orthogonal_cell_2(self): """Non-orthogonal cell with two atoms.""" cell = np.array([[7.8155, 0.0, 0.0], [-3.9074, 6.7683, 0.0], [0.0, 0.0, 175.0]]) @@ -2341,19 +1872,19 @@ def test_non_orthogonal_cell_2(self): ) symbols = np.array(2 * ["Sr"]) system = Atoms(scaled_positions=pos, cell=cell, symbols=symbols, pbc=True) - # view(system) finder = PeriodicFinder() region = finder.get_region(system, 0, 5, 0.8) - # view(region.cell) G = region._search_graph + # for node1, node2, data in G.edges(data=True): + # print(node1, node2, data) # draw_networkx(G) # mpl.show() # Check that the correct graph is created self.assertEqual(len(G.nodes), 2) - self.assertEqual(len(G.edges), 11) + self.assertEqual(len(G.edges), 15) # 8+8-1 # Check graph periodicity periodicity = region.get_connected_directions() @@ -2372,7 +1903,6 @@ def test_non_orthogonal_cell_4(self): ) symbols = np.array(4 * ["Sr"]) system = Atoms(scaled_positions=pos, cell=cell, symbols=symbols, pbc=True) - # view(system) finder = PeriodicFinder() region = finder.get_region(system, 0, 5, 0.7) @@ -2386,29 +1916,42 @@ def test_non_orthogonal_cell_4(self): periodicity = region.get_connected_directions() self.assertTrue(np.array_equal(periodicity, [True, True, False])) - def test_surface_difficult_basis_atoms(self): - """This system with this specific position tolerance fails if there is - no check against moves that occur inside the unit cell 'grid', and do - not wrap across it. - """ - system = ase.io.read( - "./data/RzQh5XijWuXsNZiRSxeOlPFUY_9Gl+PY5NRLMRYyQXsYmBN9hMcT-FftquP.xyz" - ) - # view(system) + # def test_connected_directions(self): + # """TODO: Should be fixed. - finder = PeriodicFinder() - region = finder.get_region(system, 42, 12, 1.05146337551) + # Checks that the correct connected directions are returned in a case + # where the search may wrap around periodic boundaries. - # Check graph periodicity - periodicity = region.get_connected_directions() - self.assertTrue(np.array_equal(periodicity, [False, True, True])) + # This test will fail if the connected directions are based on graph nodes + # having two inbound edges with opposing search directions. E.g. "cell + # [0,0,0] has inbound node from direction [1, 0, 0] and from direction + # [-1, 0 0]". + # """ + # for i in range(5): + # system = Atoms( + # positions=[[0, 4, 0], [2, 6, 0], [0, 8, 0]], + # cell=[4, 12, 2], + # symbols=['Al', 'Al', 'Al'], + # pbc=True + # ) + # system = system * [2, 1, 1] + # system.rattle(0.05, seed=i) + + # finder = PeriodicFinder() + # region = finder.get_region(system, 0, 6, 0.2) + + # # Check that entire system is found + # self.assertTrue(len(region.get_basis_indices()) == len(system)) + + # # Check graph periodicity + # periodicity = region.get_connected_directions() + # self.assertTrue(periodicity.sum() == 2) def test_surface_adsorbate(self): """Test graph search in the presence of adsorbates.""" system = ase.io.read( "./data/ROHGEranIWm-gnS6jhQaLZRORWDKx+Pbco91p05ftuJQ38__Y0_TDg9tNIy.xyz" ) - # view(system) finder = PeriodicFinder() region = finder.get_region(system, 19, 12, 0.252687223066) diff --git a/tests/performance/benchmark_cpu.sh b/tests/performance/benchmark_cpu.sh new file mode 100755 index 00000000..2785fc9e --- /dev/null +++ b/tests/performance/benchmark_cpu.sh @@ -0,0 +1,9 @@ +for key in 500 1000 1500 2000 2500 3000 3500 4000 +do + python performance.py benchmark-cpu-single --system="ordered" -s $key +done +for key in 100 200 300 400 500 +do + python performance.py benchmark-cpu-single --system="unordered" -s $key +done + diff --git a/tests/performance/benchmark_memory.sh b/tests/performance/benchmark_memory.sh new file mode 100755 index 00000000..e4f4484a --- /dev/null +++ b/tests/performance/benchmark_memory.sh @@ -0,0 +1,9 @@ +for key in 500 1000 1500 2000 2500 3000 3500 4000 +do + python performance.py benchmark-memory-single --system="ordered" -s $key +done +for key in 100 200 300 400 500 +do + python performance.py benchmark-memory-single --system="unordered" -s $key +done + diff --git a/tests/performance/performance.py b/tests/performance/performance.py new file mode 100644 index 00000000..0632df0a --- /dev/null +++ b/tests/performance/performance.py @@ -0,0 +1,296 @@ +import time +import os +import json +import psutil +import resource +import click +import numpy as np +from typing import Tuple +from ase import Atoms +from ase.build import bulk +import matplotlib.pyplot as plt +from matid.clustering import SBC +from importlib.metadata import version + +matid_version = version("matid") + +np.random.seed(7) + + +def get_ordered_system(): + ordered = bulk("Cu", "fcc", a=3.6, cubic=True) * [1, 1, 1] + ordered.set_pbc(True) + return ordered + + +def run_single(atoms) -> Tuple[int, int]: + """Run a single classification.""" + SBC().get_clusters(atoms) + + +def run_single_cpu(atoms) -> int: + """Run a single classification.""" + start = time.time() + run_single(atoms) + end = time.time() + elapsed = end - start + return elapsed + + +def run_single_memory(atoms) -> int: + """Run a single classification.""" + run_single(atoms) + return resource.getrusage(resource.RUSAGE_SELF).ru_maxrss / 1024 + + +def generate_unordered(n_atoms): + """Generates a cubic system with n_atoms, given density and uniformly + randomly distributed atoms.""" + model = get_ordered_system() + density = len(model) / model.get_volume() + pos = np.random.rand(n_atoms, 3) + a = np.power(n_atoms / density, 1 / 3) + system = Atoms( + symbols=["Cu"] * n_atoms, scaled_positions=pos, cell=[a, a, a], pbc=True + ) + + return system + + +def generate_ordered(n_atoms): + """Generates a pristine surface system with n_atoms.""" + system = get_ordered_system() + n_atoms_unit = len(system) + n_repetitions = int(np.power(n_atoms / n_atoms_unit, 1.0 / 3.0)) + system *= [n_repetitions, n_repetitions, n_repetitions] + + return system + + +def get_path(): + path = f"./results/results_{matid_version}.json" + return path + + +def get_result(path): + if os.path.exists(path): + with open(path) as fin: + results = json.load(fin) + else: + results = { + "ordered": { + "memory": {}, + "cpu": {}, + }, + "unordered": { + "memory": {}, + "cpu": {}, + }, + } + return results + + +@click.group() +def cli(): + pass + + +@cli.command() +@click.option("--system", help="System type", required=True) +@click.option("--n_atoms", type=int, help="Number of atoms", required=True) +def run(system, n_atoms): + run_single(system, n_atoms) + + +@cli.command() +@click.option("--system", help="System type", required=True) +@click.option("-s", help="Requested system size", type=int, required=True) +def benchmark_cpu_single(system, s): + atoms = globals()[f"generate_{system}"](s) + size_actual = len(atoms) + + # Check if this size has already been calculated + path = get_path() + result = get_result(path) + if result[system]["cpu"].get(str(size_actual)): + print(f"Results exist for size {size_actual}, skipping...") + return + + elapsed = run_single_cpu(atoms) + + # Save result + result[system]["cpu"][size_actual] = [elapsed] + with open(path, "w") as fout: + json.dump(result, fout) + + +@cli.command() +@click.option("--system", help="System type", required=True) +@click.option("-s", help="Requested system size", type=int, required=True) +def benchmark_memory_single(system, s): + """Measures the peak memory usage (resident set) as MiBi.""" + atoms = globals()[f"generate_{system}"](s) + size_actual = len(atoms) + + # Check if this size has already been calculated + path = get_path() + result = get_result(path) + if result[system]["memory"].get(str(size_actual)): + print(f"Results exist for size {size_actual}, skipping...") + return + + process = psutil.Process() + start = process.memory_info().rss / 1024 / 1024 + mem = run_single_memory(atoms) + memory = mem - start + + # Save result + result[system]["memory"][size_actual] = [memory] + with open(path, "w") as fout: + json.dump(result, fout) + + +@cli.command() +@click.option("--show", is_flag=True, help="Whether to show the plot") +def plot(show): + plt.rcParams["text.latex.preamble"] = r"\usepackage{amsmath}" + plt.rcParams.update( + { + "text.usetex": True, + "font.family": "sans-serif", + "font.sans-serif": "Computer Modern Sans Serif", + "axes.titlesize": 25, + "axes.labelsize": 23, + "lines.markersize": 8, + "xtick.labelsize": 20, + "ytick.labelsize": 20, + } + ) + + figsize = (8, 9) + fig, [ax1, ax2] = plt.subplots(2, 1, sharex=True, figsize=figsize) + ax1.set_title(f"MatID v{matid_version}", y=1.2) + colors = ["#2988AD", "#FEA534"] + labels = {"ordered": "Ordered", "unordered": "Unordered"} + + timemax = float("-Infinity") + timemin = float("Infinity") + nmax = float("-Infinity") + nmin = float("Infinity") + + for i_system, system in enumerate(["ordered", "unordered"]): + color = colors[i_system] + + # Plot CPU time + path = get_path() + results = get_result(path) + times = [] + n_atoms = [] + for key, value in results[system]["cpu"].items(): + times.append(value) + n_atoms.append(int(key)) + times = np.array(times) + n_atoms = np.array(n_atoms) + + times_mean = times.mean(axis=1) + # times_std = times.std(axis=1) + i_timemax = times.max() + i_timemin = times.min() + i_nmax = n_atoms.max() + i_nmin = n_atoms.min() + if i_timemax > timemax: + timemax = i_timemax + if i_timemin < timemin: + timemin = i_timemin + if i_nmax > nmax: + nmax = i_nmax + if i_nmin < nmin: + nmin = i_nmin + # ax1.fill_between(n_atoms, times_mean - times_std, times_mean + times_std, color=color, alpha=0.3) + ax1.plot( + n_atoms, + times_mean, + color=color, + marker="o", + linestyle="dashed", + label=labels[system], + ) + ax1.grid(color="#333", linestyle="--", linewidth=1, alpha=0.3) + + # Plot max memory usage + path = get_path() + results = get_result(path) + memory = [] + n_atoms = [] + for key, value in results[system]["memory"].items(): + memory.append(value) + n_atoms.append(int(key)) + memory = np.array(memory) + n_atoms = np.array(n_atoms) + memory_mean = memory.mean(axis=1) + # memory_std = memory.std(axis=1) + # ax2.fill_between(n_atoms, memory_mean - memory_std, memory_mean + memory_std, color=color, alpha=0.3) + ax2.plot( + n_atoms, + memory_mean, + color=color, + marker="o", + linestyle="dashed", + label=labels[system], + ) + ax2.grid(color="#333", linestyle="--", linewidth=1, alpha=0.3) + + ninterval = nmax - nmin + timeinterval = timemax - timemin + + # Add images of tested systems + # axisratio = figsize[0] / figsize[1] + # for i_system, system in enumerate(['ordered', 'unordered']): + # atoms = globals()[f'generate_{system}'](40) + # imgpath = f'./results/{system}/image.eps' + # rgba = tuple(int(colors[i_system].lstrip('#')[i:i+2], 16) / 255 for i in (0, 2, 4)) + # colorarray = np.tile(rgba, (len(atoms), 1)) + # if system == 'ordered': + # amount = 0.18 + # margintop = -0.02*timeinterval + # marginleft = 0.22*ninterval + # else: + # amount = 0.18 + # margintop = -0.05*timeinterval + # marginleft = 0.60*ninterval + # write(imgpath, atoms, rotation='110x', colors=colorarray, maxwidth=1000) + # im = plt.imread(imgpath) + # aspectratio = im.shape[0] / (im.shape[1] / 2.1) + # ax1.imshow(im, aspect='auto', extent=(nmin+marginleft, nmin + ninterval*amount+marginleft, timemax - amount*aspectratio*axisratio*timeinterval-margintop, timemax-margintop)) + + margin = 0.05 + ax1.set_xlim(nmin - margin * ninterval, nmax + margin * ninterval) + ax1.set_ylim(timemin - margin * timeinterval, timemax + margin * timeinterval) + # ax1.set_xticks( + # np.arange(100, 901, 100), + # [r"$\mathsf{{{}}}$".format(i) for i in np.round(np.arange(100, 901, 100), 2)], + # fontsize=20, + # ) + # ax1.set_yticks( + # np.arange(0, 200, 30), + # [r"$\mathsf{{{}}}$".format(i) for i in np.round(np.arange(0, 200, 30), 0)], + # fontsize=20, + # ) + # ax2.set_yticks( + # np.arange(0, 3001, 500), + # [r"$\mathsf{{{}}}$".format(i) for i in np.round(np.arange(0, 3001, 500), 0)], + # fontsize=20, + # ) + ax1.legend(loc="upper center", bbox_to_anchor=(0.5, 1.22), ncol=2, fontsize=20) + ax1.set_ylabel("Elapsed time (s)") + ax2.set_ylabel("Peak memory usage (MB)") + ax2.set_xlabel("Number of atoms") + + fig.tight_layout() + plt.savefig(f"./results/results_{matid_version}.pdf") + if show: + plt.show(block=True) + + +if __name__ == "__main__": + cli() diff --git a/tests/performance/plot.sh b/tests/performance/plot.sh new file mode 100755 index 00000000..6443502b --- /dev/null +++ b/tests/performance/plot.sh @@ -0,0 +1,2 @@ +python performance.py plot + diff --git a/tests/test_geometry.py b/tests/test_geometry.py index a18262ce..46e58389 100644 --- a/tests/test_geometry.py +++ b/tests/test_geometry.py @@ -566,14 +566,6 @@ def test_matches(system, pbc, position, expected_matches, expected_factors): """ system.set_pbc(pbc) - # Old python implementation - matches, _, _, factors = matid.geometry.get_matches_old( - system, - np.array(position)[None, :], - numbers=[system.get_atomic_numbers()[0]], - tolerance=tolerance, - ) - # New CPP implementation cell_list = matid.geometry.get_cell_list( system.get_positions(), @@ -591,11 +583,9 @@ def test_matches(system, pbc, position, expected_matches, expected_factors): ) # Make sure that the atom is found in the correct copy - assert tuple(factors[0]) == expected_factors assert tuple(factors_ext[0]) == expected_factors # Make sure that the correct atom is found - assert np.array_equal(matches, expected_matches) assert np.array_equal(matches_ext, expected_matches) @@ -763,37 +753,3 @@ def test_minimize_cell(system, axis, minimum_size, expected_cell, expected_pos): def test_thickness(system, axis, expected_thickness): thickness = matid.geometry.get_thickness(system, axis) assert thickness == expected_thickness - - -def test_displacement_tensor_performance(): - system = bulk("NaCl", "rocksalt", a=5.64) * [10, 10, 10] - cutoff = 10 - - start = time.time() - matid.geometry.get_displacement_tensor( - system.get_positions(), - system.get_cell(), - system.get_pbc(), - cutoff=cutoff, - return_distances=True, - return_factors=False, - ) - end = time.time() - elapsed_new = end - start - - start = time.time() - matid.geometry.get_displacement_tensor_old( - system.get_positions(), - system.get_positions(), - system.get_cell(), - system.get_pbc(), - mic=True, - cutoff=cutoff, - return_distances=True, - return_factors=False, - ) - end = time.time() - elapsed_old = end - start - - ratio = elapsed_old / elapsed_new - assert ratio > 30