Source code for GSASII.k_vector_search

#
# ------------------
# k_vector_search.py
# ------------------
#
# This Python file contains the definition of the `kVector` class for the
# identification of optimal k vector(s) for explaining the experimentally
# observed satellite peaks in powder diffraction patterns. Provided the
# refined nucleus structure and the extracted positions of those
# satellite peaks, we first identify the optimal k-path to search along.
# For such a purpose, we were using the k-path finder as reported by
# Y. Hinuma, et al.,
#
# -------------------------------------------------
# http://dx.doi.org/10.1016/j.commatsci.2016.10.015
# -------------------------------------------------
#
# We first search those high symmetry points along the suggested
# k-path, then along the path and finally, if specified to, search
# across the whole first Brillouin zone.
#
# +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
# Yuanpeng Zhang & Joe Paddison @ Mar-03-2024
# SNS-HFIR, ORNL
# -------------------------------------------
# Vasile Garlea and Stuart Calder are
# acknowledged for their useful comments.
# +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
#
import sys
import time
import math
import numpy as np
from scipy.optimize import linear_sum_assignment
from . import GSASIIfiles as G2fil
from . import GSASIIpath
GSASIIpath.SetBinaryPath()

gen_option_avail = True
try:
    import seekpath
except ModuleNotFoundError:
    G2fil.NeededPackage({'magnetic k-vector search':['seekpath']})
    print('k_vector_search: seekpath could not be imported')
    gen_option_avail = False

try:
    if GSASIIpath.binaryPath:
        import kvec_general
    else:
        from . import kvec_general
except ImportError:
    print('binary load error: kvec_general not found')
    gen_option_avail = False
except ModuleNotFoundError:
    print('k_vector_search: kvec_general could not be imported')
    gen_option_avail = False

[docs] def unique_id_gen(string_list: list) -> list: """Generate unique IDs for strings included in the string list and the same string will be assigned with the same ID. :param string_list: list of strings :return: list of integer IDs """ unique_integer = 1 replaced_strings = {} output_list = [] for string in string_list: if string in replaced_strings: output_list.append(replaced_strings[string]) else: replaced_strings[string] = unique_integer output_list.append(unique_integer) unique_integer += 1 return output_list
[docs] def lat_params_to_vec(lat_params: list) -> list: """Construct lattice vectors from lattice parameters, according to the convention as detailed in the following post, https://iris2020.net/2024-03-04-latt_params_to_latt_vecs/ :param lat_params: list of lattice parameters a, b, c, alpha, beta and gamma. Angles should be given in degree. :return: lattice vectors in the list form, namely, the a, b and c lattice vectors given in the Cartesian coordinate """ a_norm = lat_params[0] b_norm = lat_params[1] c_norm = lat_params[2] alpha = np.deg2rad(lat_params[3]) beta = np.deg2rad(lat_params[4]) gamma = np.deg2rad(lat_params[5]) c = [0., 0., c_norm] b = [ 0., b_norm * np.sin(alpha), b_norm * np.cos(alpha) ] az = a_norm * np.cos(beta) cos_al = np.cos(alpha) cos_be = np.cos(beta) cos_ga = np.cos(gamma) sin_al = np.sin(alpha) top = a_norm * (cos_ga - cos_al * cos_be) bottom = sin_al ay = top / bottom ax = np.sqrt(a_norm**2. - ay**2. - az**2.) a = [ax, ay, az] return [a, b, c]
[docs] class kVector: """For k-vector search, given the input structure, the nucleus and satellite diffraction peaks. :param bravfSym: Bravais lattice symbol :param cell: a `3x3` list of floats (cell[0] is the first lattice vector, etc.) The cell vectors should be corresponding to the primitive following the ITA convention. :param positions: a `Nx3` list of floats with the atomic coordinates in fractional coordinates (i.e., w.r.t. the cell vectors) :param numbers: a length-`N` list with integers identifying uniquely the atoms in the cell :param nucPeaks: a `mx4` list for holding the nucleus diffraction peaks (nucPeaks[0] has 4 entries, giving the hkl indeces corresponding to the conventional unit cell setting (ITA), together with the d-spacing) :param superPeaks: a length-`n` list for the satellite peak positions in `d`. :param threshold: specify the delta_d/d threshold. When the distance between two peaks is smaller than such a threshold, they would then be considered as identical. When searching for the single k vector, if the maximum value among those distances between the nominal and observed positions of the satellite peaks is smaller than the threshold, the corresponding k vector will be returned directly as the optimal solution. :param option: (optional) control the scope for k vector search in the Brillouin zone, `0` for high symmetry points only, `1` for high symmetry and edges, `2` for the whole Brillouin zone. Default: 0 :param kstep: (optional) step of k values for searching over the k grid. Default: [.01, .01, .01] :param processes: (optional) the number of processes for parallel processing. Default: 1 """ transMatrix = { "P": np.array( [ [1, 0, 0], [0, 1, 0], [0, 0, 1] ] ), "cF": 1 / 2 * np.array( [ [0, 1, 1], [1, 0, 1], [1, 1, 0] ] ), "oF": 1 / 2 * np.array( [ [0, 1, 1], [1, 0, 1], [1, 1, 0] ] ), "cI": 1 / 2 * np.array( [ [-1, 1, 1], [1, -1, 1], [1, 1, -1] ] ), "tI": 1 / 2 * np.array( [ [-1, 1, 1], [1, -1, 1], [1, 1, -1] ] ), "oI": 1 / 2 * np.array( [ [-1, 1, 1], [1, -1, 1], [1, 1, -1] ] ), "hR": 1 / 3 * np.array( [ [2, -1, -1], [1, 1, -2], [1, 1, 1] ] ), "oC": 1 / 2 * np.array( [ [1, 1, 0], [-1, 1, 0], [0, 0, 2] ] ), "oA": 1 / 2 * np.array( [ [0, 0, 2], [1, 1, 0], [-1, 1, 0] ] ), "mC": 1 / 2 * np.array( [ [1, -1, 0], [1, -1, 1], [1, 1, -1] ] ) } def __init__(self, bravfSym: str, cell: list, positions: list, numbers: list, nucPeaks: list, superPeaks: list, threshold: float, option: int = 0, kstep: list = None, processes: int = 1): self.bravfSym = bravfSym self.cell = cell self.positions = positions self.numbers = numbers self.nucPeaks = nucPeaks self.superPeaks = superPeaks self.threshold = threshold self.option = option self.kstep = kstep self.processes = processes if self.option == 2 and not gen_option_avail: err_msg = "The general search option is not available. " err_msg += "Please install the `kvec_general` module." raise ModuleNotFoundError(err_msg) if kstep is None: self.kstep = [.01, .01, .01] rep_prim_latt = self.kpathFinder()["reciprocal_primitive_lattice"] if "P" in bravfSym: self.bs_tmp = "P" else: self.bs_tmp = bravfSym self.rep_conv_latt = np.matmul( kVector.transMatrix[self.bs_tmp], rep_prim_latt )
[docs] def kpathFinder(self) -> dict: """Provided the structure inputs, the routine will be collecting the inputs into a structure tuple which will be fed into the `get_path` routine in the `seekpath` module. The special k points and the k-path will be returned. :return: the dictionary containing the k-points, k-path and the reciprocal space primitive lattice vectors. """ structure = (self.cell, self.positions, self.numbers) # the inputs for the `seekpath.get_path` routine assumes no symmetry # and internally in the routine, the symmetry would be identified # automatically using the `spglib` module. For the symmetry # identification, we need to specify the tolerance (for atomic # coordinates, etc.). Here, for our purpose, we know exactly what # the symmetry is for our input structure, we were to tune the # tolerance value until the expected Bravais lattice type is # identified. sym_tol = 1.E-5 while True: k_info_tmp = seekpath.get_path(structure, True, symprec=sym_tol) if k_info_tmp["bravais_lattice"] == self.bravfSym: break else: sym_tol *= 2. k_info = { "point_coords": k_info_tmp["point_coords"], "path": k_info_tmp["path"], "reciprocal_primitive_lattice": k_info_tmp[ "reciprocal_primitive_lattice" ] } return k_info
[docs] def hklConvToPrim(self, hkl: list) -> np.ndarray: """Convert the hkl indeces in the conventional cell setting to the primitive cell setting. :param hkl: input hkl indeces in the conventional cell setting :return: a list containing the hkl indeces in the primitive cell setting. """ prim_hkl = np.matmul( np.array(hkl), kVector.transMatrix[self.bs_tmp] ) return prim_hkl
[docs] def kVecPrimToConv(self, k_vec: list) -> np.ndarray: """Convert the k vector in the reciprocal primitive lattice setting to that in the conventional cell setting. :param k_vec: the k vector in the reciprocal primitive lattice setting :return: the k vector in the conventional cell setting """ inv_trans_matrix = np.linalg.inv( kVector.transMatrix[self.bs_tmp] ) k_vec_conv = np.matmul( np.array(k_vec), inv_trans_matrix ) return k_vec_conv
[docs] def pointOnVector(self, s_point: list, e_point: list, distance: float) -> list: """Grab the coordinate of a point on a vector specified by the starting and ending points. The distance from the point on the vector to the starting point should be given as the parameter. :param s_point: a list for the coordinate of the starting point :param e_point: a list for the coordinate of the ending point :param distance: the distance away from the starting point :return: the coordinate of the point on the vector """ vec_length = np.sqrt(sum((x - y)**2 for x, y in zip(s_point, e_point))) xp = s_point[0] + distance / vec_length * (e_point[0] - s_point[0]) yp = s_point[1] + distance / vec_length * (e_point[1] - s_point[1]) zp = s_point[2] + distance / vec_length * (e_point[2] - s_point[2]) return [xp, yp, zp]
[docs] def insIntoSortedList(self, lst: list, new_val: float) -> tuple: """Insert a new entry into the ascendingly sorted list. :param lst: an ascendingly sorted list :param new_val: the new entry to be inserted into the sorted list :return: tuple containing the new sorted list and the index of the new entry in the new sorted list. """ ind = 0 for i in range(len(lst)): if lst[i] < new_val: ind = i + 1 else: break lst.insert(ind, new_val) return (lst, ind)
[docs] def unique_closest(self, list1: list, list2: list) -> tuple: """Assign each member in `list2` with a unique member in `list1`. The assignment should guarantee that the sum of the squared difference between the mapping pairs is minimized. Here, the codes were created by the GPT-4-Turbo model, and the underlying algorithm used is the Hungarian algorithm. Here follows are some useful links, https://medium.com/math-simplified/the-perfect-matching-1be8b028183c https://brilliant.org/wiki/hungarian-matching/ :param list1: the pool of satellite peaks to be generated by a certain k vector, given the nucleus peaks. :param list2: the list of observed satellite peaks. :return: a tuple containing two entries, the first being the mapping, and the second being the sum of the squared difference between the mapping pairs. """ if len(list1) < len(list2): err_msg = "First list must be longer than or equal " err_msg += "to the second list." raise ValueError(err_msg) cost_matrix = np.array( [[((i - j) / j)**2. for i in list1] for j in list2] ) cost_matrix_dd = np.array( [[(i - j)**2. for i in list1] for j in list2] ) row_ind, col_ind = linear_sum_assignment(cost_matrix) mapping = {list2[i]: list1[j] for i, j in zip(row_ind, col_ind)} sum_of_squares = np.sqrt( cost_matrix[row_ind, col_ind].sum() ) / float(len(list2)) ave_dd = np.sqrt( cost_matrix_dd[row_ind, col_ind].sum() ) / float(len(list2)) max_dd = np.sqrt( cost_matrix_dd[row_ind, col_ind].max() ) return (mapping, sum_of_squares, ave_dd, max_dd)
[docs] def updateCandidateList(self, kpoint: list, k_opt_list: list, k_opt_dist: list, ave_dd: list, max_dd: list, try_neg: bool) -> tuple: """For a given k point, we want to find out such a one-to-one matching between the calculated satellite peaks and those observed ones that gives the minimal overall distance. The optimized overall distance will be treated as the `indicator distance` corresponding to the specified k-vector. If the `indicator distance` happens to be smaller than the uncertainty of peak positions (which is determined by the instrument resolution), only the top candidate will be returned. :param kpoint: the trial k vector :param k_opt_list: list of top candidates of k vectors :param k_opt_dist: list of the `indicator distances` of those top candidates of k vectors :param ave_dd: list of ave. delta d values of those top candidates of k vectors :param max_dd: list of max. delta d values of those top candidates of k vectors :param try_neg: flag for controlling whether to try the negative k vector corresponding to the input k vector trial. For the corners of the irreducible Brillouin zone wedge, we don't need to try their negatives, as the k vector, in that case, is different from its negative by a lattice vector in reciprocal space and thus they are equivalent to each other. :return: tuple of the updated list of top candidates of k vectors and the updated list of the `indicator distances` """ rep_prim_latt = self.kpathFinder()["reciprocal_primitive_lattice"] satellite_peaks = list() for nucp in self.nucPeaks: all_zero_nuc = all(v == 0 for v in nucp) all_zero_k = all(v == 0 for v in kpoint) if all_zero_nuc and all_zero_k: continue hkl_prim = np.array(nucp) hkl_p_k = hkl_prim + np.array(kpoint) k_cart = np.matmul( hkl_p_k, rep_prim_latt ) d_hkl_p_k = 2. * np.pi / np.linalg.norm(k_cart) satellite_peaks.append(d_hkl_p_k) if try_neg: hkl_m_k = hkl_prim - np.array(kpoint) k_cart = np.matmul( hkl_m_k, rep_prim_latt ) d_hkl_m_k = 2. * np.pi / np.linalg.norm(k_cart) satellite_peaks.append(d_hkl_m_k) satellite_peaks = list(set(satellite_peaks)) _, indicator_dist, ave_dd_v, max_dd_v = self.unique_closest( satellite_peaks, self.superPeaks ) if indicator_dist <= self.threshold: k_opt_list = [kpoint] k_opt_dist = [indicator_dist] ave_dd = [ave_dd_v] max_dd = [max_dd_v] return (k_opt_list, k_opt_dist, ave_dd, max_dd) else: if len(k_opt_list) < 10: k_opt_new = self.insIntoSortedList( k_opt_dist, indicator_dist ) k_opt_list.insert(k_opt_new[1], kpoint) ave_dd.insert(k_opt_new[1], ave_dd_v) max_dd.insert(k_opt_new[1], max_dd_v) else: if indicator_dist < k_opt_dist[9]: k_opt_new = self.insIntoSortedList( k_opt_dist, indicator_dist ) k_opt_list.insert(k_opt_new[1], kpoint) ave_dd.insert(k_opt_new[1], ave_dd_v) max_dd.insert(k_opt_new[1], max_dd_v) else: return (k_opt_list, k_opt_dist, ave_dd, max_dd) return ( k_opt_list[:10], k_opt_new[0][:10], ave_dd, max_dd )
[docs] def kOptFinder(self) -> list: """This is the kernel of the class, defining the method for searching over the Brillouin zone for optimal k vector that best explains the observed satellite peaks, given the already refined nucleus structure. :return: the list of top candidates of the optimal k vector (in the reciprocal `primitive cell` setting). If the top one is below the threshold, i.e., the distance between the nominal and observed positions of those satellite peaks smaller than the uncertainty of peak positions (which is determined by the instrument resolution), only one candidate will be returned. Otherwise, top 10 candidates will be returned. """ start_search = time.time() hs_points = self.kpathFinder()["point_coords"] rep_prim_latt = self.kpathFinder()["reciprocal_primitive_lattice"] k_opt_list = list() k_opt_dist = list() k_opt_ad = list() k_opt_md = list() if len(self.nucPeaks) < np.floor(len(self.superPeaks) / 2): err_msg = "The number of nucleus peaks is not enough for " err_msg += "k vector determination." raise ValueError("err_msg") else: if len(self.nucPeaks) >= len(self.superPeaks): # search over those high symmetry points on the suggested k # path In this case, we don't need to consider their negatives # as they are equivalent. print("[Info] Searching over high symmetry points ...") for name, kpoint in hs_points.items(): k_opt_tmp = self.updateCandidateList( kpoint, k_opt_list, k_opt_dist, k_opt_ad, k_opt_md, True ) k_opt_list = k_opt_tmp[0] k_opt_dist = k_opt_tmp[1] k_opt_ad = k_opt_tmp[2] k_opt_md = k_opt_tmp[3] found_opt = k_opt_dist[0] <= self.threshold msg = "[Info] k point (primitive setting): " msg += "{:s} => [{:.5F}, {:.5F}, {:.5F}], ".format( name, kpoint[0], kpoint[1], kpoint[2] ) msg += "Indicator distance: " msg += "{:.5F}, ".format(k_opt_dist[0]) msg += f"Threshold: {self.threshold}" print(msg) if len(k_opt_list) == 1 and found_opt: stop_search = time.time() te = stop_search - start_search print(f"[Info] Time elapsed: {te} s") return (k_opt_list, k_opt_dist, k_opt_ad, k_opt_md) if self.option == 1 or self.option == 2: # search along the k-path print("[Info] Searching along the high symmetry path ...") k_paths = self.kpathFinder() for k_path in k_paths["path"]: print("[Info] k path (primitive setting):", k_path) seg_len = self.kstep[0] k_path_s = k_paths["point_coords"][k_path[0]] k_path_e = k_paths["point_coords"][k_path[1]] k_path_vec = np.array( [ y - x for y, x in zip(k_path_e, k_path_s) ] ) k_path_vec_cart = np.matmul( k_path_vec, rep_prim_latt ) k_path_len = np.linalg.norm(k_path_vec_cart) while seg_len < k_path_len: kpoint = self.pointOnVector( k_path_s, k_path_e, seg_len ) k_opt_tmp = self.updateCandidateList( kpoint, k_opt_list, k_opt_dist, k_opt_ad, k_opt_md, True ) k_opt_list = k_opt_tmp[0] k_opt_dist = k_opt_tmp[1] k_opt_ad = k_opt_tmp[2] k_opt_md = k_opt_tmp[3] found_opt = k_opt_dist[0] <= self.threshold if len(k_opt_list) == 1 and found_opt: stop_search = time.time() te = stop_search - start_search print(f"[Info] Time elapsed: {te} s") return (k_opt_list, k_opt_dist) seg_len += self.kstep[0] if self.option == 2: # search over the whole 1st Brillouin zone print("[Info] Searching over general k points ...") ka_step = self.kstep[0] kb_step = self.kstep[1] kc_step = self.kstep[2] kpa_len = np.linalg.norm(rep_prim_latt[0]) kpb_len = np.linalg.norm(rep_prim_latt[1]) kpc_len = .5 * np.linalg.norm(rep_prim_latt[2]) a_array = -0.5 + np.arange(0, kpa_len, ka_step) / kpa_len b_array = -0.5 + np.arange(0, kpb_len, kb_step) / kpb_len c_array = np.arange(0, kpc_len / 2., kc_step) / kpc_len points = np.array(np.meshgrid(a_array, b_array, c_array)) points = points.T.reshape(-1, 3) results = kvec_general.parallel_proc( points, self.nucPeaks, self.superPeaks, rep_prim_latt, processes=self.processes ) k_opt_list = [ item[0] for item in results ] k_opt_dist = [ item[1] for item in results ] k_opt_ad = [ item[2] for item in results ] k_opt_md = [ item[3] for item in results ] stop_search = time.time() te = stop_search - start_search print(f"[Info] Time elapsed: {te} s") return (k_opt_list, k_opt_dist, k_opt_ad, k_opt_md) else: if self.option == 0: err_msg = "The number of nucleus peaks is less than that " err_msg += "of the satellite peaks, and thus the search " err_msg += "should go beyond the high symmetry point. \n" err_msg += "Please select to search either along k paths " err_msg += "or over the whole 1st Brillouin zone." raise ValueError(err_msg) else: warn_msg = "The number of nucleus peaks is less than that " warn_msg += "of the satellite peaks, thus skipping the " warn_msg += "search over the high symmetry points." print(f"[Warning] {warn_msg}")