Skip to content

Commit

Permalink
added parameter for metric and largest or smaller
Browse files Browse the repository at this point in the history
  • Loading branch information
Laouen committed Jun 13, 2024
1 parent d9dac6a commit 3c48845
Show file tree
Hide file tree
Showing 4 changed files with 109 additions and 59 deletions.
30 changes: 21 additions & 9 deletions thoi/collectors.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,13 +68,15 @@ def concatenate_csv(batched_dataframes, output_path, sep='\t'):


def batch_to_tensor(partition_idxs: np.ndarray,
nplets_o: np.ndarray,
nplets_s: np.ndarray,
nplets_tc: np.ndarray,
nplets_dtc: np.ndarray,
nplets_o:np.ndarray,
nplets_s:np.ndarray,
nplets_tc:np.ndarray,
nplets_dtc:np.ndarray,
bn:int,
only_synergestic: bool=False,
top_k: Optional[int] = None):
only_synergestic:bool=False,
top_k:Optional[int] = None,
metric:str = 'o',
largest:bool = False):

# If only_synergestic; remove nplets with nplet_o >= 0
if only_synergestic:
Expand All @@ -86,7 +88,11 @@ def batch_to_tensor(partition_idxs: np.ndarray,
partition_idxs = partition_idxs[to_keep.cpu()]

if top_k is not None and len(nplets_o) > top_k:
_, indices = torch.topk(nplets_o, top_k, largest=False, sorted=True)

values_idxs = ['tc','dtc','o','s'].index(metric)
values = [nplets_tc, nplets_dtc, nplets_o, nplets_s][values_idxs]

_, indices = torch.topk(values, top_k, largest=largest, sorted=True)

nplets_tc = nplets_tc[indices]
nplets_dtc = nplets_dtc[indices]
Expand All @@ -104,7 +110,9 @@ def batch_to_tensor(partition_idxs: np.ndarray,


def concat_tensors(batched_tensors: List[Tuple[torch.tensor, torch.tensor, torch.tensor, torch.tensor, torch.tensor]],
top_k: Optional[int] = None):
top_k: Optional[int] = None,
metric:str = 'o',
largest:bool = False):

nplets_tc = torch.cat([batch[0] for batch in batched_tensors])
nplets_dtc = torch.cat([batch[1] for batch in batched_tensors])
Expand All @@ -113,7 +121,11 @@ def concat_tensors(batched_tensors: List[Tuple[torch.tensor, torch.tensor, torch
partition_idxs = torch.cat([batch[4] for batch in batched_tensors])

if top_k is not None and len(nplets_o) > top_k:
_, indices = torch.topk(nplets_o, top_k, largest=False, sorted=True)

values_idxs = ['tc','dtc','o','s'].index(metric)
values = [nplets_tc, nplets_dtc, nplets_o, nplets_s][values_idxs]

_, indices = torch.topk(values, top_k, largest=largest, sorted=True)
nplets_tc = nplets_tc[indices]
nplets_dtc = nplets_dtc[indices]
nplets_o = nplets_o[indices]
Expand Down
65 changes: 39 additions & 26 deletions thoi/heuristics/greedy.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,46 +3,43 @@
import torch
from functools import partial

from thoi.measures.gaussian_copula import nplets_measures, gaussian_copula, multi_order_measures
from thoi.measures.gaussian_copula import gaussian_copula, multi_order_measures
from thoi.collectors import batch_to_tensor, concat_tensors
from thoi.heuristics.scoring import _evaluate_nplet


def _gc_oinfo(covmat: torch.tensor, T:int, batched_nplets: torch.tensor):
def greedy(X:np.ndarray,
order:int,
initial_order:int=3,
repeat:int=10,
use_cpu:bool=False,
batch_size:int=1000000,
metric:str='o',
largest:bool=False):

"""
X (torch.tensor): The covariance matrix with shape (n_variables, n_variables)
batched_nplets (torch.tensor): The nplets to calculate the inverse of the oinformation with shape (batch_size, order)
"""
# |batch_size| x |4 = (tc, dtc, o, s)|
batched_res = nplets_measures(covmat, batched_nplets, T=T, covmat_precomputed=True)

# Return minus the o information score to make it an maximum optimization
# |batch_size|
return batched_res[:,2].flatten()


def greedy(X:np.ndarray, order:int, initial_order:int=3, repeat:int=10, use_cpu:bool=False, batch_size:int=1000000):
assert metric in ['tc', 'dtc', 'o', 's'], f'metric must be one of tc, dtc, o or s. invalid value: {metric}'

current_solution = multi_order_measures(
X, initial_order, initial_order, batch_size=batch_size, use_cpu=use_cpu,
batch_data_collector=partial(batch_to_tensor, top_k=repeat),
batch_aggregation=partial(concat_tensors, top_k=repeat)
batch_data_collector=partial(batch_to_tensor, top_k=repeat, metric=metric, largest=largest),
batch_aggregation=partial(concat_tensors, top_k=repeat, metric=metric, largest=largest)
)[-1]

T, N = X.shape

covmat = torch.tensor(gaussian_copula(X)[1])

# make device cpu if not cuda available or cuda if available
# Make device cpu if not cuda available or cuda if available
using_GPU = torch.cuda.is_available() and not use_cpu
device = torch.device('cuda' if using_GPU else 'cpu')

covmat = covmat.to(device).contiguous()
current_solution = current_solution.to(device).contiguous()

best_scores = [_gc_oinfo(covmat, T, current_solution)]
best_scores = [_evaluate_nplet(covmat, T, current_solution, metric)]
for _ in trange(initial_order, order, leave=False, desc='Order'):
best_candidate, best_score = next_order_greedy(covmat, T, current_solution)
best_candidate, best_score = next_order_greedy(covmat, T, current_solution, metric, min)
best_scores.append(best_score)

current_solution = torch.cat((current_solution, best_candidate.unsqueeze(1)) , dim=1)
Expand All @@ -53,7 +50,11 @@ def greedy(X:np.ndarray, order:int, initial_order:int=3, repeat:int=10, use_cpu:

def next_order_greedy(covmat: torch.tensor,
T: int,
initial_solution: torch.tensor):
initial_solution: torch.tensor,
metric:str='o',
largest:bool=False):

assert metric in ['tc', 'dtc', 'o', 's'], f'metric must be one of tc, dtc, o or s. invalid value: {metric}'

# Get parameters attributes
device = covmat.device
Expand All @@ -68,16 +69,19 @@ def next_order_greedy(covmat: torch.tensor,
for b in torch.arange(batch_size, device=device)
]).contiguous()

# current_solution constructed by adding first element of valid_candidate to input solution
# Current_solution constructed by adding first element of valid_candidate to input solution
current_solution = torch.cat((initial_solution, valid_candidates[:, [0]]) , dim=1)

# start best solution first_candidate
# Start best solution first_candidate
# |batch_size| x |order+1|
best_candidates = valid_candidates[:, 0]
# |batch_size|
best_score = _gc_oinfo(covmat, T, current_solution)
best_score = _evaluate_nplet(covmat, T, current_solution, metric)

if not largest:
best_score = -best_score

# iterate candidate from 1 since candidate 0 is already in the initial best solution
# Iterate candidate from 1 since candidate 0 is already in the initial best solution
for i_cand in trange(1,valid_candidates.shape[1], leave=False, desc=f'Candidates'):

# Update current solution to the next candidate inplace to avoid memory overhead
Expand All @@ -86,11 +90,16 @@ def next_order_greedy(covmat: torch.tensor,

# Calculate score of new solution
# |batch_size|
new_score = _gc_oinfo(covmat, T, current_solution)
new_score = _evaluate_nplet(covmat, T, current_solution, metric)

# if minimizing, then return score to optimizing
if not largest:
new_score = -new_score

# Determine if we should accept the new solution
# new_score is bigger (more optimal) than best_score
# |batch_size|
new_global_maxima = new_score < best_score
new_global_maxima = new_score > best_score

# update best solution based on accpetance criteria
# |batch_size| x |order|
Expand All @@ -106,5 +115,9 @@ def next_order_greedy(covmat: torch.tensor,
new_score,
best_score
)

# If minimizing, then return score to its real value
if not largest:
best_score = -best_score

return best_candidates, best_score
24 changes: 24 additions & 0 deletions thoi/heuristics/scoring.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
import torch
from thoi.measures.gaussian_copula import nplets_measures


def _evaluate_nplet(covmat: torch.tensor, T:int, batched_nplets: torch.tensor, metric:str):

"""
X (torch.tensor): The covariance matrix with shape (n_variables, n_variables)
batched_nplets (torch.tensor): The nplets to calculate the inverse of the oinformation with shape (batch_size, order)
metric (str): The metric to evaluate. One of tc, dtc, o or s
"""

# get the metric index to get from the result of nplets
METRICS = ['tc', 'dtc', 'o', 's']
metric_idx = METRICS.index(metric)

# |batch_size| x |4 = (tc, dtc, o, s)|
batched_res = nplets_measures(covmat, batched_nplets, T=T, covmat_precomputed=True)

# Return minus the o information score to make it an maximum optimization (energy)
# |batch_size|
res = batched_res[:,metric_idx].flatten()

return res
49 changes: 25 additions & 24 deletions thoi/heuristics/simulated_annealing.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,17 +4,18 @@
import torch
from functools import partial

from thoi.measures.gaussian_copula import nplets_measures, gaussian_copula, multi_order_measures
from thoi.measures.gaussian_copula import gaussian_copula, multi_order_measures
from thoi.collectors import batch_to_tensor, concat_tensors
from thoi.heuristics.scoring import _evaluate_nplet

def init_lower_order(X: np.ndarray, order:int, lower_order:int, repeat:int, use_cpu:bool, device:torch.device):
def init_lower_order(X: np.ndarray, order:int, lower_order:int, repeat:int, metric:str, largest:bool, use_cpu:bool, device:torch.device):
N = X.shape[1]

# |repeat| x |lower_order|
current_solution = multi_order_measures(
X, lower_order, lower_order, batch_size=repeat, use_cpu=use_cpu,
batch_data_collector=partial(batch_to_tensor, top_k=repeat),
batch_aggregation=partial(concat_tensors, top_k=repeat)
batch_data_collector=partial(batch_to_tensor, top_k=repeat, metric=metric, largest=largest),
batch_aggregation=partial(concat_tensors, top_k=repeat, metric=metric, largest=largest)
)[-1].to(device)

# |N|
Expand All @@ -35,6 +36,7 @@ def init_lower_order(X: np.ndarray, order:int, lower_order:int, repeat:int, use_
# |repeat| x |order|
return torch.cat((current_solution, valid_candidates) , dim=1).contiguous()


def random_sampler(N:int, order:int, repeat:int, device:torch.device=None):

if device is None:
Expand All @@ -46,21 +48,6 @@ def random_sampler(N:int, order:int, repeat:int, device:torch.device=None):
])


def _gc_oinfo_energy(covmat: torch.tensor, T:int, batched_nplets: torch.tensor):

"""
X (torch.tensor): The covariance matrix with shape (n_variables, n_variables)
batched_nplets (torch.tensor): The nplets to calculate the inverse of the oinformation with shape (batch_size, order)
"""
# |batch_size| x |4 = (tc, dtc, o, s)|
batched_res = nplets_measures(covmat, batched_nplets, T=T, covmat_precomputed=True)

# Return minus the o information score to make it an maximum optimization (energy)
# |batch_size|
return -batched_res[:,2].flatten()


# TODO: add optimization value option as parameter
def simulated_annealing(X: np.ndarray,
order: int,
initial_temp:float=100.0,
Expand All @@ -71,11 +58,15 @@ def simulated_annealing(X: np.ndarray,
init_method:str='random', # lower_order, 'random', 'precumputed', 'precomputed_lower_order';
lower_order:int=None,
early_stop:int=100,
current_solution: Optional[torch.tensor]=None):
current_solution: Optional[torch.tensor]=None,
metric:str='o', # tc, dtc, o, s
largest:bool=False):

lower_order = order-1 if lower_order is None else lower_order
assert init_method != 'lower_order' or lower_order < order, 'Init from optima lower order cannot start from a lower_order higher than the order to compute.'

assert metric in ['tc', 'dtc', 'o', 's'], f'metric must be one of tc, dtc, o or s. invalid value: {metric}'

# make device cpu if not cuda available or cuda if available
using_GPU = torch.cuda.is_available() and not use_cpu
device = torch.device('cuda' if using_GPU else 'cpu')
Expand All @@ -85,17 +76,20 @@ def simulated_annealing(X: np.ndarray,
covmat = torch.tensor(gaussian_copula(X)[1])
covmat = covmat.to(device).contiguous()

# Initialize random solution
# Compute initial solution
# |batch_size| x |order|
if init_method == 'random':
current_solution = random_sampler(N, order, repeat, device)
elif init_method == 'lower_order':
current_solution = init_lower_order(X, order, lower_order, repeat, use_cpu, device)
current_solution = init_lower_order(X, order, lower_order, repeat, metric, largest, use_cpu, device)
elif init_method == 'precomputed':
assert current_solution is not None, 'current_solution must be a torch tensor'

# |batch_size|
current_energy = _gc_oinfo_energy(covmat, T, current_solution)
current_energy = _evaluate_nplet(covmat, T, current_solution, metric)

if not largest:
current_energy = -current_energy

# Initial valid candidates
all_elements = torch.arange(N, device=device)
Expand Down Expand Up @@ -132,7 +126,10 @@ def simulated_annealing(X: np.ndarray,

# Calculate energy of new solution
# |batch_size|
new_energy = _gc_oinfo_energy(covmat, T, new_solution)
new_energy = _evaluate_nplet(covmat, T, new_solution, metric)

if not largest:
new_energy = -new_energy

# Calculate change in energy
# delca_energy > 0 means new_energy is bigger (more optimal) than current_energy
Expand Down Expand Up @@ -196,4 +193,8 @@ def simulated_annealing(X: np.ndarray,
print('Early stop reached')
break

# If minimizing, then return score to its real value
if not largest:
best_energy = -best_energy

return best_solution, best_energy

0 comments on commit 3c48845

Please sign in to comment.