Assigning sampling weights (instance-based vs. class-based)
import sys

import numpy as np
import torch
from torch.utils.data import DataLoader, WeightedRandomSampler, Dataset

# Mock dataset class
class MockDataset(Dataset):
    def __init__(self, data, targets):
        self.data = data
        self.targets = targets
        self.dr = targets  # mimicking the 'dr' attribute in the original dataset

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        return self.data[idx], self.targets[idx]

# Mock get_sampling_probabilities function
def get_sampling_probabilities(class_count, mode='instance', ep=None, n_eps=None):
    if mode == 'instance':
        q = 0
    elif mode == 'class':
        q = 1
    elif mode == 'sqrt':
        q = 0.5
    elif mode == 'cbrt':
        q = 0.125
    elif mode == 'prog':
        assert ep is not None and n_eps is not None, 'progressive sampling requires to pass values for ep and n_eps'
        relative_freq_imbal = class_count ** 0 / (class_count ** 0).sum()
        relative_freq_bal = class_count ** 1 / (class_count ** 1).sum()
        sampling_probabilities_imbal = relative_freq_imbal ** (-1)
        sampling_probabilities_bal = relative_freq_bal ** (-1)
        # Linearly interpolate from instance-based to class-based sampling over the epochs
        return (1 - ep / (n_eps - 1)) * sampling_probabilities_imbal + (ep / (n_eps - 1)) * sampling_probabilities_bal
    else:
        sys.exit('not a valid mode')
    relative_freq = class_count ** q / (class_count ** q).sum()
    sampling_probabilities = relative_freq ** (-1)
    return sampling_probabilities

# Mock modify_loader function
def modify_loader(loader, mode, ep=None, n_eps=None):
    # Count how many samples belong to each class
    class_count = np.unique(loader.dataset.dr, return_counts=True)[1]
    print(f"class_count: {class_count}")
    # Get the per-class sampling probabilities
    sampling_probs = get_sampling_probabilities(class_count, mode=mode, ep=ep, n_eps=n_eps)
    print(f"sampling_probs: {sampling_probs}")
    # Derive each sample's weight from its class
    sample_weights = sampling_probs[loader.dataset.dr]
    print(f"loader.dataset.dr: {loader.dataset.dr}")
    print(f"sample_weights: {sample_weights}")
    # Build a weighted random sampler
    mod_sampler = WeightedRandomSampler(weights=sample_weights, num_samples=len(sample_weights))
    # Build a new data loader that uses the sampler
    mod_loader = DataLoader(loader.dataset, batch_size=loader.batch_size, sampler=mod_sampler, num_workers=loader.num_workers)
    return mod_loader

# Mock data and labels
data = np.random.randn(100, 3, 32, 32)
targets = np.random.choice([0, 1, 2], size=100, p=[0.7, 0.2, 0.1])  # imbalanced classes
dataset = MockDataset(data, targets)
loader = DataLoader(dataset, batch_size=10, num_workers=1)

# Balanced sampling with the 'class' mode
modified_loader = modify_loader(loader, mode='class')

# Print a few samples from the modified loader
for i, (data, target) in enumerate(modified_loader):
    print(f"Batch {i}:")
    print(f"Data shape: {data.shape}, Targets: {target}")
    if i == 2:  # only print the first 3 batches for brevity
        break
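The 'prog' mode above interpolates between instance-based (ep=0) and class-based (ep=n_eps-1) sampling as training progresses. Below is a minimal sketch of how it might be driven from an epoch loop, reusing the loader and modify_loader defined above; n_eps=5 is just an illustrative value.

# Illustrative sketch: rebuild the loader each epoch so the sampling
# probabilities move from instance-based (ep=0) to class-balanced (ep=n_eps-1)
n_eps = 5  # illustrative number of epochs
for ep in range(n_eps):
    prog_loader = modify_loader(loader, mode='prog', ep=ep, n_eps=n_eps)
    first_targets = next(iter(prog_loader))[1]
    print(f"epoch {ep}: first-batch targets = {first_targets}")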
Merging two loaders
import numpy as np
import torch
from torch.utils.data import DataLoader, Dataset

# A simple dataset class
class SimpleDataset(Dataset):
    def __init__(self, data, targets):
        self.data = data
        self.targets = targets

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        return self.data[idx], self.targets[idx]

# The ComboIter class
class ComboIter(object):
    """An iterator."""
    def __init__(self, my_loader):
        self.my_loader = my_loader
        self.loader_iters = [iter(loader) for loader in self.my_loader.loaders]

    def __iter__(self):
        return self

    def __next__(self):
        # Take one batch from each loader
        batches = [next(loader_iter) for loader_iter in self.loader_iters]
        # Return the combined batch
        return self.my_loader.combine_batch(batches)

    def __len__(self):
        return len(self.my_loader)

# The ComboLoader class
class ComboLoader(object):
    """This class wraps several pytorch DataLoader objects, allowing each time
    taking a batch from each of them and then combining these several batches
    into one. This class mimics the `for batch in loader:` interface of
    pytorch `DataLoader`.
    Args:
        loaders: a list or tuple of pytorch DataLoader objects
    """
    def __init__(self, loaders):
        self.loaders = loaders

    def __iter__(self):
        return ComboIter(self)

    def __len__(self):
        return min([len(loader) for loader in self.loaders])

    # Customizable method that merges the per-loader batches
    def combine_batch(self, batches):
        return batches

# Create two datasets and DataLoaders
data1 = torch.randn(100, 3, 32, 32)
targets1 = torch.randint(0, 2, (100,))
dataset1 = SimpleDataset(data1, targets1)
loader1 = DataLoader(dataset1, batch_size=10)

data2 = torch.randn(100, 3, 32, 32)
targets2 = torch.randint(0, 2, (100,))
dataset2 = SimpleDataset(data2, targets2)
loader2 = DataLoader(dataset2, batch_size=10)

# Merge the two DataLoaders with ComboLoader
combo_loader = ComboLoader([loader1, loader2])

# Print the format of the combined batches
print("\nCombined Loader batches:")
for i, batches in enumerate(combo_loader):
    print(f"Batch {i}:")
    for j, batch in enumerate(batches):
        print(f"  Sub-Batch {j}: Data shape: {batch[0].shape}, Targets: {batch[1]}")
    if i == 2:  # only print the first 3 batches for brevity
        break
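Because this combine_batch returns the sub-batches as a list, a training step can handle them separately, for example weighting the loss from each loader differently. A minimal sketch under that assumption follows; the tiny model, the loss, and the equal 0.5/0.5 weighting are illustrative and not part of the original code.

import torch.nn as nn

# Hypothetical tiny model and loss, only to show how the two sub-batches are consumed
model = nn.Sequential(nn.Flatten(), nn.Linear(3 * 32 * 32, 2))
criterion = nn.CrossEntropyLoss()

for (x1, y1), (x2, y2) in combo_loader:
    # One step sees a batch from each loader; here their losses are weighted equally
    loss = 0.5 * criterion(model(x1), y1) + 0.5 * criterion(model(x2), y2)
    loss.backward()
    break  # single step shown for brevity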
The variant below is identical except for combine_batch: instead of returning the sub-batches as a list, it concatenates them along the batch dimension into one larger batch.

import numpy as np
import torch
from torch.utils.data import DataLoader, Dataset

# A simple dataset class
class SimpleDataset(Dataset):
    def __init__(self, data, targets):
        self.data = data
        self.targets = targets

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        return self.data[idx], self.targets[idx]

# The ComboIter class
class ComboIter(object):
    """An iterator."""
    def __init__(self, my_loader):
        self.my_loader = my_loader
        self.loader_iters = [iter(loader) for loader in self.my_loader.loaders]

    def __iter__(self):
        return self

    def __next__(self):
        # Take one batch from each loader
        batches = [next(loader_iter) for loader_iter in self.loader_iters]
        # Return the combined batch
        return self.my_loader.combine_batch(batches)

    def __len__(self):
        return len(self.my_loader)

# The ComboLoader class
class ComboLoader(object):
    """This class wraps several pytorch DataLoader objects, allowing each time
    taking a batch from each of them and then combining these several batches
    into one. This class mimics the `for batch in loader:` interface of
    pytorch `DataLoader`.
    Args:
        loaders: a list or tuple of pytorch DataLoader objects
    """
    def __init__(self, loaders):
        self.loaders = loaders

    def __iter__(self):
        return ComboIter(self)

    def __len__(self):
        return min([len(loader) for loader in self.loaders])

    # Merge the per-loader batches by concatenating along the batch dimension
    def combine_batch(self, batches):
        combined_data = torch.cat([batch[0] for batch in batches], dim=0)
        combined_targets = torch.cat([batch[1] for batch in batches], dim=0)
        return combined_data, combined_targets

# Create two datasets and DataLoaders
data1 = torch.randn(100, 3, 32, 32)
targets1 = torch.randint(0, 2, (100,))
dataset1 = SimpleDataset(data1, targets1)
loader1 = DataLoader(dataset1, batch_size=10)

data2 = torch.randn(100, 3, 32, 32)
targets2 = torch.randint(0, 2, (100,))
dataset2 = SimpleDataset(data2, targets2)
loader2 = DataLoader(dataset2, batch_size=10)

# Print a few batches from each loader
print("Loader 1 batches:")
for i, (data, targets) in enumerate(loader1):
    print(f"Batch {i}:")
    print(f"Data shape: {data.shape}, Targets: {targets}")
    if i == 2:  # only print the first 3 batches for brevity
        break

print("\nLoader 2 batches:")
for i, (data, targets) in enumerate(loader2):
    print(f"Batch {i}:")
    print(f"Data shape: {data.shape}, Targets: {targets}")
    if i == 2:  # only print the first 3 batches for brevity
        break

# Merge the two DataLoaders with ComboLoader
combo_loader = ComboLoader([loader1, loader2])

# Print the combined batches
print("\nCombined Loader batches:")
for i, (data, targets) in enumerate(combo_loader):
    print(f"Batch {i}:")
    print(f"Data shape: {data.shape}, Targets: {targets}")
    if i == 2:  # only print the first 3 batches for brevity
        break
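Putting both sections together, modify_loader can produce a class-balanced view of an imbalanced dataset, and ComboLoader can pair it with the original instance-sampled view so that every training step sees both. The following is a minimal sketch, assuming MockDataset and modify_loader from the first section and the concatenating ComboLoader above are all in scope; the dataset size and class probabilities are purely illustrative.

import numpy as np
import torch
from torch.utils.data import DataLoader

# Illustrative end-to-end sketch: one loader samples instances as-is (in expectation),
# the other resamples to balance the classes, and ComboLoader pairs them each step.
base_data = np.random.randn(200, 3, 32, 32)
base_targets = np.random.choice([0, 1, 2], size=200, p=[0.7, 0.2, 0.1])
base_dataset = MockDataset(base_data, base_targets)
base_loader = DataLoader(base_dataset, batch_size=10, num_workers=0)

imbalanced_loader = modify_loader(base_loader, mode='instance')  # roughly keeps original class ratios
balanced_loader = modify_loader(base_loader, mode='class')       # oversamples the rare classes

train_loader = ComboLoader([imbalanced_loader, balanced_loader])
for i, (data, targets) in enumerate(train_loader):
    # Each combined batch holds 10 instance-sampled plus 10 class-balanced examples
    print(f"Step {i}: data {tuple(data.shape)}, class counts {torch.bincount(targets)}")
    if i == 1:
        break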