Kaggle: CIFAR-10 - Object Recognition in Images。本文是对 d2l 网站代码实现的总结(教程链接见 d2l 官网对应章节)。
本文所用环境如下:python==3.9.23、nvidia-cuda-runtime-cu12==12.9.79、torch==2.7.1+cu118、d2l==1.0.3
📚 数据集介绍 CIFAR-10 是由加拿大多伦多大学 Alex Krizhevsky 等人收集的图像数据集,包含了 10 个类别的彩色图片 。这些图片都是从真实世界拍摄的物体中裁剪而来的。
📏 数据细节:
项目
描述
图片大小
32x32 像素,RGB(三通道)
图片数量
60,000 张图像
类别数量
10 个
训练集
50,000 张图像
测试集
10,000 张图像
图像格式
.png
图像 + .csv
标签/预测
预测流程 导包

import collections
import math
import os
import shutil

import pandas as pd
import torch
import torchvision
from torch import nn

from d2l import torch as d2l
数据整理 我是将完整的数据集下载到本地然后直接用全部数据进行训练的。
# Use a raw string so the backslashes in the Windows path are not treated as
# escape sequences (a path containing e.g. '\t' or '\n' would silently break).
data_dir = r'D:\datasets\cifar-10'
文件夹中有trainLabels.csv
,里边有id
和label
两列,分别代表图片名称和标签。
原教程是按文本方式直接读取文件:利用 csv 文件以逗号分隔的特点,把每一行拆成 id 和 label 两部分,再转换成字典。实际上用 pandas 库可以更简洁地完成这一步:
def read_csv_labels(fname):
    """Return a dict mapping image id (str) -> class label read from the CSV.

    The ids are cast to str because the reorganization code looks labels up
    with the string stem of the image file name ('1.png' -> '1'); pandas
    would otherwise parse the numeric id column as integers and every lookup
    would raise KeyError.
    """
    df = pd.read_csv(fname)
    return dict(zip(df['id'].astype(str), df['label']))


labels = read_csv_labels(os.path.join(data_dir, 'trainLabels.csv'))
print('# training examples:', len(labels))
print('# classes:', len(set(labels.values())))
这样我们就得到了标签字典,可以直接用labels
查看图片的标签。
教程中为了方便读取图片,将图片的存储位置重新整理了一下,建立train_valid_test
文件夹,里面有train
,valid
,test
和train_valid
四个文件夹。除了test
文件夹,其他文件夹内均按照标签分成10个文件夹,每个图片根据自己的标签放到相应的文件夹里,这样做是为了后面分组的时候每组的标签尽量均匀。
def copyfile(filename, target_dir):
    """Copy *filename* into *target_dir*, creating the directory if needed."""
    os.makedirs(target_dir, exist_ok=True)
    shutil.copy(filename, target_dir)


def reorg_train_valid(data_dir, labels, valid_ratio):
    """Split labeled images into train / valid / train_valid class folders.

    Returns the number of validation images taken from each class.
    """
    # The rarest class bounds how many images every class donates to 'valid',
    # so the validation set stays balanced across labels.
    rarest_count = collections.Counter(labels.values()).most_common()[-1][1]
    n_valid_per_label = max(1, math.floor(rarest_count * valid_ratio))
    taken = {}  # label -> images already routed to the 'valid' split
    out_root = os.path.join(data_dir, 'train_valid_test')
    for image_name in os.listdir(os.path.join(data_dir, 'train')):
        label = labels[image_name.split('.')[0]]
        src = os.path.join(data_dir, 'train', image_name)
        # Every labeled image also lands in train_valid (train + valid merged).
        copyfile(src, os.path.join(out_root, 'train_valid', label))
        if taken.get(label, 0) < n_valid_per_label:
            copyfile(src, os.path.join(out_root, 'valid', label))
            taken[label] = taken.get(label, 0) + 1
        else:
            copyfile(src, os.path.join(out_root, 'train', label))
    return n_valid_per_label


def reorg_test(data_dir):
    """Gather all test images under a single pseudo-class folder 'unknown'."""
    for image_name in os.listdir(os.path.join(data_dir, 'test')):
        copyfile(os.path.join(data_dir, 'test', image_name),
                 os.path.join(data_dir, 'train_valid_test', 'test', 'unknown'))
valid_ratio
:验证集占比。如 0.1 表示每个类别的 5000 张训练图片中,验证集取 500 张、训练集留 4500 张。
然后定义一个整合函数,将上面的函数流程统一处理:1 2 3 4 def reorg_cifar10_data (data_dir,valid_ratio ): labels = read_csv_labels(os.path.join(data_dir, 'trainLabels.csv' )) reorg_train_valid(data_dir, labels, valid_ratio) reorg_test(data_dir)
设置批量大小为128,将10%的训练样本作为调整超参数的验证集
batch_size = 128
# Hold out 10% of each class's training images for hyper-parameter tuning.
valid_ratio = 0.1
reorg_cifar10_data(data_dir, valid_ratio)
图像增广 为了防止过拟合,一般会采用图像增广。
标准化的原因:
加快模型收敛速度。让特征值范围大致分布在[-1,1]
避免某些特征主导模型训练。让各通道均值为0,方差为1,平衡每个像素的“权重”
提高模型泛化能力
# Training-time augmentation: enlarge, randomly crop back to 32x32, random
# horizontal flip, then normalize each channel with CIFAR-10 mean/std stats.
transform_train = torchvision.transforms.Compose([
    torchvision.transforms.Resize(40),
    # Crop a square covering 64%-100% of the area, then rescale it to 32x32.
    torchvision.transforms.RandomResizedCrop(32, scale=(0.64, 1.0),
                                             ratio=(1.0, 1.0)),
    torchvision.transforms.RandomHorizontalFlip(),
    torchvision.transforms.ToTensor(),
    torchvision.transforms.Normalize([0.4914, 0.4822, 0.4465],
                                     [0.2023, 0.1994, 0.2010])])

# Test-time pipeline: no augmentation, only tensor conversion + normalization.
transform_test = torchvision.transforms.Compose([
    torchvision.transforms.ToTensor(),
    torchvision.transforms.Normalize([0.4914, 0.4822, 0.4465],
                                     [0.2023, 0.1994, 0.2010])])
读取数据 四个dataset列表:训练集,训练验证集,验证集,测试集
使用训练集和验证集组合而成的数据集(训练验证集)进行训练,充分利用所有标记的数据。
# ImageFolder infers each image's class from the sub-folder it sits in,
# which is exactly the layout reorg_cifar10_data produced above.
train_ds, train_valid_ds = [torchvision.datasets.ImageFolder(
    os.path.join(data_dir, 'train_valid_test', folder),
    transform=transform_train) for folder in ['train', 'train_valid']]

valid_ds, test_ds = [torchvision.datasets.ImageFolder(
    os.path.join(data_dir, 'train_valid_test', folder),
    transform=transform_test) for folder in ['valid', 'test']]

print("train_ds size:", len(train_ds))
print("train_valid_ds size:", len(train_valid_ds))
print("valid_ds size:", len(valid_ds))
print("test_ds size:", len(test_ds))
还需要将上面的数据集转换为可迭代的对象:
# Shuffle and drop the last incomplete batch while training; keep the test
# set in order and complete so predictions line up with the file names.
train_iter, train_valid_iter = [torch.utils.data.DataLoader(
    dataset, batch_size, shuffle=True, drop_last=True)
    for dataset in (train_ds, train_valid_ds)]
valid_iter = torch.utils.data.DataLoader(valid_ds, batch_size, shuffle=False,
                                         drop_last=True)
test_iter = torch.utils.data.DataLoader(test_ds, batch_size, shuffle=False,
                                        drop_last=False)
定义模型 使用Resnet-18
模型
def get_net():
    """Build d2l's ResNet-18 variant: 10 output classes, 3 input channels."""
    num_classes = 10
    return d2l.resnet18(num_classes, 3)


# Per-sample losses (reduction="none"): d2l.train_batch_ch13 sums them itself.
loss = nn.CrossEntropyLoss(reduction="none")
定义训练函数

def train(net, train_iter, valid_iter, num_epochs, lr, wd, devices,
          lr_period, lr_decay):
    """Train *net* with SGD + momentum, step-decaying the learning rate.

    If *valid_iter* is None the validation pass is skipped (used for the
    final run on the merged train+valid data).
    """
    optimizer = torch.optim.SGD(net.parameters(), lr=lr, momentum=0.9,
                                weight_decay=wd)
    # Multiply the learning rate by lr_decay every lr_period epochs.
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, lr_period, lr_decay)
    num_batches, timer = len(train_iter), d2l.Timer()
    legend = ['train loss', 'train acc']
    if valid_iter is not None:
        legend.append('valid acc')
    animator = d2l.Animator(xlabel='epoch', xlim=[1, num_epochs],
                            legend=legend)
    # Wrap for multi-GPU; parameters are shared with the original module.
    net = nn.DataParallel(net, device_ids=devices).to(devices[0])
    for epoch in range(num_epochs):
        net.train()
        # Accumulates: summed loss, number of correct predictions, sample count.
        metric = d2l.Accumulator(3)
        for batch_idx, (features, labels) in enumerate(train_iter):
            timer.start()
            batch_loss, batch_acc = d2l.train_batch_ch13(
                net, features, labels, loss, optimizer, devices)
            metric.add(batch_loss, batch_acc, labels.shape[0])
            timer.stop()
            # Refresh the plot five times per epoch (and on the last batch).
            if (batch_idx + 1) % (num_batches // 5) == 0 or \
                    batch_idx == num_batches - 1:
                animator.add(epoch + (batch_idx + 1) / num_batches,
                             (metric[0] / metric[2],
                              metric[1] / metric[2], None))
        if valid_iter is not None:
            valid_acc = d2l.evaluate_accuracy_gpu(net, valid_iter)
            animator.add(epoch + 1, (None, None, valid_acc))
        scheduler.step()
    summary = (f'train loss {metric[0] / metric[2]:.3f}, '
               f'train acc {metric[1] / metric[2]:.3f}')
    if valid_iter is not None:
        summary += f', valid acc {valid_acc:.3f}'
    print(summary + f'\n{metric[2] * num_epochs / timer.sum():.1f}'
          f' examples/sec on {str(devices)}')
训练和验证模型 定义超参数和训练模型
devices, num_epochs, lr, wd = d2l.try_all_gpus(), 20, 2e-4, 5e-4
# Decay the learning rate by a factor of 0.9 every 4 epochs.
lr_period, lr_decay, net = 4, 0.9, get_net()
train(net, train_iter, valid_iter, num_epochs, lr, wd, devices, lr_period,
      lr_decay)
对测试集进行分类并生成结果

net, preds = get_net(), []
# Retrain on the merged train+valid data (valid_iter=None skips validation).
train(net, train_valid_iter, None, num_epochs, lr, wd, devices, lr_period,
      lr_decay)
# train() leaves the network in training mode, so BatchNorm would otherwise
# use batch statistics on the test set; switch to inference mode and disable
# gradient tracking before predicting.
net.eval()
with torch.no_grad():
    for X, _ in test_iter:
        y_hat = net(X.to(devices[0]))
        preds.extend(y_hat.argmax(dim=1).type(torch.int32).cpu().numpy())
# ImageFolder orders files lexicographically ('1.png', '10.png', ...), so the
# ids must be sorted as strings to line up with the prediction order.
sorted_ids = list(range(1, len(test_ds) + 1))
sorted_ids.sort(key=lambda x: str(x))
df = pd.DataFrame({'id': sorted_ids, 'label': preds})
df['label'] = df['label'].apply(lambda x: train_valid_ds.classes[x])
df.to_csv('submission.csv', index=False)
生成的submission.csv
就可以提交到Kaggle上了。