Pytorch Learning Note
torch.nn
Module: creates a callable which behaves like a function, but can also contain state (such as neural network layer weights). It knows which Parameters it contains, and can zero all their gradients, loop through them for weight updates, etc.
Parameter: a wrapper for a tensor that tells a Module it has weights that need updating during backprop. Only tensors with the requires_grad attribute set are updated.
functional: a module (usually imported into the F namespace by convention) which contains activation functions, loss functions, etc., as well as stateless versions of layers such as convolutional and linear layers.
torch.optim
Contains optimizers such as SGD, which update the weights of Parameter during the backward step.
Dataset
An object with __len__ and __getitem__, including classes provided with PyTorch such as TensorDataset.
DataLoader
Takes any Dataset and creates an iterator which returns batches of data.
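To see how these pieces fit together before the MNIST walkthrough below, here is a minimal sketch (the TinyNet class and the random toy data are made up for illustration and are not part of the tutorial): an nn.Module holding nn.Parameters, a stateless loss from torch.nn.functional, an optim.SGD optimizer, and a TensorDataset iterated through a DataLoader.

```python
import torch
from torch import nn, optim
import torch.nn.functional as F
from torch.utils.data import TensorDataset, DataLoader

class TinyNet(nn.Module):  # hypothetical example module, not from the tutorial
    def __init__(self):
        super().__init__()
        # nn.Parameter registers these tensors so the Module can
        # enumerate them, zero their gradients, and update them.
        self.weight = nn.Parameter(torch.randn(3, 1))
        self.bias = nn.Parameter(torch.zeros(1))

    def forward(self, x):
        return x @ self.weight + self.bias

x, y = torch.randn(32, 3), torch.randn(32, 1)  # random toy data, for illustration only
ds = TensorDataset(x, y)                       # Dataset: has __len__ and __getitem__
dl = DataLoader(ds, batch_size=8)              # DataLoader: yields mini-batches

model = TinyNet()
opt = optim.SGD(model.parameters(), lr=0.1)    # optimizer over the Module's Parameters

for xb, yb in dl:
    loss = F.mse_loss(model(xb), yb)           # stateless loss from functional (as F)
    loss.backward()
    opt.step()                                 # weight update after the backward pass
    opt.zero_grad()
```

The rest of this note builds up the same loop structure on the MNIST data, refactoring it step by step.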
```python
from pathlib import Path

import requests

DATA_PATH = Path("data")
PATH = DATA_PATH / "mnist"

PATH.mkdir(parents=True, exist_ok=True)

URL = "https://github.com/pytorch/tutorials/raw/master/_static/"
FILENAME = "mnist.pkl.gz"

if not (PATH / FILENAME).exists():
    content = requests.get(URL + FILENAME).content
    (PATH / FILENAME).open("wb").write(content)
```
```python
import pickle
import gzip

with gzip.open((PATH / FILENAME).as_posix(), "rb") as f:
    ((x_train, y_train), (x_valid, y_valid), _) = pickle.load(f, encoding="latin-1")
```
```python
from matplotlib import pyplot
import numpy as np

pyplot.imshow(x_train[0].reshape((28, 28)), cmap="gray")
print(x_train.shape)
```
(50000, 784)
```python
import torch

x_train, y_train, x_valid, y_valid = map(
    torch.tensor, (x_train, y_train, x_valid, y_valid)
)
n, c = x_train.shape
x_train, x_train.shape, y_train.min(), y_train.max()
print(x_train, y_train)
print(x_train.shape)
print(y_train.min(), y_train.max())
```
tensor([[0., 0., 0., ..., 0., 0., 0.],
[0., 0., 0., ..., 0., 0., 0.],
[0., 0., 0., ..., 0., 0., 0.],
...,
[0., 0., 0., ..., 0., 0., 0.],
[0., 0., 0., ..., 0., 0., 0.],
[0., 0., 0., ..., 0., 0., 0.]]) tensor([5, 0, 4, ..., 8, 4, 8])
torch.Size([50000, 784])
tensor(0) tensor(9)
Building a linear neural-network model from scratch
```python
import math

weights = torch.randn(784, 10) / math.sqrt(784)
weights.requires_grad_()
bias = torch.zeros(10, requires_grad=True)

def log_softmax(x):
    return x - x.exp().sum(-1).log().unsqueeze(-1)

def model(xb):
    return log_softmax(xb @ weights + bias)

bs = 64  # batch size

xb = x_train[0:bs]  # a mini-batch from x
preds = model(xb)   # predictions
preds[0], preds.shape
print(preds[0], preds.shape)

def nll(input, target):
    # negative log-likelihood of the log-softmax outputs
    return -input[range(target.shape[0]), target].mean()

loss_func = nll

yb = y_train[0:bs]
print(loss_func(preds, yb))

def accuracy(out, yb):
    preds = torch.argmax(out, dim=1)
    return (preds == yb).float().mean()

print(accuracy(preds, yb))

from IPython.core.debugger import set_trace

lr = 0.5     # learning rate
epochs = 2   # how many epochs to train for

for epoch in range(epochs):
    for i in range((n - 1) // bs + 1):
        start_i = i * bs
        end_i = start_i + bs
        xb = x_train[start_i:end_i]
        yb = y_train[start_i:end_i]
        pred = model(xb)
        loss = loss_func(pred, yb)

        loss.backward()
        with torch.no_grad():
            weights -= weights.grad * lr
            bias -= bias.grad * lr
            weights.grad.zero_()
            bias.grad.zero_()

print(loss_func(model(xb), yb), accuracy(model(xb), yb))
```
tensor([-2.5487, -2.8346, -2.7262, -2.1794, -2.1199, -2.1041, -1.9327, -2.1947,
-2.5637, -2.2133], grad_fn=<SelectBackward>) torch.Size([64, 10])
tensor(2.3308, grad_fn=<NegBackward>)
tensor(0.1094)
tensor(0.0806, grad_fn=<NegBackward>) tensor(1.)
Refactor using torch.nn.functional
```python
import torch.nn.functional as F

loss_func = F.cross_entropy

def model(xb):
    return xb @ weights + bias

print(loss_func(model(xb), yb), accuracy(model(xb), yb))
```
tensor(0.0806, grad_fn=<NllLossBackward>) tensor(1.)
Refactor using nn.Module
```python
from torch import nn

class Mnist_Logistic(nn.Module):
    def __init__(self):
        super().__init__()
        self.weights = nn.Parameter(torch.randn(784, 10) / math.sqrt(784))
        self.bias = nn.Parameter(torch.zeros(10))

    def forward(self, xb):
        return xb @ self.weights + self.bias

model = Mnist_Logistic()
print(loss_func(model(xb), yb))

def fit():
    for epoch in range(epochs):
        for i in range((n - 1) // bs + 1):
            start_i = i * bs
            end_i = start_i + bs
            xb = x_train[start_i:end_i]
            yb = y_train[start_i:end_i]
            pred = model(xb)
            loss = loss_func(pred, yb)

            loss.backward()
            with torch.no_grad():
                for p in model.parameters():
                    p -= p.grad * lr
                model.zero_grad()

fit()
print(loss_func(model(xb), yb))
```
tensor(2.4222, grad_fn=<NllLossBackward>)
tensor(0.0817, grad_fn=<NllLossBackward>)
Refactor using nn.Linear
```python
class Mnist_Logistic(nn.Module):
    def __init__(self):
        super().__init__()
        self.lin = nn.Linear(784, 10)

    def forward(self, xb):
        return self.lin(xb)

model = Mnist_Logistic()
print(loss_func(model(xb), yb))

fit()

print(loss_func(model(xb), yb))
```
tensor(2.3090, grad_fn=<NllLossBackward>)
tensor(0.0824, grad_fn=<NllLossBackward>)
Refactor using optim
```python
from torch import optim

def get_model():
    model = Mnist_Logistic()
    return model, optim.SGD(model.parameters(), lr=lr)

model, opt = get_model()
print(loss_func(model(xb), yb))

for epoch in range(epochs):
    for i in range((n - 1) // bs + 1):
        start_i = i * bs
        end_i = start_i + bs
        xb = x_train[start_i:end_i]
        yb = y_train[start_i:end_i]
        pred = model(xb)
        loss = loss_func(pred, yb)

        loss.backward()
        opt.step()
        opt.zero_grad()

print(loss_func(model(xb), yb))
```
tensor(2.2990, grad_fn=<NllLossBackward>)
tensor(0.0805, grad_fn=<NllLossBackward>)
Refactor using Dataset
```python
from torch.utils.data import TensorDataset

train_ds = TensorDataset(x_train, y_train)
xb, yb = train_ds[i * bs: i * bs + bs]  # a mini-batch can now be sliced in one line

model, opt = get_model()

for epoch in range(epochs):
    for i in range((n - 1) // bs + 1):
        xb, yb = train_ds[i * bs: i * bs + bs]
        pred = model(xb)
        loss = loss_func(pred, yb)

        loss.backward()
        opt.step()
        opt.zero_grad()

print(loss_func(model(xb), yb))
```
tensor(0.0817, grad_fn=<NllLossBackward>)
Refactor using DataLoader
```python
from torch.utils.data import DataLoader

train_ds = TensorDataset(x_train, y_train)
train_dl = DataLoader(train_ds, batch_size=bs)

model, opt = get_model()

for epoch in range(epochs):
    for xb, yb in train_dl:
        pred = model(xb)
        loss = loss_func(pred, yb)

        loss.backward()
        opt.step()
        opt.zero_grad()

print(loss_func(model(xb), yb))
```
tensor(0.0803, grad_fn=<NllLossBackward>)
Add validation
```python
train_ds = TensorDataset(x_train, y_train)
train_dl = DataLoader(train_ds, batch_size=bs, shuffle=True)

valid_ds = TensorDataset(x_valid, y_valid)
valid_dl = DataLoader(valid_ds, batch_size=bs * 2)

model, opt = get_model()

for epoch in range(epochs):
    model.train()
    for xb, yb in train_dl:
        pred = model(xb)
        loss = loss_func(pred, yb)

        loss.backward()
        opt.step()
        opt.zero_grad()

    model.eval()
    with torch.no_grad():
        valid_loss = sum(loss_func(model(xb), yb) for xb, yb in valid_dl)

    print(epoch, valid_loss / len(valid_dl))
```
0 tensor(0.3093)
1 tensor(0.3198)
```python
def loss_batch(model, loss_func, xb, yb, opt=None):
    loss = loss_func(model(xb), yb)

    if opt is not None:
        loss.backward()
        opt.step()
        opt.zero_grad()

    return loss.item(), len(xb)

import numpy as np

def fit(epochs, model, loss_func, opt, train_dl, valid_dl):
    for epoch in range(epochs):
        model.train()
        for xb, yb in train_dl:
            loss_batch(model, loss_func, xb, yb, opt)

        model.eval()
        with torch.no_grad():
            losses, nums = zip(
                *[loss_batch(model, loss_func, xb, yb) for xb, yb in valid_dl]
            )
        val_loss = np.sum(np.multiply(losses, nums)) / np.sum(nums)

        print(epoch, val_loss)

def get_data(train_ds, valid_ds, bs):
    return (
        DataLoader(train_ds, batch_size=bs, shuffle=True),
        DataLoader(valid_ds, batch_size=bs * 2),
    )

train_dl, valid_dl = get_data(train_ds, valid_ds, bs)
model, opt = get_model()
fit(epochs, model, loss_func, opt, train_dl, valid_dl)
```
0 0.3313611475586891
1 0.35820939881801606
Switch to a CNN
```python
class Mnist_CNN(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 16, kernel_size=3, stride=2, padding=1)
        self.conv2 = nn.Conv2d(16, 16, kernel_size=3, stride=2, padding=1)
        self.conv3 = nn.Conv2d(16, 10, kernel_size=3, stride=2, padding=1)

    def forward(self, xb):
        xb = xb.view(-1, 1, 28, 28)
        xb = F.relu(self.conv1(xb))
        xb = F.relu(self.conv2(xb))
        xb = F.relu(self.conv3(xb))
        xb = F.avg_pool2d(xb, 4)
        return xb.view(-1, xb.size(1))

lr = 0.1

model = Mnist_CNN()
opt = optim.SGD(model.parameters(), lr=lr, momentum=0.9)

fit(epochs, model, loss_func, opt, train_dl, valid_dl)
```
0 0.2936670451879501
1 0.21561954822540283
nn.Sequential
A Sequential object runs each of the modules contained within it, in sequential order.
```python
class Lambda(nn.Module):
    def __init__(self, func):
        super().__init__()
        self.func = func

    def forward(self, x):
        return self.func(x)

def preprocess(x):
    return x.view(-1, 1, 28, 28)

model = nn.Sequential(
    Lambda(preprocess),
    nn.Conv2d(1, 16, kernel_size=3, stride=2, padding=1),
    nn.ReLU(),
    nn.Conv2d(16, 16, kernel_size=3, stride=2, padding=1),
    nn.ReLU(),
    nn.Conv2d(16, 10, kernel_size=3, stride=2, padding=1),
    nn.ReLU(),
    nn.AvgPool2d(4),
    Lambda(lambda x: x.view(x.size(0), -1)),
)

opt = optim.SGD(model.parameters(), lr=lr, momentum=0.9)

fit(epochs, model, loss_func, opt, train_dl, valid_dl)
```
0 0.4037924102306366
1 0.25595326462984086
Wrapping DataLoader
```python
def preprocess(x, y):
    return x.view(-1, 1, 28, 28), y

class WrappedDataLoader:
    def __init__(self, dl, func):
        self.dl = dl
        self.func = func

    def __len__(self):
        return len(self.dl)

    def __iter__(self):
        batches = iter(self.dl)
        for b in batches:
            yield (self.func(*b))

train_dl, valid_dl = get_data(train_ds, valid_ds, bs)
train_dl = WrappedDataLoader(train_dl, preprocess)
valid_dl = WrappedDataLoader(valid_dl, preprocess)

model = nn.Sequential(
    nn.Conv2d(1, 16, kernel_size=3, stride=2, padding=1),
    nn.ReLU(),
    nn.Conv2d(16, 16, kernel_size=3, stride=2, padding=1),
    nn.ReLU(),
    nn.Conv2d(16, 10, kernel_size=3, stride=2, padding=1),
    nn.ReLU(),
    nn.AdaptiveAvgPool2d(1),
    Lambda(lambda x: x.view(x.size(0), -1)),
)

opt = optim.SGD(model.parameters(), lr=lr, momentum=0.9)

fit(epochs, model, loss_func, opt, train_dl, valid_dl)
```
0 0.31396417818069455
1 0.2551067463874817
Using your GPU (if available)
```python
print(torch.cuda.is_available())

dev = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

def preprocess(x, y):
    return x.view(-1, 1, 28, 28).to(dev), y.to(dev)

train_dl, valid_dl = get_data(train_ds, valid_ds, bs)
train_dl = WrappedDataLoader(train_dl, preprocess)
valid_dl = WrappedDataLoader(valid_dl, preprocess)

model.to(dev)
opt = optim.SGD(model.parameters(), lr=lr, momentum=0.9)

fit(epochs, model, loss_func, opt, train_dl, valid_dl)
```
False
0 0.22373724069595338
1 0.2494806985616684