Pytorch 学习入门

由于时效问题,该文某些代码、技术可能已经过期,请注意!!!本文最后更新于:3 年前

Pytorch Learning Note

torch.nn

Module:创建一个行为类似于函数的可调用对象,但也可以包含状态(例如神经网络层权重)。 它知道其中包含的 Parameter ,并且可以将其所有梯度归零,遍历它们以进行权重更新等。
Parameter:张量的包装器,用于告知 Module 该张量是需要在反向传播期间更新的权重。 只有设置了 requires_grad 属性的张量才会被更新。
functional:一个模块(通常按照惯例导入到 F 名称空间中),其中包含激活函数,损失函数等。 以及卷积和线性层等层的无状态版本。

torch.optim

包含诸如 SGD 的优化程序,这些优化程序在反向传播步骤中更新 Parameter 的权重。

Dataset

具有__len__和__getitem__的对象,包括 Pytorch 提供的类,例如 TensorDataset

DataLoader

获取任何 Dataset 并创建一个迭代器,该迭代器返回批量数据。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
from pathlib import Path
import requests

# Download the pickled MNIST archive into data/mnist unless it is already cached.
DATA_PATH = Path("data")
PATH = DATA_PATH / "mnist"

PATH.mkdir(parents=True, exist_ok=True)

URL = "https://github.com/pytorch/tutorials/raw/master/_static/"
FILENAME = "mnist.pkl.gz"

archive_path = PATH / FILENAME
if not archive_path.exists():
    response = requests.get(URL + FILENAME, timeout=60)
    # Fail loudly on an HTTP error instead of caching an error page as the archive.
    response.raise_for_status()
    # write_bytes opens, writes and closes the file, so no handle is left open.
    archive_path.write_bytes(response.content)
1
2
3
4
5
import pickle
import gzip

# NOTE: pickle.load can execute arbitrary code while loading; only use an
# archive obtained from a trusted source.
with gzip.open((PATH / FILENAME).as_posix(), "rb") as f:
    train_set, valid_set, _ = pickle.load(f, encoding="latin-1")

# Each set is an (inputs, labels) pair; the third element of the archive is unused.
x_train, y_train = train_set
x_valid, y_valid = valid_set
1
2
3
4
5
from matplotlib import pyplot
import numpy as np

# Show the first training sample as a 28x28 grayscale image, then print the
# shape of the whole training array.
first_image = x_train[0].reshape((28, 28))
pyplot.imshow(first_image, cmap="gray")
print(x_train.shape)
(50000, 784)
1
2
3
4
5
6
7
8
9
10
import torch

# Convert the four loaded arrays into torch tensors.
x_train, y_train, x_valid, y_valid = (
    torch.tensor(array) for array in (x_train, y_train, x_valid, y_valid)
)
# x_train is 2-D: n is the number of rows, c the number of columns.
n, c = x_train.shape
print(x_train, y_train)
print(x_train.shape)
print(y_train.min(), y_train.max())
tensor([[0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        ...,
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.]]) tensor([5, 0, 4,  ..., 8, 4, 8])
torch.Size([50000, 784])
tensor(0) tensor(9)
从0构建神经网络线性模型
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
import math

# Random weights scaled by 1/sqrt(784) and a zero bias; both are leaf tensors
# that track gradients.
weights = (torch.randn(784, 10) / math.sqrt(784)).requires_grad_()
bias = torch.zeros(10).requires_grad_()

## softmax激活函数
def log_softmax(x):
return x - x.exp().sum(-1).log().unsqueeze(-1)

def model(xb):
    """Apply the linear map ``xb @ weights + bias`` followed by ``log_softmax``.

    ``weights`` and ``bias`` are the module-level tensors.
    """
    return log_softmax(xb @ weights + bias)  # `@` is matrix multiplication


bs = 64  # batch size

xb = x_train[0:bs]  # a mini-batch from x
preds = model(xb)  # predictions
print(preds[0], preds.shape)

# Loss function
def nll(input, target):
    """Return the negative mean of ``input[i, target[i]]`` over all rows ``i``.

    Args:
        input: tensor indexed as ``[row, class]``.
        target: 1-D integer tensor selecting one column per row.
    """
    rows = range(target.shape[0])
    return -input[rows, target].mean()

# Use the hand-written negative log-likelihood as the loss and evaluate it on
# the labels of the first mini-batch.
loss_func = nll

yb = y_train[:bs]
print(loss_func(preds, yb))

def accuracy(out, yb):
    """Return the fraction of rows of ``out`` whose arg-max along dim 1 equals ``yb``."""
    predicted = out.argmax(dim=1)
    hits = predicted == yb
    return hits.float().mean()

# Accuracy of the current predictions on the first mini-batch.
initial_accuracy = accuracy(preds, yb)
print(initial_accuracy)

from IPython.core.debugger import set_trace

lr = 0.5  # learning rate
epochs = 2  # how many epochs to train for

# Manual mini-batch gradient descent over the training set.
for epoch in range(epochs):
    for i in range((n - 1) // bs + 1):
        start_i = i * bs
        end_i = start_i + bs
        xb = x_train[start_i:end_i]
        yb = y_train[start_i:end_i]
        pred = model(xb)
        loss = loss_func(pred, yb)

        loss.backward()
        # Apply the update without recording it in the autograd graph.
        with torch.no_grad():
            weights -= weights.grad * lr
            bias -= bias.grad * lr
            # Reset the gradients before the next backward() call.
            weights.grad.zero_()
            bias.grad.zero_()

# Loss and accuracy on the last mini-batch processed above.
print(loss_func(model(xb), yb), accuracy(model(xb), yb))

tensor([-2.5487, -2.8346, -2.7262, -2.1794, -2.1199, -2.1041, -1.9327, -2.1947,
        -2.5637, -2.2133], grad_fn=<SelectBackward>) torch.Size([64, 10])
tensor(2.3308, grad_fn=<NegBackward>)
tensor(0.1094)
tensor(0.0806, grad_fn=<NegBackward>) tensor(1.)
使用torch.nn.functional 重构
1
2
3
4
5
6
7
8
9
import torch.nn.functional as F

# Switch the loss to F.cross_entropy; the model below returns the linear
# scores directly, without calling log_softmax.
loss_func = F.cross_entropy


def model(xb):
    """Return ``xb @ weights + bias`` using the module-level tensors."""
    return xb @ weights + bias


print(loss_func(model(xb), yb), accuracy(model(xb), yb))

tensor(0.0806, grad_fn=<NllLossBackward>) tensor(1.)
使用nn.Module重构
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
from torch import nn

class Mnist_Logistic(nn.Module):
    """Linear model on 784 inputs and 10 outputs that owns its weight and bias."""

    def __init__(self):
        super().__init__()
        # nn.Parameter makes the tensors show up in self.parameters().
        self.weights = nn.Parameter(torch.randn(784, 10) / math.sqrt(784))
        self.bias = nn.Parameter(torch.zeros(10))

    def forward(self, xb):
        """Return ``xb @ self.weights + self.bias``."""
        return xb @ self.weights + self.bias

model = Mnist_Logistic()
print(loss_func(model(xb), yb))


def fit():
    """Train the module-level ``model`` for ``epochs`` passes over the training set.

    Reads ``epochs``, ``n``, ``bs``, ``x_train``, ``y_train``, ``loss_func``
    and ``lr`` from module scope and updates every parameter of ``model`` by
    ``-lr * grad`` after each mini-batch.
    """
    for epoch in range(epochs):
        for start_i in range(0, n, bs):
            end_i = start_i + bs
            xb = x_train[start_i:end_i]
            yb = y_train[start_i:end_i]
            loss = loss_func(model(xb), yb)

            loss.backward()
            with torch.no_grad():
                for p in model.parameters():
                    p -= p.grad * lr
                model.zero_grad()


fit()
print(loss_func(model(xb), yb))

tensor(2.4222, grad_fn=<NllLossBackward>)
tensor(0.0817, grad_fn=<NllLossBackward>)
使用nn.Linear重构
1
2
3
4
5
6
7
8
9
10
11
12
13
class Mnist_Logistic(nn.Module):
    """Linear model on 784 inputs and 10 outputs built from a single ``nn.Linear``."""

    def __init__(self):
        super().__init__()
        self.lin = nn.Linear(784, 10)

    def forward(self, xb):
        """Return the output of the linear layer for ``xb``."""
        return self.lin(xb)

# Print the loss on the current mini-batch before and after training.
model = Mnist_Logistic()
loss_before = loss_func(model(xb), yb)
print(loss_before)
fit()

loss_after = loss_func(model(xb), yb)
print(loss_after)
tensor(2.3090, grad_fn=<NllLossBackward>)
tensor(0.0824, grad_fn=<NllLossBackward>)
使用optim重构
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
from torch import optim

def get_model():
    """Return a new ``Mnist_Logistic`` and an SGD optimizer over its parameters.

    The optimizer uses the module-level ``lr``.
    """
    model = Mnist_Logistic()
    return model, optim.SGD(model.parameters(), lr=lr)


model, opt = get_model()
print(loss_func(model(xb), yb))

for epoch in range(epochs):
    for i in range((n - 1) // bs + 1):
        start_i = i * bs
        end_i = start_i + bs
        xb = x_train[start_i:end_i]
        yb = y_train[start_i:end_i]
        pred = model(xb)
        loss = loss_func(pred, yb)

        loss.backward()
        # The optimizer performs the parameter update and the gradient reset.
        opt.step()
        opt.zero_grad()

print(loss_func(model(xb), yb))
tensor(2.2990, grad_fn=<NllLossBackward>)
tensor(0.0805, grad_fn=<NllLossBackward>)
使用Dataset重构
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
from torch.utils.data import TensorDataset

train_ds = TensorDataset(x_train, y_train)
# Slicing the dataset yields the inputs and labels of a batch together.
# Shown on the first batch so this line does not depend on a leftover loop index.
xb, yb = train_ds[0:bs]
model, opt = get_model()

for epoch in range(epochs):
    for i in range((n - 1) // bs + 1):
        xb, yb = train_ds[i * bs: i * bs + bs]
        pred = model(xb)
        loss = loss_func(pred, yb)

        loss.backward()
        opt.step()
        opt.zero_grad()

print(loss_func(model(xb), yb))
tensor(0.0817, grad_fn=<NllLossBackward>)
使用DataLoader重构
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
from torch.utils.data import DataLoader

train_ds = TensorDataset(x_train, y_train)
train_dl = DataLoader(train_ds, batch_size=bs)

model, opt = get_model()

for epoch in range(epochs):
    # The loader yields (inputs, labels) batches, so no manual slicing is needed.
    for xb, yb in train_dl:
        pred = model(xb)
        loss = loss_func(pred, yb)

        loss.backward()
        opt.step()
        opt.zero_grad()

print(loss_func(model(xb), yb))
tensor(0.0803, grad_fn=<NllLossBackward>)
添加验证
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
train_ds = TensorDataset(x_train, y_train)
train_dl = DataLoader(train_ds, batch_size=bs, shuffle=True)

valid_ds = TensorDataset(x_valid, y_valid)
valid_dl = DataLoader(valid_ds, batch_size=bs * 2)

model, opt = get_model()

for epoch in range(epochs):
    model.train()
    for xb, yb in train_dl:
        pred = model(xb)
        loss = loss_func(pred, yb)

        loss.backward()
        opt.step()
        opt.zero_grad()

    model.eval()
    with torch.no_grad():
        # Weight each batch loss by its size so a shorter final batch is not
        # over-counted in the average.
        valid_loss = sum(loss_func(model(xb), yb) * len(xb) for xb, yb in valid_dl)

    print(epoch, valid_loss / len(valid_ds))
0 tensor(0.3093)
1 tensor(0.3198)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
# Create fit() and get_data()
def loss_batch(model, loss_func, xb, yb, opt=None):
    """Compute the loss for one batch and optionally take an optimizer step.

    Args:
        model: callable mapping ``xb`` to predictions.
        loss_func: callable taking ``(predictions, yb)`` and returning a tensor.
        xb: batch inputs.
        yb: batch targets.
        opt: optimizer; when given, a backward pass, ``opt.step()`` and
            ``opt.zero_grad()`` are run. When ``None`` only the loss is computed.

    Returns:
        A ``(loss_value, batch_size)`` tuple, where ``loss_value`` is
        ``loss.item()`` and ``batch_size`` is ``len(xb)``.
    """
    loss = loss_func(model(xb), yb)

    if opt is not None:
        loss.backward()
        opt.step()
        opt.zero_grad()

    return loss.item(), len(xb)

import numpy as np

def fit(epochs, model, loss_func, opt, train_dl, valid_dl):
    """Train ``model`` and print the validation loss after every epoch.

    Args:
        epochs: number of passes over ``train_dl``.
        model: module to train; switched between ``train()`` and ``eval()``.
        loss_func: loss callable forwarded to ``loss_batch``.
        opt: optimizer forwarded to ``loss_batch`` for training batches.
        train_dl: iterable of ``(xb, yb)`` training batches.
        valid_dl: iterable of ``(xb, yb)`` validation batches.
    """
    for epoch in range(epochs):
        model.train()
        for xb, yb in train_dl:
            loss_batch(model, loss_func, xb, yb, opt)

        model.eval()
        with torch.no_grad():
            losses, nums = zip(
                *[loss_batch(model, loss_func, xb, yb) for xb, yb in valid_dl]
            )
        # Average weighted by batch size, so batches of different length count fairly.
        val_loss = np.sum(np.multiply(losses, nums)) / np.sum(nums)

        print(epoch, val_loss)

def get_data(train_ds, valid_ds, bs):
    """Return ``(train_dl, valid_dl)`` data loaders for the two datasets.

    The training loader uses batch size ``bs`` and shuffles; the validation
    loader uses batch size ``bs * 2`` and keeps the dataset order.
    """
    train_loader = DataLoader(train_ds, batch_size=bs, shuffle=True)
    valid_loader = DataLoader(valid_ds, batch_size=bs * 2)
    return train_loader, valid_loader

# Build the loaders and a fresh model/optimizer pair, then train.
loaders = get_data(train_ds, valid_ds, bs)
train_dl, valid_dl = loaders
model, opt = get_model()
fit(epochs, model, loss_func, opt, train_dl, valid_dl)
0 0.3313611475586891
1 0.35820939881801606
切换到 CNN
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
class Mnist_CNN(nn.Module):
    """Three stride-2 3x3 convolutions with ReLU, then average pooling to 10 outputs."""

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 16, kernel_size=3, stride=2, padding=1)
        self.conv2 = nn.Conv2d(16, 16, kernel_size=3, stride=2, padding=1)
        self.conv3 = nn.Conv2d(16, 10, kernel_size=3, stride=2, padding=1)

    def forward(self, xb):
        """Reshape ``xb`` to (-1, 1, 28, 28), run the conv stack and flatten per sample."""
        xb = xb.view(-1, 1, 28, 28)
        for conv in (self.conv1, self.conv2, self.conv3):
            xb = F.relu(conv(xb))
        xb = F.avg_pool2d(xb, 4)
        # Collapse the pooled spatial dimensions, leaving (batch, channels).
        return xb.view(-1, xb.size(1))

# Train the CNN with SGD plus momentum at a smaller learning rate.
lr = 0.1

model = Mnist_CNN()
cnn_params = model.parameters()
opt = optim.SGD(cnn_params, lr=lr, momentum=0.9)

fit(epochs, model, loss_func, opt, train_dl, valid_dl)
0 0.2936670451879501
1 0.21561954822540283
nn.Sequential Sequential对象以顺序方式运行其中包含的每个模块。
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
class Lambda(nn.Module):
    """Module wrapper around an arbitrary callable, usable as a layer in ``nn.Sequential``."""

    def __init__(self, func):
        super().__init__()
        self.func = func

    def forward(self, x):
        """Return ``self.func(x)``."""
        return self.func(x)

def preprocess(x):
    """Return ``x`` viewed with shape (-1, 1, 28, 28)."""
    return x.view(-1, 1, 28, 28)

# Same CNN expressed as a flat list of layers run in order by nn.Sequential.
layers = [
    Lambda(preprocess),
    nn.Conv2d(1, 16, kernel_size=3, stride=2, padding=1),
    nn.ReLU(),
    nn.Conv2d(16, 16, kernel_size=3, stride=2, padding=1),
    nn.ReLU(),
    nn.Conv2d(16, 10, kernel_size=3, stride=2, padding=1),
    nn.ReLU(),
    nn.AvgPool2d(4),
    Lambda(lambda x: x.view(x.size(0), -1)),
]
model = nn.Sequential(*layers)

opt = optim.SGD(model.parameters(), lr=lr, momentum=0.9)

fit(epochs, model, loss_func, opt, train_dl, valid_dl)
0 0.4037924102306366
1 0.25595326462984086
包装DataLoader
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
def preprocess(x, y):
    """Return ``x`` viewed with shape (-1, 1, 28, 28) together with ``y`` unchanged."""
    return x.view(-1, 1, 28, 28), y

class WrappedDataLoader:
    """Iterable that applies ``func`` to every batch produced by ``dl``."""

    def __init__(self, dl, func):
        self.dl = dl
        self.func = func

    def __len__(self):
        """Return the length of the wrapped loader."""
        return len(self.dl)

    def __iter__(self):
        """Yield ``func(*batch)`` for each batch of the wrapped loader."""
        for batch in self.dl:
            yield self.func(*batch)

# Reshaping now happens in the wrapped loaders, so the model starts at the
# first convolution.
raw_train_dl, raw_valid_dl = get_data(train_ds, valid_ds, bs)
train_dl = WrappedDataLoader(raw_train_dl, preprocess)
valid_dl = WrappedDataLoader(raw_valid_dl, preprocess)

layers = [
    nn.Conv2d(1, 16, kernel_size=3, stride=2, padding=1),
    nn.ReLU(),
    nn.Conv2d(16, 16, kernel_size=3, stride=2, padding=1),
    nn.ReLU(),
    nn.Conv2d(16, 10, kernel_size=3, stride=2, padding=1),
    nn.ReLU(),
    nn.AdaptiveAvgPool2d(1),
    Lambda(lambda x: x.view(x.size(0), -1)),
]
model = nn.Sequential(*layers)

opt = optim.SGD(model.parameters(), lr=lr, momentum=0.9)

fit(epochs, model, loss_func, opt, train_dl, valid_dl)

0 0.31396417818069455
1 0.2551067463874817
使用 GPU(如果可用)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
print(torch.cuda.is_available())
# Pick the CUDA device when one is available, otherwise the CPU.
dev = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")


def preprocess(x, y):
    """Return ``x`` viewed as (-1, 1, 28, 28) and ``y``, both moved to ``dev``."""
    return x.view(-1, 1, 28, 28).to(dev), y.to(dev)


train_dl, valid_dl = get_data(train_ds, valid_ds, bs)
train_dl = WrappedDataLoader(train_dl, preprocess)
valid_dl = WrappedDataLoader(valid_dl, preprocess)

# Move the model to the same device as the batches before building the optimizer.
model.to(dev)
opt = optim.SGD(model.parameters(), lr=lr, momentum=0.9)
fit(epochs, model, loss_func, opt, train_dl, valid_dl)

False
0 0.22373724069595338
1 0.2494806985616684