Polynomial Regression
Dividing each term by a factorial avoids the extremely large values that a raw power x^i produces: the term becomes x^i / i!, i.e. the i-th power of x divided by the factorial of i. The plot below compares the two.
import matplotlib.pyplot as plt
import math

num = 3
x = range(0, 1000)
y = []
y1 = []
for i in x:
    y.append(i**num)                           # raw power: x^num
    y1.append((i**num) / math.factorial(num))  # rescaled: x^num / num!
# note: for a fixed num the two curves differ only by the constant factor num!;
# the rescaling matters more and more as the degree grows
plt.plot(x, y, label='x^num')
plt.plot(x, y1, label='x^num / num!')
plt.legend()
plt.show()
Third-Order Polynomial Fitting
100 training samples and 100 validation samples.
Set the polynomial weights: a third-order polynomial has 4 weight values; train for 400 epochs.
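Concretely, the data-generating model used in the code below (matching true_w there) is

y = 5 + 1.2*x - 3.4*(x^2 / 2!) + 5.6*(x^3 / 3!) + ε, where ε ~ N(0, 0.1^2)

Each feature x^i is divided by i!, consistent with the rescaling above, so a model trained on the first four features should recover the coefficients 5, 1.2, -3.4, 5.6.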
import math

import matplotlib.pyplot as plt
import numpy as np
import torch
from torch import nn
from torch.utils import data
def get_dataloader_workers():
    # number of worker processes used by the DataLoader
    return 6
def accuracy(y_hat, y):
    # for multi-class outputs, take the index of the largest value as the prediction
    if len(y_hat.shape) > 1 and y_hat.shape[1] > 1:
        y_hat = y_hat.argmax(axis=1)
    # cmp is a boolean tensor marking which predictions are correct
    cmp = y_hat.type(y.dtype) == y
    # count the number of correct predictions
    return float(cmp.type(y.dtype).sum())
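A quick illustration of accuracy with made-up values (not from the original script): it returns the count of correct predictions, which the caller later divides by the number of examples.

y_hat = torch.tensor([[0.1, 0.9], [0.8, 0.2]])  # two 2-class predictions
y = torch.tensor([1, 1])                        # true labels
print(accuracy(y_hat, y))                       # 1.0 -> only the first row is correct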
class Accumulator:
    """Accumulate running sums over n variables."""
    def __init__(self, n) -> None:
        self.data = [0.0] * n

    def add(self, *args):
        # args is a tuple; add each value to the corresponding running sum
        self.data = [a + float(b) for a, b in zip(self.data, args)]

    def reset(self):
        self.data = [0.0] * len(self.data)

    def __getitem__(self, idx):
        return self.data[idx]
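For example (illustrative values only), accumulating a running (loss sum, sample count) over two batches:

metric = Accumulator(2)
metric.add(3.0, 10)  # batch 1: loss sum 3.0 over 10 samples
metric.add(1.0, 10)  # batch 2: loss sum 1.0 over 10 samples
print(metric[0] / metric[1])  # 0.2, the average loss over 20 samples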
def evaluate_accuracy(net, data_iter):
    if isinstance(net, torch.nn.Module):
        net.eval()  # switch to evaluation mode
    metric = Accumulator(2)  # (number correct, number of examples)
    with torch.no_grad():
        for X, y in data_iter:
            metric.add(accuracy(net(X), y), y.numel())
    return metric[0] / metric[1]
def train_epoch_ch3(net, train_iter, loss, updater):
    if isinstance(net, torch.nn.Module):
        net.train()  # switch to training mode
    # metric accumulates (sum of loss, number correct, number of examples)
    metric = Accumulator(3)
    for X, y in train_iter:
        y_hat = net(X)
        l = loss(y_hat, y)
        if isinstance(updater, torch.optim.Optimizer):
            # built-in optimizer: backprop the mean of the per-example losses
            updater.zero_grad()
            l.mean().backward()
            updater.step()
        else:
            # custom updater: backprop the sum and pass the batch size
            l.sum().backward()
            updater(X.shape[0])
        metric.add(float(l.sum()), accuracy(y_hat, y), y.numel())
    # average loss and accuracy over the epoch
    return metric[0] / metric[2], metric[1] / metric[2]
def set_axes(axes, xlabel, ylabel, xlim, ylim, xscale, yscale, legend):
    axes.set_xlabel(xlabel)
    axes.set_ylabel(ylabel)
    axes.set_xscale(xscale)
    axes.set_yscale(yscale)
    axes.set_xlim(xlim)
    axes.set_ylim(ylim)
    if legend:
        axes.legend(legend)
    axes.grid()
class Animator:
    """Incrementally plot training curves."""
    def __init__(self, xlabel=None, ylabel=None, legend=None, xlim=None, ylim=None,
                 xscale='linear', yscale='linear', fmts=('-', 'm--', 'g-.', 'r:'),
                 nrows=1, ncols=1, figsize=(3.5, 2.5)):
        if legend is None:
            legend = []
        self.fig, self.axes = plt.subplots(nrows, ncols, figsize=figsize)
        if nrows * ncols == 1:
            self.axes = [self.axes, ]
        # capture the axis configuration so it can be reapplied after each redraw
        self.config_axes = lambda: set_axes(self.axes[0], xlabel, ylabel,
                                            xlim, ylim, xscale, yscale, legend)
        self.X, self.Y, self.fmts = None, None, fmts

    def add(self, x, y):
        # y may be a scalar or a sequence of curve values
        if not hasattr(y, "__len__"):
            y = [y]
        n = len(y)
        if not hasattr(x, "__len__"):
            x = [x] * n
        if not self.X:
            self.X = [[] for _ in range(n)]
        if not self.Y:
            self.Y = [[] for _ in range(n)]
        for i, (a, b) in enumerate(zip(x, y)):
            if a is not None and b is not None:
                self.X[i].append(a)
                self.Y[i].append(b)
        # clear and redraw all curves with the accumulated points
        self.axes[0].cla()
        for x, y, fmt in zip(self.X, self.Y, self.fmts):
            self.axes[0].plot(x, y, fmt)
        self.config_axes()
def load_array(data_arrays, batch_size, is_train=True):  #@save
    dataset = data.TensorDataset(*data_arrays)
    return data.DataLoader(dataset, batch_size, shuffle=is_train,
                           num_workers=get_dataloader_workers())
max_degree = 20  # maximum degree of the polynomial features
n_train, n_test = 100, 100
true_w = np.zeros(max_degree)
true_w[0:4] = np.array([5, 1.2, -3.4, 5.6])  # only the first 4 coefficients are nonzero

features = np.random.normal(size=(n_train + n_test, 1))
np.random.shuffle(features)
poly_features = np.power(features, np.arange(max_degree).reshape(1, -1))
for i in range(max_degree):
    poly_features[:, i] /= math.gamma(i + 1)  # gamma(n) = (n-1)!
labels = np.dot(poly_features, true_w)
labels += np.random.normal(scale=0.1, size=labels.shape)  # Gaussian noise, std 0.1

true_w, features, poly_features, labels = [
    torch.tensor(x, dtype=torch.float32)
    for x in [true_w, features, poly_features, labels]]
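As a quick sanity check (not part of the original script), the generated tensors should have these shapes:

print(poly_features.shape)  # torch.Size([200, 20]): 200 samples, 20 polynomial features
print(labels.shape)         # torch.Size([200])
print(true_w[:4])           # tensor([ 5.0000,  1.2000, -3.4000,  5.6000])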
def evaluate_loss(net, data_iter, loss):
    """Return the average loss of the model on the given dataset."""
    metric = Accumulator(2)  # (sum of losses, number of examples)
    for X, y in data_iter:
        out = net(X)
        y = y.reshape(out.shape)
        l = loss(out, y)
        metric.add(l.sum(), l.numel())
    return metric[0] / metric[1]
def train(train_features, test_features, train_labels, test_labels,
          num_epochs=400):
    loss = nn.MSELoss(reduction='none')  # keep per-example losses; reduce manually
    input_shape = train_features.shape[-1]
    # no bias term: the constant is already the degree-0 polynomial feature
    net = nn.Sequential(nn.Linear(input_shape, 1, bias=False))
    batch_size = min(10, train_labels.shape[0])
    train_iter = load_array((train_features, train_labels.reshape(-1, 1)),
                            batch_size)
    test_iter = load_array((test_features, test_labels.reshape(-1, 1)),
                           batch_size, is_train=False)
    trainer = torch.optim.SGD(net.parameters(), lr=0.01)
    animator = Animator(xlabel='epoch', ylabel='loss', yscale='log',
                        xlim=[1, num_epochs], ylim=[1e-3, 1e2],
                        legend=['train', 'test'])
    for epoch in range(num_epochs):
        train_epoch_ch3(net, train_iter, loss, trainer)
        if epoch == 0 or (epoch + 1) % 20 == 0:
            animator.add(epoch + 1, (evaluate_loss(net, train_iter, loss),
                                     evaluate_loss(net, test_iter, loss)))
    print('weight:', net[0].weight.data.numpy())
# fit using the first 4 features (degrees 0-3): matches the true model
train(poly_features[:n_train, :4], poly_features[n_train:, :4],
      labels[:n_train], labels[n_train:])
plt.show()
The learned weights, i.e. the polynomial coefficients, come out essentially the same as the coefficients we originally set, and the loss steadily decreases.
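To make the comparison explicit, here is a minimal sketch, assuming train() is modified to end with return net (as written above it returns nothing):

net = train(poly_features[:n_train, :4], poly_features[n_train:, :4],
            labels[:n_train], labels[n_train:])
learned = net[0].weight.data.reshape(-1)
print('learned:', learned.numpy())     # close to [5.0, 1.2, -3.4, 5.6]
print('true:   ', true_w[:4].numpy())
print('abs err:', (learned - true_w[:4]).abs().numpy())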
Linear Function: Underfitting
Reason: the data come from a degree-20 feature expansion in which only four coefficients are nonzero. A model that trains only 2 coefficients (a linear function) cannot express what 4 coefficients describe, so neither more samples nor more training epochs will reduce the loss.
train(poly_features[:n_train, :2], poly_features[n_train:, :2],
      labels[:n_train], labels[n_train:])
High-Order Polynomial: Overfitting
Reason: the coefficients above degree 3 were never specified and are truly 0, but the labels carry added noise with no pattern to learn. The training results look good, yet random noise cannot possibly be predicted, so the model generalizes poorly.
train(poly_features[:n_train, :], poly_features[n_train:, :],
      labels[:n_train], labels[n_train:], num_epochs=1000)