[Reference](https://tangshusen.me/Dive-into-DL-PyTorch/#/chapter03_DL-basics/3.6_softmax-regression-scratch?id=_367-%e8%ae%ad%e7%bb%83%e6%a8%a1%e5%9e%8b)

import torch
import torchvision
from torch.utils import data
from torchvision import transforms
from matplotlib import pyplot as plt

trans = transforms.ToTensor()
# Module-level datasets (used below for visualization and for len() counts)
mnist_train = torchvision.datasets.MNIST(root="./data/", train=True, transform=trans, download=False)
mnist_test = torchvision.datasets.MNIST(root="./data/", train=False, transform=trans, download=False)
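
# Sanity check (an addition to the original notes): standard MNIST has
# 60,000 training images and 10,000 test images.
print(len(mnist_train), len(mnist_test))  # 60000 10000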

# Visualization helper: plot a grid of images with optional titles
def show_image(imgs, num_rows, num_cols, titles=None, scale=1.5):
    figsize = (num_cols * scale, num_rows * scale)  # figsize is (width, height)
    _, axes = plt.subplots(num_rows, num_cols, figsize=figsize)
    axes = axes.flatten()
    for i, (ax, img) in enumerate(zip(axes, imgs)):
        if torch.is_tensor(img):
            # Image tensor
            ax.imshow(img.numpy())
        else:
            # PIL image
            ax.imshow(img)
        ax.axes.get_xaxis().set_visible(False)
        ax.axes.get_yaxis().set_visible(False)
        if titles:
            ax.set_title(titles[i])
    return axes
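
# Quick usage check (added, not in the original notes): display the first eight
# training digits with show_image; assumes ./data/ already holds the MNIST files.
X, y = next(iter(data.DataLoader(mnist_train, batch_size=8)))
show_image(X.squeeze(1), 2, 4, titles=[str(int(label)) for label in y])
plt.show()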

def load_data_mnist(batch_size, resize=None):
    trans = [transforms.ToTensor()]
    if resize:
        trans.insert(0, transforms.Resize(resize))
    trans = transforms.Compose(trans)
    # Rebuild the datasets with the composed transform so that resize actually takes effect
    train_set = torchvision.datasets.MNIST(root="./data/", train=True, transform=trans, download=False)
    test_set = torchvision.datasets.MNIST(root="./data/", train=False, transform=trans, download=False)
    return (data.DataLoader(train_set, batch_size, shuffle=True),
            data.DataLoader(test_set, batch_size, shuffle=False))
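
# Usage sketch (an addition): the resize argument prepends a Resize transform,
# e.g. resize=32 yields 32x32 inputs. Note that the Net below hard-codes
# 14*14*128 features for 28x28 inputs, so this script keeps the native size.
tr32, _ = load_data_mnist(64, resize=32)
print(next(iter(tr32))[0].shape)  # torch.Size([64, 1, 32, 32])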
      
n_epochs = 5
batch_size = 64
learning_rate = 0.01
train_iter, test_iter = load_data_mnist(batch_size)

import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        # Two 3x3 conv layers (1 -> 64 -> 128 channels) followed by 2x2 max-pooling,
        # which halves the 28x28 input to 14x14
        self.conv1 = nn.Sequential(nn.Conv2d(1, 64, kernel_size=3, stride=1, padding=1), nn.ReLU(),
                                   nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1), nn.ReLU(),
                                   nn.MaxPool2d(stride=2, kernel_size=2))
        # Fully connected head: 14*14*128 features -> 1024 -> 10 classes
        self.dense = nn.Sequential(nn.Linear(14*14*128, 1024), nn.ReLU(), nn.Dropout(p=0.5), nn.Linear(1024, 10))

    def forward(self, x):
        x = self.conv1(x)
        x = x.view(-1, 14*14*128)  # flatten to (batch_size, 14*14*128)
        x = self.dense(x)
        return x
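
# Shape sketch (added): one 2x2 max-pool halves 28x28 to 14x14, so with 128
# channels the flattened feature size is 14*14*128, matching the first Linear.
with torch.no_grad():
    print(Net()(torch.randn(1, 1, 28, 28)).shape)  # torch.Size([1, 10])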
        
model = Net()
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)
loss = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Start training
for epoch in range(n_epochs):
    running_loss = 0.0
    running_correct = 0
    print("Epoch {}/{}".format(epoch + 1, n_epochs))
    print("-" * 10)
    model.train()  # enable dropout during training
    for X, y in train_iter:
        X, y = X.to(device), y.to(device)
        y_hat = model(X)
        l = loss(y_hat, y)  # CrossEntropyLoss already averages over the batch
        optimizer.zero_grad()  # clear gradients before backprop
        l.backward()
        optimizer.step()
        running_loss += l.item() * y.size(0)  # scale back to a per-sample sum
        running_correct += (y_hat.argmax(dim=1) == y).sum().item()

    print('Loss is: {:.4f}, Train Accuracy is: {:.2f}%'.format(
        running_loss / len(mnist_train), 100 * running_correct / len(mnist_train)))
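
# Evaluation sketch (an addition): test_iter is built above but never used in
# the original loop, so here is one way to measure accuracy on the test set.
model.eval()  # disable dropout for evaluation
test_correct = 0
with torch.no_grad():
    for X, y in test_iter:
        X, y = X.to(device), y.to(device)
        test_correct += (model(X).argmax(dim=1) == y).sum().item()
print('Test Accuracy is: {:.2f}%'.format(100 * test_correct / len(mnist_test)))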
 
torch.save(model.state_dict(), "./Mnist_Model.pth")  # save to a file, not a directory
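
# Reload sketch (added): restore the saved weights into a fresh Net instance.
restored = Net()
restored.load_state_dict(torch.load("./Mnist_Model.pth", map_location=device))
restored.to(device).eval()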