神经网络

import tensorflow as tf
from tensorflow.keras import layers, datasets, regularizers
import numpy as np
import matplotlib.pyplot as plt
 
# 优化点：
# 1、使用 3个卷积模块 提升特征提取能力
# 2、添加 BatchNormalization 加速收敛
# 3、使用 GlobalAveragePooling 替代Flatten减少参数量
# 4、通过旋转/缩放等模拟真实场景，增加数据多样性
# 5、L2正则化 + Dropout    约束权重值大小，随机关闭神经元防止过拟合   降低验证集和测试集误差
# 6、改进评估和调参
 
# 1. Load MNIST and prepare the image tensors
mnist_train, mnist_test = datasets.mnist.load_data()
train_images, train_labels = mnist_train
test_images, test_labels = mnist_test

# Cast to float32 and append a trailing channel axis. Pixel values stay in
# the 0-255 range here; scaling is done by the Rescaling layer in the model.
train_images = train_images.astype('float32').reshape((60000, 28, 28, 1))
test_images = test_images.astype('float32').reshape((10000, 28, 28, 1))
 
# 2. Build the model (rescaling and data augmentation are part of the graph)
def _conv_block(x, filters, pool=True, dropout_rate=None):
    """Apply Conv2D -> BatchNormalization -> ReLU, then optional pooling/dropout.

    Args:
        x: Input tensor of the block.
        filters: Number of 3x3 convolution filters.
        pool: When true, follow the activation with 2x2 max pooling.
        dropout_rate: Dropout rate applied last; ``None`` disables dropout.

    Returns:
        The output tensor of the block.
    """
    # The convolution carries no activation of its own: ReLU is applied once,
    # after batch normalization, instead of both before and after it.
    x = layers.Conv2D(filters, (3, 3), padding='same')(x)
    x = layers.BatchNormalization()(x)
    x = layers.Activation('relu')(x)
    if pool:
        x = layers.MaxPooling2D((2, 2))(x)
    if dropout_rate is not None:
        x = layers.Dropout(dropout_rate)(x)
    return x


def build_model():
    """Build the MNIST CNN classifier.

    The model takes raw 28x28x1 images with pixel values in 0-255, rescales
    them, applies random rotation/zoom augmentation, runs three convolution
    blocks, global average pooling, and a regularized dense head.

    Returns:
        An uncompiled ``tf.keras.Model`` with a 10-way softmax output.
    """
    inputs = tf.keras.Input(shape=(28, 28, 1))
    x = layers.Rescaling(1./255)(inputs)  # normalization lives inside the model

    # Augmentation layers (active only during training).
    # factor=0.05 is a fraction of a full turn, i.e. roughly +/-18 degrees.
    x = layers.RandomRotation(factor=0.05)(x)
    x = layers.RandomZoom(height_factor=0.1)(x)   # random zoom of up to +/-10%

    # Three convolution blocks with growing width; dropout limits overfitting.
    x = _conv_block(x, 32, dropout_rate=0.2)
    x = _conv_block(x, 64, dropout_rate=0.3)
    # The deepest block is neither pooled nor dropped out; it feeds the global
    # average pooling directly.
    x = _conv_block(x, 128, pool=False)

    # Global average pooling replaces Flatten to cut the parameter count.
    # Flatten would be the choice when every spatial detail must be kept.
    x = layers.GlobalAveragePooling2D()(x)

    # Dense head: L2 weight penalty plus dropout against overfitting.
    x = layers.Dense(128, activation='relu',
                     kernel_regularizer=regularizers.l2(0.001))(x)
    x = layers.Dropout(0.5)(x)
    outputs = layers.Dense(10, activation='softmax')(x)

    return tf.keras.Model(inputs=inputs, outputs=outputs)
 
model = build_model()

# Training hyper-parameters, shared by the learning-rate schedule and fit().
EPOCHS = 30
BATCH_SIZE = 128
VALIDATION_SPLIT = 0.2

# 3. Learning-rate schedule
initial_lr = 0.001
# fit() holds out VALIDATION_SPLIT of the samples for validation, so the
# optimizer only steps over the remaining training samples each epoch.
num_train_samples = int(len(train_images) * (1 - VALIDATION_SPLIT))
steps_per_epoch = -(-num_train_samples // BATCH_SIZE)  # ceiling division
total_steps = steps_per_epoch * EPOCHS
# Staircase decay: multiply the rate by 0.9 after every tenth of the run.
lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(
    initial_lr, decay_steps=total_steps//10, decay_rate=0.9, staircase=True)

# 4. Compile the model
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=lr_schedule),
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy']
)

# 5. Callbacks: stop after 5 epochs without val_loss improvement and roll
# back to the best weights seen.
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss', patience=5, restore_best_weights=True)

# 6. Train, holding out part of the training set for validation
history = model.fit(
    train_images, train_labels,
    epochs=EPOCHS,
    batch_size=BATCH_SIZE,
    validation_split=VALIDATION_SPLIT,
    # Enabled explicitly for clarity
    shuffle=True,
    callbacks=[early_stopping]
)

# 7. Plot the training curves
plt.figure(figsize=(12, 4))
plt.subplot(1, 2, 1)
plt.plot(history.history['accuracy'], label='Training Accuracy')
plt.plot(history.history['val_accuracy'], label='Validation Accuracy')
plt.xlabel('Epoch')
plt.legend()

plt.subplot(1, 2, 2)
plt.plot(history.history['loss'], label='Training Loss')
plt.plot(history.history['val_loss'], label='Validation Loss')
plt.xlabel('Epoch')
plt.legend()
plt.show()

# 8. Evaluate on the held-out test set
test_loss, test_acc = model.evaluate(test_images, test_labels)
print(f'\n测试集准确率: {test_acc:.4f}')

# 9. Save the trained model
model.save('mnist_cnn_optimized.keras')

# 10. Predict one randomly chosen test image as a sanity check
sample_idx = np.random.randint(0, len(test_images))
prediction = model.predict(test_images[sample_idx][np.newaxis, ...])
print(f"\n示例预测: 真实标签={test_labels[sample_idx]}, 预测结果={np.argmax(prediction)}")

由系统自动生成数字图片；也可以用“画图”工具手绘 28×28 像素的手写数字图片。

from PIL import Image, ImageDraw, ImageFont, ImageFilter
import numpy as np
import os
import random
def _load_digit_font(font_paths, font_size):
    """Return the first loadable TrueType font, else Pillow's default font."""
    for path in font_paths:
        try:
            return ImageFont.truetype(path, font_size)
        except OSError:
            # Font file missing or unreadable on this machine; try the next.
            continue
    try:
        return ImageFont.load_default(size=font_size)
    except TypeError:
        # Pillow releases whose load_default() takes no size argument only
        # offer the fixed-size built-in font.
        return ImageFont.load_default()


def generate_mnist_style_digits(output_dir='image'):
    """Render the digits 0-9 as 28x28 MNIST-like grayscale PNG files.

    Each digit is drawn in white on a black canvas, centered, slightly
    blurred, then dimmed and overlaid with Gaussian noise. Files are written
    as ``digit_<n>.png``.

    Args:
        output_dir: Directory that receives the PNG files; created if missing.
    """
    os.makedirs(output_dir, exist_ok=True)

    # Candidate fonts, tried in order.
    font_paths = [
        "arial.ttf",  # preferred standard font
        "C:/Windows/Fonts/seguiemj.ttf"
    ]
    canvas_size = 28

    for i in range(10):
        # Vary the glyph size per digit: a nominal height of 15-18 px on the
        # 28 px canvas, from which the font size is derived.
        target_height = random.randint(15, 18)
        font_size = int(target_height * 1.6)

        # Black canvas
        img = Image.new('L', (canvas_size, canvas_size), 0)
        draw = ImageDraw.Draw(img)

        font = _load_digit_font(font_paths, font_size)

        # Center the glyph using its real bounding box; subtracting the box
        # origin compensates for the font's internal offsets.
        text = str(i)
        bbox = draw.textbbox((0, 0), text, font=font)
        w, h = bbox[2] - bbox[0], bbox[3] - bbox[1]
        x = (canvas_size - w) / 2 - bbox[0]
        y = (canvas_size - h) / 2 - bbox[1]

        draw.text((x, y), text, 255, font=font)

        # Soften the edges. (A MaxFilter(3) dilation could be applied before
        # the blur if thicker strokes are wanted.)
        img = img.filter(ImageFilter.GaussianBlur(radius=0.6))

        # Dim the strokes and add Gaussian noise, clipped to the 8-bit range.
        np_img = np.array(img)
        noise = np.random.normal(128, 30, np_img.shape)
        np_img = np.clip(np_img * 0.7 + noise * 0.1, 0, 255).astype(np.uint8)

        Image.fromarray(np_img).save(os.path.join(output_dir, f'digit_{i}.png'))


generate_mnist_style_digits()

import tensorflow as tf
from PIL import Image, ImageOps, ImageFilter, ImageEnhance
import os
import numpy as np
import matplotlib.pyplot as plt
# Matplotlib text setup: use the SimHei font so the Chinese plot titles
# render, and turn off the Unicode minus sign so negative numbers display.
plt.rcParams.update({
    'font.sans-serif': ['SimHei'],
    'axes.unicode_minus': False,
})
import matplotlib
# Select the Tk-based backend. NOTE: TkAgg is an interactive (GUI) backend.
matplotlib.use('TkAgg')


# The model must be trained and saved first; the training script writes it
# as mnist_cnn_optimized.keras, which is the file loaded here.
model = tf.keras.models.load_model('mnist_cnn_optimized.keras')
 
def preprocess_image(img_path):
    """Load an image file and turn it into a (1, 28, 28, 1) float32 batch.

    The image is converted to grayscale, shrunk (aspect ratio preserved) to
    fit within 24x24, and pasted centered on a black 28x28 canvas. Pixel
    values are left in the 0-255 range; no normalization is applied here.
    """
    side = 28
    digit = Image.open(img_path).convert('L')

    # thumbnail() resizes in place and never enlarges the image.
    digit.thumbnail((24, 24))

    # Center the digit on a black canvas.
    board = Image.new('L', (side, side), 0)
    offset = ((side - digit.width) // 2, (side - digit.height) // 2)
    board.paste(digit, offset)

    pixels = np.array(board, dtype='float32')  # values stay in 0-255
    return pixels.reshape(1, 28, 28, 1)
 
# Predict every image in the folder and show each preprocessed input with
# its predicted digit.
# Must be the folder the digit generator writes to ('image').
image_dir = 'image'

# Keep only image files, in a stable order, so subplot slots are numbered
# consecutively no matter what else the folder contains.
image_names = sorted(
    name for name in os.listdir(image_dir)
    if name.lower().endswith(('.png', '.jpg', '.jpeg'))
)

if not image_names:
    print(f'目录 {image_dir} 中没有找到图片')
else:
    # Size the grid from the number of images instead of assuming ten.
    n_cols = 5
    n_rows = -(-len(image_names) // n_cols)  # ceiling division
    plt.figure(figsize=(12, 3 * n_rows))

    for i, filename in enumerate(image_names):
        img_path = os.path.join(image_dir, filename)

        # Preprocess and predict
        img_array = preprocess_image(img_path)
        predicted_digit = np.argmax(model.predict(img_array))

        # Show the preprocessed input together with the prediction
        plt.subplot(n_rows, n_cols, i + 1)
        plt.imshow(img_array[0, ..., 0], cmap='gray')
        plt.title(f'预测值: {predicted_digit}')
        plt.axis('off')

        print(f'图片 {filename} ，预测值：{predicted_digit}')

    plt.tight_layout()
    plt.show()

莫莫绵的博客

探索

知识图谱

反向链接