为方便广大研究生、炼丹师和算法工程师快速上手 keras,特写此文章。默认使用者已具备基本的深度学习概念与数据集概念。
python 3.7.4
tensorflow 2.6.0
keras 2.6.0
onnx 1.9.0
onnxruntime-gpu 1.9.0
tf2onnx 1.9.3
MNIST数据集csv文件是一个42000x785的矩阵
42000表示有42000张图片
785中第一列是图片的类别(0,1,2,..,9),第二列到最后一列是图片数据向量 (28x28的图片张成784的向量), 数据集长这个样子:
1 0 0 0 0 0 0 0 0 0 ..
0 0 0 0 0 0 0 0 0 0
1 0 0 0 0 0 0 0 0 0
4 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0
7 0 0 0 0 0 0 0 0 0
3 0 0 0 0 0 0 0 0 0
5 0 0 0 0 0 0 0 0 0
3 0 0 0 0 0 0 0 0 0
8 0 0 0 0 0 0 0 0 0
9 0 0 0 0 0 0 0 0 0
1 0 0 0 0 0 0 0 0 0
3 0 0 0 0 0 0 0 0 0
3 0 0 0 0 0 0 0 0 0
1 0 0 0 0 0 0 0 0 0
2 0 0 0 0 0 0 0 0 0
0 0 0 0 0 0 0 0 0 0
import os
import onnx
import keras
import logging
import subprocess
import numpy as np
import pandas as pd
import tensorflow as tf
import onnxruntime as ort
from sklearn.metrics import accuracy_score
from keras.models import Sequential, Model, load_model, save_model
from keras.layers import Dense, Activation, Dropout, Conv2D, Flatten, MaxPool2D, Input, Conv1D
from keras.utils.np_utils import to_categorical

# Quiet TensorFlow's autograph / logger noise.
tf.autograph.set_verbosity(0)
logging.getLogger("tensorflow").setLevel(logging.ERROR)

# Hyper-parameters and artifact paths.
N_EPOCH = 1
N_BATCH = 64
N_BATCH_NUM = 500
S_DATA_PATH = r"mnist_train.csv"
S_KERAS_MODEL_DIR_PATH = r"cnn_keras"
S_KERAS_MODEL_PATH = r"cnn_keras.h5"
S_ONNX_MODEL_PATH = r"cnn_keras.onnx"

S_DEVICE, N_DEVICE_ID, S_DEVICE_FULL = "cuda", 0, "cuda:0"  # run on GPU
# S_DEVICE, N_DEVICE_ID, S_DEVICE_FULL = "cpu", 0, "cpu"  # uncomment this line to run on CPU
if S_DEVICE == "cpu":
    # Hide all CUDA devices from TensorFlow so it falls back to CPU.
    os.environ["CUDA_VISIBLE_DEVICES"] = "-1"

# MNIST csv: 42000 rows x 785 cols; col 0 is the label (0-9),
# cols 1..784 are the flattened 28x28 pixel values.
df = pd.read_csv(S_DATA_PATH, header=None)
np_mat = np.array(df)
print(df.shape)
print(np_mat.shape)

X = np_mat[:, 1:]
Y = np_mat[:, 0]
X = X.astype(np.float32) / 255  # scale pixels into [0, 1]

# First N_BATCH * N_BATCH_NUM rows are the training split, the rest the test split.
n_split = N_BATCH * N_BATCH_NUM
X_train, X_test = X[:n_split], X[n_split:]
Y_train, Y_test = Y[:n_split], Y[n_split:]

# Conv2D wants rank-4 input (samples, h, w, channels); labels become one-hot.
X_train = X_train.reshape(X_train.shape[0], 28, 28, 1)
X_test = X_test.reshape(X_test.shape[0], 28, 28, 1)
Y_train = to_categorical(Y_train, num_classes=10)
Y_test = to_categorical(Y_test, num_classes=10)
print(X_train.shape)
print(Y_train.shape)
print(X_test.shape)
print(Y_test.shape)
# Run output follows below.
# Run output:
# (42000, 785) (42000, 785)
# (32000, 28, 28, 1) (32000, 10) (10000, 28, 28, 1) (10000, 10)

# Build the CNN with the Keras functional API.
x_in = Input(shape=(28, 28, 1))  # image input must be (h, w, c)
net = Conv2D(filters=32, kernel_size=(3, 3))(x_in)
net = MaxPool2D(pool_size=(2, 2))(net)
net = Dropout(0.2)(net)
net = Flatten()(net)
net = Dense(128)(net)
net = Activation('relu')(net)
net = Dense(10)(net)
y = Activation('softmax')(net)  # 10-way class probabilities

model = Model(x_in, y)
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
print(model.summary())
# Run output follows below.
# Run output (model summary):
# conv2d (None, 26, 26, 32) 320 -> max_pooling2d (None, 13, 13, 32)
# -> dropout -> flatten (None, 5408) -> dense (None, 128) 692352
# -> activation -> dense_1 (None, 10) 1290 -> activation_1
# Total params: 693,962 (all trainable)

# Train, score on the held-out split, then persist the model as HDF5.
model.fit(X_train, Y_train, epochs=N_EPOCH, batch_size=N_BATCH, verbose=1,
          validation_data=(X_test, Y_test))
score = model.evaluate(X_test, Y_test, verbose=0)
print('Test score:', score[0])
print('Test accuracy:', score[1])
save_model(model, S_KERAS_MODEL_PATH)
# Run output follows below.
# Run output:
# 500/500 [==============================] - 4s 3ms/step - loss: 0.2837 - accuracy: 0.9155
#   - val_loss: 0.1352 - val_accuracy: 0.9616
# Test score: 0.13516278564929962
# Test accuracy: 0.9616000056266785

# Reload the saved model and confirm it reproduces the same test metrics.
# FIX: the original wrote `load_model = load_model(...)`, rebinding the
# imported keras function to a Model instance and making any later call to
# load_model() impossible — bind the result to a distinct name instead.
loaded_model = load_model(S_KERAS_MODEL_PATH)
print("load model ok")
score = loaded_model.evaluate(X_test, Y_test, verbose=0)
print('load model Test score:', score[0])
print('load model Test accuracy:', score[1])
# Run output follows below.
# Run output:
# load model ok
# load model Test score: 0.13516278564929962
# load model Test accuracy: 0.9616000056266785

# Convert the saved Keras .h5 model to ONNX by invoking tf2onnx's CLI.
# FIX: the original built a shell string and ran it via os.system, which
# breaks on paths containing spaces and only reports a raw status code.
# subprocess.run with an explicit argument list (shell=False) is safe with
# any path, and check=True raises CalledProcessError on conversion failure
# instead of silently returning nonzero.
ls_cmd = ["python", "-m", "tf2onnx.convert",
          "--keras", S_KERAS_MODEL_PATH,
          "--output", S_ONNX_MODEL_PATH]
print(" ".join(ls_cmd))
proc = subprocess.run(ls_cmd, check=True)
print(proc.returncode)
# Run output follows below.
# Run output:
# python -m tf2onnx.convert --keras ...\cnn_keras.h5 --output ...\cnn_keras.onnx
# 0

# Load the exported ONNX graph, sanity-check it, then run a batch through
# onnxruntime and print the predicted class probabilities.
model = onnx.load(S_ONNX_MODEL_PATH)
print(onnx.checker.check_model(model))  # raises if malformed; returns None on success
print(onnx.helper.printable_graph(model.graph))  # human-readable graph dump

ls_input_name = [node.name for node in model.graph.input]
ls_output_name = [node.name for node in model.graph.output]
print("input name ", ls_input_name)
print("output name ", ls_output_name)
s_input_name = ls_input_name[0]

# Stage two batches of training images on the target device as an OrtValue.
x_input = X_train[:N_BATCH * 2, :, :, :].astype(np.float32)
ort_val = ort.OrtValue.ortvalue_from_numpy(x_input, S_DEVICE, N_DEVICE_ID)
print("val device ", ort_val.device_name())
print("val shape ", ort_val.shape())
print("val data type ", ort_val.data_type())
print("is_tensor ", ort_val.is_tensor())
print("array_equal ", np.array_equal(ort_val.numpy(), x_input))

providers = 'CUDAExecutionProvider' if S_DEVICE == "cuda" else 'CPUExecutionProvider'
print("providers ", providers)
ort_session = ort.InferenceSession(S_ONNX_MODEL_PATH, providers=[providers])  # GPU session when available
ort_session.set_providers([providers])
outputs = ort_session.run(None, {s_input_name: ort_val})
print("sess env ", ort_session.get_providers())
print(type(outputs))
print(outputs[0])
# Run output follows below.
Nonegraph tf2onnx ( %input_1:0[FLOAT, unk__17x28x28x1]) initializers ( %new_shape__15[INT64, 4] %model/dense_1/MatMul/ReadVariableOp:0[FLOAT, 128x10] %model/dense_1/BiasAdd/ReadVariableOp:0[FLOAT, 10] %model/dense/MatMul/ReadVariableOp:0[FLOAT, 5408x128] %model/dense/BiasAdd/ReadVariableOp:0[FLOAT, 128] %model/conv2d/Conv2D/ReadVariableOp:0[FLOAT, 32x1x3x3] %model/conv2d/BiasAdd/ReadVariableOp:0[FLOAT, 32] %const_fold_opt__16[INT64, 2]) { %model/conv2d/BiasAdd__6:0 = Reshape(%input_1:0, %new_shape__15) %model/conv2d/BiasAdd:0 = Conv[dilations = [1, 1], group = 1, kernel_shape = [3, 3], strides = [1, 1]](%model/conv2d/BiasAdd__6:0, %model/conv2d/Conv2D/ReadVariableOp:0, %model/conv2d/BiasAdd/ReadVariableOp:0) %model/max_pooling2d/MaxPool:0 = MaxPool[kernel_shape = [2, 2], strides = [2, 2]](%model/conv2d/BiasAdd:0) %model/max_pooling2d/MaxPool__12:0 = Transpose[perm = [0, 2, 3, 1]](%model/max_pooling2d/MaxPool:0) %model/flatten/Reshape:0 = Reshape(%model/max_pooling2d/MaxPool__12:0, %const_fold_opt__16) %model/dense/MatMul:0 = MatMul(%model/flatten/Reshape:0, %model/dense/MatMul/ReadVariableOp:0) %model/dense/BiasAdd:0 = Add(%model/dense/MatMul:0, %model/dense/BiasAdd/ReadVariableOp:0) %model/activation/Relu:0 = Relu(%model/dense/BiasAdd:0) %model/dense_1/MatMul:0 = MatMul(%model/activation/Relu:0, %model/dense_1/MatMul/ReadVariableOp:0) %model/dense_1/BiasAdd:0 = Add(%model/dense_1/MatMul:0, %model/dense_1/BiasAdd/ReadVariableOp:0) %Identity:0 = Softmax[axis = 1](%model/dense_1/BiasAdd:0) return %Identity:0}input name ['input_1:0']output name ['Identity:0']val device cudaval shape [128, 28, 28, 1]val data type tensor(float)is_tensor Truearray_equal Trueproviders CUDAExecutionProvidersess env ['CUDAExecutionProvider', 'CPUExecutionProvider']<class 'list'>[[1.0287621e-04 9.9524093e-01 5.0408958e-04 ... 6.5664819e-05 3.8182980e-03 1.2303158e-05] [9.9932754e-01 2.7173186e-08 3.5315077e-04 ... 
3.0959238e-06 8.5986117e-05 3.6047477e-06] [1.1101285e-05 9.9719965e-01 3.8205151e-04 ... 1.2267688e-03 7.8595197e-04 4.0839368e-05] ... [2.8337089e-02 1.5399084e-05 2.1733245e-01 ... 1.5945830e-05 2.1134425e-02 1.7111158e-03] [1.7888090e-06 3.3868539e-06 5.2631256e-04 ... 9.9888057e-01 5.4794059e-06 5.5255485e-04] [4.1398227e-05 1.0462944e-06 5.5901739e-03 ... 3.1221823e-09 6.6847453e-04 7.8918066e-07]]ai_fast_handbook