研究CNTK(三):MNIST识别之01_OneHidden.cntk

# Parameters can be overwritten on the command line
# for example: cntk configFile=myConfigFile RootDir=../..
# For running from Visual Studio add
# currentDirectory=$(SolutionDir)/<path to corresponding data folder>

# Run the two sub-configurations below in order: train first, then test.
command = trainNetwork:testNetwork

precision = "float";
traceLevel = 1 ;
deviceId = "auto"   # pick a GPU if available, otherwise fall back to CPU

rootDir = ".." ;
dataDir = "$rootDir$/DataSets/MNIST" ;
outputDir = "./Output" ;

# Trained model checkpoints are written under this path.
modelPath = "$outputDir$/Models/01_OneHidden"
#stderr = "$outputDir$/01_OneHidden_bs_out"

# TRAINING CONFIG
trainNetwork = {
    action = "train"
    
    BrainScriptNetworkBuilder = {
        imageShape = 28:28:1                        # image dimensions, 1 channel only
        labelDim = 10                               # number of distinct labels
        featScale = 1/256

        # This model returns multiple nodes as a record, which
        # can be accessed using .x syntax.
        
        model(x) = {
            # Normalize pixel values into [0, 1): raw bytes are 0..255, so scale by 1/256.
            s1 = x * featScale
            
            # One fully-connected hidden layer of 200 units with ReLU activation.
            # (DenseLayer is the standard fully-connected layer.)
            h1 = DenseLayer {200, activation=ReLU} (s1)
            
            # Linear output layer mapping the hidden units to the labelDim class scores.
            z = LinearLayer {labelDim} (h1)
        }
        
        # inputs
        features = Input {imageShape}
        labels = Input {labelDim}

        # apply model to features
        out = model (features)

        # loss and error computation
        # Classes are mutually exclusive, so use softmax + cross-entropy as the criterion.
        ce   = CrossEntropyWithSoftmax (labels, out.z)
        errs = ClassificationError (labels, out.z)

        # declare special nodes
        featureNodes    = (features)
        labelNodes      = (labels)
        criterionNodes  = (ce)
        evaluationNodes = (errs)
        outputNodes     = (out.z)
        
        # Alternative, you can use the Sequential keyword and write the model
        # as follows. We keep the previous format because EvalClientTest needs
        # to access the internal nodes, which is not doable yet with Sequential
        #
        # Scale{f} = x => Constant(f) .* x
        # model = Sequential (
            # Scale {featScale} :
            # DenseLayer {200} : ReLU :
            # LinearLayer {labelDim}
        # )

        # # inputs
        # features = Input {imageShape}
        # labels = Input (labelDim)

        # # apply model to features
        # ol = model (features)

        # # loss and error computation
        # ce   = CrossEntropyWithSoftmax (labels, ol)
        # errs = ClassificationError (labels, ol)

        # # declare special nodes
        # featureNodes    = (features)
        # labelNodes      = (labels)
        # criterionNodes  = (ce)
        # evaluationNodes = (errs)
        # outputNodes     = (ol)
    }

    SGD = {
        epochSize = 60000                           # one pass over the full MNIST training set
        minibatchSize = 64
        maxEpochs = 10
        learningRatesPerSample = 0.01*5:0.005       # 0.01 for the first 5 epochs, then 0.005
        momentumAsTimeConstant = 0
        
        numMBsToShowResult = 500
    }

    # Reader definition; CNTKTextFormatReader parses plain-text sample files.
    reader = {
        readerType = "CNTKTextFormatReader"
        
        # See ../README.md for details on getting the data (Train-28x28_cntk_text.txt).
        file = "$DataDir$/Train-28x28_cntk_text.txt"
        
        # Input streams: 28*28 = 784 pixels and a 10-way one-hot label, both dense.
        input = {
            features = { dim = 784 ; format = "dense" }
            labels =   { dim = 10  ; format = "dense" }
        }
    }   
}

# TEST CONFIG
testNetwork = {
    action = "test"
    # Number of samples evaluated per batch;
    # reduce this if you run out of memory.
    minibatchSize = 1024    

    # Same reader setup as training, but pointing at the test split.
    reader = {
        readerType = "CNTKTextFormatReader"
        file = "$DataDir$/Test-28x28_cntk_text.txt"
        input = {
            features = { dim = 784 ; format = "dense" }
            labels =   { dim = 10  ; format = "dense" }
        }
    }
}

可以看出建立一个三层神经网络非常简单,配置好Reader后,直接读取数据。

训练配置里定义了 model 函数:先将输入像素归一化到 0~1,再接一个带 ReLU 激活的全连接层 DenseLayer,最后用 LinearLayer 把隐藏层输出线性映射为与标签维度一致的类别得分。

 

三符风云涌

发表评论