研究CNTK(七):Python还是BrainScript速度更快?

研究CNTK(七):Python还是BrainScript速度更快?

def create_reader(map_file, mean_file, train):
    """Build a CNTK minibatch source that reads images and labels via a map file.

    Relies on the module-level names ``image_width``, ``image_height``,
    ``num_channels`` and ``num_classes``.

    Args:
        map_file: Path of the map file handed to ``ImageDeserializer``.
        mean_file: Path of the mean file handed to the ``mean`` transform.
        train: When truthy, a random crop with jitter is placed in front of
            the scale/mean transforms.

    Returns:
        A ``cntk.io.MinibatchSource`` with a ``features`` and a ``labels`` stream.

    Raises:
        RuntimeError: If ``map_file`` or ``mean_file`` does not exist.
    """
    both_present = os.path.exists(map_file) and os.path.exists(mean_file)
    if not both_present:
        raise RuntimeError("File '%s' or '%s' does not exist. Please run install_cifar10.py from DataSets/CIFAR-10 to fetch them" %
                           (map_file, mean_file))

    # Augmentation stage: only present for the training reader.
    augmentation = []
    if train:
        random_crop = cntk.io.ImageDeserializer.crop(
            crop_type='Random', ratio=0.8, jitter_type='uniRatio')
        augmentation.append(random_crop)

    # Stages shared by training and evaluation readers.
    rescale = cntk.io.ImageDeserializer.scale(
        width=image_width, height=image_height, channels=num_channels,
        interpolations='linear')
    subtract_mean = cntk.io.ImageDeserializer.mean(mean_file)
    transforms = augmentation + [rescale, subtract_mean]

    # The map file's first column is exposed as 'image', its second as 'label'.
    feature_stream = cntk.io.StreamDef(field='image', transforms=transforms)
    label_stream = cntk.io.StreamDef(field='label', shape=num_classes)
    stream_defs = cntk.io.StreamDefs(features=feature_stream, labels=label_stream)

    deserializer = cntk.io.ImageDeserializer(map_file, stream_defs)
    return cntk.io.MinibatchSource(deserializer)

在读取数据时,create_reader(map_file, mean_file, train)需要提供map文件和mean文件。map文件就是一个映射,用于指明每张图片的路径以及对应的标签是什么;mean文件则保存了所有图片在每个像素上的平均值;train是一个布尔型参数,为true时,会对训练图片做带抖动(jitter)的随机裁剪,用以增强数据集。

在调用ImageDeserializer时,features与labels均是从map文件里读取的,map文件实际上相当于一个索引文件,文本内容可以是这样:


/home/user/data/train/n01440764/n01440764_10026.JPEG<tab>0
/home/user/data/train/n01440764/n01440764_10027.JPEG<tab>0
/home/user/data/train/n01534433/n01534433_7285.JPEG<tab>0

也可以是这样,直接引用压缩文件里的图片(用@分隔压缩包路径与包内路径):

/home/user/data/train.zip@/n01440764/n01440764_10026.JPEG<tab>0
/home/user/data/train.zip@/n01440764/n01440764_10027.JPEG<tab>0
/home/user/data/train_01.zip@/n01534433/n01534433_7285.JPEG<tab>0
/home/user/data/train/n01534433/n01534433_7285.JPEG<tab>0

相比之下,在MXNet中使用的REC文件,是一个Magic Number加上一个cflag再加上长度,然后再加上图片数据,整个按4字节对齐。因为没有详细的文档,所以只能读源码,很是不容易。

在主体的训练代码中:

def convnet_cifar10_dataaug(reader_train, reader_test, max_epochs=80):
    """Train a convolutional network with momentum SGD and report its test error.

    Relies on the module-level names ``num_channels``, ``image_height``,
    ``image_width``, ``num_classes`` and ``model_path``.

    Args:
        reader_train: Minibatch source used for training; its ``streams.features``
            and ``streams.labels`` are bound to the network inputs.
        reader_test: Minibatch source used for the final evaluation pass.
        max_epochs: Number of training epochs (each epoch is 50000 samples).

    Returns:
        The sample-weighted average of ``trainer.test_minibatch`` over the
        evaluation pass (10000 samples requested in minibatches of 16).

    Side effects:
        Prints progress and the final result, and saves the model to
        ``model_path`` after every epoch.
    """
    _cntk_py.set_computation_network_trace_level(0)

    # Input variables denoting the features and label data
    input_var = cntk.ops.input_variable((num_channels, image_height, image_width))
    label_var = cntk.ops.input_variable((num_classes))

    # Apply the model to the input; 0.00390625 == 1/256 rescales the pixel values.
    scaled_input = cntk.ops.element_times(cntk.ops.constant(0.00390625), input_var)
    with cntk.layers.default_options(activation=cntk.ops.relu, pad=True):
        z = cntk.models.Sequential([
            # Two identical convolution/convolution/pooling stacks.
            cntk.models.LayerStack(2, lambda : [
                cntk.layers.Convolution((3,3), 64),
                cntk.layers.Convolution((3,3), 64),
                cntk.layers.MaxPooling((3,3), (2,2))
            ]),
            # Two dense+dropout stacks; the stack index picks 256 then 128 units.
            cntk.models.LayerStack(2, lambda i: [
                cntk.layers.Dense([256,128][i]),
                cntk.layers.Dropout(0.5)
            ]),
            # No activation here: softmax is folded into the loss below.
            cntk.layers.Dense(num_classes, activation=None)
        ])(scaled_input)

    # loss and metric
    ce = cntk.ops.cross_entropy_with_softmax(z, label_var)
    pe = cntk.ops.classification_error(z, label_var)

    # training config
    epoch_size = 50000                    # for now we manually specify epoch size
    minibatch_size = 64

    # Set learning parameters
    lr_per_sample          = [0.0015625]*20 + [0.00046875]*20 + [0.00015625]*20 + [0.000046875]*10 + [0.000015625]
    lr_schedule            = cntk.learning_rate_schedule(lr_per_sample, unit=cntk.learner.UnitType.sample, epoch_size=epoch_size)
    mm_time_constant       = [0]*20 + [600]*20 + [1200]
    mm_schedule            = cntk.learner.momentum_as_time_constant_schedule(mm_time_constant, epoch_size=epoch_size)
    l2_reg_weight          = 0.002

    # trainer object
    learner = cntk.learner.momentum_sgd(z.parameters, lr_schedule, mm_schedule,
                                        l2_regularization_weight = l2_reg_weight)
    trainer =  cntk.Trainer(z, ce, pe, learner)

    # define mapping from reader streams to network inputs
    # NOTE(review): this map is built from reader_train's streams and is reused
    # for reader_test below -- confirm both readers expose compatible streams.
    input_map = {
        input_var: reader_train.streams.features,
        label_var: reader_train.streams.labels
    }

    cntk.utils.log_number_of_parameters(z) ; print()
    progress_printer = cntk.utils.ProgressPrinter(tag='Training')

    # perform model training
    for epoch in range(max_epochs):       # loop over epochs
        sample_count = 0
        while sample_count < epoch_size:  # loop over minibatches in the epoch
            data = reader_train.next_minibatch(min(minibatch_size, epoch_size-sample_count), input_map=input_map) # fetch minibatch.
            trainer.train_minibatch(data)                                   # update model with it
            sample_count += trainer.previous_minibatch_sample_count         # count samples processed so far
            progress_printer.update_with_trainer(trainer, with_metric=True) # log progress
        progress_printer.epoch_summary(with_metric=True)
        z.save_model(os.path.join(model_path, "ConvNet_CIFAR10_DataAug_{}.dnn".format(epoch)))

    ### Evaluation action
    epoch_size     = 10000
    minibatch_size = 16

    # process minibatches and evaluate the model
    metric_numer    = 0
    metric_denom    = 0
    sample_count    = 0
    minibatch_index = 0

    while sample_count < epoch_size:
        current_minibatch = min(minibatch_size, epoch_size - sample_count)
        # Fetch next test minibatch.
        data = reader_test.next_minibatch(current_minibatch, input_map=input_map)
        # Weight the per-minibatch metric by the number of samples actually
        # delivered, so numerator, denominator and progress counter all agree
        # even if the reader returns fewer samples than requested.
        delivered = data[label_var].num_samples
        metric_numer += trainer.test_minibatch(data) * delivered
        metric_denom += delivered
        # Keep track of the number of samples processed so far.
        sample_count += delivered
        minibatch_index += 1

    print("")
    # minibatch_index already equals the number of minibatches evaluated.
    print("Final Results: Minibatch[1-{}]: errs = {:0.2f}% * {}".format(minibatch_index, (metric_numer*100.0)/metric_denom, metric_denom))
    print("")

    return metric_numer/metric_denom

虽然代码看起来不如BrainScript那么简洁,然而也避免了晦涩,并且这样可以细到对每个变量进行控制。

经过80个Epoch的训练以后,最终评估得到的误差是13.86%。

Finished Epoch [1]: [Training] loss = 2.012469 * 50000, metric = 75.1% * 50000 15.380s (3250.9 samples per second)
Finished Epoch [2]: [Training] loss = 1.701941 * 50000, metric = 62.7% * 50000 15.424s (3241.6 samples per second)
Finished Epoch [3]: [Training] loss = 1.526383 * 50000, metric = 56.0% * 50000 15.634s (3198.1 samples per second)
Finished Epoch [4]: [Training] loss = 1.408301 * 50000, metric = 51.1% * 50000 15.576s (3210.2 samples per second)
Finished Epoch [5]: [Training] loss = 1.297389 * 50000, metric = 46.4% * 50000 15.697s (3185.4 samples per second)
Finished Epoch [6]: [Training] loss = 1.207954 * 50000, metric = 42.4% * 50000 15.661s (3192.7 samples per second)
Finished Epoch [7]: [Training] loss = 1.126663 * 50000, metric = 39.5% * 50000 15.449s (3236.5 samples per second)
Finished Epoch [8]: [Training] loss = 1.081688 * 50000, metric = 37.3% * 50000 15.411s (3244.4 samples per second)
Finished Epoch [9]: [Training] loss = 1.043844 * 50000, metric = 36.0% * 50000 15.419s (3242.8 samples per second)
Finished Epoch [10]: [Training] loss = 1.001881 * 50000, metric = 34.3% * 50000 15.453s (3235.7 samples per second)
Finished Epoch [11]: [Training] loss = 0.971302 * 50000, metric = 33.4% * 50000 15.439s (3238.5 samples per second)
Finished Epoch [12]: [Training] loss = 0.946839 * 50000, metric = 32.4% * 50000 15.507s (3224.4 samples per second)
Finished Epoch [13]: [Training] loss = 0.929610 * 50000, metric = 31.6% * 50000 15.867s (3151.2 samples per second)
Finished Epoch [14]: [Training] loss = 0.913509 * 50000, metric = 30.9% * 50000 15.772s (3170.1 samples per second)
Finished Epoch [15]: [Training] loss = 0.891763 * 50000, metric = 30.0% * 50000 15.830s (3158.6 samples per second)
Finished Epoch [16]: [Training] loss = 0.882295 * 50000, metric = 30.0% * 50000 15.788s (3167.0 samples per second)
Finished Epoch [17]: [Training] loss = 0.866338 * 50000, metric = 29.3% * 50000 15.519s (3221.9 samples per second)
Finished Epoch [18]: [Training] loss = 0.852705 * 50000, metric = 28.5% * 50000 15.573s (3210.6 samples per second)
Finished Epoch [19]: [Training] loss = 0.840476 * 50000, metric = 28.3% * 50000 15.585s (3208.1 samples per second)
Finished Epoch [20]: [Training] loss = 0.833708 * 50000, metric = 27.9% * 50000 15.565s (3212.3 samples per second)
Finished Epoch [21]: [Training] loss = 0.700365 * 50000, metric = 23.3% * 50000 15.551s (3215.3 samples per second)
Finished Epoch [22]: [Training] loss = 0.666370 * 50000, metric = 22.3% * 50000 15.734s (3177.8 samples per second)
Finished Epoch [23]: [Training] loss = 0.652150 * 50000, metric = 21.8% * 50000 15.836s (3157.3 samples per second)
Finished Epoch [24]: [Training] loss = 0.647079 * 50000, metric = 21.7% * 50000 15.896s (3145.5 samples per second)
Finished Epoch [25]: [Training] loss = 0.643886 * 50000, metric = 21.4% * 50000 16.040s (3117.3 samples per second)
Finished Epoch [26]: [Training] loss = 0.637335 * 50000, metric = 21.3% * 50000 15.786s (3167.3 samples per second)
Finished Epoch [27]: [Training] loss = 0.633625 * 50000, metric = 21.2% * 50000 15.637s (3197.6 samples per second)
Finished Epoch [28]: [Training] loss = 0.629613 * 50000, metric = 21.1% * 50000 15.581s (3209.0 samples per second)
Finished Epoch [29]: [Training] loss = 0.629364 * 50000, metric = 21.0% * 50000 15.917s (3141.2 samples per second)
Finished Epoch [30]: [Training] loss = 0.626938 * 50000, metric = 20.8% * 50000 15.832s (3158.1 samples per second)
Finished Epoch [31]: [Training] loss = 0.627765 * 50000, metric = 21.0% * 50000 15.824s (3159.8 samples per second)
Finished Epoch [32]: [Training] loss = 0.617458 * 50000, metric = 20.5% * 50000 15.874s (3149.8 samples per second)
Finished Epoch [33]: [Training] loss = 0.615286 * 50000, metric = 20.3% * 50000 15.638s (3197.4 samples per second)
Finished Epoch [34]: [Training] loss = 0.616328 * 50000, metric = 20.5% * 50000 15.652s (3194.4 samples per second)
Finished Epoch [35]: [Training] loss = 0.607314 * 50000, metric = 20.1% * 50000 15.586s (3208.0 samples per second)
Finished Epoch [36]: [Training] loss = 0.608924 * 50000, metric = 20.2% * 50000 15.636s (3197.7 samples per second)
Finished Epoch [37]: [Training] loss = 0.609010 * 50000, metric = 20.3% * 50000 16.038s (3117.6 samples per second)
Finished Epoch [38]: [Training] loss = 0.601167 * 50000, metric = 20.1% * 50000 15.701s (3184.5 samples per second)
Finished Epoch [39]: [Training] loss = 0.607460 * 50000, metric = 20.2% * 50000 15.744s (3175.9 samples per second)
Finished Epoch [40]: [Training] loss = 0.595888 * 50000, metric = 19.9% * 50000 15.717s (3181.2 samples per second)
Finished Epoch [41]: [Training] loss = 0.550176 * 50000, metric = 18.2% * 50000 15.562s (3213.0 samples per second)
Finished Epoch [42]: [Training] loss = 0.533501 * 50000, metric = 17.6% * 50000 15.803s (3163.9 samples per second)
Finished Epoch [43]: [Training] loss = 0.531469 * 50000, metric = 17.6% * 50000 15.766s (3171.3 samples per second)
Finished Epoch [44]: [Training] loss = 0.527416 * 50000, metric = 17.5% * 50000 15.686s (3187.6 samples per second)
Finished Epoch [45]: [Training] loss = 0.521281 * 50000, metric = 17.3% * 50000 15.554s (3214.7 samples per second)
Finished Epoch [46]: [Training] loss = 0.520650 * 50000, metric = 17.2% * 50000 15.586s (3208.1 samples per second)
Finished Epoch [47]: [Training] loss = 0.527120 * 50000, metric = 17.3% * 50000 15.722s (3180.2 samples per second)
Finished Epoch [48]: [Training] loss = 0.517304 * 50000, metric = 17.1% * 50000 15.633s (3198.4 samples per second)
Finished Epoch [49]: [Training] loss = 0.516729 * 50000, metric = 17.0% * 50000 15.705s (3183.6 samples per second)
Finished Epoch [50]: [Training] loss = 0.516743 * 50000, metric = 17.0% * 50000 15.577s (3209.8 samples per second)
Finished Epoch [51]: [Training] loss = 0.510191 * 50000, metric = 16.8% * 50000 15.715s (3181.7 samples per second)
Finished Epoch [52]: [Training] loss = 0.515140 * 50000, metric = 17.0% * 50000 15.550s (3215.5 samples per second)
Finished Epoch [53]: [Training] loss = 0.514969 * 50000, metric = 17.2% * 50000 15.666s (3191.6 samples per second)
Finished Epoch [54]: [Training] loss = 0.512692 * 50000, metric = 16.9% * 50000 15.573s (3210.8 samples per second)
Finished Epoch [55]: [Training] loss = 0.511310 * 50000, metric = 16.7% * 50000 15.568s (3211.7 samples per second)
Finished Epoch [56]: [Training] loss = 0.510532 * 50000, metric = 16.7% * 50000 15.581s (3209.1 samples per second)
Finished Epoch [57]: [Training] loss = 0.508308 * 50000, metric = 16.8% * 50000 15.569s (3211.6 samples per second)
Finished Epoch [58]: [Training] loss = 0.511943 * 50000, metric = 16.8% * 50000 15.510s (3223.7 samples per second)
Finished Epoch [59]: [Training] loss = 0.504965 * 50000, metric = 16.7% * 50000 15.571s (3211.2 samples per second)
Finished Epoch [60]: [Training] loss = 0.508391 * 50000, metric = 16.9% * 50000 15.586s (3208.0 samples per second)
Finished Epoch [61]: [Training] loss = 0.479414 * 50000, metric = 15.8% * 50000 15.558s (3213.8 samples per second)
Finished Epoch [62]: [Training] loss = 0.475528 * 50000, metric = 15.5% * 50000 15.586s (3208.0 samples per second)
Finished Epoch [63]: [Training] loss = 0.472683 * 50000, metric = 15.4% * 50000 15.661s (3192.7 samples per second)
Finished Epoch [64]: [Training] loss = 0.474475 * 50000, metric = 15.6% * 50000 15.755s (3173.5 samples per second)
Finished Epoch [65]: [Training] loss = 0.462044 * 50000, metric = 15.3% * 50000 15.709s (3182.9 samples per second)
Finished Epoch [66]: [Training] loss = 0.466689 * 50000, metric = 15.4% * 50000 15.591s (3206.9 samples per second)
Finished Epoch [67]: [Training] loss = 0.468582 * 50000, metric = 15.5% * 50000 15.732s (3178.2 samples per second)
Finished Epoch [68]: [Training] loss = 0.465083 * 50000, metric = 15.4% * 50000 15.754s (3173.7 samples per second)
Finished Epoch [69]: [Training] loss = 0.466774 * 50000, metric = 15.3% * 50000 15.591s (3206.9 samples per second)
Finished Epoch [70]: [Training] loss = 0.465085 * 50000, metric = 15.3% * 50000 15.665s (3191.9 samples per second)
Finished Epoch [71]: [Training] loss = 0.457235 * 50000, metric = 14.8% * 50000 15.673s (3190.3 samples per second)
Finished Epoch [72]: [Training] loss = 0.450829 * 50000, metric = 14.9% * 50000 15.667s (3191.5 samples per second)
Finished Epoch [73]: [Training] loss = 0.454430 * 50000, metric = 15.0% * 50000 15.765s (3171.6 samples per second)
Finished Epoch [74]: [Training] loss = 0.449034 * 50000, metric = 14.6% * 50000 15.828s (3158.9 samples per second)
Finished Epoch [75]: [Training] loss = 0.454414 * 50000, metric = 14.8% * 50000 16.028s (3119.6 samples per second)
Finished Epoch [76]: [Training] loss = 0.452720 * 50000, metric = 15.0% * 50000 16.046s (3116.1 samples per second)
Finished Epoch [77]: [Training] loss = 0.452330 * 50000, metric = 14.9% * 50000 16.135s (3098.9 samples per second)
Finished Epoch [78]: [Training] loss = 0.451151 * 50000, metric = 14.9% * 50000 16.073s (3110.8 samples per second)
Finished Epoch [79]: [Training] loss = 0.448083 * 50000, metric = 14.8% * 50000 16.302s (3067.0 samples per second)
Finished Epoch [80]: [Training] loss = 0.451979 * 50000, metric = 15.0% * 50000 15.936s (3137.5 samples per second)

总共花费的时间是1255.31秒。那么用BrainScript会不会更快一点?测试了一下,平均每个Epoch都会快上一秒多,每秒处理的sample大概在3300以上。最后算下来,BrainScript比Python性能稍好一些,总时间至少能少80秒以上,大概要快6%至10%。

三符风云涌

发表评论