def create_reader(map_file, mean_file, train):
    """Build a MinibatchSource over the CIFAR-10 image data.

    Args:
        map_file:  text index file mapping each image path to its label.
        mean_file: XML/mean file holding the per-pixel mean image used
                   for normalization.
        train:     when True, prepend a random crop with jitter so the
                   training set is augmented; evaluation uses no jitter.

    Returns:
        A cntk.io.MinibatchSource exposing 'features' and 'labels' streams.

    Raises:
        RuntimeError: when either input file is missing on disk.
    """
    files_missing = not (os.path.exists(map_file) and os.path.exists(mean_file))
    if files_missing:
        raise RuntimeError("File '%s' or '%s' does not exist. Please run install_cifar10.py from DataSets/CIFAR-10 to fetch them" % (map_file, mean_file))

    # Transformation pipeline: jitter/crop only when training.
    xforms = []
    if train:
        xforms.append(cntk.io.ImageDeserializer.crop(crop_type='Random', ratio=0.8, jitter_type='uniRatio'))
    xforms.append(cntk.io.ImageDeserializer.scale(width=image_width, height=image_height, channels=num_channels, interpolations='linear'))
    xforms.append(cntk.io.ImageDeserializer.mean(mean_file))

    # First column of the map file feeds 'features' (field 'image'),
    # the second feeds 'labels' (field 'label').
    deserializer = cntk.io.ImageDeserializer(map_file, cntk.io.StreamDefs(
        features = cntk.io.StreamDef(field='image', transforms=xforms),
        labels   = cntk.io.StreamDef(field='label', shape=num_classes)))
    return cntk.io.MinibatchSource(deserializer)
在读取数据时调用 create_reader(map_file, mean_file, train)。map 文件是一个映射文件,用于指明每张图片的路径及其对应的标签;mean 文件则保存了所有图片在每个像素位置上的平均值,用于归一化;train 是一个布尔值,为 True 时会对训练图片做随机裁剪与抖动(jitter),用以增强数据集。
在调用 ImageDeserializer 时,features 与 labels 均从 map 文件里读取。map 文件实际相当于一个索引文件,文本内容可以是这样:
/home/user/data/train/n01440764/n01440764_10026.JPEG<tab>0 /home/user/data/train/n01440764/n01440764_10027.JPEG<tab>0 /home/user/data/train/n01534433/n01534433_7285.JPEG<tab>0
也可以是这样,直接用压缩文件:
/home/user/data/train.zip@/n01440764/n01440764_10026.JPEG<tab>0 /home/user/data/train.zip@/n01440764/n01440764_10027.JPEG<tab>0 /home/user/data/train_01.zip@/n01534433/n01534433_7285.JPEG<tab>0 /home/user/data/train/n01534433/n01534433_7285.JPEG<tab>0
在 MXNet 中,使用的 REC 文件格式是一个 Magic Number,加上一个 cflag,再加上长度字段,然后才是图片数据,整体按 4 字节对齐。因为没有详细的文档,所以只能读源码,很是不容易。
在主体的训练代码中:
def convnet_cifar10_dataaug(reader_train, reader_test, max_epochs = 80):
    """Train a ConvNet on CIFAR-10 with data augmentation, then evaluate it.

    reader_train / reader_test are MinibatchSources exposing 'features' and
    'labels' streams (as produced by create_reader). Trains for max_epochs
    epochs, saving the model after each one, and returns the average test
    error (metric_numer/metric_denom) over the 10000-sample test set.
    """
    _cntk_py.set_computation_network_trace_level(0)

    # Input variables denoting the features and label data
    input_var = cntk.ops.input_variable((num_channels, image_height, image_width))
    label_var = cntk.ops.input_variable((num_classes))

    # Apply model to input; 0.00390625 == 1/256, scaling pixels to [0, 1).
    scaled_input = cntk.ops.element_times(cntk.ops.constant(0.00390625), input_var)
    with cntk.layers.default_options(activation=cntk.ops.relu, pad=True):
        # Two conv-conv-pool stages, then two dense+dropout stages, then
        # an un-activated output layer (softmax is folded into the loss).
        # NOTE(review): the first LayerStack lambda takes no argument while
        # the second takes an index i — confirm this CNTK beta accepts both.
        z = cntk.models.Sequential([
            cntk.models.LayerStack(2, lambda : [
                cntk.layers.Convolution((3,3), 64),
                cntk.layers.Convolution((3,3), 64),
                cntk.layers.MaxPooling((3,3), (2,2))
            ]),
            cntk.models.LayerStack(2, lambda i: [
                cntk.layers.Dense([256,128][i]),
                cntk.layers.Dropout(0.5)
            ]),
            cntk.layers.Dense(num_classes, activation=None)
        ])(scaled_input)

    # loss and metric
    ce = cntk.ops.cross_entropy_with_softmax(z, label_var)
    pe = cntk.ops.classification_error(z, label_var)

    # training config
    epoch_size = 50000     # for now we manually specify epoch size
    minibatch_size = 64

    # Set learning parameters: per-sample LR and momentum time constants,
    # both stepped down on a fixed 20-epoch schedule.
    lr_per_sample = [0.0015625]*20 + [0.00046875]*20 + [0.00015625]*20 + [0.000046875]*10 + [0.000015625]
    lr_schedule = cntk.learning_rate_schedule(lr_per_sample, unit=cntk.learner.UnitType.sample, epoch_size=epoch_size)
    mm_time_constant = [0]*20 + [600]*20 + [1200]
    mm_schedule = cntk.learner.momentum_as_time_constant_schedule(mm_time_constant, epoch_size=epoch_size)
    l2_reg_weight = 0.002

    # trainer object
    learner = cntk.learner.momentum_sgd(z.parameters, lr_schedule, mm_schedule, l2_regularization_weight = l2_reg_weight)
    trainer = cntk.Trainer(z, ce, pe, learner)

    # Define mapping from reader streams to network inputs.
    # NOTE(review): this map is built from reader_train.streams but is also
    # reused for reader_test below — works because both readers expose
    # streams with the same names; verify if the readers ever diverge.
    input_map = {
        input_var: reader_train.streams.features,
        label_var: reader_train.streams.labels
    }

    cntk.utils.log_number_of_parameters(z) ; print()
    progress_printer = cntk.utils.ProgressPrinter(tag='Training')

    # perform model training
    for epoch in range(max_epochs):       # loop over epochs
        sample_count = 0
        while sample_count < epoch_size:  # loop over minibatches in the epoch
            data = reader_train.next_minibatch(min(minibatch_size, epoch_size-sample_count), input_map=input_map) # fetch minibatch.
            trainer.train_minibatch(data)                                   # update model with it
            sample_count += trainer.previous_minibatch_sample_count         # count samples processed so far
            progress_printer.update_with_trainer(trainer, with_metric=True) # log progress
        progress_printer.epoch_summary(with_metric=True)
        # checkpoint after every epoch
        z.save_model(os.path.join(model_path, "ConvNet_CIFAR10_DataAug_{}.dnn".format(epoch)))

    ### Evaluation action
    # epoch_size/minibatch_size are rebound here for the test pass.
    epoch_size = 10000
    minibatch_size = 16

    # process minibatches and evaluate the model
    metric_numer = 0
    metric_denom = 0
    sample_count = 0
    minibatch_index = 0

    while sample_count < epoch_size:
        current_minibatch = min(minibatch_size, epoch_size - sample_count)
        # Fetch next test minibatch.
        data = reader_test.next_minibatch(current_minibatch, input_map=input_map)
        # Accumulate error weighted by the requested minibatch size.
        metric_numer += trainer.test_minibatch(data) * current_minibatch
        metric_denom += current_minibatch
        # Keep track of the number of samples processed so far.
        sample_count += data[label_var].num_samples
        minibatch_index += 1

    print("")
    print("Final Results: Minibatch[1-{}]: errs = {:0.2f}% * {}".format(minibatch_index+1, (metric_numer*100.0)/metric_denom, metric_denom))
    print("")

    # average test error over the evaluation set
    return metric_numer/metric_denom
虽然代码看起来不如 BrainScript 那么简洁,然而也避免了晦涩,并且这样可以细粒度地控制每一个变量。
经过 80 轮(Epoch)训练以后,测试误差是 13.86%。
Finished Epoch [1]: [Training] loss = 2.012469 * 50000, metric = 75.1% * 50000 15.380s (3250.9 samples per second) Finished Epoch [2]: [Training] loss = 1.701941 * 50000, metric = 62.7% * 50000 15.424s (3241.6 samples per second) Finished Epoch [3]: [Training] loss = 1.526383 * 50000, metric = 56.0% * 50000 15.634s (3198.1 samples per second) Finished Epoch [4]: [Training] loss = 1.408301 * 50000, metric = 51.1% * 50000 15.576s (3210.2 samples per second) Finished Epoch [5]: [Training] loss = 1.297389 * 50000, metric = 46.4% * 50000 15.697s (3185.4 samples per second) Finished Epoch [6]: [Training] loss = 1.207954 * 50000, metric = 42.4% * 50000 15.661s (3192.7 samples per second) Finished Epoch [7]: [Training] loss = 1.126663 * 50000, metric = 39.5% * 50000 15.449s (3236.5 samples per second) Finished Epoch [8]: [Training] loss = 1.081688 * 50000, metric = 37.3% * 50000 15.411s (3244.4 samples per second) Finished Epoch [9]: [Training] loss = 1.043844 * 50000, metric = 36.0% * 50000 15.419s (3242.8 samples per second) Finished Epoch [10]: [Training] loss = 1.001881 * 50000, metric = 34.3% * 50000 15.453s (3235.7 samples per second) Finished Epoch [11]: [Training] loss = 0.971302 * 50000, metric = 33.4% * 50000 15.439s (3238.5 samples per second) Finished Epoch [12]: [Training] loss = 0.946839 * 50000, metric = 32.4% * 50000 15.507s (3224.4 samples per second) Finished Epoch [13]: [Training] loss = 0.929610 * 50000, metric = 31.6% * 50000 15.867s (3151.2 samples per second) Finished Epoch [14]: [Training] loss = 0.913509 * 50000, metric = 30.9% * 50000 15.772s (3170.1 samples per second) Finished Epoch [15]: [Training] loss = 0.891763 * 50000, metric = 30.0% * 50000 15.830s (3158.6 samples per second) Finished Epoch [16]: [Training] loss = 0.882295 * 50000, metric = 30.0% * 50000 15.788s (3167.0 samples per second) Finished Epoch [17]: [Training] loss = 0.866338 * 50000, metric = 29.3% * 50000 15.519s (3221.9 samples per second) Finished Epoch [18]: [Training] loss 
= 0.852705 * 50000, metric = 28.5% * 50000 15.573s (3210.6 samples per second) Finished Epoch [19]: [Training] loss = 0.840476 * 50000, metric = 28.3% * 50000 15.585s (3208.1 samples per second) Finished Epoch [20]: [Training] loss = 0.833708 * 50000, metric = 27.9% * 50000 15.565s (3212.3 samples per second) Finished Epoch [21]: [Training] loss = 0.700365 * 50000, metric = 23.3% * 50000 15.551s (3215.3 samples per second) Finished Epoch [22]: [Training] loss = 0.666370 * 50000, metric = 22.3% * 50000 15.734s (3177.8 samples per second) Finished Epoch [23]: [Training] loss = 0.652150 * 50000, metric = 21.8% * 50000 15.836s (3157.3 samples per second) Finished Epoch [24]: [Training] loss = 0.647079 * 50000, metric = 21.7% * 50000 15.896s (3145.5 samples per second) Finished Epoch [25]: [Training] loss = 0.643886 * 50000, metric = 21.4% * 50000 16.040s (3117.3 samples per second) Finished Epoch [26]: [Training] loss = 0.637335 * 50000, metric = 21.3% * 50000 15.786s (3167.3 samples per second) Finished Epoch [27]: [Training] loss = 0.633625 * 50000, metric = 21.2% * 50000 15.637s (3197.6 samples per second) Finished Epoch [28]: [Training] loss = 0.629613 * 50000, metric = 21.1% * 50000 15.581s (3209.0 samples per second) Finished Epoch [29]: [Training] loss = 0.629364 * 50000, metric = 21.0% * 50000 15.917s (3141.2 samples per second) Finished Epoch [30]: [Training] loss = 0.626938 * 50000, metric = 20.8% * 50000 15.832s (3158.1 samples per second) Finished Epoch [31]: [Training] loss = 0.627765 * 50000, metric = 21.0% * 50000 15.824s (3159.8 samples per second) Finished Epoch [32]: [Training] loss = 0.617458 * 50000, metric = 20.5% * 50000 15.874s (3149.8 samples per second) Finished Epoch [33]: [Training] loss = 0.615286 * 50000, metric = 20.3% * 50000 15.638s (3197.4 samples per second) Finished Epoch [34]: [Training] loss = 0.616328 * 50000, metric = 20.5% * 50000 15.652s (3194.4 samples per second) Finished Epoch [35]: [Training] loss = 0.607314 * 50000, metric 
= 20.1% * 50000 15.586s (3208.0 samples per second) Finished Epoch [36]: [Training] loss = 0.608924 * 50000, metric = 20.2% * 50000 15.636s (3197.7 samples per second) Finished Epoch [37]: [Training] loss = 0.609010 * 50000, metric = 20.3% * 50000 16.038s (3117.6 samples per second) Finished Epoch [38]: [Training] loss = 0.601167 * 50000, metric = 20.1% * 50000 15.701s (3184.5 samples per second) Finished Epoch [39]: [Training] loss = 0.607460 * 50000, metric = 20.2% * 50000 15.744s (3175.9 samples per second) Finished Epoch [40]: [Training] loss = 0.595888 * 50000, metric = 19.9% * 50000 15.717s (3181.2 samples per second) Finished Epoch [41]: [Training] loss = 0.550176 * 50000, metric = 18.2% * 50000 15.562s (3213.0 samples per second) Finished Epoch [42]: [Training] loss = 0.533501 * 50000, metric = 17.6% * 50000 15.803s (3163.9 samples per second) Finished Epoch [43]: [Training] loss = 0.531469 * 50000, metric = 17.6% * 50000 15.766s (3171.3 samples per second) Finished Epoch [44]: [Training] loss = 0.527416 * 50000, metric = 17.5% * 50000 15.686s (3187.6 samples per second) Finished Epoch [45]: [Training] loss = 0.521281 * 50000, metric = 17.3% * 50000 15.554s (3214.7 samples per second) Finished Epoch [46]: [Training] loss = 0.520650 * 50000, metric = 17.2% * 50000 15.586s (3208.1 samples per second) Finished Epoch [47]: [Training] loss = 0.527120 * 50000, metric = 17.3% * 50000 15.722s (3180.2 samples per second) Finished Epoch [48]: [Training] loss = 0.517304 * 50000, metric = 17.1% * 50000 15.633s (3198.4 samples per second) Finished Epoch [49]: [Training] loss = 0.516729 * 50000, metric = 17.0% * 50000 15.705s (3183.6 samples per second) Finished Epoch [50]: [Training] loss = 0.516743 * 50000, metric = 17.0% * 50000 15.577s (3209.8 samples per second) Finished Epoch [51]: [Training] loss = 0.510191 * 50000, metric = 16.8% * 50000 15.715s (3181.7 samples per second) Finished Epoch [52]: [Training] loss = 0.515140 * 50000, metric = 17.0% * 50000 15.550s 
(3215.5 samples per second) Finished Epoch [53]: [Training] loss = 0.514969 * 50000, metric = 17.2% * 50000 15.666s (3191.6 samples per second) Finished Epoch [54]: [Training] loss = 0.512692 * 50000, metric = 16.9% * 50000 15.573s (3210.8 samples per second) Finished Epoch [55]: [Training] loss = 0.511310 * 50000, metric = 16.7% * 50000 15.568s (3211.7 samples per second) Finished Epoch [56]: [Training] loss = 0.510532 * 50000, metric = 16.7% * 50000 15.581s (3209.1 samples per second) Finished Epoch [57]: [Training] loss = 0.508308 * 50000, metric = 16.8% * 50000 15.569s (3211.6 samples per second) Finished Epoch [58]: [Training] loss = 0.511943 * 50000, metric = 16.8% * 50000 15.510s (3223.7 samples per second) Finished Epoch [59]: [Training] loss = 0.504965 * 50000, metric = 16.7% * 50000 15.571s (3211.2 samples per second) Finished Epoch [60]: [Training] loss = 0.508391 * 50000, metric = 16.9% * 50000 15.586s (3208.0 samples per second) Finished Epoch [61]: [Training] loss = 0.479414 * 50000, metric = 15.8% * 50000 15.558s (3213.8 samples per second) Finished Epoch [62]: [Training] loss = 0.475528 * 50000, metric = 15.5% * 50000 15.586s (3208.0 samples per second) Finished Epoch [63]: [Training] loss = 0.472683 * 50000, metric = 15.4% * 50000 15.661s (3192.7 samples per second) Finished Epoch [64]: [Training] loss = 0.474475 * 50000, metric = 15.6% * 50000 15.755s (3173.5 samples per second) Finished Epoch [65]: [Training] loss = 0.462044 * 50000, metric = 15.3% * 50000 15.709s (3182.9 samples per second) Finished Epoch [66]: [Training] loss = 0.466689 * 50000, metric = 15.4% * 50000 15.591s (3206.9 samples per second) Finished Epoch [67]: [Training] loss = 0.468582 * 50000, metric = 15.5% * 50000 15.732s (3178.2 samples per second) Finished Epoch [68]: [Training] loss = 0.465083 * 50000, metric = 15.4% * 50000 15.754s (3173.7 samples per second) Finished Epoch [69]: [Training] loss = 0.466774 * 50000, metric = 15.3% * 50000 15.591s (3206.9 samples per second) 
Finished Epoch [70]: [Training] loss = 0.465085 * 50000, metric = 15.3% * 50000 15.665s (3191.9 samples per second) Finished Epoch [71]: [Training] loss = 0.457235 * 50000, metric = 14.8% * 50000 15.673s (3190.3 samples per second) Finished Epoch [72]: [Training] loss = 0.450829 * 50000, metric = 14.9% * 50000 15.667s (3191.5 samples per second) Finished Epoch [73]: [Training] loss = 0.454430 * 50000, metric = 15.0% * 50000 15.765s (3171.6 samples per second) Finished Epoch [74]: [Training] loss = 0.449034 * 50000, metric = 14.6% * 50000 15.828s (3158.9 samples per second) Finished Epoch [75]: [Training] loss = 0.454414 * 50000, metric = 14.8% * 50000 16.028s (3119.6 samples per second) Finished Epoch [76]: [Training] loss = 0.452720 * 50000, metric = 15.0% * 50000 16.046s (3116.1 samples per second) Finished Epoch [77]: [Training] loss = 0.452330 * 50000, metric = 14.9% * 50000 16.135s (3098.9 samples per second) Finished Epoch [78]: [Training] loss = 0.451151 * 50000, metric = 14.9% * 50000 16.073s (3110.8 samples per second) Finished Epoch [79]: [Training] loss = 0.448083 * 50000, metric = 14.8% * 50000 16.302s (3067.0 samples per second) Finished Epoch [80]: [Training] loss = 0.451979 * 50000, metric = 15.0% * 50000 15.936s (3137.5 samples per second)
总共花费时间是 1255.31 秒。那么用 BrainScript 会不会更快一点?测试了一下,平均每个 Epoch 都会快上一秒多,每秒处理的 sample 大约在 3300 以上。最后算下来,BrainScript 比 Python 性能稍好一些:80 个 Epoch 总共至少快 80 秒以上,大约快 6% 到 10%。
发表评论