Refine fluid_benchmark.py#11118
Conversation
8820ab8 to
3b7157f
Compare
3b7157f to
2e61c48
Compare
benchmark/fluid/models/resnet.py
Outdated
| "./flowers_1.recordio", "./flowers_1.recordio", | ||
| "./flowers_1.recordio", "./flowers_1.recordio", | ||
| "./flowers_1.recordio", "./flowers_1.recordio" | ||
| ] |
There was a problem hiding this comment.
file_list = ["./flowers_1.recordio"] * 8
benchmark/fluid/fluid_benchmark.py
Outdated
| train_losses.append(loss) | ||
| print("Pass: %d, Iter: %d, Loss: %f\n" % | ||
| (pass_id, iters, np.mean(train_losses))) | ||
| if args.use_recordio: |
There was a problem hiding this comment.
recordio现在主要是为了加速GPU上数据的读取,对于CPU可能作用不大,并且现在parallel executor还不支持CPU。
edfcdd6 to
5ee39eb
Compare
5ee39eb to
4ff3ed3
Compare
4ff3ed3 to
8bb6942
Compare
benchmark/fluid/fluid_benchmark.py
Outdated
| for batch_id, data in enumerate(train_reader()): | ||
| train_losses = [] | ||
| if args.use_recordio: | ||
| pass_id = 0 |
There was a problem hiding this comment.
成舵老师, use_recordio 场景只是想训练1个pass 么?
There was a problem hiding this comment.
用recordio也是可以训练很多个pass的，只是需要训练多少个pass是在这里指定的，这里的pass_id其实是没有什么用的。
benchmark/fluid/fluid_benchmark.py
Outdated
| examples_per_sec = num_samples / train_elapsed | ||
| print('Total examples: %d, total time: %.5f, %.5f examples/sec' % | ||
| (num_samples, train_elapsed, examples_per_sec)) | ||
| print("Pass: %d, Loss: %f" % (pass_id, np.mean(train_losses))) |
There was a problem hiding this comment.
@panyx0718 @guochaorong This code can work, but line 265 is unnecessary.
benchmark/fluid/fluid_benchmark.py
Outdated
| (num_samples, train_elapsed, examples_per_sec)) | ||
| if not args.no_test and batch_acc != None: | ||
| test_acc = test(startup_exe, infer_prog, test_reader, feeder, batch_acc) | ||
| print("Pass: %d, Test Accuracy: %f" % (pass_id, test_acc)) |
| fluid.layers.data( | ||
| name='label', shape=[1], dtype='int64'), | ||
| ], | ||
| place=fluid.CPUPlace()) |
|
|
||
|
|
||
| def get_model(args): | ||
| model = resnet_cifar10 |
There was a problem hiding this comment.
这一行去掉了, 倒数第3行 model 可能未定义。
| startup_exe = fluid.Executor(place) | ||
| startup_exe.run(startup_prog) | ||
| strategy = fluid.ExecutionStrategy() | ||
| strategy.num_threads = 1 |
There was a problem hiding this comment.
why does this default to 1?
There was a problem hiding this comment.
Because I saw that strategy.num_threads is set to 1 in train_parallel, and @Yancey1989 once said that the distributed program will hang when strategy.num_threads is greater than 1.
benchmark/fluid/fluid_benchmark.py
Outdated
| startup_exe.run(startup_prog) | ||
| strategy = fluid.ExecutionStrategy() | ||
| strategy.num_threads = 1 | ||
| strategy.allow_op_delay = False |
There was a problem hiding this comment.
should this default to false if it is not often used?
There was a problem hiding this comment.
You are right, this line is unnecessary in fact because the default value is false.
| train_losses = [] | ||
| for batch_id, data in enumerate(train_reader()): | ||
| train_losses = [] | ||
| if args.use_recordio: |
There was a problem hiding this comment.
Why is the original training loop not able to use recordio? This seems to hack use_recordio into this place.
There was a problem hiding this comment.
Because the number of passes has been set here, the `for pass_id in range(args.pass_num)` loop is meaningless for use_recordio.
benchmark/fluid/fluid_benchmark.py
Outdated
| examples_per_sec = num_samples / train_elapsed | ||
| print('Total examples: %d, total time: %.5f, %.5f examples/sec' % | ||
| (num_samples, train_elapsed, examples_per_sec)) | ||
| print("Pass: %d, Loss: %f" % (pass_id, np.mean(train_losses))) |
benchmark/fluid/fluid_benchmark.py
Outdated
|
|
||
| def main(): | ||
| args = parse_args() | ||
| cards = os.getenv("CUDA_VISIBLE_DEVICES") or "" |
| # only | ||
| nccl_id_var, num_trainers, trainer_id = ( | ||
| None, 1, int(os.getenv("PADDLE_TRAINER_ID", "-1"))) | ||
| None, 1, int(os.getenv("PADDLE_TRAINER_ID", "0"))) |
There was a problem hiding this comment.
Because trainer_id is the last parameter of ParallelExecutor and its type is size_t, it cannot be -1.
Paddle/paddle/fluid/pybind/pybind.cc
Lines 550 to 555 in d3e99ae
There was a problem hiding this comment.
I see. I made this an illegal default number so that PADDLE_TRAINER_ID must be set to a positive value for ParallelExecutor.
benchmark/fluid/models/resnet.py
Outdated
| input = fluid.layers.data(name='data', shape=dshape, dtype='float32') | ||
| label = fluid.layers.data(name='label', shape=[1], dtype='int64') | ||
| if args.use_recordio: | ||
| recordio_name = './cifar10_1.recordio' if args.data_set == 'cifar10' else './flowers_1.recordio' |
| num_samples = 0 | ||
| start_time = time.time() | ||
|
|
||
| if args.use_recordio: |
There was a problem hiding this comment.
pass_num doesn't work when use_recordio?
guochaorong
left a comment
There was a problem hiding this comment.
please fix the logical problems
typhoonzero
left a comment
There was a problem hiding this comment.
Thought this is the same work as #11121?
benchmark/fluid/fluid_benchmark.py
Outdated
| help='The model to run benchmark with.') | ||
| parser.add_argument( | ||
| '--batch_size', type=int, default=32, help='The minibatch size.') | ||
| parser.add_argument( |
There was a problem hiding this comment.
Just changing the meaning of --batch_size would be OK
benchmark/fluid/fluid_benchmark.py
Outdated
| num_samples += len(data) | ||
| iters += 1 | ||
| if batch_id % 1 == 0: | ||
| num_samples += args.batch_size # dev_cnt * args.batch_size? |
There was a problem hiding this comment.
The last batch size may be different
There was a problem hiding this comment.
You are right, but currently we cannot get the actual batch size when we use recordio. This is the problem.
The batch size is set here, if the last batch size of one pass is less than args.batch_size and the current pass is not last, recordio will read data for the next pass to make up the batch.
6e5e0ea to
041140a
Compare
2a7a1dd to
f7414a5
Compare
f7414a5 to
e131716
Compare
| startup_exe = fluid.Executor(place) | ||
| startup_exe.run(startup_prog) | ||
| strategy = fluid.ExecutionStrategy() | ||
| strategy.num_threads = 1 |
| # only | ||
| nccl_id_var, num_trainers, trainer_id = ( | ||
| None, 1, int(os.getenv("PADDLE_TRAINER_ID", "-1"))) | ||
| None, 1, int(os.getenv("PADDLE_TRAINER_ID", "0"))) |
There was a problem hiding this comment.
I see. I made this an illegal default number so that PADDLE_TRAINER_ID must be set to a positive value for ParallelExecutor.
| generate_recordio(dshape, data_set_iterator, recordio_name) | ||
|
|
||
| batch_size_per_gpu = args.batch_size / args.gpus | ||
| file_list = [recordio_name] * 8 |
There was a problem hiding this comment.
Would it be better to generate 8 sharded files?
In current way, it's easy to have duplicated data if shuffling buffer is not big enough?
There was a problem hiding this comment.
You are right. This script currently only takes care of the performance. If the length of file_list and thread_num are equal to the device count, the program will be faster.
| def main(): | ||
| args = parse_args() | ||
| gpus = os.getenv("CUDA_VISIBLE_DEVICES") or "" | ||
| args.gpus = len(gpus.split(",")) |
There was a problem hiding this comment.
It seems the user can use --gpus instead of CUDA_VISIBLE_DEVICES
| train_exe.bcast_params() | ||
|
|
||
| num_samples += args.batch_size | ||
| if iters % 1 == 0: |
There was a problem hiding this comment.
this line seems unnecessary
| num_samples, start_time = 0, time.time() | ||
|
|
||
| if args.use_recordio: | ||
| for iters in xrange(args.iterations): |
There was a problem hiding this comment.
it seems that adding a pass_num control would be better
No description provided.