/* * call-seq: * enum.sort_by {| obj | block } => array * * Sorts <i>enum</i> using a set of keys generated by mapping the * values in <i>enum</i> through the given block. * * %w{ apple pear fig }.sort_by {|word| word.length} #=> ["fig", "pear", "apple"] * * The current implementation of <code>sort_by</code> generates an * array of tuples containing the original collection element and the * mapped value. This makes <code>sort_by</code> fairly expensive when * the keysets are simple * * require 'benchmark' * include Benchmark * * a = (1..100000).map {rand(100000)} * * bm(10) do |b| * b.report("Sort") { a.sort } * b.report("Sort by") { a.sort_by {|a| a} } * end * * <em>produces:</em> * * user system total real * Sort 0.180000 0.000000 0.180000 ( 0.175469) * Sort by 1.980000 0.040000 2.020000 ( 2.013586) * * However, consider the case where comparing the keys is a non-trivial * operation. The following code sorts some files on modification time * using the basic <code>sort</code> method. * * files = Dir["*"] * sorted = files.sort {|a,b| File.new(a).mtime <=> File.new(b).mtime} * sorted #=> ["mon", "tues", "wed", "thurs"] * * This sort is inefficient: it generates two new <code>File</code> * objects during every comparison. A slightly better technique is to * use the <code>Kernel#test</code> method to generate the modification * times directly. * * files = Dir["*"] * sorted = files.sort { |a,b| * test(?M, a) <=> test(?M, b) * } * sorted #=> ["mon", "tues", "wed", "thurs"] * * This still generates many unnecessary <code>Time</code> objects. A * more efficient technique is to cache the sort keys (modification * times in this case) before the sort. Perl users often call this * approach a Schwartzian Transform, after Randal Schwartz. We * construct a temporary array, where each element is an array * containing our sort key along with the filename. We sort this array, * and then extract the filename from the result. * * sorted = Dir["*"].collect { |f| * [test(?M, f), f] * }.sort.collect { |f| f[1] } * sorted #=> ["mon", "tues", "wed", "thurs"] * * This is exactly what <code>sort_by</code> does internally. * * sorted = Dir["*"].sort_by {|f| test(?M, f)} * sorted #=> ["mon", "tues", "wed", "thurs"] */ static VALUE enum_sort_by(obj) VALUE obj; { VALUE ary; long i; if (TYPE(obj) == T_ARRAY) { ary = rb_ary_new2(RARRAY(obj)->len); } else { ary = rb_ary_new(); } RBASIC(ary)->klass = 0; rb_iterate(rb_each, obj, sort_by_i, ary); if (RARRAY(ary)->len > 1) { qsort(RARRAY(ary)->ptr, RARRAY(ary)->len, sizeof(VALUE), sort_by_cmp, 0); } if (RBASIC(ary)->klass) { rb_raise(rb_eRuntimeError, "sort_by reentered"); } for (i=0; i<RARRAY(ary)->len; i++) { RARRAY(ary)->ptr[i] = RNODE(RARRAY(ary)->ptr[i])->u2.value; } RBASIC(ary)->klass = rb_cArray; return ary; }