区内排序
现在又加了一个需求:分区,按照电话号码前三位(代表省份)将132开头和155开头的电话号码分别放到各自的文件中,其他的号码单独放到另一个文件中(共3个分区)
Partitioner类
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Partitioner;
/**
 * Assigns map output records to partitions by the first three characters of the phone number.
 *
 * <p>Partitioning is part of the shuffle, which runs after the map phase, so the key and value
 * type parameters here are the map output types: {@code FlowBean} as the key and the phone
 * number as the {@code Text} value.
 *
 * <p>Mapping: prefix {@code 132} goes to partition 0, prefix {@code 155} to partition 1, and
 * every other value to partition 2.
 */
public class ProvincePartitioner extends Partitioner<FlowBean, Text> {

    /** Phone-number prefix routed to partition 0. */
    private static final String PREFIX_PARTITION_0 = "132";

    /** Phone-number prefix routed to partition 1. */
    private static final String PREFIX_PARTITION_1 = "155";

    /**
     * Chooses the partition for one map output record.
     *
     * @param flowBean      the map output key; not consulted when choosing the partition
     * @param text          the map output value, holding the phone number
     * @param numPartitions the number of partitions configured for the job; not consulted here,
     *                      so the job is expected to provide at least three partitions because
     *                      this method returns values in the range 0..2
     * @return 0 for numbers starting with 132, 1 for numbers starting with 155, otherwise 2
     */
    @Override
    public int getPartition(FlowBean flowBean, Text text, int numPartitions) {
        String phoneNumber = text.toString();

        // startsWith is used instead of substring(0, 3) so that a value shorter than three
        // characters lands in the catch-all partition rather than throwing
        // StringIndexOutOfBoundsException and failing the task.
        if (phoneNumber.startsWith(PREFIX_PARTITION_0)) {
            return 0;
        }
        if (phoneNumber.startsWith(PREFIX_PARTITION_1)) {
            return 1;
        }
        return 2;
    }
}
Driver类
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.KeyValueLineRecordReader;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import java.io.IOException;
/**
 * Driver that configures the flow job, submits it, and exits with a status code reflecting
 * whether the job completed successfully.
 */
public class FlowDriver {

    /**
     * Builds the job configuration, wires in the mapper, reducer and partitioner, sets the
     * input and output locations, then runs the job and terminates the JVM.
     *
     * @param args command-line arguments; not read by this method
     * @throws IOException            propagated from job creation, path setup or job execution
     * @throws InterruptedException   propagated from waiting for the job to complete
     * @throws ClassNotFoundException propagated from waiting for the job to complete
     */
    public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
        // 1. Create the job from a configuration whose key/value separator is a tab.
        Configuration configuration = new Configuration();
        configuration.set(KeyValueLineRecordReader.KEY_VALUE_SEPARATOR, "\t");
        Job flowJob = Job.getInstance(configuration);

        // 2. Locate the jar through this driver class.
        flowJob.setJarByClass(FlowDriver.class);

        // 3. Register the mapper and the reducer.
        flowJob.setMapperClass(FlowMapper.class);
        flowJob.setReducerClass(FlowReducer.class);

        // 4. Map output types: the bean is the key and the text is the value.
        flowJob.setMapOutputKeyClass(FlowBean.class);
        flowJob.setMapOutputValueClass(Text.class);

        // 5. Final output types: the text is the key and the bean is the value.
        flowJob.setOutputKeyClass(Text.class);
        flowJob.setOutputValueClass(FlowBean.class);

        // 6. Input file and output directory.
        Path inputFile = new Path("C:\\Users\\MaHe666\\Desktop\\phone_data3.txt");
        Path outputDir = new Path("D:\\output");
        FileInputFormat.setInputPaths(flowJob, inputFile);
        FileOutputFormat.setOutputPath(flowJob, outputDir);

        // Partitioning: the partitioner yields three partitions, so three reduce tasks are set.
        flowJob.setPartitionerClass(ProvincePartitioner.class);
        flowJob.setNumReduceTasks(3);

        // 7. Submit, wait for completion, and map the outcome to the process exit code.
        int exitCode;
        if (flowJob.waitForCompletion(true)) {
            exitCode = 0;
        } else {
            exitCode = 1;
        }
        System.exit(exitCode);
    }
}