Prepare some input files. They can be uploaded to HDFS with hdfs dfs -put xxx/* /user/fatkun/input.
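For example, a couple of small text files can be created locally and uploaded (the file names here are only illustrative; the target directory is the one used throughout this post):

echo "Hello Hadoop Hello World" > file1.txt
echo "hello hadoop" > file2.txt
hdfs dfs -mkdir -p /user/fatkun/input
hdfs dfs -put file1.txt file2.txt /user/fatkun/input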
Code:

package com.fatkun;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.StringTokenizer;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

public class WordCount extends Configured implements Tool {
    static enum Counters {
        INPUT_WORDS // counter for the total number of words processed
    }

    static Log logger = LogFactory.getLog(WordCount.class);

    public static class CountMapper extends
            Mapper<LongWritable, Text, Text, IntWritable> {
        private final IntWritable one = new IntWritable(1);
        private Text word = new Text();
        private boolean caseSensitive = true;

        @Override
        protected void setup(Context context) throws IOException,
                InterruptedException {
            // read the configuration
            Configuration conf = context.getConfiguration();
            caseSensitive = conf.getBoolean("wordcount.case.sensitive", true);
            super.setup(context);
        }

        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            StringTokenizer itr = new StringTokenizer(value.toString());
            while (itr.hasMoreTokens()) {
                if (caseSensitive) { // whether counting is case-sensitive
                    word.set(itr.nextToken());
                } else {
                    word.set(itr.nextToken().toLowerCase());
                }
                context.write(word, one);
                context.getCounter(Counters.INPUT_WORDS).increment(1);
            }
        }
    }

    public static class CountReducer extends
            Reducer<Text, IntWritable, Text, IntWritable> {

        @Override
        protected void reduce(Text text, Iterable<IntWritable> values,
                Context context) throws IOException, InterruptedException {
            int sum = 0;
            for (IntWritable value : values) {
                sum += value.get();
            }
            context.write(text, new IntWritable(sum));
        }
    }

    @Override
    public int run(String[] args) throws Exception {
        Configuration conf = new Configuration(getConf());
        Job job = Job.getInstance(conf, "example hadoop wordcount");
        job.setJarByClass(WordCount.class);
        job.setMapperClass(CountMapper.class);
        job.setCombinerClass(CountReducer.class);
        job.setReducerClass(CountReducer.class);

        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);

        // collect the remaining arguments: input path, then output path
        List<String> other_args = new ArrayList<String>();
        for (int i = 0; i < args.length; i++) {
            other_args.add(args[i]);
        }
        FileInputFormat.addInputPath(job, new Path(other_args.get(0)));
        FileOutputFormat.setOutputPath(job, new Path(other_args.get(1)));

        return job.waitForCompletion(true) ? 0 : 1;
    }

    public static void main(String[] args) throws Exception {
        int res = ToolRunner.run(new Configuration(), new WordCount(), args);
        System.exit(res);
    }
}
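Two details in run() are worth noting. CountReducer is reused as the combiner, which is safe here because summing counts is associative and commutative. And the wordcount.case.sensitive flag read in the mapper's setup() can be supplied either on the command line (shown next) or fixed programmatically. A minimal sketch of the programmatic variant, replacing the first two statements of run() (same key name as above):

// inside run(): same effect as always passing -Dwordcount.case.sensitive=false
Configuration conf = new Configuration(getConf());
conf.setBoolean("wordcount.case.sensitive", false); // force case-insensitive counting
Job job = Job.getInstance(conf, "example hadoop wordcount");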
To run, export the project as a jar from Eclipse and execute the following command:

hadoop jar wordcount.jar com.fatkun.WordCount -Dwordcount.case.sensitive=false /user/fatkun/input /user/fatkun/output
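The -Dwordcount.case.sensitive=false option is recognized because WordCount is launched through ToolRunner, whose GenericOptionsParser copies -D key=value pairs into the job Configuration before run() receives the remaining arguments. When the job completes, the INPUT_WORDS counter is printed along with the built-in job counters, and the word counts can be inspected in the output directory (part-r-00000 is the default name for a single reduce output file):

hdfs dfs -cat /user/fatkun/output/part-r-00000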
References:
http://cxwangyi.blogspot.com/2009/12/wordcount-tutorial-for-hadoop-0201.html
http://hadoop.apache.org/docs/r1.2.1/mapred_tutorial.html#Example%3A+WordCount+v2.0