1. Open IntelliJ and create a new command-line project.
2. In WordCount.java, paste the following code:
import java.io.IOException;
import java.util.StringTokenizer;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
public class WordCount {
public static class TokenizerMapper
extends Mapper
3. Import dependencies.
'Command' + ';' => click 'Modules' => select 'SDK 1.7' => click '+', select 'Libraries' => "New Libraries" => 'From Maven' => Search "hadoop-core" => select "org.apache.hadoop:hadoop-core:1.2.0".
4. Build a jar.
'Command' + ';' => click "Artifacts" => click '+' => 'JAR' => 'From modules with dependencies...'
5. Run:
hdfs dfs -put A-LOCAL-FOLDER input
hadoop jar out/artifacts/wordcount_jar/wordcount.jar input output
No comments:
Post a Comment