添加maven坐标
<dependencies>
    <!-- Unit testing. Pinned to a fixed version: the RELEASE meta-version
         resolves to a different artifact over time and makes builds
         non-reproducible. -->
    <dependency>
        <groupId>junit</groupId>
        <artifactId>junit</artifactId>
        <version>4.13.2</version>
    </dependency>

    <!-- Logging. Raised from 2.8.2, which is affected by CVE-2021-44228
         (Log4Shell); 2.17.1 is a patched release that still runs on Java 8. -->
    <dependency>
        <groupId>org.apache.logging.log4j</groupId>
        <artifactId>log4j-core</artifactId>
        <version>2.17.1</version>
    </dependency>

    <!-- Hadoop libraries; all three artifacts are kept on the same version. -->
    <dependency>
        <groupId>org.apache.hadoop</groupId>
        <artifactId>hadoop-common</artifactId>
        <version>2.7.1</version>
    </dependency>
    <dependency>
        <groupId>org.apache.hadoop</groupId>
        <artifactId>hadoop-client</artifactId>
        <version>2.7.1</version>
    </dependency>
    <dependency>
        <groupId>org.apache.hadoop</groupId>
        <artifactId>hadoop-hdfs</artifactId>
        <version>2.7.1</version>
    </dependency>

    <!-- JDK tools.jar, resolved from the local JDK installation via a
         system-scoped path relative to java.home. -->
    <dependency>
        <groupId>jdk.tools</groupId>
        <artifactId>jdk.tools</artifactId>
        <version>1.8</version>
        <scope>system</scope>
        <!--suppress UnresolvedMavenProperty -->
        <systemPath>${java.home}/../lib/tools.jar</systemPath>
    </dependency>
</dependencies>
创建Mapper类
import org
.apache
.hadoop
.io
.IntWritable
;
import org
.apache
.hadoop
.io
.LongWritable
;
import org
.apache
.hadoop
.io
.Text
;
import org
.apache
.hadoop
.mapreduce
.Mapper
;
import java
.io
.IOException
;
/**
 * Mapper of the word-count job: splits each input record into
 * whitespace-separated tokens and emits a {@code (token, 1)} pair per token.
 */
public class CountMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
    /** Constant count of 1 written for every token; never modified. */
    private static final IntWritable ONE = new IntWritable(1);

    /**
     * Reusable output key. Kept per instance (not static) because it is
     * mutated on every token and must not be shared between mapper instances.
     */
    private final Text word = new Text();

    /**
     * Emits one {@code (token, 1)} pair for every non-empty token in {@code value}.
     *
     * @param key     input key of the record (not used)
     * @param value   input record text to tokenize
     * @param context sink the output pairs are written to
     * @throws IOException          if writing to the context fails
     * @throws InterruptedException if writing to the context is interrupted
     */
    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        // Split on runs of any whitespace so tabs and repeated spaces do not
        // produce bogus tokens.
        for (String token : value.toString().split("\\s+")) {
            // A blank line or leading whitespace still yields one empty
            // string from split(); it is not a word and must not be counted.
            if (token.isEmpty()) {
                continue;
            }
            word.set(token);
            context.write(word, ONE);
        }
    }
}
创建Reducer类
import org
.apache
.hadoop
.io
.IntWritable
;
import org
.apache
.hadoop
.io
.Text
;
import org
.apache
.hadoop
.mapreduce
.Reducer
;
import java
.io
.IOException
;
/**
 * Reducer of the word-count job: adds up all counts received for a key and
 * emits a single {@code (key, total)} pair.
 */
public class CountReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
    /**
     * Reusable output value. Kept per instance (not static) because it is
     * mutated on every call and must not be shared between reducer instances.
     */
    private final IntWritable wordSum = new IntWritable();

    /**
     * Sums the counts in {@code values} and writes the total for {@code key}.
     *
     * @param key     the word being counted
     * @param values  partial counts collected for {@code key}
     * @param context sink the output pair is written to
     * @throws IOException          if writing to the context fails
     * @throws InterruptedException if writing to the context is interrupted
     */
    @Override
    protected void reduce(Text key, Iterable<IntWritable> values, Context context)
            throws IOException, InterruptedException {
        int sum = 0;
        for (IntWritable value : values) {
            // Add the carried count rather than a constant 1, so the total
            // stays correct when the incoming values are already partial sums.
            sum += value.get();
        }
        wordSum.set(sum);
        context.write(key, wordSum);
    }
}
创建Driver类
import org
.apache
.hadoop
.conf
.Configuration
;
import org
.apache
.hadoop
.fs
.Path
;
import org
.apache
.hadoop
.io
.IntWritable
;
import org
.apache
.hadoop
.io
.Text
;
import org
.apache
.hadoop
.mapreduce
.Job
;
import org
.apache
.hadoop
.mapreduce
.lib
.input
.FileInputFormat
;
import org
.apache
.hadoop
.mapreduce
.lib
.output
.FileOutputFormat
;
import java
.io
.IOException
;
/**
 * Entry point that configures and submits the word-count job built from
 * {@code CountMapper} and {@code CountReducer}.
 */
public class CountDriver {
    /**
     * Configures the job, runs it, and exits with 0 on success or 1 on failure.
     * Exits with 2 when the required arguments are missing.
     *
     * @param args {@code args[0]} is the input path, {@code args[1]} is the output path
     * @throws IOException            if the job cannot be created or submitted
     * @throws ClassNotFoundException if a configured job class cannot be loaded
     * @throws InterruptedException   if waiting for job completion is interrupted
     */
    public static void main(String[] args)
            throws IOException, ClassNotFoundException, InterruptedException {
        // Fail with a readable message instead of an
        // ArrayIndexOutOfBoundsException when the paths are not supplied.
        if (args.length < 2) {
            System.err.println("Usage: CountDriver <input path> <output path>");
            System.exit(2);
        }

        Job job = Job.getInstance(new Configuration());
        job.setJarByClass(CountDriver.class);

        job.setMapperClass(CountMapper.class);
        job.setReducerClass(CountReducer.class);

        // Key/value types emitted by the mapper.
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);

        // Key/value types of the final job output.
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);

        FileInputFormat.setInputPaths(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        // Block until the job finishes (verbose = true) and map the outcome
        // to the process exit code.
        boolean succeeded = job.waitForCompletion(true);
        System.exit(succeeded ? 0 : 1);
    }
}
输入main方法参数
在d盘下创建input目录,随便复制一份英文内容的文件到input目录中。
注意:运行前output目录必须不存在,否则作业会因输出目录已存在而失败。
查看结果
打开output目录,查看其中的结果文件(例如 part-r-00000)。
转载请注明原文地址:https://blackberry.8miu.com/read-43046.html