// wordcount学生成绩普通版案例
package com.Practice.StudentScores;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;

/**
* 题目:学生成绩普通版
*
* computer,huangxiaoming,85
computer,xuzheng,54
computer,huangbo,86
computer,liutao,85
computer,huanglei,99
computer,liujialing,85
computer,liuyifei,75
computer,huangdatou,48
computer,huangjiaju,88
computer,huangzitao,85
english,zhaobenshan,57
english,liuyifei,85
english,liuyifei,76
english,huangdatou,48
english,zhouqi,85
english,huangbo,85
english,huangxiaoming,96
english,huanglei,85
english,liujialing,75
algorithm,liuyifei,75
algorithm,huanglei,76
algorithm,huangjiaju,85
algorithm,liutao,85
algorithm,huangdou,42
algorithm,huangzitao,81
math,wangbaoqiang,85
math,huanglei,76
math,huangjiaju,85
math,liutao,48
math,xuzheng,54
math,huangxiaoming,85
math,liujialing,85
*
* 1、每一个course的最高分,最低分,平均分
返回结果格式:
course max=95 min=22 avg=55
例子:
computer max=99 min=48 avg=79

解题思路:在map以course作为key值,其余部分作为value,在reduce中设置变量max,min,avg,通过累计求出,并设置格式

2、求该成绩表当中出现了相同分数的分数,还有次数,以及该分数的人
返回结果的格式:
科目 分数 次数 该分数的人
例子:
computer 85 4 huangzitao,liujialing,liutao,huangxiaoming

解题思路:求某科目中出现相同分数的人数以及分数,map以科目和分数作为key值,进行分组,在reduce中进行计数,当计数结果大于1时,输出分数,人数和人名
*/
public class StudentScores1 {

/**
 * Configures and submits the student-score MapReduce job, then terminates the JVM
 * with exit code 0 when the job succeeds and 1 otherwise.
 *
 * <p>As wired here the job answers question 2 (scores shared by several students)
 * using {@link StudentScoresMapper2} and {@link StudentScoresReducer2}. Input is read
 * from {@code input/studentScores}; results are written to {@code output/studentScores},
 * which is removed first if it already exists.
 *
 * @param args command-line arguments; not used
 * @throws IOException            if the file system or the job submission fails
 * @throws ClassNotFoundException if a class configured on the job cannot be loaded
 * @throws InterruptedException   if the thread is interrupted while waiting for the job
 */
public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);

    Job job = Job.getInstance(conf);
    job.setJar("wordcountJar/wordcount.jar");

    // Question 2: both the map output and the final output are Text/Text.
    job.setMapperClass(StudentScoresMapper2.class);
    job.setReducerClass(StudentScoresReducer2.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    // Question 1: swap in the lines below instead. The map output value is an
    // IntWritable score, while the final output value stays Text because
    // StudentScoresReducer emits a formatted "max=.. min=.. avg=.." string.
    // job.setMapperClass(StudentScoresMapper.class);
    // job.setReducerClass(StudentScoresReducer.class);
    // job.setMapOutputKeyClass(Text.class);
    // job.setMapOutputValueClass(IntWritable.class);

    Path inputPath = new Path("input/studentScores");
    Path outputPath = new Path("output/studentScores");

    // The job refuses to start when the output path already exists, whether it is a
    // directory or a plain file, so test for existence rather than for a directory.
    if (fs.exists(outputPath)) {
        fs.delete(outputPath, true);
    }

    FileInputFormat.setInputPaths(job, inputPath);
    FileOutputFormat.setOutputPath(job, outputPath);

    boolean succeeded = job.waitForCompletion(true);
    System.exit(succeeded ? 0 : 1);
}

/**
 * Question 1 mapper: turns each {@code course,student,score} line into a
 * {@code (course, score)} pair so the reducer can aggregate scores per course.
 *
 * <p>Lines with fewer than three comma-separated fields, or with a score that is not
 * an integer, are skipped and counted in the {@code StudentScores/MALFORMED_RECORDS}
 * counter instead of failing the whole task.
 */
public static class StudentScoresMapper extends Mapper<LongWritable,Text,Text,IntWritable> {
    // Reused across map() calls to avoid allocating a new Writable per record.
    private final IntWritable outValue = new IntWritable();
    private final Text outKey = new Text();

    /**
     * Emits the course name as key and the parsed score as value for one input line.
     *
     * @param key     position key supplied by the input format; not used
     * @param value   one input line, expected as {@code course,student,score}
     * @param context task context used to emit the pair and to count skipped lines
     * @throws IOException          if writing the output pair fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        String[] fields = value.toString().split(",");
        if (fields.length < 3) {
            // Blank or truncated line: there is no score to read.
            context.getCounter("StudentScores", "MALFORMED_RECORDS").increment(1);
            return;
        }

        int score;
        try {
            // trim() so that padding around the number does not break parsing.
            score = Integer.parseInt(fields[2].trim());
        } catch (NumberFormatException ignored) {
            // Intentionally ignored: a non-numeric score marks the record as malformed.
            context.getCounter("StudentScores", "MALFORMED_RECORDS").increment(1);
            return;
        }

        outKey.set(fields[0].trim());
        outValue.set(score);
        context.write(outKey, outValue);
    }
}

/**
 * Question 1 reducer: for each course, computes the highest score, the lowest score
 * and the integer (truncated) average, and emits them as
 * {@code max=<max> min=<min> avg=<avg>}.
 */
public static class StudentScoresReducer extends Reducer<Text,IntWritable,Text,Text> {
    // Reused across reduce() calls to avoid allocating a new Text per key.
    private final Text outValue = new Text();

    /**
     * Aggregates all scores of one course in a single pass.
     *
     * @param key     the course name
     * @param values  every score recorded for that course
     * @param context task context used to emit the formatted statistics
     * @throws IOException          if writing the output pair fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    protected void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
        int min = Integer.MAX_VALUE;
        int max = Integer.MIN_VALUE;
        long sum = 0; // long so that a very large group cannot overflow the running total
        int count = 0;

        // Track min, max and sum while streaming; no need to buffer or sort the scores.
        for (IntWritable val : values) {
            int score = val.get();
            min = Math.min(min, score);
            max = Math.max(max, score);
            sum += score;
            count++;
        }

        if (count == 0) {
            // Defensive: without any score there is nothing to report (and no divisor).
            return;
        }

        // Integer division on purpose: the required output format shows a whole-number average.
        long avg = sum / count;
        outValue.set("max=" + max + " min=" + min + " avg=" + avg);
        context.write(key, outValue);
    }
}

/**
 * Question 2 mapper: groups students by course and score. Each
 * {@code course,student,score} line becomes the pair
 * {@code ("<course>\t<score>", student)}.
 *
 * <p>Lines with fewer than three comma-separated fields are skipped and counted in
 * the {@code StudentScores/MALFORMED_RECORDS} counter instead of failing the task.
 */
public static class StudentScoresMapper2 extends Mapper<LongWritable,Text,Text,Text>{
    // Reused across map() calls to avoid allocating new Text objects per record.
    private final Text outKey = new Text();
    private final Text outValue = new Text();

    /**
     * Emits the composite course/score key and the student name for one input line.
     *
     * @param key     position key supplied by the input format; not used
     * @param value   one input line, expected as {@code course,student,score}
     * @param context task context used to emit the pair and to count skipped lines
     * @throws IOException          if writing the output pair fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        String[] fields = value.toString().split(",");
        if (fields.length < 3) {
            // Blank or truncated line: the score part of the key is missing.
            context.getCounter("StudentScores", "MALFORMED_RECORDS").increment(1);
            return;
        }

        // Trim so that stray padding does not split one course/score group into several keys.
        outKey.set(fields[0].trim() + "\t" + fields[2].trim());
        outValue.set(fields[1].trim());
        context.write(outKey, outValue);
    }
}

/**
 * Question 2 reducer: receives every student name that shares one course/score key
 * and, when at least two names are present, emits
 * {@code "<count> <name1>,<name2>,..."} for that key.
 */
public static class StudentScoresReducer2 extends Reducer<Text,Text,Text,Text>{
    // Reused output holder for the formatted "count names" value.
    private final Text result = new Text();

    /**
     * Joins the names for one course/score key and writes them if the score is shared.
     *
     * @param key     the composite key produced by the mapper
     * @param values  the student names recorded under that key
     * @param context task context used to emit the result
     * @throws IOException          if writing the output pair fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    protected void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
        StringBuilder joined = new StringBuilder();
        int occurrences = 0;

        for (Text student : values) {
            occurrences += 1;
            // A separator is only needed once something has already been written.
            String separator = joined.length() == 0 ? "" : ",";
            joined.append(separator).append(student.toString());
        }

        // A single name means the score is not shared, so nothing is emitted.
        if (occurrences < 2) {
            return;
        }

        result.set(occurrences + " " + joined);
        context.write(key, result);
    }
}
}