AJUG April 2011 Raw hadoop example
package org.ajug;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.conf.*;
import org.apache.hadoop.io.*;
import org.apache.hadoop.mapreduce.*;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
// Driver: configures the MapReduce job, wires in the mapper and reducer, and submits it.
public class MnM {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    Job job = new Job(conf, "ajug");
    // Point the job at the jar containing these classes so the task JVMs can load them.
    job.setJarByClass(MnM.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Data.class);
    job.setMapperClass(MnMMapper.class);
    job.setReducerClass(MnMReducer.class);
    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    FileInputFormat.addInputPath(job, new Path(args[0]));   // input path
    FileOutputFormat.setOutputPath(job, new Path(args[1])); // output path (must not already exist)
    job.waitForCompletion(true);
  }
}
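With the driver above and the Data, mapper, and reducer classes that follow packaged into a jar, the job can be launched with the standard hadoop jar command. The jar name and paths below are placeholders for illustration, not part of the original slides:
hadoop jar mnm.jar org.ajug.MnM /user/ajug/input /user/ajug/output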
================================================
package org.ajug;
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.WritableComparable;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
// Custom value type holding a width and a weight. Data is only used as a map
// output value here, so Writable would suffice; implementing WritableComparable
// also lets it serve as a key if needed.
public class Data implements WritableComparable<Data> {
  private DoubleWritable width = new DoubleWritable();
  private DoubleWritable weight = new DoubleWritable();

  void set(double a_width, double a_weight) {
    width.set(a_width);
    weight.set(a_weight);
  }

  public double getWidth() {
    return width.get();
  }

  public double getWeight() {
    return weight.get();
  }

  // Binary serialization used by Hadoop when shuffling values between tasks.
  public void write(DataOutput out) throws IOException {
    width.write(out);
    weight.write(out);
  }

  public void readFields(DataInput in) throws IOException {
    width.readFields(in);
    weight.readFields(in);
  }

  public int hashCode() {
    return width.hashCode() * 163 + weight.hashCode();
  }

  public int compareTo(Data tp) {
    int cmp = width.compareTo(tp.width);
    if (cmp != 0) {
      return cmp;
    }
    return weight.compareTo(tp.weight);
  }

  // Text form written by TextOutputFormat: width and weight separated by a tab.
  public String toString() {
    return width + "\t" + weight;
  }
}
========================================
package org.ajug;
import org.apache.hadoop.io.*;
import org.apache.hadoop.mapreduce.*;
import java.io.IOException;
// Mapper: parses comma-separated lines of the form color,weight,width and
// emits the color as the key with a Data(width, weight) value.
public class MnMMapper extends Mapper<LongWritable, Text, Text, Data> {
  private Text color = new Text();
  private Data data = new Data();

  public void map(LongWritable key, Text value, Context context)
      throws IOException, InterruptedException {
    String line = value.toString();
    String[] fields = line.split(",");
    if (fields.length > 2) { // skip malformed lines
      color.set(fields[0]);
      double weight = Double.parseDouble(fields[1]);
      double width = Double.parseDouble(fields[2]);
      data.set(width, weight);
      context.write(color, data);
    }
  }
}
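The mapper expects each input line to be comma-separated as color,weight,width. An illustrative line (values invented for the example) such as
brown,0.91,1.34
emits the key brown with a Data value whose width is 1.34 and whose weight is 0.91.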
======================================
package org.ajug;
import org.apache.hadoop.io.*;
import org.apache.hadoop.mapreduce.*;
import java.io.IOException;
// Reducer: averages the widths and weights of all Data values seen for one color key.
public class MnMReducer extends Reducer<Text, Data, Text, Data> {
  private Data data = new Data();

  public void reduce(Text key, Iterable<Data> values, Context context)
      throws IOException, InterruptedException {
    double weights = 0;
    double widths = 0;
    int count = 0;
    // Hadoop reuses the value object during iteration, so read the fields out as we go.
    for (Data value : values) {
      count++;
      weights += value.getWeight();
      widths += value.getWidth();
    }
    data.set(widths / count, weights / count);
    context.write(key, data);
  }
}
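For each color the reducer writes one record containing the average width and average weight. With TextOutputFormat and the Data.toString above, an output line is tab-separated, e.g. (illustrative numbers):
brown   1.32   0.93
i.e. color, average width, average weight.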