Added code for hadoop.
This commit is contained in:
@@ -0,0 +1,31 @@
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
import org.apache.hadoop.io.IntWritable;
|
||||
import org.apache.hadoop.io.Text;
|
||||
import org.apache.hadoop.mapreduce.Job;
|
||||
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
|
||||
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
|
||||
|
||||
public class CharacterCountDriver {
|
||||
public static void main(String[] args) throws Exception {
|
||||
if (args.length != 2) {
|
||||
System.err.println("Usage: CharacterCountDriver <input path> <output path>");
|
||||
System.exit(-1);
|
||||
}
|
||||
|
||||
Configuration conf = new Configuration();
|
||||
Job job = Job.getInstance(conf, "Character Count");
|
||||
job.setJarByClass(CharacterCountDriver.class);
|
||||
job.setMapperClass(CharacterCountMapper.class);
|
||||
job.setCombinerClass(CharacterCountReducer.class);
|
||||
job.setReducerClass(CharacterCountReducer.class);
|
||||
job.setOutputKeyClass(Text.class);
|
||||
job.setOutputValueClass(IntWritable.class);
|
||||
|
||||
FileInputFormat.addInputPath(job, new Path(args[0]));
|
||||
FileOutputFormat.setOutputPath(job, new Path(args[1]));
|
||||
|
||||
System.exit(job.waitForCompletion(true) ? 0 : 1);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -0,0 +1,21 @@
|
||||
import java.io.IOException;
|
||||
import org.apache.hadoop.io.IntWritable;
|
||||
import org.apache.hadoop.io.Text;
|
||||
import org.apache.hadoop.mapreduce.Mapper;
|
||||
|
||||
public class CharacterCountMapper extends Mapper<Object, Text, Text, IntWritable> {
|
||||
private final static IntWritable one = new IntWritable(1);
|
||||
private Text character = new Text();
|
||||
|
||||
@Override
|
||||
protected void map(Object key, Text value, Context context) throws IOException, InterruptedException {
|
||||
String line = value.toString().toLowerCase();
|
||||
for (char c : line.toCharArray()) {
|
||||
if (Character.isAlphabetic(c)) {
|
||||
character.set(String.valueOf(c));
|
||||
context.write(character, one);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -0,0 +1,19 @@
|
||||
import java.io.IOException;
|
||||
import org.apache.hadoop.io.IntWritable;
|
||||
import org.apache.hadoop.io.Text;
|
||||
import org.apache.hadoop.mapreduce.Reducer;
|
||||
|
||||
public class CharacterCountReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
|
||||
private IntWritable result = new IntWritable();
|
||||
|
||||
@Override
|
||||
protected void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
|
||||
int count = 0;
|
||||
for (IntWritable val : values) {
|
||||
count += val.get();
|
||||
}
|
||||
result.set(count);
|
||||
context.write(key, result);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -0,0 +1,74 @@
|
||||
### List of Commands
|
||||
|
||||
1. **Create a Directory for Your Project** (then copy the three `.java` source files into it):
|
||||
|
||||
```bash
|
||||
mkdir ~/hadoop_char_count
|
||||
cd ~/hadoop_char_count
|
||||
```
|
||||
|
||||
2. **Compile the Java Files**:
|
||||
|
||||
```bash
|
||||
javac -classpath $(hadoop classpath) -d . CharacterCountMapper.java CharacterCountReducer.java CharacterCountDriver.java
|
||||
```
|
||||
|
||||
3. **Create the JAR File**:
|
||||
|
||||
```bash
|
||||
jar cvf CharacterCount.jar *.class
|
||||
```
|
||||
|
||||
4. **Create Input Directory in HDFS** (if needed):
|
||||
|
||||
```bash
|
||||
hdfs dfs -mkdir -p /user/hduser/input
|
||||
```
|
||||
|
||||
5. **Upload Input File to HDFS**:
|
||||
|
||||
```bash
|
||||
hdfs dfs -put /path/to/your/local/input.txt /user/hduser/input/
|
||||
```
|
||||
|
||||
6. **Run the MapReduce Job**:
|
||||
|
||||
```bash
|
||||
hadoop jar CharacterCount.jar CharacterCountDriver /user/hduser/input /user/hduser/output
|
||||
```
|
||||
|
||||
7. **Remove Existing Output Directory** (needed before re-running the job in step 6, which fails if the output directory already exists):
|
||||
|
||||
```bash
|
||||
hdfs dfs -rm -r /user/hduser/output
|
||||
```
|
||||
|
||||
8. **List Contents of the Output Directory**:
|
||||
|
||||
```bash
|
||||
hdfs dfs -ls /user/hduser/output
|
||||
```
|
||||
|
||||
9. **View the Output File**:
|
||||
|
||||
```bash
|
||||
hdfs dfs -cat /user/hduser/output/part-r-00000
|
||||
```
|
||||
|
||||
10. **View Output with `more` or `less`**:
|
||||
|
||||
```bash
|
||||
hdfs dfs -cat /user/hduser/output/part-r-00000 | more
|
||||
```
|
||||
or
|
||||
```bash
|
||||
hdfs dfs -cat /user/hduser/output/part-r-00000 | less
|
||||
```
|
||||
|
||||
11. **Copy Output to Local File System (Optional)**:
|
||||
|
||||
```bash
|
||||
hdfs dfs -get /user/hduser/output/part-r-00000 /path/to/local/directory/
|
||||
```
|
||||
|
||||
---
|
||||
Reference in New Issue
Block a user