Added code for hadoop.

This commit is contained in:
K
2025-10-12 23:38:15 +05:30
parent 9038747b35
commit 658a087c64
4 changed files with 145 additions and 0 deletions
+31
View File
@@ -0,0 +1,31 @@
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
/**
 * Command-line entry point that configures and submits the "Character Count"
 * MapReduce job.
 *
 * <p>Expects exactly two arguments: the input path and the output path.
 */
public class CharacterCountDriver {

    /**
     * Validates the arguments, submits the job and terminates the JVM.
     *
     * <p>Exit status: {@code -1} when the argument count is not two,
     * {@code 0} when the job completes successfully, {@code 1} otherwise.
     *
     * @param args {@code args[0]} is the input path, {@code args[1]} the output path
     * @throws Exception if job creation, configuration or execution fails
     */
    public static void main(String[] args) throws Exception {
        boolean hasBothPaths = (args.length == 2);
        if (!hasBothPaths) {
            System.err.println("Usage: CharacterCountDriver <input path> <output path>");
            System.exit(-1);
        }

        Job characterCountJob = buildJob(args[0], args[1]);

        // 'true' asks Hadoop to report progress while the job runs.
        boolean succeeded = characterCountJob.waitForCompletion(true);
        int exitStatus = succeeded ? 0 : 1;
        System.exit(exitStatus);
    }

    /** Creates the job and wires up its classes, output types and paths. */
    private static Job buildJob(String inputLocation, String outputLocation) throws Exception {
        Configuration configuration = new Configuration();
        Job job = Job.getInstance(configuration, "Character Count");
        job.setJarByClass(CharacterCountDriver.class);

        // The reducer class is registered as the combiner as well.
        job.setMapperClass(CharacterCountMapper.class);
        job.setCombinerClass(CharacterCountReducer.class);
        job.setReducerClass(CharacterCountReducer.class);

        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);

        Path inputPath = new Path(inputLocation);
        Path outputPath = new Path(outputLocation);
        FileInputFormat.addInputPath(job, inputPath);
        FileOutputFormat.setOutputPath(job, outputPath);
        return job;
    }
}
+21
View File
@@ -0,0 +1,21 @@
import java.io.IOException;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
/**
 * Mapper that emits {@code (character, 1)} for every alphabetic character of
 * each input line, after lower-casing the line.
 */
public class CharacterCountMapper extends Mapper<Object, Text, Text, IntWritable> {
    /** Constant count written for every emitted character. */
    private static final IntWritable ONE = new IntWritable(1);

    /** Reused output key, overwritten before each write. */
    private final Text character = new Text();

    /**
     * Lower-cases the line and writes one {@code (character, 1)} pair per
     * alphabetic code point.
     *
     * @param key     input key; not used
     * @param value   the line of text to scan
     * @param context sink for the emitted pairs
     * @throws IOException          if writing to the context fails
     * @throws InterruptedException if the write is interrupted
     */
    @Override
    protected void map(Object key, Text value, Context context) throws IOException, InterruptedException {
        // Locale.ROOT keeps the case mapping identical on every node; the
        // no-argument toLowerCase() depends on the JVM's default locale.
        String line = value.toString().toLowerCase(java.util.Locale.ROOT);

        // Walk code points rather than chars so that characters outside the
        // Basic Multilingual Plane are tested whole instead of as two
        // surrogate halves (which are never alphabetic).
        int index = 0;
        while (index < line.length()) {
            int codePoint = line.codePointAt(index);
            index += Character.charCount(codePoint);
            if (Character.isAlphabetic(codePoint)) {
                character.set(new String(Character.toChars(codePoint)));
                context.write(character, ONE);
            }
        }
    }
}
+19
View File
@@ -0,0 +1,19 @@
import java.io.IOException;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
/**
 * Reducer that sums the counts received for a key and writes
 * {@code (key, total)}.
 */
public class CharacterCountReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
    /** Reused output value, overwritten before each write. */
    private final IntWritable result = new IntWritable();

    /**
     * Adds up all values for {@code key} and writes the total.
     *
     * @param key     the key whose counts are being combined
     * @param values  the partial counts for {@code key}
     * @param context sink for the emitted total
     * @throws IOException          if writing to the context fails
     * @throws InterruptedException if the write is interrupted
     * @throws ArithmeticException  if the total does not fit in an {@code int}
     */
    @Override
    protected void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
        // Accumulate in a long so the running sum cannot silently wrap around
        // the way an int accumulator would.
        long total = 0L;
        for (IntWritable partial : values) {
            total += partial.get();
        }

        // The output value type is IntWritable; fail loudly rather than emit a
        // truncated (possibly negative) count.
        if (total != (int) total) {
            throw new ArithmeticException(
                    "Count for key '" + key + "' does not fit in an int: " + total);
        }

        result.set((int) total);
        context.write(key, result);
    }
}
+74
View File
@@ -0,0 +1,74 @@
### List of Commands
1. **Create a Directory for Your Project** (then copy the three `.java` source files into it):
```bash
mkdir ~/hadoop_char_count
cd ~/hadoop_char_count
```
2. **Compile the Java Files**:
```bash
javac -classpath $(hadoop classpath) -d . CharacterCountMapper.java CharacterCountReducer.java CharacterCountDriver.java
```
3. **Create the JAR File**:
```bash
jar cvf CharacterCount.jar *.class
```
4. **Create Input Directory in HDFS** (if needed):
```bash
hdfs dfs -mkdir -p /user/hduser/input
```
5. **Upload Input File to HDFS**:
```bash
hdfs dfs -put /path/to/your/local/input.txt /user/hduser/input/
```
6. **Run the MapReduce Job**:
```bash
hadoop jar CharacterCount.jar CharacterCountDriver /user/hduser/input /user/hduser/output
```
7. **Remove Existing Output Directory** (run this *before* step 6 when re-running; the job fails if the output directory already exists):
```bash
hdfs dfs -rm -r /user/hduser/output
```
8. **List Contents of the Output Directory**:
```bash
hdfs dfs -ls /user/hduser/output
```
9. **View the Output File**:
```bash
hdfs dfs -cat /user/hduser/output/part-r-00000
```
10. **View Output with `more` or `less`**:
```bash
hdfs dfs -cat /user/hduser/output/part-r-00000 | more
```
or
```bash
hdfs dfs -cat /user/hduser/output/part-r-00000 | less
```
11. **Copy Output to Local File System (Optional)**:
```bash
hdfs dfs -get /user/hduser/output/part-r-00000 /path/to/local/directory/
```
---