From 658a087c648e7b40eee90fb4f96eebf0a22fd675 Mon Sep 17 00:00:00 2001
From: Kshitij
Date: Sun, 12 Oct 2025 23:38:15 +0530
Subject: [PATCH] Added code for hadoop.

---
 Codes/Code-1.4/CharacterCountDriver.java  | 44 ++++++++++++++
 Codes/Code-1.4/CharacterCountMapper.java  | 42 +++++++++++++
 Codes/Code-1.4/CharacterCountReducer.java | 36 +++++++++++
 Codes/Code-1.4/Commands.md                | 74 +++++++++++++++++++++++
 4 files changed, 196 insertions(+)
 create mode 100644 Codes/Code-1.4/CharacterCountDriver.java
 create mode 100644 Codes/Code-1.4/CharacterCountMapper.java
 create mode 100644 Codes/Code-1.4/CharacterCountReducer.java
 create mode 100644 Codes/Code-1.4/Commands.md

diff --git a/Codes/Code-1.4/CharacterCountDriver.java b/Codes/Code-1.4/CharacterCountDriver.java
new file mode 100644
index 0000000..1598773
--- /dev/null
+++ b/Codes/Code-1.4/CharacterCountDriver.java
@@ -0,0 +1,44 @@
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.IntWritable;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
+import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
+
+/**
+ * Configures and submits the "Character Count" MapReduce job.
+ *
+ * <p>Expects exactly two arguments: the input path and the output path.
+ */
+public class CharacterCountDriver {
+    /**
+     * Validates the arguments, wires up the job, and exits with status 0 when
+     * the job succeeds or 1 when it fails.
+     *
+     * @param args {@code args[0]} is the input path, {@code args[1]} the output path
+     * @throws Exception if the job cannot be configured or run
+     */
+    public static void main(String[] args) throws Exception {
+        if (args.length != 2) {
+            System.err.println("Usage: CharacterCountDriver <input path> <output path>");
+            System.exit(-1);
+        }
+
+        Configuration conf = new Configuration();
+        Job job = Job.getInstance(conf, "Character Count");
+        job.setJarByClass(CharacterCountDriver.class);
+        job.setMapperClass(CharacterCountMapper.class);
+        // The reducer only sums its values, so it is also used as the combiner.
+        job.setCombinerClass(CharacterCountReducer.class);
+        job.setReducerClass(CharacterCountReducer.class);
+        job.setOutputKeyClass(Text.class);
+        job.setOutputValueClass(IntWritable.class);
+
+        FileInputFormat.addInputPath(job, new Path(args[0]));
+        FileOutputFormat.setOutputPath(job, new Path(args[1]));
+
+        System.exit(job.waitForCompletion(true) ? 0 : 1);
+    }
+}
+
diff --git a/Codes/Code-1.4/CharacterCountMapper.java b/Codes/Code-1.4/CharacterCountMapper.java
new file mode 100644
index 0000000..05bf850
--- /dev/null
+++ b/Codes/Code-1.4/CharacterCountMapper.java
@@ -0,0 +1,42 @@
+import java.io.IOException;
+import java.util.Locale;
+import org.apache.hadoop.io.IntWritable;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapreduce.Mapper;
+
+/**
+ * Emits {@code (character, 1)} for every alphabetic character of each input record.
+ *
+ * <p>Records are lower-cased first, so upper- and lower-case letters share a key.
+ */
+public class CharacterCountMapper extends Mapper<Object, Text, Text, IntWritable> {
+    private static final IntWritable ONE = new IntWritable(1);
+    // Reused for every emitted key.
+    private final Text character = new Text();
+
+    /**
+     * Writes one {@code (character, 1)} pair per alphabetic code point in {@code value}.
+     *
+     * @param key input key; not used
+     * @param value the text record to scan
+     * @param context receives the emitted pairs
+     * @throws IOException propagated from {@code context.write}
+     * @throws InterruptedException propagated from {@code context.write}
+     */
+    @Override
+    protected void map(Object key, Text value, Context context)
+            throws IOException, InterruptedException {
+        // Locale.ROOT keeps the case mapping independent of the JVM's default locale.
+        String line = value.toString().toLowerCase(Locale.ROOT);
+        // Walk code points rather than chars so supplementary-plane letters are not split.
+        for (int i = 0; i < line.length(); ) {
+            int codePoint = line.codePointAt(i);
+            i += Character.charCount(codePoint);
+            if (Character.isAlphabetic(codePoint)) {
+                character.set(new String(Character.toChars(codePoint)));
+                context.write(character, ONE);
+            }
+        }
+    }
+}
+
diff --git a/Codes/Code-1.4/CharacterCountReducer.java b/Codes/Code-1.4/CharacterCountReducer.java
new file mode 100644
index 0000000..3bccfc3
--- /dev/null
+++ b/Codes/Code-1.4/CharacterCountReducer.java
@@ -0,0 +1,36 @@
+import java.io.IOException;
+import org.apache.hadoop.io.IntWritable;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapreduce.Reducer;
+
+/**
+ * Sums the counts received for a character and emits {@code (character, total)}.
+ *
+ * <p>The total is accumulated in an {@code int}, so it wraps around past
+ * {@link Integer#MAX_VALUE}.
+ */
+public class CharacterCountReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
+    // Reused for every emitted total.
+    private final IntWritable result = new IntWritable();
+
+    /**
+     * Adds up {@code values} and writes the sum under {@code key}.
+     *
+     * @param key the character being counted
+     * @param values partial counts for {@code key}
+     * @param context receives the {@code (key, sum)} pair
+     * @throws IOException propagated from {@code context.write}
+     * @throws InterruptedException propagated from {@code context.write}
+     */
+    @Override
+    protected void reduce(Text key, Iterable<IntWritable> values, Context context)
+            throws IOException, InterruptedException {
+        int count = 0;
+        for (IntWritable val : values) {
+            count += val.get();
+        }
+        result.set(count);
+        context.write(key, result);
+    }
+}
+
diff --git a/Codes/Code-1.4/Commands.md b/Codes/Code-1.4/Commands.md
new file mode 100644
index 0000000..6d69b88
--- /dev/null
+++ b/Codes/Code-1.4/Commands.md
@@ -0,0 +1,74 @@
+### List of Commands
+
+1. **Create a Directory for Your Project**:
+
+    ```bash
+    mkdir ~/hadoop_char_count
+    cd ~/hadoop_char_count
+    ```
+
+2. **Compile the Java Files**:
+
+    ```bash
+    javac -classpath $(hadoop classpath) -d . CharacterCountMapper.java CharacterCountReducer.java CharacterCountDriver.java
+    ```
+
+3. **Create the JAR File**:
+
+    ```bash
+    jar cvf CharacterCount.jar *.class
+    ```
+
+4. **Create Input Directory in HDFS** (if needed):
+
+    ```bash
+    hdfs dfs -mkdir -p /user/hduser/input
+    ```
+
+5. **Upload Input File to HDFS**:
+
+    ```bash
+    hdfs dfs -put /path/to/your/local/input.txt /user/hduser/input/
+    ```
+
+6. **Run the MapReduce Job**:
+
+    ```bash
+    hadoop jar CharacterCount.jar CharacterCountDriver /user/hduser/input /user/hduser/output
+    ```
+
+7. **Remove Existing Output Directory** (needed before re-running step 6; the job fails if the output directory already exists):
+
+    ```bash
+    hdfs dfs -rm -r /user/hduser/output
+    ```
+
+8. **List Contents of the Output Directory**:
+
+    ```bash
+    hdfs dfs -ls /user/hduser/output
+    ```
+
+9. **View the Output File**:
+
+    ```bash
+    hdfs dfs -cat /user/hduser/output/part-r-00000
+    ```
+
+10. **View Output with `more` or `less`**:
+
+    ```bash
+    hdfs dfs -cat /user/hduser/output/part-r-00000 | more
+    ```
+    or
+    ```bash
+    hdfs dfs -cat /user/hduser/output/part-r-00000 | less
+    ```
+
+11. **Copy Output to Local File System (Optional)**:
+
+    ```bash
+    hdfs dfs -get /user/hduser/output/part-r-00000 /path/to/local/directory/
+    ```
+
+---