Compare commits
9 Commits
9038747b35
...
main
| Author | SHA1 | Date | |
|---|---|---|---|
|
cca0f46476
|
|||
|
1190444a91
|
|||
|
4538044099
|
|||
|
e635221d58
|
|||
|
29826dad3d
|
|||
|
5283defd66
|
|||
|
1f2742dcec
|
|||
|
c1f113c632
|
|||
|
658a087c64
|
@@ -0,0 +1,31 @@
|
|||||||
|
import org.apache.hadoop.conf.Configuration;
|
||||||
|
import org.apache.hadoop.fs.Path;
|
||||||
|
import org.apache.hadoop.io.IntWritable;
|
||||||
|
import org.apache.hadoop.io.Text;
|
||||||
|
import org.apache.hadoop.mapreduce.Job;
|
||||||
|
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
|
||||||
|
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
|
||||||
|
|
||||||
|
public class CharacterCountDriver {
|
||||||
|
public static void main(String[] args) throws Exception {
|
||||||
|
if (args.length != 2) {
|
||||||
|
System.err.println("Usage: CharacterCountDriver <input path> <output path>");
|
||||||
|
System.exit(-1);
|
||||||
|
}
|
||||||
|
|
||||||
|
Configuration conf = new Configuration();
|
||||||
|
Job job = Job.getInstance(conf, "Character Count");
|
||||||
|
job.setJarByClass(CharacterCountDriver.class);
|
||||||
|
job.setMapperClass(CharacterCountMapper.class);
|
||||||
|
job.setCombinerClass(CharacterCountReducer.class);
|
||||||
|
job.setReducerClass(CharacterCountReducer.class);
|
||||||
|
job.setOutputKeyClass(Text.class);
|
||||||
|
job.setOutputValueClass(IntWritable.class);
|
||||||
|
|
||||||
|
FileInputFormat.addInputPath(job, new Path(args[0]));
|
||||||
|
FileOutputFormat.setOutputPath(job, new Path(args[1]));
|
||||||
|
|
||||||
|
System.exit(job.waitForCompletion(true) ? 0 : 1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
@@ -0,0 +1,21 @@
|
|||||||
|
import java.io.IOException;
|
||||||
|
import org.apache.hadoop.io.IntWritable;
|
||||||
|
import org.apache.hadoop.io.Text;
|
||||||
|
import org.apache.hadoop.mapreduce.Mapper;
|
||||||
|
|
||||||
|
public class CharacterCountMapper extends Mapper<Object, Text, Text, IntWritable> {
|
||||||
|
private final static IntWritable one = new IntWritable(1);
|
||||||
|
private Text character = new Text();
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected void map(Object key, Text value, Context context) throws IOException, InterruptedException {
|
||||||
|
String line = value.toString().toLowerCase();
|
||||||
|
for (char c : line.toCharArray()) {
|
||||||
|
if (Character.isAlphabetic(c)) {
|
||||||
|
character.set(String.valueOf(c));
|
||||||
|
context.write(character, one);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
@@ -0,0 +1,19 @@
|
|||||||
|
import java.io.IOException;
|
||||||
|
import org.apache.hadoop.io.IntWritable;
|
||||||
|
import org.apache.hadoop.io.Text;
|
||||||
|
import org.apache.hadoop.mapreduce.Reducer;
|
||||||
|
|
||||||
|
public class CharacterCountReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
|
||||||
|
private IntWritable result = new IntWritable();
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
|
||||||
|
int count = 0;
|
||||||
|
for (IntWritable val : values) {
|
||||||
|
count += val.get();
|
||||||
|
}
|
||||||
|
result.set(count);
|
||||||
|
context.write(key, result);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
@@ -0,0 +1,74 @@
|
|||||||
|
### List of Commands
|
||||||
|
|
||||||
|
1. **Create a Directory for Your Project** (copy the three `.java` source files into it before compiling):
|
||||||
|
|
||||||
|
```bash
|
||||||
|
mkdir ~/hadoop_char_count
|
||||||
|
cd ~/hadoop_char_count
|
||||||
|
```
|
||||||
|
|
||||||
|
2. **Compile the Java Files**:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
javac -classpath $(hadoop classpath) -d . CharacterCountMapper.java CharacterCountReducer.java CharacterCountDriver.java
|
||||||
|
```
|
||||||
|
|
||||||
|
3. **Create the JAR File**:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
jar cvf CharacterCount.jar *.class
|
||||||
|
```
|
||||||
|
|
||||||
|
4. **Create Input Directory in HDFS** (if needed):
|
||||||
|
|
||||||
|
```bash
|
||||||
|
hdfs dfs -mkdir -p /user/hduser/input
|
||||||
|
```
|
||||||
|
|
||||||
|
5. **Upload Input File to HDFS**:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
hdfs dfs -put /path/to/your/local/input.txt /user/hduser/input/
|
||||||
|
```
|
||||||
|
|
||||||
|
6. **Run the MapReduce Job**:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
hadoop jar CharacterCount.jar CharacterCountDriver /user/hduser/input /user/hduser/output
|
||||||
|
```
|
||||||
|
|
||||||
|
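7. **Remove Existing Output Directory** (only needed before re-running the job, because Hadoop refuses to write to an output directory that already exists):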
|
||||||
|
|
||||||
|
```bash
|
||||||
|
hdfs dfs -rm -r /user/hduser/output
|
||||||
|
```
|
||||||
|
|
||||||
|
8. **List Contents of the Output Directory**:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
hdfs dfs -ls /user/hduser/output
|
||||||
|
```
|
||||||
|
|
||||||
|
9. **View the Output File**:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
hdfs dfs -cat /user/hduser/output/part-r-00000
|
||||||
|
```
|
||||||
|
|
||||||
|
10. **View Output with `more` or `less`**:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
hdfs dfs -cat /user/hduser/output/part-r-00000 | more
|
||||||
|
```
|
||||||
|
or
|
||||||
|
```bash
|
||||||
|
hdfs dfs -cat /user/hduser/output/part-r-00000 | less
|
||||||
|
```
|
||||||
|
|
||||||
|
11. **Copy Output to Local File System (Optional)**:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
hdfs dfs -get /user/hduser/output/part-r-00000 /path/to/local/directory/
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
BIN
Binary file not shown.
Binary file not shown.
BIN
Binary file not shown.
@@ -37,6 +37,7 @@ This repository contains essential resources for the Information Retrieval cours
|
|||||||
- [END-SEM](Question%20Papers/END-SEM)
|
- [END-SEM](Question%20Papers/END-SEM)
|
||||||
|
|
||||||
### [IN-SEM PYQ Answers](Notes/IN-SEM%20PYQ%20Answers)
|
### [IN-SEM PYQ Answers](Notes/IN-SEM%20PYQ%20Answers)
|
||||||
|
### [END-SEM PYQ Answers](Notes/END-SEM%20PYQ%20Answers)
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user