2. Created a Maven project in IntelliJ with the following pom.xml:
<?xml version="1.0" encoding="UTF-8"?>
<!-- Maven build descriptor for the sample Spark application. -->
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
  <modelVersion>4.0.0</modelVersion>

  <!-- Project coordinates; the build produces a plain jar. -->
  <groupId>edu.berkeley</groupId>
  <artifactId>simple-project</artifactId>
  <version>1.0-SNAPSHOT</version>
  <packaging>jar</packaging>
  <name>Simple Project</name>

  <dependencies>
    <!-- Spark core 1.3.1; the _2.10 suffix is the Scala version of the artifact. -->
    <dependency>
      <groupId>org.apache.spark</groupId>
      <artifactId>spark-core_2.10</artifactId>
      <version>1.3.1</version>
    </dependency>
  </dependencies>
</project>
-----------------------------------
/* SimpleApp.java */import org.apache.spark.api.java.*; import org.apache.spark.SparkConf; import org.apache.spark.api.java.function.Function; public class SimpleJava { public static void main(String[] args) { String logFile = "/user/XXXXXXXX/input/a.log"; // Should be some file on your system SparkConf conf = new SparkConf().setAppName("Simple Application"); JavaSparkContext sc = new JavaSparkContext(conf); JavaRDD<String> logData = sc.textFile(logFile).cache(); long numAs = logData.filter(new Function<String, Boolean>() { public Boolean call(String s) { return s.contains("a"); } }).count(); long numBs = logData.filter(new Function<String, Boolean>() { public Boolean call(String s) { return s.contains("b"); } }).count(); System.out.println("Lines with a: " + numAs + ", lines with b: " + numBs);long numByField = logData.filter(new Function<String, Boolean>() {public Boolean call(String s) { String[] token = s.split(";"); boolean existed = false; for (int i = 0; i < token.length; i++) { if (i == 7) { String timeInHdfs = token[i]; //2015-06-30 14:00:29.0 System.out.println(timeInHdfs); if (!timeInHdfs.equalsIgnoreCase("null") && timeInHdfs.compareTo("2015-06-29 23:59:59") > 0) { existed = true; } } } return existed; } }).count(); System.out.println("-----------------------------------------------------------------------"); System.out.println("Lines with bigger time: numByField: " + numByField);} }-----------------------------------notes: when created artifact, select "link by META-INF" rather than build in.Run:$ ./bin/spark-submit --class "SimpleJava" --master local[4] ~/work/dev/bigdata/SimpleJava/out/artifacts/SimpleJava_jar/SimpleJava.jar
Check in the web UI:
$ ./sbin/start-all.sh
Visit http://localhost:8080
No comments:
Post a Comment