Counting the number of defaulting credit-card users at a bank (CSV download link)
Default rules: PAY_1~PAY_6: PAY_1 is the repayment status for September 2005; PAY_2 for August 2005; ...; PAY_6 for April 2005. The numeric suffixes in BILL_AMT1~BILL_AMT6 and PAY_AMT1~PAY_AMT6 have the same meaning. The values of PAY_1~PAY_6 mean: 0 = repaid on time; 1 = repayment delayed one month; 2 = repayment delayed two months; 3 = repayment delayed three months; ...; 9 = repayment delayed nine months or more. The monthly payment amount PAY_AMT must not fall below the bank's minimum payment for that month; otherwise the user is in default. If the payment amount PAY_AMT is greater than the previous month's bill amount BILL_AMT, the repayment counts as on time and the surplus stays on the card for future spending; if the payment amount is less than the previous month's bill but above the minimum payment, it counts as a delayed repayment.
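For intuition only, the rule can be written as a small predicate. This is just a sketch: the minimum-payment threshold minPayment is the bank's own figure and is not a column in the dataset, and the MapReduce job below does not apply this rule itself; it simply reads the precomputed default label from the CSV.

// Illustrative sketch of the default rule above. minPayment is hypothetical:
// the bank's minimum payment is not part of the UCI_Credit_Card.csv columns.
public class DefaultRule {

    // Paying less than the bank's minimum payment for the month is a default.
    public static boolean isDefault(double payAmt, double minPayment) {
        return payAmt < minPayment;
    }

    // Greater than the previous month's bill: paid on time, surplus stays on the card.
    public static boolean isOnTime(double payAmt, double billAmt) {
        return payAmt > billAmt;
    }

    // Below the previous month's bill but above the minimum payment: delayed repayment.
    public static boolean isDelayed(double payAmt, double billAmt, double minPayment) {
        return payAmt < billAmt && payAmt > minPayment;
    }
}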
Requirements:
Write a program on the Hadoop platform that counts the number of defaulting users at the bank.
Implementation:
pom.xml
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <groupId>Hadoop</groupId>
    <artifactId>BankDefaulter_MapReduce</artifactId>
    <version>1.0-SNAPSHOT</version>

    <dependencies>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-common</artifactId>
            <version>2.8.0</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-hdfs</artifactId>
            <version>2.8.0</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-mapreduce-client-core</artifactId>
            <version>2.8.0</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-client</artifactId>
            <version>2.8.0</version>
        </dependency>
        <dependency>
            <groupId>au.com.bytecode</groupId>
            <artifactId>opencsv</artifactId>
            <version>2.4</version>
        </dependency>
    </dependencies>

    <build>
        <plugins>
            <plugin>
                <artifactId>maven-assembly-plugin</artifactId>
                <configuration>
                    <archive>
                        <manifest>
                            <mainClass>bankfinddefaulter.FindDefaulter</mainClass>
                        </manifest>
                    </archive>
                    <descriptorRefs>
                        <descriptorRef>jar-with-dependencies</descriptorRef>
                    </descriptorRefs>
                </configuration>
                <executions>
                    <execution>
                        <id>make-assembly</id>
                        <phase>package</phase>
                        <goals>
                            <goal>single</goal>
                        </goals>
                    </execution>
                </executions>
            </plugin>
        </plugins>
    </build>
</project>
FindDefaulter.java
package bankfinddefaulter;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class FindDefaulter {

    public static void main(String[] args) throws Throwable {
        Job job = new Job();
        job.setJarByClass(FindDefaulter.class);

        // Input CSV on HDFS and output directory (the output directory must not exist yet)
        FileInputFormat.addInputPath(job, new Path("hdfs://172.18.74.236:9000/input/UCI_Credit_Card.csv"));
        FileOutputFormat.setOutputPath(job, new Path("hdfs://172.18.74.236:9000/out"));

        job.setMapperClass(BankMapper.class);
        job.setReducerClass(BankReducer.class);

        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);

        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
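The input and output paths above are hardcoded to one cluster. As a sketch only (the class name and argument handling are my own, not part of the assignment), the same job can take the paths from the command line so the packaged jar is reusable against other files:

package bankfinddefaulter;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

// Hypothetical driver variant that takes <input csv> <output dir> as arguments.
public class FindDefaulterTool {

    public static void main(String[] args) throws Exception {
        if (args.length != 2) {
            System.err.println("Usage: FindDefaulterTool <input csv> <output dir>");
            System.exit(2);
        }

        Job job = Job.getInstance(new Configuration(), "bank defaulter count");
        job.setJarByClass(FindDefaulterTool.class);

        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        job.setMapperClass(BankMapper.class);
        job.setReducerClass(BankReducer.class);

        // Map and final output types are identical, so one pair of declarations suffices.
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);

        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}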
BankReducer.java
package bankfinddefaulter;

import java.io.IOException;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

public class BankReducer extends Reducer<Text, IntWritable, Text, IntWritable> {

    @Override
    protected void reduce(Text key, Iterable<IntWritable> values, Context context)
            throws IOException, InterruptedException {
        // Count how many records carry this label (0 = no default, 1 = default)
        int count = 0;
        for (IntWritable value : values) {
            count++;
        }
        context.write(key, new IntWritable(count));
    }
}
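A side note, not part of the original code: because the mapper always emits the value 1, the reducer could just as well sum the values instead of counting iterations. Summing keeps the result correct even if the class is also registered as a combiner in the driver with job.setCombinerClass(...). A sketch of that variant, with a class name of my own:

package bankfinddefaulter;

import java.io.IOException;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

// Hypothetical variant: sums the 1s emitted by the mapper, so it can also serve as a combiner.
public class SummingBankReducer extends Reducer<Text, IntWritable, Text, IntWritable> {

    @Override
    protected void reduce(Text key, Iterable<IntWritable> values, Context context)
            throws IOException, InterruptedException {
        int count = 0;
        for (IntWritable value : values) {
            count += value.get();  // summing keeps partially combined counts correct
        }
        context.write(key, new IntWritable(count));
    }
}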
BankMapper.java
package bankfinddefaulter;

import java.io.IOException;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import au.com.bytecode.opencsv.CSVParser;

public class BankMapper extends Mapper<LongWritable, Text, Text, IntWritable> {

    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        // Skip the header row, which starts at byte offset 0
        if (key.get() > 0) {
            String[] lines = new CSVParser().parseLine(value.toString());
            // Column index 24 is the last column, the default label (1 = default, 0 = no default)
            context.write(new Text(lines[24]), new IntWritable(1));
        }
    }
}
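The key.get() > 0 test skips only the line at byte offset 0; that is enough for one small CSV, but it relies on the header sitting at the very start of the input. A more defensive variant, sketched here with a class name of my own, skips any line whose last field is not a 0/1 label:

package bankfinddefaulter;

import java.io.IOException;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import au.com.bytecode.opencsv.CSVParser;

// Hypothetical variant that skips the header by content rather than by byte offset.
public class SafeBankMapper extends Mapper<LongWritable, Text, Text, IntWritable> {

    private final CSVParser parser = new CSVParser();   // reuse one parser per task
    private static final IntWritable ONE = new IntWritable(1);

    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        String[] fields = parser.parseLine(value.toString());
        if (fields.length < 25) {
            return;  // malformed or empty line
        }
        String label = fields[24].trim();
        // The header row carries the column name here; data rows carry 0 or 1.
        if (label.equals("0") || label.equals("1")) {
            context.write(new Text(label), ONE);
        }
    }
}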
Method 1: Write the business code in IDEA, use mvn to package it into a jar, then upload the jar to the Hadoop platform and run it there.
In the Terminal console inside IDEA, enter
mvn clean package
This command relies on the following build section of pom.xml:
<build>
    <plugins>
        <plugin>
            <artifactId>maven-assembly-plugin</artifactId>
            <configuration>
                <archive>
                    <manifest>
                        <mainClass>bankfinddefaulter.FindDefaulter</mainClass>
                    </manifest>
                </archive>
                <descriptorRefs>
                    <descriptorRef>jar-with-dependencies</descriptorRef>
                </descriptorRefs>
            </configuration>
            <executions>
                <execution>
                    <id>make-assembly</id>
                    <phase>package</phase>
                    <goals>
                        <goal>single</goal>
                    </goals>
                </execution>
            </executions>
        </plugin>
    </plugins>
</build>
Run the jar on the Hadoop platform: mvn clean package writes the runnable fat jar to target/BankDefaulter_MapReduce-1.0-SNAPSHOT-jar-with-dependencies.jar. Upload it to the Hadoop platform and enter
hadoop jar BankDefaulter_MapReduce-1.0-SNAPSHOT-jar-with-dependencies.jar
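The driver expects the CSV at hdfs://172.18.74.236:9000/input/UCI_Credit_Card.csv and writes its result to /out, which must not exist before the run. Assuming those paths (adjust them to your cluster), the upload beforehand and the result check afterwards look roughly like this:

hdfs dfs -mkdir -p /input
hdfs dfs -put UCI_Credit_Card.csv /input/
hdfs dfs -rm -r /out              # only if a previous run left an /out directory behind
hdfs dfs -cat /out/part-r-00000   # after the hadoop jar command has finished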
In the output, the line keyed 1 gives the number of defaulting users: 6,636 users defaulted in total.
Method 2: Run locally from IDEA
After setting up the Hadoop development environment on Windows, run FindDefaulter.java,
then download and view the output from the 172.18.74.236:50070/output directory (the output path set in the code is /out).
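If the local run on Windows fails because HADOOP_HOME is not set or winutils.exe is missing, a common workaround is to point hadoop.home.dir at a local Hadoop directory before the Job is created. The path below is an assumption; it must contain bin\winutils.exe matching the Hadoop version:

// Hypothetical first statement of main() in FindDefaulter for a Windows local run;
// C:\hadoop is an assumed location that contains bin\winutils.exe.
System.setProperty("hadoop.home.dir", "C:\\hadoop");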