ETL Project 2: Big Data Cleaning and Processing: Offline Data Analysis with MapReduce and Report Display (Complete Project)
Source: cnblogs | Author: symkmk123 | Date: 2019/1/2 9:38:02


The approach is the same as in my earlier post: https://www.cnblogs.com/symkmk123/p/10197467.html

The difference is that this time the input is web access-log data.

Avro: the first filtering pass

Looking at the data format, we mainly analyze the fourth field. It contains invalid entries such as .css, .jpg, and .png requests.

Observation shows that valid records never contain a dot, so the first pass writes only the once-filtered valid records into the Avro master table, excluding .css, .jpg, .png, and similar requests.

At the same time, the good/bad line counts are persisted to MySQL.
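The /hainiu.avro schema file is not included in the post. Judging from the fields the Text2Avro mapper below writes (uip, datetime, method, http, top, from, status1, status2, post, usagent), a schema along these lines should work; the record name and namespace here are only my assumptions:

{
  "namespace": "mrrun.hainiuetl",
  "type": "record",
  "name": "HainiuLog",
  "fields": [
    {"name": "uip",      "type": "string"},
    {"name": "datetime", "type": "string"},
    {"name": "method",   "type": "string"},
    {"name": "http",     "type": "string"},
    {"name": "top",      "type": "string"},
    {"name": "from",     "type": "string"},
    {"name": "status1",  "type": "string"},
    {"name": "status2",  "type": "string"},
    {"name": "post",     "type": "string"},
    {"name": "usagent",  "type": "string"}
  ]
}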

orc1: the top 10 most popular hainiu topics

Observation shows that the valid URLs for this requirement look like /topics/<number>, so the regex applied on top of the first-pass output is /topics/\d+.

Only the /topics/<number> part is kept, which makes it easy to aggregate the result with HQL.
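For reference, a Hive table matching that ORC output can be declared roughly as follows. The table, column, and partition names are taken from the export script later in the post, but the exact DDL is an assumption:

-- assumed DDL for the partitioned ORC table that receives the /topics/<number> records
CREATE EXTERNAL TABLE IF NOT EXISTS suyuan09.hainiuetltopics10_orc (
  uip      STRING,
  datetime STRING,
  top      STRING
)
PARTITIONED BY (month STRING, day STRING)
STORED AS ORC;

-- top 10 most visited topics, as queried in hive2data.sh further down
SELECT t.top, t.num
FROM (SELECT top, COUNT(*) num FROM hainiuetltopics10_orc GROUP BY top) t
SORT BY t.num DESC LIMIT 10;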

The code:

//Text2Avro
package mrrun.hainiuetl;

import java.io.IOException;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.Locale;

import org.apache.avro.Schema;
import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.mapred.AvroKey;
import org.apache.avro.mapreduce.AvroJob;
import org.apache.avro.mapreduce.AvroKeyOutputFormat;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import mrrun.base.BaseMR;

public class Text2Avro extends BaseMR {
    public static Schema schema = null;
    public static Schema.Parser parse = new Schema.Parser();

    public static class Text2AvroMapper extends Mapper<LongWritable, Text, AvroKey<GenericRecord>, NullWritable> {

        @Override
        protected void setup(Mapper<LongWritable, Text, AvroKey<GenericRecord>, NullWritable>.Context context)
                throws IOException, InterruptedException {
            // build the Schema object from the hainiu.avro resource on the classpath
            schema = parse.parse(Text2Avro.class.getResourceAsStream("/hainiu.avro"));
        }

        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            String line = value.toString();
            String[] splits = line.split("\001");
            if (splits == null || splits.length != 10) {
                System.out.println("==============");
                System.out.println(value.toString());
                context.getCounter("etl_err", "bad line num").increment(1L);
                return;
            }
            // System.out.println(util.getIpArea("202.8.77.12"));
            String uip1 = splits[0];
            String uip = IPUtil.getip(uip1);

            // parse the access time (e.g. 02/Jan/2019:09:38:02) into yyyyMMddHHmmss
            String datetime = splits[2];
            SimpleDateFormat sdf = new SimpleDateFormat("dd/MMM/yyyy:HH:mm:ss", Locale.ENGLISH);
            Date myDate = null;
            try {
                myDate = sdf.parse(datetime);
            } catch (ParseException e) {
                e.printStackTrace();
            }
            SimpleDateFormat sdf2 = new SimpleDateFormat("yyyyMMddHHmmss");
            String format = sdf2.format(myDate);

            // request field, e.g. GET /categories/8?filter=recent&page=12 HTTP/1.1
            String url = splits[3];
            String method = "";
            String top = "";
            String top1 = "";
            String http = "";
            if (url != null) {
                String[] s = url.split(" ");
                if (s.length == 3) {
                    method = s[0];
                    http = s[2];
                    top1 = s[1];
                    // URLs containing '.' (.css, .jpg, .png, ...) are invalid for this analysis
                    if (top1.contains(".")) {
                        context.getCounter("etl_err", "no line num").increment(1L);
                        return;
                    } else {
                        top = top1;
                    }
                }
            }

            String status1 = splits[4];
            String status2 = splits[5];
            String post = splits[6];
            String from = splits[7];
            String usagent = splits[8];

            // build one Avro record according to the schema
            GenericRecord record = new GenericData.Record(Text2Avro.schema);
            record.put("uip", uip);
            record.put("datetime", format);
            record.put("method", method);
            record.put("http", http);
            record.put("top", top);
            record.put("from", from);
            record.put("status1", status1);
            record.put("status2", status2);
            record.put("post", post);
            record.put("usagent", usagent);

            context.getCounter("etl_good", "good line num").increment(1L);
            System.out.println(uip + " " + format + " " + top + " " + from + " " + post + " " + usagent + " " + status1 + " " + status2 + " " + http);
            context.write(new AvroKey<GenericRecord>(record), NullWritable.get());
        }
    }

    @Override
    public Job getJob(Configuration conf) throws IOException {
        // // enable reduce output compression
        // conf.set(FileOutputFormat.COMPRESS, "true");
        // // set the reduce output compression codec
        // conf.set(FileOutputFormat.COMPRESS_CODEC, SnappyCodec.class.getName());
        Job job = Job.getInstance(conf, getJobNameWithTaskId());
        job.setJarByClass(Text2Avro.class);
        job.setMapperClass(Text2AvroMapper.class);
        job.setMapOutputKeyClass(AvroKey.class);
        job.setMapOutputValueClass(NullWritable.class);
        // map-only job
        job.setNumReduceTasks(0);
        // output format: Avro files
        job.setOutputFormatClass(AvroKeyOutputFormat.class);
        // build the Schema object from the hainiu.avro resource on the classpath
        schema = parse.parse(Text2Avro.class.getResourceAsStream("/hainiu.avro"));
        // set the Avro output key schema
        AvroJob.setOutputKeySchema(job, schema);
        FileInputFormat.addInputPath(job, getFirstJobInputPath());
        FileOutputFormat.setOutputPath(job, getJobOutputPath(getJobNameWithTaskId()));
        return job;
    }

    @Override
    public String getJobName() {
        return "etltext2avro";
    }
}
//Avro2Orc_topic10
package mrrun.hainiuetl;

import java.io.IOException;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.avro.Schema;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.mapred.AvroKey;
import org.apache.avro.mapreduce.AvroJob;
import org.apache.avro.mapreduce.AvroKeyInputFormat;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.ql.io.orc.CompressionKind;
import org.apache.hadoop.hive.ql.io.orc.OrcNewOutputFormat;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import mrrun.base.BaseMR;
import mrrun.util.OrcFormat;
import mrrun.util.OrcUtil;

public class Avro2Orc_topic10 extends BaseMR {
    public static Schema schema = null;
    public static Schema.Parser parse = new Schema.Parser();

    public static class Avro2OrcMapper extends Mapper<AvroKey<GenericRecord>, NullWritable, NullWritable, Writable> {
        OrcUtil orcUtil = new OrcUtil();

        @Override
        protected void setup(Context context) throws IOException, InterruptedException {
            orcUtil.setWriteOrcInspector(OrcFormat.etlorcSchema_topic10);
        }

        @Override
        protected void map(AvroKey<GenericRecord> key, NullWritable value, Context context)
                throws IOException, InterruptedException {
            // one Avro record from the first-pass output
            GenericRecord datum = key.datum();
            String uip = (String) datum.get("uip");
            String datetime = (String) datum.get("datetime");
            //String method = (String) datum.get("method");
            //String http = (String) datum.get("http");
            String top1 = (String) datum.get("top");
            String top = "";

            // keep only URLs of the form /topics/<number>
            String regex = "/topics/\\d+";
            Pattern pattern = Pattern.compile(regex);
            Matcher matcher = pattern.matcher(top1);
            if (matcher.find()) {
                top = matcher.group();
            } else {
                context.getCounter("etl_err", "notopics line num").increment(1L);
                return;
            }

            //orcUtil.addAttr(uip,datetime,method,http,uid,country,status1,status2,usagent);
            orcUtil.addAttr(uip, datetime, top);
            Writable w = orcUtil.serialize();
            context.getCounter("etl_good", "good line num").increment(1L);
            System.out.println(uip + " " + top);
            context.write(NullWritable.get(), w);
        }
    }

    @Override
    public Job getJob(Configuration conf) throws IOException {
        // disable speculative execution for map tasks so each split is processed by exactly one map
        conf.set("mapreduce.map.speculative", "false");
        // compress the ORC files with snappy
        conf.set("orc.compress", CompressionKind.SNAPPY.name());
        // create ORC indexes
        conf.set("orc.create.index", "true");

        Job job = Job.getInstance(conf, getJobNameWithTaskId());
        job.setJarByClass(Avro2Orc_topic10.class);
        job.setMapperClass(Avro2OrcMapper.class);
        job.setMapOutputKeyClass(NullWritable.class);
        job.setMapOutputValueClass(Writable.class);
        // map-only job
        job.setNumReduceTasks(0);
        job.setInputFormatClass(AvroKeyInputFormat.class);
        // build the Schema object from the hainiu.avro resource on the classpath
        schema = parse.parse(Avro2Orc_topic10.class.getResourceAsStream("/hainiu.avro"));
        AvroJob.setInputKeySchema(job, schema);
        job.setOutputFormatClass(OrcNewOutputFormat.class);
        FileInputFormat.addInputPath(job, getFirstJobInputPath());
        FileOutputFormat.setOutputPath(job, getJobOutputPath(getJobNameWithTaskId()));
        return job;
    }

    @Override
    public String getJobName() {
        return "etlAvro2Orc_topic10";
    }
}
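The helper classes mrrun.util.OrcFormat and mrrun.util.OrcUtil are not shown in the post. Below is a minimal sketch of what they might look like, built on Hive's OrcSerde; the real classes may differ:

// mrrun/util/OrcFormat.java: assumed ORC struct definition used by Avro2Orc_topic10
package mrrun.util;

public class OrcFormat {
    public static final String etlorcSchema_topic10 = "struct<uip:string,datetime:string,top:string>";
}

// mrrun/util/OrcUtil.java: minimal sketch built on Hive's OrcSerde
package mrrun.util;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

import org.apache.hadoop.hive.ql.io.orc.OrcSerde;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
import org.apache.hadoop.io.Writable;

public class OrcUtil {
    private final OrcSerde serde = new OrcSerde();
    private ObjectInspector inspector;
    private List<Object> row;

    // build a struct ObjectInspector from a type string such as "struct<uip:string,...>"
    public void setWriteOrcInspector(String typeString) {
        TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromTypeString(typeString);
        inspector = TypeInfoUtils.getStandardJavaObjectInspectorFromTypeInfo(typeInfo);
    }

    // collect the column values of one row, in schema order
    public void addAttr(Object... fields) {
        row = new ArrayList<Object>(Arrays.asList(fields));
    }

    // wrap the collected row into the Writable that OrcNewOutputFormat expects
    public Writable serialize() {
        return serde.serialize(row, inspector);
    }
}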
//Text2AvroJob
package mrrun.hainiuetl;

import java.text.SimpleDateFormat;
import java.util.Date;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.mapreduce.Counter;
import org.apache.hadoop.mapreduce.CounterGroup;
import org.apache.hadoop.mapreduce.Counters;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.jobcontrol.ControlledJob;
import org.apache.hadoop.mapreduce.lib.jobcontrol.JobControl;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

import mrrun.util.JobRunResult;
import mrrun.util.JobRunUtil;

public class Text2AvroJob extends Configured implements Tool {

    @Override
    public int run(String[] args) throws Exception {
        // get the Configuration object
        Configuration conf = getConf();
        // create the job-chain object
        JobControl jobc = new JobControl("etltext2avro");

        Text2Avro avro = new Text2Avro();
        // the configuration only needs to be assigned once
        avro.setConf(conf);
        ControlledJob orcCJob = avro.getControlledJob();
        Job job = orcCJob.getJob();
        job.waitForCompletion(true);

        JobRunResult result = JobRunUtil.run(jobc);
        result.setCounters("etl1", orcCJob.getJob().getCounters());
        result.print(true);

        // read the good/bad line counters back from the finished job
        Counters counterMap = result.getCounterMap("etl1");
        CounterGroup group1 = counterMap.getGroup("etl_good");
        CounterGroup group2 = counterMap.getGroup("etl_err");
        Counter good = group1.findCounter("good line num");
        Counter bad = group2.findCounter("bad line num");
        System.out.println("\t\t" + good.getDisplayName() + " = " + good.getValue());
        System.out.println("\t\t" + bad.getDisplayName() + " = " + bad.getValue());
        System.out.println("=======+++++++++====");

        // persist today's counts to MySQL
        Date date = new Date();
        SimpleDateFormat sdf3 = new SimpleDateFormat("yyyyMMdd");
        String format2 = sdf3.format(date);
        Results results = new Results();
        long bad_num = bad.getValue();
        long good_num = good.getValue();
        long total_num = bad_num + good_num;
        results.setBad_num(bad_num);
        results.setGood_num(good_num);
        results.setTotal_num(total_num);
        results.setDay(format2);
        double d = bad_num * 1.0 / total_num * 1.0;
        results.setBad_rate(d);
        System.out.println((double) ((double) bad_num / (double) total_num));

        DAO dao = new DAO();
        if (dao.getday(format2) != null) {
            // a row for today already exists: accumulate the counts and update it
            Results getday = dao.getday(format2);
            Long bad_num2 = getday.getBad_num();
            Long good_num2 = getday.getGood_num();
            Long total_num2 = getday.getTotal_num();
            getday.setDay(format2);
            getday.setBad_num(bad_num2 + bad_num);
            getday.setGood_num(good_num2 + good_num);
            getday.setTotal_num(total_num2 + total_num);
            double badrate = (bad_num2 + bad_num) * 1.0 / (total_num2 + total_num) * 1.0;
            getday.setBad_rate(badrate);
            dao.update(getday);
        } else {
            dao.insert(results);
        }

        jobc.addJob(orcCJob);
        return 0;
    }

    public static void main(String[] args) throws Exception {
        // -Dtask.id=1226 -Dtask.input.dir=/tmp/avro/input_hainiuetl -Dtask.base.dir=/tmp/avro
        System.exit(ToolRunner.run(new Text2AvroJob(), args));
    }
}
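The Results bean and the DAO that Text2AvroJob uses to persist the daily counts are not shown either. A minimal JDBC sketch follows, assuming a MySQL table named suyuan09_etl_results with columns day, good_num, bad_num, total_num and bad_rate; the table and column names are hypothetical, and the connection details are placeholders reusing the values from the script below:

// minimal sketch (assumption): the Results bean and DAO used by Text2AvroJob
package mrrun.hainiuetl;

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;

class Results {
    private String day;
    private long good_num, bad_num, total_num;
    private double bad_rate;
    public String getDay() { return day; }
    public void setDay(String day) { this.day = day; }
    public Long getGood_num() { return good_num; }
    public void setGood_num(long goodNum) { this.good_num = goodNum; }
    public Long getBad_num() { return bad_num; }
    public void setBad_num(long badNum) { this.bad_num = badNum; }
    public Long getTotal_num() { return total_num; }
    public void setTotal_num(long totalNum) { this.total_num = totalNum; }
    public double getBad_rate() { return bad_rate; }
    public void setBad_rate(double badRate) { this.bad_rate = badRate; }
}

class DAO {
    // placeholder connection details; adjust to the actual MySQL instance
    private static final String URL = "jdbc:mysql://192.168.88.195:3306/hainiutest";
    private static final String USER = "hainiu";
    private static final String PASS = "12345678";

    private Connection conn() throws SQLException {
        return DriverManager.getConnection(URL, USER, PASS);
    }

    // return the row for the given day, or null if none exists yet
    public Results getday(String day) throws SQLException {
        String sql = "SELECT `day`, good_num, bad_num, total_num, bad_rate FROM suyuan09_etl_results WHERE `day` = ?";
        try (Connection c = conn(); PreparedStatement ps = c.prepareStatement(sql)) {
            ps.setString(1, day);
            try (ResultSet rs = ps.executeQuery()) {
                if (!rs.next()) return null;
                Results r = new Results();
                r.setDay(rs.getString("day"));
                r.setGood_num(rs.getLong("good_num"));
                r.setBad_num(rs.getLong("bad_num"));
                r.setTotal_num(rs.getLong("total_num"));
                r.setBad_rate(rs.getDouble("bad_rate"));
                return r;
            }
        }
    }

    public void insert(Results r) throws SQLException {
        String sql = "INSERT INTO suyuan09_etl_results (`day`, good_num, bad_num, total_num, bad_rate) VALUES (?,?,?,?,?)";
        try (Connection c = conn(); PreparedStatement ps = c.prepareStatement(sql)) {
            ps.setString(1, r.getDay());
            ps.setLong(2, r.getGood_num());
            ps.setLong(3, r.getBad_num());
            ps.setLong(4, r.getTotal_num());
            ps.setDouble(5, r.getBad_rate());
            ps.executeUpdate();
        }
    }

    public void update(Results r) throws SQLException {
        String sql = "UPDATE suyuan09_etl_results SET good_num=?, bad_num=?, total_num=?, bad_rate=? WHERE `day`=?";
        try (Connection c = conn(); PreparedStatement ps = c.prepareStatement(sql)) {
            ps.setLong(1, r.getGood_num());
            ps.setLong(2, r.getBad_num());
            ps.setLong(3, r.getTotal_num());
            ps.setDouble(4, r.getBad_rate());
            ps.setString(5, r.getDay());
            ps.executeUpdate();
        }
    }
}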

The automation script follows the same idea as in the first ETL project, so here is the code directly:

#!/bin/bash
# yitiaolong.sh: runs the whole daily pipeline in one go
source /etc/profile
mmdd=`date -d '1 days ago' +%m%d`
yymm=`date -d '1 days ago' +%Y%m`
dd=`date -d '1 days ago' +%d`
# yymmdd is used by the hive -> mysql export file names further down
yymmdd=`date -d '1 days ago' +%Y%m%d`

# add yesterday's partitions to the hive tables
/usr/local/hive/bin/hive -e "use suyuan09;alter table etlavrosy add IF NOT EXISTS partition(month='${yymm}',day='${dd}');"
/usr/local/hive/bin/hive -e "use suyuan09;alter table hainiuetltopics10_orc add IF NOT EXISTS partition(month='${yymm}',day='${dd}');"
/usr/local/hive/bin/hive -e "use suyuan09;alter table hainiuetlcategories10_orc add IF NOT EXISTS partition(month='${yymm}',day='${dd}');"
/usr/local/hive/bin/hive -e "use suyuan09;alter table hainiuetlspider_orc add IF NOT EXISTS partition(month='${yymm}',day='${dd}');"
/usr/local/hive/bin/hive -e "use suyuan09;alter table hainiuetlip_orc add IF NOT EXISTS partition(month='${yymm}',day='${dd}');"
/usr/local/hive/bin/hive -e "use suyuan09;alter table hainiuetlindex5_orc add IF NOT EXISTS partition(month='${yymm}',day='${dd}');"

# 3-4 run the MR jobs
hdfs_path1=/user/hainiu/data/hainiuetl/input/${yymm}/${dd}
avro_path1=/user/suyuan09/hainiuetl/hainiuavro/${yymm}/${dd}
/usr/local/hadoop/bin/hadoop jar /home/suyuan09/etl/hainiu/jar/181210_hbase-1.0.0-symkmk123.jar etltext2avro -Dtask.id=${mmdd} -Dtask.input.dir=${hdfs_path1} -Dtask.base.dir=${avro_path1}

# orctopics10mr.sh
avro_path2=/user/suyuan09/hainiuetl/hainiuavro/${yymm}/${dd}/etltext2avro_${mmdd}/part-*.avro
orc_path2=/user/suyuan09/hainiuetl/orctopics10/${yymm}/${dd}
/usr/local/hadoop/bin/hadoop jar /home/suyuan09/etl/hainiu/jar/181210_hbase-1.0.0-symkmk123.jar etlavro2orc_topic10 -Dtask.id=${mmdd} -Dtask.input.dir=${avro_path2} -Dtask.base.dir=${orc_path2}

# orccategories10mr.sh
avro_path3=/user/suyuan09/hainiuetl/hainiuavro/${yymm}/${dd}/etltext2avro_${mmdd}/part-*.avro
orc_path3=/user/suyuan09/hainiuetl/orccategories10/${yymm}/${dd}
/usr/local/hadoop/bin/hadoop jar /home/suyuan09/etl/hainiu/jar/181210_hbase-1.0.0-symkmk123.jar etlavro2orc_categories10 -Dtask.id=${mmdd} -Dtask.input.dir=${avro_path3} -Dtask.base.dir=${orc_path3}

# orcspidermr.sh
avro_path4=/user/suyuan09/hainiuetl/hainiuavro/${yymm}/${dd}/etltext2avro_${mmdd}/part-*.avro
orc_path4=/user/suyuan09/hainiuetl/orcspider/${yymm}/${dd}
/usr/local/hadoop/bin/hadoop jar /home/suyuan09/etl/hainiu/jar/181210_hbase-1.0.0-symkmk123.jar etlavro2orc_spider -Dtask.id=${mmdd} -Dtask.input.dir=${avro_path4} -Dtask.base.dir=${orc_path4}

# orcipmr.sh
avro_path5=/user/suyuan09/hainiuetl/hainiuavro/${yymm}/${dd}/etltext2avro_${mmdd}/part-*.avro
orc_path5=/user/suyuan09/hainiuetl/orcip/${yymm}/${dd}
/usr/local/hadoop/bin/hadoop jar /home/suyuan09/etl/hainiu/jar/181210_hbase-1.0.0-symkmk123.jar etlavro2orc_ip -Dtask.id=${mmdd} -Dtask.input.dir=${avro_path5} -Dtask.base.dir=${orc_path5}

# orcindex5mr.sh
avro_path6=/user/suyuan09/hainiuetl/hainiuavro/${yymm}/${dd}/etltext2avro_${mmdd}/part-*.avro
orc_path6=/user/suyuan09/hainiuetl/orcindex5/${yymm}/${dd}
/usr/local/hadoop/bin/hadoop jar /home/suyuan09/etl/hainiu/jar/181210_hbase-1.0.0-symkmk123.jar etlavro2orc_index5 -Dtask.id=${mmdd} -Dtask.input.dir=${avro_path6} -Dtask.base.dir=${orc_path6}

# move the ORC output into the hive partition directories
# orc2etl.sh
/usr/local/hadoop/bin/hadoop fs -cp hdfs://ns1/user/suyuan09/hainiuetl/orctopics10/${yymm}/${dd}/etlAvro2Orc_topic10_${mmdd}/part-* hdfs://ns1/user/suyuan09/etlorc/hainiuetltopics10_orc/month=${yymm}/day=${dd}
/usr/local/hadoop/bin/hadoop fs -cp hdfs://ns1/user/suyuan09/hainiuetl/orccategories10/${yymm}/${dd}/etlAvro2Orc_categories10_${mmdd}/part-* hdfs://ns1/user/suyuan09/etlorc/hainiuetlcategories10_orc/month=${yymm}/day=${dd}
/usr/local/hadoop/bin/hadoop fs -cp hdfs://ns1/user/suyuan09/hainiuetl/orcspider/${yymm}/${dd}/etlAvro2Orc_spider_${mmdd}/part-* hdfs://ns1/user/suyuan09/etlorc/hainiuetlspider_orc/month=${yymm}/day=${dd}
/usr/local/hadoop/bin/hadoop fs -cp hdfs://ns1/user/suyuan09/hainiuetl/orcindex5/${yymm}/${dd}/etlAvro2Orc_index5_${mmdd}/part-* hdfs://ns1/user/suyuan09/etlorc/hainiuetlindex5_orc/month=${yymm}/day=${dd}
/usr/local/hadoop/bin/hadoop fs -cp hdfs://ns1/user/suyuan09/hainiuetl/orcip/${yymm}/${dd}/etlAvro2Orc_ip_${mmdd}/part-* hdfs://ns1/user/suyuan09/etlorc/hainiuetlip_orc/month=${yymm}/day=${dd}

# export the report data from hive to local files
# hive2data.sh
/usr/local/hive/bin/hive -e "use suyuan09;select t.top,t.num from(select top,count(*) num from hainiuetlindex5_orc group by top) t sort by t.num desc limit 5;" > /home/suyuan09/etl/hainiu/orc2mysql/myindex5${yymmdd}
/usr/local/hive/bin/hive -e "use suyuan09;select t.top,t.num from(select top,count(*) num from hainiuetltopics10_orc group by top) t sort by t.num desc limit 10;" > /home/suyuan09/etl/hainiu/orc2mysql/mytopics10${yymmdd}
/usr/local/hive/bin/hive -e "use suyuan09;select t.top,t.num from(select top,count(*) num from hainiuetlcategories10_orc group by top) t sort by t.num desc limit 10;" > /home/suyuan09/etl/hainiu/orc2mysql/mycategories10${yymmdd}
/usr/local/hive/bin/hive -e "use suyuan09;select t.uip,t.num from(select uip,count(*) num from hainiuetlip_orc group by uip) t sort by t.num desc;" > /home/suyuan09/etl/hainiu/orc2mysql/myip${yymmdd}
/usr/local/hive/bin/hive -e "use suyuan09;select t.usagent,t.num from(select usagent,count(*) num from hainiuetlspider_orc group by usagent) t sort by t.num desc;" > /home/suyuan09/etl/hainiu/orc2mysql/myspider${yymmdd}

# load the exported files into mysql
# data2mysql.sh
# mysql -h 172.33.101.123 -P 3306 -u tony -pYourPassword -D YourDbName <<EOF
/bin/mysql -h192.168.88.195 -P3306 -uhainiu -p12345678 -Dhainiutest <<EOF
LOAD DATA LOCAL INFILE "/home/suyuan09/etl/hainiu/orc2mysql/mytopics10${yymmdd}" INTO TABLE suyuan09_etl_orctopics10mysql FIELDS TERMINATED BY '\t';
LOAD DATA LOCAL INFILE "/home/suyuan09/etl/hainiu/orc2mysql/mycategories10${yymmdd}" INTO TABLE suyuan09_etl_orccategories10mysql FIELDS TERMINATED BY '\t';
LOAD DATA LOCAL INFILE "/home/suyuan09/etl/hainiu/orc2mysql/myindex5${yymmdd}" INTO TABLE suyuan09_etl_orcindex5mysql FIELDS TERMINATED BY '\t';
LOAD DATA LOCAL INFILE "/home/suyuan09/etl/hainiu/orc2mysql/myspider${yymmdd}" INTO TABLE suyuan09_etl_orcspidermysql FIELDS TERMINATED BY '\t';
LOAD DATA LOCAL INFILE "/home/suyuan09/etl/hainiu/orc2mysql/myip${yymmdd}" INTO TABLE suyuan09_etl_orcipmysql FIELDS TERMINATED BY '\t';
EOF
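As in the first ETL project, the idea is to let this script run unattended once a day; a crontab entry along the following lines would do it. The schedule and log path are assumptions, and the script path follows the directories used above:

# run the whole pipeline at 01:00 every day (schedule and paths assumed)
0 1 * * * /bin/bash /home/suyuan09/etl/hainiu/yitiaolong.sh >> /home/suyuan09/etl/hainiu/yitiaolong.log 2>&1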

Report display

 

MySQL does not ship with a ranking function, so we write one ourselves.
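The original function is not shown in the post. One common way to get a ranking in MySQL 5.x, which has no window functions, is a user-variable query like the sketch below, run against the suyuan09_etl_orctopics10mysql table loaded by data2mysql.sh; the column names top and num are assumed from the tab-separated Hive export:

-- rank the topics by visit count with a user variable
SET @rn := 0;
SELECT (@rn := @rn + 1) AS rn, t.top, t.num
FROM (SELECT top, num FROM suyuan09_etl_orctopics10mysql ORDER BY num DESC LIMIT 10) t;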

 

The heat map follows my earlier post https://www.cnblogs.com/symkmk123/p/9309322.html; that version was written in C#, and here it is rewritten with Java + Spring.

The reasoning is in the earlier post; here is the code.

Longitude/latitude conversion class: LngAndLatUtil

package suyuan.web;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.UnsupportedEncodingException;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLConnection;

public class LngAndLatUtil {

    public Object[] getCoordinate(String addr) throws IOException {
        String lng = null; // longitude
        String lat = null; // latitude
        String address = null;
        try {
            address = java.net.URLEncoder.encode(addr, "UTF-8");
        } catch (UnsupportedEncodingException e1) {
            e1.printStackTrace();
        }
        String key = "your API key";
        String url = String.format("http://api.map.baidu.com/geocoder?address=%s&output=json&key=%s", address, key);
        URL myURL = null;
        URLConnection httpsConn = null;
        try {
            myURL = new URL(url);
        } catch (MalformedURLException e) {
            e.printStackTrace();
        }
        InputStreamReader insr = null;
        BufferedReader br = null;
        try {
            httpsConn = (URLConnection) myURL.openConnection(); // no proxy
            if (httpsConn != null) {
                insr = new InputStreamReader(httpsConn.getInputStream(), "UTF-8");
                br = new BufferedReader(insr);
                String data = null;
                int count = 1;
                // the geocoder response is read line by line: line 5 holds the longitude, line 6 the latitude
                while ((data = br.readLine()) != null) {
                    if (count == 5) {
                        try {
                            lng = (String) data.subSequence(data.indexOf(":") + 1, data.indexOf(",")); // longitude
                            count++;
                        } catch (StringIndexOutOfBoundsException e) {
                            e.printStackTrace();
                        }
                    } else if (count == 6) {
                        lat = data.substring(data.indexOf(":") + 1); // latitude
                        count++;
                    } else {
                        count++;
                    }
                }
            }
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            if (insr != null) {
                insr.close();
            }
            if (br != null) {
                br.close();
            }
        }
        return new Object[] { lng, lat };
    }
}

IPDTO (database mapping class):

 

package suyuan.entity;

public class IPDTO {
    public String top;
    public Integer num;

    public String getTop() {
        return top;
    }

    public void setTop(String top) {
        this.top = top;
    }

    public Integer getNum() {
        return num;
    }

    public void setNum(Integer num) {
        this.num = num;
    }
}

IP (heat-map JSON class):

package suyuan.entity;

public class IP {
    public String lng;
    public String lat;
    public int count;

    public String getLng() {
        return lng;
    }

    public void setLng(String lng) {
        this.lng = lng;
    }

    public String getLat() {
        return lat;
    }

    public void setLat(String lat) {
        this.lat = lat;
    }

    public int getCount() {
        return count;
    }

    public void setCount(int count) {
        this.count = count;
    }
}

DAO-layer conversion method:

public List<IP> getip() throws SQLException {
    List<IPDTO> ipdto = null;
    List<IP> ipList = new ArrayList<IP>();
    // build the SQL statement (no placeholders needed)
    String sql = "SELECT top,num FROM `suyuan09_etl_orcipmysql`";
    // execute the query
    ipdto = qr.query(sql, new BeanListHandler<IPDTO>(IPDTO.class));
    for (IPDTO ips : ipdto) {
        IP ip = new IP();
        Integer num = ips.getNum();
        String top = ips.getTop();
        // convert the region name into coordinates
        LngAndLatUtil getLatAndLngByBaidu = new LngAndLatUtil();
        Object[] o = null;
        try {
            o = getLatAndLngByBaidu.getCoordinate(top);
        } catch (IOException e) {
            e.printStackTrace();
        }
        ip.setLng(String.valueOf(o[0]));
        ip.setLat(String.valueOf(o[1]));
        ip.setCount(num);
        ipList.add(ip);
    }
    // return the result
    return ipList;
}

Controller method that returns the JSON:

@RequestMapping("/getip")
public @ResponseBody List<IP> getip() throws Exception {
    return studentService.getip();
}

JSP page for display:

<%@ page language="java" import="java.util.*" pageEncoding="UTF-8"%>
<%@ taglib uri="http://java.sun.com/jsp/jstl/core" prefix="c"%>
<%@ taglib uri="http://java.sun.com/jsp/jstl/fmt" prefix="fmt"%>
<!DOCTYPE HTML>
<html>
<head>
<meta charset="UTF-8">
<script type="text/javascript" src="http://api.map.baidu.com/api?v=2.0&ak=1fReIR62vFbOc2vgrBnRAGyUtLkgoIIH"></script>
<script type="text/javascript" src="http://api.map.baidu.com/library/Heatmap/2.0/src/Heatmap_min.js"></script>
<script type="text/javascript" src="${pageContext.request.contextPath}/js/jquery-1.9.1.js"></script>
<title></title>
<style type="text/css">
ul,li{list-style: none;margin:0;padding:0;float:left;}
html{height:100%}
body{height:100%;margin:0px;padding:0px;font-family:"微软雅黑";}
#container{height:700px;width:100%;}
#r-result{width:100%;}
</style>
</head>
<body>
<form id="form1" runat="server">
    <div>
        <div id="container"></div>
        <div id="r-result">
            <input type="button" onclick="openHeatmap();" value="Show heat map"/><input type="button" onclick="closeHeatmap();" value="Hide heat map"/>
        </div>
    </div>
</form>
</body>
</html>

<script type="text/javascript">
var map = new BMap.Map("container");                 // create the map instance
var point = new BMap.Point(118.906886, 31.895532);
map.centerAndZoom(point, 15);                        // initialize the map: center point and zoom level
map.enableScrollWheelZoom();                         // allow zooming with the mouse wheel

if (!isSupportCanvas()) {
    alert('The heat map only works in browsers that support canvas; your browser cannot display it.')
}
heatmapOverlay = new BMapLib.HeatmapOverlay({ "radius": 20 });
map.addOverlay(heatmapOverlay);
// fetch the points from the /getip controller (synchronous ajax) and feed them to the overlay
heatmapOverlay.setDataSet({ data: function () {
    var serie = [];
    $.ajax({
        url: "${pageContext.request.contextPath}/getip",
        dataType: "json",
        async: false,
        success: function (dataJson) {
            for (var i = 0; i < dataJson.length; i++) {
                var item = {
                    //name: res.data.titleList7[i],
                    //value: randomData()
                    lat: dataJson[i].lat,
                    lng: dataJson[i].lng,
                    count: dataJson[i].count
                };
                serie.push(item);
            }
        }
    });
    return serie;
} (), max: 100 });

// show / hide the heat map
function openHeatmap() {
    heatmapOverlay.show();
}
function closeHeatmap() {
    heatmapOverlay.hide();
}
closeHeatmap();

function setGradient() {
    /* the gradient looks like:
    {
        0:'rgb(102, 255, 0)',
        .5:'rgb(255, 170, 0)',
        1:'rgb(255, 0, 0)'
    }*/
    var gradient = {};
    var colors = document.querySelectorAll("input[type='color']");
    colors = [].slice.call(colors, 0);
    colors.forEach(function (ele) {
        gradient[ele.getAttribute("data-key")] = ele.value;
    });
    heatmapOverlay.setOptions({ "gradient": gradient });
}

// check whether the browser supports canvas
function isSupportCanvas() {
    var elem = document.createElement('canvas');
    return !!(elem.getContext && elem.getContext('2d'));
}
</script>

 

The charts also follow my earlier post: https://www.cnblogs.com/symkmk123/p/9010514.html

 
