基本思路:用 newLISP 定时下载 JobTracker 页面,用正则表达式解析 HTML 中的 table 元素,从而获得 MapReduce 集群的最新状态。
每次获得状态数据后,存入mysql数据库,然后用tableau将mapreduce集群状态用报表呈现。
这是jobtracker站点的数据
这是tableau绘制的报表
这样就可以用数据可视化的方式展示hadoop集群计算的压力状态。
下面是newlisp代码,主要就是用正则表达式解析html,用mysql模块写入数据库。
#!/usr/bin/newlisp
;; job.lsp -- download a Hadoop JobTracker status page, pull the numbers out
;; of its cluster-summary table with regular expressions, and insert them as
;; one row into a MySQL table.
;;
;; Usage: ./job.lsp <jobtracker-host> [port]      (port defaults to 80)
;;
;; NOTE(review): this listing was rebuilt from a copy whose line breaks,
;; string quotes and HTML-looking regex text had been stripped. Code marked
;; "reconstructed" below did not survive in the copy and was rewritten from
;; the remaining fragments -- confirm it against the original script.

(load "mysql.lsp")

;; Print the command line and stop unless it holds 3 or 4 elements
;; (interpreter, script, host, optional port). Sets global `args-length`.
(define (check-args)
  (print "args: ")
  (println (main-args))
  (set 'args-length (length (main-args)))
  (if (or (< args-length 3) (> args-length 4))
      (begin
        (println "the number of args must be 3 or 4, e.g. ./job.lsp jobtracker.bigdata.cn 8080")
        (exit 1))))

;; Derive the globals `domain`, `port` and `url` from the command line.
;; Relies on `args-length` having been set by check-args.
(define (parse-args)
  (set 'domain (main-args 2))
  (set 'port (if (= 4 args-length) (main-args 3) 80))
  (set 'url (string "http://" domain ":" port "/jobtracker.jsp"))
  (println (string "job tracker site is located at " url)))

;; Fetch the JobTracker page into global `page-content` and parse it.
(define (access-job-tracker-site)
  (set 'page-content (get-url url))
  ;; get-url reports failures as a string beginning with "ERR:".
  (if (or (not (string? page-content)) (starts-with page-content "ERR:"))
      (begin
        (println "cannot download " url ": " page-content)
        (exit 1)))
  (extract-tables page-content))

;; Return `table` when it is the cluster-summary table (recognised by the
;; "running map tasks" column header), otherwise nil.
;; The match is case-insensitive because the letter case of the header text
;; did not survive in the recovered copy.
(define (extract-summary-table table)
  (if (regex "running map tasks" table 1)
      table))

;; Return the text of one <td>...</td> cell with nested tags (for example a
;; surrounding link) and all whitespace removed, or nil if `td` is not a cell.
;; NOTE(review): reconstructed -- only the first regex call of the original
;; body survived; the original special-cased cells containing a link.
(define (get-number td)
  (let (m (regex "<td[^>]*>([\\s\\S]*?)</td>" td 1))
    (if m
        (let (inner (copy (m 3)))
          (replace "<[^>]+>" inner "" 0)
          (replace "\\s+" inner "" 0)
          inner))))

;; Collect the cell values of the summary table, in column order, into the
;; global `all-summary-values`.
;; NOTE(review): reconstructed -- this definition was missing entirely.
(define (parse-summary-table table)
  (set 'all-summary-values
       (map get-number (find-all "<td[^>]*>[\\s\\S]*?</td>" table $0 1))))

;; Find every <table> in `html-content`, remember the summary table in the
;; global `summary-table` and parse it.
;; NOTE(review): the header and the find-all pattern are reconstructed; the
;; dolist loop and the parse call are original.
(define (extract-tables html-content)
  (set 'summary-table nil)
  (set 'all-tables (find-all "<table[\\s\\S]*?</table>" html-content $0 1))
  (dolist (table all-tables)
    (if (extract-summary-table table)
        (set 'summary-table table)))
  (if (not summary-table)
      (begin
        (println "no cluster summary table found at " url)
        (exit 1)))
  (parse-summary-table summary-table))

;; Insert one row built from the 14 summary values (column order as on the
;; JobTracker page) into the summary table, stamped with now().
(define (write-summary-to-mysql all-summary-values)
  (println all-summary-values)
  (if (or (not (list? all-summary-values)) (< (length all-summary-values) 14))
      (begin
        (println "expected 14 summary values, cannot build the insert statement")
        (exit 1)))
  ;; NOTE(review): (mysql), :connect and :query come from the loaded
  ;; mysql.lsp, which is not visible here; the constructor's letter case may
  ;; have been lost in the recovered copy -- verify.
  (set 'mysql-instance (mysql))
  (println "mysql-instance: " mysql-instance)
  ;; NOTE(review): connection settings and credentials are hard-coded.
  (set 'mysql-host "10.100.10.10")
  (set 'mysql-port 3306)
  (set 'mysql-user "user")
  (set 'mysql-pwd "123456")
  (set 'mysql-db "bigdata_data_market")
  (set 'job-tracker-summary-table "hadoop_job_tracker_summary")
  (:connect mysql-instance mysql-host mysql-user mysql-pwd mysql-db mysql-port)
  (:query mysql-instance "set character_set_client = utf8;")
  ;; The values are scraped text spliced into SQL: let only plain numbers
  ;; through and store anything else (e.g. a "-" placeholder) as NULL.
  (set 'sql-values
       (map (fn (v)
              (if (and (string? v) (regex "^-?[0-9]+(\\.[0-9]+)?$" v)) v "NULL"))
            (0 14 all-summary-values)))
  ;; Column names, including their spelling, must match the existing schema.
  (set 'insert-summary-sql
       (format "insert into %s (collect_time,running_map_tasks,running_reduce_tasks,total_submissions,nodes,occupied_map_slots,occupied_reduce_slots,reserved_map_slots,reserved_reduce_slots,map_task_capacity,reduce_task_capacity,average_tasks_per_node,blacked_listed_nodes,gray_listed_nodes,exclueded_nodes) values (now(),%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)"
               (cons job-tracker-summary-table sql-values)))
  (println insert-summary-sql)
  (:query mysql-instance insert-summary-sql))

;; main logic starts now
(check-args)
(parse-args)
(access-job-tracker-site)
(write-summary-to-mysql all-summary-values)
(exit)