经验首页 前端设计 程序设计 Java相关 移动开发 数据库/运维 软件/图像 大数据/云计算 其他经验
当前位置:技术经验 » HTML/CSS » 浏览器 » 查看文章
python使用mitmproxy抓取浏览器请求的方法
来源:jb51  时间:2019/7/3 8:38:30  对本文有异议

最近要写一款被动式漏洞扫描器。所谓被动式,是指捕获我们在浏览器中浏览网页时发出的请求,再交给扫描器处理。本来打算自己实现这个代理,但考虑到还需要抓取 HTTPS 流量,最后选用了 mitmproxy。

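安装方法(注意:下文示例使用的是旧版 mitmproxy 的 API,如 flow.FlowMaster、flow.State、controller.handler,大致对应 0.18.x 系列;较新的版本已不再提供这些接口,需按所安装的版本调整代码):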

pip install mitmproxy

接下来通过一个案例程序来了解它的使用,下面是目录结构

sproxy
|-- utils
|   |-- __init__.py
|   |-- parser.py
|-- sproxy.py

sproxy.py代码

  1. #coding=utf-8
  2. from pprint import pprint
  3. from mitmproxy import flow, proxy, controller, options
  4. from mitmproxy.proxy.server import ProxyServer
  5. from utils.parser import ResponseParser
  6. # http static resource file extension
  7. static_ext = ['js', 'css', 'ico', 'jpg', 'png', 'gif', 'jpeg', 'bmp']
  8. # media resource files type
  9. media_types = ['image', 'video', 'audio']
  10. # url filter
  11. url_filter = ['baidu','360','qq.com']
  12. static_files = [
  13. 'text/css',
  14. 'image/jpeg',
  15. 'image/gif',
  16. 'image/png',
  17. ]
  18. class WYProxy(flow.FlowMaster):
  19. def __init__(self, opts, server, state):
  20. super(WYProxy, self).__init__(opts, server, state)
  21. def run(self):
  22. try:
  23. pprint("proxy started successfully...")
  24. flow.FlowMaster.run(self)
  25. except KeyboardInterrupt:
  26. pprint("Ctrl C - stopping proxy")
  27. self.shutdown()
  28. def get_extension(self, flow):
  29. if not flow.request.path_components:
  30. return ''
  31. else:
  32. end_path = flow.request.path_components[-1:][0]
  33. split_ext = end_path.split('.')
  34. if not split_ext or len(split_ext) == 1:
  35. return ''
  36. else:
  37. return split_ext[-1:][0][:32]
  38. def capture_pass(self, flow):
  39. # filter url
  40. url = flow.request.url
  41. for i in url_filter:
  42. if i in url:
  43. return True
  44. """if content_type is media_types or static_files, then pass captrue"""
  45. extension = self.get_extension(flow)
  46. if extension in static_ext:
  47. return True
  48. # can't catch the content_type
  49. content_type = flow.response.headers.get('Content-Type', '').split(';')[:1][0]
  50. if not content_type:
  51. return False
  52. if content_type in static_files:
  53. return True
  54. http_mime_type = content_type.split('/')[:1]
  55. if http_mime_type:
  56. return True if http_mime_type[0] in media_types else False
  57. else:
  58. return False
  59. @controller.handler
  60. def request(self, f):
  61. pass
  62. @controller.handler
  63. def response(self, f):
  64. try:
  65. if not self.capture_pass(f):
  66. parser = ResponseParser(f)
  67. result = parser.parser_data()
  68. if f.request.method == "GET":
  69. print result['url']
  70. elif f.request.method == "POST":
  71. print result['request_content'] # POST提交的参数
  72. except Exception as e:
  73. raise e
  74. @controller.handler
  75. def error(self, f):
  76. pass
  77. # print("error", f)
  78. @controller.handler
  79. def log(self, l):
  80. pass
  81. # print("log", l.msg)
  82. def start_server(proxy_port, proxy_mode):
  83. port = int(proxy_port) if proxy_port else 8090
  84. mode = proxy_mode if proxy_mode else 'regular'
  85. if proxy_mode == 'http':
  86. mode = 'regular'
  87. opts = options.Options(
  88. listen_port=port,
  89. mode=mode,
  90. cadir="~/.mitmproxy/",
  91. )
  92. config = proxy.ProxyConfig(opts)
  93. state = flow.State()
  94. server = ProxyServer(config)
  95. m = WYProxy(opts, server, state)
  96. m.run()
  97. if __name__ == '__main__':
  98. start_server("8090", "http")

parser.py

  1. # from __future__ import absolute_import
  2. class ResponseParser(object):
  3. """docstring for ResponseParser"""
  4. def __init__(self, f):
  5. super(ResponseParser, self).__init__()
  6. self.flow = f
  7. def parser_data(self):
  8. result = dict()
  9. result['url'] = self.flow.request.url
  10. result['path'] = '/{}'.format('/'.join(self.flow.request.path_components))
  11. result['host'] = self.flow.request.host
  12. result['port'] = self.flow.request.port
  13. result['scheme'] = self.flow.request.scheme
  14. result['method'] = self.flow.request.method
  15. result['status_code'] = self.flow.response.status_code
  16. result['content_length'] = int(self.flow.response.headers.get('Content-Length', 0))
  17. result['request_header'] = self.parser_header(self.flow.request.headers)
  18. result['request_content'] = self.flow.request.content
  19. return result
  20. @staticmethod
  21. def parser_multipart(content):
  22. if isinstance(content, str):
  23. res = re.findall(r'name=\"(\w+)\"\r\n\r\n(\w+)', content)
  24. if res:
  25. return "&".join([k + '=' + v for k, v in res])
  26. else:
  27. return ""
  28. else:
  29. return ""
  30. @staticmethod
  31. def parser_header(header):
  32. headers = {}
  33. for key, value in header.items():
  34. headers[key] = value
  35. return headers
  36. @staticmethod
  37. def decode_response_text(content):
  38. for _ in ['UTF-8', 'GB2312', 'GBK', 'iso-8859-1', 'big5']:
  39. try:
  40. return content.decode(_)
  41. except:
  42. continue
  43. return content

参考链接:

https://github.com/ring04h/wyproxy

以上这篇python使用mitmproxy抓取浏览器请求的方法就是小编分享给大家的全部内容了,希望能给大家一个参考,也希望大家多多支持w3xue。

 友情链接:直通硅谷  点职佳  北美留学生论坛

本站QQ群:前端 618073944 | Java 606181507 | Python 626812652 | C/C++ 612253063 | 微信 634508462 | 苹果 692586424 | C#/.net 182808419 | PHP 305140648 | 运维 608723728

W3xue 的所有内容仅供测试,对任何法律问题及风险不承担任何责任。通过使用本站内容随之而来的风险与本站无关。
关于我们  |  意见建议  |  捐助我们  |  报错有奖  |  广告合作、友情链接(目前9元/月)请联系QQ:27243702 沸活量
皖ICP备17017327号-2 皖公网安备34020702000426号