errDump.lua 14 KB


  1. --- 模块功能:系统错误日志管理(强烈建议用户开启此模块的“错误日志上报调试服务器”功能).
  2. -- 错误日志包括五种:
  3. -- 1、系统主任务运行时的错误日志
  4. -- 此类错误会导致软件重启,错误日志保存在/luaerrinfo.txt文件中
  5. -- 2、调用sys.taskInit创建的协程运行过程中的错误日志
  6. -- 此类错误会终止当前协程的运行,但是不会导致软件重启,错误日志保存在/lib_err.txt中
  7. -- 3、调用errDump.appendErr或者sys.restart接口保存的错误日志
  8. -- 此类错误日志保存在/lib_err.txt中
  9. -- 4、调用errDump.setNetworkLog接口打开网络异常日志功能后,会自动保存最近几种网络异常日志
  10. -- 错误日志保存在/lib_network_err.txt中
  11. -- 5、底层固件的死机信息
  12. --
  13. -- 其中2和3保存的错误日志,最多支持5K字节
  14. -- 每次上报错误日志给调试服务器之后,会清空已保存的日志
  15. -- @module errDump
  16. -- @author openLuat
  17. -- @license MIT
  18. -- @copyright openLuat
  19. -- @release 2017.09.26
  20. require"socket"
  21. require"misc"
  22. module(..., package.seeall)
  23. --错误信息文件以及错误信息内容
  24. local LIB_ERR_FILE,libErr,LIB_ERR_MAX_LEN = "/lib_err.txt","",5*1024
  25. local LUA_ERR_FILE,luaErr = "/luaerrinfo.txt",""
  26. local sReporting,sProtocol,switch
  27. local LIB_NETWORK_ERR_FILE,sNetworkLog,stNetworkLog,sNetworkLogFlag = "/lib_network_err.txt","",{}
  28. local firmwareAssertErr = ""
  29. -- 初始化LIB_ERR_FILE文件中的错误信息(读取到内存中,并且打印出来)
  30. -- @return nil
  31. -- @usage readTxt.initErr()
  32. local function initErr()
  33. libErr = io.readFile(LIB_ERR_FILE) or ""
  34. if libErr~="" then
  35. log.error("errDump.libErr", libErr)
  36. end
  37. luaErr = io.readFile(LUA_ERR_FILE) or ""
  38. if luaErr~="" then
  39. log.error("errDump.luaErr", luaErr)
  40. end
  41. sNetworkLog = io.readFile(LIB_NETWORK_ERR_FILE) or ""
  42. if sNetworkLog~="" then
  43. log.error("errDump.libNetErr", sNetworkLog)
  44. end
  45. if type(rtos.get_fatal_info)=="function" then
  46. firmwareAssertErr = rtos.get_fatal_info() or ""
  47. if firmwareAssertErr~="" then
  48. log.error("errDump.firmwareAssertErr", firmwareAssertErr)
  49. end
  50. end
  51. end
  52. --- 追加错误信息到LIB_ERR_FILE文件中(文件最多允许存储5K字节的数据)
  53. -- @string s 用户自定义的错误信息,errDump功能模块会对此错误信息做如下处理:
  54. -- 1、重启后会通过Luat下载调试工具输出,在trace中搜索errDump.libErr,可以搜索到错误信息
  55. -- 2、如果用户调用errDump.request接口设置了错误信息要上报的调试服务器地址和端口,则每次重启会自动上报错误信息到调试服务器
  56. -- 3、如果用户调用errDump.request接口设置了定时上报,则定时上报时会上报错误信息到调试服务器
  57. -- 其中第2和第3种情况,上报成功后,会自动清除错误信息
  58. -- @return bool result,true表示成功,false或者nil表示失败
  59. -- @usage errDump.appendErr("net working timeout!")
  60. function appendErr(s)
  61. if s then
  62. s=s.."\r\n"
  63. log.error("errDump.appendErr",s)
  64. if (s:len()+libErr:len())<=LIB_ERR_MAX_LEN then
  65. libErr = libErr..s
  66. return io.writeFile(LIB_ERR_FILE, libErr)
  67. end
  68. end
  69. end
  70. local function reportData()
  71. local s = _G.PROJECT.."_"..rtos.get_version()..",".._G.VERSION..","..misc.getImei()..","..misc.getSn()..","
  72. s = s.."\r\npoweron reason:"..rtos.poweron_reason().."\r\n"..luaErr..(luaErr:len()>0 and "\r\n" or "")..libErr..(libErr:len()>0 and "\r\n" or "")..sNetworkLog
  73. s = s..(firmwareAssertErr:len()>0 and "\r\n" or "")..firmwareAssertErr
  74. return s
  75. end
  76. local function httpPostCbFnc(result,statusCode)
  77. log.info("errDump.httpPostCbFnc",result,statusCode)
  78. sys.publish("ERRDUMP_HTTP_POST",result,statusCode)
  79. end
  80. local function checkSwitch(addr)
  81. local first = true
  82. while true do
  83. if not socket.isReady() then sys.waitUntil("IP_READY_IND") end
  84. --log.info("errDump.clientTask","err",luaErr~="" or libErr~="")
  85. local host,port = addr:match("://(.+):(%d+)$")
  86. if not host then log.error("errDump.request invalid host port") return end
  87. local result, data, time
  88. while true do
  89. local sck = socket.udp()
  90. data = string.char(0, 0) .. misc.getImei()
  91. if sck:connect(host, port) then
  92. if sck:send(data) then
  93. result, data = sck:recv(5000)
  94. if result then
  95. data, result = json.decode(data)
  96. if result then
  97. if data.r == 1 then
  98. switch = true
  99. time = tonumber(data.expire_at)
  100. end
  101. end
  102. end
  103. else
  104. switch = false
  105. end
  106. else
  107. switch = false
  108. end
  109. sck:close()
  110. if time then
  111. local clk = time - os.time()
  112. if clk < 7200 then
  113. sys.timerStart(function() switch = false end, clk * 1000)
  114. end
  115. end
  116. break
  117. end
  118. if first then
  119. sys.publish("GET_SWITCH")
  120. first = nil
  121. end
  122. sys.wait(7200000)
  123. end
  124. end
  125. function clientTask(protocol,addr,period,flag)
  126. sReporting = true
  127. if flag then
  128. sys.taskInit(checkSwitch, addr)
  129. sys.waitUntil("GET_SWITCH")
  130. end
  131. while true do
  132. if not socket.isReady() then sys.waitUntil("IP_READY_IND") end
  133. --log.info("errDump.clientTask","err",luaErr~="" or libErr~="")
  134. if luaErr~="" or libErr~="" or sNetworkLog~="" or firmwareAssertErr~="" then
  135. local retryCnt,result,data = 0
  136. while true do
  137. if protocol=="http" or protocol=="https" then
  138. http.request("POST",addr,nil,nil,reportData(),20000,httpPostCbFnc)
  139. _,result = sys.waitUntil("ERRDUMP_HTTP_POST")
  140. else
  141. if flag and not switch then
  142. break
  143. end
  144. local host,port = addr:match("://(.+):(%d+)$")
  145. if not host then log.error("errDump.request invalid host port") return end
  146. local sck = protocol=="udp" and socket.udp() or socket.tcp()
  147. if sck:connect(host,port) then
  148. result = sck:send(reportData())
  149. if result and protocol=="udp" then
  150. result,data = sck:recv(20000)
  151. if result then
  152. if not flag then
  153. result = data=="OK"
  154. else
  155. data, result = json.decode(data)
  156. if result then
  157. result = data.r == 1 and true or false
  158. end
  159. end
  160. end
  161. end
  162. end
  163. sck:close()
  164. end
  165. if result then
  166. libErr = ""
  167. os.remove(LIB_ERR_FILE)
  168. luaErr = ""
  169. os.remove(LUA_ERR_FILE)
  170. sNetworkLog = ""
  171. stNetworkLog = {}
  172. os.remove(LIB_NETWORK_ERR_FILE)
  173. firmwareAssertErr = ""
  174. if type(rtos.remove_fatal_info)=="function" then rtos.remove_fatal_info() end
  175. break
  176. else
  177. if flag then
  178. break
  179. end
  180. retryCnt = retryCnt+1
  181. if retryCnt==3 then
  182. break
  183. end
  184. sys.wait(5000)
  185. end
  186. end
  187. end
  188. if period then
  189. --log.info("errDump.clientTask","wait",period)
  190. sys.wait(period)
  191. else
  192. break
  193. end
  194. end
  195. sReporting = false
  196. end
  197. function updateNetworkLog()
  198. if sNetworkLogFlag then
  199. sNetworkLog = ""
  200. for k,v in pairs(stNetworkLog) do
  201. if v and v~="" then
  202. sNetworkLog = sNetworkLog.."\r\n"..k.."@"..v
  203. end
  204. end
  205. if sNetworkLog~="" then
  206. io.writeFile(LIB_NETWORK_ERR_FILE,sNetworkLog)
  207. end
  208. end
  209. end
  210. local onceGsmRegistered,onceGprsAttached
  211. --- 配置网络错误日志开关
  212. -- @bool[opt=nil] flag 是否打开网络错误日志开关,true为打开,false或者nil为关闭
  213. -- @usage
  214. -- errDump.setNetworkLog(true)
  215. function setNetworkLog(flag)
  216. sNetworkLogFlag = flag
  217. local procer = flag and sys.subscribe or sys.unsubscribe
  218. if not flag then
  219. sNetworkLog,stNetworkLog = "",{}
  220. end
  221. local function getTimeStr()
  222. local clk = os.date("*t")
  223. return string.format("%02d_%02d:%02d:%02d",clk.day,clk.hour,clk.min,clk.sec)
  224. end
  225. procer("FLYMODE",function(value)
  226. if value then
  227. stNetworkLog["FLYMODE"] = getTimeStr()
  228. updateNetworkLog()
  229. end
  230. end)
  231. procer("SIM_IND",function(value)
  232. if value~="RDY" then
  233. stNetworkLog["SIM_IND"] = getTimeStr()..":"..value
  234. updateNetworkLog()
  235. end
  236. end)
  237. procer("NET_STATE_UNREGISTER",function()
  238. if onceGsmRegistered then
  239. stNetworkLog["NET_STATE_UNREGISTER"] = getTimeStr()
  240. updateNetworkLog()
  241. end
  242. end)
  243. procer("NET_STATE_REGISTERED",function() onceGsmRegistered=true end)
  244. procer("GPRS_ATTACH",function(value)
  245. if value then
  246. onceGprsAttached = true
  247. elseif onceGprsAttached then
  248. stNetworkLog["GPRS_ATTACH"] = getTimeStr()..":0"
  249. updateNetworkLog()
  250. end
  251. end)
  252. procer("LIB_SOCKET_CONNECT_FAIL_IND",function(ssl,prot,addr,port)
  253. stNetworkLog[(ssl and "ssl" or prot).."://"..addr..":"..port] = getTimeStr()..":connect fail"
  254. updateNetworkLog()
  255. end)
  256. procer("LIB_SOCKET_SEND_FAIL_IND",function(ssl,prot,addr,port)
  257. stNetworkLog[(ssl and "ssl" or prot).."://"..addr..":"..port] = getTimeStr()..":send fail"
  258. updateNetworkLog()
  259. end)
  260. procer("LIB_SOCKET_CLOSE_IND",function(ssl,prot,addr,port)
  261. stNetworkLog[(ssl and "ssl" or prot).."://"..addr..":"..port.." closed"] = getTimeStr()
  262. updateNetworkLog()
  263. end)
  264. procer("PDP_DEACT_IND",function()
  265. stNetworkLog["PDP_DEACT_IND"] = getTimeStr()
  266. updateNetworkLog()
  267. end)
  268. procer("IP_SHUT_IND",function()
  269. stNetworkLog["IP_SHUT_IND"] = getTimeStr()
  270. updateNetworkLog()
  271. end)
  272. end
  273. --- 配置调试服务器地址,启动错误信息上报给调试服务器的功能,上报成功后,会清除错误信息
  274. -- @string addr 调试服务器地址信息,支持http,udp,tcp
  275. -- 1、如果调试服务器使用http协议,终端将采用POST命令,把错误信息上报到addr指定的URL中,addr的格式如下
  276. -- (除protocol和hostname外,其余字段可选;目前的实现不支持hash)
  277. -- |------------------------------------------------------------------------------|
  278. -- | protocol ||| auth | host | path | hash |
  279. -- |----------|||-----------|-----------------|---------------------------|-------|
  280. -- | ||| | hostname | port | pathname | search | |
  281. -- | ||| |----------|------|----------|----------------| |
  282. -- " http(s) :// user:pass @ host.com : 8080 /p/a/t/h ? query=string # hash "
  283. -- | ||| | | | | | |
  284. -- |------------------------------------------------------------------------------|
  285. -- 2、如果调试服务器使用udp协议,终端将错误信息,直接上报给调试服务器,调试服务器收到信息后,要回复大写的OK;addr格式如下:
  286. -- |----------|||----------|------|
  287. -- | protocol ||| hostname | port |
  288. -- | |||----------|------|
  289. -- " udp :// host.com : 8081 |
  290. -- | ||| | |
  291. -- |------------------------------|
  292. -- 3、如果调试服务器使用tcp协议,终端将错误信息,直接上报给调试服务器;addr格式如下:
  293. -- |----------|||----------|------|
  294. -- | protocol ||| hostname | port |
  295. -- | |||----------|------|
  296. -- " tcp :// host.com : 8082 |
  297. -- | ||| | |
  298. -- |------------------------------|
  299. -- @number[opt=600000] period 单位毫秒,定时检查错误信息并上报的间隔
  300. -- @bool flag 当使用合宙调试服务器时,此参数填为true;使用自定义服务器时,此参数可省略
  301. -- @return bool result,成功返回true,失败返回nil
  302. -- @usage
  303. -- errDump.request("http://www.user_server.com/errdump")
  304. -- errDump.request("udp://www.user_server.com:8081")
  305. -- errDump.request("tcp://www.user_server.com:8082")
  306. -- errDump.request("tcp://www.user_server.com:8082",6*3600*1000)
  307. -- errDump.request("udp://www.hezhou_server.com:8083",6*3600*1000,true)
  308. function request(addr,period,flag)
  309. local protocol = addr:match("(%a+)://")
  310. if protocol~="http" and protocol~="https" and protocol~="udp" and protocol~="tcp" then
  311. log.error("errDump.request invalid protocol",protocol)
  312. return
  313. end
  314. if flag and protocol ~= "udp" then
  315. log.error("errDump.request invalid protocol",protocol)
  316. return
  317. end
  318. if not sReporting then
  319. sys.taskInit(clientTask,protocol,addr,period or 600000, flag)
  320. end
  321. return true
  322. end
-- Runs when the module is loaded: reads the saved error logs into memory.
initErr()