1101_get_activities_bysolscan_playwright.py 17 KB


  1. from playwright.async_api import async_playwright
  2. # from datetime import datetime, timezone
  3. from playwright.sync_api import expect
  4. import random
  5. from base_class import BaseVariableFunction
  6. from base_class import *
  7. baseclass = BaseVariableFunction(__file__)
  8. baseclass.makedirpath(baseclass.dalao_activities_solscan_path)
  9. old_print = print
  10. def timestamped_print(*args, **kwargs):
  11. old_print(datetime.datetime.utcnow().replace(
  12. microsecond=0), *args, **kwargs)
  13. print = timestamped_print
  14. print('\n'*5)
  15. print(f"{'{:<6}'.format('ENTER')} {baseclass.scriptfilename} ----------------NOTE-----------NOTE---------------")
# Three ways to write Windows paths in Python
# They can be written as follows:
  18. async def get_tbody_data(page, str_dalaoAddress):
  19. arr_tokenname_amount = None
  20. arr_tokenlink = None
  21. txhash = None
  22. try:
  23. print(f"enter {str_dalaoAddress} get_tbody_data")
  24. tbody_ = page.locator(
  25. 'table.w-full tbody')
  26. await asyncio.sleep(3)
  27. arr_trs = await tbody_.locator('tr').all()
  28. arr_trs_datares = []
  29. for idx_tr_ in range(0, len(arr_trs)):
  30. arr_tds_datares = [None]*13
  31. arr_trs_datares.append(arr_tds_datares)
  32. tr_ = arr_trs[idx_tr_]
  33. arr_tds = await tr_.locator('td').all()
  34. for idx_td_ in range(0, len(arr_tds)):
  35. td_ = arr_tds[idx_td_]
  36. if (idx_td_ == 0):
  37. continue
  38. elif (idx_td_ == 1):
  39. txhash = (await td_.text_content()).strip()
  40. arr_tds_datares[0] = txhash
  41. elif (idx_td_ == 2):
  42. dateString = (await td_.text_content()).strip()
  43. [dateStr, timeStr] = dateString.split(' ')
  44. [month, day, year] = dateStr.split('-')
  45. [hours, minutes, seconds] = timeStr.split(':')
  46. utc_date = datetime.datetime(
  47. int(year), int(month), int(day), int(hours), int(minutes), int(seconds))
  48. timestamp_utc = int(utc_date.replace(
  49. tzinfo=datetime.timezone.utc).timestamp())
  50. arr_tds_datares[1] = utc_date.strftime("%Y-%m-%d %H:%M:%S")
  51. arr_tds_datares[2] = str(timestamp_utc)
  52. elif (idx_td_ == 3):
  53. action_type = (await td_.text_content()).strip()
  54. if ('SWAP' in action_type):
  55. action_type = "SWAP"
  56. else:
  57. continue
  58. arr_tds_datares[3] = action_type
  59. elif (idx_td_ == 4):
  60. from_address = (await td_.text_content()).strip()
  61. arr_tds_datares[4] = from_address
  62. elif (idx_td_ == 5):
  63. arr_tokenname_amount = (await td_.inner_text()).strip()
  64. arr_tokenname_amount = arr_tokenname_amount.replace(
  65. ',', '').split('\n')
  66. arr_tokenlink = await td_.locator("a").all()
  67. out_tokenaddress = await arr_tokenlink[0].get_attribute('href')
  68. out_tokenaddress = out_tokenaddress.strip().split(
  69. '/')[-1]
  70. out_tokenname = arr_tokenname_amount[1]
  71. out_tokenamount = arr_tokenname_amount[0]
  72. in_tokenaddress = await arr_tokenlink[1].get_attribute(
  73. 'href')
  74. in_tokenaddress = in_tokenaddress.strip().split(
  75. '/')[-1]
  76. in_tokenname = arr_tokenname_amount[3]
  77. in_tokenamount = arr_tokenname_amount[2]
  78. arr_tds_datares[5] = in_tokenaddress
  79. arr_tds_datares[6] = out_tokenaddress
  80. arr_tds_datares[7] = in_tokenname
  81. arr_tds_datares[8] = out_tokenname
  82. arr_tds_datares[9] = in_tokenamount
  83. arr_tds_datares[10] = out_tokenamount
  84. elif (idx_td_ == 6):
  85. router_link = (await td_.locator("a").all())[0]
  86. router_link = await router_link.get_attribute(
  87. 'href')
  88. router_link = router_link.strip().split('/')[-1]
  89. arr_tds_datares[11] = router_link
  90. elif (idx_td_ == 7):
  91. arr_pool_img = await td_.locator("img").all()
  92. for idx_pool_img in range(0, len(arr_pool_img)):
  93. arr_pool_img[idx_pool_img] = await arr_pool_img[idx_pool_img].get_attribute(
  94. 'src')
  95. arr_pool_img[idx_pool_img] = arr_pool_img[idx_pool_img].strip().split(
  96. '=')[-1]
  97. if "68747470733a2f2f737461746963732e736f6c7363616e2e696f2f65782d696d672f3637356b5058394d48546a53327a7431716672314e5948757a654c5866514d394832347746535574314d70382e706e67" in arr_pool_img:
  98. arr_pool_img = '1'
  99. else:
  100. arr_pool_img = '0'
  101. arr_tds_datares[12] = arr_pool_img
  102. print(f"end {str_dalaoAddress} get_tbody_data")
  103. return arr_trs_datares
  104. except Exception as e:
  105. print(str_dalaoAddress, txhash, "arr_tokenname_amount",
  106. arr_tokenname_amount, "arr_tokenlink=", arr_tokenlink)
  107. raise
  108. activities_columns = ['txhash', 'date', 'timestamp', 'action', 'from', 'in_tokenaddress',
  109. 'out_tokenaddress',
  110. 'in_tokenname',
  111. 'out_tokenname',
  112. 'in_tokenamount',
  113. 'out_tokenamount',
  114. 'router',
  115. 'israydium',
  116. ]
  117. df = pd.read_csv(baseclass.dalao_merge_path /
  118. "filter_dalao.csv", dtype=object)
  119. arr_str_dalaoAddress = df["dalaoAddress"].tolist()
  120. temp_arr_str_dalaoAddress = copy.deepcopy(arr_str_dalaoAddress)
  121. for str_dalaoAddress in temp_arr_str_dalaoAddress:
  122. if (baseclass.dalao_activities_solscan_path /
  123. f"solscan_act_{str_dalaoAddress}.csv").exists():
  124. arr_str_dalaoAddress.remove(str_dalaoAddress)
  125. # arr_str_dalaoAddress = arr_str_dalaoAddress[0:6]
  126. arr_str_dalaoSwapUrl = [
  127. f"https://solscan.io/account/{str_dalaoAddress}#defiactivities" for str_dalaoAddress in arr_str_dalaoAddress]
  128. print('arr_str_dalaoAddress', len(arr_str_dalaoAddress))
  129. driver_len = 6 if len(arr_str_dalaoAddress) >= 6 else len(
  130. arr_str_dalaoAddress)
  131. arr_driver = [None]*driver_len
  132. arr_wait = [None]*driver_len
  133. arr_home_handle = [None]*driver_len
  134. arr_split_str_dalaoAddress = [None]*driver_len
  135. arr_split_str_dalaoSwapUrl = [None]*driver_len
  136. len_split_str_dalaoAddress = int(len(arr_str_dalaoAddress)/driver_len)+1
  137. for idx_driver in range(0, driver_len):
  138. arr_split_str_dalaoAddress[idx_driver] = arr_str_dalaoAddress[idx_driver *
  139. len_split_str_dalaoAddress: (idx_driver+1)*len_split_str_dalaoAddress]
  140. arr_split_str_dalaoSwapUrl[idx_driver] = arr_str_dalaoSwapUrl[idx_driver *
  141. len_split_str_dalaoAddress: (idx_driver+1)*len_split_str_dalaoAddress]
  142. number_onepage = 10
  143. set_str_dalaoAddress = set(arr_str_dalaoAddress)
# split_len = 3
# def switch_to_page(context, title=None, url=None):
#     """Switch to the tab whose title or URL matches the given value."""
#     for item_page in context.pages:
#         if title:
#             if title in item_page.title():
#                 # Bring the matching tab to the front.
#                 item_page.bring_to_front()
#                 return item_page
#         elif url:
#             if url in item_page.url:
#                 # Bring the matching tab to the front.
#                 item_page.bring_to_front()
#                 return item_page
#     else:
#         print("not found title or url")
#     return context.pages[0]
  161. async def do_some_thing(playwright, cur_arr_str_dalaoAddress, cur_arr_str_dalaoSwapUrl):
  162. browser = await playwright.chromium.launch(headless=True)
  163. context = await browser.new_context()
  164. arr_page = [None]*2
  165. arr_page[0] = await context.new_page()
  166. await arr_page[0].goto(
  167. "https://solscan.io/")
  168. # do ....
  169. print(context.pages)
  170. # global split_len
  171. global set_str_dalaoAddress
  172. # cur_set_str_dalaoAddress = set(arr_str_dalaoAddress)
  173. arr_str_dalaoAddress = cur_arr_str_dalaoAddress
  174. arr_str_dalaoSwapUrl = cur_arr_str_dalaoSwapUrl
  175. for str_dalaoAddress_idx in range(0, len(cur_arr_str_dalaoAddress)):
  176. str_dalaoAddress = arr_str_dalaoAddress[str_dalaoAddress_idx]
  177. str_dalaoTransferUrl = arr_str_dalaoSwapUrl[str_dalaoAddress_idx]
  178. print(f"enter str_dalaoAddress={str_dalaoAddress}")
  179. arr_page[1] = await context.new_page()
  180. await arr_page[1].goto(str_dalaoTransferUrl)
  181. page = None
  182. for item_page in context.pages:
  183. if item_page.url == str_dalaoTransferUrl:
  184. await item_page.bring_to_front()
  185. page = item_page
  186. break
  187. res_df = pd.DataFrame(columns=activities_columns)
  188. # if (baseclass.dalao_activities_solscan_path /
  189. # f"solscan_act_{str_dalaoAddress}.csv").exists():
  190. # res_df = pd.read_csv(baseclass.dalao_activities_solscan_path /
  191. # f"solscan_act_{str_dalaoAddress}.csv", dtype=object)
  192. # continue
  193. # text_pagee = await page.content()
  194. # text_pagee = await page.html()
  195. # print("text_pagee=", text_pagee)
  196. th_ = page.locator(
  197. 'table.w-full thead th')
  198. # await expect(th_).toBeVisible()
  199. text_th_ = (await th_.text_content()).strip()
  200. print("text_th_=", text_th_)
  201. div_time_click = th_.locator(
  202. 'div.items-center.cursor-pointer')
  203. text_time_click = (await div_time_click.text_content()).strip()
  204. print(
  205. f"str_dalaoAddress={str_dalaoAddress} text_time_click={text_time_click}")
  206. if not "UTC" in text_time_click:
  207. await div_time_click.click()
  208. await page.wait_for_load_state('load')
  209. prev_page_height = await page.evaluate(
  210. "document.documentElement.scrollHeight")
  211. await asyncio.sleep(0.4)
  212. retytimes = 3
  213. while retytimes > 0:
  214. retytimes -= 1
  215. await page.evaluate(
  216. "window.scrollTo(0, document.body.scrollHeight);")
  217. await asyncio.sleep(0.4)
  218. cur_page_height = await page.evaluate(
  219. "document.documentElement.scrollHeight")
  220. if cur_page_height > prev_page_height:
  221. prev_page_height = cur_page_height
  222. elif cur_page_height == prev_page_height:
  223. break
  224. [button_num_onepage, number_onepage, arr_text_page_info,
  225. button_aft_onepage] = await get_pageselect_element(page=page, str_dalaoAddress=str_dalaoAddress)
  226. print(f"str_dalaoAddress={str_dalaoAddress} scrollTo 0")
  227. while (number_onepage != 40):
  228. await button_num_onepage.click(force=True)
  229. div_data_radix_select_dep1 = page.locator(
  230. 'div[data-radix-select-viewport][role="presentation"]')
  231. arr_div_data_radix_select = div_data_radix_select_dep1.locator(
  232. 'div[role="option"]')
  233. div_data_radix_select = (await arr_div_data_radix_select.all())[-1]
  234. await div_data_radix_select.click()
  235. await asyncio.sleep(0.8)
  236. [button_num_onepage, number_onepage, arr_text_page_info,
  237. button_aft_onepage] = await get_pageselect_element(page=page, str_dalaoAddress=str_dalaoAddress)
  238. retytimes = 3
  239. while retytimes > 0:
  240. retytimes -= 1
  241. await page.evaluate(
  242. "window.scrollTo(0, document.body.scrollHeight);")
  243. await asyncio.sleep(0.4)
  244. cur_page_height = await page.evaluate(
  245. "document.documentElement.scrollHeight")
  246. if cur_page_height > prev_page_height:
  247. prev_page_height = cur_page_height
  248. elif cur_page_height == prev_page_height:
  249. break
  250. break
  251. [button_num_onepage, number_onepage, arr_text_page_info,
  252. button_aft_onepage] = await get_pageselect_element(page=page, str_dalaoAddress=str_dalaoAddress)
  253. number_total_page = arr_text_page_info[1] if arr_text_page_info[1] <= 3 else 3
  254. for number_cur_page in range(1, number_total_page+1):
  255. print(
  256. f"str_dalaoAddress={str_dalaoAddress} number_cur_page={number_cur_page} number_total_page={number_total_page}")
  257. retytimes = 3
  258. while retytimes > 0:
  259. retytimes -= 1
  260. await page.evaluate(
  261. "window.scrollTo(0, document.body.scrollHeight);")
  262. await asyncio.sleep(0.4)
  263. cur_page_height = await page.evaluate(
  264. "document.documentElement.scrollHeight")
  265. if cur_page_height > prev_page_height:
  266. prev_page_height = cur_page_height
  267. elif cur_page_height == prev_page_height:
  268. break
  269. [button_num_onepage, number_onepage, arr_text_page_info,
  270. button_aft_onepage] = await get_pageselect_element(page=page, str_dalaoAddress=str_dalaoAddress)
  271. while True:
  272. try:
  273. tbody_data = await get_tbody_data(page=page, str_dalaoAddress=str_dalaoAddress)
  274. except Exception as e:
  275. print(traceback.format_exc())
  276. await asyncio.sleep(2)
  277. continue
  278. if tbody_data is None:
  279. await asyncio.sleep(0.8)
  280. continue
  281. elif (number_cur_page < number_total_page and len(tbody_data) != 40):
  282. print("tbody_data_len=", len(tbody_data))
  283. await asyncio.sleep(0.8)
  284. continue
  285. else:
  286. break
  287. page_df = pd.DataFrame(
  288. tbody_data, columns=activities_columns)
  289. if page_df['txhash'].tolist()[0] in res_df['txhash']:
  290. # 已经存在了 不用在遍历了直接跳过
  291. break
  292. res_df = pd.concat([res_df, page_df],
  293. ignore_index=True).reset_index(drop=True)
  294. if (number_cur_page == number_total_page):
  295. break
  296. await button_aft_onepage.click()
  297. await asyncio.sleep(1)
  298. # df.to_csv(baseclass.dalao_activities_solscan_path)
  299. res_df = res_df.drop_duplicates()
  300. res_df.to_csv(baseclass.dalao_activities_solscan_path /
  301. f"solscan_act_{str_dalaoAddress}.csv", index=False)
  302. set_str_dalaoAddress.discard(str_dalaoAddress)
  303. await arr_page[1].close()
  304. # time.sleep(10)
  305. await asyncio.sleep(5)
  306. await context.close()
  307. await browser.close()
  308. async def get_pageselect_element(page, str_dalaoAddress):
  309. while True:
  310. try:
  311. div_select_page = page.locator(
  312. "div.items-center.justify-end.flex-row")
  313. button_num_onepage = div_select_page.locator(
  314. 'button[type="button"].border-input')
  315. number_onepage = int((await button_num_onepage.text_content()).strip())
  316. text_page_info = (await div_select_page.text_content()).strip()
  317. # text_page_info = Show10per pagePage 1 of 2
  318. print(f'{ str_dalaoAddress} text_page_info=', text_page_info)
  319. if not "of" in text_page_info:
  320. # print("arr_text_page_info no of")
  321. raise Exception("arr_text_page_info no of")
  322. arr_text_page_info = text_page_info.split(' ')
  323. arr_text_page_info = [int(arr_text_page_info[-3]),
  324. int(arr_text_page_info[-1])]
  325. break
  326. except Exception as e:
  327. print(str(e))
  328. await asyncio.sleep(1)
  329. continue
  330. # raise
  331. # arr_button_left_right = div_select_page.locator(
  332. # 'button.inline-flex.items-center.justify-center.whitespace-nowrap'
  333. # )
  334. # button_pre_onepage = arr_button_left_right.all()[1]
  335. # button_aft_onepage = (await arr_button_left_right.all())[2]
  336. button_aft_onepage = page.get_by_role("button", name="right")
  337. return [button_num_onepage, number_onepage, arr_text_page_info, button_aft_onepage]
  338. async def get_onedriver_swapactivities(cur_arr_str_dalaoAddress, cur_arr_str_dalaoSwapUrl):
  339. async with async_playwright() as playwright:
  340. await do_some_thing(playwright, cur_arr_str_dalaoAddress, cur_arr_str_dalaoSwapUrl)
  341. async def main():
  342. print("enter main()")
  343. tasks = [get_onedriver_swapactivities(
  344. arr_split_str_dalaoAddress[idx_driver], arr_split_str_dalaoSwapUrl[idx_driver]) for idx_driver in range(0, driver_len)]
  345. await asyncio.gather(*tasks)
  346. asyncio.run(main())
  347. print(f"{'{:<6}'.format('END')} {baseclass.scriptfilename} ----------------NOTE-----------NOTE---------------")
  348. # main()
  349. # asyncio.get_event_loop().run_until_complete(main())