import asyncio, json, os, re, time, random, pathlib, sys
sys.path.insert(0,'/opt/data/scripts')
import requests, websockets
from cbm_page import BASE, PROFILE, HEADERS, ensure_running
# Websocket endpoint under the profile's /cdp path; make_page() connects here to
# send Target.createTarget when no Instagram tab is already open.
ROOT=f'wss://cbm.huper.technology/api/profiles/{PROFILE}/cdp'
# JSON file read by load_logged(): a top-level 'comments' list whose entries are
# looked up by their 'postId' and 'verified' keys.
LOG_FILE='/opt/data/agent-state/memory/ig-encourage-agent-log.json'
# Case-insensitive phrases that make main() print a stop report and return when
# they appear in the page text (checked on the landing page and on every post).
WARN_RE=re.compile(r'try again later|captcha|suspicious|checkpoint|temporarily blocked|restrict certain activity|automated behavior|confirm it was you|unusual activity|we limit how often|action blocked|couldn.t post|could not post|challenge required|login_required', re.I)
# Posts whose text matches are skipped with reason 'promotional_or_app_event_cue'.
# NOTE(review): only 'app' is word-bounded; the other alternatives are plain
# substrings, so e.g. 'tour' also matches inside 'tourist' and 'order' inside 'border'.
PROMO_RE=re.compile(r'link in bio|comment\s+[A-Z0-9_"“”]+\s+to|get the link|download|\bapp\b|sale|shop|order|register|ticket|tour|out now|stream|podcast|subscribe|donate|giveaway|pre-?order|available now|limited time|promo|conference', re.I)
# Posts whose text matches are skipped with reason 'sensitive_or_political_cue'.
# NOTE(review): alternatives are unanchored substrings, so 'war' also matches
# inside words such as 'reward' or 'toward'.
SENSITIVE_RE=re.compile(r'death|died|funeral|suicide|self-harm|grief|grieving|condolence|abuse|trauma|murder|war|politic|election|lgbt|pride month|transgender|abortion', re.I)
# Whole-word, case-insensitive cue list; a post with no match at all is skipped
# with reason 'no_clear_christian_cue'.
CHRISTIAN_RE=re.compile(r'\b(God|Jesus|Christ|Lord|Father|faith|Bible|Scripture|pray|prayer|worship|church|gospel|Christian|Spirit|grace|mercy|hope|trust|amen|hallelujah|devotional|Psalm|Proverbs|Romans|Corinthians|Ephesians|Hebrews|Isaiah|Matthew|John)\b', re.I)
# Profile handles whose pages main() visits (after the home feed) to collect post links.
SAFE_ACCOUNTS=['thebibleproject','wellwateredwomen','incourage','daily_bibleverses','biblegateway','crosswaybooks','christianstandardbible','ourdailybread']

def pid(url):
    """Extract the post shortcode from an Instagram post or reel URL.

    Looks for the first ``/p/<code>`` or ``/reel/<code>`` path segment, where
    ``<code>`` is made of letters, digits, ``_`` and ``-``.

    Returns the shortcode string, or ``None`` when *url* has no such segment.
    """
    found = re.search(r'/(?:p|reel)/([A-Za-z0-9_-]+)', url)
    if found is None:
        return None
    return found.group(1)

def load_logged(path=None):
    """Return the set of ``postId`` values of verified comments in the log.

    Args:
        path: JSON file to read. Defaults to ``LOG_FILE`` when omitted, which
            is what the original zero-argument call did.

    Returns:
        A set with the ``postId`` of every entry in the top-level ``comments``
        list whose ``verified`` value is truthy. An unreadable, missing,
        malformed or unexpectedly shaped file yields an empty set: the log is
        best-effort and must never abort a run.
    """
    log_path = LOG_FILE if path is None else path
    try:
        # Context manager so the handle is closed even when parsing fails.
        with open(log_path, encoding='utf-8') as fh:
            data = json.load(fh)
        return {c.get('postId') for c in data.get('comments', []) if c.get('verified')}
    except (OSError, ValueError, AttributeError, TypeError):
        # OSError: missing/unreadable file. ValueError: invalid JSON or bytes.
        # AttributeError/TypeError: JSON that is not the expected dict-of-dicts.
        return set()
class Conn:
    """Request/response client over one websocket using id/method/params JSON messages.

    Attributes:
        ws: the websocket object; must support ``await ws.send(str)`` and
            ``async for raw in ws``.
        i: id of the most recently issued request (ids start at 1).
        pending: maps request id -> future awaiting that request's reply.

    ``pump()`` must be running as a task for ``send()`` to ever complete.
    """

    def __init__(self, ws):
        self.ws = ws
        self.i = 0
        self.pending = {}

    async def pump(self):
        """Read messages until the socket ends, resolving the matching pending future.

        Messages without a known ``id`` (events, or replies to requests that
        already timed out) are ignored.
        """
        async for raw in self.ws:
            msg = json.loads(raw)
            fut = self.pending.pop(msg.get('id'), None)
            # A future that is already done (cancelled by a timeout) must not be
            # resolved again: set_result would raise InvalidStateError and kill
            # this loop, making every later request time out.
            if fut is None or fut.done():
                continue
            if 'error' in msg:
                fut.set_exception(RuntimeError(json.dumps(msg['error'])))
            else:
                fut.set_result(msg.get('result'))

    async def send(self, m, p=None, timeout=30):
        """Send method *m* with params *p* and return the reply's ``result``.

        Raises:
            RuntimeError: the reply carried an ``error`` (JSON-encoded in the message).
            asyncio.TimeoutError: no reply arrived within *timeout* seconds.
        """
        self.i += 1
        req_id = self.i
        fut = asyncio.get_running_loop().create_future()
        self.pending[req_id] = fut
        try:
            await self.ws.send(json.dumps({'id': req_id, 'method': m, 'params': p or {}}))
            return await asyncio.wait_for(fut, timeout)
        finally:
            # Always drop the entry so timed-out or failed requests do not
            # accumulate in `pending`.
            self.pending.pop(req_id, None)

    async def eval(self, e, timeout=30):
        """Evaluate JavaScript expression *e* via ``Runtime.evaluate`` and return its value.

        Raises:
            RuntimeError: the evaluation reported ``exceptionDetails``
                (message truncated to 300 characters).
        """
        res = await self.send('Runtime.evaluate', {'expression': e, 'returnByValue': True}, timeout)
        res = res or {}  # a reply without a 'result' member yields None from send()
        if 'exceptionDetails' in res:
            raise RuntimeError(str(res['exceptionDetails'])[:300])
        return res.get('result', {}).get('value')

    async def goto(self, u, w=5):
        """Navigate to URL *u* via ``Page.navigate``, then wait a fixed *w* seconds."""
        await self.send('Page.navigate', {'url': u}, 20)
        await asyncio.sleep(w)
async def make_page():
    """Attach to an Instagram tab of the profile, creating one if none is open.

    Calls ``ensure_running()``, lists the profile's targets over HTTP and
    reuses the first page whose URL contains ``instagram.com``. Otherwise it
    opens a new tab through the ``ROOT`` websocket with ``Target.createTarget``
    and looks the new target up in the list.

    Returns:
        ``(conn, pump_task)``: a ``Conn`` bound to the page's websocket with the
        ``Runtime`` and ``Page`` domains enabled, and the task running
        ``conn.pump()``. The caller owns both and should cancel the task and
        close ``conn.ws`` when done.

    Raises:
        RuntimeError: the newly created target never appeared in the target list.
    """
    ensure_running()
    # Non-blocking pause: time.sleep() here would stall the whole event loop.
    await asyncio.sleep(1)

    def list_targets():
        # Synchronous HTTP call; short enough to keep inline.
        return requests.get(f'{BASE}/api/profiles/{PROFILE}/cdp/json/list', headers=HEADERS, timeout=30).json()

    ig = [x for x in list_targets() if x.get('type') == 'page' and 'instagram.com' in x.get('url', '')]
    if ig:
        wsurl = ig[0]['webSocketDebuggerUrl']
    else:
        rws = await websockets.connect(ROOT, additional_headers=HEADERS, ping_interval=None, open_timeout=30, max_size=None)
        root = Conn(rws)
        root_pump = asyncio.create_task(root.pump())
        try:
            created = await root.send('Target.createTarget', {'url': 'https://www.instagram.com/'}, 60)
            target_id = created.get('targetId')
            await asyncio.sleep(4)
            wsurl = None
            # The new tab may take a moment to show up in the listing; retry a
            # couple of times instead of failing with a bare IndexError.
            for attempt in range(3):
                if attempt:
                    await asyncio.sleep(2)
                found = [x for x in list_targets() if x.get('id') == target_id]
                if found:
                    wsurl = found[0]['webSocketDebuggerUrl']
                    break
            if wsurl is None:
                raise RuntimeError(f'created target {target_id!r} not found in CDP target list')
        finally:
            # Release the root connection on every path, including failures.
            root_pump.cancel()
            await rws.close()
    ws = await websockets.connect(wsurl, additional_headers=HEADERS, ping_interval=None, open_timeout=30, max_size=None)
    p = Conn(ws)
    pump = asyncio.create_task(p.pump())
    try:
        await p.send('Runtime.enable')
        await p.send('Page.enable')
    except Exception:
        # Do not leak the page socket or its reader task if enabling fails.
        pump.cancel()
        await ws.close()
        raise
    return p, pump
def _skip_reason(text):
    """Return the skip reason for post *text*, or ``None`` when it passes every filter."""
    if not CHRISTIAN_RE.search(text):
        return 'no_clear_christian_cue'
    if PROMO_RE.search(text):
        return 'promotional_or_app_event_cue'
    if SENSITIVE_RE.search(text):
        return 'sensitive_or_political_cue'
    return None


async def main():
    """Collect comment candidates from Instagram and print a JSON report.

    Steps:
      1. Attach to an Instagram tab; stop with ``login_or_warning`` if a login
         form or a ``WARN_RE`` phrase is visible.
      2. Gather post/reel links from the home feed (three scroll passes) and
         from each ``SAFE_ACCOUNTS`` profile (first 10 links after two scrolls).
      3. De-duplicate by post id, then visit up to 60 posts until 25 candidates
         are found. Posts already logged, already showing the account name, or
         failing the text filters are recorded in ``skipped``.
      4. Print ``{'ok': True, 'candidates': ..., ...}``; a ``WARN_RE`` hit on a
         post prints a ``warning_on_post`` report and stops immediately.

    The page websocket and its reader task are released on every exit path.
    """
    p, pump = await make_page()
    try:
        if 'instagram.com' not in ((await p.eval('location.href')) or ''):
            await p.goto('https://www.instagram.com/', 6)
        state = await p.eval("(() => ({url:location.href, text:document.body.innerText.slice(0,2500), hasLogin:!!document.querySelector('input[name=\"username\"],input[name=\"password\"]')}))()")
        if state['hasLogin'] or WARN_RE.search(state['text']):
            print(json.dumps({'ok': False, 'stop': 'login_or_warning', 'state': state}, indent=2))
            return
        logged = load_logged()
        all_links = []
        skipped = []
        candidates = []
        seen = set()
        # Quick pass over the home feed.
        await p.goto('https://www.instagram.com/', 5)
        for _ in range(3):
            links = await p.eval("(() => [...new Set([...document.querySelectorAll('article a[href*=\"/p/\"],article a[href*=\"/reel/\"]')].map(a=>a.href.split('?')[0]))].map(u=>({url:u,source:'home'})))()") or []
            all_links += links
            await p.eval('window.scrollBy(0,900)')
            await asyncio.sleep(2)
        # Then the allow-listed profiles.
        for acct in SAFE_ACCOUNTS:
            await p.goto(f'https://www.instagram.com/{acct}/', 5)
            for _ in range(2):
                await p.eval('window.scrollBy(0,900)')
                await asyncio.sleep(1.2)
            links = await p.eval(f"(() => [...new Set([...document.querySelectorAll('a[href*=\"/p/\"],a[href*=\"/reel/\"]')].map(a=>a.href.split('?')[0]))].slice(0,10).map(u=>({{url:u,source:'profile:{acct}'}})))()") or []
            all_links += links
        # Keep the first occurrence of each post id, preserving discovery order.
        uniq = []
        for x in all_links:
            post_id = pid(x['url'])
            if post_id and post_id not in seen:
                seen.add(post_id)
                uniq.append({**x, 'postId': post_id})
        print('UNIQUE_LINKS', len(uniq), flush=True)
        for x in uniq[:60]:
            if len(candidates) >= 25:
                break
            post_id = x['postId']
            if post_id in logged:
                skipped.append({**x, 'reason': 'already_logged'})
                continue
            await p.goto(x['url'], 4.5)
            body = await p.eval('document.body.innerText') or ''
            if WARN_RE.search(body):
                print(json.dumps({'ok': False, 'stop': 'warning_on_post', 'url': x['url'], 'candidates': candidates, 'skipped_count': len(skipped)}, indent=2))
                return
            # The account's own name on the page means it has already engaged here.
            if re.search(r'vincent\.emmanuel\.lee|Vincent Emmanuel Lee', body):
                skipped.append({**x, 'reason': 'already_visible'})
                continue
            info = await p.eval("""(() => {const article=document.querySelector('article'); const text=(article?article.innerText:document.body.innerText).slice(0,2800); const imgs=[...document.querySelectorAll('article img')].slice(0,6).map(i=>i.alt).filter(Boolean).join(' | ').slice(0,1000); const meta=[...document.querySelectorAll('meta[property="og:title"],meta[property="og:description"]')].map(m=>m.content).join(' | ').slice(0,800); return {url:location.href,article:text,alt:imgs,meta};})()""") or {}
            text = '\n'.join([info.get('meta', ''), info.get('article', ''), info.get('alt', '')])
            reason = _skip_reason(text)
            if reason:
                skipped.append({**x, 'reason': reason})
                continue
            has = await p.eval("!!document.querySelector('textarea[aria-label*=\"comment\" i]')")
            candidates.append({**x, 'finalUrl': info.get('url'), 'context': text[:2000], 'hasTextarea': bool(has)})
        print(json.dumps({'ok': True, 'candidates': candidates, 'skipped_count': len(skipped), 'skipped_sample': skipped[:30]}, indent=2, ensure_ascii=False), flush=True)
    finally:
        # Runs on the early-return paths too, which previously skipped cleanup.
        pump.cancel()
        await p.ws.close()
# Script entry point: runs main() to completion on a new event loop. There is no
# `__main__` guard, so this also executes if the file is imported as a module.
asyncio.run(main())
