import asyncio, json, os, re, time, random, pathlib, sys
sys.path.insert(0,'/opt/data/scripts')
import requests, websockets
from cbm_page import BASE, PROFILE, HEADERS, ensure_running
# JSON log read by load_logged(); its verified comment entries mark posts to skip.
LOG_FILE='/opt/data/agent-state/memory/ig-encourage-agent-log.json'
# Page text that makes main() stop (or abandon a profile): rate-limit, checkpoint,
# captcha and login-required style messages. Case-insensitive.
WARN_RE=re.compile(r'try again later|captcha|suspicious|checkpoint|temporarily blocked|restrict certain activity|automated behavior|confirm it was you|unusual activity|we limit how often|action blocked|couldn.t post|could not post|challenge required|login_required', re.I)
# Cues for promotional / app / event posts; a match makes main() skip the post.
# NOTE(review): alternatives carry no word boundaries, so short terms match inside
# longer words (e.g. "app" in "happy", "sale" in "Jerusalem", "order" in "border").
# This over-filters; confirm whether that is intended before tightening.
PROMO_RE=re.compile(r'link in bio|comment\s+\w+\s+to|get the link|download|app|sale|shop|order|register|ticket|tour|out now|stream|podcast|subscribe|donate|giveaway|pre-?order|available now|limited time|promo|join us at|conference', re.I)
# Cues for sensitive or political topics; a match makes main() skip the post.
# NOTE(review): also unanchored, so "war" matches inside "toward"/"reward". The
# substring behaviour does catch hashtag-joined words, so tighten with care.
SENSITIVE_RE=re.compile(r'death|died|funeral|suicide|self-harm|grief|grieving|condolence|abuse|trauma|murder|war|politic|election|lgbt|pride month|transgender|abortion', re.I)
# Whole-word (\b-anchored) vocabulary a post must contain to be kept as a candidate.
CHRISTIAN_RE=re.compile(r'\b(God|Jesus|Christ|Lord|Father|faith|Bible|Scripture|pray|prayer|worship|church|gospel|Christian|Spirit|grace|mercy|hope|trust|amen|hallelujah|devotional|Psalm|Proverbs|Romans|Corinthians|Ephesians|Hebrews|Isaiah|Matthew|John)\b', re.I)
# Account handles whose profile pages main() scans for post links, in addition to the home feed.
SAFE_ACCOUNTS=['proverbs31ministries','youversion','lysaterkeurst','ourdailybread','shereadstruth','bibleproject','desiringgod']

def post_id(url):
    """Return the shortcode that follows ``/p/`` or ``/reel/`` in *url*.

    Returns ``None`` when the URL contains neither path segment.
    """
    found = re.search(r'/(?:p|reel)/([A-Za-z0-9_-]+)', url)
    if found is None:
        return None
    return found.group(1)

def load_logged():
    """Return the set of post ids that already have a verified comment logged.

    Reads ``LOG_FILE`` (JSON) and collects ``postId`` from every entry under
    ``comments`` whose ``verified`` value is truthy.

    Best-effort by design: a missing, unreadable or malformed log yields an
    empty set instead of an error. Individual entries that are not objects,
    or whose ``postId`` is not a string, are ignored rather than discarding
    the whole log.
    """
    try:
        # Context manager closes the handle; JSON logs are read as UTF-8.
        with open(LOG_FILE, encoding='utf-8') as fh:
            data = json.load(fh)
    except (OSError, ValueError):
        # OSError: missing/unreadable file. ValueError: invalid JSON or bad encoding.
        return set()

    comments = data.get('comments') if isinstance(data, dict) else None
    if not isinstance(comments, list):
        return set()

    return {
        entry['postId']
        for entry in comments
        if isinstance(entry, dict)
        and entry.get('verified')
        and isinstance(entry.get('postId'), str)
    }

class Page:
    """Minimal Chrome DevTools Protocol client over a single websocket.

    Commands are sent as JSON objects with an incrementing ``id``; ``pump``
    must be running as a task so replies are matched back to their callers.
    """

    def __init__(self, ws):
        self.ws = ws
        self.i = 0            # id of the most recently issued command
        self.pending = {}     # command id -> Future awaiting that reply
        self._closed = False  # set once pump() has stopped reading

    async def pump(self):
        """Read messages until the socket ends, resolving pending commands.

        Messages without a matching ``id`` (protocol events, stale replies)
        are ignored. When reading stops for any reason, including
        cancellation, every still-pending command fails with
        ``ConnectionError`` so callers do not wait forever.
        """
        try:
            async for raw in self.ws:
                msg = json.loads(raw)
                fut = self.pending.pop(msg.get('id'), None)
                # Skip events, and futures whose caller was cancelled meanwhile.
                if fut is None or fut.done():
                    continue
                if 'error' in msg:
                    fut.set_exception(RuntimeError(json.dumps(msg['error'])))
                else:
                    fut.set_result(msg.get('result'))
        finally:
            self._closed = True
            orphaned, self.pending = self.pending, {}
            for fut in orphaned.values():
                if not fut.done():
                    fut.set_exception(
                        ConnectionError('CDP connection closed before a reply arrived'))

    async def send(self, m, p=None):
        """Send command *m* with params *p* and return its ``result`` payload.

        Raises ``RuntimeError`` carrying the JSON error object when the
        browser replies with an error, and ``ConnectionError`` when the
        connection has ended.
        """
        if self._closed:
            raise ConnectionError('CDP connection is closed')
        self.i += 1
        cmd_id = self.i
        fut = asyncio.get_running_loop().create_future()
        self.pending[cmd_id] = fut
        try:
            await self.ws.send(json.dumps({'id': cmd_id, 'method': m, 'params': p or {}}))
        except BaseException:
            # The command never went out; do not leave a dangling waiter behind.
            self.pending.pop(cmd_id, None)
            raise
        return await fut

    async def eval(self, expr, await_promise=False):
        """Evaluate JavaScript *expr* in the page and return its value.

        Raises ``RuntimeError`` (details truncated to 500 characters) when the
        evaluation reports ``exceptionDetails``.
        """
        res = await self.send('Runtime.evaluate', {
            'expression': expr,
            'returnByValue': True,
            'awaitPromise': await_promise,
        }) or {}
        if 'exceptionDetails' in res:
            raise RuntimeError(str(res['exceptionDetails'])[:500])
        return res.get('result', {}).get('value')

    async def goto(self, url, wait=5):
        """Navigate to *url*, then sleep *wait* seconds to let the page settle."""
        await self.send('Page.navigate', {'url': url})
        await asyncio.sleep(wait)

def get_ws_or_create():
    """Return the CDP websocket URL of a page target to drive.

    Calls ``ensure_running()``, lists the profile's CDP targets and prefers a
    page whose URL contains ``instagram.com``. If none is on Instagram, the
    first page target is returned and the caller is expected to navigate it.
    Despite the name, no new tab is created here.

    Raises:
        requests.HTTPError: the target-list request returned an error status.
        RuntimeError: the profile exposes no page targets at all.
    """
    ensure_running()
    time.sleep(1)  # short pause before listing targets (presumably lets the browser settle)
    resp = requests.get(
        f'{BASE}/api/profiles/{PROFILE}/cdp/json/list', headers=HEADERS, timeout=30)
    resp.raise_for_status()
    pages = [x for x in resp.json() if x.get('type') == 'page']
    if not pages:
        raise RuntimeError('no page targets available to attach to')
    for page in pages:
        if 'instagram.com' in page.get('url', ''):
            return page['webSocketDebuggerUrl']
    return pages[0]['webSocketDebuggerUrl']

async def connect():
    """Open the CDP websocket and return ``(page, pump_task)``.

    Starts ``Page.pump`` as a background task, then enables the Runtime and
    Page domains. If either enable call fails, the pump task is cancelled
    and the websocket closed before the error propagates, so a failed setup
    leaves nothing open.
    """
    wsurl = get_ws_or_create()
    ws = await websockets.connect(
        wsurl,
        additional_headers=HEADERS,
        ping_interval=None,
        open_timeout=30,
        max_size=None,
    )
    page = Page(ws)
    pump = asyncio.create_task(page.pump())
    try:
        await page.send('Runtime.enable')
        await page.send('Page.enable')
    except BaseException:
        pump.cancel()
        await ws.close()
        raise
    return page, pump

# JavaScript evaluated on each post page: returns article text, image alt text,
# Open Graph title/description and the final URL. It is a raw string so that
# join('\n') reaches the browser as the two-character JS escape; in a normal
# Python string the \n becomes a real line break inside the JS string literal,
# which is a JavaScript syntax error.
POST_INFO_JS = r"""(() => {
 const article=document.querySelector('article');
 const text=(article?article.innerText:document.body.innerText).slice(0,3000);
 const imgs=[...document.querySelectorAll('article img,img')].slice(0,8).map(i=>i.alt).filter(Boolean).join(' | ').slice(0,1200);
 const metas=[...document.querySelectorAll('meta[property="og:title"],meta[property="og:description"]')].map(m=>m.content).join('\n').slice(0,1000);
 return {article:text, alt:imgs, meta:metas, url:location.href};
})()"""


async def _collect_candidates(p):
    """Scan the home feed and SAFE_ACCOUNTS profiles, then print a JSON report.

    Prints exactly one JSON document to stdout:
      * ``{'ok': False, 'stop': ...}`` when a login form or warning text is seen;
      * ``{'ok': True, 'candidates': [...], ...}`` otherwise.
    Up to 80 unique posts are visited and at most 18 candidates are kept.
    """
    if 'instagram.com' not in (await p.eval('location.href') or ''):
        await p.goto('https://www.instagram.com/', 7)
    state = await p.eval("""(() => ({url:location.href, text:document.body.innerText.slice(0,4000), hasLogin:!!document.querySelector('input[name="username"],input[name="password"]')}))()""")
    if state['hasLogin'] or WARN_RE.search(state['text'] or ''):
        print(json.dumps({'ok': False, 'stop': 'login_or_warning', 'state': state}, indent=2))
        return

    logged = load_logged()
    seen = set()
    candidates = []
    skipped = []

    async def collect_links_from(url, source):
        """Open *url*, scroll twice, and return up to 12 post links tagged with *source*."""
        await p.goto(url, 6 + random.random() * 2)
        for _ in range(2):
            await p.eval('window.scrollBy(0, Math.floor(700+Math.random()*500))')
            await asyncio.sleep(1.5 + random.random())
        body = await p.eval('document.body.innerText.slice(0,4000)') or ''
        if WARN_RE.search(body):
            raise RuntimeError('warning after ' + url)
        links = await p.eval("""(() => [...new Set([...document.querySelectorAll('a[href*="/p/"],a[href*="/reel/"]')].map(a=>a.href.split('?')[0]))].slice(0,12))()""") or []
        return [{'url': u, 'source': source} for u in links]

    all_links = []
    # Home feed: sample the links after each of four scroll steps.
    await p.goto('https://www.instagram.com/', 6)
    for _ in range(4):
        all_links += await p.eval("""(() => [...new Set([...document.querySelectorAll('article a[href*="/p/"], article a[href*="/reel/"]')].map(a=>a.href.split('?')[0]))].slice(0,12).map(u=>({url:u,source:'home'})))()""") or []
        await p.eval('window.scrollBy(0, 900)')
        await asyncio.sleep(2)
    # Profile pages: a failure on one account is recorded and the rest continue.
    for acct in SAFE_ACCOUNTS:
        try:
            all_links += await collect_links_from(
                f'https://www.instagram.com/{acct}/', 'profile:' + acct)
        except Exception as e:
            skipped.append({'source': acct, 'reason': 'link_collect_error ' + str(e)[:100]})

    # De-duplicate by post id, keeping first-seen order.
    uniq = []
    for x in all_links:
        pid = post_id(x['url'])
        if not pid or pid in seen:
            continue
        seen.add(pid)
        uniq.append({**x, 'postId': pid})

    for x in uniq[:80]:
        if len(candidates) >= 18:
            break
        pid = x['postId']
        if pid in logged:
            skipped.append({**x, 'reason': 'already in local verified log'})
            continue
        try:
            await p.goto(x['url'], 5 + random.random() * 2)
            body = await p.eval('document.body.innerText') or ''
            if WARN_RE.search(body):
                # A warning on a post page ends the whole run immediately.
                print(json.dumps({'ok': False, 'stop': 'warning_on_post', 'url': x['url'], 'verified_candidates': candidates, 'skipped': skipped[-10:]}, indent=2))
                return
            if re.search(r'vincent\.emmanuel\.lee|Vincent Emmanuel Lee', body):
                skipped.append({**x, 'reason': 'Vincent already visible on page'})
                continue
            info = await p.eval(POST_INFO_JS) or {}
            text = '\n'.join([
                info.get('meta') or '',
                info.get('article') or '',
                info.get('alt') or '',
            ])
            if not CHRISTIAN_RE.search(text):
                skipped.append({**x, 'reason': 'no clear Christian cue'})
                continue
            if PROMO_RE.search(text):
                skipped.append({**x, 'reason': 'promotional/app/event cue'})
                continue
            if SENSITIVE_RE.search(text):
                skipped.append({**x, 'reason': 'sensitive/political/safety cue'})
                continue
            # Record whether a comment textarea is present on the page.
            has_ta = await p.eval("!!document.querySelector('textarea[aria-label*=\"comment\" i]')")
            candidates.append({**x, 'finalUrl': info.get('url'), 'context': text[:2200], 'hasTextarea': bool(has_ta)})
        except Exception as e:
            skipped.append({**x, 'reason': 'context_error ' + str(e)[:100]})

    print(json.dumps({'ok': True, 'candidates': candidates, 'skipped_count': len(skipped), 'skipped_sample': skipped[:30]}, indent=2, ensure_ascii=False))


async def main():
    """Connect to the browser, run one candidate-collection pass, and clean up.

    The pump task is cancelled and the websocket closed on every exit path,
    including the early "stop" returns and unexpected exceptions.
    """
    p, pump = await connect()
    try:
        await _collect_candidates(p)
    finally:
        pump.cancel()
        await p.ws.close()
# Script entry point: runs one collection pass on a fresh event loop.
# NOTE(review): there is no `if __name__ == "__main__":` guard, so importing
# this module also triggers a full run.
asyncio.run(main())