#!/usr/bin/env python3
"""
Script to analyze payin and payout CSV files for transaction anomalies
"""

import csv
import sys
from collections import defaultdict
from datetime import datetime
from decimal import Decimal, InvalidOperation

def parse_decimal(value):
    """Convert a CSV amount cell to a finite ``Decimal``.

    Thousands separators (``,``) and surrounding whitespace are removed
    before parsing.

    Args:
        value: The raw cell. Usually a string, but ``None`` and non-string
            values are accepted; non-strings are converted with ``str()``.

    Returns:
        The parsed amount, or ``Decimal('0.00')`` when the cell is ``None``,
        blank, malformed, or not a finite number (``NaN`` / ``Infinity``).
    """
    zero = Decimal('0.00')
    if value is None:
        return zero

    text = str(value).replace(',', '').strip()
    if not text:
        return zero

    try:
        amount = Decimal(text)
    except (InvalidOperation, ValueError):
        return zero

    # Decimal accepts "NaN" and "Infinity" as valid input. Ordering
    # comparisons against NaN raise InvalidOperation, and neither value is a
    # meaningful amount, so both are treated as unparseable.
    return amount if amount.is_finite() else zero

def _cell(row, column):
    """Return the stripped text stored under *column* in *row*, or ''.

    ``csv.DictReader`` fills the missing columns of a short row with ``None``,
    so the stored value cannot be assumed to be a string.
    """
    value = row.get(column)
    return value.strip() if isinstance(value, str) else ''


def _report_anomaly(anomalies, summary, heading, items, describe):
    """Record one anomaly category and print at most ten examples of it.

    Does nothing when *items* is empty. Otherwise appends
    ``"⚠️  <summary>: <count>"`` to *anomalies*, prints the same line built
    from *heading*, prints ``describe(item)`` for the first ten items and,
    when there are more, a line stating how many were left out.
    """
    if not items:
        return
    anomalies.append(f"⚠️  {summary}: {len(items)}")
    print(f"\n⚠️  {heading}: {len(items)}")
    for item in items[:10]:
        print(describe(item))
    if len(items) > 10:
        print(f"   ... and {len(items) - 10} more")


def _analyze_transactions_csv(filepath, label, extra_id_column):
    """Run every single-file check on one transaction CSV and print a report.

    Checks performed, in report order: duplicate values in each identifier
    column ('Order Id', 'Txnid', *extra_id_column*, 'Bank UTR', 'API Txnid'),
    rows with status 'success' but no usable 'Bank UTR', rows missing any of
    'Order Id' / 'Txnid' / 'Amount' / 'Status', rows whose 'Amount' is not a
    positive finite number, and finally a count of rows per status.

    Args:
        filepath: Path of the CSV file. Read as UTF-8; a leading byte-order
            mark is skipped so it does not become part of the first header.
        label: Lower-case file kind ('payin' / 'payout') used in the banner
            and in the "no anomalies" message.
        extra_id_column: The identifier column specific to this file kind.

    Returns:
        ``(anomalies, transactions)``: the list of one-line anomaly summaries
        and the list of row dicts in file order.
    """
    print("\n" + "=" * 80)
    print(f"ANALYZING {label.upper()} CSV")
    print("=" * 80)

    # (column name, plural label, singular label) for each duplicate check.
    id_columns = (
        ('Order Id', 'Order IDs', 'Order ID'),
        ('Txnid', 'Txnids', 'Txnid'),
        (extra_id_column, f"{extra_id_column}s", extra_id_column),
        ('Bank UTR', 'Bank UTRs', 'UTR'),
        ('API Txnid', 'API Txnids', 'API Txnid'),
    )
    required_columns = ('Order Id', 'Txnid', 'Amount', 'Status')
    rows_by_value = {column: defaultdict(list) for column, _, _ in id_columns}

    anomalies = []
    transactions = []
    success_no_utr = []
    missing_fields = []
    invalid_amounts = []
    status_counts = defaultdict(int)

    # newline='' lets the csv module do its own line-ending handling.
    with open(filepath, 'r', encoding='utf-8-sig', newline='') as f:
        # Row numbers start at 2 because the header is row 1.
        for row_num, row in enumerate(csv.DictReader(f), start=2):
            transactions.append(row)
            # Raw (unstripped) values are kept for the report lines.
            order_id = row.get('Order Id')
            txnid = row.get('Txnid')

            for column, _, _ in id_columns:
                value = _cell(row, column)
                # The literal 'failed' in the UTR column is a placeholder,
                # not an identifier, so it never counts as a duplicate.
                if column == 'Bank UTR' and value.lower() == 'failed':
                    continue
                if value:
                    rows_by_value[column][value].append(row_num)

            status = _cell(row, 'Status').lower()
            status_counts[status] += 1

            utr = _cell(row, 'Bank UTR')
            if status == 'success' and (not utr or utr.lower() == 'failed'):
                success_no_utr.append((row_num, order_id, txnid))

            missing = [name for name in required_columns if not _cell(row, name)]
            if missing:
                missing_fields.append((row_num, missing, order_id, txnid))

            amount = parse_decimal(row.get('Amount', '0'))
            # is_finite() is checked first: comparing a NaN with <= raises.
            if not amount.is_finite() or amount <= 0:
                invalid_amounts.append((row_num, order_id, txnid, amount))

    print(f"\nTotal transactions: {len(transactions)}")

    for column, plural, singular in id_columns:
        duplicates = [
            (value, rows)
            for value, rows in rows_by_value[column].items()
            if len(rows) > 1
        ]
        _report_anomaly(
            anomalies,
            f"DUPLICATE {plural} found",
            f"DUPLICATE {plural}",
            duplicates,
            lambda dup, singular=singular: (
                f"   {singular} '{dup[0]}' appears in rows: {dup[1]}"
            ),
        )

    _report_anomaly(
        anomalies,
        "SUCCESS transactions without UTR",
        "SUCCESS transactions without UTR",
        success_no_utr,
        lambda hit: f"   Row {hit[0]}: Order ID={hit[1]}, Txnid={hit[2]}",
    )
    _report_anomaly(
        anomalies,
        "Missing required fields",
        "Missing required fields",
        missing_fields,
        lambda hit: (
            f"   Row {hit[0]}: Missing {', '.join(hit[1])} "
            f"(Order ID={hit[2]}, Txnid={hit[3]})"
        ),
    )
    _report_anomaly(
        anomalies,
        "Invalid amounts (<= 0)",
        "Invalid amounts (<= 0)",
        invalid_amounts,
        lambda hit: (
            f"   Row {hit[0]}: Amount={hit[3]} "
            f"(Order ID={hit[1]}, Txnid={hit[2]})"
        ),
    )

    print("\n📊 Status Distribution:")
    for status, count in sorted(status_counts.items()):
        print(f"   {status}: {count}")

    if not anomalies:
        print(f"\n✅ No anomalies detected in {label} CSV")

    return anomalies, transactions


def analyze_payin_csv(filepath):
    """Analyze the payin CSV for anomalies.

    The file-specific identifier column checked for duplicates is 'Refid'.

    Args:
        filepath: Path of the payin CSV file.

    Returns:
        ``(anomalies, transactions)``: one summary string per anomaly category
        found, and the list of row dicts read from the file.
    """
    return _analyze_transactions_csv(filepath, 'payin', 'Refid')


def analyze_payout_csv(filepath):
    """Analyze the payout CSV for anomalies.

    The file-specific identifier column checked for duplicates is 'Payid'.

    Args:
        filepath: Path of the payout CSV file.

    Returns:
        ``(anomalies, transactions)``: one summary string per anomaly category
        found, and the list of row dicts read from the file.
    """
    return _analyze_transactions_csv(filepath, 'payout', 'Payid')

def cross_file_analysis(payin_transactions, payout_transactions):
    """Check identifier consistency across and within the two row lists.

    Checks, in report order:
      1. 'Txnid' values present in both lists.
      2. 'Order Id' values present in both lists.
      3. Payout rows whose 'Txnid' equals their 'Payid'.
      4. Payout rows whose 'Txnid' equals their 'Bank UTR'.
      5. Payin rows whose 'Txnid' equals their 'Refid'.
      6. Payin rows whose 'Txnid' equals their 'Bank UTR'.

    A 'Bank UTR' of 'failed' (any case) is ignored in checks 4 and 6. Values
    are compared after stripping whitespace; blank or missing cells never
    match. Reported row numbers assume each list is in file order with the
    header on row 1.

    Args:
        payin_transactions: Row dicts read from the payin CSV.
        payout_transactions: Row dicts read from the payout CSV.

    Returns:
        A list with one summary string per check that found something.
    """
    print("\n" + "=" * 80)
    print("CROSS-FILE ANALYSIS")
    print("=" * 80)

    anomalies = []

    def text_of(row, column):
        # csv.DictReader stores None for the missing columns of a short row.
        value = row.get(column)
        return value.strip() if isinstance(value, str) else ''

    def distinct(transactions, column):
        # Non-blank values of one column across all rows.
        return {text_of(row, column) for row in transactions} - {''}

    def flag(summary, lines, heading=None):
        # Record the category and print at most ten example lines.
        if not lines:
            return
        anomalies.append(f"⚠️  {summary}: {len(lines)}")
        print(f"\n⚠️  {heading or summary}: {len(lines)}")
        for line in lines[:10]:
            print(line)
        if len(lines) > 10:
            print(f"   ... and {len(lines) - 10} more")

    def txnid_equals(transactions, column, label):
        # Report lines for rows whose Txnid repeats the value in *column*.
        lines = []
        for row_num, row in enumerate(transactions, start=2):
            txnid = text_of(row, 'Txnid')
            other = text_of(row, column)
            if column == 'Bank UTR' and other.lower() == 'failed':
                continue
            if txnid and txnid == other:
                lines.append(
                    f"   Row {row_num}: Txnid={label}={txnid} "
                    f"(Order ID={row.get('Order Id')})"
                )
        return lines

    # Sorted so the ten examples shown are the same on every run.
    common_txnids = sorted(
        distinct(payin_transactions, 'Txnid') & distinct(payout_transactions, 'Txnid')
    )
    flag(
        "Txnids appearing in both payin and payout",
        [f"   Txnid: {txnid}" for txnid in common_txnids],
    )

    common_order_ids = sorted(
        distinct(payin_transactions, 'Order Id')
        & distinct(payout_transactions, 'Order Id')
    )
    flag(
        "Order IDs appearing in both payin and payout",
        [f"   Order ID: {order_id}" for order_id in common_order_ids],
    )

    same_row_checks = (
        ("Payout", payout_transactions, 'Payid', 'Payid'),
        ("Payout", payout_transactions, 'Bank UTR', 'UTR'),
        ("Payin", payin_transactions, 'Refid', 'Refid'),
        ("Payin", payin_transactions, 'Bank UTR', 'UTR'),
    )
    for source, transactions, column, label in same_row_checks:
        summary = f"{source}: Txnid matches {label}"
        flag(
            summary,
            txnid_equals(transactions, column, label),
            heading=f"{summary} (should be different)",
        )

    if not anomalies:
        print("\n✅ No cross-file anomalies detected")

    return anomalies

def main(payin_file='provider_data/payin.csv', payout_file='provider_data/payout.csv'):
    """Run the payin, payout and cross-file checks, then exit.

    Args:
        payin_file: Path of the payin CSV.
        payout_file: Path of the payout CSV.

    The process always ends through ``sys.exit``:
        0 - no anomalies were found,
        1 - at least one anomaly was found,
        2 - an input file could not be opened, decoded or parsed.
    """
    print("=" * 80)
    print("PROVIDER DATA ANOMALY ANALYSIS")
    print("=" * 80)

    try:
        payin_anomalies, payin_transactions = analyze_payin_csv(payin_file)
        payout_anomalies, payout_transactions = analyze_payout_csv(payout_file)
    except (OSError, UnicodeDecodeError, csv.Error) as exc:
        # Kept distinct from status 1 so callers can tell "bad data" from
        # "could not read the data at all".
        print(f"\nERROR: could not read input CSV: {exc}", file=sys.stderr)
        sys.exit(2)

    cross_anomalies = cross_file_analysis(payin_transactions, payout_transactions)
    all_anomalies = payin_anomalies + payout_anomalies + cross_anomalies

    # Summary
    print("\n" + "=" * 80)
    print("SUMMARY")
    print("=" * 80)
    print(f"\nPayin Anomalies: {len(payin_anomalies)}")
    print(f"Payout Anomalies: {len(payout_anomalies)}")
    print(f"Cross-File Anomalies: {len(cross_anomalies)}")
    print(f"Total Anomalies: {len(all_anomalies)}")

    if all_anomalies:
        print("\n⚠️  ANOMALIES DETECTED - Please review the details above")
        sys.exit(1)

    print("\n✅ No anomalies detected in either file")
    sys.exit(0)

# Run the analysis only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()

