FRCMFD DATA ANALYSIS — Complete Data Extraction

#!/usr/bin/env python3
"""
FRCMFD DATA ANALYSIS — Complete Data Extraction & Summary
============================================================
This script:
1. Scans /content for all directories and files
2. Extracts all TAR archives recursively
3. Parses all JSON data files
4. Analyzes all NPY spectra files
5. Generates a comprehensive summary with tables and plots

Run this to see exactly what data you have collected.
matplotlib is optional: without it the plots are skipped and the
text/JSON summary is still produced.
============================================================
"""

import json
import os
import sys
import tarfile
from collections import defaultdict
from datetime import datetime, timezone
from pathlib import Path

import numpy as np

try:
    import matplotlib.pyplot as plt
except ImportError:  # plotting is a convenience, not a requirement
    plt = None

DEFAULT_BASE_PATH = "/content"


# ==============================================================================
# SCAN & EXTRACT FUNCTIONS
# ==============================================================================

def scan_directories(base_path=DEFAULT_BASE_PATH):
    """Describe the immediate children of ``base_path``.

    Args:
        base_path: Directory to inspect (only its direct children are listed).

    Returns:
        dict mapping each child name to either
        ``{'type': 'directory', 'file_count': int, 'path': str}`` (file count
        is recursive) or ``{'type': 'file', 'size_kb': float, 'path': str}``.

    Raises:
        OSError: if ``base_path`` cannot be listed (e.g. it does not exist).
    """
    base = Path(base_path)
    structure = {}
    for item in base.iterdir():
        if item.is_dir():
            file_count = sum(1 for f in item.rglob('*') if f.is_file())
            structure[item.name] = {
                'type': 'directory',
                'file_count': file_count,
                'path': str(item),
            }
        else:
            structure[item.name] = {
                'type': 'file',
                'size_kb': item.stat().st_size / 1024,
                'path': str(item),
            }
    return structure


def extract_tar_files(base_path=DEFAULT_BASE_PATH):
    """Extract every ``*.tar`` found under ``base_path``.

    Each archive ``foo.tar`` is unpacked into a sibling directory ``foo/``.
    Archives that cannot be opened or extracted are reported and skipped.

    Args:
        base_path: Root directory searched recursively for ``*.tar`` files.

    Returns:
        list of ``{'file': str, 'extracted_to': str, 'files': int}``, one per
        successfully extracted archive (``files`` is the member count).
    """
    base = Path(base_path)
    extracted = []
    # Archives are external data: use the 'data' extraction filter (blocks
    # path traversal, absolute paths, device nodes) where the runtime has it.
    safe_filter_available = hasattr(tarfile, 'data_filter')
    for tar_file in base.rglob('*.tar'):
        try:
            print(f" Extracting: {tar_file.name}")
            extract_dir = tar_file.parent / tar_file.stem
            extract_dir.mkdir(exist_ok=True)
            with tarfile.open(tar_file, 'r') as tar:
                if safe_filter_available:
                    tar.extractall(path=extract_dir, filter='data')
                else:
                    tar.extractall(path=extract_dir)
                # Count members while the archive is still open.
                member_count = len(tar.getnames())
            extracted.append({
                'file': tar_file.name,
                'extracted_to': str(extract_dir),
                'files': member_count,
            })
        except Exception as e:  # best-effort: one bad archive must not stop the scan
            print(f" ⚠️ Could not extract {tar_file.name}: {e}")
    return extracted


def _run_metrics(entries):
    """Return drift/energy metrics from a non-empty list of per-step records."""
    first, last = entries[0], entries[-1]
    h0 = first.get('H_total', None)
    hf = last.get('H_total', None)
    # Compare against None, not truthiness: an energy of exactly 0.0 is data.
    dh = hf - h0 if h0 is not None and hf is not None else None

    metrics = {
        'steps': last.get('step', 0),
        'H0': h0,
        'Hf': hf,
        'dH': dh,
    }
    # Relative drift is undefined when the initial energy is missing or zero.
    if h0 and dh is not None:
        metrics['drift_pct'] = (dh / max(abs(h0), 1e-30)) * 100
    else:
        metrics['drift_pct'] = None

    # The coupling may be stored under an ASCII or a Greek key.
    if 'kappa' in first:
        metrics['kappa'] = first['kappa']
    elif 'κ' in first:
        metrics['kappa'] = first['κ']

    metrics['H_kinetic'] = last.get('H_kinetic', None)
    metrics['H_gradient'] = last.get('H_gradient', None)
    metrics['H_potential'] = last.get('H_potential', None)
    return metrics


def _summary_metrics(content):
    """Return the metrics recorded for a summary document (has a 'runs' key)."""
    metrics = {
        'type': 'summary',
        'runs': len(content.get('runs', [])),
    }
    if 'kappa_star' in content:
        metrics['kappa_star'] = content['kappa_star']
    if 'verdict' in content:
        metrics['verdict'] = content['verdict']
    return metrics


def parse_json_files(base_path=DEFAULT_BASE_PATH):
    """Parse every ``*.json`` under ``base_path`` and extract key metrics.

    A document that is a list (or contains ``'step'``) is treated as a run
    file; metrics are only extracted when it is a non-empty list of records.
    Otherwise a document containing ``'runs'`` is treated as a summary file.
    Files that fail to load or do not have the expected shape are reported
    and skipped.

    Args:
        base_path: Root directory searched recursively for ``*.json`` files.

    Returns:
        list of metric dicts; every dict has ``'file'``, ``'path'`` and
        ``'type'`` (``'json'`` or ``'summary'``) plus whatever was extracted.
    """
    base = Path(base_path)
    data = []
    for json_file in base.rglob('*.json'):
        try:
            # JSON is UTF-8 by specification; do not depend on the locale.
            with open(json_file, 'r', encoding='utf-8') as f:
                content = json.load(f)

            metrics = {
                'file': json_file.name,
                'path': str(json_file),
                'type': 'json',
            }
            if 'step' in content or isinstance(content, list):
                if isinstance(content, list) and len(content) > 0:
                    metrics.update(_run_metrics(content))
            elif 'runs' in content:
                metrics.update(_summary_metrics(content))
            data.append(metrics)
        except Exception as e:  # best-effort: report and move on to the next file
            print(f" ⚠️ Could not parse {json_file.name}: {e}")
    return data


def analyze_npy_files(base_path=DEFAULT_BASE_PATH):
    """Load every ``*.npy`` under ``base_path`` and record basic statistics.

    Args:
        base_path: Root directory searched recursively for ``*.npy`` files.

    Returns:
        list of dicts with ``file``, ``path``, ``shape``, ``dtype``,
        ``size_kb``, ``min``, ``max``, ``mean`` and ``std``. Files that cannot
        be loaded or reduced to float statistics are reported and skipped.
    """
    base = Path(base_path)
    npy_data = []
    for npy_file in base.rglob('*.npy'):
        try:
            arr = np.load(npy_file)
            npy_data.append({
                'file': npy_file.name,
                'path': str(npy_file),
                'shape': arr.shape,
                'dtype': str(arr.dtype),
                'size_kb': npy_file.stat().st_size / 1024,
                'min': float(np.min(arr)),
                'max': float(np.max(arr)),
                'mean': float(np.mean(arr)),
                'std': float(np.std(arr)),
            })
        except Exception as e:  # best-effort: report and move on to the next file
            print(f" ⚠️ Could not analyze {npy_file.name}: {e}")
    return npy_data


# ==============================================================================
# ANALYSIS & SUMMARY FUNCTIONS
# ==============================================================================

def summarize_runs(json_data):
    """Select the run records usable for drift analysis.

    Args:
        json_data: Metric dicts as produced by ``parse_json_files``.

    Returns:
        list of run dicts for entries of type ``'json'`` that carry a
        ``'kappa'`` key and a non-None ``'drift_pct'``.
    """
    runs = []
    for item in json_data:
        if item.get('type') != 'json':
            continue
        if 'kappa' not in item or item.get('drift_pct') is None:
            continue
        runs.append({
            'file': item['file'],
            'kappa': item['kappa'],
            'steps': item.get('steps', 0),
            'drift_pct': item['drift_pct'],
            'dH': item.get('dH', 0),
            'H0': item.get('H0', 0),
            'Hf': item.get('Hf', 0),
            'H_kinetic': item.get('H_kinetic', None),
            'H_gradient': item.get('H_gradient', None),
            'H_potential': item.get('H_potential', None),
        })
    return runs


def _format_kappa(kappa):
    """Format a coupling value to four decimals, or 'N/A' when it is None."""
    return f"{kappa:.4f}" if kappa is not None else "N/A"


def print_summary(structure, extracted, json_data, npy_data, runs):
    """Print the full text report to stdout.

    Args:
        structure: Result of ``scan_directories``.
        extracted: Result of ``extract_tar_files``.
        json_data: Result of ``parse_json_files``.
        npy_data: Result of ``analyze_npy_files``.
        runs: Result of ``summarize_runs``.
    """
    print("\n" + "="*80)
    print("📊 FRCMFD DATA ANALYSIS — COMPLETE SUMMARY")
    print("="*80)
    print(f"Generated: {datetime.now(timezone.utc).strftime('%Y-%m-%d %H:%M:%S')}")
    print("="*80)

    # 1. Directory Structure
    print("\n📁 DIRECTORY STRUCTURE")
    print("-"*60)
    for name, info in sorted(structure.items()):
        if info['type'] == 'directory':
            print(f" 📁 {name}/ ({info['file_count']} files)")
        else:
            print(f" 📄 {name} ({info['size_kb']:.1f} KB)")

    # 2. Extracted TARs
    print("\n📦 EXTRACTED TAR ARCHIVES")
    print("-"*60)
    if extracted:
        for tar in extracted:
            print(f" {tar['file']} → {tar['extracted_to']} ({tar['files']} files)")
    else:
        print(" No TAR files extracted")

    # 3. JSON Data Summary
    print("\n📄 JSON DATA FILES")
    print("-"*60)
    print(f" Total JSON files: {len(json_data)}")

    # Runs that actually carry a drift value; used for statistics and ranking.
    scored_runs = [r for r in runs if r['drift_pct'] is not None]

    # 4. Run Data Summary
    print("\n🏃 RUN DATA SUMMARY")
    print("-"*60)
    if runs:
        runs_sorted = sorted(
            runs, key=lambda x: x['kappa'] if x['kappa'] is not None else 0
        )
        print(f"{'File':<35} | {'κ':>8} | {'Steps':>8} | {'Drift(%)':>12} | {'ΔH':>12}")
        print("-"*85)
        for r in runs_sorted[:20]:  # Show first 20
            file_short = r['file'][:35]
            kappa_str = _format_kappa(r['kappa'])
            print(f"{file_short:<35} | {kappa_str:>8} | {r['steps']:>8} | {r['drift_pct']:12.5f} | {r['dH']:12.6e}")
        if len(runs_sorted) > 20:
            print(f" ... and {len(runs_sorted)-20} more runs")

        drift_values = [r['drift_pct'] for r in scored_runs]
        if drift_values:
            print(f"\n Drift Statistics:")
            print(f" Min: {min(drift_values):.5f}%")
            print(f" Max: {max(drift_values):.5f}%")
            print(f" Mean: {np.mean(drift_values):.5f}%")
            print(f" Std: {np.std(drift_values):.5f}%")
    else:
        print(" No run data found in JSON files")

    # 5. NPY Spectra Summary
    print("\n📊 NPY SPECTRA FILES")
    print("-"*60)
    if npy_data:
        print(f" Total NPY files: {len(npy_data)}")
        print(f"{'File':<35} | {'Shape':<15} | {'Size(KB)':>10} | {'Min':>10} | {'Max':>10}")
        print("-"*85)
        for n in npy_data[:10]:
            file_short = n['file'][:35]
            shape_str = str(n['shape'])[:15]
            print(f"{file_short:<35} | {shape_str:<15} | {n['size_kb']:10.1f} | {n['min']:10.3e} | {n['max']:10.3e}")
        if len(npy_data) > 10:
            print(f" ... and {len(npy_data)-10} more NPY files")
    else:
        print(" No NPY files found")

    # 6. Key Findings
    print("\n🔑 KEY FINDINGS")
    print("-"*60)

    # κ* comes from the first summary document that reports one.
    kappa_star = next(
        (item['kappa_star'] for item in json_data
         if item.get('type') == 'summary' and 'kappa_star' in item),
        None,
    )
    if kappa_star is not None:
        print(f" ✅ κ* (zero-drift coupling) ≈ {kappa_star:.6f}")

    if scored_runs:
        # Both rankings use the drift magnitude: a large negative drift is
        # just as bad as a large positive one.
        best_run = min(scored_runs, key=lambda x: abs(x['drift_pct']))
        print(f" ✅ Best drift: {best_run['drift_pct']:.5f}% at κ={_format_kappa(best_run['kappa'])}")
        worst_run = max(scored_runs, key=lambda x: abs(x['drift_pct']))
        print(f" ⚠️ Worst drift: {worst_run['drift_pct']:.5f}% at κ={_format_kappa(worst_run['kappa'])}")

    # 7. Summary Statistics
    print("\n📊 SUMMARY STATISTICS")
    print("-"*60)
    print(f" Directories: {sum(1 for s in structure.values() if s['type'] == 'directory')}")
    print(f" Files: {sum(1 for s in structure.values() if s['type'] == 'file')}")
    print(f" JSON files: {len(json_data)}")
    print(f" NPY files: {len(npy_data)}")
    print(f" Run data points: {len(runs)}")
    print(f" TAR archives extracted: {len(extracted)}")

    print("\n" + "="*80)
    print("✅ ANALYSIS COMPLETE")
    print("="*80)


# ==============================================================================
# PLOTTING FUNCTIONS
# ==============================================================================

def plot_drift_vs_kappa(runs, save_path=None):
    """Scatter-plot drift (%) against κ.

    Does nothing when fewer than two runs have both values, or when
    matplotlib is not installed.

    Args:
        runs: Run dicts as produced by ``summarize_runs``.
        save_path: Optional file path; when given the figure is also saved.
    """
    if not runs:
        return
    points = [
        (r['kappa'], r['drift_pct'])
        for r in runs
        if r['kappa'] is not None and r['drift_pct'] is not None
    ]
    if len(points) < 2:
        return
    if plt is None:
        print(" ⚠️ matplotlib is not available; skipping drift plot")
        return

    kappas = [k for k, _ in points]
    drifts = [d for _, d in points]

    fig = plt.figure(figsize=(10, 6))
    plt.scatter(kappas, drifts, alpha=0.7, s=50)
    plt.xlabel('κ')
    plt.ylabel('Drift (%)')
    plt.title('Hamiltonian Drift vs Coupling Parameter κ')
    plt.grid(True, alpha=0.3)
    plt.axhline(y=0, color='r', linestyle='--', alpha=0.5)
    if save_path:
        plt.savefig(save_path, dpi=150, bbox_inches='tight')
        print(f" Plot saved to: {save_path}")
    plt.show()
    plt.close(fig)  # release the figure so repeated runs do not accumulate


def plot_energy_components(runs, save_path=None):
    """Plot mean kinetic/gradient/potential energy against κ.

    Only runs with all three components are used; values are averaged per κ.
    Does nothing when fewer than two such runs or fewer than two distinct κ
    values exist, or when matplotlib is not installed.

    Args:
        runs: Run dicts as produced by ``summarize_runs``.
        save_path: Optional file path; when given the figure is also saved.
    """
    components = ('H_kinetic', 'H_gradient', 'H_potential')
    valid_runs = [
        r for r in runs
        if all(r.get(name) is not None for name in components)
    ]
    if len(valid_runs) < 2:
        return

    # Group by kappa and average
    kappa_groups = defaultdict(lambda: {'K': [], 'G': [], 'V': []})
    for r in valid_runs:
        k = r['kappa']
        if k is not None:
            kappa_groups[k]['K'].append(r['H_kinetic'])
            kappa_groups[k]['G'].append(r['H_gradient'])
            kappa_groups[k]['V'].append(r['H_potential'])

    kappas = sorted(kappa_groups)
    if len(kappas) < 2:
        return
    if plt is None:
        print(" ⚠️ matplotlib is not available; skipping energy plot")
        return

    K_avg = [np.mean(kappa_groups[k]['K']) for k in kappas]
    G_avg = [np.mean(kappa_groups[k]['G']) for k in kappas]
    V_avg = [np.mean(kappa_groups[k]['V']) for k in kappas]

    fig = plt.figure(figsize=(12, 6))
    plt.plot(kappas, K_avg, 'o-', label='Kinetic', linewidth=2)
    plt.plot(kappas, G_avg, 's-', label='Gradient', linewidth=2)
    plt.plot(kappas, V_avg, '^-', label='Potential', linewidth=2)
    plt.xlabel('κ')
    plt.ylabel('Energy Component')
    plt.title('Energy Components vs Coupling Parameter κ')
    plt.legend()
    plt.grid(True, alpha=0.3)
    if save_path:
        plt.savefig(save_path, dpi=150, bbox_inches='tight')
        print(f" Plot saved to: {save_path}")
    plt.show()
    plt.close(fig)  # release the figure so repeated runs do not accumulate


# ==============================================================================
# MAIN
# ==============================================================================

def main(base_path=DEFAULT_BASE_PATH):
    """Run the full scan → extract → parse → summarize → plot pipeline.

    Plots and the JSON summary are written into ``base_path`` with a UTC
    timestamp in their file names.

    Args:
        base_path: Directory holding the collected data (default ``/content``).
    """
    base = Path(base_path)

    print("="*80)
    print("🚀 FRCMFD DATA ANALYSIS — Complete Extraction & Summary")
    print("="*80)
    print(f"Working directory: {base_path}")
    print("="*80)

    if not base.is_dir():
        # Nothing to analyze and nowhere to write results.
        print(f"\n⚠️ Base path not found: {base_path}")
        return

    # 1. Scan directories
    print("\n📁 Scanning directories...")
    structure = scan_directories(base_path)

    # 2. Extract TAR files
    print("\n📦 Extracting TAR archives...")
    extracted = extract_tar_files(base_path)

    # 3. Parse JSON files
    print("\n📄 Parsing JSON data...")
    json_data = parse_json_files(base_path)

    # 4. Analyze NPY files
    print("\n📊 Analyzing NPY spectra...")
    npy_data = analyze_npy_files(base_path)

    # 5. Summarize runs
    runs = summarize_runs(json_data)

    # 6. Print summary
    print_summary(structure, extracted, json_data, npy_data, runs)

    # 7. Generate plots
    print("\n📈 Generating plots...")
    timestamp = datetime.now(timezone.utc).strftime("%Y%m%d_%H%M%S")
    plot_drift_vs_kappa(
        runs, save_path=str(base / f"drift_vs_kappa_{timestamp}.png")
    )
    plot_energy_components(
        runs, save_path=str(base / f"energy_vs_kappa_{timestamp}.png")
    )

    # 8. Save summary to JSON
    summary_data = {
        'timestamp': timestamp,
        'directories': dict(structure),
        'extracted_tars': extracted,
        'json_files': json_data,
        'npy_files': npy_data,
        'runs': runs,
    }
    summary_file = str(base / f"data_summary_{timestamp}.json")
    with open(summary_file, 'w', encoding='utf-8') as f:
        # default=float converts NumPy scalars that json cannot encode natively.
        json.dump(summary_data, f, indent=2, default=float)
    print(f"\n📁 Summary saved to: {summary_file}")

    print("\n" + "="*80)
    print("✅ ANALYSIS COMPLETE!")
    print("="*80)


if __name__ == "__main__":
    # Quick top-level listing before the full report.
    if os.path.isdir(DEFAULT_BASE_PATH):
        for f in os.listdir(DEFAULT_BASE_PATH):
            print(f)
    main()

Popular posts from this blog

THE GOLDEN BALLROOM/BUNKER

Conceptual Summary #2: $(\partial_t^2 S - c^2 \nabla^2 S + \beta S^3) = \sigma(x,t) \cdot F_R(C[\Psi])$

ICE PROUDLY ANNOUNCES NEW “ELITE” TASK FORCE COMMANDER JEREMY DEWITTE