FRCMFD BASELINE PIPELINE v1.0

======================================================================
FRCMFD BASELINE PIPELINE v1.0
======================================================================

[1] Loaded 136 galaxies
    γ range: [0.100, 1.500]
    γ mean: 0.604 ± 0.539

[2] Converting to Supergalactic coordinates...
    SGX range: [-21.6, 66.3] Mpc
    SGY range: [-30.4, 109.9] Mpc
    SGZ range: [-66.0, 48.3] Mpc

[3] Interpolating CF4 density...
    Using: CF4gp_new_64-z008_delta.fits
    Grid shape: (64, 64, 64)
    δ range: [-0.7787, 0.7526]
    δ mean: -0.0704
    δ std: 0.2591

[4] Assigning watershed basins...
    BoA grid shape: (128, 128, 128)
    Unique basins (valid): [np.int64(1), np.int64(2), np.int64(3), np.int64(4), np.int64(5), np.int64(6), np.int64(7), np.int64(8)]
    Out of bounds (basin = -1): 0

[5] Computing Δγ residuals...
    Median γ: 0.4848
    Δγ range: [-0.385, 1.015]
    Δγ std: 0.539

[6] Running statistical tests...
    γ vs CF4 density δ: Spearman r = 0.0822, p = 0.3416
    Δγ vs CF4 density δ: Spearman r = 0.0822, p = 0.3416
    γ vs basin ID (Kruskal-Wallis): H = 7.6183, p = 0.1786

[7] Generating figures...
    Saved: baseline_v1/gamma_distribution.png
    Saved: baseline_v1/gamma_vs_delta.png
    Saved: baseline_v1/gamma_by_basin.png

[8] Saving baseline results...
    Saved: baseline_v1/sparc_136_environment.csv
    Saved: baseline_v1/baseline_results.json

======================================================================
BASELINE PIPELINE COMPLETE
======================================================================
📊 Galaxies processed: 136
📈 γ median: 0.4848
📈 γ mean: 0.6039 ± 0.5388
🌐 γ vs CF4 density δ: Spearman r = 0.0822, p = 0.3416
📁 Results saved to: baseline_v1/
======================================================================
NEXT STEPS:
  1. Review baseline_v1/baseline_results.json
  2. Inspect figures in baseline_v1/
  3.
# (pipeline log, continued) Lock the baseline before adding watchdog diagnostics
# ======================================================================
"""Baseline v1.0 archive, GALEX SFR sanity check and Google Drive cleanup scan.

Three notebook cells, restored from a whitespace-collapsed paste:

1. ``archive_baseline``      - copy the baseline outputs into a timestamped
                               folder and write ``BASELINE_MANIFEST.json``.
2. ``validate_galex_sfr`` /
   ``manual_sfr_example``    - print stored vs. expected FUV star-formation
                               rates and redo one conversion by hand.
3. ``drive_cleanup_helper``  - list clutter on a mounted Google Drive and
                               write a report. It never deletes anything.

Everything runs from the ``__main__`` guard at the bottom, so pasting this
into a notebook cell or running it as a script behaves as before, while
importing it has no side effects.
"""
import glob  # not used below; retained because the original cell imported it
import json
import os
import shutil
import stat
from collections import defaultdict
from datetime import datetime

import numpy as np
import pandas as pd

RULE = "=" * 70

# ============================================================================
# ARCHIVE BASELINE V1.0
# Creates a timestamped, read-only copy of all baseline outputs
# ============================================================================
FILES_TO_ARCHIVE = [
    "baseline_v1/sparc_136_environment.csv",
    "baseline_v1/gamma_distribution.png",
    "baseline_v1/gamma_vs_delta.png",
    "baseline_v1/gamma_by_basin.png",
    "baseline_v1/gamma_vs_distance.png",
    "baseline_v1/baseline_results.json",
]
SCRIPT_FILES = ["baseline_pipeline.py", "baseline_distance_test.py"]


def build_manifest(timestamp):
    """Return the manifest dict describing the locked v1.0 baseline.

    The statistics are the hard-coded values of the v1.0 run; only
    ``archive_timestamp`` varies.
    """
    return {
        'archive_timestamp': timestamp,
        'baseline_version': 'v1.0',
        'n_galaxies': 136,
        'gamma_median': 0.4848,
        'gamma_mean': 0.6039,
        'gamma_std': 0.5388,
        'spearman_gamma_delta': 0.0822,
        'spearman_gamma_delta_p': 0.3416,
        'kruskal_gamma_basin_h': 7.6183,
        'kruskal_gamma_basin_p': 0.1786,
        'spearman_gamma_distance': -0.1238,
        'spearman_gamma_distance_p': 0.1511,
        'status': 'LOCKED',
        'next_step': 'Path A: Baryonic residualization (baseline v1.1)',
    }


def _copy_read_only(src, dest_dir):
    """Copy *src* into *dest_dir* and strip write permission from the copy.

    The header promises a read-only archive, but the original never changed
    any permissions. Some filesystems reject chmod, so failure is reported
    rather than raised.
    """
    copied = shutil.copy(src, dest_dir)
    try:
        os.chmod(copied, stat.S_IRUSR | stat.S_IRGRP | stat.S_IROTH)
    except OSError:
        print(f"⚠️ Could not mark read-only: {copied}")
    return copied


def archive_baseline(timestamp=None):
    """Archive the baseline outputs and write the manifest.

    Args:
        timestamp: Suffix for the archive folder; defaults to the current
            local time formatted as ``%Y%m%d_%H%M%S``.

    Returns:
        ``(archive_dir, manifest)``.
    """
    if timestamp is None:
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    archive_dir = f"baseline_v1.0_archive_{timestamp}"
    os.makedirs(archive_dir, exist_ok=True)

    # Copy each output; missing files are reported, not fatal.
    for src in FILES_TO_ARCHIVE:
        if os.path.exists(src):
            _copy_read_only(src, archive_dir)
            print(f"✅ Copied: {src} → {archive_dir}/")
        else:
            print(f"⚠️ Not found: {src}")

    # Also save the scripts themselves (if available).
    for script in SCRIPT_FILES:
        if os.path.exists(script):
            _copy_read_only(script, archive_dir)
            print(f"✅ Copied script: {script}")

    manifest = build_manifest(timestamp)
    manifest_path = os.path.join(archive_dir, 'BASELINE_MANIFEST.json')
    with open(manifest_path, 'w', encoding='utf-8') as fh:
        json.dump(manifest, fh, indent=2)

    print("\n🔒 BASELINE V1.0 LOCKED")
    print(f" Archive location: {archive_dir}")
    print(" Manifest saved: BASELINE_MANIFEST.json")
    print("\n" + RULE)
    print("NEXT STEPS (Path A):")
    print(" 1. Create baseline v1.1 with proper baryonic residualization")
    print(" 2. Use MassModels_Lelli2016c.mrt for M_star, V_max")
    print(" 3. Compute Δγ = γ - γ_predicted(M_star, V_max)")
    print(" 4. Re-run environment tests on Δγ")
    print(RULE)
    return archive_dir, manifest


# ============================================================================
# DIAGNOSTIC: Validate GALEX SFR Conversion
# Manually compute SFR for 5 test galaxies
# ============================================================================
TEST_GALAXIES = ['NGC4214', 'NGC2403', 'NGC2903', 'NGC5055', 'NGC6503']

# Expected SFR ranges (from literature)
EXPECTED_SFR = {
    'NGC4214': 0.1,  # ~0.1-0.5 M☉/yr
    'NGC2403': 1.0,  # ~0.8-1.5 M☉/yr
    'NGC2903': 2.0,  # ~1-3 M☉/yr
    'NGC5055': 1.5,  # ~1-2 M☉/yr
    'NGC6503': 0.5,  # ~0.3-0.8 M☉/yr
}

AB_ZERO_POINT = 48.60      # m_AB = -2.5 * log10(f_nu [erg/s/cm²/Hz]) - 48.60
MPC_IN_CM = 3.086e24       # 1 Mpc = 3.086e24 cm
KENNICUTT98_FUV = 1.4e-28  # SFR (M☉/yr) = 1.4e-28 * L_nu (erg/s/Hz)


def ab_mag_to_flux_density(m_ab):
    """Convert an AB magnitude to flux density in erg/s/cm²/Hz."""
    return 10 ** (-0.4 * (m_ab + AB_ZERO_POINT))


def spectral_luminosity(f_nu, dist_cm):
    """Return L_nu = 4π D² f_nu for a flux density and a distance in cm."""
    return 4 * np.pi * dist_cm**2 * f_nu


def sfr_from_fuv_mag(fuv_mag, dist_mpc):
    """Return the FUV star-formation rate in M☉/yr (Kennicutt 1998).

    Args:
        fuv_mag: FUV AB magnitude.
        dist_mpc: Distance in Mpc.
    """
    l_nu = spectral_luminosity(ab_mag_to_flux_density(fuv_mag),
                               dist_mpc * MPC_IN_CM)
    return KENNICUTT98_FUV * l_nu


def validate_galex_sfr(galex_csv="galex_sparc_sfr_corrected.csv",
                       dist_csv="sparc_baryonic_matched_v3.csv"):
    """Print stored vs. expected SFR (and distance) for the test galaxies.

    The distance table is loaded at most once, on first use; the original
    re-read the CSV for every galaxy.
    """
    print(RULE)
    print("DIAGNOSTIC: GALEX SFR CONVERSION VALIDATION")
    print(RULE)

    df_galex = pd.read_csv(galex_csv)
    df_dist = None

    for galaxy in TEST_GALAXIES:
        row = df_galex[df_galex['galaxy'] == galaxy]
        if row.empty:
            print(f"\n{galaxy}: Not found in GALEX data")
            continue

        fuv_mag = row['fuv_mag'].values[0]
        sfr_current = row['sfr_fuv'].values[0]
        print(f"\n{galaxy}:")
        print(f" FUV magnitude: {fuv_mag}")
        print(f" Current SFR: {sfr_current}")
        print(f" Expected SFR: ~{EXPECTED_SFR.get(galaxy, '?')} M☉/yr")

        if df_dist is None:
            df_dist = pd.read_csv(dist_csv)
        dist_row = df_dist[df_dist['galaxy_x'] == galaxy]
        if not dist_row.empty:
            has_distance = 'distance' in dist_row.columns
            dist_mpc = dist_row['distance'].values[0] if has_distance else None
            print(f" Distance: {dist_mpc} Mpc")


def manual_sfr_example(fuv_mag=12.833999633789062, dist_mpc=2.979):
    """Walk through the FUV → SFR conversion for NGC4214 and return the SFR.

    The defaults are the NGC4214 magnitude and distance used in the
    original cell.
    """
    print("\n" + RULE)
    print("MANUAL SFR CALCULATION FOR NGC4214 (Example)")
    print(RULE)

    # Step 1: AB magnitude to flux density.
    # In Jy this is f_nu = 10^(-0.4 * (m_AB - 8.90)); here cgs units are used:
    # f_nu (erg/s/cm²/Hz) = 10^(-0.4 * (m_AB + 48.60))
    f_nu = ab_mag_to_flux_density(fuv_mag)
    print(f"Flux density (f_nu): {f_nu:.4e} erg/s/cm²/Hz")

    # Step 2: Distance in cm
    dist_cm = dist_mpc * MPC_IN_CM
    print(f"Distance: {dist_mpc} Mpc = {dist_cm:.2e} cm")

    # Step 3: Luminosity L_nu = 4π * D² * f_nu
    L_nu = spectral_luminosity(f_nu, dist_cm)
    print(f"Luminosity (L_nu): {L_nu:.4e} erg/s/Hz")

    # Step 4: SFR from FUV (Kennicutt+98)
    sfr_corrected = KENNICUTT98_FUV * L_nu
    print(f"\nCorrected SFR: {sfr_corrected:.4f} M☉/yr")
    print("Expected SFR: ~0.1-0.5 M☉/yr")
    print(f"\nIs this plausible? {'YES' if 0.05 < sfr_corrected < 2.0 else 'NO'}")
    return sfr_corrected


# ============================================================================
# GOOGLE DRIVE CLEANUP HELPER
# Identifies junk files, duplicates, and old backups
# DOES NOT DELETE AUTOMATICALLY - you review first
# ============================================================================
COLAB_BACKUP_MARKERS = ('ColabAutoBackup', 'colab_backup')
FRCMFD_MARKER = 'FRCMFD_Backup'
CSV_KEYWORDS = ('sparc', 'gamma', 'galex')
UNNECESSARY_PATTERNS = [
    'HEPData',
    'twompp (1)',
    'fcmfd_v1.0.owl',
    'archive.zip',
    'sfb_LTG.zip',
    'BulgeDiskDec_LTG.zip',
    'CornerPlot',
    'Burkert_chain',
]


def _file_size(path):
    """Return the size of *path* in bytes, or 0 if it cannot be read.

    One unreadable entry (broken link, sync hiccup) should not abort the
    whole scan.
    """
    try:
        return os.path.getsize(path)
    except OSError:
        return 0


def _owning_folder(root, drive_path, markers):
    """Return the topmost folder at or above *root* whose name has a marker.

    Only path components below *drive_path* are inspected. Returns ``None``
    when no component matches. This attributes nested subdirectories to the
    backup that contains them instead of listing each as its own backup.
    """
    parts = os.path.relpath(root, drive_path).split(os.sep)
    for depth, part in enumerate(parts, start=1):
        if any(marker in part for marker in markers):
            return os.path.join(drive_path, *parts[:depth])
    return None


def backup_timestamp(folder_name):
    """Return the text after ``FRCMFD_Backup`` in *folder_name*.

    The full suffix (e.g. ``20240101_120000``) is kept so that sorting puts
    the latest backup last; ``'unknown'`` is returned when there is none.
    """
    _, _, suffix = folder_name.partition(FRCMFD_MARKER)
    return suffix.lstrip('_') or 'unknown'


def find_duplicates(csv_files):
    """Group CSV entries by file name and keep names that occur more than once."""
    by_name = defaultdict(list)
    for entry in csv_files:
        by_name[entry['name']].append(entry)
    return {name: entries for name, entries in by_name.items()
            if len(entries) > 1}


def scan_drive(drive_path):
    """Walk *drive_path* once and collect every cleanup candidate.

    Returns:
        dict with the lists ``backup_folders`` (path, file_count, name),
        ``csv_files`` (name, size_kb, path), ``unnecessary_files``
        (name, size_mb, path) and ``frcmfd_backups`` (path, timestamp, name).
        ``file_count`` includes files in nested subdirectories.
    """
    backup_counts = {}   # backup folder path -> recursive file count
    frcmfd_paths = {}    # insertion-ordered set of FRCMFD backup folders
    csv_files = []
    unnecessary_files = []

    for root, _dirs, files in os.walk(drive_path):
        colab_owner = _owning_folder(root, drive_path, COLAB_BACKUP_MARKERS)
        if colab_owner is not None:
            backup_counts[colab_owner] = (
                backup_counts.get(colab_owner, 0) + len(files))

        frcmfd_owner = _owning_folder(root, drive_path, (FRCMFD_MARKER,))
        if frcmfd_owner is not None:
            frcmfd_paths[frcmfd_owner] = None

        for fname in files:
            full_path = os.path.join(root, fname)
            lowered = fname.lower()
            if fname.endswith('.csv') and any(k in lowered for k in CSV_KEYWORDS):
                csv_files.append({
                    'name': fname,
                    'size_kb': _file_size(full_path) / 1024,
                    'path': full_path,
                })
            if any(pattern in fname for pattern in UNNECESSARY_PATTERNS):
                unnecessary_files.append({
                    'name': fname,
                    'size_mb': _file_size(full_path) / 1024 / 1024,
                    'path': full_path,
                })

    backup_folders = [
        {'path': path, 'file_count': count, 'name': os.path.basename(path)}
        for path, count in backup_counts.items()
    ]
    frcmfd_backups = [
        {'path': path,
         'timestamp': backup_timestamp(os.path.basename(path)),
         'name': os.path.basename(path)}
        for path in frcmfd_paths
    ]
    return {
        'backup_folders': backup_folders,
        'csv_files': csv_files,
        'unnecessary_files': unnecessary_files,
        'frcmfd_backups': frcmfd_backups,
    }


def drive_cleanup_helper(drive_path="/content/drive/MyDrive", report_path=None):
    """Scan the Drive, print the findings and save a text report.

    Args:
        drive_path: Root folder to scan. If it does not exist, the Colab
            Drive is mounted at ``/content/drive`` first.
        report_path: Where to write the report; defaults to
            ``cleanup_report.txt`` inside *drive_path*.

    Returns:
        The ``scan_drive`` result with an extra ``duplicates`` entry.
    """
    print(RULE)
    print("GOOGLE DRIVE CLEANUP HELPER")
    print(RULE)

    if not os.path.exists(drive_path):
        from google.colab import drive  # only importable inside Colab
        drive.mount('/content/drive')
    if report_path is None:
        report_path = os.path.join(drive_path, "cleanup_report.txt")

    print(f"\nScanning: {drive_path}")
    print("This may take a minute...")
    scan = scan_drive(drive_path)
    backup_folders = scan['backup_folders']
    unnecessary_files = scan['unnecessary_files']
    frcmfd_backups = scan['frcmfd_backups']
    duplicates = find_duplicates(scan['csv_files'])
    scan['duplicates'] = duplicates

    # 1. Colab backup folders (usually the biggest clutter)
    print("\n[1] Colab Auto Backup folders:")
    if backup_folders:
        print(f" Found {len(backup_folders)} backup folders")
        for bf in backup_folders[:20]:  # Show first 20
            print(f" 📁 {bf['name']} ({bf['file_count']} files)")
        if len(backup_folders) > 20:
            print(f" ... and {len(backup_folders) - 20} more")
    else:
        print(" No backup folders found")

    # 2. Duplicate CSV files (likely old versions)
    print("\n[2] Potential duplicate CSV files:")
    if duplicates:
        print(f" Found {len(duplicates)} file names with duplicates:")
        for name, paths in list(duplicates.items())[:10]:
            print(f" 📄 {name} ({len(paths)} copies)")
    else:
        print(" No obvious duplicates found")

    # 3. Large unnecessary files (HEPData, old zips, etc.)
    print("\n[3] Potentially unnecessary files:")
    if unnecessary_files:
        print(f" Found {len(unnecessary_files)} potentially unnecessary files:")
        for uf in unnecessary_files[:15]:
            print(f" 📄 {uf['name']} ({uf['size_mb']:.1f} MB)")
        if len(unnecessary_files) > 15:
            print(f" ... and {len(unnecessary_files) - 15} more")
    else:
        print(" No unnecessary files found")

    # 4. Old FRCMFD backup folders (keep only latest)
    print("\n[4] FRCMFD backup folders:")
    if frcmfd_backups:
        print(f" Found {len(frcmfd_backups)} FRCMFD backup folders")
        for fb in sorted(frcmfd_backups, key=lambda x: x['timestamp']):
            print(f" 📁 {fb['name']}")
        if len(frcmfd_backups) > 1:
            print("\n 💡 RECOMMENDATION: Keep only the LATEST backup, delete older ones")
    else:
        print(" No FRCMFD backup folders found")

    # 5. Summary and recommendations
    print("\n" + RULE)
    print("CLEANUP SUMMARY")
    print(RULE)
    total_suggested_deletion_mb = sum(uf['size_mb'] for uf in unnecessary_files)
    print("\n📊 Suggested cleanup:")
    print(f" - Colab backup folders: {len(backup_folders)} folders")
    print(f" - Unnecessary files: {len(unnecessary_files)} ({total_suggested_deletion_mb:.0f} MB)")
    print(f" - Duplicate CSV files: {len(duplicates)} file names with copies")
    print(f" - Old FRCMFD backups: {len(frcmfd_backups)} (keep latest only)")

    print("\n" + RULE)
    print("HOW TO DELETE FILES")
    print(RULE)
    print("""
Method 1 - Manual (Recommended):
 Go to drive.google.com in your browser
 Navigate to the folders/files listed above
 Delete manually

Method 2 - Using Python (CAREFUL):
 import shutil
 shutil.rmtree('/path/to/folder') # Delete folder
 os.remove('/path/to/file') # Delete file

⚠️ WARNING: Deletion is permanent. Review before deleting.
""")

    # 6. Save report to file (explicit UTF-8: file names may be non-ASCII)
    with open(report_path, 'w', encoding='utf-8') as fh:
        fh.write("GOOGLE DRIVE CLEANUP REPORT\n")
        fh.write("=" * 50 + "\n\n")
        fh.write(f"Backup folders: {len(backup_folders)}\n")
        fh.write(f"Unnecessary files: {len(unnecessary_files)} ({total_suggested_deletion_mb:.0f} MB)\n")
        fh.write(f"Duplicate CSVs: {len(duplicates)} names with copies\n")
        fh.write(f"FRCMFD backups: {len(frcmfd_backups)}\n\n")
        fh.write("Unnecessary files:\n")
        for uf in unnecessary_files:
            fh.write(f" - {uf['name']} ({uf['size_mb']:.1f} MB)\n")

    print(f"\n✅ Cleanup report saved to: {report_path}")
    print("\n" + RULE)
    return scan


if __name__ == "__main__":
    # Results are bound at module level so later notebook cells can use them.
    archive_dir, manifest = archive_baseline()
    validate_galex_sfr()
    sfr_corrected = manual_sfr_example()
    cleanup = drive_cleanup_helper()

Popular posts from this blog

THE GOLDEN BALLROOM/BUNKER

Conceptual Summary #2: $\left(\partial_t^2 S - c^2 \nabla^2 S + \beta S^3\right) = \sigma(x,t) \cdot F_R(C[\Psi])$

ICE PROUDLY ANNOUNCES NEW “ELITE” TASK FORCE COMMANDER JEREMY DEWITTE