FRCMFD BASELINE PIPELINE v1.0
======================================================================
FRCMFD BASELINE PIPELINE v1.0
======================================================================
[1] Loaded 136 galaxies
γ range: [0.100, 1.500]
γ mean: 0.604 ± 0.539
[2] Converting to Supergalactic coordinates...
SGX range: [-21.6, 66.3] Mpc
SGY range: [-30.4, 109.9] Mpc
SGZ range: [-66.0, 48.3] Mpc
[3] Interpolating CF4 density...
Using: CF4gp_new_64-z008_delta.fits
Grid shape: (64, 64, 64)
δ range: [-0.7787, 0.7526]
δ mean: -0.0704
δ std: 0.2591
[4] Assigning watershed basins...
BoA grid shape: (128, 128, 128)
Unique basins (valid): [np.int64(1), np.int64(2), np.int64(3), np.int64(4), np.int64(5), np.int64(6), np.int64(7), np.int64(8)]
Out of bounds (basin = -1): 0
[5] Computing Δγ residuals...
Median γ: 0.4848
Δγ range: [-0.385, 1.015]
Δγ std: 0.539
[6] Running statistical tests...
γ vs CF4 density δ:
Spearman r = 0.0822, p = 0.3416
Δγ vs CF4 density δ:
Spearman r = 0.0822, p = 0.3416
γ vs basin ID (Kruskal-Wallis):
H = 7.6183, p = 0.1786
[7] Generating figures...
Saved: baseline_v1/gamma_distribution.png
Saved: baseline_v1/gamma_vs_delta.png
Saved: baseline_v1/gamma_by_basin.png
[8] Saving baseline results...
Saved: baseline_v1/sparc_136_environment.csv
Saved: baseline_v1/baseline_results.json
======================================================================
BASELINE PIPELINE COMPLETE
======================================================================
📊 Galaxies processed: 136
📈 γ median: 0.4848
📈 γ mean: 0.6039 ± 0.5388
🌐 γ vs CF4 density δ:
Spearman r = 0.0822, p = 0.3416
📁 Results saved to: baseline_v1/
======================================================================
NEXT STEPS:
1. Review baseline_v1/baseline_results.json
2. Inspect figures in baseline_v1/
3. Lock the baseline before adding watchdog diagnostics
======================================================================
# ============================================================================
# ARCHIVE BASELINE V1.0
# Creates a timestamped, read-only copy of all baseline outputs
# ============================================================================
import shutil
import os
from datetime import datetime
import json
# Create the archive folder. The timestamp has one-second resolution, so two
# runs within the same second would target the same folder.
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
archive_dir = f"baseline_v1.0_archive_{timestamp}"
os.makedirs(archive_dir, exist_ok=True)


def _make_read_only(path):
    """Remove write permission from ``path``; warn instead of failing.

    Args:
        path: File whose permission bits are set to read-only (0o444).
    """
    try:
        os.chmod(path, 0o444)
    except OSError as err:
        # Best effort: a failed chmod must not abort the archive run.
        print(f"⚠️ Could not mark read-only: {path} ({err})")


def _archive_file(src, dest_dir):
    """Copy ``src`` into ``dest_dir`` and mark the copy read-only.

    Args:
        src: Path of the file to archive.
        dest_dir: Existing directory that receives the copy.

    Returns:
        Path of the archived copy.
    """
    # copy2 also carries over the modification time, so the archived copy
    # keeps the time the output was produced, not the time it was copied.
    dest = shutil.copy2(src, dest_dir)
    _make_read_only(dest)
    return dest


# Files to archive
files_to_archive = [
    "baseline_v1/sparc_136_environment.csv",
    "baseline_v1/gamma_distribution.png",
    "baseline_v1/gamma_vs_delta.png",
    "baseline_v1/gamma_by_basin.png",
    "baseline_v1/gamma_vs_distance.png",
    "baseline_v1/baseline_results.json",
]

# Copy each output, remembering what was archived and what was absent so the
# manifest reflects the real archive content.
archived_files = []
missing_files = []
for src in files_to_archive:
    if os.path.exists(src):
        _archive_file(src, archive_dir)
        archived_files.append(src)
        print(f"✅ Copied: {src} → {archive_dir}/")
    else:
        missing_files.append(src)
        print(f"⚠️ Not found: {src}")

# Also save the scripts themselves (if available); absent scripts are skipped
# without a message, as before.
script_files = ["baseline_pipeline.py", "baseline_distance_test.py"]
archived_scripts = []
for script in script_files:
    if os.path.exists(script):
        _archive_file(script, archive_dir)
        archived_scripts.append(script)
        print(f"✅ Copied script: {script}")

# Create a manifest.
# NOTE(review): the statistics below are hard-coded, not read from
# baseline_results.json — confirm they match the archived results file.
manifest = {
    'archive_timestamp': timestamp,
    'baseline_version': 'v1.0',
    'n_galaxies': 136,
    'gamma_median': 0.4848,
    'gamma_mean': 0.6039,
    'gamma_std': 0.5388,
    'spearman_gamma_delta': 0.0822,
    'spearman_gamma_delta_p': 0.3416,
    'kruskal_gamma_basin_h': 7.6183,
    'kruskal_gamma_basin_p': 0.1786,
    'spearman_gamma_distance': -0.1238,
    'spearman_gamma_distance_p': 0.1511,
    'status': 'LOCKED',
    'next_step': 'Path A: Baryonic residualization (baseline v1.1)',
    # Added keys: what this archive actually contains.
    'archived_files': archived_files,
    'missing_files': missing_files,
    'archived_scripts': archived_scripts,
}
manifest_path = os.path.join(archive_dir, 'BASELINE_MANIFEST.json')
with open(manifest_path, 'w') as fh:
    json.dump(manifest, fh, indent=2)
_make_read_only(manifest_path)

print("\n🔒 BASELINE V1.0 LOCKED")
print(f" Archive location: {archive_dir}")
print(" Manifest saved: BASELINE_MANIFEST.json")
if missing_files:
    print(f"⚠️ {len(missing_files)} expected file(s) missing from the archive (see manifest)")
print("\n" + "=" * 70)
print("NEXT STEPS (Path A):")
print(" 1. Create baseline v1.1 with proper baryonic residualization")
print(" 2. Use MassModels_Lelli2016c.mrt for M_star, V_max")
print(" 3. Compute Δγ = γ - γ_predicted(M_star, V_max)")
print(" 4. Re-run environment tests on Δγ")
print("=" * 70)
# ============================================================================
# DIAGNOSTIC: Validate GALEX SFR Conversion
# Print the stored FUV magnitude, SFR and distance for 5 test galaxies,
# then manually recompute the SFR for NGC4214 as a worked example
# ============================================================================
import pandas as pd
import numpy as np
# Galaxies to spot-check against the expected values listed below.
test_galaxies = ['NGC4214', 'NGC2403', 'NGC2903', 'NGC5055', 'NGC6503']
# Expected SFR per galaxy; the trailing comments give the quoted ranges
# (from literature, per the original note).
expected_sfr = {
    'NGC4214': 0.1,  # ~0.1-0.5 M☉/yr
    'NGC2403': 1.0,  # ~0.8-1.5 M☉/yr
    'NGC2903': 2.0,  # ~1-3 M☉/yr
    'NGC5055': 1.5,  # ~1-2 M☉/yr
    'NGC6503': 0.5,  # ~0.3-0.8 M☉/yr
}
print("=" * 70)
print("DIAGNOSTIC: GALEX SFR CONVERSION VALIDATION")
print("=" * 70)
# Load the GALEX data
df_galex = pd.read_csv("galex_sparc_sfr_corrected.csv")
# The distance table is read at most once, on first use, instead of once per
# galaxy. It is still not touched when no test galaxy is present in the GALEX
# table, which matches the previous behaviour.
df_dist = None
for galaxy in test_galaxies:
    row = df_galex[df_galex['galaxy'] == galaxy]
    if row.empty:
        print(f"\n{galaxy}: Not found in GALEX data")
        continue

    # Only the first matching row is reported.
    fuv_mag = row['fuv_mag'].values[0]
    sfr_current = row['sfr_fuv'].values[0]
    print(f"\n{galaxy}:")
    print(f" FUV magnitude: {fuv_mag}")
    print(f" Current SFR: {sfr_current}")
    print(f" Expected SFR: ~{expected_sfr.get(galaxy, '?')} M☉/yr")

    # Get distance from the matched baryonic table (galaxy name column is
    # 'galaxy_x' in that file).
    if df_dist is None:
        df_dist = pd.read_csv("sparc_baryonic_matched_v3.csv")
    dist_row = df_dist[df_dist['galaxy_x'] == galaxy]
    if dist_row.empty:
        # Previously this case printed nothing; say so explicitly.
        print(" Distance: not found in sparc_baryonic_matched_v3.csv")
        continue
    dist_mpc = dist_row['distance'].values[0] if 'distance' in dist_row.columns else None
    print(f" Distance: {dist_mpc} Mpc")
# Worked example: recompute the FUV star-formation rate of NGC4214 by hand
# from its FUV magnitude and distance, printing every intermediate quantity.
print("\n" + "=" * 70)
print("MANUAL SFR CALCULATION FOR NGC4214 (Example)")
print("=" * 70)

# Inputs for NGC4214
fuv_mag = 12.833999633789062  # FUV magnitude, taken from the earlier output
dist_mpc = 2.979  # distance in Mpc, taken from the data table

# Constants used by the conversion below
AB_ZERO_POINT = 48.60  # m_AB = -2.5 * log10(f_nu) - 48.60
CM_PER_MPC = 3.086e24  # 1 Mpc = 3.086e24 cm
KENNICUTT98_FUV = 1.4e-28  # SFR (M☉/yr) per unit L_nu (erg/s/Hz), Kennicutt+98

# Step 1: invert the AB definition to get the flux density in erg/s/cm²/Hz
f_nu = 10 ** (-0.4 * (fuv_mag + AB_ZERO_POINT))
print(f"Flux density (f_nu): {f_nu:.4e} erg/s/cm²/Hz")

# Step 2: express the distance in centimetres
dist_cm = dist_mpc * CM_PER_MPC
print(f"Distance: {dist_mpc} Mpc = {dist_cm:.2e} cm")

# Step 3: scale the flux density by the sphere area 4π D² to get L_nu
sphere_area_cm2 = 4 * np.pi * dist_cm**2
L_nu = sphere_area_cm2 * f_nu
print(f"Luminosity (L_nu): {L_nu:.4e} erg/s/Hz")

# Step 4: linear FUV calibration from luminosity to star-formation rate.
# NOTE(review): no extinction term appears anywhere in this calculation.
sfr_corrected = KENNICUTT98_FUV * L_nu
print(f"\nCorrected SFR: {sfr_corrected:.4f} M☉/yr")
print("Expected SFR: ~0.1-0.5 M☉/yr")

# The acceptance window (0.05-2.0) is wider than the quoted expected range.
if 0.05 < sfr_corrected < 2.0:
    plausibility = 'YES'
else:
    plausibility = 'NO'
print(f"\nIs this plausible? {plausibility}")
# ============================================================================
# GOOGLE DRIVE CLEANUP HELPER
# Identifies junk files, duplicates, and old backups
# DOES NOT DELETE AUTOMATICALLY - you review first
# ============================================================================
import os
import glob
from datetime import datetime
import pandas as pd
# Banner for the cleanup helper.
print("=" * 70, "GOOGLE DRIVE CLEANUP HELPER", "=" * 70, sep="\n")

# Root of the Drive tree scanned by the sections below. When it is not
# present, Drive is mounted through the Colab helper first.
drive_path = "/content/drive/MyDrive"
drive_is_available = os.path.exists(drive_path)
if not drive_is_available:
    from google.colab import drive
    drive.mount('/content/drive')

print(f"\nScanning: {drive_path}")
print("This may take a minute...")
# ----------------------------------------------------------------------------
# 1. Find all Colab backup folders (usually the biggest clutter)
# ----------------------------------------------------------------------------
print("\n[1] Colab Auto Backup folders:")
backup_markers = ('ColabAutoBackup', 'colab_backup')
backup_folders = []
for root, dirs, files in os.walk(drive_path):
    folder_name = os.path.basename(root)
    # Match on the folder's own name. Testing the full path instead would
    # also match every subdirectory nested inside a backup folder and report
    # each of them as a separate backup.
    if not any(marker in folder_name for marker in backup_markers):
        continue
    # Count every file below the backup folder, not just its direct children.
    file_count = sum(len(sub_files) for _, _, sub_files in os.walk(root))
    backup_folders.append({
        'path': root,
        'file_count': file_count,
        'name': folder_name,
    })
    # The subtree is already accounted for; stop os.walk from descending.
    dirs[:] = []

if backup_folders:
    print(f" Found {len(backup_folders)} backup folders")
    for bf in backup_folders[:20]:  # Show first 20
        print(f" 📁 {bf['name']} ({bf['file_count']} files)")
    if len(backup_folders) > 20:
        print(f" ... and {len(backup_folders) - 20} more")
else:
    print(" No backup folders found")
# ----------------------------------------------------------------------------
# 2. Find duplicate CSV files (likely old versions)
# ----------------------------------------------------------------------------
from collections import defaultdict

print("\n[2] Potential duplicate CSV files:")
csv_keywords = ('sparc', 'gamma', 'galex')
csv_files = []
for root, dirs, files in os.walk(drive_path):
    for f in files:
        lowered = f.lower()
        # Extension and keyword checks are both case-insensitive now, so
        # 'X.CSV' is treated like 'x.csv'.
        if not lowered.endswith('.csv'):
            continue
        if not any(keyword in lowered for keyword in csv_keywords):
            continue
        full_path = os.path.join(root, f)
        try:
            size = os.path.getsize(full_path) / 1024
        except OSError:
            # Entry listed by os.walk but not readable (e.g. a dangling
            # link): keep it in the listing with size 0 rather than abort.
            size = 0.0
        csv_files.append({
            'name': f,
            'size_kb': size,
            'path': full_path,
        })

# Group by file name; a name seen in more than one folder is a duplicate
# candidate (contents are not compared).
name_counts = defaultdict(list)
for cf in csv_files:
    name_counts[cf['name']].append(cf)
duplicates = {name: paths for name, paths in name_counts.items() if len(paths) > 1}

if duplicates:
    print(f" Found {len(duplicates)} file names with duplicates:")
    for name, paths in list(duplicates.items())[:10]:
        print(f" 📄 {name} ({len(paths)} copies)")
        # Show where each copy lives so it can be reviewed before deletion.
        for copy_info in paths:
            print(f"    ↳ {copy_info['path']} ({copy_info['size_kb']:.1f} KB)")
    if len(duplicates) > 10:
        print(f" ... and {len(duplicates) - 10} more")
else:
    print(" No obvious duplicates found")
# ----------------------------------------------------------------------------
# 3. Find large unnecessary files (HEPData, old zips, etc.)
# ----------------------------------------------------------------------------
print("\n[3] Potentially unnecessary files:")
# A file is flagged when its name contains any of these substrings.
unnecessary_patterns = [
    'HEPData',
    'twompp (1)',
    'fcmfd_v1.0.owl',
    'archive.zip',
    'sfb_LTG.zip',
    'BulgeDiskDec_LTG.zip',
    'CornerPlot',
    'Burkert_chain',
]
unnecessary_files = []
for root, dirs, files in os.walk(drive_path):
    for f in files:
        if not any(pattern in f for pattern in unnecessary_patterns):
            continue
        full_path = os.path.join(root, f)
        try:
            size_mb = os.path.getsize(full_path) / 1024 / 1024
        except OSError:
            # Entry listed by os.walk but not readable (e.g. a dangling
            # link): keep it in the listing with size 0 rather than abort.
            size_mb = 0.0
        unnecessary_files.append({
            'name': f,
            'size_mb': size_mb,
            'path': full_path,
        })

if unnecessary_files:
    print(f" Found {len(unnecessary_files)} potentially unnecessary files:")
    # Preview the largest entries first so the truncated list shows the files
    # that matter most; the order of `unnecessary_files` itself is unchanged.
    largest_first = sorted(
        unnecessary_files, key=lambda entry: entry['size_mb'], reverse=True
    )
    for uf in largest_first[:15]:
        print(f" 📄 {uf['name']} ({uf['size_mb']:.1f} MB)")
    if len(unnecessary_files) > 15:
        print(f" ... and {len(unnecessary_files) - 15} more")
else:
    print(" No unnecessary files found")
# ----------------------------------------------------------------------------
# 4. Find old FRCMFD backup folders (keep only latest)
# ----------------------------------------------------------------------------
print("\n[4] FRCMFD backup folders:")
backup_marker = 'FRCMFD_Backup'
frcmfd_backups = []
for root, dirs, files in os.walk(drive_path):
    folder_name = os.path.basename(root)
    # Match on the folder's own name; testing the full path would also match
    # every subdirectory inside a backup folder.
    if backup_marker not in folder_name:
        continue
    # The stamp is whatever follows the marker in the folder name. Taking the
    # last '_' token of the whole path would keep only the final part of a
    # multi-part stamp such as 20240101_120000.
    stamp = folder_name.split(backup_marker, 1)[1].strip('_- ')
    frcmfd_backups.append({
        'path': root,
        'timestamp': stamp if stamp else 'unknown',
        'name': folder_name,
    })
    # Do not descend into the backup itself.
    dirs[:] = []

if frcmfd_backups:
    print(f" Found {len(frcmfd_backups)} FRCMFD backup folders")
    # Sorted as text, with unstamped folders listed first.
    # NOTE(review): assumes stamps sort chronologically as strings
    # (e.g. zero-padded YYYYMMDD_HHMMSS) — confirm against real folder names.
    ordered = sorted(
        frcmfd_backups,
        key=lambda fb: (fb['timestamp'] != 'unknown', fb['timestamp']),
    )
    for fb in ordered:
        print(f" 📁 {fb['name']}")
    if len(frcmfd_backups) > 1:
        print("\n 💡 RECOMMENDATION: Keep only the LATEST backup, delete older ones")
else:
    print(" No FRCMFD backup folders found")
# ----------------------------------------------------------------------------
# 5. Summary and recommendations
# ----------------------------------------------------------------------------
# Prints the totals gathered by sections 1-4, followed by deletion guidance.
# Nothing is deleted here.
section_rule = "=" * 70

print("\n" + section_rule)
print("CLEANUP SUMMARY")
print(section_rule)

# Combined size of every flagged file, in MB.
total_suggested_deletion_mb = sum(entry['size_mb'] for entry in unnecessary_files)

print("\n📊 Suggested cleanup:")
n_backup_folders = len(backup_folders)
print(f" - Colab backup folders: {n_backup_folders} folders")
n_unnecessary = len(unnecessary_files)
print(f" - Unnecessary files: {n_unnecessary} ({total_suggested_deletion_mb:.0f} MB)")
n_duplicate_names = len(duplicates)
print(f" - Duplicate CSV files: {n_duplicate_names} file names with copies")
n_frcmfd = len(frcmfd_backups)
print(f" - Old FRCMFD backups: {n_frcmfd} (keep latest only)")

print("\n" + section_rule)
print("HOW TO DELETE FILES")
print(section_rule)

# Static guidance text, printed verbatim.
deletion_howto = """
Method 1 - Manual (Recommended):
Go to drive.google.com in your browser
Navigate to the folders/files listed above
Delete manually
Method 2 - Using Python (CAREFUL):
import shutil
shutil.rmtree('/path/to/folder') # Delete folder
os.remove('/path/to/file') # Delete file
⚠️ WARNING: Deletion is permanent. Review before deleting.
"""
print(deletion_howto)
# ----------------------------------------------------------------------------
# 6. Save report to file
# ----------------------------------------------------------------------------
# The report goes to the root of the scanned Drive folder.
report_path = os.path.join(drive_path, "cleanup_report.txt")

# Build the whole report in memory, then write it in one call.
report_lines = [
    "GOOGLE DRIVE CLEANUP REPORT\n",
    "=" * 50 + "\n\n",
    f"Backup folders: {len(backup_folders)}\n",
    f"Unnecessary files: {len(unnecessary_files)} ({total_suggested_deletion_mb:.0f} MB)\n",
    f"Duplicate CSVs: {len(duplicates)} names with copies\n",
    f"FRCMFD backups: {len(frcmfd_backups)}\n\n",
    "Unnecessary files:\n",
]
report_lines.extend(
    f" - {uf['name']} ({uf['size_mb']:.1f} MB)\n" for uf in unnecessary_files
)

# File names come straight from the Drive listing and may contain non-ASCII
# characters, so the encoding is fixed to UTF-8 rather than left to the locale.
with open(report_path, 'w', encoding='utf-8') as f:
    f.writelines(report_lines)

print(f"\n✅ Cleanup report saved to: {report_path}")
print("\n" + "=" * 70)