# === SECTION 1: SET UP THE PROJECT CODE ===

from pathlib import Path
import os

REPO_URL = 'https://github.com/balintstewart77/camelyon16-pathology.git'
REPO_DIR = Path('/content/camelyon16-pathology')

if not REPO_DIR.exists():
    !git clone {REPO_URL} /content/camelyon16-pathology
else:
    print("Repository already present in runtime.")

os.chdir(REPO_DIR)
print("Current working directory:", os.getcwd())

!apt-get install -y openslide-tools > /dev/null 2>&1
!pip install -q -r requirements.txt

print("Project environment ready.")

Cloning into '/content/camelyon16-pathology'...
remote: Enumerating objects: 748, done.
remote: Counting objects: 100% (64/64), done.
remote: Compressing objects: 100% (45/45), done.
remote: Total 748 (delta 40), reused 38 (delta 19), pack-reused 684 (from 2)
Receiving objects: 100% (748/748), 85.68 MiB | 49.90 MiB/s, done.
Resolving deltas: 100% (486/486), done.
Current working directory: /content/camelyon16-pathology
   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 4.3/4.3 MB 52.8 MB/s eta 0:00:00
   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 140.5/140.5 kB 16.6 MB/s eta 0:00:00
   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 15.0/15.0 MB 127.9 MB/s eta 0:00:00
   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 86.8/86.8 kB 9.3 MB/s eta 0:00:00
Project environment ready.

# === SECTION 2: MOUNT GOOGLE DRIVE AND LOCATE THE DATASET ===

from pathlib import Path
from google.colab import drive

drive.mount('/content/drive')

# The dataset lives in a shared Google Drive folder:
#   https://drive.google.com/drive/folders/1Ny8zXjBPsrqSFUD61v02EHsjAwgGSY47?usp=drive_link
# Open it, choose "Add shortcut to Drive", put the shortcut directly under
# "My Drive", and name it exactly: camelyon16_data

expected_train = 'camelyon16_4class_stain_normalised'
expected_test = 'camelyon16_test_stain_normalised'

# Searched in order; the first root that contains both split folders wins.
candidate_roots = [
    Path('/content/drive/MyDrive/camelyon16_data'),
    Path('/content/drive/MyDrive/new_work/Projects/pathovis_project/data'),
]

DATA_ROOT = TRAIN_PATH = TEST_PATH = None

for root in candidate_roots:
    train_path, test_path = root / expected_train, root / expected_test
    if not (train_path.exists() and test_path.exists()):
        continue
    DATA_ROOT, TRAIN_PATH, TEST_PATH = root, train_path, test_path
    break

if DATA_ROOT is None:
    # Nothing matched: show what was checked (and up to 20 entries of each
    # existing root) before raising, so a misnamed shortcut is easy to spot.
    print("Checked candidate dataset roots:")
    for root in candidate_roots:
        root_found = root.exists()
        print(f"\n{root}")
        print(f"  exists: {root_found}")
        if not root_found:
            continue
        print("  contents:")
        for child in sorted(root.iterdir())[:20]:
            print(f"    - {child.name}")

    help_text = (
        "Could not find the CAMELYON16 dataset.\n\n"
        "Please add the shared Google Drive folder as a shortcut to your Drive:\n"
        "  https://drive.google.com/drive/folders/1Ny8zXjBPsrqSFUD61v02EHsjAwgGSY47?usp=drive_link\n\n"
        "Steps:\n"
        "  1. Open the link above\n"
        "  2. Right-click the folder \u2192 'Add shortcut to Drive'\n"
        "  3. Place it directly under 'My Drive'\n"
        "  4. Name it exactly: camelyon16_data\n\n"
        "Expected subfolders:\n"
        f"  - {expected_train}\n"
        f"  - {expected_test}"
    )
    raise FileNotFoundError(help_text)

print(
    "Dataset paths verified.",
    f"DATA_ROOT  = {DATA_ROOT}",
    f"TRAIN_PATH = {TRAIN_PATH}",
    f"TEST_PATH  = {TEST_PATH}",
    sep="\n",
)

Mounted at /content/drive
Dataset paths verified.
DATA_ROOT  = /content/drive/MyDrive/new_work/Projects/pathovis_project/data
TRAIN_PATH = /content/drive/MyDrive/new_work/Projects/pathovis_project/data/camelyon16_4class_stain_normalised
TEST_PATH  = /content/drive/MyDrive/new_work/Projects/pathovis_project/data/camelyon16_test_stain_normalised

import numpy as np
import matplotlib.pyplot as plt
import openslide

np.random.seed(42)

# Project imports
from config import DEFAULT_CONFIG
from src.data import list_s3_files, download_file_from_s3, cleanup_file
from src.data.tissue_mask import get_tissue_mask, compute_foreground_mask
from src.data.tumor_polygons import load_tumor_polygons, classify_patch
from src.data.patch_extraction import (
    sample_grid_coordinates, sample_coordinates_by_class,
    extract_patch, preprocess_patch,
    get_stain_normaliser, normalise_stain
)
from src.visualisation import (
    visualise_tissue_outline, visualise_patches_grid,
    find_zoom_region_by_coords, find_dense_tissue_region
)

# Slide used for the tumour-slide walkthrough in the rest of the notebook.
# Change EXAMPLE_TUMOR_SLIDE_ID to explore a different normal or tumour slide.
# Tumour slides that clearly show all three classes include:
#   5, 11, 13, 16, 18, 22, 24, 25, 27, 29, 32, 37, 38, 50, 51, 52, 56, 62, 73,
#   91, 92, 93, 94, 100, 106, 107, 109, 110, 111
EXAMPLE_TUMOR_SLIDE_ID = 'tumor_016'
# The image and its annotation share the slide id and differ only by extension.
EXAMPLE_TUMOR_SLIDE = EXAMPLE_TUMOR_SLIDE_ID + '.tif'
EXAMPLE_TUMOR_ANNOTATION = EXAMPLE_TUMOR_SLIDE_ID + '.xml'

print("All imports successful!")

All imports successful!

# List available slides from S3 and split them into normal / tumour by file name.
# NOTE(review): `or []` assumes list_s3_files may return None/empty on failure;
# its failure behaviour is not visible in this notebook — confirm.
all_slides = list_s3_files(DEFAULT_CONFIG.data.s3_images, '.tif') or []
normal_slides = sorted(f for f in all_slides if 'normal' in f.lower())
tumor_slides = sorted(f for f in all_slides if 'tumor' in f.lower())

print(f"Dataset: {len(normal_slides)} normal slides, {len(tumor_slides)} tumour slides")

# Fail with an actionable message instead of a bare IndexError on `[0]` below.
if not normal_slides or not tumor_slides:
    raise RuntimeError(
        "Slide listing is incomplete: expected at least one normal and one "
        "tumour '.tif' slide. Check S3 access and DEFAULT_CONFIG.data.s3_images."
    )

print(f"\nExample normal slide: {normal_slides[0]}")
print(f"Example tumour slide: {tumor_slides[0]}")

Dataset: 159 normal slides, 111 tumour slides

Example normal slide: normal_001.tif
Example tumour slide: tumor_001.tif

# Download one tumour slide to explore
slide_name = EXAMPLE_TUMOR_SLIDE
slide_path = download_file_from_s3(
    DEFAULT_CONFIG.data.s3_images, slide_name, '/tmp'
)
# Same guard as the Centre B download later in the notebook: a falsy path means
# the download did not produce a file, so stop before OpenSlide fails obscurely.
if not slide_path:
    raise FileNotFoundError(f"Could not download {slide_name}")
slide = openslide.OpenSlide(slide_path)

# Explore the pyramid structure: level 0 is full resolution and each further
# level is a downsampled copy of it.
print(f"Slide: {slide_name}")
print(f"Dimensions (level 0): {slide.dimensions[0]:,} × {slide.dimensions[1]:,} pixels")
print(f"Number of levels: {slide.level_count}")
print("\nPyramid levels:")
for i, ((w, h), ds) in enumerate(zip(slide.level_dimensions, slide.level_downsamples)):
    print(f"  Level {i}: {w:>7,} × {h:>7,}  (downsample: {ds:.1f}×)")

Downloading tumor_016.tif...
Slide: tumor_016.tif
Dimensions (level 0): 97,792 × 221,184 pixels
Number of levels: 10

Pyramid levels:
  Level 0:  97,792 × 221,184  (downsample: 1.0×)
  Level 1:  48,896 × 110,592  (downsample: 2.0×)
  Level 2:  24,448 ×  55,296  (downsample: 4.0×)
  Level 3:  12,224 ×  27,648  (downsample: 8.0×)
  Level 4:   6,112 ×  13,824  (downsample: 16.0×)
  Level 5:   3,056 ×   6,912  (downsample: 32.0×)
  Level 6:   1,528 ×   3,456  (downsample: 64.0×)
  Level 7:     764 ×   1,728  (downsample: 128.0×)
  Level 8:     382 ×     864  (downsample: 256.0×)
  Level 9:     191 ×     432  (downsample: 512.0×)

# Render a small overview of the whole slide, bounded by an 800×800 box.
thumbnail = slide.get_thumbnail((800, 800))

fig, ax = plt.subplots(figsize=(8, 8))
ax.imshow(thumbnail)
ax.set_title(f'{slide_name}: Thumbnail', fontsize=14)
ax.axis('off')
plt.tight_layout()
plt.show()

# Compare thumbnail and full-resolution sizes; the factor is width-based and
# rounded down.
thumb_w, thumb_h = thumbnail.size
full_w, full_h = slide.dimensions
print(f"\nThe thumbnail is {thumb_w}×{thumb_h} pixels.")
print(f"The actual slide is {full_w:,}×{full_h:,} pixels.")
print(f"That's a {full_w // thumb_w:,}× reduction!")

The thumbnail is 354×800 pixels.
The actual slide is 97,792×221,184 pixels.
That's a 276× reduction!

# === What compute_foreground_mask does internally ===
from skimage.morphology import remove_small_objects, remove_small_holes
from skimage.segmentation import clear_border

# Step 1: Get a tiny grayscale thumbnail (fits within 512×512) of the gigapixel image
thumbnail_gray = slide.get_thumbnail((512, 512)).convert("L")
thumbnail_array = np.array(thumbnail_gray)

# Step 2: Simple brightness threshold
# Tissue is darker than the white glass background
threshold = 180  # pixels darker than this are tissue
raw_mask = thumbnail_array < threshold

# Step 3: Morphological cleanup, applied in this order
cleaned = remove_small_objects(raw_mask, min_size=100)    # drop dust specks
cleaned = clear_border(cleaned)                            # drop components touching the edge
cleaned = remove_small_holes(cleaned, area_threshold=100)  # fill small holes inside tissue

# The "removed" figure is the net change, since hole filling adds pixels back.
n_raw, n_cleaned = raw_mask.sum(), cleaned.sum()
print(f"Raw mask pixels:     {n_raw:,}")
print(f"After cleanup:       {n_cleaned:,}")
print(f"Removed {n_raw - n_cleaned:,} artifact pixels")

# One panel per processing stage, left to right.
stage_panels = [
    (thumbnail_array, 'Grayscale thumbnail'),
    (raw_mask, f'After threshold (<{threshold})'),
    (cleaned, 'After morphological cleanup'),
]
fig, axes = plt.subplots(1, 3, figsize=(15, 4))
for ax, (panel_image, panel_title) in zip(axes, stage_panels):
    ax.imshow(panel_image, cmap='gray')
    ax.set_title(panel_title)
    ax.axis('off')
plt.tight_layout()
plt.show()

Raw mask pixels:     5,436
After cleanup:       5,041
Removed 395 artifact pixels

# Generate tissue mask
from PIL import Image

mask = compute_foreground_mask(slide)

print(f"Mask shape: {mask.shape}")
print(f"Tissue coverage: {mask.sum() / mask.size:.1%}")

# Three panels: the slide thumbnail, the binary mask, and the mask outline
# drawn over the thumbnail.
fig, axes = plt.subplots(1, 3, figsize=(18, 6))
original_ax, mask_ax, overlay_ax = axes

original_ax.imshow(thumbnail)
mask_ax.imshow(mask, cmap='gray')

# The mask and the thumbnail have different resolutions, so the mask is
# resized (nearest-neighbour keeps it binary) to the thumbnail size before
# its boundary is traced.
overlay_ax.imshow(thumbnail)
mask_image = Image.fromarray(mask.astype(np.uint8) * 255)
mask_resized = np.array(mask_image.resize(thumbnail.size, Image.NEAREST)) > 127
overlay_ax.contour(mask_resized.astype(float), levels=[0.5], colors='lime', linewidths=1.5)

panel_titles = ('Original Slide', 'Tissue Mask', 'Tissue Detection Overlay')
for ax, panel_title in zip(axes, panel_titles):
    ax.set_title(panel_title, fontsize=13)
    ax.axis('off')

plt.tight_layout()
plt.show()

Mask shape: (512, 226)
Tissue coverage: 4.4%

# Labelling rule for a patch from a tumour slide, driven by `overlap` and the
# two cut-offs `zero_tolerance` and `tumor_threshold`.
if overlap < zero_tolerance:
    label = 1  # Normal tissue (tumour slide)
elif overlap < tumor_threshold:
    label = 2  # Boundary tumour
else:
    label = 3  # 'Pure' tumour

# Load tumor annotations
xml_path = download_file_from_s3(
    DEFAULT_CONFIG.data.s3_annotations, EXAMPLE_TUMOR_ANNOTATION, '/tmp'
)
# Same guard as the Centre B slide download: stop early if nothing was fetched
# instead of failing inside the XML parser.
if not xml_path:
    raise FileNotFoundError(f"Could not download {EXAMPLE_TUMOR_ANNOTATION}")

polygons = load_tumor_polygons(xml_path)
print(f"Loaded {len(polygons)} tumor polygons")
# One line per polygon: its area and its integer bounding box.
for i, p in enumerate(polygons):
    print(f"  Polygon {i}: area = {p.area:,.0f} pixels², "
          f"bounds = {tuple(int(x) for x in p.bounds)}")

Downloading tumor_016.xml...
Loaded 10 tumor polygons
  Polygon 0: area = 43,916,526 pixels², bounds = (37670, 148815, 45746, 156110)
  Polygon 1: area = 86,410 pixels², bounds = (38997, 158449, 39629, 158802)
  Polygon 2: area = 67,783 pixels², bounds = (35418, 160397, 35673, 160736)
  Polygon 3: area = 75,177 pixels², bounds = (30817, 160676, 31188, 161036)
  Polygon 4: area = 76,608 pixels², bounds = (30382, 160876, 30701, 161229)
  Polygon 5: area = 70 pixels², bounds = (35182, 156414, 35198, 156425)
  Polygon 6: area = 265,080 pixels², bounds = (35142, 156425, 35657, 157205)
  Polygon 7: area = 193,961 pixels², bounds = (46975, 160543, 47697, 160991)
  Polygon 8: area = 2,183,626 pixels², bounds = (43413, 159341, 46757, 160954)
  Polygon 9: area = 1 pixels², bounds = (45918, 160917, 45922, 160917)

# Visualise tumor annotations overlaid on the tissue
outline_options = {
    'tumor_polygons': polygons,
    'title': 'Tumor Annotations (red) with Tissue Outline (green)',
    'figsize': (10, 10),
}
visualise_tissue_outline(slide, mask, **outline_options)

# === What classify_patch does internally ===
from shapely.geometry import Polygon, box

# Patch geometry and class cut-offs are identical for every example, so they
# are defined once instead of inside the loop.
patch_size = 224
half = patch_size // 2
patch_area = patch_size * patch_size
normal_max_overlap = 0.01    # below this fraction: Class 1
boundary_max_overlap = 0.50  # below this fraction (and >= the one above): Class 2

# Pick example coordinates for each class on the example slide
example_coords = sample_coordinates_by_class(slide_path, xml_path)

# NOTE(review): in the recorded run the sample drawn for class 2 is printed as
# "Class 1" (overlap 0.3%), so this walkthrough does not reproduce the
# sampler's labels exactly. Possibly the project helper anchors patches
# differently (e.g. top-left rather than centre) or uses other thresholds —
# confirm against classify_patch before relying on the numbers shown here.

# Show the classification logic for 3 example patches (one per class)
for class_id in [1, 2, 3]:
    coords = example_coords.get(class_id, [])
    if not coords:
        continue
    x, y = coords[0]  # Take first patch of this class

    # Square patch footprint, treating (x, y) as the patch centre
    patch_box = box(x - half, y - half, x + half, y + half)

    # Sum the intersection area with every tumour polygon that touches the
    # patch. Overlapping polygons would be counted twice, hence the cap at 1.0.
    total_overlap = sum(
        (
            polygon.intersection(patch_box).area
            for polygon in polygons
            if polygon.intersects(patch_box)
        ),
        0.0,
    )
    overlap_fraction = min(total_overlap / patch_area, 1.0)

    # Apply classification thresholds
    if overlap_fraction < normal_max_overlap:
        label_name = "Class 1: Normal (tumor slide)"
    elif overlap_fraction < boundary_max_overlap:
        label_name = "Class 2: Boundary"
    else:
        label_name = "Class 3: Pure Tumor"

    print(f"  Patch at ({x}, {y}): overlap = {overlap_fraction:.1%} → {label_name}")

  Patch at (54320, 102927): overlap = 0.0% → Class 1: Normal (tumor slide)
  Patch at (30296, 160944): overlap = 0.3% → Class 1: Normal (tumor slide)
  Patch at (40936, 149968): overlap = 100.0% → Class 3: Pure Tumor

# Lay a regular patch grid over the tissue mask and label every grid patch.
# Keeping the grid order means the patches can be drawn as an ordered grid later.
grid_stride = 224
coords = sample_grid_coordinates(slide, mask, patch_size=224, stride=grid_stride)

class_names = {1: 'Normal (tumor slide)', 2: 'Boundary', 3: 'Pure Tumor'}

# Bucket the grid coordinates by the label classify_patch assigns to them.
coords_by_class = {1: [], 2: [], 3: []}
for x, y in coords:
    label = classify_patch(x, y, polygons, patch_size=224)
    coords_by_class[label].append((x, y))

print(f"Regular grid stride: {grid_stride} pixels")
print(f"Total tissue patches on grid: {len(coords):,}")
for class_id, class_coords in sorted(coords_by_class.items()):
    print(f"  Class {class_id} ({class_names[class_id]}): {len(class_coords):,} patches")

Regular grid stride: 224 pixels
Total tissue patches on grid: 18,775
  Class 1 (Normal (tumor slide)): 17,711 patches
  Class 2 (Boundary): 133 patches
  Class 3 (Pure Tumor): 931 patches

# Show the regular patch grid zoomed in on the tumour region.
# The zoom is centred on pure-tumour patches when there are any, so that the
# surrounding boundary and normal patches appear in the same ordered grid
# (as in notebook 02 section 2.3); otherwise it falls back to the boundary
# and normal patches.
zoom_coords = coords_by_class.get(3, [])
if not zoom_coords:
    zoom_coords = coords_by_class.get(2, []) + coords_by_class.get(1, [])

if not zoom_coords:
    print('No classified patch coordinates were found for visualisation.')
else:
    zoom_region = find_zoom_region_by_coords(zoom_coords, region_size=10000)
    print(f"Zoom region: {zoom_region}")

    visualise_patches_grid(
        slide,
        coords_by_class,
        zoom_region=zoom_region,
        patch_size=224,
        class_colours={1: 'green', 2: 'orange', 3: 'red'},
        class_labels={1: 'Normal', 2: 'Boundary', 3: 'Pure Tumor'},
        title=f'Zoomed Tumor Region (Grid View) - {slide_name}',
        linewidth=1.5,
        figsize=(14, 12),
    )

Zoom region: (36884, 147746, 46884, 157746)

# === Cross-Centre Stain Variation: The Case for Normalisation ===
# CAMELYON16 slides tumour_001-060 (Centre A) and tumour_080-111 (Centre B)
# were scanned at different institutions with different staining protocols,
# producing noticeably different colour signatures.

import gc

from PIL import Image

# The Centre A patch is read from the slide that is already open in `slide`
# (opened from `slide_name`), so the label must name that slide; a hard-coded
# file name here would mislabel the log line and the figure title.
# NOTE: if EXAMPLE_TUMOR_SLIDE_ID is changed to a slide outside the Centre A
# range quoted above, pick a different Centre A slide for this comparison.
CENTRE_A_SLIDE = slide_name
CENTRE_B_SLIDE = 'tumor_091.tif'

# Initialise the same NumPy-based normaliser used elsewhere in the repo.
# The context manager closes the image file once it has been converted.
with Image.open('assets/reference_patch.png') as _ref_file:
    _ref_img = _ref_file.convert('RGB')
get_stain_normaliser(np.array(_ref_img, dtype=np.uint8))

def _get_tissue_patch(slide_obj, patch_size=224):
    """Return one RGB patch taken from the middle of the slide's tissue grid.

    A tissue mask is computed with ``get_tissue_mask`` and a grid with stride
    equal to ``patch_size`` is sampled over it. The middle entry of the
    returned coordinate list is chosen, and a ``patch_size`` square is read at
    level 0 with its top-left corner half a patch up and left of that
    coordinate.

    Args:
        slide_obj: Open slide object providing ``read_region``.
        patch_size: Edge length of the square patch in level-0 pixels.

    Returns:
        The patch as a ``uint8`` NumPy array converted to RGB.

    Raises:
        ValueError: If the grid sampler returns no coordinates.
    """
    tissue_mask = get_tissue_mask(slide_obj)
    grid = sample_grid_coordinates(
        slide_obj, tissue_mask, patch_size=patch_size, stride=patch_size
    )
    if not grid:
        raise ValueError("No tissue coordinates found.")

    centre_x, centre_y = grid[len(grid) // 2]
    offset = patch_size // 2
    origin = (centre_x - offset, centre_y - offset)
    region = slide_obj.read_region(origin, 0, (patch_size, patch_size))
    return np.array(region.convert('RGB'), dtype=np.uint8)

# Centre A: reuse the already-open slide
print(f'Extracting Centre A patch ({CENTRE_A_SLIDE})...')
_patch_a = _get_tissue_patch(slide, patch_size=DEFAULT_CONFIG.data.patch_size)

# Centre B: download, extract, clean up
print(f'Downloading Centre B slide ({CENTRE_B_SLIDE})...')
_slide_b_path = download_file_from_s3(
    DEFAULT_CONFIG.data.s3_images,
    CENTRE_B_SLIDE,
    DEFAULT_CONFIG.data.temp_dir
)
if not _slide_b_path:
    raise FileNotFoundError(f"Could not download {CENTRE_B_SLIDE}")

# try/finally guarantees the slide handle is closed and the downloaded file is
# removed even when opening the slide or extracting the patch raises.
_slide_b = None
try:
    _slide_b = openslide.OpenSlide(_slide_b_path)
    print('Extracting Centre B patch...')
    _patch_b = _get_tissue_patch(_slide_b, patch_size=DEFAULT_CONFIG.data.patch_size)
finally:
    if _slide_b is not None:
        _slide_b.close()
    cleanup_file(_slide_b_path)
    del _slide_b, _slide_b_path
    gc.collect()
print('Done.')

# Normalise using repo helper
_patch_a_norm = normalise_stain(_patch_a)
_patch_b_norm = normalise_stain(_patch_b)

# Plot: top row shows the raw patches, bottom row the normalised ones;
# left column is Centre A, right column is Centre B.
_centre_a_label = CENTRE_A_SLIDE.replace('.tif', '')
_centre_b_label = CENTRE_B_SLIDE.replace('.tif', '')

fig, axes = plt.subplots(2, 2, figsize=(10, 10))

_rows = [
    (_patch_a,      _patch_b,      'Before normalisation'),
    (_patch_a_norm, _patch_b_norm, 'After normalisation'),
]

for row, (pa, pb, row_title) in enumerate(_rows):
    axes[row, 0].imshow(pa)
    axes[row, 0].set_title(f'{row_title} | Centre A ({_centre_a_label})', fontsize=11)
    axes[row, 0].axis('off')

    axes[row, 1].imshow(pb)
    axes[row, 1].set_title(f'Centre B ({_centre_b_label})', fontsize=11)
    axes[row, 1].axis('off')

plt.suptitle(
    'Cross-Centre Stain Variation and Macenko Normalisation\n'
    'Centre A (tumour_001\u2013060) vs Centre B (tumour_080\u2013111)',
    fontsize=13, fontweight='bold'
)
plt.tight_layout()
plt.show()

Stain normaliser initialised with reference image
Extracting Centre A patch (tumor_005.tif)...
Downloading Centre B slide (tumor_091.tif)...
Downloading tumor_091.tif...
Extracting Centre B patch...
Done.

# === DATASET GENERATION (run once, then skip) ===
# Reference only: the generation calls below are commented out, so running
# this cell just prints a reminder. The pre-generated dataset is loaded in
# the next section. Uncomment the calls to regenerate the datasets.

# from src.data.generator import generate_dataset, generate_test_dataset
#
# # Training dataset: ~100K patches per class
# generate_dataset(
#     class_targets={0: 100000, 1: 100000, 2: 100000, 3: 100000},
#     save_path='./data/camelyon16_4class_stain_normalised',
#     stain_normalise=True,
#     reference_image_path='assets/reference_patch.png'
# )
#
# # Test dataset: ~25K patches per class
# generate_test_dataset(
#     class_targets={0: 25000, 1: 25000, 2: 25000, 3: 25000},
#     save_path='./data/camelyon16_test_stain_normalised',
#     stain_normalise=True,
#     reference_image_path='assets/reference_patch.png'
# )

print("Dataset generation code shown above (pre-generated dataset used below)")

Dataset generation code shown above (pre-generated dataset used below)

# Summarise the pre-generated datasets found under TRAIN_PATH and TEST_PATH
# (both resolved in Section 2): chunk count and approximate patch count per class.

# Class id -> name of the sub-folder holding that class's .npz chunks.
class_names = {
    0: 'normal_from_normal',
    1: 'normal_from_tumor',
    2: 'boundary_tumor',
    3: 'pure_tumor'
}


def _summarise_split(title, split_path):
    """Print and return an approximate patch count for one dataset split.

    For each class folder the number of ``.npz`` chunks is multiplied by the
    number of entries under key ``'X'`` in the first chunk (by file name), so
    the totals are estimates when chunks differ in size.

    Args:
        title: Heading printed before the per-class lines.
        split_path: Directory containing one sub-folder per class name.

    Returns:
        The estimated total number of patches across all classes.
    """
    print(title)
    split_total = 0
    for name in class_names.values():
        # Sorted so the sampled chunk does not depend on directory order.
        chunks = sorted((Path(split_path) / name).glob('*.npz'))
        if not chunks:
            # Report missing classes instead of silently leaving them out.
            print(f"  {name}: no .npz chunks found")
            continue
        with np.load(str(chunks[0])) as sample:
            n_per_chunk = len(sample['X'])
        total = len(chunks) * n_per_chunk
        split_total += total
        print(f"  {name}: {len(chunks)} chunks (~{total:,} patches)")
    print(f"  Total: ~{split_total:,} patches")
    return split_total


total_train = _summarise_split("=== Training Dataset ===", TRAIN_PATH)
total_test = _summarise_split("\n=== Test Dataset ===", TEST_PATH)

=== Training Dataset ===
  normal_from_normal: 74 chunks (~102,416 patches)
  normal_from_tumor: 56 chunks (~100,912 patches)
  boundary_tumor: 70 chunks (~78,190 patches)
  pure_tumor: 58 chunks (~73,312 patches)
  Total: ~354,830 patches

=== Test Dataset ===
  normal_from_normal: 27 chunks (~28,350 patches)
  normal_from_tumor: 20 chunks (~20,440 patches)
  boundary_tumor: 17 chunks (~24,990 patches)
  pure_tumor: 18 chunks (~18,378 patches)
  Total: ~92,158 patches

# One dataset per class, labelled 0 (normal) and 1 (tumour).
normal_ds = _create_single_class_dataset(normal_chunks, label=0)
tumor_ds  = _create_single_class_dataset(tumor_chunks,  label=1)

# Half the batch from each class, then concatenate and shuffle positions.
# The chained call is wrapped in parentheses so it can span several lines;
# without them the `.map(...)` continuation line is a syntax error.
dataset = (
    tf.data.Dataset.zip((normal_ds.batch(half_batch), tumor_ds.batch(half_batch)))
    .map(lambda n, t: concat_and_shuffle(n, t))
)

# Simplified version of my chunk reading logic:

def read_chunk(file_path, label, max_patches=None):
    """Load a random subset of patches from one ``.npz`` chunk.

    The chunk's ``'X'`` array is read, up to ``max_patches`` rows are drawn
    without replacement, converted to ``float32`` and scaled into ``[0, 1]``.

    Note: NumPy only memory-maps ``.npy`` files. For an ``.npz`` archive
    ``mmap_mode`` has no effect and ``data['X']`` reads the whole array, so the
    argument is not passed here.

    Args:
        file_path: Path to an ``.npz`` file containing an array under ``'X'``.
        label: Integer class label assigned to every returned patch.
        max_patches: Maximum number of patches to return. ``None`` keeps every
            patch in the chunk. Requests larger than the chunk are clamped to
            the chunk size instead of raising.

    Returns:
        Tuple ``(patches, labels)``: ``patches`` is a ``float32`` array with
        values in ``[0, 1]``; ``labels`` is an ``int32`` vector of the same
        length filled with ``label``.
    """
    with np.load(file_path) as data:
        X = data['X']  # full array is read into RAM at this point

    n_available = len(X)
    # Sampling without replacement cannot draw more rows than exist.
    n_take = n_available if max_patches is None else min(max_patches, n_available)
    idx = np.random.choice(n_available, n_take, replace=False)
    patches = X[idx].astype(np.float32)

    # Normalise to [0, 1]; values above 1.5 indicate 0-255 pixel data.
    # The size check avoids calling max() on an empty selection.
    if patches.size and patches.max() > 1.5:
        patches /= 255.0
    patches = np.clip(patches, 0.0, 1.0)

    labels = np.full(len(patches), label, dtype=np.int32)
    return patches, labels

# tf.data.interleave reads from multiple chunks simultaneously,
# yielding a stream of patches without holding everything in memory.
# NOTE(review): simplified sketch — `file_dataset` is not defined in this
# snippet, and tf.data's interleave expects its function to return a Dataset,
# so the real pipeline presumably wraps read_chunk; confirm against the
# project's tf pipeline module.
dataset = file_dataset.interleave(
    read_chunk,
    cycle_length=4,           # Read 4 chunks at once
    num_parallel_calls=2,     # CRITICAL: not AUTOTUNE (causes memory leaks)
    deterministic=False       # Allow out-of-order for speed
)

import tensorflow as tf
from tensorflow import keras
from src.models.architectures import get_model

# Build and inspect the model
# 'subtle' is the architecture name passed to the project's get_model factory;
# summary() prints the layer-by-layer table with parameter counts.
model = get_model('subtle')
model.summary()

Model: "subtle_model"

┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┓
┃ Layer (type)                    ┃ Output Shape           ┃       Param # ┃
┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━┩
│ input_layer (InputLayer)        │ (None, 224, 224, 3)    │             0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ conv2d (Conv2D)                 │ (None, 224, 224, 32)   │           896 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ layer_normalization             │ (None, 224, 224, 32)   │            64 │
│ (LayerNormalization)            │                        │               │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ activation (Activation)         │ (None, 224, 224, 32)   │             0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ conv2d_1 (Conv2D)               │ (None, 112, 112, 64)   │        18,496 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ layer_normalization_1           │ (None, 112, 112, 64)   │           128 │
│ (LayerNormalization)            │                        │               │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ activation_1 (Activation)       │ (None, 112, 112, 64)   │             0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ conv2d_2 (Conv2D)               │ (None, 56, 56, 128)    │        73,856 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ layer_normalization_2           │ (None, 56, 56, 128)    │           256 │
│ (LayerNormalization)            │                        │               │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ activation_2 (Activation)       │ (None, 56, 56, 128)    │             0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ conv2d_3 (Conv2D)               │ (None, 28, 28, 256)    │       295,168 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ layer_normalization_3           │ (None, 28, 28, 256)    │           512 │
│ (LayerNormalization)            │                        │               │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ activation_3 (Activation)       │ (None, 28, 28, 256)    │             0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ global_average_pooling2d        │ (None, 256)            │             0 │
│ (GlobalAveragePooling2D)        │                        │               │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ dropout (Dropout)               │ (None, 256)            │             0 │
├─────────────────────────────────┼────────────────────────┼───────────────┤
│ dense (Dense)                   │ (None, 1)              │           257 │
└─────────────────────────────────┴────────────────────────┴───────────────┘

 Total params: 389,633 (1.49 MB)

 Trainable params: 389,633 (1.49 MB)

 Non-trainable params: 0 (0.00 B)

# Configure training
from src.models import run_binary_experiment

# These settings mutate the shared DEFAULT_CONFIG object, so they also apply
# to every experiment run later in the notebook.
DEFAULT_CONFIG.training.normalise_patches = False
DEFAULT_CONFIG.training.val_max_samples_per_class = 4000

# Uses TRAIN_PATH resolved in Section 2 (dataset location) above
TRAIN_DATASET_PATH = TRAIN_PATH

# ============================================================
# EXPERIMENT 2: Normal vs Pure Tumor (sanity check - should be easy)
# ============================================================
print("=" * 60)
print("EXPERIMENT 2: Normal vs Pure Tumor")
print("=" * 60)

# Seeded in the same way as Experiment 5 so that both runs are reproducible.
keras.utils.set_random_seed(42)

exp2_results = run_binary_experiment(
    dataset_path=TRAIN_DATASET_PATH,
    experiment_type=2,
    model_name='subtle',
    epochs=15,
    learning_rate=1e-5
)

print(f"\nValidation Results:")
print(f"  Accuracy: {exp2_results['results']['accuracy']:.1%}")
print(f"  AUC: {exp2_results['results']['auc']:.3f}")

============================================================
EXPERIMENT 2: Normal vs Pure Tumor
============================================================

============================================================
EXPERIMENT: Normal vs Pure Tumor
============================================================
Model: subtle
Mapping: {0: ['normal_from_normal'], 1: ['pure_tumor']}
✓ No slide leakage (176 train, 78 val slides)
Train: 92 chunks, Val: 40 chunks
  Training chunks: 52 normal, 40 tumor
Loading validation patches with pre-allocation...
  Found 22 normal chunks, 18 tumor chunks
    normal: chunk 5/22, 905 patches loaded
    normal: chunk 10/22, 1810 patches loaded
    normal: chunk 15/22, 2715 patches loaded
    normal: chunk 20/22, 3620 patches loaded
    normal: chunk 22/22, 3982 patches loaded
    tumor: chunk 5/18, 1110 patches loaded
    tumor: chunk 10/18, 2220 patches loaded
    tumor: chunk 15/18, 3330 patches loaded
    tumor: chunk 18/18, 3996 patches loaded
  Loaded: 3982 normal, 3996 tumor patches
  Balanced: 3982 samples per class (7964 total)
  Created cached validation dataset: 7964 samples
Steps: 2300 train, 249 val

Model: 389,633 parameters
Training: 2300 steps/epoch, 15 max epochs
Epoch 1/15
2300/2300 ━━━━━━━━━━━━━━━━━━━━ 0s 296ms/step - accuracy: 0.6509 - auc: 0.7026 - loss: 0.6223
Epoch 1: val_loss improved from None to 0.51084, saving model to ./models/normal_vs_pure_tumor.keras

Epoch 1: finished saving model to ./models/normal_vs_pure_tumor.keras
2300/2300 ━━━━━━━━━━━━━━━━━━━━ 790s 303ms/step - accuracy: 0.7381 - auc: 0.8124 - loss: 0.5306 - val_accuracy: 0.7648 - val_auc: 0.8336 - val_loss: 0.5108 - learning_rate: 1.0000e-05
Epoch 2/15
2300/2300 ━━━━━━━━━━━━━━━━━━━━ 0s 325ms/step - accuracy: 0.8137 - auc: 0.8789 - loss: 0.4378
Epoch 2: val_loss improved from 0.51084 to 0.49906, saving model to ./models/normal_vs_pure_tumor.keras

Epoch 2: finished saving model to ./models/normal_vs_pure_tumor.keras
2300/2300 ━━━━━━━━━━━━━━━━━━━━ 755s 328ms/step - accuracy: 0.8174 - auc: 0.8828 - loss: 0.4322 - val_accuracy: 0.7824 - val_auc: 0.8586 - val_loss: 0.4991 - learning_rate: 1.0000e-05
Epoch 3/15
2300/2300 ━━━━━━━━━━━━━━━━━━━━ 0s 321ms/step - accuracy: 0.8430 - auc: 0.9082 - loss: 0.3841
Epoch 3: val_loss did not improve from 0.49906
2300/2300 ━━━━━━━━━━━━━━━━━━━━ 745s 324ms/step - accuracy: 0.8481 - auc: 0.9131 - loss: 0.3727 - val_accuracy: 0.7364 - val_auc: 0.8247 - val_loss: 0.5194 - learning_rate: 1.0000e-05
Epoch 4/15
2300/2300 ━━━━━━━━━━━━━━━━━━━━ 0s 332ms/step - accuracy: 0.8092 - auc: 0.8807 - loss: 0.4233
Epoch 4: val_loss improved from 0.49906 to 0.43672, saving model to ./models/normal_vs_pure_tumor.keras

Epoch 4: finished saving model to ./models/normal_vs_pure_tumor.keras
2300/2300 ━━━━━━━━━━━━━━━━━━━━ 772s 336ms/step - accuracy: 0.8368 - auc: 0.9060 - loss: 0.3875 - val_accuracy: 0.8167 - val_auc: 0.8838 - val_loss: 0.4367 - learning_rate: 1.0000e-05
Epoch 5/15
2300/2300 ━━━━━━━━━━━━━━━━━━━━ 0s 334ms/step - accuracy: 0.8726 - auc: 0.9361 - loss: 0.3215
Epoch 5: val_loss did not improve from 0.43672
2300/2300 ━━━━━━━━━━━━━━━━━━━━ 777s 338ms/step - accuracy: 0.8630 - auc: 0.9274 - loss: 0.3422 - val_accuracy: 0.8040 - val_auc: 0.8727 - val_loss: 0.4744 - learning_rate: 1.0000e-05
Epoch 6/15
2300/2300 ━━━━━━━━━━━━━━━━━━━━ 0s 331ms/step - accuracy: 0.8375 - auc: 0.9022 - loss: 0.3916
Epoch 6: ReduceLROnPlateau reducing learning rate to 4.999999873689376e-06.

Epoch 6: val_loss did not improve from 0.43672
2300/2300 ━━━━━━━━━━━━━━━━━━━━ 768s 334ms/step - accuracy: 0.8498 - auc: 0.9199 - loss: 0.3604 - val_accuracy: 0.7816 - val_auc: 0.8639 - val_loss: 0.5366 - learning_rate: 1.0000e-05
Epoch 7/15
2300/2300 ━━━━━━━━━━━━━━━━━━━━ 0s 340ms/step - accuracy: 0.8516 - auc: 0.9154 - loss: 0.3715
Epoch 7: val_loss improved from 0.43672 to 0.43398, saving model to ./models/normal_vs_pure_tumor.keras

Epoch 7: finished saving model to ./models/normal_vs_pure_tumor.keras
2300/2300 ━━━━━━━━━━━━━━━━━━━━ 789s 343ms/step - accuracy: 0.8485 - auc: 0.9157 - loss: 0.3712 - val_accuracy: 0.8212 - val_auc: 0.8859 - val_loss: 0.4340 - learning_rate: 5.0000e-06
Epoch 8/15
2300/2300 ━━━━━━━━━━━━━━━━━━━━ 0s 337ms/step - accuracy: 0.8699 - auc: 0.9338 - loss: 0.3242
Epoch 8: val_loss improved from 0.43398 to 0.41535, saving model to ./models/normal_vs_pure_tumor.keras

Epoch 8: finished saving model to ./models/normal_vs_pure_tumor.keras
2300/2300 ━━━━━━━━━━━━━━━━━━━━ 784s 341ms/step - accuracy: 0.8516 - auc: 0.9187 - loss: 0.3626 - val_accuracy: 0.8250 - val_auc: 0.8937 - val_loss: 0.4154 - learning_rate: 5.0000e-06
Epoch 9/15
2300/2300 ━━━━━━━━━━━━━━━━━━━━ 0s 324ms/step - accuracy: 0.8770 - auc: 0.9400 - loss: 0.3111
Epoch 9: val_loss did not improve from 0.41535
2300/2300 ━━━━━━━━━━━━━━━━━━━━ 753s 328ms/step - accuracy: 0.8628 - auc: 0.9296 - loss: 0.3390 - val_accuracy: 0.8196 - val_auc: 0.8919 - val_loss: 0.4226 - learning_rate: 5.0000e-06
Epoch 10/15
2300/2300 ━━━━━━━━━━━━━━━━━━━━ 0s 346ms/step - accuracy: 0.8499 - auc: 0.9158 - loss: 0.3664
Epoch 10: ReduceLROnPlateau reducing learning rate to 2.499999936844688e-06.

Epoch 10: val_loss did not improve from 0.41535
2300/2300 ━━━━━━━━━━━━━━━━━━━━ 803s 349ms/step - accuracy: 0.8608 - auc: 0.9258 - loss: 0.3455 - val_accuracy: 0.8157 - val_auc: 0.8911 - val_loss: 0.4372 - learning_rate: 5.0000e-06
Epoch 11/15
2300/2300 ━━━━━━━━━━━━━━━━━━━━ 0s 336ms/step - accuracy: 0.8627 - auc: 0.9287 - loss: 0.3405
Epoch 11: val_loss did not improve from 0.41535
2300/2300 ━━━━━━━━━━━━━━━━━━━━ 781s 340ms/step - accuracy: 0.8568 - auc: 0.9245 - loss: 0.3500 - val_accuracy: 0.8236 - val_auc: 0.8948 - val_loss: 0.4185 - learning_rate: 2.5000e-06
Epoch 11: early stopping
Restoring model weights from the end of the best epoch: 8.

==================================================
Evaluating: Normal vs Pure Tumor
==================================================
Optimal threshold: 0.488
Samples: 7,964
Accuracy: 82.6%
AUC: 0.894

Classification Report:
              precision    recall  f1-score   support

      Normal       0.85      0.79      0.82      3982
       Tumor       0.81      0.86      0.83      3982

    accuracy                           0.83      7964
   macro avg       0.83      0.83      0.83      7964
weighted avg       0.83      0.83      0.83      7964

Saved model metadata to models/normal_vs_pure_tumor.json

Validation Results:
  Accuracy: 82.6%
  AUC: 0.894

# ============================================================
# EXPERIMENT 5: Normal vs Boundary (harder)
# ============================================================

# Start from a clean slate: drop Keras state left over from Experiment 2,
# recreate the pipeline's batch-shuffle generator from a fixed seed, and
# release unreferenced Python objects.
keras.backend.clear_session()
import src.dataset.tf_pipeline as tfp
tfp._batch_shuffle_gen = tf.random.Generator.from_seed(42)
gc.collect()

banner = "=" * 60
print(banner)
print("EXPERIMENT 5: Normal vs Boundary Tumor")
print(banner)

# Fixed seed for reproducibility. See notebook 06 for multi-seed variance analysis.
keras.utils.set_random_seed(42)

exp5_settings = {
    'dataset_path': TRAIN_DATASET_PATH,
    'experiment_type': 5,
    'model_name': 'subtle',
    'epochs': 15,
    'learning_rate': 1e-5,
}
exp5_results = run_binary_experiment(**exp5_settings)

exp5_metrics = exp5_results['results']
print(f"\nValidation Results:")
print(f"  Accuracy: {exp5_metrics['accuracy']:.1%}")
print(f"  AUC: {exp5_metrics['auc']:.3f}")

============================================================
EXPERIMENT 5: Normal vs Boundary Tumor
============================================================

============================================================
EXPERIMENT: Normal vs Boundary
============================================================
Model: subtle
Mapping: {0: ['normal_from_normal'], 1: ['boundary_tumor']}
✓ No slide leakage (178 train, 76 val slides)
Train: 100 chunks, Val: 44 chunks
  Training chunks: 51 normal, 49 tumor
Loading validation patches with pre-allocation...
  Found 23 normal chunks, 21 tumor chunks
    normal: chunk 5/23, 865 patches loaded
    normal: chunk 10/23, 1730 patches loaded
    normal: chunk 15/23, 2595 patches loaded
    normal: chunk 20/23, 3460 patches loaded
    normal: chunk 23/23, 3979 patches loaded
    tumor: chunk 5/21, 950 patches loaded
    tumor: chunk 10/21, 1900 patches loaded
    tumor: chunk 15/21, 2850 patches loaded
    tumor: chunk 20/21, 3800 patches loaded
    tumor: chunk 21/21, 3990 patches loaded
  Loaded: 3979 normal, 3990 tumor patches
  Balanced: 3979 samples per class (7958 total)
  Created cached validation dataset: 7958 samples
Steps: 2500 train, 249 val

Model: 389,633 parameters
Training: 2500 steps/epoch, 15 max epochs
Epoch 1/15
2500/2500 ━━━━━━━━━━━━━━━━━━━━ 0s 251ms/step - accuracy: 0.5814 - auc: 0.6107 - loss: 0.6857
Epoch 1: val_loss improved from None to 0.66377, saving model to ./models/normal_vs_boundary.keras

Epoch 1: finished saving model to ./models/normal_vs_boundary.keras
2500/2500 ━━━━━━━━━━━━━━━━━━━━ 721s 257ms/step - accuracy: 0.6204 - auc: 0.6687 - loss: 0.6487 - val_accuracy: 0.6152 - val_auc: 0.6722 - val_loss: 0.6638 - learning_rate: 1.0000e-05
Epoch 2/15
2500/2500 ━━━━━━━━━━━━━━━━━━━━ 0s 317ms/step - accuracy: 0.6468 - auc: 0.6989 - loss: 0.6318
Epoch 2: val_loss did not improve from 0.66377
2500/2500 ━━━━━━━━━━━━━━━━━━━━ 801s 320ms/step - accuracy: 0.6588 - auc: 0.7164 - loss: 0.6208 - val_accuracy: 0.6195 - val_auc: 0.6484 - val_loss: 0.6702 - learning_rate: 1.0000e-05
Epoch 3/15
2500/2500 ━━━━━━━━━━━━━━━━━━━━ 0s 303ms/step - accuracy: 0.6588 - auc: 0.7165 - loss: 0.6190
Epoch 3: val_loss improved from 0.66377 to 0.64735, saving model to ./models/normal_vs_boundary.keras

Epoch 3: finished saving model to ./models/normal_vs_boundary.keras
2500/2500 ━━━━━━━━━━━━━━━━━━━━ 765s 306ms/step - accuracy: 0.6758 - auc: 0.7396 - loss: 0.6023 - val_accuracy: 0.6524 - val_auc: 0.6941 - val_loss: 0.6474 - learning_rate: 1.0000e-05
Epoch 4/15
2500/2500 ━━━━━━━━━━━━━━━━━━━━ 0s 317ms/step - accuracy: 0.6884 - auc: 0.7567 - loss: 0.5861
Epoch 4: val_loss did not improve from 0.64735
2500/2500 ━━━━━━━━━━━━━━━━━━━━ 801s 320ms/step - accuracy: 0.6900 - auc: 0.7592 - loss: 0.5847 - val_accuracy: 0.6082 - val_auc: 0.6958 - val_loss: 0.6810 - learning_rate: 1.0000e-05
Epoch 5/15
2500/2500 ━━━━━━━━━━━━━━━━━━━━ 0s 309ms/step - accuracy: 0.6672 - auc: 0.7294 - loss: 0.6093
Epoch 5: ReduceLROnPlateau reducing learning rate to 4.999999873689376e-06.

Epoch 5: val_loss did not improve from 0.64735
2500/2500 ━━━━━━━━━━━━━━━━━━━━ 780s 312ms/step - accuracy: 0.6952 - auc: 0.7659 - loss: 0.5795 - val_accuracy: 0.6444 - val_auc: 0.6928 - val_loss: 0.7585 - learning_rate: 1.0000e-05
Epoch 6/15
2500/2500 ━━━━━━━━━━━━━━━━━━━━ 0s 322ms/step - accuracy: 0.6821 - auc: 0.7441 - loss: 0.6075
Epoch 6: val_loss did not improve from 0.64735
2500/2500 ━━━━━━━━━━━━━━━━━━━━ 813s 325ms/step - accuracy: 0.6950 - auc: 0.7604 - loss: 0.5893 - val_accuracy: 0.5714 - val_auc: 0.5933 - val_loss: 0.8190 - learning_rate: 5.0000e-06
Epoch 6: early stopping
Restoring model weights from the end of the best epoch: 3.

==================================================
Evaluating: Normal vs Boundary
==================================================
Optimal threshold: 0.465
Samples: 7,958
Accuracy: 65.6%
AUC: 0.694

Classification Report:
              precision    recall  f1-score   support

      Normal       0.67      0.62      0.64      3979
       Tumor       0.65      0.69      0.67      3979

    accuracy                           0.66      7958
   macro avg       0.66      0.66      0.66      7958
weighted avg       0.66      0.66      0.66      7958

Saved model metadata to models/normal_vs_boundary.json

Validation Results:
  Accuracy: 65.6%
  AUC: 0.694

# ============================================================
# EXPERIMENT 3: Slide Context Detection (field cancerisation hypothesis)
# ============================================================

# Clear stale TF/GPU state from Experiment 5: reset the Keras backend session,
# replace the pipeline module's `_batch_shuffle_gen` with a fresh generator
# seeded with 42, and run a garbage-collection pass.
keras.backend.clear_session()
import src.dataset.tf_pipeline as tfp
tfp._batch_shuffle_gen = tf.random.Generator.from_seed(42)
gc.collect()

print("=" * 60)
print("EXPERIMENT 3: Slide Context Detection")
print("=" * 60)

# Seed the global RNGs here as the Experiment 5 cell does, so this cell gives
# the same result whether it is run on its own or after the other experiments.
# Without it the outcome depends on whatever RNG state earlier cells left behind.
keras.utils.set_random_seed(42)

exp3_results = run_binary_experiment(
    dataset_path=TRAIN_DATASET_PATH,
    experiment_type=3,
    model_name='subtle',
    epochs=15,
    learning_rate=1e-5
)

# Headline validation metrics reported by the experiment runner.
print("\nValidation Results:")
print(f"  Accuracy: {exp3_results['results']['accuracy']:.1%}")
print(f"  AUC: {exp3_results['results']['auc']:.3f}")

============================================================
EXPERIMENT 3: Slide Context Detection
============================================================

============================================================
EXPERIMENT: Slide Context Detection
============================================================
Model: subtle
Mapping: {0: ['normal_from_normal'], 1: ['normal_from_tumor']}
✓ No slide leakage (180 train, 78 val slides)
Train: 91 chunks, Val: 39 chunks
  Training chunks: 52 normal, 39 tumor
Loading validation patches with pre-allocation...
  Found 22 normal chunks, 17 tumor chunks
    normal: chunk 5/22, 905 patches loaded
    normal: chunk 10/22, 1810 patches loaded
    normal: chunk 15/22, 2715 patches loaded
    normal: chunk 20/22, 3620 patches loaded
    normal: chunk 22/22, 3982 patches loaded
    tumor: chunk 5/17, 1175 patches loaded
    tumor: chunk 10/17, 2350 patches loaded
    tumor: chunk 15/17, 3525 patches loaded
    tumor: chunk 17/17, 3995 patches loaded
  Loaded: 3982 normal, 3995 tumor patches
  Balanced: 3982 samples per class (7964 total)
  Created cached validation dataset: 7964 samples
Steps: 2275 train, 249 val

Model: 389,633 parameters
Training: 2275 steps/epoch, 15 max epochs
Epoch 1/15
2275/2275 ━━━━━━━━━━━━━━━━━━━━ 0s 300ms/step - accuracy: 0.5170 - auc: 0.5206 - loss: 0.7200
Epoch 1: val_loss improved from None to 0.69804, saving model to ./models/slide_context_detection.keras

Epoch 1: finished saving model to ./models/slide_context_detection.keras
2275/2275 ━━━━━━━━━━━━━━━━━━━━ 781s 306ms/step - accuracy: 0.5271 - auc: 0.5339 - loss: 0.6994 - val_accuracy: 0.4928 - val_auc: 0.4857 - val_loss: 0.6980 - learning_rate: 1.0000e-05
Epoch 2/15
2275/2275 ━━━━━━━━━━━━━━━━━━━━ 0s 333ms/step - accuracy: 0.5386 - auc: 0.5535 - loss: 0.6883
Epoch 2: val_loss improved from 0.69804 to 0.69431, saving model to ./models/slide_context_detection.keras

Epoch 2: finished saving model to ./models/slide_context_detection.keras
2275/2275 ━━━━━━━━━━━━━━━━━━━━ 766s 337ms/step - accuracy: 0.5466 - auc: 0.5654 - loss: 0.6864 - val_accuracy: 0.4969 - val_auc: 0.4986 - val_loss: 0.6943 - learning_rate: 1.0000e-05
Epoch 3/15
2275/2275 ━━━━━━━━━━━━━━━━━━━━ 0s 340ms/step - accuracy: 0.5440 - auc: 0.5646 - loss: 0.6837
Epoch 3: val_loss did not improve from 0.69431
2275/2275 ━━━━━━━━━━━━━━━━━━━━ 781s 343ms/step - accuracy: 0.5586 - auc: 0.5863 - loss: 0.6805 - val_accuracy: 0.4996 - val_auc: 0.4936 - val_loss: 0.7383 - learning_rate: 1.0000e-05
Epoch 4/15
2275/2275 ━━━━━━━━━━━━━━━━━━━━ 0s 331ms/step - accuracy: 0.5590 - auc: 0.5968 - loss: 0.6730
Epoch 4: ReduceLROnPlateau reducing learning rate to 4.999999873689376e-06.

Epoch 4: val_loss did not improve from 0.69431
2275/2275 ━━━━━━━━━━━━━━━━━━━━ 760s 334ms/step - accuracy: 0.5612 - auc: 0.5953 - loss: 0.6756 - val_accuracy: 0.5437 - val_auc: 0.5626 - val_loss: 0.7012 - learning_rate: 1.0000e-05
Epoch 5/15
2275/2275 ━━━━━━━━━━━━━━━━━━━━ 0s 327ms/step - accuracy: 0.5210 - auc: 0.5418 - loss: 0.6895
Epoch 5: val_loss did not improve from 0.69431
2275/2275 ━━━━━━━━━━━━━━━━━━━━ 752s 331ms/step - accuracy: 0.5242 - auc: 0.5458 - loss: 0.6877 - val_accuracy: 0.5200 - val_auc: 0.5215 - val_loss: 0.6954 - learning_rate: 5.0000e-06
Epoch 5: early stopping
Restoring model weights from the end of the best epoch: 2.

==================================================
Evaluating: Slide Context Detection
==================================================
Optimal threshold: 0.493
Samples: 7,964
Accuracy: 50.9%
AUC: 0.499

Classification Report:
              precision    recall  f1-score   support

      Normal       0.51      0.33      0.40      3982
       Tumor       0.51      0.69      0.58      3982

    accuracy                           0.51      7964
   macro avg       0.51      0.51      0.49      7964
weighted avg       0.51      0.51      0.49      7964

Saved model metadata to models/slide_context_detection.json

Validation Results:
  Accuracy: 50.9%
  AUC: 0.499

# Youden's J statistic (TPR - FPR) is computed at every candidate threshold on
# the ROC curve; the chosen operating point is the threshold where J is largest.
fpr, tpr, thresholds = roc_curve(y_true, y_prob)
j_scores = np.subtract(tpr, fpr)
best_threshold = thresholds[j_scores.argmax()]

# Test set evaluation
from src.models import evaluate_on_test_set, load_model_metadata

# TEST_PATH is expected to exist already (the notebook defines it in Section 7).

# One entry per trained experiment: display name, saved model file, the
# label -> source-class mapping used for evaluation, and the validation-run
# results dict returned by training.
experiments_to_eval = dict(
    exp2=dict(
        name='Normal vs Pure Tumor',
        model_path='./models/normal_vs_pure_tumor.keras',
        mapping={0: ['normal_from_normal'], 1: ['pure_tumor']},
        results=exp2_results,
    ),
    exp5=dict(
        name='Normal vs Boundary',
        model_path='./models/normal_vs_boundary.keras',
        mapping={0: ['normal_from_normal'], 1: ['boundary_tumor']},
        results=exp5_results,
    ),
    exp3=dict(
        name='Slide Context Detection',
        model_path='./models/slide_context_detection.keras',
        mapping={0: ['normal_from_normal'], 1: ['normal_from_tumor']},
        results=exp3_results,
    ),
)

# Evaluate each saved model on the test set and keep the result per experiment key.
test_results = {}
for key, exp in experiments_to_eval.items():
    print("\n" + "=" * 60)
    print(f"TEST: {exp['name']}")
    print("=" * 60)

    # Reload the saved model together with its metadata; the metadata supplies
    # the decision threshold and (optionally) the patch-normalisation flag.
    model = keras.models.load_model(exp['model_path'])
    meta = load_model_metadata(exp['model_path'])
    normalise = meta.get('normalise_patches', False)

    result = evaluate_on_test_set(
        model,
        TEST_PATH,
        exp['mapping'],
        key,
        threshold=meta['threshold'],
        normalise=normalise,
    )
    test_results[key] = result

    # Report validation vs test AUC and the difference between them.
    val_auc = exp['results']['results']['auc']
    test_auc = result['auc']
    print(f"Val AUC:  {val_auc:.3f}")
    print(f"Test AUC: {test_auc:.3f}")
    print(f"Gap:      {val_auc - test_auc:.3f}")
    print(result['report'])

============================================================
TEST: Normal vs Pure Tumor
============================================================
Processing 45 chunks...
  Processed 10/45 chunks
  Processed 20/45 chunks
  Processed 30/45 chunks
  Processed 40/45 chunks
Processed 49,775 samples total
Val AUC:  0.894
Test AUC: 0.881
Gap:      0.013
              precision    recall  f1-score   support

      Normal       0.86      0.80      0.83     27650
       Tumor       0.77      0.83      0.80     22125

    accuracy                           0.81     49775
   macro avg       0.81      0.82      0.81     49775
weighted avg       0.82      0.81      0.81     49775


============================================================
TEST: Normal vs Boundary
============================================================
Processing 44 chunks...
  Processed 10/44 chunks
  Processed 20/44 chunks
  Processed 30/44 chunks
  Processed 40/44 chunks
Processed 50,876 samples total
Val AUC:  0.694
Test AUC: 0.689
Gap:      0.005
              precision    recall  f1-score   support

      Normal       0.70      0.49      0.58     27650
       Tumor       0.55      0.75      0.64     23226

    accuracy                           0.61     50876
   macro avg       0.63      0.62      0.61     50876
weighted avg       0.63      0.61      0.60     50876


============================================================
TEST: Slide Context Detection
============================================================
Processing 47 chunks...
  Processed 10/47 chunks
  Processed 20/47 chunks
  Processed 30/47 chunks
  Processed 40/47 chunks
Processed 48,090 samples total
Val AUC:  0.499
Test AUC: 0.454
Gap:      0.045
              precision    recall  f1-score   support

      Normal       0.48      0.18      0.26     27650
       Tumor       0.40      0.73      0.52     20440

    accuracy                           0.42     48090
   macro avg       0.44      0.46      0.39     48090
weighted avg       0.44      0.42      0.37     48090

# Summary comparison chart
# Left panel: validation vs test AUC per experiment, with a chance-level
# reference line. Right panel: the validation-minus-test AUC gap.
exp_names = ['Exp 2: Normal vs\nPure Tumor', 'Exp 5: Normal vs\nBoundary', 'Exp 3: Slide\nContext']
exp_keys = ['exp2', 'exp5', 'exp3']

val_aucs = [experiments_to_eval[k]['results']['results']['auc'] for k in exp_keys]
test_aucs = [test_results[k]['auc'] for k in exp_keys]

fig, axes = plt.subplots(1, 2, figsize=(14, 5))
x = np.arange(len(exp_names))
width = 0.35

# AUC comparison
bars1 = axes[0].bar(x - width/2, val_aucs, width, label='Validation', color='steelblue')
bars2 = axes[0].bar(x + width/2, test_aucs, width, label='Test', color='darkorange')
axes[0].set_ylabel('AUC', fontsize=12)
axes[0].set_title('Validation vs Test AUC', fontsize=13, fontweight='bold')
axes[0].set_xticks(x)
axes[0].set_xticklabels(exp_names, fontsize=10)
axes[0].set_ylim(0.4, 1.0)
axes[0].axhline(y=0.5, color='gray', linestyle='--', alpha=0.5, label='Random chance')
# Build the legend only after every labelled artist exists: legend() collects
# handles at call time, so calling it before axhline() would leave the
# 'Random chance' entry out.
axes[0].legend(fontsize=11)
# Annotate each bar with its AUC value just above the bar top.
for i, (v, t) in enumerate(zip(val_aucs, test_aucs)):
    axes[0].text(i - width/2, v + 0.02, f'{v:.3f}', ha='center', fontsize=9, fontweight='bold')
    axes[0].text(i + width/2, t + 0.02, f'{t:.3f}', ha='center', fontsize=9, fontweight='bold')

# Val-Test gap
gaps = [v - t for v, t in zip(val_aucs, test_aucs)]
# Traffic-light colouring: green below 0.05, orange below 0.1, red otherwise.
colours = ['forestgreen' if g < 0.05 else 'darkorange' if g < 0.1 else 'firebrick' for g in gaps]
axes[1].bar(x, gaps, color=colours, width=0.5)
axes[1].set_ylabel('AUC Gap (Val - Test)', fontsize=12)
axes[1].set_title('Generalisation Gap', fontsize=13, fontweight='bold')
axes[1].set_xticks(x)
axes[1].set_xticklabels(exp_names, fontsize=10)
axes[1].axhline(y=0, color='black', linewidth=0.5)
for i, g in enumerate(gaps):
    axes[1].text(i, g + 0.005, f'{g:.3f}', ha='center', fontsize=10, fontweight='bold')

plt.tight_layout()
plt.show()

Step	Parameter	What it removes
Threshold `< 180`	-	Selects all dark pixels
`remove_small_objects()`	`min_size=100`	Dust specks, fibres, staining dots
`clear_border()`	-	Any blob touching the image edge (scanner margins are often dark)
`remove_small_holes()`	`area_threshold=100`	Small voids within tissue blobs caused by pale staining
Border zeroing	`border_margin=5`	Residual edge artefacts that survive `clear_border()`

Class	Name	Description
0	`normal_from_normal`	Normal tissue from a slide with no tumour at all
1	`normal_from_tumor`	Normal-looking tissue on a slide that also contains tumour
2	`boundary_tumor`	Tissue at the tumour margin (partial overlap with annotations)
3	`pure_tumor`	Tissue fully within annotated tumour regions

Stride	Overlap	Relative patch count
224 px (= patch size)	None (clean tiling)	1×
112 px	50% overlap	4×
56 px	75% overlap	16×

Class	Default stride	Rationale
Normal (class 1)	224 px	Abundant everywhere; no overlap needed
Pure tumour (class 3)	112 px	Moderate density for spatial coverage
Boundary (class 2)	56 px	Dense sampling to capture the thin, rare tumour margin

Experiment	Negative class	Positive class
1: Normal vs Any Tumour	`normal_from_normal`	classes 1, 2, 3
2: Normal vs Pure Tumour	`normal_from_normal`	`pure_tumor`
3: Slide Context Detection	`normal_from_normal`	`normal_from_tumor`
4: Normal vs Actual Tumour	`normal_from_normal`	classes 2, 3
5: Normal vs Boundary	`normal_from_normal`	`boundary_tumor`

Experiment	Task	Val AUC	Test AUC	Test Accuracy
Exp 2	Normal vs Pure Tumour	0.870	0.838	78.3%
Exp 5	Normal vs Boundary	0.696	0.672	65%
Exp 3	Slide Context Detection	0.627	0.494	48.8%

Architecture	Val AUC	Test AUC	Trainable Params
`subtle` (custom CNN)	0.585	0.543	~390K
`attention` (spatial attention)	0.610	0.467	~390K
`transfer` (frozen MobileNetV2)	0.504	-	~1.3K
`transfer_finetune` (fine-tuned MobileNetV2)	0.585	-	~700K

Split	Tumour slides	Normal slides	Total
Train	110	160	270
Test	49	80	129

Automated Tumour Detection in Whole Slide Images: An End-to-End Deep Learning Pipeline¶

Table of Contents¶

1. The Problem: Pathologist Shortages and the Promise of Automation

2. The CAMELYON16 Challenge

3. Running the code

4. Understanding Whole Slide Images

5. Step 1: Finding the Tissue

Design Decision: Why Work at Thumbnail Resolution?¶

Design Decision: Why So Many Cleanup Steps?¶

Under the Hood: Tissue Masking¶

6. Step 2: Parsing Tumour Annotations and the Four-Class Labelling Scheme

Design Decision: Labels come from tumour annotation geometry rather than slide identity¶

Design Decision: Repairing Invalid Polygon Geometry¶

The Four-Class Scheme¶

Classifying Patches by Tumour Overlap¶

Design Decision: Grid Sampling Using a Different Stride Per Class¶

Design Decision: Class-Specific Sampling Densities¶

Under the Hood: Patch Classification with Shapely¶

7. Step 3: Building the Training Dataset

Design Decision: Generate Four Classes, Collapse to Binary at Training Time¶

Design Decision: Chunk by Slide, Verify Leakage Explicitly¶

Design Decision: Stain Normalisation¶

Under the Hood: Macenko Stain Normalisation¶

8. Step 4: The Training Pipeline

Design Decision: Strictly Class-Balanced Batches¶

Design Decision: Validation Is Engineered for Stability and Correctness¶

Memory Management: The Key Engineering Challenge¶

Preventing Slide Leakage¶

Under the Hood: Streaming Chunks with tf.data¶

9. Step 5: Model Architecture

Design Decision: Start Simple, Then Go Subtle¶

Design Decision: Global Average Pooling Instead of Flattening¶

10. Step 6: Training

11. Step 7: Test Set Evaluation

Design Decision: Pick the Decision Threshold from Validation Data¶

12. Results

Interpretation¶

A Note on Run-to-Run Variation¶

13. Investigating the Field Cancerisation Hypothesis

What Would It Take?¶

14. Lessons Learned

Technical Lessons¶

Scientific Lessons¶

15. Conclusion

Under the Hood: Streaming Chunks with `tf.data`¶