Experimenting with fastai and black-and-white (BW) image data augmentation.
Runpod setup
```bash
# Setting up the pod and pip-installing uhina

# Accessing a pod terminal:
# 1. Get the pod IP address: runpodctl get pod -a
# 2. SSH into the pod: ssh root@<ip-address> -p 58871 -i ~/.ssh/id_ed25519

# runpodctl send uhina
# git clone https://github.com/franckalbinet/uhina.git
# pip install uhina
# runpodctl send im-bw
# runpodctl send ossl-tfm.csv
```
```python
from sklearn.metrics import r2_score

r2_score(val_preds_tta, val_targets_tta)
```
```
0.7900635997635996
```
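For reference, TTA predictions like `val_preds_tta` and `val_targets_tta` are typically obtained with fastai's built-in `learn.tta`, which averages predictions over several augmented passes. A minimal sketch, assuming `learn` is the trained Learner from earlier (the `n=4` pass count is an assumption, not taken from the original run):

```python
# Sketch: obtaining TTA predictions on the validation set with fastai.
# Assumes `learn` is a trained Learner; n=4 augmented passes is an assumption.
val_preds_tta, val_targets_tta = learn.tta(dl=learn.dls.valid, n=4)
```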
```python
# EXAMPLE of TTA on a single item
# from fastai.vision.all import *
#
# # Define your TTA transforms
# tta_tfms = [
#     RandomResizedCrop(224, min_scale=0.5),
#     Flip(),
#     Rotate(max_deg=15),  # fastai's Rotate takes max_deg (here ±15 degrees)
#     Brightness(max_lighting=0.2),
#     Contrast(max_lighting=0.2)
# ]
#
# # Create a pipeline of TTA transformations
# tta_pipeline = Pipeline(tta_tfms)
#
# # Load your model
# learn = load_learner('path/to/your/model.pkl')
#
# # Define the input data (e.g., an image)
# input_data = PILImage.create('path/to/your/image.jpg')
#
# # Apply TTA transforms to the input data and make predictions
# predictions = []
# for _ in range(5):  # Apply 5 different augmentations
#     augmented_data = tta_pipeline(input_data)
#     _, _, raw_pred = learn.predict(augmented_data)  # keep the raw tensor output
#     predictions.append(raw_pred)
#
# # Average the raw predictions across augmentations
# average_prediction = torch.stack(predictions).mean(0)
# print(average_prediction)
```
```python
# Assuming you have a new CSV file for your test data
# test_source = '../../_data/ossl-tfm/ossl-tfm-test.csv'
# test_df = pd.read_csv(test_source)
#
# # Create a new DataLoader for the test data
# test_dl = learn.dls.test_dl(test_df)
#
# # Get predictions on the test set
# test_preds, test_targets = learn.get_preds(dl=test_dl)
#
# # Now you can use test_preds and test_targets for further analysis
```
```python
def assess_model(val_preds, val_targets):
    # Convert predictions and targets to numpy arrays
    val_preds = val_preds.numpy().flatten()
    val_targets = val_targets.numpy()

    # Create a DataFrame with the results
    results_df = pd.DataFrame({
        'Predicted': val_preds,
        'Actual': val_targets
    })

    # Display the first few rows of the results
    print(results_df.head())

    # Calculate and print the R2 score
    from sklearn.metrics import r2_score
    r2 = r2_score(val_targets, val_preds)
    print(f"R2 Score on validation set: {r2:.4f}")
```
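A hedged usage sketch: pull validation predictions with fastai's standard `learn.get_preds` and hand them to `assess_model`. This assumes `learn` and its validation DataLoader are already in scope:

```python
# Hypothetical usage: evaluate validation predictions (still on the transformed scale).
val_preds, val_targets = learn.get_preds(dl=learn.dls.valid)
assess_model(val_preds, val_targets)
```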
```python
val_preds_np = val_preds
val_targets_np = val_targets

# Apply the inverse transformation: exp(y) - 1
val_preds_transformed = np.exp(val_preds_np) - 1
val_targets_transformed = np.exp(val_targets_np) - 1

# Create a DataFrame with the results
results_df = pd.DataFrame({
    'Predicted': val_preds_transformed,
    'Actual': val_targets_transformed
})

# Display the first few rows of the results
print(results_df.head())

# Calculate and print the R2 score
from sklearn.metrics import r2_score
r2 = r2_score(val_targets_transformed, val_preds_transformed)
print(f"R2 Score on validation set (after transformation): {r2:.4f}")

# Calculate and print the MAPE, handling zero values
def mean_absolute_percentage_error(y_true, y_pred):
    non_zero = (y_true != 0)
    return np.mean(np.abs((y_true[non_zero] - y_pred[non_zero]) / y_true[non_zero])) * 100

mape = mean_absolute_percentage_error(val_targets_transformed, val_preds_transformed)
print(f"Mean Absolute Percentage Error (MAPE) on validation set: {mape:.2f}%")

# Calculate and print the MAE as an alternative metric
from sklearn.metrics import mean_absolute_error
mae = mean_absolute_error(val_targets_transformed, val_preds_transformed)
print(f"Mean Absolute Error (MAE) on validation set: {mae:.4f}")
```
```
   Predicted   Actual
0   0.366814  0.00000
1   0.135405  0.20317
2   0.441560  0.21434
3   0.270092  0.30000
4   0.496277  0.42732
R2 Score on validation set (after transformation): 0.6936
Mean Absolute Percentage Error (MAPE) on validation set: 50.72%
Mean Absolute Error (MAE) on validation set: 0.1956
```
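The `exp(y) - 1` step above inverts a `log(1 + y)` target transform; presumably the targets were prepared with `np.log1p`, though that preprocessing isn't shown here. A quick sanity check of the round trip, using numpy's `expm1` (the numerically stable form of `exp(y) - 1`):

```python
import numpy as np

y = np.array([0.0, 0.2, 1.5])            # illustrative target values only
y_log = np.log1p(y)                      # assumed forward transform: log(1 + y)
assert np.allclose(np.expm1(y_log), y)   # exp(y_log) - 1 recovers the original scale
```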
```python
plt.figure(figsize=(6, 6))

# Use logarithmic bins for the colormap
h = plt.hexbin(val_targets, val_preds, gridsize=65, bins='log',
               cmap='Spectral_r', mincnt=1, alpha=0.9)

# Get the actual min and max counts from the hexbin data
counts = h.get_array()
min_count = counts[counts > 0].min()  # Minimum non-zero count
max_count = counts.max()

# Create a logarithmic colorbar
cb = plt.colorbar(h, label='Count in bin', shrink=0.73)
tick_locations = np.logspace(np.log10(min_count), np.log10(max_count), 5)
cb.set_ticks(tick_locations)
cb.set_ticklabels([f'{int(x)}' for x in tick_locations])

# Add the diagonal line
min_val = min(val_targets.min(), val_preds.min())
max_val = max(val_targets.max(), val_preds.max())
plt.plot([min_val, max_val], [min_val, max_val], 'k--', lw=1)

# Set labels and title
plt.xlabel('Actual Values')
plt.ylabel('Predicted Values')
plt.title('Predicted vs Actual Values (Hexbin with Log Scale)')

# Add grid lines
plt.grid(True, linestyle='--', alpha=0.65)

# Set the same limits for both axes and make the plot square
plt.xlim(min_val, max_val)
plt.ylim(min_val, max_val)
plt.gca().set_aspect('equal', adjustable='box')

plt.tight_layout()
plt.show()

# Print the range of counts in the hexbins
print(f"Min non-zero count in hexbins: {min_count}")
print(f"Max count in hexbins: {max_count}")
```
```
Min non-zero count in hexbins: 1.0
Max count in hexbins: 157.0
```
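An aside on the design choice above: `bins='log'` plus hand-built log-spaced ticks works, but matplotlib's `LogNorm` gives the same logarithmic color scale and log-spaced colorbar ticks automatically. A minimal sketch with synthetic data (all names here are illustrative, not from the original run):

```python
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.colors import LogNorm

rng = np.random.default_rng(0)
x = rng.normal(size=10_000)
y = x + rng.normal(scale=0.5, size=10_000)

# norm=LogNorm() maps bin counts to colors on a log scale,
# and the colorbar picks log-spaced ticks by itself.
h = plt.hexbin(x, y, gridsize=65, norm=LogNorm(), cmap='Spectral_r', mincnt=1)
plt.colorbar(h, label='Count in bin')
plt.show()
```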