Cluster Analysis and Quantification of Segmented Volumes from IMARIS 3D Segmentation Data

gene_x 0 like s 540 view s

Tags: scripts

  1. % the goal of this script is to determine cluster information starting from
  2. % the results extracted from IMARIS 3D segmentation
  3. clear all
  4. clast_par=1.3;
  5. % filename = '20241107 1457 18h_0002-1.tif DAPI.xls';
  6. % filename = '202406 1457dsbp pRB473_1.xls';
  7. % filename = '1457 1h 2.xls';
  8. %filename = '20231214 1457dsbp pRB473 sbp alfa 508_003-1.xls';
  9. filename = '20240321 1457dsbppRB473sbp ALFA508 0 1.xls';
  10. %filename = '202406 1457dsbp pRB473_1.xls';
  11. %The only two input necessary are the name of the file to be analized i.e file name and parameter that control how far away two
  12. % segmented volumes should be. clast_par < 1 means the distance between two segmented volume should be inferior to the sum of their mean radius
  13. % clast_par =1 the distance between two segmented volumes should be equal to the sum of their mean radius
  14. % clast_par >1 the distance between two segmented volumes should be bigger than the sum of their mean radius
  15. % the data to be quantified is stored in an excel multipage file
  16. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% segmented volumes radius determination
  17. %read_volume=xlsread(filename,32);% opens page where the volumes are listed and extracts the volume information for each cluster
  18. read_volume=readtable(filename, 'Sheet', 32);
  19. siz_point=size(read_volume); %it extracts the information about how many volumes were quantified
  20. radius_nn(1:siz_point(1))=0; %it creates a vector that will contain the estimated radius of all the detected volumes
  21. %radius_nn=1000*( read_volume(:,1)*3/(4*pi)).^(1/3);
  22. radius_nn = 1000 * (read_volume.Volume * 3 / (4 * pi)).^(1 / 3);
  23. % the radius is calculated approximating the quantified volumes with be the volumes of a sphere
  24. Mean_radius_nm = mean(radius_nn) % displays the mean radius of the segmented volumes
  25. %please note that the radius of the segmented volumes is a good
  26. %approximation of the full width at half maximum of the fluorescence object
  27. Variance_radius_n = std(radius_nn) % displays the variance of the radius of the segmented volumes
  28. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  29. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% determination of
  30. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% the distance between the segmented volumes and sorting them between volumes belonging to the same cluster and volumes not belonging to the same cluster.
  31. % center_of_mass=xlsread(filename,5);% open page where the coordinate of the center of mass of the segmented volumes are listed
  32. %
  33. % total_num=size(center_of_mass); % again number of detected spots just to check if it match with the previous values (not necessary)
  34. % total_resolved_spots = total_num(1) % displays the total number of volumes quantified
  35. % %center_of_mass=xlsread(filename,5); % open the page of the file where the values of the centre of mass of the detected volumes are stored and write them into a matrix
  36. % distances(1:siz_point(1),1:siz_point(1))=0;% produce the matrix were the distances between detected volumes will be stored
  37. % cluster(1:siz_point(1),1:siz_point(1))=0;%produce a matrix where will be stored the information wether two segmented volumes are close together (distance fulfil cluster parameter) cluster (i,j)=1
  38. % %or not cluster(i,j)=0;
  39. %
  40. % %%calculates the matrix of the distance between segmented volumes i and j as the distance between their centre of mass
  41. % %the factor 1000 converts the distance from micronmeter to nanometer
  42. % for i=1:siz_point(1)
  43. % for j=1:siz_point(1)
  44. %
  45. % distances(i,j)= sqrt( (center_of_mass(i,1)-center_of_mass(j,1))^2 + (center_of_mass(i,2)-center_of_mass(j,2))^2 + (center_of_mass(i,3)-center_of_mass(j,3))^2 )*1000;
  46. % end
  47. % end
  48. %
  49. % %Here is determined if two volumes are clustering together i.e. if their distance <= the sum of their radius multiplied for clust_par defined above, or not
  50. % for i=1:siz_point(1)
  51. % for j=1:siz_point(1)
  52. %
  53. % if i==j
  54. % cluster(i,j)=0;
  55. % else
  56. % if distances(i,j)<=clast_par*(radius_nn(i)+radius_nn(j))
  57. % cluster(i,j)=1;
  58. % end
  59. % end
  60. %
  61. % end
  62. % end
  63. % cluster3=cluster; %%%here the cluster matrix is doubled for keep avaliable its information at the end of the script
  64. % Read the data from Sheet 5 (Center of Homogeneous Mass)
  65. center_of_mass = readtable(filename, 'Sheet', 5);
  66. % Display the modified variable names (these are the names used by MATLAB)
  67. disp('Modified Variable Names:');
  68. disp(center_of_mass.Properties.VariableNames);
  69. % Display the original column headers (as stored in the Excel file)
  70. disp('Original Column Names:');
  71. disp(center_of_mass.Properties.VariableDescriptions);
  72. % Define the starting row (based on the repeating headers)
  73. %start_row = 3; % Adjust this according to your actual data structure
  74. % Read the table, starting from the correct row
  75. %center_of_mass = readtable(filename, 'Sheet', 5, 'Range', sprintf('A%d', start_row));
  76. % Optional: Remove rows that are still unwanted (if any)
  77. % For example, if there are rows with unwanted headers in the middle of the data:
  78. %center_of_mass = center_of_mass(~ismember(center_of_mass{:, 1}, {'CenterofHomogeneousMass'}), :);
  79. %center_of_mass = center_of_mass(~ismember(center_of_mass{:, 1}, {'CenterofHomogeneousMassX'}), :);
  80. % Use the modified column names to extract the data
  81. x_coords = center_of_mass{:, 'CenterOfHomogeneousMass'}; % Extract X coordinates
  82. y_coords = center_of_mass{:, 'Var2'}; % Adjust as needed for Y coordinates
  83. z_coords = center_of_mass{:, 'Var3'}; % Adjust as needed for Z coordinates
  84. % Determine the number of detected spots (volumes)
  85. siz_point = size(center_of_mass); % Size of the table, number of rows (spots)
  86. total_resolved_spots = siz_point(1); % Total number of volumes quantified
  87. disp(['Total number of volumes quantified: ', num2str(total_resolved_spots)])
  88. % Create a matrix to store the distances between detected volumes
  89. disp('Initializing distance matrix...');
  90. distances = zeros(total_resolved_spots, total_resolved_spots); % Initialize distance matrix
  91. % Create a matrix to store cluster information (1 for close volumes, 0 for far volumes)
  92. disp('Initializing cluster matrix...');
  93. cluster = zeros(total_resolved_spots, total_resolved_spots); % Initialize cluster matrix
  94. % Loop to calculate the distance matrix between detected volumes
  95. % The factor 1000 converts the distance from micrometers to nanometers
  96. disp('Calculating distance matrix...');
  97. for i = 1:total_resolved_spots
  98. for j = 1:total_resolved_spots
  99. distances(i,j) = sqrt( (x_coords(i) - x_coords(j))^2 + (y_coords(i) - y_coords(j))^2 + (z_coords(i) - z_coords(j))^2 ) * 1000; % Calculate distance in nm
  100. end
  101. end
  102. % Loop to calculate whether two volumes belong to the same cluster
  103. % Check if distance between volumes is less than or equal to the sum of their radii
  104. disp('Determining cluster assignments...');
  105. % ERROR_NAME_SHOULD_BE_clast_par. clust_par = 1.5; % Define your clustering parameter (this is an example, adjust as necessary)
  106. for i = 1:total_resolved_spots
  107. for j = 1:total_resolved_spots
  108. if i == j
  109. cluster(i,j) = 0; % No clustering with itself
  110. else
  111. if distances(i,j) <= clast_par * (radius_nn(i) + radius_nn(j)) % Check if the distance is less than the sum of radii
  112. cluster(i,j) = 1; % Assign to the same cluster
  113. end
  114. end
  115. end
  116. end
  117. cluster3 = cluster; % Store the cluster matrix for further use
  118. %disp('Cluster Matrix (1 indicates volumes in the same cluster, 0 otherwise):');
  119. %disp(cluster3);
  120. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  121. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  122. % determination of the number of clusters the cluster size i.e. how many
  123. % volumes are contained into one cluster and other cluster information
  124. numclast=0;
  125. numclast2(1:siz_point(1),1)=1;
  126. numclast3(1:siz_point(1),1:siz_point(1))=0;
  127. num=0; % initial number of clusters
  128. cluster_record(1:siz_point(1),1:siz_point(1))=0;% matrix where the label of the cluster members should be stored i.e the label
  129. %of the volumes of cluster members. Please note that the column number of cluster_record matrix is also the label of the first volume find for a given
  130. %cluster if this column is not made only by zero values
  131. % searching and isolating clusters
  132. for i=1:siz_point(1)
  133. count=0; % how many volumes are clustering with an individual volume
  134. count3=0; % how many volumes are belonging to the same cluster
  135. if sum( cluster(i,:))>=1
  136. num=num+1;
  137. % ind= sum( cluster(i,:));
  138. for j=1:siz_point(1)
  139. if cluster(i,j)==1
  140. count=count+1;
  141. count3=count3+1;
  142. cluster(i,j)=0;% every time if a point on the cluster matrix is found to belong to a specific cluster it is set to
  143. %zero in order to not choose this point twice. At the end of the cluster search the cluster matrix should be a full zeros matrix
  144. cluster(j,i)=0;% everytime if a point on the cluster matrix is found to belong to a specific cluster is set to
  145. %zero in order to the choose this point twice at the end the cluster matrix should be a full zeros matrix
  146. vect(count)=j;% here are listed the label of the volumes clastering with the volume under analysis
  147. cluster_record(count3,i)=j;
  148. end
  149. end
  150. end
  151. while count > 0;%%%% in this recursive loop are searched the volumes that were clustering with the first one and in case
  152. %they are found clustering with other volumes these are stored and this cycle is repeated until when no more clustering volume are found
  153. count2=count;
  154. count=0;
  155. vect2=vect;% saving the information contained in vect (is a variable size vector so overwriting is not to suggest)
  156. clear vect % delete this variable in a way it can be defined newly in the following part of the loop
  157. for ss=1:count2
  158. for j=1:siz_point(1)
  159. if cluster(vect2(ss),j)==1
  160. count=count+1;
  161. cluster(vect2(ss),j)=0;
  162. cluster(j,vect2(ss))=0;
  163. vect(count)=j;
  164. ll=0; %%ll=0 a volume is for the first time found to be member of a given cluster; ll=1 the volume was already found to be part of the cluster
  165. % and it will be not counted twice
  166. for ms=1:count3
  167. if cluster_record(ms,i)==j
  168. ll=1;
  169. end
  170. end
  171. if ll==0
  172. count3=count3+1;
  173. cluster_record(count3,i)=j;
  174. end
  175. ll=0;
  176. end
  177. end
  178. end
  179. clear vect2 % this variable vector will be redefined at the beginning of the loop
  180. count;
  181. end
  182. if num > 0 % if a cluster is found
  183. if count3>0
  184. numb(num)=count3+1;% 1 comes from the fact that a cluster is always found starting from a volume that is not included
  185. %in the counting routine
  186. end
  187. end
  188. end
  189. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  190. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
  191. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%dysplaying the information
  192. sit=size(numb);
  193. number_of_cluster=sit(2)% the number of columns of the variable numb gives the total number of clusters
  194. total_number_of_spot_in_cluster= sum(numb)% the total number of volumes belonging to the one of the clusters (it must be inferior to the total number of volumes quantified!!!)
  195. average_number_of_spot_in_cluster= mean(numb)% mean number of volumes belonging to a cluster
  196. dispersion_number_of_spot_in_cluster= std(numb)% variance of the number of volumes belonging to a cluster
  197. frequent_number_of_spot_in_cluster= median(numb)% median of the number of volumes belonging to a cluster
  198. indd = 0;
  199. % Preallocate dist and disti arrays with a sensible size limit
  200. max_cluster_size = siz_point(1); % Max number of volumes per cluster, assuming worst case
  201. % Preallocate clust_info for efficiency
  202. clust_info = zeros(siz_point(1), 8); % Preallocate space for cluster info
  203. for l = 1:siz_point(1)
  204. % Display progress
  205. fprintf('Processing cluster %d of %d\n', l, siz_point(1));
  206. % Initialize variables
  207. cluster_member = [];
  208. if sum(cluster_record(:, l)) >= 1
  209. indd = indd + 1;
  210. cluster_member = [l; cluster_record(cluster_record(:, l) >= 1, l)]; % All labels in the cluster
  211. end
  212. if ~isempty(cluster_member)
  213. got = numel(cluster_member); % Number of volumes in the cluster
  214. rel = 0; % To keep track of valid distances
  215. xx = 0; yy = 0; zz = 0;
  216. % Sum x, y, z positions (in nm, factor 1000)
  217. cluster_coords = [center_of_mass{cluster_member, 'CenterOfHomogeneousMass'}, ...
  218. center_of_mass{cluster_member, 'Var2'}, ...
  219. center_of_mass{cluster_member, 'Var3'}] * 1000; % Coordinates in nm
  220. xx = sum(cluster_coords(:, 1));
  221. yy = sum(cluster_coords(:, 2));
  222. zz = sum(cluster_coords(:, 3));
  223. % Vectorized distance calculation between all points in the cluster
  224. % We calculate the pairwise distance matrix for all points in the cluster
  225. dist_matrix = pdist2(cluster_coords, cluster_coords); % pdist2 is highly optimized
  226. % Extract non-zero distances and store them in disti
  227. disti = dist_matrix(dist_matrix > 0); % Convert to vector and remove zeros
  228. % Collect statistical information
  229. clust_info(indd, 1) = numb(indd); % Number of volumes in the cluster
  230. clust_info(indd, 2) = 0.5 * mean(disti); % Mean distance between volumes
  231. clust_info(indd, 3) = std(disti / 2); % Estimate of variance
  232. clust_info(indd, 4) = max(disti); % Max distance
  233. clust_info(indd, 5) = min(disti); % Min distance
  234. clust_info(indd, 6) = xx / got; % X coordinate of the cluster center of mass
  235. clust_info(indd, 7) = yy / got; % Y coordinate of the cluster center of mass
  236. clust_info(indd, 8) = zz / got; % Z coordinate of the cluster center of mass
  237. % Clear temporary variables for next iteration
  238. clear dist_matrix disti cluster_coords
  239. end
  240. end
  241. % --------- Preparing the final results and save them in the Excel files ---------
  242. %%% the labels of cluster members are saved in a two column matrix the
  243. %%% second column contains the label of the first member of the cluster and
  244. %%% it repeats its value until when the same cluster is under consideration.
  245. %%% The first column displays the label of the other volumes belonging to
  246. %%% the clusters
  247. a=0;
  248. for i=1:siz_point(1)
  249. if sum(cluster_record(:,i))>=1
  250. for j=1:siz_point(1)
  251. if cluster_record(j,i)>=1
  252. a=a+1;
  253. position(a,1)=cluster_record(j,i);
  254. position(a,2)=i;
  255. end
  256. end
  257. end
  258. end
  259. position2= position-1; % since the labelling in Imaris starts from number 0 and not from one in order to find the same cluster in Imaris to the
  260. % to the label values must be substructed 1
  261. %
  262. %%%%%%%% printing in the output file the data quantified till now
  263. %%%%%%
  264. names={'number of particles', 'mean distance', 'variance of distance', 'max distance', 'min distance', 'center_x', 'center_y', 'center_z' };
  265. names2={'Mean_radius_nm', 'Variance_radius_n ', 'Total_resolved_spots' };
  266. values=[Mean_radius_nm , Variance_radius_n total_resolved_spots];
  267. % xlswrite([num2str(clast_par) '_output_' filename '.xlsx'],names2,1,'A1')
  268. % xlswrite([num2str(clast_par) '_output_' filename '.xlsx'],values,1,'A2')
  269. % xlswrite([num2str(clast_par) '_output_' filename '.xlsx'],clust_info,2,'A2')
  270. % xlswrite([num2str(clast_par) '_output_' filename '.xlsx'],names,2,'A1')
  271. % xlswrite([num2str(clast_par) '_output_' filename '.xlsx'],position2,3,'A1')
  272. % Write 'names2' and 'values' to the first sheet
  273. %DEBUG: writetable(table(values), [num2str(clast_par) '_output_' filename '.xlsx'], 'Sheet', 1, 'WriteRowNames', false, 'WriteVariableNames', false);
  274. %DEBUG: writetable(table(names2), [num2str(clast_par) '_output_' filename '.xlsx'], 'Sheet', 1, 'WriteRowNames', false);
  275. % Create a table where the first row is the header and the second is the data
  276. output_table = [names2; num2cell(values)];
  277. disp(output_table);
  278. % Write the table to the Excel file, starting at cell A1
  279. writetable(cell2table(output_table), [num2str(clast_par) '_output_' filename '.xlsx'], 'Sheet', 1, 'WriteRowNames', false, 'WriteVariableNames', false);
  280. % % Saving as an older .xls format (Excel 97-2003 format)
  281. % writetable(cell2table(output_table), [num2str(clast_par) '_output_' filename '.xls'], 'Sheet', 1, 'WriteRowNames', false, 'WriteVariableNames', false);
  282. % Save as CSV file
  283. writetable(cell2table(output_table), [num2str(clast_par) '_output_' filename '_Sheet1.csv'], 'WriteRowNames', false, 'WriteVariableNames', false);
  284. % Convert 'clust_info' into a table if it's not one already and write to the second sheet
  285. clust_info_table = array2table(clust_info, 'VariableNames', names); % Convert 'clust_info' to table with proper column names
  286. writetable(clust_info_table, [num2str(clast_par) '_output_' filename '.xlsx'], 'Sheet', 2, 'WriteRowNames', false);
  287. % Assuming position2 is a matrix that needs to be written to the third sheet
  288. writetable(array2table(position2), [num2str(clast_par) '_output_' filename '.xlsx'], 'Sheet', 3, 'WriteRowNames', false, 'WriteVariableNames', false);
  289. %%%% quantification of the distance between different clusters
  290. %%%%% the distance between clusters is defined to be the distance between their centre of
  291. %%%%% mass
  292. dist_clust(1:sit(2),1:sit(2))=0;
  293. cc1=0;
  294. for i=1:sit(2)
  295. for j=1:sit(2)
  296. if i==j
  297. dist_clust(i,j)=100000;% the fact that the distance of one cluster respect itself is zero would make more complicate
  298. %to determine the minimum distance between a cluster and it neighbours
  299. else
  300. cc1=cc1+1;
  301. dist_clust(i,j)=sqrt( (clust_info(i,6)-clust_info(j,6))^2 + (clust_info(i,7)-clust_info(j,7))^2 + (clust_info(i,8)-clust_info(j,8))^2 );
  302. dist_clust2(cc1)=dist_clust(i,j);
  303. end
  304. end
  305. end
  306. mindist_cluster=min(dist_clust);% the minimum distance between one cluster and the other ones
  307. mean_dist_cluster=mean(mindist_cluster);% the mean distance between closer clusters
  308. %%% exporting the latest information
  309. % xlswrite([num2str(clast_par) '_output_' filename '.xlsx'], dist_clust,4,'A1')
  310. % xlswrite([num2str(clast_par) '_output_' filename '.xlsx'],mean_dist_cluster,5,'A1')
  311. % xlswrite([num2str(clast_par) '_output_' filename '.xlsx'],mindist_cluster',5,'B1')
  312. % names3={'number_of_cluster', 'total_number_of_spot_in_cluster ', 'average_number_of_spot_in_cluster', 'dispersion_number_of_spot_in_cluster','frequent_number_of_spot_in_cluster' }
  313. % values3=[number_of_cluster total_number_of_spot_in_cluster average_number_of_spot_in_cluster dispersion_number_of_spot_in_cluster frequent_number_of_spot_in_cluster];
  314. % values3=values3';
  315. % xlswrite([num2str(clast_par) '_output_' filename '.xlsx'],names3,6,'A1')
  316. % xlswrite([num2str(clast_par) '_output_' filename '.xlsx'],values3',6,'A2')
  317. % Exporting the latest information
  318. writetable(table(dist_clust), [num2str(clast_par) '_output_' filename '.xlsx'], 'Sheet', 4, 'WriteRowNames', true);
  319. writetable(table(mean_dist_cluster), [num2str(clast_par) '_output_' filename '.xlsx'], 'Sheet', 5, 'WriteRowNames', true);
  320. writetable(table(mindist_cluster'), [num2str(clast_par) '_output_' filename '.xlsx'], 'Sheet', 5, 'WriteRowNames', true, 'WriteVariableNames', false);
  321. % Save other information about clusters
  322. names3 = {'number_of_cluster', 'total_number_of_spot_in_cluster', 'average_number_of_spot_in_cluster', ...
  323. 'dispersion_number_of_spot_in_cluster', 'frequent_number_of_spot_in_cluster'};
  324. values3 = [number_of_cluster, total_number_of_spot_in_cluster, average_number_of_spot_in_cluster, ...
  325. dispersion_number_of_spot_in_cluster, frequent_number_of_spot_in_cluster];
  326. %DEBUG: writetable(table(values3), [num2str(clast_par) '_output_' filename '.xlsx'], 'Sheet', 6, 'WriteRowNames', true, 'WriteVariableNames', false);
  327. %DEBUG: writetable(table(names3), [num2str(clast_par) '_output_' filename '.xlsx'], 'Sheet', 6, 'WriteRowNames', true);
  328. % Create a table where the first row is the header and the second row is the data
  329. output_table3 = [names3; num2cell(values3)];
  330. disp(output_table3);
  331. % Write the combined table to the Excel file, starting from cell A1 of Sheet 6
  332. writetable(cell2table(output_table3), [num2str(clast_par) '_output_' filename '.xlsx'], 'Sheet', 6, 'WriteRowNames', false, 'WriteVariableNames', false);
  333. % % Saving as an older .xls format (Excel 97-2003 format)
  334. % writetable(cell2table(output_table3), [num2str(clast_par) '_output_' filename '.xls'], 'Sheet', 6, 'WriteRowNames', false, 'WriteVariableNames', false);
  335. % Save as CSV file
  336. writetable(cell2table(output_table3), [num2str(clast_par) '_output_' filename '_Sheet6.csv'], 'WriteRowNames', false, 'WriteVariableNames', false);

like unlike

点赞本文的读者

还没有人对此文章表态


本文有评论

没有评论

看文章,发评论,不要沉默


© 2023 XGenes.com Impressum