diff --git a/src/graph.py b/src/graph.py
index f6c091921e52911459f9a1bee085f94a034793b6..ffff0593bf161cf927460e106a039610b6234f5f 100644
--- a/src/graph.py
+++ b/src/graph.py
@@ -147,7 +147,7 @@ class Graph():
 
     def get_specification_graph(self, dirc = 'graphs', filename = "specification_graph", render_graphs = True):
         generate_graph(self.get_output_dir()/ dirc /filename, self.full_dico, render_graphs = render_graphs)
-        generate_graph(self.get_output_dir()/ dirc /(filename+"_without_artificial_nodes"), remove_artificial_nodes(self.full_dico), render_graphs = render_graphs)
+        #generate_graph(self.get_output_dir()/ dirc /(filename+"_without_artificial_nodes"), remove_artificial_nodes(self.full_dico), render_graphs = render_graphs)
 
     def get_specification_graph_wo_labels(self, filename = "specification_graph_wo_labels", render_graphs = True):
         generate_graph(self.get_output_dir()/'graphs'/filename, self.full_dico, label_edge=False, label_node=False, render_graphs = render_graphs)
@@ -434,23 +434,25 @@ class Graph():
 
         self.initialise_flattened_dico(self.full_dico)
         dico = remove_artificial_nodes(self.dico_flattened)
 
-        self.user_view, self.new_nodes_user_view = relev_user_view_builder(dico, relevant_modules=relevant_processes)
+        self.user_view, self.new_nodes_user_view = relev_user_view_builder(dico, relevant_modules=relevant_processes, show_operations = False)
 
         with open(self.get_output_dir()/ "graphs/user_view.json", 'w') as output_file :
             json.dump(self.user_view, output_file, indent=4)
 
-        user_view_with_subworkflows = add_subworkflows_2_dico(self.dico_process_dependency_graph, self.user_view)
-        with open(self.get_output_dir()/ "graphs/user_view_with_subworkflows.json", 'w') as output_file :
-            json.dump(user_view_with_subworkflows, output_file, indent=4)
+        #user_view_with_subworkflows = add_subworkflows_2_dico(self.dico_process_dependency_graph, self.user_view)
+        #with open(self.get_output_dir()/ "graphs/user_view_with_subworkflows.json", 'w') as output_file :
+        #    json.dump(user_view_with_subworkflows, output_file, indent=4)
 
-        return self.user_view, user_view_with_subworkflows
+        #return self.user_view, user_view_with_subworkflows
+        return self.user_view
 
     def generate_user_view(self, relevant_processes = [], render_graphs = True):
-        user_view, user_view_with_subworkflows = self.get_user_view_graph(relevant_processes = relevant_processes)
-        self.user_view_with_subworkflows = user_view_with_subworkflows
+        #user_view, user_view_with_subworkflows = self.get_user_view_graph(relevant_processes = relevant_processes)
+        user_view = self.get_user_view_graph(relevant_processes = relevant_processes)
+        #self.user_view_with_subworkflows = user_view_with_subworkflows
         generate_graph(self.get_output_dir()/'graphs'/"user_view", user_view, label_edge=True, label_node=True, render_graphs = render_graphs, root = False, relevant_nodes = copy.deepcopy(relevant_processes))
-        generate_graph(self.get_output_dir()/'graphs'/"user_view_with_subworkflows", user_view_with_subworkflows, label_edge=True, label_node=True, render_graphs = render_graphs, root = False, relevant_nodes = copy.deepcopy(relevant_processes))
+        #generate_graph(self.get_output_dir()/'graphs'/"user_view_with_subworkflows", user_view_with_subworkflows, label_edge=True, label_node=True, render_graphs = render_graphs, root = False, relevant_nodes = copy.deepcopy(relevant_processes))
 
 
     #This method returns the list of the clusters in topological order
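Note on the graph.py hunks above: get_user_view_graph() now returns a single dico rather than the former (user_view, user_view_with_subworkflows) pair, and generate_user_view() unpacks one value accordingly, so any other caller still expecting the pair needs the same update. For reference, a rough sketch of the returned structure, inferred from the node and edge construction in relev_user_view_builder() (outils_graph.py below); the ids, names and colors are illustrative, not taken from a real run:

    # Approximate shape of the dico returned by get_user_view_graph()
    user_view = {
        "nodes": [
            {
                "id": "proc_a_$$_proc_b",  # grouped element ids joined by '_$$_'
                "name": "PROC_A",          # printable name, internal tag stripped
                "shape": "ellipse",
                "xlabel": "2",             # number of grouped elements
                "fillcolor": "#c8c8c8",    # grey level from get_color_node()
            },
        ],
        "edges": [
            {"A": "proc_a_$$_proc_b", "B": "proc_c", "label": ""},
        ],
    }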
diff --git a/src/outils_graph.py b/src/outils_graph.py
index 6a243f70456a8d53aca8d100b1ca9a7664e055b9..08b71a5dfb5578e6e38e131a550c054471706ca8 100644
--- a/src/outils_graph.py
+++ b/src/outils_graph.py
@@ -835,7 +835,7 @@ def get_color_node(node, new_nodes):
     prop = 256- int(127*len(node)/max)
     return rgb_to_hex(prop, prop, prop)
 
-def relev_user_view_builder(dico_param, relevant_modules):
+def relev_user_view_builder(dico_param, relevant_modules, show_operations = True):
     import time
     dico = copy.deepcopy(dico_param)
     tag = str(time.time())
@@ -964,14 +964,27 @@
     for i in range(len(new_nodes)):
         new_nodes[i].sort()
         new_name = get_name_new_node(get_names_tab(dico, new_nodes[i]), relevant_modules)
-        node = {"id": '_$$_'.join(new_nodes[i]).replace('<', '').replace('>', ''),
-                "name": new_name.split(tag)[0],
-                "shape": "ellipse",
-                "xlabel": f"{len(new_nodes[i])}",
-                "fillcolor": get_color_node(new_nodes[i], new_nodes)}
+        name_printed = new_name.split(tag)[0]
+        shape = "ellipse"
+        if(show_operations):
+            if(name_printed==""):
+                shape = "point"
+            node = {"id": '_$$_'.join(new_nodes[i]).replace('<', '').replace('>', ''),
+                    "name": name_printed,
+                    "shape": shape,
+                    "xlabel": f"{len(new_nodes[i])}",
+                    "fillcolor": get_color_node(new_nodes[i], new_nodes)}
+        else:
+            if(name_printed!=""):
+                node = {"id": '_$$_'.join(new_nodes[i]).replace('<', '').replace('>', ''),
+                        "name": name_printed,
+                        "shape": shape,
+                        "xlabel": f"{len(new_nodes[i])}",
+                        "fillcolor": get_color_node(new_nodes[i], new_nodes)}
+
         #If relevant module -> color it differently
         if(new_name in relevant_modules):
-            node["color"] = "yellow"
+            node["color"] = "#006903"
         new_dico["nodes"].append(node)
     added_edges = []
     for edge in dico["edges"]:
@@ -984,12 +997,13 @@
         if(edge["A"].replace('<', '').replace('>', '') in nA["id"] and
            edge["B"].replace('<', '').replace('>', '') in nB["id"] and
            edge_string not in added_edges):#So we don't have dupliacte edges
-            new_dico["edges"].append({
-                "A": nA["id"],
-                "B": nB["id"],
-                "label": ""
-            })
-            added_edges.append(edge_string)
+            if(show_operations or ((not show_operations) and nA["id"]!=nB["id"])):
+                new_dico["edges"].append({
+                    "A": nA["id"],
+                    "B": nB["id"],
+                    "label": ""
+                })
+                added_edges.append(edge_string)
 
     #The output nodes are the nodes which their outputs aren't connected to anything else
     #TODO -> remove these comments if you want to root the graph
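The new show_operations flag above changes two things when it is False: groups whose printable name is empty (operation-only groups) no longer produce a node, and self-loop edges, which appear once a group's operations collapse onto it, are skipped; relevant modules are also now highlighted in dark green ("#006903") instead of yellow. A standalone sketch of the filtering rule, using simplified dicts in the repo's dico format (filter_view is a hypothetical helper, not a function from outils_graph.py):

    def filter_view(nodes, edges, show_operations=True):
        # Keep or restyle nodes depending on whether operations are shown.
        kept = []
        for n in nodes:
            if show_operations:
                # Unnamed (operation-only) groups stay but are drawn as points.
                kept.append(dict(n, shape="point" if n["name"] == "" else n["shape"]))
            elif n["name"] != "":
                kept.append(dict(n))  # hide operation-only groups entirely
        ids = {n["id"] for n in kept}
        kept_edges = []
        for e in edges:
            # Without operations, self-loops and edges to dropped nodes vanish.
            if (show_operations or e["A"] != e["B"]) and e["A"] in ids and e["B"] in ids:
                kept_edges.append(e)
        return kept, kept_edges

    nodes = [{"id": "p1", "name": "FASTQC", "shape": "ellipse"},
             {"id": "op1", "name": "", "shape": "ellipse"}]
    edges = [{"A": "p1", "B": "op1", "label": ""}, {"A": "p1", "B": "p1", "label": ""}]
    print(filter_view(nodes, edges, show_operations=False))
    # -> only the named node survives; the self-loop and the dangling edge are dropped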
#print("Testing different combinations") + #print('-'*number_of_tries+">") for i in range(number_of_tries): - print('.', end='') + #print('.', end='') + print(i/number_of_tries*100) w = copy.deepcopy(w_save) if(process_pre_selection == "bioinfo"): @@ -709,20 +710,81 @@ George Marchment, Bryan Brancotte, Marie Schmit, Frédéric Lemoine, Sarah Cohen return min_processes already_tried.append(set(random_relevant_processes)) - def get_score_from_set_relevant_processes(random_relevant_processes): - _, cluster_organisation = w.convert_workflow_2_user_view(relevant_processes=random_relevant_processes, render_graphs = False) + #Here the nb of conditions returned is the number of conditions in the clusters after the rewrite + def get_nb_conditions_in_clusters(clusters): + nb_conditions_in_clusters = [] + for cluster in clusters: + all_conditions_cluster = [] + for c in cluster: + conditions_for_element = c.get_all_conditions() + if(len(conditions_for_element)==0): + all_conditions_cluster.append("no value") + else: + for condition in conditions_for_element: + all_conditions_cluster.append(condition.get_value()) + + all_conditions_cluster = list(set(all_conditions_cluster)) + + if(len(all_conditions_cluster)==1): + nb_conditions_in_clusters.append(0) + else: + try: + all_conditions_cluster.remove("no value") + except: + None + nb_conditions_in_clusters.append(len(all_conditions_cluster)) + return nb_conditions_in_clusters + + + def get_score_from_set_relevant_processes(w_save, random_relevant_processes): + #w = copy.deepcopy(w_save) + #_, cluster_organisation = w.convert_workflow_2_user_view(relevant_processes=random_relevant_processes, render_graphs = False) + #print(random_relevant_processes) + + #tab_nb_executors_per_cluster_1, tab_nb_processes_per_cluster_1, tab_nb_conditions_per_cluster_1 = [], [], [] + #for c in cluster_organisation: + # tab_nb_executors_per_cluster_1.append(cluster_organisation[c]["nb_executors"]) + # tab_nb_processes_per_cluster_1.append(cluster_organisation[c]["nb_processes"]) + # tab_nb_conditions_per_cluster_1.append(cluster_organisation[c]["nb_conditions"]) - tab_nb_executors_per_cluster, tab_nb_processes_per_cluster, tab_nb_conditions_per_cluster = [], [], [] - for c in cluster_organisation: - tab_nb_executors_per_cluster.append(cluster_organisation[c]["nb_executors"]) - tab_nb_processes_per_cluster.append(cluster_organisation[c]["nb_processes"]) - tab_nb_conditions_per_cluster.append(cluster_organisation[c]["nb_conditions"]) + w = copy.deepcopy(w_save) + w.generate_user_view(relevant_processes = random_relevant_processes, render_graphs=False) + clusters = w.graph.get_clusters_from_user_view() + cluster_with_processes = [] + for cluster in clusters: + there_is_a_process = False + for ele in cluster: + if(ele.get_type() == "Process"): + there_is_a_process = True + cluster_with_processes.append(cluster) + + #Number executors per cluster + tab_nb_executors_per_cluster = [] + for cluster in cluster_with_processes: + tab_nb_executors_per_cluster.append(len(cluster)) - nb_clusters = len(cluster_organisation) + #Number condtions per cluster + tab_nb_conditions_per_cluster = get_nb_conditions_in_clusters(cluster_with_processes) + #Number of processes per cluster + tab_nb_processes_per_cluster = [] + for cluster in cluster_with_processes: + nb_processes = 0 + for ele in cluster: + if(ele.get_type()=="Process"): + nb_processes+=1 + tab_nb_processes_per_cluster.append(nb_processes) + + + nb_clusters = len(cluster_with_processes) nb_non_relevant_clusters = 0 - for c in 
diff --git a/src/workflow.py b/src/workflow.py
index 790c0c9adbb5ddbe888c0c053748a16205467d21..371af87930f3bd56ba0e0c32a6c7ecce988586c3 100644
--- a/src/workflow.py
+++ b/src/workflow.py
@@ -686,10 +686,11 @@ George Marchment, Bryan Brancotte, Marie Schmit, Frédéric Lemoine, Sarah Cohen
                 print('.', end='')
                 processes_2_tools[p.get_code()] = p.get_tools()
             print("\n")
-        print("Testing different combinations")
-        print('-'*number_of_tries+">")
+        #print("Testing different combinations")
+        #print('-'*number_of_tries+">")
         for i in range(number_of_tries):
-            print('.', end='')
+            #print('.', end='')
+            print(i/number_of_tries*100)
 
             w = copy.deepcopy(w_save)
             if(process_pre_selection == "bioinfo"):
@@ -709,20 +710,81 @@ George Marchment, Bryan Brancotte, Marie Schmit, Frédéric Lemoine, Sarah Cohen
                    return min_processes
                already_tried.append(set(random_relevant_processes))
 
-        def get_score_from_set_relevant_processes(random_relevant_processes):
-            _, cluster_organisation = w.convert_workflow_2_user_view(relevant_processes=random_relevant_processes, render_graphs = False)
+        #Here the nb of conditions returned is the number of conditions in the clusters after the rewrite
+        def get_nb_conditions_in_clusters(clusters):
+            nb_conditions_in_clusters = []
+            for cluster in clusters:
+                all_conditions_cluster = []
+                for c in cluster:
+                    conditions_for_element = c.get_all_conditions()
+                    if(len(conditions_for_element)==0):
+                        all_conditions_cluster.append("no value")
+                    else:
+                        for condition in conditions_for_element:
+                            all_conditions_cluster.append(condition.get_value())
+
+                all_conditions_cluster = list(set(all_conditions_cluster))
+
+                if(len(all_conditions_cluster)==1):
+                    nb_conditions_in_clusters.append(0)
+                else:
+                    try:
+                        all_conditions_cluster.remove("no value")
+                    except:
+                        None
+                    nb_conditions_in_clusters.append(len(all_conditions_cluster))
+            return nb_conditions_in_clusters
+
+
+        def get_score_from_set_relevant_processes(w_save, random_relevant_processes):
+            #w = copy.deepcopy(w_save)
+            #_, cluster_organisation = w.convert_workflow_2_user_view(relevant_processes=random_relevant_processes, render_graphs = False)
+            #print(random_relevant_processes)
+
+            #tab_nb_executors_per_cluster_1, tab_nb_processes_per_cluster_1, tab_nb_conditions_per_cluster_1 = [], [], []
+            #for c in cluster_organisation:
+            #    tab_nb_executors_per_cluster_1.append(cluster_organisation[c]["nb_executors"])
+            #    tab_nb_processes_per_cluster_1.append(cluster_organisation[c]["nb_processes"])
+            #    tab_nb_conditions_per_cluster_1.append(cluster_organisation[c]["nb_conditions"])
 
-            tab_nb_executors_per_cluster, tab_nb_processes_per_cluster, tab_nb_conditions_per_cluster = [], [], []
-            for c in cluster_organisation:
-                tab_nb_executors_per_cluster.append(cluster_organisation[c]["nb_executors"])
-                tab_nb_processes_per_cluster.append(cluster_organisation[c]["nb_processes"])
-                tab_nb_conditions_per_cluster.append(cluster_organisation[c]["nb_conditions"])
+            w = copy.deepcopy(w_save)
+            w.generate_user_view(relevant_processes = random_relevant_processes, render_graphs=False)
+            clusters = w.graph.get_clusters_from_user_view()
+            cluster_with_processes = []
+            for cluster in clusters:
+                there_is_a_process = False
+                for ele in cluster:
+                    if(ele.get_type() == "Process"):
+                        there_is_a_process = True
+                if(there_is_a_process):
+                    cluster_with_processes.append(cluster)
+
+            #Number of executors per cluster
+            tab_nb_executors_per_cluster = []
+            for cluster in cluster_with_processes:
+                tab_nb_executors_per_cluster.append(len(cluster))
 
-            nb_clusters = len(cluster_organisation)
+            #Number of conditions per cluster
+            tab_nb_conditions_per_cluster = get_nb_conditions_in_clusters(cluster_with_processes)
 
+            #Number of processes per cluster
+            tab_nb_processes_per_cluster = []
+            for cluster in cluster_with_processes:
+                nb_processes = 0
+                for ele in cluster:
+                    if(ele.get_type()=="Process"):
+                        nb_processes+=1
+                tab_nb_processes_per_cluster.append(nb_processes)
+
+
+            nb_clusters = len(cluster_with_processes)
             nb_non_relevant_clusters = 0
-            for c in cluster_organisation:
-                #This means it's a non relevant cluster
-                if("non_relevant_cluster_" in c):
+            for cluster in cluster_with_processes:
+                cluster_with_relevant_process = False
+                for c in cluster:
+                    if(c.get_type()=="Process"):
+                        if(c.get_alias() in random_relevant_processes):
+                            #This means it's a relevant cluster
+                            cluster_with_relevant_process = True
+                if(not cluster_with_relevant_process):
                     nb_non_relevant_clusters+=1
 
             uniformity_variance = 0
@@ -734,9 +796,9 @@ George Marchment, Bryan Brancotte, Marie Schmit, Frédéric Lemoine, Sarah Cohen
                 uniformity_factor * (uniformity_variance / number_processes_called) + \
                 min_nb_clusters_factor * (nb_clusters / number_processes_called) + \
                 min_nb_non_relevant_cluster_factor * (nb_non_relevant_clusters / nb_clusters)
-            return score, cluster_organisation
+            return score, cluster_with_processes
 
-        score, cluster_organisation = get_score_from_set_relevant_processes(random_relevant_processes)
+        score, cluster_organisation = get_score_from_set_relevant_processes(w_save, random_relevant_processes)
 
        if(len(cluster_organisation)>=reduction_alpha*number_processes_called and len(cluster_organisation)<=reduction_beta*number_processes_called and score<min_score):
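Two notes on the workflow.py rework above. The score keeps its three weighted terms (uniformity variance, cluster count and non-relevant cluster count, each normalised), but the statistics now come from the user-view clusters themselves rather than from convert_workflow_2_user_view, and a cluster counts as non-relevant when none of its processes belongs to the chosen relevant set. In get_nb_conditions_in_clusters, a cluster costs 0 when all its executors live under one uniform condition (or none at all); otherwise each distinct real condition counts once and the "no value" placeholder is discarded. That rule in isolation (nb_conditions_in_cluster is a hypothetical standalone rendering):

    def nb_conditions_in_cluster(condition_values):
        # One entry per executor; "no value" marks an unconditioned executor.
        distinct = list(set(condition_values))
        if len(distinct) == 1:
            return 0  # uniform cluster: its conditions cost nothing
        if "no value" in distinct:
            distinct.remove("no value")
        return len(distinct)

    assert nb_conditions_in_cluster(["no value", "no value"]) == 0
    assert nb_conditions_in_cluster(["params.x", "params.x"]) == 0
    assert nb_conditions_in_cluster(["params.x", "no value"]) == 1
    assert nb_conditions_in_cluster(["params.x", "params.y"]) == 2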