# Source code for visualization

#!/usr/bin/env python
# coding: utf-8

# In[12]:


# In[14]:


def get_graph_topic_nodes():
    """
    Build one graph node per topic returned by ``lda.get_topic_nodes_()``.

    For every ``(id, vals)`` entry the node carries:

    * ``data.id``    -- the entry key
    * ``data.label`` -- ``vals[0]`` (per the original note: the top 4 words)
    * ``classes``    -- ``topic_<vals[1]>``; the matching class (and therefore
      the color) is expected to be defined in the stylesheet
    * ``position``   -- ``x`` / ``y`` taken from ``vals[2][0]`` / ``vals[2][1]``

    :return: topic nodes in cytoscape format
    """
    topic_nodes = []
    for topic_id, vals in lda.get_topic_nodes_().items():
        label = vals[0]
        # presumably a color name: the stylesheet selectors are built from it
        class_suffix = vals[1]
        position = vals[2]
        topic_nodes.append({
            'data': {'id': topic_id, 'label': label},
            'classes': f'topic_{class_suffix}',
            'position': {'x': position[0], 'y': position[1]},
        })
    return topic_nodes


# In[15]:


# document nodes: id by document id, class defined in stylesheet,
# define the color and belonging
def get_graph_document_nodes():
    """
    Build one graph node per document returned by ``lda.get_doc_nodes_()``.

    Each node uses the entry key as its id, has a fixed ``size`` of 1000 and a
    circular shape. Its class is ``vals[1]``; the coloring itself comes from
    the stylesheet class of the same name.

    :return: graph document nodes with the proper coloring in cytoscape format
    """
    document_nodes = []
    for doc_id, vals in lda.get_doc_nodes_().items():
        document_nodes.append({
            'data': {'id': doc_id, 'size': 1000},
            'style': {'shape': 'circle'},
            'classes': vals[1],
        })
    return document_nodes


# In[16]:



def get_graph_cos_sim_edges():
    """
    Build one graph edge per entry returned by ``lda.get_edges_()``.

    The first two items of every entry are used as source and target of the
    edge; the label has the form ``"<source> -> <target>"``.

    :return: edges between documents based on cosine similarity
    """
    edges = []
    for edge in lda.get_edges_():
        source, target = edge[0], edge[1]
        edges.append({
            'data': {
                'source': source,
                'target': target,
                'label': f'{source} -> {target}',
            }
        })
    return edges


# In[17]:


# Edges between the topics and their related documents; they are invisible.
def get_doc_topic_edges():
    """
    Build one hidden edge per document returned by ``lda.get_doc_nodes_()``.

    The edge goes from the document id to ``vals[2]`` (the document's dominant
    topic, according to the original description). The edges are drawn in
    white with opacity 0, so they are not visible in the graph.

    :return: invisible edges between document nodes and their dominant topic
    """
    # Identical for every edge: white line, fully transparent.
    hidden_style = {'line-color': 'white', "opacity": 0}

    edges = []
    for doc_id, vals in lda.get_doc_nodes_().items():
        topic_id = vals[2]
        edges.append({
            'data': {
                'source': doc_id,
                'target': topic_id,
                'label': f'{doc_id} -> {topic_id}',
                "edgeLength": 200,
                'size': 5,
            },
            # copy so that every edge owns its style dict, as before
            'style': dict(hidden_style),
        })
    return edges


# In[18]:


# Update stylesheet: define the class settings for all the new clusters,
# e.g. after the update_lda step.
def update_stylesheet():
    """
    Update stylesheet: define all the new clusters class settings (colors etc.)

    The distinct ``vals[1]`` values of ``lda.get_doc_nodes_()`` are collected
    and sorted. Each value ``c`` yields two class selectors:

    * ``.<c>``       -- document nodes, filled with ``c`` as background color
    * ``.topic_<c>`` -- topic nodes, white rectangles with a ``c`` colored
      border that are sized to fit their wrapped, centered label

    :return: Updated graph stylesheet
    """
    # Only the values are needed; the set removes duplicate colors and the
    # sort keeps the order of the selectors deterministic.
    colors = sorted({vals[1] for vals in lda.get_doc_nodes_().values()})

    node_classes = [
        {
            'selector': f'.{color}',
            'style': {'background-color': color},
        }
        for color in colors
    ]

    topic_classes = [
        {
            'selector': f'.topic_{color}',
            'style': {
                'border-color': color,
                'border-width': 2,
                'background-color': 'white',
                'shape': 'rectangle',
                'content': 'data(label)',
                'text-halign': 'center',
                'text-valign': 'center',
                'text-wrap': 'wrap',
                'width': 'label',
                'height': 'label',
            },
        }
        for color in colors
    ]

    return node_classes + topic_classes  # Class selectors


# In[19]:


def build_cluster_summary_view():
    """
    Prepare the data for the cluster summary view.

    The result holds four parallel lists. Position 0 describes the root entry
    ``'Clusters'`` (no parent, white marker, empty text). It is followed by
    one entry per item of ``lda.get_topic_nodes_()``: the key as label, the
    root as parent, ``vals[1]`` as marker color and ``vals[0]`` as text with
    newlines converted to ``<br>``.

    :return: dict with the keys ``labels``, ``parents``, ``marker_colors``
             and ``text_info``
    """
    root = 'Clusters'

    # Index 0 of every list belongs to the root entry.
    labels = [root]
    parents = ['']
    marker_colors = ["white"]
    text_info = ['']

    for topic_id, vals in lda.get_topic_nodes_().items():
        labels.append(topic_id)
        parents.append(root)
        marker_colors.append(vals[1])
        # the text is shown as HTML, so line breaks become <br>
        text_info.append(vals[0].replace('\n', '<br>'))

    return {
        'labels': labels,
        'parents': parents,
        'marker_colors': marker_colors,
        'text_info': text_info,
    }


# In[20]:


def build_cluster_merge_list():
    """
    Prepare the checklist for the merge cluster functionality.

    Every key of ``lda.get_topic_nodes()`` becomes one option: the key itself
    is the label, and the key with every ``'Cluster '`` occurrence removed is
    the value.

    :return: dash checklist content with proper label
    """
    # NOTE(review): the other functions in this module call
    # lda.get_topic_nodes_() (trailing underscore) -- confirm that
    # lda.get_topic_nodes() exists and is the intended accessor here.
    return [
        {'label': name, 'value': name.replace('Cluster ', '')}
        for name in lda.get_topic_nodes()
    ]


# In[21]:




def plot_wordcloud(number_of_words=20):
    """
    Wordcloud plot for the last selected cluster.

    The top ``number_of_words`` words of the cluster returned by
    ``lda.get_last_selected_cluster()`` are drawn on a white 480x360 canvas,
    weighted by their probabilities and recolored with
    ``lda.get_colormap_for_cluster()``.

    :param number_of_words: number of words to be plotted (default: 20)
    :return: Wordcloud plot (image, not interactive, but the words are not overlapped)
    """
    clust_id = lda.get_last_selected_cluster()

    # Query the word/probability table once and read both columns from it.
    word_probs = lda.get_top_n_word_probs_for_topic_i(clust_id, number_of_words)
    frequencies = dict(zip(word_probs['Words'], word_probs['Probabilities']))

    wc = WordCloud(background_color='white', width=480, height=360)
    wc.fit_words(frequencies)
    wc.recolor(colormap=lda.get_colormap_for_cluster())
    return wc.to_image()