Visualize

This page shows examples of how to visualize the file system.

[2]:
# Render matplotlib figures inline, directly below the cell that creates them.
%matplotlib inline

import matplotlib.pyplot as plt
# Apply the 'ggplot' style sheet to every figure drawn after this point.
plt.style.use('ggplot')
[3]:
import numpy
import pandas
import seaborn as sns

import path2insight
from path2insight.datasets import load_pride

# Load the example dataset shipped in path2insight.datasets; every cell below
# reads from `data`.
# NOTE(review): presumably a sequence of file paths from the PRIDE archive -
# confirm against the load_pride documentation.
data = load_pride()

Example 1 (Simple aggregations)

[4]:
# Tally the paths per depth and draw the tally as a bar chart.
# pandas is already imported in the notebook's import cell, so it is not
# re-imported here.
# NOTE(review): depth_counts presumably returns a depth -> count mapping;
# confirm against the path2insight docs.
data_depth = path2insight.depth_counts(data)
pandas.Series(data_depth).plot.bar(title="Depth count")
[4]:
<matplotlib.axes._subplots.AxesSubplot at 0x10b4905c0>
_images/visualize_5_1.png
[5]:
# the ten most frequent extensions, as (extension, count) pairs
data_top_extensions = path2insight.extension_counts(data).most_common(10)

# build the extension-indexed frame in a single expression and chart it
df_top_extensions = pandas.DataFrame(
    data_top_extensions, columns=['extension', 'count']
).set_index('extension')
df_top_extensions.plot.bar(title='Most common extensions')
[5]:
<matplotlib.axes._subplots.AxesSubplot at 0x10ef05d68>
_images/visualize_6_1.png
[6]:
# same top-10 ranking as the previous cell, but requested with normalize=True
# NOTE(review): the values are presumably fractions of all files rather than
# raw counts, even though the column keeps the label 'count' - confirm.
extension_tally = path2insight.extension_counts(data, normalize=True)
data_top_extensions = extension_tally.most_common(10)

# tabulate with the extension as the index, then draw the bar chart
df_top_extensions = pandas.DataFrame(
    data_top_extensions, columns=['extension', 'count']
).set_index('extension')
df_top_extensions.plot.bar(title='Most common extensions (relative)')
[6]:
<matplotlib.axes._subplots.AxesSubplot at 0x1146f0630>
_images/visualize_7_1.png

Example 2 (Compare projects)

[7]:
# select the two projects to compare (matched on the level5 path component)
project_PXD001787 = path2insight.select(data, level5='PXD001787')
project_PXD002010 = path2insight.select(data, level5='PXD002010')

# plot the distance between the two projects based on file extensions
# NOTE(review): m2 is presumably a matrix with one row per path of the first
# project and one column per path of the second - confirm in the docs.
m2 = path2insight.distance_on_extension(project_PXD001787, project_PXD002010)
# vmin=0 anchors the colour scale at zero
sns.heatmap(m2, vmin=0)
[7]:
<matplotlib.axes._subplots.AxesSubplot at 0x1a2a21e4a8>
_images/visualize_9_1.png
[8]:
# project_PXD001787 and project_PXD002010 were selected in the
# extension-distance cell above; reuse them instead of selecting again.

# plot the distance between the two projects based on path depth
m2 = path2insight.distance_on_depth(project_PXD001787, project_PXD002010)
# vmin=0 anchors the colour scale at zero
sns.heatmap(m2, vmin=0)
[8]:
<matplotlib.axes._subplots.AxesSubplot at 0x1a2a6462b0>
_images/visualize_10_1.png
[9]:
# project_PXD001787 and project_PXD002010 were selected in the
# extension-distance cell above; reuse them instead of selecting again.

# plot the distance between the two projects based on path tokens
m2 = path2insight.distance_on_token(project_PXD001787, project_PXD002010)
# vmin=0 anchors the colour scale at zero
sns.heatmap(m2, vmin=0)
[9]:
<matplotlib.axes._subplots.AxesSubplot at 0x1a2bd5f748>
_images/visualize_11_1.png

Example 3 (Clustering)

[10]:
# Token-based distance between the first 1000 entries of `data` and the
# following 1000 entries.
# NOTE(review): the two slices look like a sample chosen to keep the example
# small - confirm whether the split has any other meaning.
m2 = path2insight.distance_on_token(data[0:1000], data[1000:2000])
# clustermap draws the matrix as a hierarchically clustered heatmap
sns.clustermap(m2, figsize=(9, 9))
[10]:
<seaborn.matrix.ClusterGrid at 0x1146eb3c8>
_images/visualize_13_1.png