Skip to content
This repository was archived by the owner on Apr 30, 2025. It is now read-only.

Commit 070e4d3

Browse files
Updated quick start vignette to highlight net_emd options
1 parent 8e4042d commit 070e4d3

File tree

5 files changed

+79
-21
lines changed

5 files changed

+79
-21
lines changed

README.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,9 @@
11
# netdist
22
An R package implementing the NetEMD network comparison measure
33

4+
## Usage
5+
See "Quick start" vignette in documentation for example usage.
6+
47
## Installing package from source
58
When published to the CRAN package repository, the library and all documentation
69
will be installed in the standard manner using `install.packages("netdist")`,

inst/doc/Quick_start.R

Lines changed: 20 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -6,20 +6,34 @@ virus_edges <- read_all_graphs_as_orca_edge_lists(
66
format = "ncol", pattern = ".txt")
77
attr(virus_edges, "names")
88

9-
# Calculate graphlet orbit degree distributions up to 4 nodes for all graphs
10-
# This only needs to be done once per graph
11-
virus_godd <- purrr::map(virus_edges, godd)
9+
# Calculate graphlet orbit degree distributions for graphlets comprising up to
10+
# 4 nodes for all graphs. This only needs to be done once per graph.
11+
# If type is set to "node5", graphlet orbit degree distributions will be
12+
# calculated for graphlets comprising up to 5 nodes
13+
virus_godd <- purrr::map(virus_edges, godd, type = "node4")
1214

1315
# Generate a cross-comparison matrix listing all combinations of graphs
1416
comp_spec <- graph_cross_comparison_spec(virus_edges)
1517
comp_spec[1:5,]
1618

17-
# Compute NetEMD between all virus PPI graphs based on all graphlet orbir
18-
# degree distributions up to 4 nodes
19+
# Compute NetEMD between all virus PPI graphs based on the computed graphlet
20+
# orbit degree distributions, using the fast "optimise" method (default). This
21+
# method uses the built-in R optimise method to efficiently find the offset
22+
# with the minimum EMD, but is not guaranteed to find the global minimum if
23+
# EMD as a function of offset is multimodal.
1924
net_emds <- purrr::simplify(
2025
purrr::map2(comp_spec$index_a, comp_spec$index_b, function(index_a, index_b) {
21-
net_emd(virus_godd[[index_a]], virus_godd[[index_b]])
26+
net_emd(virus_godd[[index_a]], virus_godd[[index_b]], method = "optimise")
2227
}))
28+
print(net_emds)
29+
30+
# You can also specify method = "fixed_step" to use the much slower method of
31+
# exhaustively evaluating the EMD at all offsets separated by a fixed step.
32+
# The default step size is 1/2 the minimum spacing between locations in
33+
# either histogram after normalising to unit variance. However, you can
34+
# specify your own fixed step using the optional "step_size" parameter.
35+
# Note that this step size is applied to the histograms after they have been
36+
# normalised to unit variance
2337

2438
# Link NetEMDs with their respective comp_specs
2539
comp_spec$net_emd = net_emds

inst/doc/Quick_start.Rmd

Lines changed: 20 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -17,20 +17,34 @@ virus_edges <- read_all_graphs_as_orca_edge_lists(
1717
format = "ncol", pattern = ".txt")
1818
attr(virus_edges, "names")
1919
20-
# Calculate graphlet orbit degree distributions up to 4 nodes for all graphs
21-
# This only needs to be done once per graph
22-
virus_godd <- purrr::map(virus_edges, godd)
20+
# Calculate graphlet orbit degree distributions for graphlets comprising up to
21+
# 4 nodes for all graphs. This only needs to be done once per graph.
22+
# If type is set to "node5", graphlet orbit degree distributions will be
23+
# calculated for graphlets comprising up to 5 nodes
24+
virus_godd <- purrr::map(virus_edges, godd, type = "node4")
2325
2426
# Generate a cross-comparison matrix listing all combinations of graphs
2527
comp_spec <- graph_cross_comparison_spec(virus_edges)
2628
comp_spec[1:5,]
2729
28-
# Compute NetEMD between all virus PPI graphs based on all graphlet orbir
29-
# degree distributions up to 4 nodes
30+
# Compute NetEMD between all virus PPI graphs based on the computed graphlet
31+
# orbit degree distributions, using the fast "optimise" method (default). This
32+
# method uses the built-in R optimise method to efficiently find the offset
33+
# with the minimum EMD, but is not guaranteed to find the global minimum if
34+
# EMD as a function of offset is multimodal.
3035
net_emds <- purrr::simplify(
3136
purrr::map2(comp_spec$index_a, comp_spec$index_b, function(index_a, index_b) {
32-
net_emd(virus_godd[[index_a]], virus_godd[[index_b]])
37+
net_emd(virus_godd[[index_a]], virus_godd[[index_b]], method = "optimise")
3338
}))
39+
print(net_emds)
40+
41+
# You can also specify method = "fixed_step" to use the much slower method of
42+
# exhaustively evaluating the EMD at all offsets separated by a fixed step.
43+
# The default step size is 1/2 the minimum spacing between locations in
44+
# either histogram after normalising to unit variance. However, you can
45+
# specify your own fixed step using the optional "step_size" parameter.
46+
# Note that this step size is applied to the histograms after they have been
47+
# normalised to unit variance
3448
3549
# Link NetEMDs with their respective comp_specs
3650
comp_spec$net_emd = net_emds

inst/doc/Quick_start.html

Lines changed: 21 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -83,9 +83,11 @@ <h2>Virus PPI example (from sample data)</h2>
8383
<span class="dt">format =</span> <span class="st">&quot;ncol&quot;</span>, <span class="dt">pattern =</span> <span class="st">&quot;.txt&quot;</span>)
8484
<span class="kw">attr</span>(virus_edges, <span class="st">&quot;names&quot;</span>)</code></pre></div>
8585
<pre><code>## [1] &quot;EBV-1.txt&quot; &quot;ECL-1.txt&quot; &quot;HSV-1-1.txt&quot; &quot;KSHV-1.txt&quot; &quot;VZV-1.txt&quot;</code></pre>
86-
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="co"># Calculate graphlet orbit degree distributions up to 4 nodes for all graphs </span>
87-
<span class="co"># This only needs to be done once per graph</span>
88-
virus_godd &lt;-<span class="st"> </span>purrr::<span class="kw">map</span>(virus_edges, godd)
86+
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="co"># Calculate graphlet orbit degree distributions for graphlets comprising up to </span>
87+
<span class="co"># 4 nodes for all graphs. This only needs to be done once per graph. </span>
88+
<span class="co"># If type is set to &quot;node5&quot;, graphlet orbit degree distributions will be </span>
89+
<span class="co"># calculated for graphlets comprising up to 5 nodes</span>
90+
virus_godd &lt;-<span class="st"> </span>purrr::<span class="kw">map</span>(virus_edges, godd, <span class="dt">type =</span> <span class="st">&quot;node4&quot;</span>)
8991

9092
<span class="co"># Generate a cross-comparison matrix listing all combinations of graphs</span>
9193
comp_spec &lt;-<span class="st"> </span><span class="kw">graph_cross_comparison_spec</span>(virus_edges)
@@ -96,12 +98,25 @@ <h2>Virus PPI example (from sample data)</h2>
9698
## 3 EBV-1.txt KSHV-1.txt 1 4
9799
## 4 EBV-1.txt VZV-1.txt 1 5
98100
## 5 ECL-1.txt HSV-1-1.txt 2 3</code></pre>
99-
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="co"># Compute NetEMD between all virus PPI graphs based on all graphlet orbir</span>
100-
<span class="co"># degree distributions up to 4 nodes</span>
101+
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="co"># Compute NetEMD between all virus PPI graphs based on the computed graphlet </span>
102+
<span class="co"># orbit degree distributions, using the fast &quot;optimise&quot; method (default). This</span>
103+
<span class="co"># method uses the built-in R optimise method to efficiently find the offset</span>
104+
<span class="co"># with the minimum EMD, but is not guaranteed to find the global minimum if</span>
105+
<span class="co"># EMD as a function of offset is multimodal.</span>
101106
net_emds &lt;-<span class="st"> </span>purrr::<span class="kw">simplify</span>(
102107
purrr::<span class="kw">map2</span>(comp_spec$index_a, comp_spec$index_b, function(index_a, index_b) {
103-
<span class="kw">net_emd</span>(virus_godd[[index_a]], virus_godd[[index_b]])
108+
<span class="kw">net_emd</span>(virus_godd[[index_a]], virus_godd[[index_b]], <span class="dt">method =</span> <span class="st">&quot;optimise&quot;</span>)
104109
}))
110+
<span class="kw">print</span>(net_emds)</code></pre></div>
111+
<pre><code>## [1] 0.4876042 0.1662899 0.1607306 0.1994613 0.3986293 0.4024197 0.4029355
112+
## [8] 0.1581551 0.2164011 0.2323951</code></pre>
113+
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="co"># You can also specify method = &quot;fixed_step&quot; to use the much slower method of </span>
114+
<span class="co"># exhaustively evaluating the EMD at all offsets separated by a fixed step. </span>
115+
<span class="co"># The default step size is 1/2 the minimum spacing between locations in </span>
116+
<span class="co"># either histogram after normalising to unit variance. However, you can </span>
117+
<span class="co"># specify your own fixed step using the optional &quot;step_size&quot; parameter.</span>
118+
<span class="co"># Note that this step size is applied to the histograms after they have been </span>
119+
<span class="co"># normalised to unit variance</span>
105120

106121
<span class="co"># Link NetEMDs with their respective comp_specs</span>
107122
comp_spec$net_emd =<span class="st"> </span>net_emds

vignettes/Quick_start.Rmd

Lines changed: 15 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -27,12 +27,24 @@ virus_godd <- purrr::map(virus_edges, godd, type = "node4")
2727
comp_spec <- graph_cross_comparison_spec(virus_edges)
2828
comp_spec[1:5,]
2929
30-
# Compute NetEMD between all virus PPI graphs based on all graphlet orbir
31-
# degree distributions up to 4 nodes
30+
# Compute NetEMD between all virus PPI graphs based on the computed graphlet
31+
# orbit degree distributions, using the fast "optimise" method (default). This
32+
# method uses the built-in R optimise method to efficiently find the offset
33+
# with the minimum EMD, but is not guaranteed to find the global minimum if
34+
# EMD as a function of offset is multimodal.
3235
net_emds <- purrr::simplify(
3336
purrr::map2(comp_spec$index_a, comp_spec$index_b, function(index_a, index_b) {
34-
net_emd(virus_godd[[index_a]], virus_godd[[index_b]])
37+
net_emd(virus_godd[[index_a]], virus_godd[[index_b]], method = "optimise")
3538
}))
39+
print(net_emds)
40+
41+
# You can also specify method = "fixed_step" to use the much slower method of
42+
# exhaustively evaluating the EMD at all offsets separated by a fixed step.
43+
# The default step size is 1/2 the minimum spacing between locations in
44+
# either histogram after normalising to unit variance. However, you can
45+
# specify your own fixed step using the optional "step_size" parameter.
46+
# Note that this step size is applied to the histograms after they have been
47+
# normalised to unit variance
3648
3749
# Link NetEMDs with their respective comp_specs
3850
comp_spec$net_emd = net_emds

0 commit comments

Comments
 (0)