---
title: "How To Use This Package"
author: "Mauricio Vargas"
date: "`r Sys.Date()`"
output: rmarkdown::html_vignette
bibliography: ../inst/REFERENCES.bib
vignette: >
  %\VignetteIndexEntry{How To Use This Package}
  %\VignetteEngine{knitr::rmarkdown}
  %\VignetteEncoding{UTF-8}
---

```{r setup, cache = FALSE, echo = FALSE, message = FALSE, warning = FALSE}
knitr::opts_chunk$set(eval = TRUE, message = FALSE, warning = FALSE)
```

This example is inspired after the visualizations from @atlas2014 with some
ggplot additions. The original vignette was largely improved from what I learned
in Network Analysis taught at ICPSR 2023 by Dr. Sarah Shugars.

# World Trade and Per-Capita GDP

```{r}
library(economiccomplexity)

# partial view of trade matrix
head(world_trade_avg_1998_to_2000)

# partial view of gdp vector
head(world_gdp_avg_1998_to_2000)
```

# Balassa Index

You can obtain Balassa Index with `balassa_index()`.

```{r}
bi <- balassa_index(world_trade_avg_1998_to_2000)

# partial view of index
bi[1:5, 1:5]
```

Another possibility is to obtain Balassa Index without discretization.

```{r}
bi_dec <- balassa_index(world_trade_avg_1998_to_2000, discrete = F)

# partial view of index
bi_dec[1:5, 1:5]
```

# Complexity Measures

You can compute complexity indexes (e.g. such as the Economic Complexity Index and Product Complexity Index) by using `complexity_measures()`. The calculations methods are *fitness* (default), *reflections*, *eigenvalues*. See [@measuringcomplexity2015] for the methodological details.

The eigenvalues also calls the reflections methods in order to correct the index sign
in some special cases when the correlation between the output from both methods is negative.

##  Complexity-Fitness

```{r}
com_fit <- complexity_measures(bi)

# partial view of indexes
com_fit$complexity_index_country[1:5]
com_fit$complexity_index_product[1:5]
```

## Reflections

```{r}
com_ref <- complexity_measures(bi, method = "reflections")

# partial view of indexes
com_ref$complexity_index_country[1:5]
com_ref$complexity_index_product[1:5]
```

## Eigenvalues

```{r}
com_eig <- complexity_measures(bi, method = "eigenvalues")

# partial view of indexes
com_eig$complexity_index_country[1:5]
com_eig$complexity_index_product[1:5]
```

# Proximity

Proximity matrices are used to create projections e.g. (country-country and product-product
networks) for bipartite networks. Using `proximity()` is straightforward.

```{r}
pro <- proximity(bi)

# partial view of proximity matrices
pro$proximity_country[1:5, 1:5]
pro$proximity_product[1:5, 1:5]
```

# Projections

The `projections()` function is designed to use `igraph` for the internal
computations and also to pass proximity-based networks to `igraph`, `ggraph`
or export to Cytoscape by saving the output as csv/tsv.

```{r}
library(igraph)

net <- projections(pro$proximity_country, pro$proximity_product)

# partial view of projections
E(net$network_country)[1:5]
E(net$network_product)[1:5]
```

We can also use `igraph` to see how many edges are in the networks nd also the networks' density, diameter and transitivity.

```{r}
ecount(net$network_country)
ecount(net$network_product)

edge_density(net$network_country, loops = F)
edge_density(net$network_product, loops = F)

diameter(net$network_country, directed = F, unconnected = F)
diameter(net$network_product, directed = F, unconnected = F)

transitivity(net$network_country, type = "global")
transitivity(net$network_product, type = "global")
```

# Centrality measures

We calculate the degree centrality of every node in the network and plot a histogram of these values. The drawback is that the network was trimmed until obtaining an average of 4 links per edge (or arcs per node), therefore the computation and histograms reflect a biased distribution.

```{r}
deg_country <- degree(net$network_country)
deg_product <- degree(net$network_product)

# country with the highest degree centrality
deg_country[which.max(deg_country)]

# product with the highest degree centrality
deg_product[which.max(deg_product)]
```

In the same way, we can compute the betweenness, cloness and eigenvector centrality of the networks.

```{r}
bet_country <- betweenness(net$network_country)
bet_product <- betweenness(net$network_product)

clo_country <- closeness(net$network_country)
clo_product <- closeness(net$network_product)

eig_country <- eigen_centrality(net$network_country)$vector
eig_product <- eigen_centrality(net$network_product)$vector

# country with the highest betweenness centrality
bet_country[which.max(bet_country)]

# product with the highest betweenness centrality
bet_product[which.max(bet_product)]

# country with the highest closeness centrality
clo_country[which.max(clo_country)]

# product with the highest closeness centrality
clo_product[which.max(clo_product)]

# country with the highest eigenvector centrality
eig_country[which.max(eig_country)]

# product with the highest eigenvector centrality
eig_product[which.max(eig_product)]
```

Following the analysis, we can verify that the largest connected component is the same as the original networks in this case.

```{r}
# sub-networks of the largest connected component

lcc_countries <- induced_subgraph(
  net$network_country,
  which(components(net$network_country)$membership ==
    which.max(components(net$network_country)$csize))
)

lcc_products <- induced_subgraph(
  net$network_product,
  which(components(net$network_product)$membership ==
    which.max(components(net$network_product)$csize))
)

# is this the same as the original networks?

ecount(lcc_countries) == ecount(net$network_country)
vcount(lcc_countries) == vcount(net$network_product)

deg2_countries <- degree(lcc_countries)
deg2_products <- degree(lcc_products)

bet2_countries <- betweenness(lcc_countries)
bet2_products <- betweenness(lcc_products)

clo2_countries <- closeness(lcc_countries)
clo2_products <- closeness(lcc_products)

eig2_countries <- eigen_centrality(lcc_countries)$vector
eig2_products <- eigen_centrality(lcc_products)$vector

all.equal(deg_country[which.max(deg_country)], deg2_countries[which.max(deg2_countries)])
all.equal(deg_product[which.max(deg_product)], deg2_products[which.max(deg2_products)])

all.equal(bet_country[which.max(bet_country)], bet2_countries[which.max(bet2_countries)])
all.equal(bet_product[which.max(bet_product)], bet2_products[which.max(bet2_products)])

all.equal(clo_country[which.max(clo_country)], clo2_countries[which.max(clo2_countries)])
all.equal(clo_product[which.max(clo_product)], clo2_products[which.max(clo2_products)])

all.equal(eig_country[which.max(eig_country)], eig2_countries[which.max(eig2_countries)])
all.equal(eig_product[which.max(eig_product)], eig2_products[which.max(eig2_products)])
```

# K-core and backbone

We can identify the k-core of the networks for an arbitray value "k".

```{r}
k <- 4

# identify the core of the network
core_country <- coreness(net$network_country, mode = "all")
core_product <- coreness(net$network_product, mode = "all")

# identify the nodes in the core
kcore_country <- induced_subgraph(
  net$network_country,
  which(core_country >= k)
)

kcore_product <- induced_subgraph(
  net$network_product,
  which(core_product >= k)
)

V(kcore_country)$name

V(kcore_product)$name
```

We can also identify the backbone of the networks.

```{r}
# identify the backbone of the network
bbn_country <- delete_vertices(net$network_country, which(core_country < k))
bbn_product <- delete_vertices(net$network_product, which(core_product < k))

bbn_country
bbn_product
```

# Community detection

We can identify the communities of the networks with a fast greedy algorithm.

```{r}
com_country <- cluster_fast_greedy(net$network_country)
com_product <- cluster_fast_greedy(net$network_product)

all.equal(vcount(net$network_country), length(com_country$membership))
all.equal(vcount(net$network_product), length(com_product$membership))

# number of communities
length(unique(com_country$membership))
length(unique(com_product$membership))
```

# Complexity Outlook

Both the Complexity Outlook Index and Complexity Outlook Gain are obtained
after the `complexity_outlook()` function.

```{r}
co <- complexity_outlook(
  economiccomplexity_output$balassa_index,
  economiccomplexity_output$proximity$proximity_product,
  economiccomplexity_output$complexity_measures$complexity_index_product
)

# partial view of complexity outlook
co$complexity_outlook_index[1:5]
co$complexity_outlook_gain[1:5, 1:5]
```

# Productivy Levels

The `productivity_levels()` dataset follows the definitions from @atlas2014
and @exportmatters2005.

I don't have a per-capita GDP dataset for the Galactic Federation, so
I'll create simulated data for the example.

```{r}
pl <- productivity_levels(world_trade_avg_1998_to_2000, world_gdp_avg_1998_to_2000)

# partial view of productivity levels
pl$productivity_level_country[1:5]
pl$productivity_level_product[1:5]
```

# Integration with `ggplot2`

We can plot the distributions for the centrality measures.

```{r}
library(ggplot2)

deg_country <- data.frame(
  country = names(deg_country),
  deg = deg_country
)

deg_product <- data.frame(
  product = names(deg_product),
  deg = deg_product
)

ggplot(deg_country) +
  geom_histogram(aes(x = deg), bins = 20, fill = "#002948") +
  theme_minimal(base_size = 13) +
  labs(title = "Degree Centrality Distribution for Countries")

ggplot(deg_product) +
  geom_histogram(aes(x = deg), bins = 20, fill = "#002948") +
  theme_minimal(base_size = 13) +
  labs(title = "Degree Centrality Distribution for Products")
```

```{r}
bet_country <- data.frame(
  country = names(bet_country),
  bet = bet_country
)

bet_product <- data.frame(
  product = names(bet_product),
  bet = bet_product
)

clo_country <- data.frame(
  country = names(clo_country),
  clo = clo_country
)

clo_product <- data.frame(
  product = names(clo_product),
  clo = clo_product
)

eig_country <- data.frame(
  country = names(eig_country),
  eig = eig_country
)

eig_product <- data.frame(
  product = names(eig_product),
  eig = eig_product
)

ggplot(bet_country) +
  geom_histogram(aes(x = bet), bins = 20, fill = "#002948") +
  theme_minimal(base_size = 13) +
  labs(title = "Betweenness Centrality Distribution for Countries")

ggplot(bet_product) +
  geom_histogram(aes(x = bet), bins = 20, fill = "#002948") +
  theme_minimal(base_size = 13) +
  labs(title = "Betweenness Centrality Distribution for Products")

ggplot(clo_country) +
  geom_histogram(aes(x = clo), bins = 20, fill = "#002948") +
  theme_minimal(base_size = 13) +
  labs(title = "Closeness Centrality Distribution for Countries")

ggplot(clo_product) +
  geom_histogram(aes(x = clo), bins = 20, fill = "#002948") +
  theme_minimal(base_size = 13) +
  labs(title = "Closeness Centrality Distribution for Products")

ggplot(eig_country) +
  geom_histogram(aes(x = eig), bins = 20, fill = "#002948") +
  theme_minimal(base_size = 13) +
  labs(title = "Eigenvector Centrality Distribution for Countries")

ggplot(eig_product) +
  geom_histogram(aes(x = eig), bins = 20, fill = "#002948") +
  theme_minimal(base_size = 13) +
  labs(title = "Eigenvector Centrality Distribution for Products")
```

# Integration with `ggraph`

We start by plotting the network of countries. Each node will be sized by its
total exports.

```{r, fig.width=7, fig.height=7}
set.seed(200100)

library(ggraph)

aggregated_countries <- aggregate(
  world_trade_avg_1998_to_2000$value,
  by = list(country = world_trade_avg_1998_to_2000$country),
  FUN = sum
)

aggregated_countries <- setNames(aggregated_countries$x, aggregated_countries$country)

V(net$network_country)$size <- aggregated_countries[match(V(net$network_country)$name, names(aggregated_countries))]

ggraph(net$network_country, layout = "kk") +
  # geom_edge_link(aes(edge_width = weight), edge_colour = "#a8a8a8") +
  geom_edge_link(edge_colour = "#a8a8a8") +
  geom_node_point(aes(size = size), color = "#002948") +
  geom_node_text(aes(label = name), size = 2, vjust = 2.2) +
  ggtitle("Proximity Based Network Projection for Countries") +
  theme_void()
```

Now we can highlight the countries with the highest centralities from the previous part.

```{r, fig.width=7, fig.height=7}
# Paint svn, ken and cze in yellow and the rest of the world in blue

V(net$network_country)$color <- rep(
  "Rest of the World",
  length(V(net$network_country)$size)
)
V(net$network_country)$color[match(
  c("svn", "ken", "cze"),
  V(net$network_country)$name
)] <- "Slovakia, Kenia and Czech Republic"

ggraph(net$network_country, layout = "kk") +
  geom_edge_link(edge_colour = "#a8a8a8") +
  geom_node_point(aes(size = size, color = color)) +
  geom_node_text(aes(label = name), size = 2, vjust = 2.2) +
  ggtitle("Proximity Based Network Projection for Countries") +
  theme_void() +
  scale_colour_manual(values = c(
    "Slovakia, Kenia and Czech Republic" = "#fac704",
    "Rest of the World" = "#002948"
  ))
```

We can also plot the network of products. Each node will be sized by its
total exports. Because the product names are large, we display the product
codes instead. You can read about the codes [here](https://www.foreign-trade.com/reference/hscode.htm)
and if you need the codes in R, you can use the `tradestatistics` package, which
can be installed from [CRAN](https://cran.r-project.org/package=tradestatistics).

```{r, fig.width=10, fig.height=10}
set.seed(200100)

aggregated_products <- aggregate(
  world_trade_avg_1998_to_2000$value,
  by = list(country = world_trade_avg_1998_to_2000$product),
  FUN = sum
)

aggregated_products <- setNames(aggregated_products$x, aggregated_products$country)

V(net$network_product)$size <- aggregated_products[
  match(V(net$network_product)$name, names(aggregated_products))
]

ggraph(net$network_product, layout = "kk") +
  geom_edge_link(edge_colour = "#a8a8a8") +
  geom_node_point(aes(size = size), color = "#002948") +
  geom_node_text(aes(label = name), size = 2, vjust = 2.2) +
  ggtitle("Proximity Based Network Projection for Products") +
  theme_void()
```

Now we can highlight the products with the highest centralities from the previous part.

```{r, fig.width=10, fig.height=10}
# Paint 8421, 7412 and 8434 in yellow and the rest of the products in blue

V(net$network_product)$color <- rep(
  "Rest of the Products",
  length(V(net$network_product)$size)
)
V(net$network_product)$color[match(
  c("8421", "7412", "8434"),
  V(net$network_product)$name
)] <- "8421, 7412 and 8434"

ggraph(net$network_product, layout = "kk") +
  geom_edge_link(edge_colour = "#a8a8a8") +
  geom_node_point(aes(size = size, color = color)) +
  geom_node_text(aes(label = name), size = 2, vjust = 2.2) +
  ggtitle("Proximity Based Network Projection for Products") +
  theme_void() +
  scale_colour_manual(values = c(
    "8421, 7412 and 8434" = "#fac704",
    "Rest of the Products" = "#002948"
  ))
```

The communities detected in the previous part can be used to improve the plots.

```{r, fig.width=10, fig.height=10}
# Paint by community
# for each vertex, replace X with the community number

set.seed(200100)

V(net$network_country)$color2 <- rep(NA, length(V(net$network_country)$size))

for (i in 1:length(V(net$network_country)$color2)) {
  com_i <- as.character(com_country$membership[i])

  # if len(com$membership[i]) = 1, append a 0
  if (nchar(com_i) == 1) {
    com_i <- paste0("0", com_i)
  }

  V(net$network_country)$color2[i] <- paste0("Community ", com_i)
}

my_colors <- c(
  "#74c0e2", "#406662", "#549e95", "#8abdb6", "#bcd8af",
  "#a8c380", "#ede788", "#d6c650", "#dc8e7a", "#d05555",
  "#bf3251", "#872a41"
)

ggraph(net$network_country, layout = "kk") +
  geom_edge_link(edge_colour = "#a8a8a8") +
  geom_node_point(aes(size = size, color = color2)) +
  geom_node_text(aes(label = name), size = 2, vjust = 2.2) +
  ggtitle("Proximity Based Network Projection for Countries") +
  theme_void() +
  scale_colour_manual(values = my_colors)
```

For products, the challenge is to obtain 41 distinguisable colors.

```{r, fig.width=10, fig.height=10}
# Paint by community
# for each vertex, replace X with the community number

set.seed(200100)

V(net$network_product)$color2 <- rep(NA, length(V(net$network_product)$size))

for (i in 1:length(V(net$network_product)$color2)) {
  com_i <- as.character(com_product$membership[i])

  # if len(com$membership[i]) = 1, append a 0
  if (nchar(com_i) == 1) {
    com_i <- paste0("0", com_i)
  }

  V(net$network_product)$color2[i] <- paste0("Community ", com_i)
}

my_colors_2 <- c(
  "#74c0e2", "#406662", "#549e95", "#8abdb6", "#bcd8af",
  "#a8c380", "#ede788", "#d6c650", "#dc8e7a", "#d05555",
  "#bf3251", "#872a41", "#74c0e2", "#406662", "#549e95",
  "#8abdb6", "#bcd8af", "#a8c380", "#ede788", "#d6c650",
  "#dc8e7a", "#d05555", "#bf3251", "#872a41", "#74c0e2",
  "#406662", "#549e95", "#8abdb6", "#bcd8af", "#a8c380",
  "#ede788", "#d6c650", "#dc8e7a", "#d05555", "#bf3251",
  "#872a41", "#74c0e2", "#406662", "#549e95", "#8abdb6",
  "#bcd8af"
)

ggraph(net$network_product, layout = "kk") +
  geom_edge_link(edge_colour = "#a8a8a8") +
  geom_node_point(aes(size = size, color = color2)) +
  geom_node_text(aes(label = name), size = 2, vjust = 2.2) +
  ggtitle("Proximity Based Network Projection for Products") +
  theme_void() +
  scale_colour_manual(values = my_colors_2)
```

We can try to obtain clusters in a different way, by pre-specifying the number
of communities. Because this has some arbitrary element, we can specify a
number equal to the sections in the Harmonised System, which is 21 plus the
unspecified products.

```{r, fig.width=10, fig.height=10}
set.seed(200100)

my_colors_3 <- c(
  "#74c0e2", "#406662", "#549e95", "#8abdb6", "#bcd8af",
  "#a8c380", "#ede788", "#d6c650", "#dc8e7a", "#d05555",
  "#bf3251", "#872a41", "#993f7b", "#7454a6", "#a17cb0",
  "#d1a1bc", "#a1aafb", "#5c57d9", "#1c26b3", "#4d6fd0",
  "#7485aa", "#d3d3d3"
)

com_product <- cluster_fluid_communities(net$network_product, no.of.communities = 22)

V(net$network_product)$color2 <- rep(NA, length(V(net$network_product)$size))

for (i in 1:length(V(net$network_product)$color2)) {
  com_i <- as.character(com_product$membership[i])

  # if len(com$membership[i]) = 1, append a 0
  if (nchar(com_i) == 1) {
    com_i <- paste0("0", com_i)
  }

  V(net$network_product)$color2[i] <- paste0("Community ", com_i)
}

ggraph(net$network_product, layout = "kk") +
  geom_edge_link(edge_colour = "#a8a8a8") +
  geom_node_point(aes(size = size, color = color2)) +
  geom_node_text(aes(label = name), size = 2, vjust = 2.2) +
  ggtitle("Proximity Based Network Projection for Products") +
  theme_void() +
  scale_colour_manual(values = my_colors_3)
```

# References