########################################### ## Swiss Municipality Mergers since 2000 ## ########################################### #-------------------------------------------------------------------------------------- # # General Info: # # R Script to generate a dataset of Swiss municipality mergers since 2000. # Latest change: Version 6, 22.04.2021 # Author: Dennis Engist, ETH Zürich # # Required datasets: # - "Historisiertes Gemeindeverzeichnis" (.txt, available from Federal Statistical Office) # For testing purposes: # - "Amtliches Gemeindeverzeichnis" (.xslx, available from Federal Statistical Office) # - Census 2000 dataset # Script based on 2021 version of both datasets, but substitution with future datasets # possible. # # Run time: approximately 6 hours on Intel Xeon Gold 6310, 2.1 GHz # # Refer to accompanying Excel-file for detailed Version history and documentation. # #-------------------------------------------------------------------------------------- #install.packages("dplyr") #install.packages("stringr") #install.packages("beepr") library(dplyr) library(stringr) library(readxl) library(beepr) ## Import "Historisches Gemeindeverzeichnis" ##------------------------------------------ ## Change path as required, or load file "20210101_GDEHist_GDE.txt" directly municipality_changes <- read.delim("~/Municipality Dataset/Historisches Gemeindeverzeichnis/20210101_GDEHist_GDE.txt", header=FALSE) ## Prepare municipality data ##-------------------------- ## Caution! When using future "Historisiertes Gemeindeverzeichnis" (2022 forward), # make sure the basic setup of the "Historisiertes Gemeindeverzeichnis" is unchanged. # Otherwise, amend column names. # Rename main variables names(municipality_changes)[3] <- "canton" names(municipality_changes)[4] <- "bfs" names(municipality_changes)[5] <- "name" names(municipality_changes)[9] <- "birth_mutation" names(municipality_changes)[10] <- "birth_mutation_type" names(municipality_changes)[11] <- "start_exist" names(municipality_changes)[12] <- "mutation" names(municipality_changes)[13] <- "mutation_type" names(municipality_changes)[14] <- "end_exist" names(municipality_changes)[15] <- "latest_change" # Change date format municipality_changes$start_exist <- as.Date(municipality_changes$start_exist, format = "%d.%m.%Y") municipality_changes$end_exist <- as.Date(municipality_changes$end_exist, format = "%d.%m.%Y") municipality_changes$latest_change <- as.Date(municipality_changes$latest_change, format = "%d.%m.%Y") # Drop municipalities that ceased to exist before 2000 municipality_changes <- subset(municipality_changes, end_exist>"1999-12-31" | is.na(end_exist)) # Separate changed from unchanged municipalities change <- municipality_changes[municipality_changes$latest_change>"2000-01-01",] # Establish baseline (year 2000) municipalities_2000 <- municipality_changes[municipality_changes$start_exist<="2000-01-01",] municipalities_2000_change <- municipalities_2000[!is.na(municipalities_2000$mutation),] municipalities_2000_nochange <- municipalities_2000[is.na(municipalities_2000$mutation),] ## Connect changes ##---------------- ## Prepare dataset to accommodate changes: each municipality of 2000 is on an individual # row, while all changes to this municipality are added in additional columns chain <- municipalities_2000_change chain <- chain[,-c(1,2,6,7,8)] # After one change chain$canton_second <- "XX" chain$bfs_second <- 0 chain$name_second <- "Oberdupfigen" chain$birth_mutation_second <- 0 chain$birth_mutation_type_second <- 0 chain$start_exist_second <- as.Date("1800-01-01") chain$mutation_second <- 0 chain$mutation_type_second <- 0 chain$end_exist_second <- as.Date("1800-01-01") chain$latest_change_second <- as.Date("1800-01-01") # After two changes chain$canton_third <- "XX" chain$bfs_third <- 0 chain$name_third <- "Oberdupfigen" chain$birth_mutation_third <- 0 chain$birth_mutation_type_third <- 0 chain$start_exist_third <- as.Date("1800-01-01") chain$mutation_third <- 0 chain$mutation_type_third <- 0 chain$end_exist_third <- as.Date("1800-01-01") chain$latest_change_third <- as.Date("1800-01-01") # After three changes chain$canton_fourth <- "XX" chain$bfs_fourth <- 0 chain$name_fourth <- "Oberdupfigen" chain$birth_mutation_fourth <- 0 chain$birth_mutation_type_fourth <- 0 chain$start_exist_fourth <- as.Date("1800-01-01") chain$mutation_fourth <- 0 chain$mutation_type_fourth <- 0 chain$end_exist_fourth <- as.Date("1800-01-01") chain$latest_change_fourth <- as.Date("1800-01-01") # After four changes (maximum between 2000 and 2021 - in future, further columns # might be required) chain$canton_fifth <- "XX" chain$bfs_fifth <- 0 chain$name_fifth <- "Oberdupfigen" chain$birth_mutation_fifth <- 0 chain$birth_mutation_type_fifth <- 0 chain$start_exist_fifth <- as.Date("1800-01-01") chain$mutation_fifth <- 0 chain$mutation_type_fifth <- 0 chain$end_exist_fifth <- as.Date("1800-01-01") chain$latest_change_fifth <- as.Date("1800-01-01") ## Connect the mutation to birth_mutation, thereby assigning the new municipality # to the old one. Some municipalities were changed multiple times, therefore we # need multiple rounds of this loop. # Loop through dataset: first round for (i in 1:nrow(chain)){ for (j in 1:nrow(change)){ if ((chain[i,]$mutation==change[j,]$birth_mutation)) { chain[i,]$canton_second <- change[j,]$canton chain[i,]$bfs_second <- change[j,]$bfs chain[i,]$name_second <- change[j,]$name chain[i,]$birth_mutation_second <- change[j,]$birth_mutation chain[i,]$birth_mutation_type_second <- change[j,]$birth_mutation_type chain[i,]$start_exist_second <- change[j,]$start_exist chain[i,]$mutation_second <- change[j,]$mutation chain[i,]$mutation_type_second <- change[j,]$mutation_type chain[i,]$end_exist_second <- change[j,]$end_exist chain[i,]$latest_change_second <- change[j,]$latest_change } } } chain$mutation_second[is.na(chain$mutation_second)] <- 0 # Loop through dataset: second round for (i in 1:nrow(chain)){ for (j in 1:nrow(change)){ if ((chain[i,]$mutation_second==change[j,]$birth_mutation)) { chain[i,]$canton_third <- change[j,]$canton chain[i,]$bfs_third <- change[j,]$bfs chain[i,]$name_third <- change[j,]$name chain[i,]$birth_mutation_third <- change[j,]$birth_mutation chain[i,]$birth_mutation_type_third <- change[j,]$birth_mutation_type chain[i,]$start_exist_third <- change[j,]$start_exist chain[i,]$mutation_third <- change[j,]$mutation chain[i,]$mutation_type_third <- change[j,]$mutation_type chain[i,]$end_exist_third <- change[j,]$end_exist chain[i,]$latest_change_third <- change[j,]$latest_change } } } chain$mutation_third[is.na(chain$mutation_third)] <- 0 # Loop through dataset: third round for (i in 1:nrow(chain)){ for (j in 1:nrow(change)){ if ((chain[i,]$mutation_third==change[j,]$birth_mutation)) { chain[i,]$canton_fourth <- change[j,]$canton chain[i,]$bfs_fourth <- change[j,]$bfs chain[i,]$name_fourth <- change[j,]$name chain[i,]$birth_mutation_fourth <- change[j,]$birth_mutation chain[i,]$birth_mutation_type_fourth <- change[j,]$birth_mutation_type chain[i,]$start_exist_fourth <- change[j,]$start_exist chain[i,]$mutation_fourth <- change[j,]$mutation chain[i,]$mutation_type_fourth <- change[j,]$mutation_type chain[i,]$end_exist_fourth <- change[j,]$end_exist chain[i,]$latest_change_fourth <- change[j,]$latest_change } } } chain$mutation_fourth[is.na(chain$mutation_fourth)] <- 0 # Loop through dataset: fourth round for (i in 1:nrow(chain)){ for (j in 1:nrow(change)){ if ((chain[i,]$mutation_fourth==change[j,]$birth_mutation)) { chain[i,]$canton_fifth <- change[j,]$canton chain[i,]$bfs_fifth <- change[j,]$bfs chain[i,]$name_fifth <- change[j,]$name chain[i,]$birth_mutation_fifth <- change[j,]$birth_mutation chain[i,]$birth_mutation_type_fifth <- change[j,]$birth_mutation_type chain[i,]$start_exist_fifth <- change[j,]$start_exist chain[i,]$mutation_fifth <- change[j,]$mutation chain[i,]$mutation_type_fifth <- change[j,]$mutation_type chain[i,]$end_exist_fifth <- change[j,]$end_exist chain[i,]$latest_change_fifth <- change[j,]$latest_change } } } chain$mutation_fifth[is.na(chain$mutation_fifth)] <- 0 # "chain" now contains all the changes of the 2000 municipalities, and in which # municipality they ended up. However, one must consider that some municipalities # changed their bfs number multiple times, so it is insufficient to simply assign # the last bfs number as the new one. It is necessary to create additional entries # for municipalities (or bfs numbers) that were created after 2000, but ceased to # exist before 2021. ## Divide dataset into number of changes per municipality change_one <- chain[chain$mutation_second==0,] change_two <- chain[chain$mutation_second!=0 & chain$mutation_third==0,] change_three <- chain[chain$mutation_third!=0 & chain$mutation_fourth==0,] change_four <- chain[chain$mutation_fourth!=0 & chain$mutation_fifth==0,] ## Extract changes after 2000 and bring datasets into common format # One change change_one_1 <- change_one[,c(1:3,11:13),] names(change_one_1) <- c("canton", "bfs", "name", "canton_new", "bfs_new", "name_new") change_one <- change_one_1 # Two changes change_two_1 <- change_two[,c(1:3,21:23),] change_two_2 <- change_two[,c(11:13,21:23),] names(change_two_1) <- c("canton", "bfs", "name", "canton_new", "bfs_new", "name_new") names(change_two_2) <- c("canton", "bfs", "name", "canton_new", "bfs_new", "name_new") change_two <- rbind(change_two_1, change_two_2) # Three changes change_three_1 <- change_three[,c(1:3,31:33),] change_three_2 <- change_three[,c(11:13,31:33),] change_three_3 <- change_three[,c(21:23,31:33),] names(change_three_1) <- c("canton", "bfs", "name", "canton_new", "bfs_new", "name_new") names(change_three_2) <- c("canton", "bfs", "name", "canton_new", "bfs_new", "name_new") names(change_three_3) <- c("canton", "bfs", "name", "canton_new", "bfs_new", "name_new") change_three <- rbind(change_three_1, change_three_2, change_three_3) # Four changes change_four_1 <- change_four[,c(1:3,41:43),] change_four_2 <- change_four[,c(11:13,41:43),] change_four_3 <- change_four[,c(21:23,41:43),] change_four_4 <- change_four[,c(31:33,41:43),] names(change_four_1) <- c("canton", "bfs", "name", "canton_new", "bfs_new", "name_new") names(change_four_2) <- c("canton", "bfs", "name", "canton_new", "bfs_new", "name_new") names(change_four_3) <- c("canton", "bfs", "name", "canton_new", "bfs_new", "name_new") names(change_four_4) <- c("canton", "bfs", "name", "canton_new", "bfs_new", "name_new") change_four <- rbind(change_four_1, change_four_2, change_four_3, change_four_4) # No changes nochange <- municipalities_2000_nochange nochange <- nochange[,c(3:8)] names(nochange) <- c("canton", "bfs", "name", "canton_new", "bfs_new", "name_new") nochange$canton_new <- nochange$canton nochange$bfs_new <- nochange$bfs nochange$name_new <- nochange$name ## Bind all together municipalities <- rbind(nochange, change_one, change_two, change_three, change_four) ## Delete duplicates - this automatically takes care of entries, in which the bfs # number did not change municipalities <- municipalities[!duplicated(municipalities),] municipalities <- municipalities[!is.na(municipalities$bfs),] ## Finally, municipalities that were merged but received a completely new bfs and a # new name need also an individual entry. for (i in 1:nrow(municipalities)){ if ((municipalities[i,]$bfs_new %in% municipalities$bfs)==TRUE){ } else { municipalities[nrow(municipalities)+1,]$bfs <- municipalities[i,]$bfs_new municipalities[nrow(municipalities),]$canton <- municipalities[i,]$canton_new municipalities[nrow(municipalities),]$name <- municipalities[i,]$name_new municipalities[nrow(municipalities),]$bfs_new <- municipalities[i,]$bfs_new municipalities[nrow(municipalities),]$canton_new <- municipalities[i,]$canton_new municipalities[nrow(municipalities),]$name_new <- municipalities[i,]$name_new } } ## Minor fixes ##------------ # Guttet-Feschel missing due to odd entry in Historisiertes Gemeindeverzeichnis # Add manually fix <- data.frame("VS", 6117, "Guttet-Feschel", "VS", 6117, "Guttet-Feschel") names(fix) <- c("canton", "bfs", "name", "canton_new", "bfs_new", "name_new") municipalities <- rbind(municipalities,fix) # Beromünster, Neudorf, Gunzwil, Schwarzenbach, Lavertezzo, Cugnasco Gerra (and its predecessors) and # Buch am Irchel: area change, not properly represented in dataset municipalities[(municipalities$bfs==24 & municipalities$name=="Buch am Irchel"),]$bfs_new <- 24 municipalities[(municipalities$bfs==24 & municipalities$name=="Buch am Irchel"),]$canton_new <- "ZH" municipalities[(municipalities$bfs==24 & municipalities$name=="Buch am Irchel"),]$name_new <- "Buch am Irchel" municipalities[(municipalities$bfs==1081 & municipalities$name=="Beromünster"),]$bfs_new <- 1081 municipalities[(municipalities$bfs==1081 & municipalities$name=="Beromünster"),]$canton_new <- "LU" municipalities[(municipalities$bfs==1081 & municipalities$name=="Beromünster"),]$name_new <- "Beromünster" municipalities[(municipalities$bfs==1092 & municipalities$name=="Neudorf"),]$bfs_new <- 1081 municipalities[(municipalities$bfs==1092 & municipalities$name=="Neudorf"),]$canton_new <- "LU" municipalities[(municipalities$bfs==1092 & municipalities$name=="Neudorf"),]$name_new <- "Beromünster" municipalities[(municipalities$bfs==1087 & municipalities$name=="Gunzwil"),]$bfs_new <- 1081 municipalities[(municipalities$bfs==1087 & municipalities$name=="Gunzwil"),]$canton_new <- "LU" municipalities[(municipalities$bfs==1087 & municipalities$name=="Gunzwil"),]$name_new <- "Beromünster" municipalities[(municipalities$bfs==1101 & municipalities$name=="Schwarzenbach"),]$bfs_new <- 1081 municipalities[(municipalities$bfs==1101 & municipalities$name=="Schwarzenbach"),]$canton_new <- "LU" municipalities[(municipalities$bfs==1101 & municipalities$name=="Schwarzenbach"),]$name_new <- "Beromünster" municipalities[(municipalities$bfs==5112 & municipalities$name=="Lavertezzo"),]$bfs_new <- 5112 municipalities[(municipalities$bfs==5112 & municipalities$name=="Lavertezzo"),]$canton_new <- "TI" municipalities[(municipalities$bfs==5112 & municipalities$name=="Lavertezzo"),]$name_new <- "Lavertezzo" municipalities[(municipalities$bfs==5104 & municipalities$name=="Cugnasco"),]$bfs_new <- 5138 municipalities[(municipalities$bfs==5104 & municipalities$name=="Cugnasco"),]$canton_new <- "TI" municipalities[(municipalities$bfs==5104 & municipalities$name=="Cugnasco"),]$name_new <- "Cugnasco-Gerra" municipalities[(municipalities$bfs==5107 & municipalities$name=="Gerra (Verzasca)"),]$bfs_new <- 5138 municipalities[(municipalities$bfs==5107 & municipalities$name=="Gerra (Verzasca)"),]$canton_new <- "TI" municipalities[(municipalities$bfs==5107 & municipalities$name=="Gerra (Verzasca)"),]$name_new <- "Cugnasco-Gerra" municipalities[(municipalities$bfs==5138 & municipalities$name=="Cugnasco-Gerra"),]$bfs_new <- 5138 municipalities[(municipalities$bfs==5138 & municipalities$name=="Cugnasco-Gerra"),]$canton_new <- "TI" municipalities[(municipalities$bfs==5138 & municipalities$name=="Cugnasco-Gerra"),]$name_new <- "Cugnasco-Gerra" # Monte and Casima: to Castel San Pietro, not Breggia - two fusions were assigned the # same number municipalities[(municipalities$bfs==5256 & municipalities$name_new=="Breggia"),]$bfs_new <- 5249 municipalities[(municipalities$bfs==5256 & municipalities$name_new=="Breggia"),]$canton_new <- "TI" municipalities[(municipalities$bfs==5256 & municipalities$name_new=="Breggia"),]$name_new <- "Castel San Pietro" municipalities[(municipalities$bfs==5248 & municipalities$name_new=="Breggia"),]$bfs_new <- 5249 municipalities[(municipalities$bfs==5248 & municipalities$name_new=="Breggia"),]$canton_new <- "TI" municipalities[(municipalities$bfs==5248 & municipalities$name_new=="Breggia"),]$name_new <- "Castel San Pietro" # Castel San Pietro remained Castel San Pietro municipalities[(municipalities$bfs==5249 & municipalities$name=="Castel San Pietro"),]$bfs_new <- 5249 municipalities[(municipalities$bfs==5249 & municipalities$name=="Castel San Pietro"),]$canton_new <- "TI" municipalities[(municipalities$bfs==5249 & municipalities$name=="Castel San Pietro"),]$name_new <- "Castel San Pietro" ## Test the dataset ##----------------- ## Test with "Amtliches Gemeindeverzeichnis" 2021 # Import and rename (depending on your storage location and file name, adjustments must be made) municipalities_2021 <- read_excel("~/be-b-00.04-agv-01.xlsx", sheet = "GDE") names(municipalities_2021)[3] <- "bfs_new" # Join to municipality dataset, check for missing entries test <- left_join(municipalities, municipalities_2021, by="bfs_new") test <- test[is.na(test$GDENAME),] test2 <- left_join(municipalities_2021, municipalities, by="bfs_new") test2 <- test2[is.na(test2$name_new),] ## Test with census data of 2000: check number of municipalities in 2000 # Import and clean data (depending on your storage location and file name, adjustments must be made) language_municipality <- read.csv("~/language_municipality.csv", sep=";") names(language_municipality)[2] <- "bfs" language_municipality <- language_municipality[!grepl("NAME", language_municipality$bfs),] language_municipality <- language_municipality[!grepl("Schweiz", language_municipality$bfs),] language_municipality <- language_municipality[!grepl(">>", language_municipality$bfs),] language_municipality <- language_municipality[!grepl("- ", language_municipality$bfs),] language_municipality$bfs <- substring(language_municipality$bfs, 7, 10) language_municipality$bfs <- str_remove(language_municipality$bfs, "^0+") language_municipality$population <- language_municipality$Hauptsprachen...Total.Schweizer+language_municipality$Hauptsprachen...Total.Ausländer language_municipality$german_population <- language_municipality$Deutsch.Schweizer+language_municipality$Deutsch.Ausländer language_municipality$french_population <- language_municipality$Französisch.Schweizer+language_municipality$Französisch.Ausländer language_municipality <- language_municipality[,-c(1,3:8)] language_municipality$share_german <- language_municipality$german_population/language_municipality$population language_municipality$share_french <- language_municipality$french_population/language_municipality$population language_municipality$bfs <- as.numeric(language_municipality$bfs) # Join municipality dataset to language dataset and check for missing observations test3 <- left_join(language_municipality, municipalities, by="bfs") test3 <- test3[is.na(test2$bfs_new),] ## Notes ##------ # This dataset is primarily constructed to match BFS numbers. It can be used to match # municipality names as well, however some entries will be incomplete (when the bfs # number has remained constant but the name changed). # # This script can be amended to create a dataset to match names: when deleting duplicates # make sure to set the variable "name" as a criteria, instead of "bfs". ## Export dataset ##--------------- setwd("~/Municipality Dataset") write.csv(municipalities, "municipality_mergers_revised.csv")