diff --git a/docs/articles/joss/paper.bib b/docs/articles/joss/paper.bib index 29ce3c8..4ce50eb 100644 --- a/docs/articles/joss/paper.bib +++ b/docs/articles/joss/paper.bib @@ -10,7 +10,7 @@ @article{visser2006map } @article{monserudComparingGlobalVegetation1992, - title = {Comparing Global Vegetation Maps with the {{Kappa}} Statistic}, + title = {Comparing Global Vegetation Maps with the {Kappa} Statistic}, author = {Monserud, Robert A. and Leemans, Rik}, year = {1992}, month = aug, @@ -25,7 +25,7 @@ @article{monserudComparingGlobalVegetation1992 } @article{hammerbergImplicationsEmployingDetailed2018, - title = {Implications of Employing Detailed Urban Canopy Parameters for Mesoscale Climate Modelling: A Comparison between {{WUDAPT}} and {{GIS}} Databases over {{Vienna}}, {{Austria}}}, + title = {Implications of Employing Detailed Urban Canopy Parameters for Mesoscale Climate Modelling: A Comparison between {WUDAPT} and {GIS} Databases over {Vienna}, {Austria}}, shorttitle = {Implications of Employing Detailed Urban Canopy Parameters for Mesoscale Climate Modelling}, author = {Hammerberg, Kris and Brousse, Oscar and Martilli, Alberto and Mahdavi, Ardeshir}, year = {2018}, @@ -142,8 +142,8 @@ @article{grigg1965logic } @article{chingWUDAPTUrbanWeather2018a, - title = {{{WUDAPT}}: {{An Urban Weather}}, {{Climate}}, and {{Environmental Modeling Infrastructure}} for the {{Anthropocene}}}, - shorttitle = {{{WUDAPT}}}, + title = {{WUDAPT}: {An Urban Weather}, {Climate}, and {Environmental Modeling Infrastructure} for the {Anthropocene}}, + shorttitle = {{WUDAPT}}, author = {Ching, J. and Mills, G. and Bechtel, B. and See, L. and Feddema, J. and Wang, X. and Ren, C. and Brousse, O. and Martilli, A. and Neophytou, M. and Mouzourides, P. and Stewart, I. and Hanna, A. and Ng, E. and Foley, M. and Alexander, P. and Aliaga, D. and Niyogi, D. and Shreevastava, A. and Bhalachandran, P. and Masson, V. and Hidalgo, J. and Fung, J. and Andrade, M. and Baklanov, A. and Dai, W. 
and Milcinski, G. and Demuzere, M. and Brunsell, N. and Pesaresi, M. and Miao, S. and Mu, Q. and Chen, F. and Theeuwes, N.}, year = {2018}, month = sep, @@ -157,7 +157,7 @@ @article{chingWUDAPTUrbanWeather2018a } @article{cohenCoefficientAgreementNominal1960, - title = {A {{Coefficient}} of {{Agreement}} for {{Nominal Scales}}}, + title = {A {Coefficient} of {Agreement} for {Nominal Scales}}, author = {Cohen, Jacob}, year = {1960}, month = apr, @@ -172,8 +172,8 @@ @article{cohenCoefficientAgreementNominal1960 } @article{demuzereLCZGeneratorWeb2021, - title = {{{LCZ Generator}}: {{A Web Application}} to {{Create Local Climate Zone Maps}}}, - shorttitle = {{{LCZ Generator}}}, + title = {{LCZ Generator}: {A Web Application} to {Create Local Climate Zone Maps}}, + shorttitle = {{LCZ Generator}}, author = {Demuzere, Matthias and Kittner, Jonas and Bechtel, Benjamin}, year = {2021}, month = apr, @@ -187,30 +187,30 @@ @article{demuzereLCZGeneratorWeb2021 @techreport{bernardGenericAlgorithmAutomatically2023, type = {Preprint}, - title = {A Generic Algorithm to Automatically Classify Urban Fabric According to the {{Local Climate Zone}} System: Implementation in {{GeoClimate}} 0.0.1 and Application to {{French}} Cities}, - shorttitle = {A Generic Algorithm to Automatically Classify Urban Fabric According to the {{Local Climate Zone}} System}, + title = {A Generic Algorithm to Automatically Classify Urban Fabric According to the {Local Climate Zone} System: Implementation in {GeoClimate} 0.0.1 and Application to {French} Cities}, + shorttitle = {A Generic Algorithm to Automatically Classify Urban Fabric According to the {Local Climate Zone} System}, author = {Bernard, J{\'e}r{\'e}my and Bocher, Erwan and Gousseff, Matthieu and Leconte, Fran{\c c}ois and Le Saux Wiederhold, Elisabeth}, year = {2023}, month = mar, - institution = {{Climate and Earth system modeling}}, + institution = {Climate and Earth system modeling}, doi = {10.5194/egusphere-2023-371}, urldate = {2023-07-04}, 
} @techreport{bernardEstimationMissingBuilding2022, type = {Preprint}, - title = {Estimation of Missing Building Height in {{OpenStreetMap}} Data: A {{French}} Case Study Using {{GeoClimate}} 0.0.1}, - shorttitle = {Estimation of Missing Building Height in {{OpenStreetMap}} Data}, + title = {Estimation of Missing Building Height in {OpenStreetMap} Data: A {French} Case Study Using {GeoClimate} 0.0.1}, + shorttitle = {Estimation of Missing Building Height in {OpenStreetMap} Data}, author = {Bernard, J{\'e}r{\'e}my and Bocher, Erwan and Le Saux Wiederhold, Elisabeth and Leconte, Fran{\c c}ois and Masson, Val{\'e}ry}, year = {2022}, month = apr, - institution = {{Climate and Earth system modeling}}, + institution = {Climate and Earth system modeling}, doi = {10.5194/gmd-2021-428}, urldate = {2022-09-05} } @article{bocherGeoprocessingFrameworkCompute2018, - title = {A Geoprocessing Framework to Compute Urban Indicators: {{The MApUCE}} Tools Chain}, + title = {A Geoprocessing Framework to Compute Urban Indicators: {The MApUCE} Tools Chain}, shorttitle = {A Geoprocessing Framework to Compute Urban Indicators}, author = {Bocher, Erwan and Petit, Gwendall and Bernard, J{\'e}r{\'e}my and Palominos, Sylvain}, year = {2018}, @@ -226,10 +226,10 @@ @article{bocherGeoprocessingFrameworkCompute2018 @article{RJ-2018-009, author = {Edzer Pebesma}, - title = {{Simple Features for R: Standardized Support for Spatial - Vector Data}}, + title = {Simple Features for {R}: Standardized Support for Spatial + Vector Data}, year = {2018}, - journal = {{The R Journal}}, + journal = {The R Journal}, doi = {10.32614/RJ-2018-009}, url = {https://doi.org/10.32614/RJ-2018-009}, pages = {439--446}, diff --git a/docs/articles/joss/paper.md b/docs/articles/joss/paper.md index 38fec68..526a140 100644 --- a/docs/articles/joss/paper.md +++ b/docs/articles/joss/paper.md @@ -58,18 +58,18 @@ a confusion matrix to help qualify and quantify the misclassifications. 
The lczexplore package aims at comparing different LCZ classifications, but can be used to compare any pair of classifications on geographical units. -This software is available as a free and opensource R package. +This software is available as a free and open source R package. # Statement of need ## Comparing maps As stated in [@visser2006map] comparing maps is an important issue in environmental research. -The four main reasons to compare categorical variables on geographical units are: +The four main reasons to compare categorical variables on geographical units are: -- to assess the differences between maps generated by models under different scenarios and assumptions, -- to detect temporal changes, -- to calibrate or validate models, -- to perform uncertainty and sensitivity analysis. +- to assess the differences between maps generated by models under different scenarios and assumptions, +- to detect temporal changes, +- to calibrate or validate models, +- to perform uncertainty and sensitivity analysis. ## Comparing specifically LCZ maps @@ -77,26 +77,26 @@ The four main reasons to compare categorical variables on geographical units are Climate change is a growing concern for city planners with a special focus on Urban Heat Island phenomenons. The terms *rural* and *urban* lack of a clear definition and different classifications of urban and rural landscapes were proposed. -@stewart2012local defined an approach that complies with criteria of geographical classification defined in [@grigg1965logic]. +@stewart2012local defined an approach that complies with criteria of geographical classification defined in @grigg1965logic. Their Local Climate Zones (LCZ) are based on a logical segmentation of land-use and urban canopy parameters and define 10 urban types and 7 land cover types. 
LCZ have gained popularity in the past decade as they sum up relevant information and can help, for instance, apprehend the intensity of the Urban Heat Island [@kotharkar2018evaluating]. -Several methods aim to classify a territory into LCZ, but only few workflows allow an automatic classification -for any given area. -@quan2021systematic distinguishes two main streams of production of these LCZ: +Several methods aim to classify a territory into LCZ, but only few workflows allow an automatic classification +for any given area. +@quan2021systematic distinguishes two main streams of production of these LCZ: - the raster stream processes remotely sensed information, and applies machine learning algorithms trained using local experts' knowledge. In this way, the WUDAPT community [@chingWUDAPTUrbanWeather2018a] produced thousands of city-based LCZ maps, accessible via the LCZ Generator [@demuzereLCZGeneratorWeb2021], - but also large-scale maps for Europe, the continental United States and the whole world [@demuzere2019mapping], - [@demuzere2020combining], [@demuzere2022global]. + but also large-scale maps for Europe, the continental United States and the whole world + [@demuzere2019mapping; @demuzere2020combining; @demuzere2022global]. - the vector stream uses Geographic Information System (GIS) layers that represent the main topographic features, defines spatial units, computes urban canopy parameters and uses them to classify spatial units into LCZ. For instance, the GeoClimate geospatial toolbox produces LCZ classifications - from OpenStreetMap or french BDTopo data [@bocher2021geoclimate]. + from OpenStreetMap or french BDTopo data [@bocher2021geoclimate]. The existence of several methods to produce LCZ classifications, or the use of a method with different input data, raises the need for a tool to quickly get: @@ -129,7 +129,7 @@ In our case, each pixel is seen as an individual, each map is seen as a rater an is seen as the rate. 
The comprehensive Map Comparison kit, which was released in 2001 by the -Netherlands Environmental Assessment Agency [@visser2006map], is an example of a tool +Netherlands Environmental Assessment Agency [@visser2006map], is an example of a tool that provides multiple methods to compare raster maps. It includes a fuzzy algorithm which allows to tackle small shifts of one map from another. It only works on raster maps, only on Windows OS and doesn't allow automation for several pairs of maps. @@ -179,16 +179,16 @@ Main functions are presented in plain lines, the dashed boxes and arrows represe These functions are presented in detail in the next section, and they allow the following steps of exploration: 1. The LCZ classifications (or any other qualitative variables) are imported - from a file (geojson or shapefile format) + from a file (GeoJSON or Shapefile format) 2. Each LCZ classification can then be visualized 3. Some LCZ levels may be grouped in broader categories 4. A pair of LCZ classifications (or qualitative variable maps) can then be compared: - - a map of agreement/disagreement is produced, - - the general agreement and a pseudo-kappa indicator of agreement are computed, - - the summed surface of each LCZ type is computed for each classification, - - a confusion matrix shows how the levels of one LCZ classification break up into the levels of the other -5. Influence of the level of confidence on the agreement between classifications is performed - (sensitivity analysis) + - a map of agreement/disagreement is produced, + - the general agreement and a pseudo-kappa indicator of agreement are computed, + - the summed surface of each LCZ type is computed for each classification, + - a confusion matrix shows how the levels of one LCZ classification break up into the levels of the other +5. 
Influence of the level of confidence on the agreement between classifications is performed + (sensitivity analysis) All the steps of the analysis can be easily automated in R, for instance on several cities at a time. @@ -198,11 +198,11 @@ The main functions are presented in this section. More details about their argum ### From import to comparison -The `importLCZvect` function allows to import a LCZ classification from a geojson or a shapefile. +The `importLCZvect` function allows to import a LCZ classification from a GeoJSON or a Shapefile. Geometries and LCZ types are needed, but one can also optionally load unique identifiers and a confidence level for the LCZ type of each geometry. -The `importLCZraster` function allows the import from a raster map, and the user has to feed it a geotiff +The `importLCZraster` function allows the import from a raster map, and the user has to feed it a GeoTIFF and a bounding box of the area of interest. The import functions output objects of class `simple feature`, as defined by the @@ -228,19 +228,19 @@ Next, the function computes: The output of these functions are shown in the minimal example section. With the `standard` representation, comparing LCZ maps is made easy by a default setting of legends and colors. -The `alter` representation allows the user to deal with regrouped LCZ categories or any type of qualitative variables. +The `alter` representation allows the user to deal with regrouped LCZ categories or any type of qualitative variables. Levels can either be specified by the user or deduced from the data, colors can either be defined by the user or chosen from a random palette. ### Class levels grouping It is sometimes useful to group some LCZ types, for instance to create a broader category for -all the urban LCZ types and another for landcape LCZ types, +all the urban LCZ types and another for landscape LCZ types, or to group the levels with similar estimated impact on an urban heat island intensity. 
The `groupLCZ` function allows the user to specify the LCZ types one wants to group together, the names of the new resulting categories and their corresponding colors. -One can then feed `compareLCZ` function these new groups, setting `repr="alter"`, and specify desired levels and colors. +One can then feed `compareLCZ` function these new groups, setting `repr="alter"`, and specify desired levels and colors. ![Grouping of LCZ types into larger categories \label{fig: Grouping of LCZ types into larger categories}](fromBrutToGrouped.png) @@ -248,7 +248,7 @@ One can then feed `compareLCZ` function these new groups, setting `repr="alter" Some algorithms add a uniqueness value to the LCZ type it assigns to a spatial unit. It can measure if another LCZ level could have been assigned to this unit. Thus, it can be seen -as a confidence value of the LCZ type. +as a confidence value of the LCZ type. The `lczexplore ` package allows a sensitivity analysis according to this level of confidence, in order to answer the question: **does keeping only geometries with a higher confidence value @@ -260,7 +260,7 @@ This sensitivity analysis is performed considering all LCZ types and within each The agreement between classifications for the geometries with a confidence level higher than the threshold, and their numbers, are plotted in blue. -The agreement and the numbers of geometries under the threshold are plotted in magenta. +The agreement and the numbers of geometries under the threshold are plotted in magenta. On this example, one can see that ditching geometries that have a confidence level lower than 0.5 increases the agreement to more than 90%. The curve then tends to flatten, and the number of kept geometries decreases a lot (from 602 to 122). 
@@ -272,10 +272,11 @@ One also needs to notice that on this example, most geometries didn't have a con This package focuses on LCZ maps comparison, but more often than not, people working on LCZ maps also describe their area of interest with other categorical indicators. The workflow of comparison of LCZ maps can be used for any pair of maps of categorical variables, -under certain limitations: -- there must not be more than 36 levels for the categorical variable to explore, -- the associated geometries must be (multi) polygons or easily converted to them, - (typically, the package would not be suitable to compare road characterization), +under certain limitations: + +- there must not be more than 36 levels for the categorical variable to explore, +- the associated geometries must be (multi) polygons or easily converted to them, + (typically, the package would not be suitable to compare road characterization), - the geometries must be topographically valid (this is also true for LCZ). The `importQualVar` function allows the import of such variables on (multi-) polygons maps. @@ -285,12 +286,13 @@ of the package (`showLCZ`, `compareLCZ`, `groupLCZ`...). # Coding implementation `lczexplore` is an R package, all its specific functions are coded in R language. -It relies on state-of-the art packages: -- geographical computation requires the **`sf`** package for vector data and the **`terra`** package for raster data, +It relies on state-of-the art packages: + +- geographical computation requires the **`sf`** package for vector data and the **`terra`** package for raster data, - data management mainly requires the following packages: **`dplyr, tidyr, forcats, rlang`** - and **`methods`** packages, -- graphical production uses **`ggplot2, grDevices, cowplot`** and **`RColorBrewer`**, -- tests need the **`tinytest`** package. 
+ and **`methods`** packages, +- graphical production uses **`ggplot2, grDevices, cowplot`** and **`RColorBrewer`**, +- tests need the **`tinytest`** package. Every step corresponds to an R function (see the workflow on figure 1 for the name of the main functions). @@ -337,12 +339,12 @@ dirPathBDT<-paste0(dirPath,"bdtopo_2_2/Redon") # Import into an sf object the data produced with GeoClimate and # the OpenStreetMap data (city of Redon) redonOSM<-importLCZvect( - dirPath=dirPathOSM, + dirPath=dirPathOSM, file="rsu_lcz.geojson", column = "LCZ_PRIMARY", geomID="ID_RSU", confid="LCZ_UNIQUENESS_VALUE") -# Import into an sf object the data produced with GeoClimate and -# the french BDTopo V2.2 data (city of Redon) +# Import into an sf object the data produced with GeoClimate and +# the french BDTopo V2.2 data (city of Redon) redonBDT<-importLCZvect( dirPath=dirPathBDT, file="rsu_lcz.geojson", column = "LCZ_PRIMARY",geomID="ID_RSU", @@ -384,10 +386,10 @@ To compare the two loaded LCZ classifications, use the `compareLCZ` function. - if `saveG=TRUE`, the four plots created by the function will be written in a png file in the working directory. ```r -# Compare how the BDTopo and the OpenStreetMap Data produce different classifications. +# Compare how the BDTopo and the OpenStreetMap Data produce different classifications. # The outputs are stored in a list. -comparison<-compareLCZ(sf1=redonBDT,column1="LCZ_PRIMARY", wf1="BDTopo v2.2", +comparison<-compareLCZ(sf1=redonBDT,column1="LCZ_PRIMARY", wf1="BDTopo v2.2", sf2=redonOSM, column2="LCZ_PRIMARY", wf2="OpenStreetMap", ref=1, repr="standard", exwrite=F, location="Redon") @@ -397,15 +399,15 @@ All graphics are concatenated for a quick glance. 
![Local Climate Zones comparison for the French city of Redon based on the GeoClimate workflow applied to OpenStreetMap and BDTopo data \label{fig:LCZ comparison on Redon spatial units}](compareRedon.png){width="100%"} -The first and second maps show the spatial distribution of the first and second LCZ classifications levels, respectively. -The third map shows where the two classifications agree or disagree. +The first and second maps show the spatial distribution of the first and second LCZ classifications levels, respectively. +The third map shows where the two classifications agree or disagree. The last graphic is a confusion matrix: how the LCZ types of the first classification break-up in those of the second, in percentage of the surface. `CompareLCZ ` outputs a list called `matConfOut` which contains: - `$data`, intersected geometries, their identifiers and associated confidence value - (if fed to the fonction), their LCZ type and their area, + (if fed to the function), their LCZ type and their area, - `$areas`, the summed area for each LCZ for both classifications, - `$matConfLarge`, the confusion matrix, - `$percAgg`, the general agreement of the two classification on the whole area @@ -427,7 +429,7 @@ On the contrary, only 48% of the areas set to "dense trees" by BDTopo workflow a In the same way, 73% of the areas set to "compact low" by BDTopo workflow are set to "compact mid" by OpenStreetMap workflow. This is coherent with the fact that building heights are often missing on OpenStreetMap and that the algorithm that GeoClimate uses to predict them is less precise than -the available information of BDTopo. One may read more in [@bernardGenericAlgorithmAutomatically2023] +the available information of BDTopo. One may read more in @bernardGenericAlgorithmAutomatically2023 about differences between these workflows. 
@@ -444,11 +446,12 @@ confidence values present in the data and by the number of points, set by the `n ```r - # Path to the folder where compareLCZ stored output data file -# mainPath<-system.file("extdata", package = "lczexplore") (not executed to avoid useless files in the package) +# Path to the folder where compareLCZ stored output data file +# mainPath<-system.file("extdata", package = "lczexplore") +# (not executed to avoid useless files in the package) - -# Specification of geomID and confid columns for both dataset, + +# Specification of geomID and confid columns for both dataset, # as confidSensib needs them in the next step comparison<-compareLCZ( @@ -466,7 +469,8 @@ sensitAnalysis<-confidSensib(inputDf=comparison$data, filePath="", nPoints=5, wf1="bdtopo_2_2", wf2="osm", geomID1="ID_RSU", column1="LCZ_PRIMARY", confid1="LCZ_UNIQUENESS_VALUE", geomID2="ID_RSU.1",column2="LCZ_PRIMARY.1", confid2="LCZ_UNIQUENESS_VALUE.1", - plot=TRUE, saveG="") # one would use saveG = mainPath to write the results in the mainPath folder + plot=TRUE, saveG="") # one would use saveG = mainPath to write the results in + # the mainPath folder ``` @@ -474,7 +478,7 @@ Figure 7 shows the resulting graphics for the agreement per LCZ type (types not ![Sensitivity analysis according to confidence by LCZ levels ](confidSensibByLCZ.png) -For each LCZ type, the x-axis shows the minimum confidence threshold. The y-axis shows the agreement levels. +For each LCZ type, the x-axis shows the minimum confidence threshold. The y-axis shows the agreement levels. Each cyan point shows the average agreement between classifications for spatial units where the minimum confidence value is greater than the x-axis value. @@ -495,24 +499,24 @@ The `groupLCZ` function aggregates chosen levels into broader categories. For ea which name is the name of the broader category to create and which values are the levels to be grouped in this new category. 
-```r -redonOSMgrouped<-groupLCZ(redonOSM,column="LCZ_PRIMARY", +```r +redonOSMgrouped<-groupLCZ(redonOSM,column="LCZ_PRIMARY", urban=c("1","2","3","4","5","6","7","8","9"), - industry="10", + industry="10", vegetation=c("101","102","103","104"), impervious="105", - pervious="106", + pervious="106", water="107", colors=c("red","black","green","grey","burlywood","blue")) - -redonBDTgrouped<-groupLCZ(redonBDT,column="LCZ_PRIMARY", + +redonBDTgrouped<-groupLCZ(redonBDT,column="LCZ_PRIMARY", urban=c("1","2","3","4","5","6","7","8","9"), - industry="10", + industry="10", vegetation=c("101","102","103","104"), impervious="105", - pervious="106", + pervious="106", water="107", - colors=c("red","black","green","grey","burlywood","blue")) + colors=c("red","black","green","grey","burlywood","blue")) ``` @@ -525,12 +529,13 @@ as shown in the following example: ```r map2<-showLCZ(redonOSMgrouped, column="grouped",repr="alter", title="Regrouped Categories for the city of Redon", - LCZlevels = c("urban","industry","vegetation","impervious","pervious","water"), + LCZlevels = c("urban","industry","vegetation","impervious","pervious","water"), colors=c("red","black","green","grey","burlywood","blue")) compareLCZ(sf1=redonOSMgrouped, column1="grouped", wf1="OpenStreetMap data", sf2=redonBDTgrouped, column2="grouped", wf2="BDTopo data",ref=1, repr="alter",exwrite=F,location="Redon",saveG="", - LCZlevels = c("urban","industry","vegetation","impervious","pervious","water"), + LCZlevels = c("urban","industry","vegetation","impervious","pervious", + "water"), colors=c("red","black","green","grey","burlywood","blue")) ``` @@ -551,8 +556,8 @@ After the import, the usual functions can be used, as shown in the following cod utrfRedonBDT<-importQualVar(dirPath=paste0( system.file("extdata", package = "lczexplore"), "/bdtopo_2_2/Redon"), file="rsu_utrf_area.geojson", column="TYPO_MAJ") - -map3<-showLCZ(sf=utrfRedonBDT, column="TYPO_MAJ",repr="alter", + +map3<-showLCZ(sf=utrfRedonBDT, 
column="TYPO_MAJ",repr="alter", title = " UTRF classification of the French city of Redon") ``` ![An example of some qualitative variable: Urban Typology by Random Forest (UTRF)](importQualVarUTRF.png) @@ -569,12 +574,12 @@ utrfRedonOSM<- "/osm/2022/Redon"), file="rsu_utrf_area.geojson", column="TYPO_MAJ", geomID="ID_RSU", confid="UNIQUENESS_VALUE") - + utrfComparison<- compareLCZ(sf1=utrfRedonBDT, column1="TYPO_MAJ",wf1=" UTRF BDTopo", sf2=utrfRedonOSM, column2="TYPO_MAJ", wf2 = " UTRF OpenStreetMap", location = " Redon",exwrite=FALSE,repr="alter") - # Plot the confusion matrix of thes two classifications + # Plot the confusion matrix of these two classifications ``` ![Example of comparison on a qualitative variable (UTRF)](compareQualVar.png)