@inproceedings{saunders2024baseboostdepth,
author="Kieran Saunders and Luis J. Manso and George Vogiatzis",
title="BaseBoostDepth: Exploiting Larger Baselines For Self-supervised Monocular Depth Estimation",
booktitle="arXiv pre-print 2407.20437",
year="2024",
pages="22",
}
In the domain of multi-baseline stereo, the conventional understanding is that, in general, increasing baseline separation substantially enhances the accuracy of depth estimation. However, prevailing self-supervised depth estimation architectures primarily use minimal frame separation and a constrained stereo baseline. Larger frame separations can be employed; however, we show that this results in diminished depth quality due to various factors, including significant changes in brightness and increased areas of occlusion. In response to these challenges, our proposed method, BaseBoostDepth, incorporates a curriculum learning-inspired optimization strategy to effectively leverage larger frame separations. However, we show that our curriculum learning-inspired strategy alone does not suffice, as larger baselines still cause pose estimation drift. Therefore, we introduce incremental pose estimation to enhance the accuracy of pose estimations, resulting in significant improvements across all depth metrics. Additionally, to improve the robustness of the model, we introduce error-induced reconstructions, which optimize reconstructions with added error to the pose estimations. Ultimately, our final depth network achieves state-of-the-art performance on the KITTI and SYNS-Patches datasets across image-based, edge-based, and point cloud-based metrics without increasing computational complexity at test time. The project website can be found at https://kieran514.github.io/BaseBoostDepth-Project/.
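The incremental pose estimation idea, composing several small inter-frame poses instead of regressing one large-baseline pose directly, can be illustrated with a short sketch. This is a simplified illustration under an assumed SE(3) matrix convention, not the authors' implementation; `se3` and the step values are placeholders for whatever a pose network would predict between consecutive frames.

```python
import numpy as np

def se3(yaw_rad, translation):
    """Build a 4x4 SE(3) matrix from a yaw angle and a 3D translation (toy parameterisation)."""
    c, s = np.cos(yaw_rad), np.sin(yaw_rad)
    T = np.eye(4)
    T[:3, :3] = np.array([[c, -s, 0.0], [s, c, 0.0], [0.0, 0.0, 1.0]])
    T[:3, 3] = translation
    return T

def compose_incremental_poses(incremental_poses):
    """Chain consecutive-frame poses T_{t->t+1}, ..., T_{t+k-1->t+k} into one
    large-baseline pose T_{t->t+k} (left-multiply: later motions applied after earlier ones)."""
    total = np.eye(4)
    for T in incremental_poses:
        total = T @ total
    return total

# Hypothetical consecutive-frame estimates (small rotations, ~0.8 m forward motion each).
steps = [se3(0.01, [0.0, 0.0, 0.8]) for _ in range(4)]
large_baseline_pose = compose_incremental_poses(steps)
print(large_baseline_pose[:3, 3])   # accumulated forward motion of about 3.2 m
```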
@inproceedings{zakka2024action,
author="Vincent G. Zakka and Dai Zhuangzhuang and Luis J. Manso",
title="Action Recognition for Privacy-Preserving Ambient Assisted Living",
booktitle="Proceedings of the International Conference on AI in Healthcare",
year="2024",
pages="203--217",
}
The care challenges posed by an increasing elderly population have made ambient assisted living a significant research focus. Computer vision-based technologies can monitor older adults’ daily activities in their homes, providing insights into their health and prolonging their capacity to live independently. However, despite the benefits of these technologies, their widespread adoption has been hampered due to privacy concerns. These concerns frequently stem from the need to stream user data to cloud servers for computation, posing a risk to user privacy. This study proposes a privacy-preserving method for activity recognition that enhances the accuracy of activity recognition locally, eliminating the need to stream user data to the cloud. The paper’s contributions are twofold: a Temporal Decoupling Graph Depthwise Separable Convolution Network (TD-GDSCN) to address the challenges of real-time performance and a data augmentation technique to prevent accuracy degradation in real-world environmental conditions. The experimental results show that the TD-GDSCN and data augmentation techniques outperform existing methods in addressing real-time performance and degradation challenges on the NTU-RGB+D 60 and NW-UCLA datasets.
@article{rodriguezcriado2024multiperson,
author="Daniel Rodriguez-Criado and Pilar Bachiller and George Vogiatzis and Luis J. Manso",
title="Multi-person 3D pose estimation from unlabelled data",
journal="Machine Vision and Applications",
year="2024",
volume="35",
number="46",
pages="1--18",
doi="https://doi.org/10.1007/s00138-024-01530-6",
}
Its numerous applications make multi-human 3D pose estimation a remarkably impactful area of research. Nevertheless, it presents several challenges, especially when approached using multiple views and regular RGB cameras as the only input. Firstly, each person must be uniquely identified in the different views. Secondly, it must be robust to noise, partial occlusions, and views where a person may not be detected. Thirdly, many pose estimation approaches rely on environment-specific annotated datasets that are frequently prohibitively expensive and/or require specialised hardware. In this work, we address these three challenges with the help of self-supervised learning. Specifically, this is the first multi-camera, multi-person data-driven approach that does not require an annotated dataset. In particular, we present a three-staged pipeline and a rigorous evaluation providing evidence that our approach performs faster than other state-of-the-art algorithms, with comparable accuracy, and, most importantly, does not require annotated datasets. The pipeline is composed of a 2D skeleton detection step, followed by a Graph Neural Network to estimate cross-view correspondences of the people in the scenario, and a Multi-Layer Perceptron that transforms the 2D information into 3D pose estimations. Our proposal comprises the last two steps, and it is compatible with any 2D skeleton detector as input. These two models are trained in a self-supervised manner, thus avoiding the need for datasets annotated with 3D ground-truth poses.
@article{teja2024survey,
author="Phani Teja Singamaneni and Pilar Bachiller-Burgos and Luis J. Manso and Anais Garrell and Alberto Sanfeliu and Anne Spalanzani and Rachid Alami",
title="A survey on socially aware robot navigation: Taxonomy and future challenges",
journal="The International Journal of Robotics Research",
year="2024",
volume="0",
number="0",
pages="1--49",
}
Socially aware robot navigation is gaining popularity with the increase in delivery and assistive robots. The research is further fueled by a need for socially aware navigation skills in autonomous vehicles to move safely and appropriately in spaces shared with humans. Although most of these are ground robots, drones are also entering the field. In this paper, we present a literature survey of the works on socially aware robot navigation in the past 10 years. We propose four different faceted taxonomies to navigate the literature and examine the field from four different perspectives. Through the taxonomic review, we discuss the current research directions and the extending scope of applications in various domains. Further, we put forward a list of current research opportunities and present a discussion on possible future challenges that are likely to emerge in the field.
@inproceedings{criado2023synthesizing,
author="Daniel Rodriguez-Criado and Maria Chli and Luis J. Manso and George Vogiatzis",
title="Synthesizing traffic datasets using graph neural networks",
booktitle="IEEE 26th International Conference on Intelligent Transportation Systems (ITSC)",
year="2023",
pages="1--43",
}
Traffic congestion in urban areas presents significant challenges, and Intelligent Transportation Systems (ITS) have sought to address these via automated and adaptive controls. However, these systems often struggle to transfer simulated experiences to real-world scenarios. This paper introduces a novel methodology for bridging this ‘sim-real’ gap by creating photorealistic images from 2D traffic simulations and recorded junction footage. We propose a novel image generation approach, integrating a Conditional Generative Adversarial Network with a Graph Neural Network (GNN) to facilitate the creation of realistic urban traffic images. We harness GNNs’ ability to process information at different levels of abstraction alongside segmented images for preserving locality data. The presented architecture leverages the power of SPADE and Graph ATtention (GAT) network models to create images based on simulated traffic scenarios. These images are conditioned by factors such as entity positions, colors, and time of day. The uniqueness of our approach lies in its ability to effectively translate structured and human-readable conditions, encoded as graphs, into realistic images. This advancement contributes to applications requiring rich traffic image datasets, from data augmentation to urban traffic solutions. We further provide an application to test the model’s capabilities, including generating images with manually defined positions for various entities.
@inproceedings{saunders2023weather,
author="Kieran Saunders and George Vogiatzis and Luis J. Manso",
title="Self-supervised Monocular Depth Estimation: Let's Talk About The Weather",
booktitle="Proceedings of the IEEE/CVF International Conference on Computer Vision (ICCV)",
year="2023",
pages="1--18",
publisher="CVF/IEEE",
}
Current self-supervised depth estimation architectures rely on clear and sunny weather scenes to train deep neural networks. However, in many locations, this assumption is too strong. For example, in the UK in 2021, it rained on 149 days. For these architectures to be effective in real-world applications, we must create models that can generalise to all weather conditions, times of the day and image qualities. Using a combination of computer graphics and generative models, one can augment existing sunny-weather data in a variety of ways that simulate adverse weather effects. While it is tempting to use such data augmentations for self-supervised depth, in the past this was shown to degrade performance instead of improving it. In this paper, we put forward a method that uses augmentations to remedy this problem. By exploiting the correspondence between unaugmented and augmented data we introduce a pseudo-supervised loss for both depth and pose estimation. This brings back some of the benefits of supervised learning while still not requiring any labels. We also make a series of practical recommendations which collectively offer a reliable, efficient framework for weather-related augmentation of self-supervised depth from monocular video. We present extensive testing to show that our method, Robust-Depth, achieves SotA performance on the KITTI dataset while significantly surpassing SotA on challenging, adverse condition data such as DrivingStereo, Foggy CityScape and NuScenes-Night. The project website can be found here.
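The pseudo-supervised idea, using the prediction on the clean image as a gradient-detached target for the prediction on the weather-augmented image, can be sketched as follows. This is a minimal PyTorch illustration under assumed tensor shapes, not the released Robust-Depth code; the loss weight and random stand-in predictions are placeholders.

```python
import torch
import torch.nn.functional as F

def pseudo_supervised_depth_loss(disp_augmented, disp_clean, weight=1.0):
    """L1 consistency between the disparity predicted on the augmented input and the
    (detached) disparity predicted on the corresponding clean input."""
    target = disp_clean.detach()        # clean prediction acts as a pseudo label
    return weight * F.l1_loss(disp_augmented, target)

# Toy example: batch of 2 single-channel disparity maps standing in for network outputs.
disp_clean = torch.rand(2, 1, 192, 640)
disp_aug = torch.rand(2, 1, 192, 640, requires_grad=True)
loss = pseudo_supervised_depth_loss(disp_aug, disp_clean)
loss.backward()                          # gradients flow only through the augmented branch
```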
@article{francis2023principles,
author="Anthony Francis and Claudia Perez-D'Arpino and Chengshu Li and Fei Xia and Alexandre Alahi and Rachid Alami and Aniket Bera and Abhijat Biswas and Joydeep Biswas and Rohan Chandra and Hao-Tien Lewis Chiang and Michael Everett and Sehoon Ha and Justin Hart and Jonathan P. How and Haresh Karnan and Tsang-Wei Edward Lee and Luis J. Manso and Reuth Mirksy and Soeren Pirk and Phani Teja Singamaneni and Peter Stone and Ada V. Taylor and Peter Trautman and Nathan Tsoi and Marynel Vazquez and Xuesu Xiao and Peng Xu and Naoki Yokoyama and Alexander Toshev and Roberto Martin-Martin",
title="Principles and Guidelines for Evaluating Social Robot Navigation Algorithms",
journal="pre-print",
year="2023",
volume="",
number="",
pages="1--43",
}
A major challenge to deploying robots widely is navigation in human-populated environments, commonly referred to as social robot navigation. While the field of social navigation has advanced tremendously in recent years, the fair evaluation of algorithms that tackle social navigation remains hard because it involves not just robotic agents moving in static environments but also dynamic human agents and their perceptions of the appropriateness of robot behavior. In contrast, clear, repeatable, and accessible benchmarks have accelerated progress in fields like computer vision, natural language processing and traditional robot navigation by enabling researchers to fairly compare algorithms, revealing limitations of existing solutions and illuminating promising new directions. We believe the same approach can benefit social navigation. In this paper, we pave the road towards common, widely accessible, and repeatable benchmarking criteria to evaluate social robot navigation. Our contributions include (a) a definition of a socially navigating robot as one that respects the principles of safety, comfort, legibility, politeness, social competency, agent understanding, proactivity, and responsiveness to context, (b) guidelines for the use of metrics, development of scenarios, benchmarks, datasets, and simulators to evaluate social navigation, and (c) a design of a social navigation metrics framework to make it easier to compare results from different simulators, robots and datasets.
@inproceedings{kapoor2023socnavgym,
author="Aditya Kapoor and Sushant Swamy and Pilar Bachiller and Luis J. Manso",
title="SocNavGym: A Reinforcement Learning Gym for Social Navigation",
booktitle="Proceedings of the 32nd IEEE International Conference on Robot & Human Interactive Communication (RO-MAN)",
year="2023",
pages="1--8",
publisher="IEEE",
}
It is essential for autonomous robots to be socially compliant while navigating in human-populated environments. Machine Learning and, especially, Deep Reinforcement Learning have recently gained considerable traction in the field of Social Navigation. This can be partially attributed to the resulting policies not being bound by human limitations in terms of code complexity or the number of variables that are handled. Unfortunately, the lack of safety guarantees and the large data requirements by DRL algorithms make learning in the real world unfeasible. To bridge this gap, simulation environments are frequently used. We propose SocNavGym, an advanced simulation environment for social navigation that can generate a wide variety of social navigation scenarios and facilitates the development of intelligent social agents. SocNavGym is lightweight, fast, easy to use, and can be effortlessly configured to generate different types of social navigation scenarios. It can also be configured to work with different hand-crafted and data-driven social reward signals and to yield a variety of evaluation metrics to benchmark agents’ performance. Further, we also provide a case study where a Dueling-DQN agent is trained to learn social-navigation policies using SocNavGym. The results provide evidence that SocNavGym can be used to train an agent from scratch to navigate in simple as well as complex social scenarios. Our experiments also show that the agents trained using the data-driven reward function display more advanced social compliance in comparison to those trained using the heuristic-based reward function.
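The case study trains a Dueling-DQN agent; the dueling head such an agent relies on can be sketched in PyTorch. This is a generic dueling architecture with placeholder observation size, hidden width and action count, not the exact network used in the paper.

```python
import torch
import torch.nn as nn

class DuelingQNetwork(nn.Module):
    """Generic dueling head: Q(s, a) = V(s) + A(s, a) - mean_a A(s, a)."""
    def __init__(self, obs_dim=32, n_actions=9, hidden=128):
        super().__init__()
        self.backbone = nn.Sequential(nn.Linear(obs_dim, hidden), nn.ReLU())
        self.value = nn.Linear(hidden, 1)               # state-value stream
        self.advantage = nn.Linear(hidden, n_actions)   # advantage stream

    def forward(self, obs):
        h = self.backbone(obs)
        v = self.value(h)
        a = self.advantage(h)
        return v + a - a.mean(dim=1, keepdim=True)

q_net = DuelingQNetwork()
q_values = q_net(torch.randn(4, 32))   # batch of 4 observations -> (4, 9) Q-values
greedy_actions = q_values.argmax(dim=1)
```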
@inproceedings{saunders2023dynadm,
author="Kieran Saunders and George Vogiatzis and Luis J. Manso",
title="Dyna-DM: Dynamic Object-aware Self-supervised Monocular Depth Maps",
booktitle="Proceedings of 2023 IEEE International Conference on Autonomous Robot Systems and Competitions (ICARSC)",
year="2023",
pages="1--7",
publisher="IEEE",
doi="https://doi.org/10.1109/ICARSC58346.2023.10129564",
}
Self-supervised monocular depth estimation has been a subject of intense study in recent years, because of its applications in robotics and autonomous driving. Much of the recent work focuses on improving depth estimation by increasing architecture complexity. This paper shows that state-of-the-art performance can also be achieved by improving the learning process rather than increasing model complexity. More specifically, we propose (i) disregarding small potentially dynamic objects when training, and (ii) employing an appearance-based approach to separately estimate object pose for truly dynamic objects. We demonstrate that these simplifications reduce GPU memory usage by 29% and result in qualitatively and quantitatively improved depth maps.
@article{criado2021gnnDisruption,
author="Pilar Bachiller and Daniel Rodriguez-Criado and Ronit R. Jorvekar and Pablo Bustos and Diego R. Faria and Luis J. Manso",
title="A graph neural network to model disruption in human-aware robot navigation",
journal="Multimedia Tools and Applications",
year="2021",
volume="",
number="",
pages="1--19",
doi="https://doi.org/10.1007/s11042-021-11113-6",
}
Autonomous navigation is a key skill for assistive and service robots. To be successful, robots have to minimise the disruption caused to humans while moving. This implies predicting how people will move and complying with social conventions. Avoiding disrupting personal spaces, people’s paths and interactions are examples of these social conventions. This paper leverages Graph Neural Networks to model robot disruption considering the movement of the humans and the robot so that the model built can be used by path planning algorithms. Along with the model, this paper presents an evolution of the dataset SocNav1 (Manso et al 2020) which considers the movement of the robot and the humans, and an updated scenario-to-graph transformation which is tested using different Graph Neural Network blocks. The model trained achieves close-to-human performance in the dataset. In addition to its accuracy, the main advantage of the approach is its scalability in terms of the number of social factors that can be considered in comparison with handcrafted models. The dataset and the model are available in a public repository (https://github.com/gnns4hri/sngnnv2).
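To make the scenario-to-graph plus GNN idea concrete, the sketch below builds a tiny graph (robot and humans as nodes, edges for proximity and interaction) and scores it with two rounds of neighbourhood averaging implemented directly with adjacency matrices. It is a toy stand-in with invented node features and dimensions, not the model released in the repository above.

```python
import torch
import torch.nn as nn

class TinyGraphScorer(nn.Module):
    """Two graph-convolution-style layers followed by a pooled disruption score in [0, 1]."""
    def __init__(self, in_dim=5, hidden=16):
        super().__init__()
        self.lin1 = nn.Linear(in_dim, hidden)
        self.lin2 = nn.Linear(hidden, hidden)
        self.readout = nn.Linear(hidden, 1)

    def forward(self, x, adj):
        adj = adj + torch.eye(adj.size(0))                  # add self-loops
        a_norm = torch.diag(1.0 / adj.sum(dim=1)) @ adj     # row-normalised adjacency
        h = torch.relu(self.lin1(a_norm @ x))
        h = torch.relu(self.lin2(a_norm @ h))
        return torch.sigmoid(self.readout(h.mean(dim=0)))   # graph-level score

# Illustrative node features: [x, y, vx, vy, is_robot]; node 0 is the robot, nodes 1-2 are humans.
x = torch.tensor([[0.0, 0.0, 0.2, 0.0, 1.0],
                  [1.0, 0.5, 0.0, 0.0, 0.0],
                  [1.2, -0.4, 0.0, 0.0, 0.0]])
adj = torch.tensor([[0.0, 1.0, 1.0],    # robot close to both humans
                    [1.0, 0.0, 1.0],    # humans interacting with each other
                    [1.0, 1.0, 0.0]])
score = TinyGraphScorer()(x, adj)       # untrained network; output is structural only
```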
@inproceedings{vega2021towards,
author="A. Vega-Magro and R. Gondkar and L.J. Manso and P. Nunez",
title="Towards efficient human-robot cooperation for socially-aware robot navigation in human-populated environments: the SNAPE framework",
booktitle="Proceedings of 2021 IEEE International Conference on Robotics and Automation (ICRA)",
year="2021",
pages="3169--3174",
publisher="IEEE",
doi="10.1109/ICRA48506.2021.9561448",
}
It is widely accepted that in the future, robots will cooperate with humans in everyday tasks. Robots interacting with humans will require social awareness when performing their tasks, which will require navigation. While navigating, robots should aim to avoid distressing people in order to maximize their chance of social acceptance, for instance, by avoiding getting too close to people or disrupting interactions. Most research approaches these problems by planning socially accepted paths; however, in everyday situations, there are many examples where a simple path planner cannot solve all of the navigation problems robots are predicted to face. For instance, requesting permission to interrupt a conversation if an alternative path cannot be determined requires deliberative skills. This article presents the Social Navigation framework for Autonomous robots in Populated Environments (SNAPE), where different software agents are integrated within a robotics cognitive architecture. SNAPE addresses action planning aimed at socially-aware navigation in realistic situations: it plans socially accepted paths and conversations to negotiate its trajectory to reach targets. In this article, the framework is evaluated in different use-cases where the robot, during its navigation, has to interact with different people in order to reach its goal. The results show that participants report that the robot’s behavior was realistic and human-like.
@article{bird2022fruit,
author="Jordan J. Bird and Chloe Barnes and Luis J. Manso and Aniko Ekart and Diego R. Faria",
title="Fruit quality and defect image classification with conditional GAN data augmentation",
journal="Scientia Horticulturae",
year="2021",
volume="293",
number="",
pages="110684",
doi="https://doi.org/10.1016/j.scienta.2021.110684",
}
Contemporary Artificial Intelligence technologies allow for the employment of Computer Vision to discern good crops from bad, providing a step in the pipeline of selecting healthy fruit from undesirable fruit, such as those which are mouldy or damaged. State-of-the-art works in the field report high accuracy results on small datasets (<1000 images), which are not representative of the population regarding real-world usage. The goals of this study are to further enable real-world usage by improving generalisation with data augmentation as well as to reduce overfitting and energy usage through model pruning. In this work, we suggest a machine learning pipeline that combines the ideas of fine-tuning, transfer learning, and generative model-based training data augmentation towards improving fruit quality image classification. A linear network topology search is performed to tune a VGG16 lemon quality classification model using a publicly-available dataset of 2690 images. We find that appending a 4096 neuron fully connected layer to the convolutional layers leads to an image classification accuracy of 83.77%. We then train a Conditional Generative Adversarial Network on the training data for 2000 epochs, and it learns to generate relatively realistic images. Grad-CAM analysis of the model trained on real photographs shows that the synthetic images can exhibit classifiable characteristics such as shape, mould, and gangrene. A higher image classification accuracy of 88.75% is then attained by augmenting the training with synthetic images, arguing that Conditional Generative Adversarial Networks have the ability to produce new data to alleviate issues of data scarcity. Finally, model pruning is performed via polynomial decay, where we find that the Conditional GAN-augmented classification network can retain 81.16% classification accuracy when compressed to 50% of its original size.
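The fine-tuning setup described (a VGG16 convolutional base with an appended 4096-neuron fully connected layer) can be approximated with a short Keras sketch. The input size, freezing policy and class count below are assumptions for illustration, not the paper's exact configuration.

```python
import tensorflow as tf

base = tf.keras.applications.VGG16(weights="imagenet", include_top=False,
                                   input_shape=(224, 224, 3))
base.trainable = False   # transfer learning: keep the pre-trained convolutional base frozen

model = tf.keras.Sequential([
    base,
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(4096, activation="relu"),   # appended fully connected layer
    tf.keras.layers.Dense(2, activation="softmax"),   # e.g. good vs. defective fruit (assumed)
])
model.compile(optimizer="adam", loss="categorical_crossentropy", metrics=["accuracy"])
model.summary()
```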
@article{bird2021study,
author="Jordan J. Bird and Diego Faria and Luis J. Manso and Pedro P. S. Ayrosa and Aniko Ekart",
title="A study on CNN image classification of EEG signals represented in 2D and 3D",
journal="Journal of Neural Engineering",
year="2021",
volume="18",
number="2",
pages="026005",
doi="https://doi.org/10.1088/1741-2552/abda0c",
}
The novelty of this study consists of the exploration of multiple new approaches of data pre-processing of brainwave signals, wherein statistical features are extracted and then formatted as visual images based on the order in which dimensionality reduction algorithms select them. This data is then treated as visual input for 2D and 3D convolutional neural networks (CNNs) which then further extract 'features of features'. Approach. Statistical features derived from three electroencephalography (EEG) datasets are presented in visual space and processed in 2D and 3D space as pixels and voxels respectively. Three datasets are benchmarked: mental attention states and emotional valences from the four TP9, AF7, AF8 and TP10 10–20 electrodes, and eye state data from 64 electrodes. Seven hundred twenty-nine features are selected through three methods of selection in order to form 27 × 27 images and 9 × 9 × 9 cubes from the same datasets. CNNs engineered for the 2D and 3D preprocessing representations learn to convolve useful graphical features from the data. Main results. A 70/30 split method shows that the strongest methods for classification accuracy of feature selection are One Rule for attention state and Relative Entropy for emotional state, both in 2D. In the eye state dataset, 3D space is best, selected by Symmetrical Uncertainty. Finally, 10-fold cross validation is used to train the best topologies. Final best 10-fold results are 97.03% for attention state (2D CNN), 98.4% for emotional state (3D CNN), and 97.96% for eye state (3D CNN). Significance. The findings of the framework presented by this work show that CNNs can successfully convolve useful features from a set of pre-computed statistical temporal features from raw EEG waves. The high performance of K-fold validated algorithms argues that the features learnt by the CNNs hold useful knowledge for classification in addition to the pre-computed features.
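The pre-processing step, laying out 729 selected statistical features as a 27 × 27 image or a 9 × 9 × 9 cube, amounts to a reshape in feature-selection order. The snippet below shows that step only, with random values standing in for the real EEG-derived features.

```python
import numpy as np

n_features = 729                       # 729 = 27*27 = 9*9*9
features = np.random.rand(n_features)  # placeholder for the selected statistical EEG features,
                                       # already ordered by the feature-selection algorithm

image_2d = features.reshape(27, 27)    # pixel grid for the 2D CNN
cube_3d = features.reshape(9, 9, 9)    # voxel grid for the 3D CNN

# Add batch/channel axes as expected by typical CNN frameworks.
batch_2d = image_2d[np.newaxis, :, :, np.newaxis]       # (1, 27, 27, 1)
batch_3d = cube_3d[np.newaxis, :, :, :, np.newaxis]     # (1, 9, 9, 9, 1)
print(batch_2d.shape, batch_3d.shape)
```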
@inproceedings{rodriguezcriado2020generation,
author="Daniel Rodriguez-Criado and Pilar Bachiller and Luis J. Manso",
title="Generation of Human-aware Navigation Maps using Graph Neural Networks",
booktitle="Artificial Intelligence XXXVIII. SGAI-AI 2021. Lecture Notes in Computer Science, vol 13101",
year="2020",
pages="19--32",
publisher="Springer, Cham",
doi="https://doi.org/10.1007/978-3-030-91100-3_2",
}
Minimising the discomfort caused by robots when navigating in social situations is crucial for them to be accepted. The paper presents a machine learning-based framework that bootstraps existing one-dimensional datasets to generate a cost map dataset, and a model combining Graph Neural Network and Convolutional Neural Network layers to produce cost maps for human-aware navigation in real-time. The proposed framework is evaluated against the original one-dimensional dataset and in simulated navigation tasks. The results outperform similar state-of-the-art methods considering the accuracy on the dataset and the navigation metrics used. The applications of the proposed framework are not limited to human-aware navigation; it could be applied to other fields where map generation is needed.
@inbook{manso2020graphneural,
author="Luis J. Manso and Ronit R. Jorvekar and Diego Faria and Pablo Bustos and Pilar Bachiller",
title="Graph Neural Networks for Human-aware Social Navigation",
pages="167--179",
publisher="Springer",
year="2020",
type="bookchapter",
doi="",
}
Autonomous navigation is a key skill for assistive and service robots. To be successful, robots have to navigate avoiding going through the personal spaces of the people surrounding them. Complying with social rules such as not getting in the middle of human-to-human and human-to-object interactions is also important. This paper suggests using Graph Neural Networks to model how inconvenient the presence of a robot would be in a particular scenario according to learned human conventions so that it can be used by path planning algorithms. To do so, we propose two ways of modelling social interactions using graphs and benchmark them with different Graph Neural Networks using the SocNav1 dataset. We achieve close-to-human performance in the dataset and argue that, in addition to promising results, the main advantage of the approach is its scalability in terms of the number of social factors that can be considered and easily embedded in code, in comparison with model-based approaches. The code used to train and test the resulting graph neural network is available in a public repository.
@inbook{baghel2020atoolkit,
author="Rishabh Baghel and Aditya Kapoor and Pilar Bachiller and Ronit R. Jorvekar and Daniel Rodriguez-Criado and Luis J. Manso",
title="A Toolkit to Generate Social Navigation Datasets",
pages="180--193",
publisher="Springer",
year="2020",
type="bookchapter",
doi="",
}
Social navigation datasets are necessary to assess social navigation algorithms and train machine learning algorithms. Most of the currently available datasets target pedestrians' movements as a pattern to be replicated by robots. It can be argued that one of the main reasons for this to happen is that compiling datasets where real robots are manually controlled, as they would be expected to behave when moving, is a very resource-intensive task. Another aspect that is often missing in datasets is symbolic information that could be relevant, such as human activities, relationships or interactions. Unfortunately, the available datasets targeting robots and supporting symbolic information are restricted to static scenes. This paper argues that simulation can be used to gather social navigation data in an effective and cost-efficient way and presents a toolkit for this purpose. A use case studying the application of graph neural networks to create learned control policies using supervised learning is presented as an example of how it can be used.
@inproceedings{rodriguezcriado2020multicamera,
author="Daniel Rodriguez-Criado and Pilar Bachiller and Pablo Bustos and George Vogiatzis and Luis J. Manso",
title="Multi-camera Torso Pose Estimation using Graph Neural Networks",
booktitle="The 29th IEEE International Conference on Robot and Human Interactive Communication",
year="2020",
pages="6",
publisher="IEEE",
doi="",
}
Estimating the location and orientation of humans is an essential skill for service and assistive robots. To achieve a reliable estimation in a wide area such as an apartment, multiple RGBD cameras are frequently used. Firstly, these setups are relatively expensive. Secondly, they seldom perform an effective data fusion using the multiple camera sources at an early stage of the processing pipeline. Occlusions and partial views make this second point very relevant in these scenarios. The proposal presented in this paper makes use of graph neural networks to merge the information acquired from multiple camera sources, achieving a mean absolute error below 125 mm for the location and 10 degrees for the orientation using low-resolution RGB images. The experiments, conducted in an apartment with three cameras, benchmarked two different graph neural network implementations and a third architecture based on fully connected layers. The software used has been released as open-source in a public repository.
@article{bachiller2020learnblock,
author="Pilar Bachiller-Burgos and Ivan Barbecho and Luis V. Calderita and Pablo Bustos and Luis J. Manso",
title="LearnBlock: A Robot-Agnostic Educational Programming Tool",
journal="IEEE Access",
year="2020",
volume="8",
number="1",
pages="30012--30026",
doi="https://doi.org/10.1109/ACCESS.2020.2972410",
}
Education is evolving to prepare students for the current sociotechnical changes. An increasing effort to introduce programming and other STEM-related subjects into the core curriculum of primary and secondary education is taking place around the world. The use of robots stands out among STEM initiatives, since robots are proving to be an engaging tool for learning programming and other STEM-related contents. Block-based programming is the option chosen for most educational robotic platforms. However, many robotics kits include their own software tools, as well as their own set of programming blocks. LearnBlock, a new educational programming tool, is proposed here. Its major novelty is its loosely coupled software architecture which makes it, to the best of our knowledge, the first robot-agnostic educational tool. Robot-agnosticism is provided not only in block code, but also in generated code, unifying the translation from blocks to the final programming language. The set of blocks can be easily extended implementing additional Python functions, without modifying the core code of the tool. Moreover, LearnBlock provides an integrated educational programming environment that facilitates a progressive transition from a visual to a general-purpose programming language. To evaluate LearnBlock and demonstrate that it is platform-agnostic, several tests were conducted. Each of them consists of a program implementing a robot behaviour. The block code of each test can run on several educational robots without changes.
@article{manso2020socnav,
author="Luis J. Manso and Pedro Núñez and Luis V. Calderita and Diego R. Faria and Pilar Bachiller",
title="SocNav1: A Dataset to Benchmark and Learn Social Navigation Conventions",
journal="Data",
year="2020",
volume="5",
number="1",
pages="10",
doi="https://doi.org/10.3390/data5010007",
}
Datasets are essential to the development and evaluation of machine learning and artificial intelligence algorithms. As new tasks are addressed, new datasets are required. Training algorithms for human-aware navigation is an example of this need. Different factors make designing and gathering data for human-aware navigation datasets challenging. Firstly, the problem itself is subjective; different dataset contributors will very frequently disagree to some extent on their labels. Secondly, the number of variables to consider is undetermined and culture-dependent. This paper presents SocNav1, a dataset for social navigation conventions. SocNav1 aims at evaluating the robots’ ability to assess the level of discomfort that their presence might generate among humans. The 9280 samples in SocNav1 seem to be enough for machine learning purposes given the relatively small size of the data structures describing the scenarios. Furthermore, SocNav1 is particularly well-suited to be used to benchmark non-Euclidean machine learning algorithms such as graph neural networks. This paper describes the proposed dataset and the method employed to gather the data. To provide a further understanding of the nature of the dataset, an analysis and validation of the collected data are also presented.
@inproceedings{vega2019sociallyaccepted,
author="Araceli Vega and Ramón Cintas and Luis J. Manso and Pablo Bustos and Pedro Núñez",
title="Socially-Accepted Path Planning for Robot Navigation Based on Social Interaction Spaces",
booktitle="Iberian Robotics conference",
year="2019",
pages="644--655",
publisher="Springer, Cham",
doi="",
}
Path planning is one of the most widely studied problems in robot navigation. It deals with estimating an optimal set of waypoints from an initial to a target coordinate. New generations of assistive robots should be able to compute these paths considering not only obstacles but also social conventions. This ability is commonly referred to as social navigation. This paper describes a new socially-acceptable path-planning framework where robots avoid entering areas corresponding to the personal spaces of people, but most importantly, areas related to human-human and human-object interaction. To estimate the social cost of invading personal spaces we use the concept of proxemics. To model the social cost of invading areas where interaction is happening we include the concept of object interaction space. The framework uses Dijkstra’s algorithm on a uniform graph of free space where edges are weighed according to the social traversal cost of their outbound node. Experimental results demonstrate the validity of the proposal to plan socially-accepted paths.
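The planning scheme, Dijkstra's algorithm over a uniform free-space graph whose edges are weighted by the social traversal cost of their destination node, can be sketched on a small grid. The cost values and grid below are illustrative; the paper derives them from proxemics and object interaction spaces.

```python
import heapq

def socially_weighted_dijkstra(social_cost, start, goal):
    """Shortest path on a 4-connected grid; stepping into a cell costs 1 + its social cost.
    social_cost: 2D list of non-negative values; float('inf') marks occupied cells."""
    rows, cols = len(social_cost), len(social_cost[0])
    dist, prev = {start: 0.0}, {}
    queue = [(0.0, start)]
    while queue:
        d, node = heapq.heappop(queue)
        if node == goal:
            break
        if d > dist.get(node, float("inf")):
            continue
        r, c = node
        for nr, nc in ((r + 1, c), (r - 1, c), (r, c + 1), (r, c - 1)):
            if 0 <= nr < rows and 0 <= nc < cols:
                nd = d + 1.0 + social_cost[nr][nc]
                if nd < dist.get((nr, nc), float("inf")):
                    dist[(nr, nc)], prev[(nr, nc)] = nd, node
                    heapq.heappush(queue, (nd, (nr, nc)))
    path, node = [], goal
    while node in prev or node == start:
        path.append(node)
        if node == start:
            break
        node = prev[node]
    return list(reversed(path))

# 0 = free space, higher values = personal/interaction spaces, inf = obstacle.
grid = [[0, 0, 0, 0],
        [0, 5, 5, 0],
        [0, 5, float("inf"), 0],
        [0, 0, 0, 0]]
print(socially_weighted_dijkstra(grid, (0, 0), (3, 3)))   # skirts around the costly region
```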
@article{marfil2019perceptions,
author="Rebeca Marfil and Adrian Romero-Garces and Juan P. Bandera and Luis J. Manso and Luis V. Calderita and Pablo Bustos and Antonio Bandera and Javier Garcia-Polo and Fernando Fernandez and Dimitry Voilmy",
title="Perceptions or Actions? Grounding How Agents Interact Within a Software Architecture for Cognitive Robotics",
journal="Cognitive Computation",
year="2019",
volume="2019",
pages="27",
doi="https://doi.org/10.1007/s12559-019-09685-5",
}
One of the aims of cognitive robotics is to endow robots with the ability to plan solutions for complex goals and then to enact those plans. Additionally, robots should react properly upon encountering unexpected changes in their environment that are not part of their planned course of actions. This requires a close coupling between deliberative and reactive control flows. From the perspective of robotics, this coupling generally entails a tightly integrated perceptuomotor system, which is then loosely connected to some specific form of deliberative system such as a planner. From the high-level perspective of automated planning, the emphasis is on a highly functional system that, taken to its extreme, calls perceptual and motor modules as services when required. This paper proposes to join the perceptual and acting perspectives via a unique representation where the responses of all software modules in the architecture are generalized using the same set of tokens. The proposed representation integrates symbolic and metric information. The proposed approach has been successfully tested in CLARC, a robot that performs Comprehensive Geriatric Assessments of elderly patients. The robot was favourably appraised in a survey conducted to assess its behaviour. For instance, using a 5-point Likert scale from 1 (strongly disagree) to 5 (strongly agree), patients reported an average of 4.86 when asked if they felt confident during the interaction with the robot. This paper proposes a mechanism for bringing the perceptual and acting perspectives closer within a distributed robotics architecture. The idea is built on top of the blackboard model and scene graphs. The modules in our proposal communicate using a short-term memory, writing the perceptual information they need to share with other agents and accessing the information they need for determining the next goals to address.
@article{faicocho2019,
author="Francisco J. Dominguez-Muñoz and Miguel A. Hernández-Mocholi and Luis J. Manso and Daniel Collado-Mateo and Santos Villafaina and Jose C. Adsuar and Narcis Gusi",
title="Test-Retest Reliability of Kinematic Parameters of Timed Up and Go in People with Type 2 Diabetes",
journal="Applied Sciences",
year="2019",
volume="9",
number="21",
pages="10",
doi="https://doi.org/10.3390/app9214709",
}
Diabetes mellitus is a chronic disease defined as a state of hyperglycaemia in fasting or postprandial states. Patients with type 2 diabetes mellitus (T2DM) often show reduced physical function, including low levels of strength, balance or mobility. In this regard, the timed up and go (TUG) is a widely used physical fitness test in people with T2DM. However, there is a lack of studies evaluating the properties of the TUG in this population. The present study aimed to evaluate the test-retest reliability of kinetic and kinematic parameters obtained from the TUG in the diabetic population with different levels of diabetic neuropathy. A total of 56 patients with T2DM participated in the study. They were divided into three groups according to the vibration threshold: (a) severe neuropathy, (b) moderate neuropathy and (c) normal perception. The TUG was performed using two force platforms to assess kinematic measurements. The results show that both kinetic and kinematic variables had good to excellent reliability. The reliability of the TUG was excellent for the whole sample and the groups with non-severe neuropathy, but only good for the group with severe neuropathy.
@article{bustos2018thecortex,
author="P. Bustos and L.J. Manso and A.J. Bandera and J.P. Bandera and I. Garcia-Varea and J. Martinez-Gomez",
title="The CORTEX Cognitive Robotics Architecture: use cases",
journal="Cognitive Systems Research",
year="2019",
volume="55",
number="",
pages="107--123",
doi="https://doi.org/10.1016/j.cogsys.2019.01.003",
}
CORTEX is a cognitive robotics architecture inspired by three key ideas: modularity, internal modelling and graph representations. CORTEX is also a computational framework designed to support early forms of intelligence in real-world, human-interacting robots, by selecting an a priori functional decomposition of the capabilities of the robot. This set of abilities was then translated to computational modules or agents, each one built as a network of interconnected software components. The nature of these agents can range from pure reactive modules connected to sensors and/or actuators, to pure deliberative ones, but they can only communicate with each other through a graph structure called the Deep State Representation (DSR). DSR is a short-term dynamic representation of the space surrounding the robot, the objects and the humans in it, and the robot itself. All these entities are perceived and transformed into different levels of abstraction, ranging from geometric data to high-level symbolic relations such as ''the person is talking and gazing at me''. The combination of symbolic and geometric information endows the architecture with the potential to simulate and anticipate the outcome of the actions executed by the robot. In this paper we present recent advances in the CORTEX architecture and several real-world human-robot interaction scenarios in which they have been tested. We describe our interpretation of the ideas inspiring the architecture and the reasons why this specific computational framework is a promising architecture for the social robots of tomorrow.
@inproceedings{lobato2019hrdialogue,
author="Carlos Lobato and Araceli Vega-Magro and Pedro Núñez and Luis J. Manso",
title="Human-robot dialogue and Collaboration for social navigation in crowded environments",
booktitle="Proceedings of 2019 IEEE International Conference on Autonomous Robot Systems and Competitions",
year="2019",
pages="6",
publisher="IEEE",
doi="",
}
Robot navigation in human-populated environments is a subject of great interest among the international scientific community. In order to be accepted in these scenarios, it is important for robots to navigate respecting social rules. Avoiding getting too close to a person, not interrupting conversations, and asking for permission or collaboration when required by social conventions are some of the behaviours that robots must exhibit. This paper presents a social navigation system that integrates different software agents within a cognitive architecture for robots and describes, as the main contribution, the corpus that allows dialogues to be established between robots and humans in real situations to improve the human-aware navigation system. The corpus has been experimentally evaluated by the simulation of different daily situations where robots need to plan interactions with real people. The results are analysed qualitatively, according to the behaviour expected from the robot in the interaction performed. The results show how the corpus presented in this paper improves the robot's navigation, making it more socially accepted.
@article{vega2018socially,
author="Araceli Vega and Luis J. Manso and Douglas G. Macharet and Pablo Bustos and Pedro Núñez",
title="Socially aware robot navigation system in human-populated and interactive environments based on an adaptive spatial density function and space affordances",
journal="Pattern Recognition Letters",
year="2019",
volume="2018",
number="",
pages="13",
doi="https://doi.org/10.1016/j.patrec.2018.07.015",
}
Traditionally, robots are mostly known by society due to the wide use of manipulators, which are generally placed in controlled environments such as factories. However, with the advances in the area of mobile robotics, they are increasingly inserted into social contexts, i.e., in the presence of people. The adoption of socially acceptable behaviours demands a trade-off between social comfort and other metrics of efficiency. For navigation tasks, for example, humans must be differentiated from other ordinary objects in the scene. In this work, we propose a novel human-aware navigation strategy built upon the use of an adaptive spatial density function that efficiently clusters groups of people according to their spatial arrangement. Space affordances are also used for defining potential activity spaces considering the objects in the scene. The proposed function defines regions where navigation is either discouraged or forbidden. To implement a socially acceptable navigation, the navigation architecture combines a probabilistic roadmap and rapidly-exploring random tree path planners, and an adaptation of the elastic band algorithm. Trials carried out in real and simulated environments demonstrate that the use of the clustering algorithm and social rules in the navigation architecture does not hinder the navigation performance.
@article{bird2019adeepevolutionary,
author="Jordan J. Bird and Diego R. Faria and Luis J. Manso and Anikó Ekárt and Christopher D. Buckingham",
title="A Deep Evolutionary Approach to Bioinspired Classifier Optimisation for Brain-Machine Interaction",
journal="Complexity",
year="2019",
volume="2019",
number="",
pages="14",
doi="https://doi.org/10.1155/2019/4316548",
}
This study suggests a new approach to EEG data classification by exploring the idea of using evolutionary computation to both select useful discriminative EEG features and optimise the topology of Artificial Neural Networks. An evolutionary algorithm is applied to select the most informative features from an initial set of 2550 EEG statistical features. Optimisation of a Multilayer Perceptron (MLP) is performed with an evolutionary approach before classification to estimate the best hyperparameters of the network. Deep learning and tuning with Long Short-Term Memory (LSTM) are also explored, and Adaptive Boosting of the two types of models is tested for each problem. Three experiments are provided for comparison using different classifiers: one for attention state classification, one for emotional sentiment classification, and a third experiment in which the goal is to guess the number a subject is thinking of. The obtained results show that an Adaptive Boosted LSTM can achieve an accuracy of 84.44%, 97.06%, and 9.94% on the attentional, emotional, and number datasets, respectively. An evolutionary-optimised MLP achieves results close to the Adaptive Boosted LSTM for the two first experiments and significantly higher for the number-guessing experiment, with an Adaptive Boosted DEvo MLP reaching 31.35%, while being significantly quicker to train and classify. In particular, the accuracy of the non-boosted DEvo MLP was 79.81%, 96.11%, and 27.07% in the same benchmarks. Two datasets for the experiments were gathered using a Muse EEG headband with four electrodes corresponding to the TP9, AF7, AF8, and TP10 locations of the international EEG placement standard. The EEG MindBigData digits dataset was gathered from the TP9, FP1, FP2, and TP10 locations.
@inbook{vega2018planninghri,
author="A. Vega-Magro and L.J. Manso and P. Bustos and P. Núñez",
title="Planning Human-Robot Interaction for Social Navigation in Crowded Environments",
pages="195--208",
publisher="Springer",
year="2018",
volume="855",
type="bookchapter",
doi="",
isbn="978-3-319-99884-8",
}
Navigation is one of the crucial skills autonomous robots need to perform daily tasks, and many of the rest depend on it. In this paper, we argue that this dependence goes both ways in advanced social autonomous robots. Manipulation, perception, and most importantly human-robot interaction are some of the skills on which navigation might rely. This paper is focused on the dependence on human-robot interaction and uses two particular scenarios of growing complexity as an example: asking for collaboration to enter a room and asking for permission to navigate between two people who are talking. In the first scenario, the person physically blocks the path to the adjacent room, so it would be impossible for the robot to navigate to such room. Even though in the second scenario the people talking do not block the path to the other room, from a social point of view, interrupting an ongoing conversation without noticing is undesirable. In this paper we propose a navigation planning domain and a set of software agents which allow the robot to navigate in crowded environments in a socially acceptable way, asking for cooperation or permission when necessary. The paper provides quantitative experimental results including social navigation metrics and the results of a Likert-scale satisfaction questionnaire.
@inproceedings{vega2018aflexible,
author="Araceli Vega-Magro and Luis J. Manso and P. Bustos and P. Núñez",
title="A Flexible and Adaptive Spatial Density Model for Context-Aware Social Mapping: Towards a More Realistic Social Navigation",
booktitle="Proceedings of 15th International Conference on Control, Automation, Robotics and Vision",
year="2018",
pages="1727--1732",
publisher="IEEE",
doi="",
}
Social navigation is a topic of enormous interest in autonomous robotics. Robots are gradually being used in human environments, working individually or collaborating with humans in their daily tasks. Robots in these scenarios have to be able to behave in a socially acceptable way and, for this reason, the way in which robots move has to adapt to humans and context. Proxemics has been extensively studied with the aim of improving social navigation. However, these works do not take into account that, in several situations, the personal space of humans depends on the context (e.g., this space is not the same in a narrow corridor as in a wide room). This work proposes the definition of an adaptive and flexible spatial density function that allows, on the one hand, describing the comfort space of individuals during an interaction and, on the other hand, dynamically adapting its value in terms of the space that surrounds the interaction. In order to validate the performance, this article describes a set of simulated experiments where the robustness and improvements of the approach are tested in different environments.
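A common way to realise such a density function is an asymmetric Gaussian centred on the person, elongated along the facing direction, with a scale factor that depends on the surrounding free space (e.g. shrunk in narrow corridors). The sketch below is a generic model of that kind with placeholder spreads, not the exact function proposed in the paper.

```python
import math

def personal_space_density(px, py, person_x, person_y, person_theta, context_scale=1.0):
    """Asymmetric Gaussian comfort-space density at point (px, py).
    context_scale < 1 shrinks the space (e.g. narrow corridor); > 1 enlarges it."""
    # Placeholder spreads (metres): larger in front of the person than behind or to the sides.
    sigma_front, sigma_back, sigma_side = 1.2, 0.8, 0.9
    dx, dy = px - person_x, py - person_y
    # Rotate the offset into the person's frame (x' points where the person is facing).
    xr = dx * math.cos(person_theta) + dy * math.sin(person_theta)
    yr = -dx * math.sin(person_theta) + dy * math.cos(person_theta)
    sx = (sigma_front if xr >= 0 else sigma_back) * context_scale
    sy = sigma_side * context_scale
    return math.exp(-0.5 * ((xr / sx) ** 2 + (yr / sy) ** 2))

# Density 1 m in front of a person facing +x: open room vs. narrow corridor.
print(personal_space_density(1.0, 0.0, 0.0, 0.0, 0.0, context_scale=1.0))
print(personal_space_density(1.0, 0.0, 0.0, 0.0, 0.0, context_scale=0.5))
```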
@inbook{faria2018multimodal,
author="Diego R. Faria and Cristiano Prembeida and Luis J. Manso and Eduardo P. Ribeiro and P. Nunez",
title="Multimodal Bayesian Network for Artificial Perception",
pages="17",
publisher="IntechOpen",
year="2018",
volume="1",
editor="IntechOpen",
type="bookchapter",
doi="10.5772/intechopen.81111",
isbn="",
}
In order to make machines perceive their external environment coherently, multiple sources of sensory information derived from several different modalities can be used (e.g., cameras, LiDAR, stereo, RGB-D, and radar). All these different sources of information can be efficiently merged to form a robust perception of the environment. Some of the mechanisms that underlie this merging of the sensor information are highlighted in this chapter, showing that, depending on the type of information, different combination and integration strategies can be used and that prior knowledge is often required for interpreting the sensory signals efficiently. The notion that perception involves Bayesian inference is an increasingly popular position taken by a considerable number of researchers. Bayesian models have provided insights into many perceptual phenomena, showing that they are a valid approach to deal with real-world uncertainties and for robust classification, including classification in time-dependent problems. This chapter addresses the use of Bayesian networks applied to sensory perception in the following areas: mobile robotics, autonomous driving systems, advanced driver assistance systems, sensor fusion for object detection, and EEG-based mental state classification.
@inproceedings{bird2018astudyonmental,
author="Jordan J. Bird and Luis J. Manso and Eduardo P. Ribeiro and Aniko Ekart and Diego R. Faria",
title="A study on mental state classification using eeg-based brain-machine interface",
booktitle="Proceedings of 2018 International Conference on Intelligent Systems",
year="2018",
pages="795--800",
publisher="IEEE",
doi="",
}
This work aims to find discriminative EEG-based features and appropriate classification methods that can categorise brainwave patterns based on their level of activity or frequency for mental state recognition useful for human-machine interaction. By using the Muse headband with four EEG sensors (TP9, AF7, AF8, TP10), we categorised three possible states, such as relaxing, neutral and concentrating, based on a few states of mind defined by cognitive behavioural studies. We have created a dataset with five individuals and sessions lasting one minute for each class of mental state in order to train and test different methods. Given the proposed set of features extracted from the five EEG headband signals (alpha, beta, theta, delta, gamma), we have tested a combination of different feature selection algorithms and classifier models to compare their performance in terms of recognition accuracy and number of features needed. Different tests, such as 10-fold cross validation, were performed. Results show that only 44 features from a set of over 2100 features are necessary when used with classical classifiers such as Bayesian Networks, Support Vector Machines and Random Forests, attaining an overall accuracy over 87%.
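The classification stage, statistical features per window fed to a classical classifier, can be sketched with scikit-learn. The synthetic signal, window length and tiny feature set below are placeholders; the paper extracts a far richer set (over 2100 features) from real Muse recordings.

```python
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import cross_val_score

rng = np.random.default_rng(0)

def window_features(window):
    """A few simple per-channel statistics (stand-in for the paper's much larger feature set)."""
    return np.concatenate([window.mean(axis=1), window.std(axis=1),
                           window.min(axis=1), window.max(axis=1)])

# Synthetic stand-in data: 300 one-second windows, 4 channels (TP9, AF7, AF8, TP10), 256 samples.
windows = rng.normal(size=(300, 4, 256))
labels = rng.integers(0, 3, size=300)            # 0 = relaxed, 1 = neutral, 2 = concentrating
X = np.array([window_features(w) for w in windows])

clf = RandomForestClassifier(n_estimators=100, random_state=0)
scores = cross_val_score(clf, X, labels, cv=10)  # 10-fold cross validation, as in the paper
print(scores.mean())                             # near chance here, since the data is random
```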
@inproceedings{golzadeh2018emotion,
author="Hamid Golzadeh and Diego R. Faria and Luis J. Manso and Aniko Ekart and Christopher D. Buckingham",
title="Emotion Recognition using Spatiotemporal Features from Facial Expression Landmarks",
booktitle="Proceedings of 2018 International Conference on Intelligent Systems",
year="2018",
pages="789--794",
publisher="IEEE",
doi="",
}
Emotion expression is a type of nonverbal communication (i.e. wordless cues) between people, where affect plays the role of interpersonal communication with information conveyed by facial and/or body expressions. Much can be understood about how people are feeling through their expressions, which are crucial for everyday communication and interaction. This paper presents a study on spatiotemporal feature extraction based on tracked facial landmarks. The features are tested with multiple classification methods to verify whether they are discriminative enough for an automatic emotion recognition system. The Karolinska Directed Emotional Faces (KDEF) [1] were used to determine features representing the human facial expressions of angry, disgusted, happy, sad, afraid, surprised and neutral. The resulting set of features was tested using K-fold cross-validation. Experimental results show that facial expressions can be recognised correctly with an accuracy of up to 87% when using the newly-developed features and a multiclass Support Vector Machine classifier.
@article{bachiller2018aspiking,
author="Pilar Bachiller and Luis J. Manso and Pablo Bustos",
title="A Spiking Neural Model of HT3D for Corner Detection",
journal="Frontiers in Computational Neuroscience",
year="2018",
volume="12",
number="37",
pages="to-be-published",
doi="10.3389/fncom.2018.00037",
}
Obtaining good quality image features is of remarkable importance for most computer vision tasks. It has been demonstrated that the first layers of the human visual cortex are devoted to feature detection. The need for these features has made line, segment, and corner detection one of the most studied topics in computer vision. HT3D is a recent variant of the Hough transform for the combined detection of corners and line segments in images. It uses a 3D parameter space that enables the detection of segments instead of whole lines. This space also encloses canonical configurations of image corners, transforming corner detection into a pattern search problem. Spiking neural networks have previously been proposed for multiple image processing tasks, including corner and line detection using the Hough transform. Following these ideas, this paper presents and describes in detail a model to implement HT3D as a Spiking Neural Network for corner detection. The results obtained from a thorough testing of its implementation using real images evince the correctness of the Spiking Neural Network HT3D implementation. Such results are comparable to those obtained with the regular HT3D implementation, which are in turn superior to those of other corner detection algorithms.
@inproceedings{gutierrez2018planning,
author="M.A. Gutierrez and L.J. Manso and P. Núñez and P. Bustos",
title="Planning Object Informed Search for Robots in Household Environments",
booktitle="Proceedings of the IEEE International Conference on Autonomous Robot Systems and Competitions (ICARSC 2018)",
year="2018",
pages="205--210",
publisher="IEEE",
doi="",
}
In the current state of the art, social robots performing non-trivial tasks often spend most of their time finding and modeling objects. In this paper we present the extension of a cognitive architecture that reduces the time and effort a robot needs to retrieve objects in a household scenario. We upgrade our previous Passive Learning Sensor algorithm into a full-fledged agent that is part of the CORTEX robotics cognitive architecture. With its planning capabilities, this new configuration allows the robot to efficiently search, pick and deliver different objects from different locations in large household environments. The contribution presented here dynamically extends the robot's knowledge of the world by making use of memories from past experiences. Results obtained from several experiments show that both the new software agent and the integrated cognitive architecture constitute an important step towards robot autonomy. The experiments show that the find-and-pick task is greatly accelerated.
@inproceedings{voilmy2017clarc,
author="Dimitri Voilmy and Cristina Suárez and Adrian Romero-Garcés and Cristian Reuther and Jose Carlos Pulido and Rebeca Marfil and Luis J. Manso and Karine Lan Hing Ting and Ana Iglesias and Jose Carlos González and Javier García and Angel García-Olaya and Raquel Fuentetaja and Fernando Fernández and Alvaro Dueñas and Luis Vicente Calderita and Pablo Bustos and T Barile and Juan Pedro Bandera and Antonio Bandera",
title="CLARC: A cognitive robot for helping geriatric doctors in real scenarios",
booktitle="Proceedings of Iberian Robotics conference 2017",
year="2017",
pages="403--414",
publisher="Springer, Cham",
doi="",
}
Comprehensive Geriatric Assessment (CGA) is an integrated clinical process to evaluate the frailty of elderly persons in order to create therapy plans that improve their quality of life. For robotizing these tests, we are designing and developing CLARC, a mobile robot able to help the physician to capture and manage data during the CGA procedures, mainly by autonomously conducting a set of predefined evaluation tests. Built around a shared internal representation of the outer world, the architecture is composed of software modules able to plan and generate a stream of actions, to execute actions emanated from the representation or to update this by including/removing items at different abstraction levels. Percepts, actions and intentions coming from all software modules are grounded within this unique representation. This allows the robot to react to unexpected events and to modify the course of action according to the dynamics of a scenario built around the interaction with the patient. The paper describes the architecture of the system as well as the preliminary user studies and evaluation to gather new user requirements.
@inproceedings{bandera2017lifebots,
author="Antonio Bandera and Juan P. Bandera and Pablo Bustos and Fernando Férnandez and Angel García-Olaya and Javier García-Polo and Ismael García-Varea and Luis J. Manso and Rebeca Marfil and Jesús Martínez-Gómez and Pedro Núñez and Jose M. Perez-Lorenzo and Pedro Reche-Lopez and Cristina Romero-González and Raquel Viciana-Abad",
title="LifeBots I: Building the Software Infrastructure for Supporting Lifelong Technologies",
booktitle="Proceedings of IEEE Intern",
year="2017",
pages="391-402",
publisher="Springer, Cham",
doi="",
}
The goal of the LifeBots project is the study and development of long-life mechanisms that facilitate and improve the integration of robotic platforms in smart homes to support elderly and handicapped people. Specifically, the system aims to design, build and validate an assistive ecosystem formed by a person living in a smart home with a social robot as her main interface to a gentler habitat. Achieving this goal requires the use and integration of different technologies and research areas, but also the development of the mechanisms in charge of providing a unified, pro-active response to the user's needs. This paper describes some of the mechanisms implemented within the cognitive robotics architecture CORTEX, which integrates deliberative and reactive agents through a common understanding and internalizing of the outer reality, materialized in a shared representation derived from a formal graph grammar.
@article{manso2017integrating,
author="Luis J. Manso and Marco A. Gutierrez and Pablo Bustos and Pilar Bachiller",
title="Integrating planning perception and action for informed object search",
journal="Cognitive Processing",
year="2017",
volume="2017",
number="",
pages="1-12",
doi="https://doi.org/10.1007/s10339-017-0828-3",
}
This paper presents a method to reduce the time spent by a robot with cognitive abilities when looking for objects in unknown locations. It describes how machine learning techniques can be used to decide which places should be inspected first, based on images that the robot acquires passively. The proposal is composed of two concurrent processes. The first one uses the aforementioned images to generate a description of the types of objects found in each object container seen by the robot. This is done passively, regardless of the task being performed. The containers can be tables, boxes, shelves or any other kind of container of known shape whose contents can be seen from a distance. The second process uses the previously computed estimation of the contents of the containers to decide which container is most likely to hold the object to be found. This second process is deliberative and takes place only when the robot needs to find an object, whether because it is explicitly asked to locate one or because doing so is needed as a step to fulfil the robot's mission. Upon failure to guess the right container, the robot can continue making guesses until the object is found. Guesses are made based on the semantic distance between the object to find and the description of the types of objects found in each container. The paper provides quantitative results comparing the efficiency of the proposed method against two baseline approaches.
@article{manso2017useandadvancesj,
author="L.J. Manso and L.V. Calderita and P. Bustos and A. Bandera",
title="Use and advances in the Active Grammar-based Modeling architecture",
journal="Journal of Physical Agents",
year="2017",
volume="8",
number="1",
pages="25",
doi="DOI: 10.14198/JoPha",
}
The choice of a robotic architecture and of one of its possible implementations is one of the most crucial design decisions when developing robots. Such a decision affects the whole development process and the limitations of the robot, and reversing it can be prohibitively time-consuming. This paper presents the redesign and the most relevant implementation issues of the Active Grammar-based Modeling architecture (AGM), as well as the latest developments thereof. AGM is flexible, modular and designed with computation distribution and scalability in mind. In addition to a continuous refactoring of the API library and planner, the most relevant improvements are an enhanced mission specification syntax, support for representations combining symbolic and metric properties, redesigned communication patterns, and extended middleware support. A few usage examples are presented to demonstrate successful applications of the architecture and why some of its features were needed.
@article{bachiller2017variant,
author="Pilar Bachiller and Luis J. Manso and Pablo Bustos",
title="A variant of the Hough Transform for the combined detection of corners, segments, and polylines",
journal="EURASIP Journal on Image and Video Processing",
year="2017",
volume="2017",
number="32",
pages="32",
doi="10.1186/s13640-017-0180-7",
}
The Hough Transform (HT) is an effective and popular technique for detecting image features such as lines and curves. From its standard form, numerous variants have emerged with the objective, in many cases, of extending the kinds of image features that can be detected. In particular, corner and line segment detection using the HT has been separately addressed by several approaches. To deal with the combined detection of both image features (corners and segments), this paper presents a new variant of the Hough Transform. The proposed method provides an accurate detection of segment endpoints, even if they do not correspond to intersection points between line segments. Segments are detected from their endpoints, producing not only a set of isolated segments but also a collection of polylines. This provides a direct representation of the polygonal contours of the image despite imperfections in the input data such as missing or noisy feature points. It is also shown how this proposal can be extended to detect predefined polygonal shapes. The paper describes in detail every stage of the proposed method and includes experimental results obtained from real images showing the benefits of the proposal in comparison with other approaches.
@article{gutierrez2017apassive,
author="Marco A. Gutierrez and Luis J. Manso and Harit Pandya and Pedro Nunez",
title="A Passive Learning Sensor Architecture for Multimodal Image Labeling: An Application for Social Robots",
journal="Sensors",
year="2017",
volume="17",
number="2",
pages="353",
doi="10.3390/s17020353",
}
Object detection and classification have countless applications in human-robot interaction systems. They are necessary skills for autonomous robots that perform tasks in household scenarios. Despite the great advances in deep learning and computer vision, social robots performing non-trivial tasks usually spend most of their time finding and modeling objects. Working in real scenarios means dealing with constant environment changes and relatively low-quality sensor data due to the distance at which objects are often found. Ambient intelligence systems equipped with different sensors can also benefit from the ability to find objects, enabling them to inform humans about their location. For these applications to succeed, systems need to detect the objects that may potentially contain other objects, working with relatively low-resolution sensor data. A passive learning sensor architecture has been designed in order to take advantage of multimodal information, obtained using an RGB-D camera and trained semantic language models. The main contribution of the architecture lies in the improvement of the performance of the sensor under conditions of low resolution and high light variation using a combination of image labeling and word semantics. The tests performed on each stage of the architecture compare this solution with current image-labeling techniques in the context of an autonomous social robot working in an apartment. The results obtained demonstrate that the proposed sensor architecture outperforms state-of-the-art approaches.
@inproceedings{vega2017socially,
author="A. Vega and L.J. Manso and P. Bustos and P. Núñez and D.G. Macharet",
title="Socially Acceptable Robot Navigation over Groups of People",
booktitle="Proceedings of IEEE International Conference on Robot and Human Interactive Communication (RO-MAN 2017))",
year="2017",
pages="1182-1187",
publisher="IEEE",
doi="",
}
Considering the widespread use of mobile robots in different parts of society, it is important to provide them with the capability to behave in a socially acceptable manner. Consequently, Human-Robot Interaction has recently become a research topic of great importance. Autonomous navigation is a fundamental task in Robotics, and several strategies that produce length- or time-optimized paths can be found in the literature. However, considering the recent use of mobile robots in more social contexts, the use of such classical techniques is restricted. Therefore, in this article we present a social navigation approach for environments with groups of people. The proposal uses a density function to efficiently represent groups of people and modifies the navigation architecture to include the social behaviour of the robot during its motion. This architecture is based on the combined use of the Probabilistic Roadmap (PRM) and Rapidly-exploring Random Tree (RRT) path planners and an adaptation of the elastic band algorithm. Experimental evaluation was carried out in different simulated environments, providing insight into the performance of the proposed technique, which surpasses classical techniques with no proxemics awareness in terms of social impact.
@article{manso2015perceptionaware,
author="L.J. Manso and P. Bustos and P. Bachiller and P. Núñez",
title="A Perception-aware Architecture for Autonomous Robots",
journal="International Journal of Advanced Robotic Systems",
year="2015",
volume="12",
number="174",
pages="13",
doi="DOI: 10.5772/61742",
}
@inproceedings{manso2015planning,
author="L.J. Manso and P. Bustos and R. Alami and G. Milliez and P. Núñez",
title="Planning Human-Robot Interaction Tasks using Graph Models",
booktitle="Proceedings of International Workshop on Recognition and Action for Scene Understanding (REACTS 2015)",
year="2015",
pages="15--27",
publisher="",
doi="",
}
@inproceedings{cid2015facial,
author="F. Cid and L.J. Manso and Pedro Nunez",
title="A Novel Multimodal Emotion Recognition Approach for Affective Human Robot Interaction",
booktitle="Proceedings of the Workshop on Multimodal and Semantics for Robotics Systems",
year="2015",
pages="1-9",
publisher="",
doi="",
}
Facial expressions and speech are elements that provide emotional information about the user through multiple communication channels. In this paper, a novel multimodal emotion recognition system based on visual and auditory information processing is proposed. The proposed approach is used in real affective human-robot communication in order to estimate five different emotional states (i.e., happiness, anger, fear, sadness and neutral), and it consists of two subsystems with a similar structure. The first subsystem achieves robust facial feature extraction based on filters consecutively applied to the edge image and the use of a Dynamic Bayesian Classifier. A similar classifier is used in the second subsystem, where the input is associated with a set of speech descriptors, such as speech rate, energy and pitch. Both subsystems are finally combined in real time. The results of this multimodal approach show the robustness and accuracy of the methodology with respect to single-modality emotion recognition systems.
@inproceedings{cid2015rgbd,
author="F. Cid and E. Mogena and L.J. Manso and P. Núñez",
title="RGBD Data analysis for Real-Time Emotion Recognition from Upper Body Movements",
booktitle="Proceedings of International Workshop on Recognition and Action for Scene Understanding (REACTS 2015)",
year="2015",
pages="43-52",
publisher="",
doi="",
}
@inproceedings{romero2015testing,
author="A. Romero and L.V. Calderita and J. Martín and J.P. Bandera and R. Marfil and L.J. Manso and A. Bandera and P. Bustos",
title="Testing a fully autonomous robotic salesman in real scenarios",
booktitle="Proceedings of International Conference on Autonomous Robot Systems and Competitions (ICARSC 2015)",
year="2015",
editor="IEEE",
pages="124-130",
publisher="IEEE",
doi="",
}
@article{calderita2014therapist,
author="L.V. Calderita and L.J. Manso and P. Bustos and C. Suárez and F. Fernández and A. Bandera",
title="THERAPIST: Towards an Autonomous Socially Interactive Robot for Motor and Neurorehabilitation Therapies for Children",
journal="Journal of Rehabilitation and Assistive Technologies, JMIR",
year="2014",
volume="1",
number="1",
pages="e3151",
doi="10.2196/rehab.31512014",
}
@inproceedings{martinez2014toward,
author="J. Martínez and R. Marfil and L.V. Calderita and J.P. Bandera and L.J Manso and A. Bandera and A. Romero and P. Bustos",
title="Toward Social Cognition in Robotics: Extracting and Internalizing Meaning from Perception",
booktitle="Proceedings of Workshop of Physical Agents",
year="2014",
pages="93-104",
publisher="",
doi="",
}
@inproceedings{manso2014generalpurpose,
author="L.J. Manso and L.V. Calderita and P. Bustos and J. García and M. Martínez and F. Fernández and A. Romero and A. Bandera",
title="A General-Purpose Architecture to Control Mobile Robots",
booktitle="Proceedings of Workshop of Physical Agents (WAF)",
year="2014",
pages="105--116",
publisher="Universidad de León",
doi="",
}
@inproceedings{almeida2014multilayer,
author="J. Almeida and L.J. Manso and A. Bandera and P. Núñez",
title="A Multi-layer Description of the Environment using Curvature Information for Robot Navigation",
booktitle="Proceedings of Workshop of Physical Agents (WAF)",
year="2014",
pages="135--144",
publisher="",
doi="",
}
@article{manso2014novel,
author="Luis J. Manso and Pedro Núñez and Sidnei Da Silva and Paulo Drews-Jr",
title="A Novel Robust Scene Change Detection Algorithm for Autonomous Robots using Mixtures of Gaussians",
journal="International Journal of Advanced Robotic Systems (ISSN 1729-8806)",
year="2014",
volume="11",
number="18",
pages="18",
doi="10.5772/57360",
}
@inproceedings{drews2013improving,
author="Paulo Drews-Jr and Luis J. Manso and Sidnei Da Silva and Pedro Núñez",
title="Improving Change Detection using Vertical Surface Normal Histograms and Gaussian Mixture Models in Structured Environments",
booktitle="Proc. of The 16th International Conference on Advanced Robotics",
year="2013",
pages="1-7",
publisher="IEEE",
doi="",
}
@inproceedings{bustos2013multimodal,
author="P. Bustos and J. Martinez and I. Garcia and L. Rodriguez and P. Bachiller and L. Calderita and L.J. Manso and A. Sanchez and A. Bandera and J.P. Bandera",
title="Multimodal interaction with Loki",
booktitle="Proc. of Workshop of Physical Agents",
year="2013",
pages="53--60",
publisher="",
doi="",
}
@phdthesis{manso2013perception,
author="L.J. Manso",
title="Perception as Stochastic Grammar-based Sampling on Dynamic Graph Spaces",
school="Cáceres School of Technology, University of Extremadura",
year="2013",
type="phdthesis",
doi="",
}
@inbook{suarez2013ursus,
author="C. Suarez-Mejías and C. Echevarría and P. Núñez and L.J. Manso and P. Bustos and S. Leal and C. Parra",
title="Ursus: A robotic assistant for training of children with motor impairments",
pages="249-253",
publisher="Springer",
year="2013",
volume="1",
editor="Springer",
type="bookchapter",
doi="",
isbn="978-3-642-34545-6",
}
@inproceedings{sanchez2012experiments,
author="A. Sánchez and P. Núñez and L.J. Manso and P. Bustos",
title="Experiments in self-calibration of an autonomous mobile manipulator",
booktitle="Proc. of Workshop of Physical Agents",
year="2012",
pages="183-190",
publisher="",
doi="",
}
@article{manso2012indoor,
author="L.J. Manso and P. Bustos and P. Bachiller and J. Franco",
title="Indoor scene perception for object detection and manipulation",
journal="Symposium on Spatial Cognition in Robotics. International Conference on Spatial Cognition",
year="2012",
volume="13",
number="1",
pages="55-56",
doi="",
}
@inproceedings{manso2012graph,
author="Luis J. Manso and Pablo Bustos and Pilar Bachiller and Marco A. Gutierrez",
title="Graph Grammars for Active Perception",
booktitle="Proc. of 12th International Conference on Autonomous Robot Systems and Competitions",
year="2012",
pages="63-68",
publisher="",
doi="",
}
@article{cid2012engaging,
author="F. Cid and R. Cintas and L.J. Manso and L. Calderita and A. Sánchez and P Núñez",
title="Engaging human-to-robot attention using conversational gestures and lip-synchronization",
journal="Journal of Physical Agents",
year="2012",
volume="6",
number="1",
pages="3-10",
doi="",
}
@inbook{naranjo2012interactive,
author="A.B. Naranjo and C. Suárez and C. Parra and E. González and F. Bockel and A. Yuste and P. Bustos and L. Manso and P. Bachiller and S. Plana",
title="Interactive games with robotic and augmented reality technology in cognitive and motor rehabilitation",
pages="1212-1233",
publisher="IGI Global",
year="2012",
volume="1",
editor="IGI Global",
type="bookchapter",
doi="",
isbn="978-1-4666-0149-9",
}
@inproceedings{romero2011improving,
author="A. Romero and L.J. Manso and M.A. Gutiérrez and R. Cintas and P. Bustos",
title="Improving the lifecycle of robotics components using Domain-Specific Languages",
booktitle="Proc. of Int. Workshop on Domain-Specific Languages and models for ROBotic systems (DSLRob'2011)",
year="2011",
pages="1--9",
publisher="",
doi="",
}
There is currently a large amount of robotics software using the component-oriented programming paradigm. However, the rapid growth in the number and complexity of components may compromise the scalability and the whole lifecycle of robotics software systems. Model-Driven Engineering can be used to mitigate these problems. This paper describes how using Domain-Specific Languages to generate and describe critical parts of robotic systems helps developers perform component managerial tasks such as component creation, modification, monitoring and deployment. Four different DSLs are proposed in this paper: i) CDSL for specifying the structure of the components, ii) IDSL for describing their interfaces, iii) DDSL for describing the deployment process of component networks, and iv) PDSL for defining and configuring component parameters. Their benefits have been demonstrated through their implementation in RoboComp, a general-purpose, component-based robotics framework. Examples of the usage of these DSLs are shown along with experiments that demonstrate the benefits they bring to the lifecycle of the components.
@inproceedings{bachiller2011incremental,
author="P. Bachiller and M.A. Gutierrez and L.J. Manso and P. Bustos and P. Núñez",
title="An incremental hybrid approach to indoor modeling",
booktitle="Proc. of European Conference on Mobile Robots",
year="2011",
pages="219--226",
publisher="",
doi="",
}
@inproceedings{cid2011realtime,
author="F. Cid and R. Cintas and L.J. Manso and L. Calderita and A. Sánchez and P. Núñez",
title="A real-time synchronization algorithm between Text-To-Speech (TTS) system and Robot Mouth for Social Robotic Applications",
booktitle="Proc. of Workshop of Physical Agents",
year="2011",
pages="81--86",
publisher="",
doi="",
}
@inbook{bachiller2011attentional,
author="Pilar Bachiller and Pablo Bustos and Luis J. Manso",
title="Attentional Behaviors for Environment Modeling by a Mobile Robot",
pages="17--40",
publisher="InTech",
year="2011",
type="bookchapter",
doi="",
}
@article{cintas2011robust,
author="Ramón Cintas and Luis J. Manso and Luis Pinero and Pilar Bachiller and Pablo Bustos",
title="Robust Behavior and Perception using Hierarchical State Machines: A Pallet Manipulation Experiment",
journal="Journal of Physical Agents",
year="2011",
volume="5",
number="1",
pages="35-44",
doi="",
}
@inbook{manso2010robocomp,
author="Luis J. Manso and Pilar Bachiller and Pablo Bustos and Pedro Núñez and Ramón Cintas and Luis Calderita",
title="RoboComp: a Tool-based Robotics Framework",
pages="251--262",
publisher="Springer",
year="2010",
type="bookchapter",
doi="",
isbn="978-3-540-89075-1",
}
@inbook{martinez2010improving,
author="Jesús Martinez and Adrián Romero and Luis Manso and Pablo Bustos",
title="Improving a Robotics Framework with Real-Time and High-Performance Features",
pages="263--274",
publisher="Springer",
year="2010",
type="bookchapter",
doi="",
isbn="978-3-540-89075-1",
}
@inproceedings{mateos2010robex,
author="José Mateos and Agustín Sánchez and Luis Manso and Pilar Bachiller and Pablo Bustos",
title="RobEx: an Open-hardware Robotics Platform",
booktitle="Proc. of Workshop of Physical Agents",
year="2010",
pages="17--24",
publisher="",
doi="",
}
@inproceedings{pinero2010visuallyguided,
author="Luis Pinero and Ramón Cintas and Luis Manso and Pilar Bachiller and Pablo Bustos",
title="Visually-guided Object Manipulation by a Mobile Robot",
booktitle="Proc. of Workshop of Physical Agents",
year="2010",
pages="145--152",
publisher="",
doi="",
}
@inproceedings{manso2010framework,
author="Luis Manso and Pablo Bustos and Pilar Bachiller and Pedro Núñez and Ramón Cintas and Luis Calderita",
title="Un Framework de Desarrollo para Robótica",
booktitle="Proc. of I Jornadas Jóvenes Investigadores",
year="2010",
pages="33--38",
}
@article{manso2010multicue,
author="Luis Manso and Pablo Bustos and Pilar Bachiller and José Moreno",
title="Multi-cue Visual Obstacle Detection for Mobile Robots",
journal="Journal of Physical Agents",
year="2010",
volume="4",
number="1",
pages="3-10",
doi="",
}
@inproceedings{manso2009obstacle,
author="Luis Manso and Pablo Bustos and Pilar Bachiller and José Moreno",
title="Obstacle Detection on Heterogeneous Surfaces Using Color and Geometric Cues",
booktitle="Proc. of Workshop of Physical Agents",
year="2009",
pages="95--101",
publisher="WAF",
doi="",
}
@mastersthesis{manso2009navegacion,
author="L.J. Manso",
title="Navegación Visual en Robots Móviles",
school="Cáceres School of Technology, University of Extremadura",
year="2009",
type="masterthesis",
}
@inbook{bachiller2008attentional,
author="P. Bachiller and P. Bustos and L.J. Manso",
title="Attentional Selection for Action in Mobile Robots",
pages="111--136",
publisher="I-Tech",
year="2008",
type="bookchapter",
doi="",
}