@inproceedings{data-only-flattening,
author = {Lars Bergstrom and Matthew Fluet and Mike Rainey and John Reppy
and Stephen Rosen and Adam Shaw},
title = {Data-Only Flattening for Nested Data Parallelism},
booktitle = {Proceedings of the 2013 {ACM} {SIGPLAN} Symposium on Principles \&
Practice of Parallel Programming ({PPoPP} 2013)},
year = 2013,
month = feb,
pages = {81-92},
pdf = {2013/ppopp-flat.pdf},
abstract = {
Data parallelism has proven to be an effective technique for
high-level programming of a certain class of parallel applications,
but it is not well suited to irregular parallel computations.
Blelloch and others proposed \emph{nested data parallelism} (NDP) as a
language mechanism for programming irregular parallel applications
in a declarative data-parallel style.
The key to this approach is a compiler transformation that
\emph{flattens} the NDP computation and data structures into a form
that can be executed efficiently on a wide-vector SIMD architecture.
Unfortunately, this technique is ill suited to execution on today's
multicore machines.
We present a new technique, called \emph{data-only flattening}, for
the compilation of NDP, which is suitable for multicore architectures.
Data-only flattening transforms nested data structures in order to
expose programs to various optimizations while leaving control
structures intact.
We present a formal semantics of data-only flattening in a core
language with a rewriting system.
We demonstrate the effectiveness of this technique in the Parallel~ML
implementation and we report encouraging experimental results across
various benchmark applications.
},
topic = {implementation,concurrency,manticore}
}
@article{lazy-tree-splitting:jfp,
author = {Lars Bergstrom and Matthew Fluet and Mike Rainey and John Reppy and Adam Shaw},
title = {Lazy Tree Splitting},
journal = {Journal of Functional Programming},
publisher = {Cambridge University Press},
address = {Cambridge, England},
volume = 22,
number = {4-5},
pages = {382-438},
year = 2012,
month = sep,
abstract = {
Nested data-parallelism (NDP) is a language mechanism that supports
programming irregular parallel applications in a declarative
style.
In this paper, we describe the implementation of NDP in Parallel ML (PML), which
is part of the Manticore system.
One of the main challenges of implementing NDP is managing the parallel decomposition
of work.
If we have too many small chunks of work, the overhead will be too high, but if we do
not have enough chunks of work, processors will be idle.
Recently the technique of Lazy Binary Splitting was proposed to address this problem
for nested parallel loops over flat arrays.
We have adapted this technique to our implementation of NDP, which uses binary trees
to represent parallel arrays.
This new technique, which we call \emph{Lazy Tree Splitting} (LTS), has the key
advantage of \emph{performance robustness};
\textit{i.e.}, that it does not require tuning to get the best performance for
each program.
We describe the implementation of the standard NDP operations using LTS and we present
experimental data that demonstrates the scalability of LTS across a range of benchmarks.
},
topic = {implementation,concurrency,manticore}
}
@inproceedings{nesl-gpu,
author = {Lars Bergstrom and John Reppy},
title = {Nested Data-Parallelism on the {GPU}},
booktitle = {Proceedings of the 17th {ACM} {SIGPLAN} International
Conference on Functional Programming (ICFP 2012)},
year = 2012,
month = sep,
pages = {247-258},
pdf = {2012/icfp-gpu.pdf},
abstract = {
Graphics processing units (GPUs) provide both memory bandwidth and arithmetic
performance far greater than that available on CPUs but, because of their
\emph{Single-Instruction-Multiple-Data} (SIMD) architecture, they are hard to
program.
Most of the programs ported to GPUs thus far use traditional data-level
parallelism, performing only operations that operate uniformly over vectors.
NESL is a first-order functional language that was designed to allow
programmers to write irregular-parallel programs --- such as parallel
divide-and-conquer algorithms --- for wide-vector parallel computers.
This paper presents our port of the NESL implementation to work on GPUs and
provides empirical evidence that nested data-parallelism (NDP) on GPUs
significantly outperforms CPU-based implementations and matches or beats newer
GPU languages that support only flat parallelism.
While our performance does not match that of hand-tuned CUDA programs, we argue
that the notational conciseness of NESL is worth the loss in performance.
This work provides the first language implementation that directly supports NDP
on a GPU.
},
topic = {concurrency,implementation}
}
@inproceedings{diderot-parallel-dsl,
author = {Charisee Chiw and Gordon Kindlmann and John Reppy and Lamont Samuels and Nick Seltzer},
title = {Diderot: A Parallel DSL for Image Analysis and Visualization},
booktitle = {Proceedings of the SIGPLAN 2012 Conference on Programming
Language Design and Implementation},
publisher = {ACM},
address = {New York, NY},
year = 2012,
month = jun,
pages = {111-120},
pdf = {2012/pldi-diderot.pdf},
abstract = {
Research scientists and medical professionals use imaging technology, such as
\emph{computed tomography} (CT) and
\emph{magnetic resonance imaging} (MRI) to measure a wide
variety of biological and physical objects.
The increasing sophistication of imaging technology creates demand for
equally sophisticated computational techniques to analyze and visualize the
image data.
Analysis and visualization codes are often crafted for a specific experiment
or set of images, thus imaging scientists need support for quickly developing
codes that are reliable, robust, and efficient.
In this paper, we present the design and implementation of Diderot, which is a
parallel domain-specific language for biomedical image analysis and visualization.
Diderot supports a high-level model of computation that is based on continuous tensor fields.
These tensor fields are reconstructed from
discrete image data using separable convolution kernels, but may also be defined by
applying higher-order operations, such as differentiation.
Early experiments demonstrate that Diderot provides both a high-level concise notation
for image analysis and visualization algorithms, as well as high sequential and parallel
performance.
},
topic = {design,concurrency}
}
@inproceedings{gc-for-multicore-numa,
author = {Sven Auhagen and Lars Bergstrom and Matthew Fluet and John Reppy},
title = {Garbage Collection for Multicore {NUMA} Machines},
booktitle = {Proceedings of the {ACM} {SIGPLAN} Workshop on
Memory Systems Performance and Correctness ({MSPC} 2011)},
publisher = {ACM},
address = {New York, NY},
year = 2011,
month = jun,
pages = {51-57},
pdf = {2011/mspc-numa-gc.pdf},
abstract = {
Modern high-end machines feature multiple processor packages, each of which
contains multiple independent cores and integrated memory controllers connected
directly to dedicated physical RAM.
These packages are connected via a shared bus, creating a system with a
heterogeneous memory hierarchy.
Since this shared bus has less bandwidth than the sum of the links to memory,
aggregate memory bandwidth is higher when parallel threads all access memory
local to their processor package than when they access memory attached to a
remote package.
This bandwidth limitation has traditionally limited the scalability of modern
functional language implementations, which seldom scale well past 8~cores, even
on small benchmarks.
This work presents a garbage collector integrated with our strict, parallel
functional language implementation, Manticore, and shows that it scales
effectively on both a 48-core AMD Opteron machine and a 32-core Intel Xeon
machine.
},
topic = {manticore,implementation}
}
@article{implicit-threaded-parallel-jfp,
author = {Matthew Fluet and Mike Rainey and John Reppy and Adam Shaw},
title = {Implicitly threaded parallelism in {Manticore}},
journal = {Journal of Functional Programming},
publisher = {Cambridge University Press},
address = {Cambridge, England},
volume = 20,
number = {5-6},
pages = {537-576},
year = 2011,
abstract = {
The increasing availability of commodity multicore processors is making parallel computing
ever more widespread. In order to exploit its potential, programmers need languages that
make the benefits of parallelism accessible and understandable.
Previous parallel languages have traditionally been intended for large-scale scientific
computing, and they tend not to be well suited to programming the applications one typically
finds on a desktop system.
Thus, we need new parallel-language designs that address a broader spectrum of applications.
The Manticore project is our effort to address this need. At its core is Parallel ML,
a high-level functional language for programming parallel applications on commodity multicore
hardware.
Parallel ML provides a diverse collection of parallel constructs for different granularities
of work. In this paper, we focus on the implicitly threaded parallel constructs of the
language, which support fine-grained parallelism. We concentrate on those elements that
distinguish our design from related ones, namely, a novel parallel binding form, a
nondeterministic parallel case form, and the treatment of exceptions in the presence of data
parallelism.
These features differentiate the present work from related work on functional data-parallel
language designs, which have focused largely on parallel problems with regular structure and
the compiler transformations --- most notably, flatteningÑthat make such designs feasible.
We present detailed examples utilizing various mechanisms of the language and give a formal
description of our implementation.
},
topic = {design,concurrency,manticore,implementation}
}
@inproceedings{particles,
author = {Pavel Krajcevski and John Reppy},
title = {A Declarative {API} for Particle Systems},
booktitle = {Proceedings of the Thirteenth International Symposium on
Practical Aspects of Declarative Languages ({PADL 2011})},
year = 2011,
month = jan,
series = {Lecture Notes in Computer Science},
volume = 6539,
publisher = {Springer-Verlag},
address = {New York, NY},
pages = {130-144},
pdf = {2011/padl-particles.pdf},
abstract = {
Recent trends in computer-graphics APIs and hardware have made it practical to
use high-level functional languages for real-time graphics applications.
Thus we have the opportunity to develop new approaches to computer
graphics that take advantage of the high-level features of
functional languages.
This paper describes one such project that uses the techniques of functional programming
to define and implement a combinator library for \textit{particle systems}.
Particle systems are a popular
technique for rendering fuzzy phenomena, such as fire, smoke, and explosions.
Using our combinators, a programmer can provide a declarative specification of how a particle
system behaves.
This specification includes rules for how particles are created, how they evolve, and how they
are rendered.
Our library translates these declarative specifications into a low-level intermediate language
that can be compiled to run on the GPU or interpreted by the CPU.
},
topic = {sml,gui}
}
@inproceedings{lazy-tree-splitting,
author = {Lars Bergstrom and Mike Rainey and John Reppy and Adam Shaw and Matthew Fluet},
title = {Lazy Tree Splitting},
booktitle = {Proceedings of the 15th {ACM} {SIGPLAN} International
Conference on Functional Programming (ICFP 2010)},
year = 2010,
month = sep,
pages = {93-104},
pdf = {2010/icfp-lts.pdf},
abstract = {
Nested data-parallelism (NDP) is a declarative style for programming
irregular parallel applications. NDP languages provide language features
favoring the NDP style, efficient compilation of NDP programs, and various
common NDP operations like parallel maps, filters, and sum-like reductions.
In this paper, we describe the implementation of NDP in Parallel ML
(PML), part of the Manticore project. Managing the parallel decomposition
of work is one of the main challenges of implementing NDP. If the
decomposition creates too many small chunks of work, performance will
be eroded by too much parallel overhead. If, on the other hand, there
are too few large chunks of work, there will be too much sequential
processing and processors will sit idle.
Recently the technique of Lazy Binary Splitting was proposed for dynamic
parallel decomposition of work on flat arrays, with promising results.
We adapt Lazy Binary Splitting to parallel processing of binary trees,
which we use to represent parallel arrays in PML. We call our technique
Lazy Tree Splitting (LTS). One of its main advantages is its performance
robustness: per-program tuning is not required to achieve good performance
across varying platforms. We describe LTS-based implementations of
standard NDP operations, and we present experimental data demonstrating
the scalability of LTS across a range of benchmarks.
},
topic = {implementation,concurrency,manticore}
}
@inproceedings{programming-in-manticore,
author = {Matthew Fluet and Lars Bergstrom and Nic Ford and Mike Rainey and John Reppy
and Adam Shaw and Yingqi Xiao},
title = {Programming in {Manticore}, a Heterogeneous Parallel Functional Language},
booktitle = {Central European Functional Programming School},
series = {Lecture Notes in Computer Science},
volume = 6299,
editor = {Zoltan Horv\'ath},
year = 2010,
pages = {94-145},
publisher = {Springer-Verlag},
address = {New York, NY},
topic = {concurrency,manticore}
}
@inproceedings{arity-raising,
author = {Lars Bergstrom and John Reppy},
title = {Arity Raising in Manticore},
booktitle = {International Symposia on Implementation and Application of
Functional Languages (IFL 2009)},
year = 2009,
month = sep,
series = {Lecture Notes in Computer Science},
volume = 6041,
publisher = {Springer-Verlag},
address = {New York, NY},
pages = {90-106},
pdf = {2009/ifl-arity.pdf},
abstract = {
Compilers for polymorphic languages are required to treat values in programs in an
abstract and generic way at the source level.
The challenges of optimizing the boxing of raw values, flattening of argument
tuples, and raising the arity of functions that handle complex structures to
reduce memory usage are old ones, but take on newfound import with processors
that have twice as many registers.
We present a novel strategy that uses both control-flow and type information to
provide an arity raising implementation addressing these problems.
This strategy is \textit{conservative} --- no matter the execution
path, the transformed program will not perform extra operations.
},
topic = {implementation,manticore}
}
@inproceedings{parallel-cml,
author = {John Reppy and Claudio Russo and Yingqi Xiao},
title = {Parallel Concurrent ML},
booktitle = {Proceedings of the 14th {ACM} {SIGPLAN} International
Conference on Functional Programming (ICFP 2009)},
year = 2009,
month = sep,
pages = {257-268},
pdf = {2009/icfp-parallel-cml.pdf},
abstract = {
Concurrent ML (CML) is a high-level message-passing language that supports
the construction of first-class synchronous abstractions called
events. This mechanism has proven quite effective over the years and
has been incorporated in a number of other languages. While CML
provides a concurrent programming model, its implementation has always
been limited to uniprocessors. This limitation is exploited in the
implementation of the synchronization protocol that underlies the event
mechanism, but with the advent of cheap parallel processing on the
desktop (and laptop), it is time for Parallel CML.
Parallel implementations of CML-like primitives for
Java and Haskell exist, but build on high-level synchronization
constructs that are unlikely to perform well. This paper presents a
novel, parallel implementation of CML that exploits a purpose-built
optimistic concurrency protocol designed for both correctness and
performance on shared-memory multiprocessors.
This work extends and
completes an earlier protocol that supported just a strict subset of
CML with synchronization on input, but not output events. Our main
contributions are a model-checked reference implementation of the
protocol and two concrete implementations. This paper focuses on
Manticore's functional, continuation-based implementation but briefly
discusses an independent, thread-based implementation written in C\#
and running on Microsoft's stock, parallel runtime. Although very
different in detail, both derive from the same design. Experimental
evaluation of the Manticore implementation reveals good
performance, despite the extra overhead of multiprocessor synchronization.
},
topic = {cml,concurrency,implementation,manticore}
}
@article{re-derivs,
author = {Scott Owens and John Reppy and Aaron Turon},
title = {Regular-expression derivatives reexamined},
journal = {Journal of Functional Programming},
publisher = {Cambridge University Press},
address = {Cambridge, England},
volume = 19,
number = 2,
year = 2009,
pages = {173-190},
abstract = {
Regular-expression derivatives are an old, but elegant, technique for compiling regular
expressions to deterministic Þnite-state machines.
It easily supports extending the regular-expression operators with boolean operations,
such as intersection and complement.
Unfortunately, this technique has been lost in the sands of time and few computer
scientists are aware of it.
In this paper, we reexamine regular-expression derivatives and report on our experiences in
the context of two different functional-language implementations.
The basic implementation is simple and we show how to extend it to handle large character
sets (e.g., Unicode).
We also show that the derivatives approach leads to smaller state machines than the
traditional algorithm given by McNaughton and Yamada.
},
topic = {implementation}
}
@inproceedings{varargs,
author = {Matthias Blume and Mike Rainey and John Reppy},
title = {Calling Variadic Functions from a Strongly-typed Language},
booktitle = {Proceedings of the 2008 {ACM} {SIGPLAN} Workshop on {ML}},
year = 2008,
month = sep,
pages = {47-58},
pdf = {2008/ml-varargs.pdf},
abstract = {
The importance of providing a mechanism to call C functions from high-level languages
has been understood for many years and, these days, almost all statically-typed high-level-language
implementations provide such a mechanism.
One glaring omission, however, has been support for calling variadic C functions, such as \texttt{printf}.
Variadic functions have been ignored because it is not obvious how to give static types to them and
because it is not clear how to generate calling sequence when the arguments to the function may not be
known until runtime.
In this paper, we address this longstanding omission with an extension to the NLFFI foreign-interface
framework used by Standard ML of New Jersey (SML/NJ) and the MLton SML compiler.
We describe two different ways of typing variadic functions in NLFFI and an implementation technique
based on the idea of using state machines to describe calling conventions.
Our implementation is easily retargeted to new architectures and ABIs, and can also be easily added
to any HOT language implementation that supports calling C functions.
},
topic = {interop,implementation,sml}
}
@inproceedings{implicit-threaded-parallel,
author = {Matthew Fluet and Mike Rainey and John Reppy and Adam Shaw},
title = {Implicitly-threaded parallelism in Manticore},
booktitle = {Proceedings of the 13th {ACM} {SIGPLAN} International
Conference on Functional Programming (ICFP 2008)},
year = 2008,
month = sep,
pages = {119-130},
pdf = {2008/icfp-datapar.pdf},
abstract = {
The increasing availability of commodity multicore processors is making
parallel computing available to the masses. Traditional parallel
languages are largely intended for large-scale scientific computing
and tend not to be well-suited to programming the applications one
typically finds on a desktop system. Thus we need new parallel-language
designs that address a broader spectrum of applications. In this
paper, we present Manticore, a language for building parallel applications
on commodity multicore hardware including a diverse collection of
parallel constructs for different granularities of work. We focus on
the implicitly-threaded parallel constructs in our high-level functional
language. We concentrate on those elements that distinguish our design
from related ones, namely, a novel parallel binding form, a nondeterministic
parallel case form, and exceptions in the presence of data parallelism.
These features differentiate the present work from related work on
functional data parallel language designs, which has focused largely
on parallel problems with regular structure and the compiler transformations
--- most notably, flattening --- that make such designs feasible. We
describe our implementation strategies and present some detailed
examples utilizing various mechanisms of our language.
},
topic = {design,concurrency,manticore}
}
@inproceedings{manticore-sched-framework,
author = {Matthew Fluet and Mike Rainey and John Reppy},
title = {A scheduling framework for general-purpose parallel languages},
booktitle = {Proceedings of the 13th {ACM} {SIGPLAN} International
Conference on Functional Programming (ICFP 2008)},
year = 2008,
month = sep,
pages = {241-252},
pdf = {2008/icfp-schedule.pdf},
abstract = {
The trend in microprocessor design toward multicore and manycore processors
means that future performance gains in software will largely come from harnessing
parallelism.
To realize such gains, we need languages and implementations that can enable
parallelism at many different levels.
For example, an application might use both explicit threads to implement course-grain
parallelism for independent tasks and implicit threads for fine-grain data-parallel
computation over a large array.
An important aspect of this requirement is supporting a wide range of different
scheduling mechanisms for parallel computation.
In this paper, we describe the scheduling framework that we have designed
and implemented for Manticore, a strict parallel functional language.
We take a micro-kernel approach in our design: the compiler and runtime support
a small collection of scheduling primitives upon which complex scheduling policies
can be implemented.
This framework is extremely flexible and can support a wide range of
different scheduling policies.
It also supports the nesting of schedulers, which is key to both supporting multiple
scheduling policies in the same application and to hierarchies of
speculative parallel computations.
In addition to describing our framework, we also illustrate its expressiveness
with several popular scheduling techniques.
We present a (mostly) modular approach to extending our schedulers to support cancellation.
This mechanism is essential for implementing eager and speculative parallelism.
We finally evaluate our framework with a series of benchmarks and an analysis.
},
topic = {implementation,concurrency,manticore}
}
@inproceedings{toward-parallel-cml,
author = {John Reppy and Yingqi Xiao},
title = {Toward a parallel implementation of Concurrent ML},
booktitle = {Proceedings of the Workshop on Declarative Aspects of
Multicore Programming (DAMP 2008)},
year = 2008,
month = jan,
pdf = {2008/damp-parallel-cml.pdf},
abstract = {
Concurrent ML (CML) is a high-level message-passing language that supports
the construction of first-class synchronous abstractions called
events. This mechanism has proven quite effective over the years and
has been incorporated in a number of other languages. While CML
provides a concurrent programming model, its implementation has always
been limited to uniprocessors. This limitation is exploited in the
implementation of the synchronization protocol that underlies the event
mechanism, but with the advent of cheap parallel processing on the
desktop (and laptop), it is time for Parallel CML.
We are pursuing such an implementation as part of the Manticore
project. In this paper, we describe a parallel implementation of
Asymmetric CML (ACML), which is a subset of CML that does not
support output guards. We describe an optimistic concurrency
protocol for implementing CML synchronization. This protocol has
been implemented as part of the Manticore system.
},
topic = {implementation,cml,concurrency,manticore}
}
@inproceedings{manticore-status-2007,
author = {Matthew Fluet and Nic Ford and Mike Rainey and John Reppy and Adam Shaw and Yingqi Xiao},
topic = {design,implementation,concurrency},
title = {Status Report: The {Manticore} Project},
booktitle = {Proceedings of the 2007 {ACM} {SIGPLAN} Workshop on {ML}},
year = 2007,
month = oct,
pages = {15-24},
pdf = {2007/ml-manticore.pdf},
abstract = {
The Manticore project is an effort to design and implement a new
functional language for parallel programming.
Unlike many earlier parallel languages, Manticore is a \emph{heterogeneous}
language that supports parallelism at multiple levels.
Specifically, we combine CML-style explicit concurrency with fine-grain,
implicitly threaded, parallel constructs.
We have been working on an implementation of Manticore for the past six months;
this paper gives an overview of our design and a report on the status of
the implementation effort.
},
topic = {design,implementation,concurrency,manticore}
}
@inproceedings{trait-metaprogramming,
author = {John Reppy and Aaron Turon},
title = {Metaprogramming with Traits},
booktitle = {Proceedings of the European Conference
on Object Oriented Programming (ECOOP 2007)},
year = 2007,
month = {July-August},
pages = {373-398},
pdf = {2007/ecoop-traits.pdf},
abstract = {
In many domains, classes have highly regular internal structure.
For example, so-called business objects often contain boilerplate code for mapping
database fields to class members.
The boilerplate code must be repeated per-field for every class, because
existing mechanisms for constructing classes do not provide a way to
capture and reuse such member-level structure.
As a result, programmers often resort to \emph{ad hoc} code generation.
This paper presents a lightweight mechanism for specifying and reusing member-level
structure in Java programs.
The proposal is based on a modest extension to traits
that we have termed \emph{trait-based metaprogramming}.
Although the semantics of the mechanism are straightforward, its type
theory is difficult to reconcile with nominal subtyping.
We achieve reconciliation by introducing a hybrid structural/nominal type
system that extends Java's type system.
The paper includes a formal calculus defined by translation to Featherweight Generic Java.
},
topic = {design,object}
}
@inproceedings{specialize-cml,
author = {John Reppy and Yingqi Xiao},
title = {Specialization of CML message-passing primitives},
booktitle = {Proceedings of the 34th {ACM} {SIGPLAN}-{SIGACT} Symposium
on Principles of Programming Languages ({POPL} 2007)},
year = 2007,
month = jan,
pages = {315-326},
pdf = {2007/popl-cml-opt.pdf},
abstract = {
Concurrent ML (CML) is a statically-typed higher-order concurrent language
that is embedded in Standard ML.
Its most notable feature is its support for
\emph{first-class synchronous operations}.
This mechanism allows programmers to encapsulate complicated communication
and synchronization protocols as first-class abstractions, which
encourages a modular style of programming where the
underlying channels used to communicate with a given thread are
hidden behind data and type abstraction.
While CML has been in active use for well over a decade, little attention
has been paid to optimizing CML programs.
In this paper, we present a new program analysis for statically-typed
higher-order concurrent languages that enables the compile-time specialization
of communication operations.
This specialization is particularly important in a multiprocessor or
multicore setting, where the synchronization overhead for general-purpose
operations are high.
Preliminary results from a prototype that we have built demonstrate that
specialized channel operations are much faster than the general-purpose
operations.
Our analysis technique is modular (i.e., it analyzes and optimizes a single
unit of abstraction at a time), which plays to the modular style of
many CML programs.
The analysis consists of three steps: the first is a
type-sensitive control-flow analysis that uses the program's
type-abstractions to compute more precise results.
The second is the construction of an \emph{extended control-flow graph}
using the results of the CFA.
The last step is an iterative analysis over the graph that approximates
the usage patterns of known channels.
Our analysis is designed to detect special patterns of use, such
as one-shot channels, fan-in channels, and fan-out channels.
We have proven the safety of our analysis and state those results.
},
topic = {implementation,cml,concurrency,manticore}
}
@inproceedings{manticore,
author = {Matthew Fluet and Mike Rainey and John Reppy and Adam Shaw and Yingqi Xiao},
title = {Manticore: A heterogeneous parallel language},
booktitle = {Proceedings of the Workshop on Declarative Aspects of
Multicore Programming (DAMP 2007)},
year = 2007,
month = jan,
pages = {37-44},
pdf = {2007/damp-manticore.pdf},
abstract = {
The Manticore project is an effort to design and implement a new
functional language for parallel programming.
Unlike many earlier parallel languages, Manticore is a \emph{heterogeneous}
language that supports parallelism at multiple levels.
Specifically, we combine CML-style explicit concurrency with NESL/Nepal-style
data-parallelism.
In this paper, we describe and motivate the design of the Manticore language.
We also describe a flexible runtime model that supports multiple scheduling
disciplines (\emph{e.g.}, for both fine-grain and course-grain parallelism) in
a uniform framework.
Work on a prototype implementation is ongoing and we give a status report.
},
topic = {design,implementation,concurrency,manticore}
}
@inproceedings{app-spec-fig,
author = {John Reppy and Chunyan Song},
title = {Application-specific foreign-interface generation},
booktitle = {Proceedings of the Fifth International Conference on
Generative Programming and Component Engineering},
pages = {49-58},
year = 2006,
month = oct,
pdf = {2006/gpce-fig.pdf},
abstract = {
A foreign interface (FI) mechanism to support interoperability with
libraries written in other languages (especially C) is an important
feature in most high-level language implementations. Such FI
mechanisms provide a Foreign Function Interface (FFI) for the
high-level language to call C functions and marshaling and
unmarshaling mechanisms to support conversion between the high-level
and C data representations. Often, systems provide tools to automate
the generation of FIs, but these tools typically lock the user into a
specific model of interoperability. It is our belief that the policy
used to craft the mapping between the high-level language and C should
be distinct from the underlying mechanism used to implement the
mapping.
In this paper, we describe a FI generation tool, called FIG (for
Foreign Interface Generator) that embodies a new approach to the
problem of generating foreign interfaces for high-level languages.
FIG takes as input raw C header files plus a declarative script that
specifies the generation of the foreign interface from the header
file. The script sets the policy for the translation, which allows
the user to tailor the resulting FI to his or her application. We
call this approach application-specific foreign-interface generation.
The scripting language uses rewriting strategies as its execution
model. The other major feature of the scripting language is a novel
notion of composable typemaps that describe the mapping between
high-level and low-level types.
},
topic = {interop,moby}
}
@inproceedings{typed-cfa,
author = {John Reppy},
title = {Type-sensitive control-flow analysis},
booktitle = {Proceedings of the 2006 {ACM} {SIGPLAN} Workshop on {ML}},
year = 2006,
month = sep,
pages = {74--83},
pdf = {2006/ml-typed-cfa.pdf},
abstract = {
Higher-order typed languages, such as ML, provide strong
support for data and type abstraction. While such abstraction
is often viewed as costing performance, there are situations
where it may provide opportunities for more aggressive program
optimization. Specifically, we can exploit the fact that type
abstraction guarantees representation independence, which allows
the compiler to specialize data representations. This paper
describes a first step in supporting such optimizations; namely
a control-flow analysis that uses the program's type information
to compute more precise results. We present our algorithm as an
extension of Serrano's version of 0-CFA and we show that it
respects types. We also discuss applications of the analysis with
examples of optimizations enabled by the analysis that would not
be possible normal CFA.
},
topic = {implementation,sml}
}
@inproceedings{trait-metaprogramming-foundation,
author = {John Reppy and Aaron Turon},
title = {A Foundation for Trait-based Metaprogramming},
booktitle = {2006 International Workshop on Foundations and Developments
of Object-Oriented Languages},
year = 2006,
month = jan,
pdf = {2006/foolwood-traits.pdf},
abstract = {
Scharli et al. introduced traits as reusable units of behavior
independent of the inheritance hierarchy. Despite their relative
simplicity, traits offer a surprisingly rich calculus. Trait calculi
typically include operations for resolving conflicts when composing
two traits. In the existing work on traits, these operations (method
exclusion and aliasing) are \emph{shallow}, i.e., they have no effect
on the body of the other methods in the trait. In this paper, we present
a new trait system, based on the Fisher-Reppy trait calculus, that adds
deep operations (method hiding and renaming) to support conflict resolution.
The proposed operations are \emph{deep} in the sense that they preserve any existing
connections between the affected method and the other methods of the
trait. Our system uses Riecke-Stone dictionaries to support these features.
In addition, we define a more fine-grained mechanism for tracking
trait types than in previous systems. The resulting calculus is more flexible
and expressive, and can serve as the foundation for trait-based
metaprogramming, an idiom we introduce. A companion technical report proves
type soundness for our system; we state the key results in this paper.
},
topic = {object}
}
@book{sml-basis-library,
editor = {Emden R. Gansner and John H. Reppy},
title = {The {Standard} {ML} Basis Library},
publisher = {Cambridge University Press},
year = 2004,
url = {http://www.standardml.org/Basis/},
topic = {design,sml}
}
@inproceedings{typed-traits:fool,
author = {Kathleen Fisher and John Reppy},
title = {A Typed Calculus of Traits},
booktitle = {Proceedings of the 11th Workshop on Foundations of
Object-oriented Programming},
year = {2004},
month = jan,
pdf = {2004/fool-traits.pdf},
topic = {object}
}
@techreport{moby-oop-tr,
author = {Kathleen Fisher and John Reppy},
title = {Object-oriented aspects of {{Moby}}},
institution = {Department of Computer Science, University of Chicago},
address = {Chicago, IL},
number = {TR-2003-10},
year = 2003,
month = sep,
topic = {design,moby,object}
}
@techreport{typed-traits-tr,
author = {Kathleen Fisher and John Reppy},
title = {Statically Typed Traits},
institution = {Department of Computer Science, University of Chicago},
address = {Chicago, IL},
number = {TR-2003-13},
year = 2003,
month = dec,
topic = {object}
}
@article{reppy:nested-loops,
author = {John Reppy},
title = {Optimizing Nested Loops Using Local {CPS} Conversion},
journal = {Higher-order and Symbolic Computation},
publisher = {Kluwer Academic Publishers},
address = {Boston, MA},
volume = 15,
number = {2/3},
year = 2002,
month = sep,
pages = {161-180},
pdf = {2002/hosc-lcps.pdf},
topic = {moby,implementation}
}
@article{class-types,
author = {Kathleen Fisher and John Reppy},
title = {Inheritance-based subtyping},
journal = {Information and Computation},
publisher = {Academic Press, Inc.},
address = {London, UK},
volume = 177,
number = 1,
year = 2002,
month = aug,
pages = {28-55},
pdf = {2002/inc-inher-subty.pdf},
abstract = {
Classes play a dual role in mainstream statically-typed
object-oriented languages, serving as both object generators and
object types. In such languages, inheritance implies subtyping.
In contrast, the theoretical language community has viewed this
linkage as a mistake and has focused on subtyping relationships
determined by the structure of object types, without regard to their
underlying implementations. In this paper, we explore why
inheritance-based subtyping relations are useful, and we describe
two different approaches to extending the Moby programming language
with inheritance-based subtyping relations. In addition, we present
a typed object calculus that supports both structural and
inheritance-based subtyping, and which provides a formal accounting
of our extensions to Moby.
},
topic = {design,moby,object}
}
@techreport{tm-moby-concur,
author = {Kathleen Fisher and John Reppy},
title = {Compiler support for lightweight concurrency},
type = {Technical Memorandum},
institution = {Bell Labs},
year = {2002},
month = mar,
pdf = {2002/tm-lightweight-concur.pdf},
topic = {moby,concurrency,implementation}
}
@inproceedings{moby-interop-framework,
author = {Kathleen Fisher and Riccardo Pucella and John Reppy},
title = {A framework for interoperability},
booktitle = {Proceedings of the First International
Workshop on Multi-Language Infrastructure
and Interoperability (BABEL'01)},
series = {Electronic Notes in Theoretical Computer Science},
volume = {59},
issue = {1},
publisher = {Elsevier Science Publishers},
address = {New York, NY},
editor = {Nick Benton and Andrew Kennedy},
year = {2001},
month = sep,
pdf = {2001/babel-interop.pdf},
abstract = {
Practical implementations of high-level languages must provide access to libraries and system
services that have APIs specified in a low-level language (usually C). An important characteristic
of such mechanisms is the foreign-interface policy that defines how to bridge the semantic gap
between the high-level language and C. For example, IDL-based tools generate code to marshal
data into and out of the high-level representation according to user annotations. The design
space of foreign-interface policies is large and there are pros and cons to each approach. Rather
than commit to a particular policy, we choose to focus on the problem of supporting a gamut of
interoperability policies. In this paper, we describe a framework for language interoperability
that is expressive enough to support very efficient implementations of a wide range of different
foreign-interface policies. We describe two tools that implement substantially different policies
on top of our framework and present benchmarks that demonstrate their efficiency.
},
topic = {moby,interop}
}
@inproceedings{async-exn,
author = {Simon Marlow and Simon Peyton Jones and Andrew Moran and John Reppy},
title = {Asynchronous exceptions in {Haskell}},
booktitle = {Proceedings of the SIGPLAN 2001 Conference on Programming
Language Design and Implementation},
year = 2001,
month = jun,
topic = {design,concurrency}
}
@inproceedings{reppy:lcps,
author = {John Reppy},
title = {Local {CPS} conversion in a direct-style compiler},
booktitle = {Proceedings of the Third {ACM} {SIGPLAN}
Workshop on Continuations (CW'01)},
year = 2001,
month = jan,
pages = {13-22},
pdf = {2001/cw-lcps.pdf},
topic = {moby,implementation}
}
@inproceedings{hotos-protium,
author = {Cliff Young and Lakshman Y.N. and Tom Szymanski and John Reppy and David Presotto and
Rob Pike and Girija Narlikar and Sape Mullender and Eric Grosse},
title = {Protium: An infrastructure for partitioned applications},
booktitle = {Proceedings of the Eighth Workshop on Hot Operating Systems (HotOS-VIII)},
year = 2001,
month = jan,
pdf = {2001/hotos-final.pdf},
topic = {concurrency}
}
@inproceedings{ecoop00:class-types,
author = {Kathleen Fisher and John Reppy},
title = {Extending {Moby} with inheritance-based subtyping},
booktitle = {Proceedings of the European Conference
on Object Oriented Programming},
year = 2000,
month = jun,
series = {Lecture Notes in Computer Science},
volume = 1850,
publisher = {Springer-Verlag},
address = {New York, NY},
pages = {83-107},
pdf = {2000/ecoop-inher-subty.pdf},
topic = {design,moby,object}
}
@inproceedings{esop00:links,
author = {Kathleen Fisher and John Reppy and Jon Riecke},
title = {A Calculus for Compiling and Linking Classes},
booktitle = {Proceedings of the European Symposium on Programming},
year = {2000},
month = {March/April},
series = {Lecture Notes in Computer Science},
volume = 1782,
publisher = {Springer-Verlag},
address = {New York, NY},
pages = {134-149},
pdf = {2000/esop-links.pdf},
topic = {object,implementation}
}
@inproceedings{inheritance-subtyping,
author = {Kathleen Fisher and John Reppy},
title = {Inheritance-based subtyping},
booktitle = {Proceedings of the 7th Workshop on Foundations of
Object-oriented Programming},
year = 2000,
month = jan,
pdf = {2000/fool-inher-subty.pdf},
topic = {design,moby,object}
}
@techreport{data-level-interop,
author = {Kathleen Fisher and Riccardo Pucella and John Reppy},
title = {Data-level interoperability},
type = {{Technical} {Memorandum}},
institution = {Bell Labs, Lucent Technologies},
year = 2000,
month = apr,
topic = {moby,implementation,interop}
}
@book{reppy:cml-book,
author = {John H. Reppy},
title = {Concurrent Programming in {ML}},
publisher = {Cambridge University Press},
address = {Cambridge, England},
year = 1999,
topic = {concurrency,cml,sml}
}
@inproceedings{moby-classes,
author = {Kathleen Fisher and John Reppy},
title = {The design of a class mechanism for {Moby}},
booktitle = {Proceedings of the SIGPLAN 1999 Conference on Programming
Language Design and Implementation},
publisher = {ACM},
address = {New York, NY},
year = 1999,
month = may,
pages = {37-49},
topic = {design,moby,object}
}
@techreport{moby-foundations,
author = {Kathleen Fisher and John Reppy},
title = {Foundations for {Moby} classes},
type = {{Technical} {Memorandum}},
institution = {Bell Labs, Lucent Technologies},
address = {Murray Hill, NJ},
year = 1999,
month = feb,
topic = {moby,object}
}
@incollection{cml-essence,
title = {The Essence of Concurrent ML},
author = {Prakash Panangaden and John Reppy},
chapter = {1},
booktitle = {{ML} with Concurrency},
publisher = {Springer-Verlag},
editor = {Flemming Nielson},
year = 1997,
topic = {concurrency,cml}
}
@article{njc-aml,
title = {{AML}: Attribute Grammars in {ML}},
author = {S.G. Efremidis and K.A. Mughal and L. {S\o{}raas} and John Reppy},
journal = {Nordic Journal of Computing},
volume = 4,
number = 1,
year = 1997,
topic = {sml,gui}
}
@inproceedings{oml-classes,
title = {Classes in {Object} {ML} via modules},
author = {John H. Reppy and Jon G. Riecke},
booktitle = {Proceedings of the Third Workshop on Foundations of
Object-oriented Programming},
year = {1996},
month = jul,
topic = {design,object}
}
@inproceedings{simple-objects-for-sml,
title = {Simple objects for {SML}},
author = {John H. Reppy and Jon G. Riecke},
booktitle = {Proceedings of the SIGPLAN 1996 Conference on Programming
Language Design and Implementation},
year = 1996,
month = may,
publisher = {ACM},
address = {New York, NY},
pages = {171-180},
topic = {design,object}
}
@article{tolplas-olden,
author = {Martin Carlisle and Laurie J. Hendren and Anne Rogers and John Reppy},
title = {Supporting {SPMD} Execution for Dynamic Data Structures},
journal = {ACM Transactions on Programming Languages and Systems},
volume = 17,
number = 2,
year = 1995,
month = mar,
pages = {233-263},
topic = {concurrency,implementation}
}
@inproceedings{list-unroll,
author = {Zhong Shao and John Reppy and Andrew Appel},
title = {Unrolling lists},
booktitle = {ACM Conference on Lisp and Functional Programming},
year = 1994,
month = jun,
pages = {185-195},
topic = {sml,implementation}
}
@inproceedings{mlrisc,
author = {Lal George and Florent Guillame and John Reppy},
title = {A portable and optimizing back end for the {SML/NJ} compiler},
booktitle = {Fifth International Conference on Compiler Construction},
year = 1994,
month = apr,
pages = {83-97},
topic = {sml,implementation}
}
@inproceedings{olden-experience,
title = {Early experiences with {Olden}},
author = {Martin Carlisle and Anne Rogers and John Reppy and Laurie Hendren},
booktitle = {6th International Workshop on Languages and Compilers for Parallel Computing},
series = {Lecture Notes in Computer Science},
number = 768,
year = 1993,
month = aug,
topic = {concurrency,implementation}
}
@incollection{multithreaded-windows,
author = {Emden R. Gansner and John H. Reppy},
title = {A Multi-threaded Higher-order User Interface Toolkit},
booktitle = {User Interface Software, Bass and Dewan (Eds.)},
series = {Software Trends},
volume = 1,
publisher = {John Wiley \& Sons},
year = 1993,
pages = {61-80},
topic = {gui,cml,concurrency}
}
@incollection{cml-design-etc,
author = {John H. Reppy},
title = {Concurrent {ML}: Design, application and semantics},
booktitle = {Functional Programming, Concurrency, Simulation and Automated Reasoning},
editor = {Peter Lauer},
series = {Lecture Notes in Computer Science},
number = 693,
publisher = {Springer-Verlag},
address = {New York, NY},
year = 1993,
topic = {cml,concurrency}
}
@techreport{tm-gc,
author = {John H. Reppy},
title = {A High-performance Garbage Collector for Standard {ML}},
type = {Technical Memo},
institution = {AT\&T Bell Laboratories},
year = 1993,
month = dec,
topic = {implementation,sml}
}
@inproceedings{spmd-dynamic-data,
title = {Supporting {SPMD} Execution for Dynamic Data Structures},
author = {Anne Rogers and John Reppy and Laurie Hendren},
booktitle = {5th International Workshop on Languages and Compilers for Parallel Computing},
series = {Lecture Notes in Computer Science},
number = 757,
year = 1992,
month = aug,
topic = {concurrency,implementation}
}
@techreport{absconst-for-sml,
title = {Abstract Value Constructors: Symbolic Constants for Standard {ML}},
author = {William E. Aitken and John H. Reppy},
institution = {Department of Computer Science, Cornell University},
number = {TR 92-1290},
year = 1992,
month = jun,
note = {A shorter version appears in the proceedings of the ``ACM SIGPLAN
Workshop on ML and its Applications,'' 1992},
abstract = {
Standard ML (SML) has been used to implement a wide variety of large
systems, such as compilers, theorem provers, graphics libraries, and
even operating systems. While SML provides a
convenient, high-level notation for programming large applications,
it does have certain deficiencies. One such deficiency is the
lack of a general
mechanism for assigning symbolic names to constant values.
In this paper, we present a simple extension of SML that corrects this
deficiency in a way that fits naturally with the semantics of SML.
Our proposal is a generalization of SML's datatype constructors:
we introduce {\em constants} that generalize nullary
datatype constructors (like \texttt{nil}), and {\em templates} that generalize
non-nullary datatype constructors (like \texttt{::}).
Constants are identifiers bound to fixed values, and
templates are identifiers bound to structured values with labeled
holes. Templates are useful because they allow users to
treat the representation of structured data abstractly without
having to give up pattern matching.
},
pdf = {1992/tr-sml-const.pdf},
topic = {design,sml}
}
@inproceedings{aml,
title = {Attribute grammars in {ML}},
author = {S.G. Efremidis and K.A. Mughal and John Reppy},
booktitle = {{ACM} {SIGPLAN} Workshop on {ML} and its Aplications},
year = 1992,
month = jun,
topic = {sml,gui}
}
@phdthesis{reppy:phd,
author = {John H. Reppy},
title = {Higher-order Concurrency},
school = {Cornell University},
year = 1992,
note = {Available as Computer Science Technical Report 92-1285},
topic = {concurrency, implementation, sml, cml}
}
@incollection{gui-foundation,
author = {Emden R. Gansner and John H. Reppy},
title = {A Foundation for User Interface Construction},
chapter = {14},
editor = {Brad Myers},
booktitle = {Languages for Developing User Interfaces},
publisher = {Jones and Bartlett},
year = 1992,
topic = {concurrency, gui}
}
@inproceedings{reppy:cml,
author = {John H. Reppy},
title = {{CML}: A higher-order concurrent language},
booktitle = {Proceedings of the SIGPLAN 1991 Conference on Programming
Language Design and Implementation},
publisher = {ACM},
address = {New York, NY},
year = 1991,
month = jun,
pages = {293-305},
topic = {concurrency, implementation, sml, cml}
}
@inproceedings{eXene,
author = {Emden R. Gansner and John H. Reppy},
title = {e{X}ene},
booktitle = {Third International Workshop on {ML}},
year = 1991,
month = sep,
topic = {cml,concurrency,gui}
}
@techreport{reppy:signals,
author = {John H. Reppy},
title = {Asynchronous signals in {Standard ML}},
number = {TR 90-1144},
institution = {Department of Computer Science, Cornell University},
address = {Ithaca, NY},
year = 1990,
month = aug,
topic = {concurrency, implementation, sml}
}
@inproceedings{pldi-pml,
author = {John H. Reppy},
title = {Synchronous Operations as First-class Values},
booktitle = {Proceedings of the SIGPLAN 1988 Conference on Programming
Language Design and Implementation},
year = 1988,
month = jun,
topic = {concurrency, design}
}
@inproceedings{pegasus-gc,
author = {Steven C. North and John H. Reppy},
title = {Concurrent Garbage Collection on Stock Hardware},
booktitle = {Third International Conference on Functional
Programming Languages and Computer Architecture},
year = 1987,
month = sep,
series = {Lecture Notes in Computer Science},
volume = 274,
publisher = {Springer-Verlag},
address = {New York, NY},
pages = {113-133},
topic = {implementation,concurrency}
}
@inproceedings{pegasus,
author = {John H. Reppy and Emden R. Gansner},
title = {A foundation for programming environments},
booktitle = {Proceedings of the {ACM} {SIGSOFT/SIGPLAN} Software
Engineering Symposium on Practical Software Development Environments},
year = 1986,
month = dec,
pages = {218-227},
topic = {gui}
}
This file was generated by bibtex2html 1.97.