Fletchgen
The Fletcher Design Generator
nucleus.cc
1 // Copyright 2018-2019 Delft University of Technology
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #include <cerata/api.h>
16 #include <cerata/vhdl/vhdl.h>
17 #include <vector>
18 #include <string>
19 #include <cerata/parameter.h>
20 
21 #include "fletchgen/nucleus.h"
22 #include "fletchgen/basic_types.h"
23 #include "fletchgen/recordbatch.h"
24 #include "fletchgen/kernel.h"
25 #include "fletchgen/mmio.h"
26 #include "fletchgen/profiler.h"
27 #include "fletchgen/axi4_lite.h"
28 #include "fletchgen/external.h"
29 
30 namespace fletchgen {
31 
32 using cerata::port;
33 using cerata::vector;
34 using cerata::component;
35 using cerata::parameter;
36 
37 Component *accm() {
38  // Check if the Array component was already created.
39  auto opt_comp = cerata::default_component_pool()->Get("ArrayCmdCtrlMerger");
40  if (opt_comp) {
41  return *opt_comp;
42  }
43 
44  auto ba = bus_addr_width();
45  auto iw = index_width();
46  auto tw = tag_width();
47  auto num_addr = parameter("num_addr", 0);
48  auto kernel_side_cmd = port("kernel_cmd", cmd_type(iw, tw), Port::Dir::IN, kernel_cd());
49  auto nucleus_side_cmd = port("nucleus_cmd", cmd_type(iw, tw, num_addr * ba), Port::Dir::OUT, kernel_cd());
50  auto ctrl = port_array("ctrl", vector(ba), num_addr, Port::Dir::IN, kernel_cd());
51  auto result = component("ArrayCmdCtrlMerger", {num_addr, ba, iw, tw, kernel_side_cmd, nucleus_side_cmd, ctrl});
52 
53  // This is a primitive component from the hardware lib
54  result->SetMeta(cerata::vhdl::meta::PRIMITIVE, "true");
55  result->SetMeta(cerata::vhdl::meta::LIBRARY, "work");
56  result->SetMeta(cerata::vhdl::meta::PACKAGE, "Array_pkg");
57 
58  return result.get();
59 }
60 
61 static void CopyFieldPorts(Component *nucleus, const RecordBatch &record_batch, FieldPort::Function fun) {
62  // Add Arrow field derived ports with some function.
63  auto field_ports = record_batch.GetFieldPorts(fun);
64  cerata::NodeMap rebinding;
65  for (const auto &fp : field_ports) {
66  // Create a copy and invert for the Nucleus
67  auto copied_port = dynamic_cast<FieldPort *>(fp->CopyOnto(nucleus, fp->name(), &rebinding));
68  copied_port->Reverse();
69  }
70 }
71 
72 Nucleus::Nucleus(const std::string &name,
73  const std::vector<std::shared_ptr<RecordBatch>> &recordbatches,
74  const std::shared_ptr<Kernel> &kernel,
75  const std::shared_ptr<Component> &mmio,
76  Axi4LiteSpec axi_spec)
77  : Component(name) {
78  cerata::NodeMap rebinding;
79 
80  auto iw = index_width();
81  auto tw = tag_width();
82  Add(iw);
83  Add(tw);
84  // Add clock/reset
85  auto kcd = port("kcd", cr(), Port::Dir::IN, kernel_cd());
86  Add(kcd);
87  // Add AXI4-lite interface
88  auto axi = axi4_lite(Port::Dir::IN, bus_cd(), axi_spec);
89  Add(axi);
90 
91  // Instantiate the kernel and connect the clock/reset.
92  kernel_inst = Instantiate(kernel.get());
93  Connect(kernel_inst->prt("kcd"), kcd.get());
94 
95  // Instantiate the MMIO component and connect the AXI4-lite port and clock/reset.
96  auto mmio_inst = Instantiate(mmio.get());
97  mmio_inst->prt("mmio") <<= axi;
98  mmio_inst->prt("kcd") <<= kcd;
99  // For the kernel user, we need to abstract the "ctrl" field of the command streams away.
100  // We need to instantiate a little ArrayCommandCtrlMerger (accm) component that just adds the buffer addresses to
101  // the cmd stream ctrl field. We will remember the instances of that component and we'll get the buffer address ports
102  // for later on.
103  std::vector<Instance *> accms;
104  // Get all the buffer ports from the mmio instance.
105  std::vector<MmioPort *> mmio_buffer_ports;
106  for (const auto &p : mmio_inst->GetAll<MmioPort>()) {
107  if (p->reg.function == MmioFunction::BUFFER) {
108  mmio_buffer_ports.push_back(p);
109  }
110  }
111 
112  // Copy over the field-derived ports from the RecordBatches.
113  for (const auto &rb : recordbatches) {
114  CopyFieldPorts(this, *rb, FieldPort::Function::ARROW);
115  CopyFieldPorts(this, *rb, FieldPort::Function::UNLOCK);
116 
117  // For each one of the command streams, make an inverted copy of the RecordBatch full command stream port.
118  // This one will expose all command stream fields to the nucleus user.
119  auto cmd_ports = rb->GetFieldPorts(FieldPort::Function::COMMAND);
120  for (const auto &cmd : cmd_ports) {
121  // The command stream port type references the bus address width. Add that parameter to the nucleus.
122  auto prefix = rb->schema()->name() + "_" + cmd->field_->name();
123  auto ba = bus_addr_width(64, prefix);
124  Add(ba);
125 
126  auto nucleus_cmd = command_port(cmd->fletcher_schema_, cmd->field_, iw, tw, ba, kernel_cd());
127  nucleus_cmd->Reverse();
128  Add(nucleus_cmd);
129 
130  // Now, instantiate an ACCM that will merge the buffer addresses onto the command stream at the nucleus level.
131  auto accm_inst = Instantiate(accm(), cmd->name() + "_accm_inst");
132  // Connect the parameters.
133  accm_inst->par("BUS_ADDR_WIDTH")->SetValue(ba);
134  accm_inst->par("INDEX_WIDTH")->SetValue(iw);
135  accm_inst->par("TAG_WIDTH")->SetValue(tw);
136  // Remember the instance.
137  accms.push_back(accm_inst);
138  }
139  }
140 
141  // Add and connect all recordbatch ports
142  size_t batch_idx = 0;
143  size_t accm_idx = 0;
144  size_t buf_idx = 0;
145  for (const auto &r : recordbatches) {
146  // Connect Arrow data stream
147  for (const auto &ap : r->GetFieldPorts(FieldPort::Function::ARROW)) {
148  auto kernel_data = kernel_inst->prt(ap->name());
149  auto nucleus_data = prt(ap->name());
150  std::shared_ptr<cerata::Edge> edge;
151  if (ap->dir() == Port::OUT) {
152  edge = Connect(kernel_data, nucleus_data);
153  } else {
154  edge = Connect(nucleus_data, kernel_data);
155  }
156  }
157 
158  // Connect unlock stream
159  for (const auto &up : r->GetFieldPorts(FieldPort::Function::UNLOCK)) {
160  auto kernel_unl = kernel_inst->prt(up->name());
161  auto nucleus_unl = prt(up->name());
162  Connect(kernel_unl, nucleus_unl);
163  }
164 
165  // Connect the command stream through the ACCM.
166  size_t field_idx = 0;
167  for (const auto &cmd : r->GetFieldPorts(FieldPort::Function::COMMAND)) {
168  // Get the ports on either side of the ACCM.
169  auto accm_nucleus_cmd = accms[accm_idx]->prt("nucleus_cmd");
170  auto accm_kernel_cmd = accms[accm_idx]->prt("kernel_cmd");
171  auto accm_ctrl = accms[accm_idx]->prt_arr("ctrl");
172 
173  // Get the corresponding cmd ports on this nucleus and the kernel.
174  auto nucleus_cmd = this->prt(cmd->name());
175  auto kernel_cmd = kernel_inst->prt(cmd->name());
176 
177  // Connect the nucleus cmd to the ACCM cmd and the ACCM command to the kernel cmd.
178  Connect(nucleus_cmd, accm_nucleus_cmd);
179  Connect(accm_kernel_cmd, kernel_cmd);
180 
181  // To connect the buffer addresses from the mmio to the ACCM, we need to figure out which buffers there are.
182  // We can look this up in the RecordBatchDescription.
183  auto field_bufs = r->batch_desc().fields[field_idx].buffers;
184  for (size_t b = 0; b < field_bufs.size(); b++) {
185  // TODO(johanpel): it is here somewhat blatantly assumed mmio_buffer_ports follows ordering, etc.. properly.
186  // Perhaps it would be nicer if this was somewhat better synchronized.
187  Connect(accm_ctrl->Append(), mmio_buffer_ports[buf_idx]);
188  buf_idx++;
189  }
190  field_idx++;
191  accm_idx++;
192  }
193  batch_idx++;
194  }
195 
196  // Perform some magic to abstract the buffer addresses away from the ctrl stream at the kernel level.
197  // First, obtain the intended name for the kernel from the metadata of the vhdmmio component port metadata.
198  // Then, make a connection between these two components.
199  for (auto &p : mmio_inst->GetAll<MmioPort>()) {
200  if (ExposeToKernel(p->reg.function)) {
201  auto inst_port = kernel_inst->prt(p->reg.name);
202  if (p->dir() == Port::Dir::OUT) {
203  Connect(inst_port, p);
204  } else {
205  Connect(p, inst_port);
206  }
207  }
208  }
209 
210  // Gather all Field-derived ports that require profiling on this Nucleus.
211  ProfileDataStreams(mmio_inst);
212 
213  // Add and connect platform IO
214  auto ext = external();
215  if (ext) {
216  auto pf = cerata::port("ext", ext.value(), Port::Dir::OUT);
217  Add(pf);
218  Connect(pf, kernel_inst->prt("ext"));
219  }
220 }
221 
222 std::shared_ptr<Nucleus> nucleus(const std::string &name,
223  const std::vector<std::shared_ptr<RecordBatch>> &recordbatches,
224  const std::shared_ptr<Kernel> &kernel,
225  const std::shared_ptr<Component> &mmio,
226  Axi4LiteSpec axi_spec) {
227  return std::make_shared<Nucleus>(name, recordbatches, kernel, mmio, axi_spec);
228 }
229 
230 std::vector<FieldPort *> Nucleus::GetFieldPorts(FieldPort::Function fun) const {
231  std::vector<FieldPort *> result;
232  for (const auto &ofp : GetNodes()) {
233  auto fp = dynamic_cast<FieldPort *>(ofp);
234  if (fp != nullptr) {
235  if (fp->function_ == fun) {
236  result.push_back(fp);
237  }
238  }
239  }
240  return result;
241 }
242 
243 void Nucleus::ProfileDataStreams(Instance *mmio_inst) {
244  cerata::NodeMap rebinding;
245  // Insert a signal in between, and then mark that signal for profiling.
246  std::vector<cerata::Signal *> profile_nodes;
247  for (const auto &p : GetFieldPorts(FieldPort::Function::ARROW)) {
248  if (p->profile_) {
249  // At this point, these ports should only have one edge straight into the kernel.
250  if (p->edges().size() != 1) {
251  FLETCHER_LOG(ERROR, "Nucleus port has other than exactly one edge.");
252  }
253  // Insert a signal node in between that we can attach the profiler probe onto.
254  auto s = AttachSignalToNode(this, p, &rebinding);
255  profile_nodes.push_back(s);
256  }
257  }
258 
259  if (!profile_nodes.empty()) {
260  // Attach stream profilers to the ports that need to be profiled.
261  auto profiler_map = EnableStreamProfiling(this, profile_nodes);
262 
263  // TODO(johanpel): in the following code it is assumed ordering between profile nodes, streams and mmio ports is
264  // unchanged as well. This assumption might be a bit wild if things get added in the future, so it would be nice
265  // to figure out a better way to keep this synchronized.
266 
267  // Get the enable and clear ports.
268  auto enable = signal("Profile_enable", cerata::bit(), kernel_cd());
269  auto clear = signal("Profile_clear", cerata::bit(), kernel_cd());
270  Add({enable, clear});
271 
272  enable <<= mmio_inst->prt("f_Profile_enable_data");
273  clear <<= mmio_inst->prt("f_Profile_clear_data");
274 
275  // Gather all mmio profile result ports
276  std::vector<MmioPort *> mmio_profile_ports;
277  for (auto &p : mmio_inst->GetAll<MmioPort>()) {
278  if ((p->reg.function == MmioFunction::PROFILE) && (p->reg.behavior == MmioBehavior::STATUS)) {
279  mmio_profile_ports.push_back(p);
280  }
281  }
282  // Loop over all profiled nodes and connect them.
283  size_t port_idx = 0;
284  for (const auto &pair : profiler_map) {
285  auto instances = pair.second.first;
286  auto ports = pair.second.second;
287 
288  for (const auto &prof_inst : instances) {
289  Connect(prof_inst->prt("enable"), enable.get());
290  Connect(prof_inst->prt("clear"), clear.get());
291  }
292 
293  for (const auto &prof_port : ports) {
294  Connect(mmio_profile_ports[port_idx], prof_port);
295  port_idx++;
296  }
297  }
298  }
299 }
300 
301 } // namespace fletchgen
Contains all classes and functions related to Fletchgen.
Definition: array.cc:29
std::shared_ptr< Nucleus > nucleus(const std::string &name, const std::vector< std::shared_ptr< RecordBatch >> &recordbatches, const std::shared_ptr< Kernel > &kernel, const std::shared_ptr< Component > &mmio, Axi4LiteSpec axi_spec)
Make a Nucleus component based on RecordBatch components. Returns a shared pointer to the new Nucleus.
Definition: nucleus.cc:222
std::shared_ptr< Type > cmd_type(const std::shared_ptr< Node > &index_width, const std::shared_ptr< Node > &tag_width, const std::optional< std::shared_ptr< Node >> &ctrl_width)
Return a Fletcher command stream type.
Definition: array.cc:62
std::shared_ptr< ClockDomain > kernel_cd()
Fletcher accelerator clock domain.
Definition: basic_types.cc:62
std::shared_ptr< Type > cr()
Fletcher clock/reset.
Definition: basic_types.cc:73
@ STATUS
Register contents are controlled by the hardware kernel.
std::shared_ptr< Axi4LitePort > axi4_lite(Port::Dir dir, const std::shared_ptr< ClockDomain > &domain, Axi4LiteSpec spec)
Make a new AXI4-lite port, returning a shared pointer to it.
Definition: axi4_lite.cc:67
@ BUFFER
Registers for buffer addresses.
@ PROFILE
Register for the profiler.
std::shared_ptr< Kernel > kernel(const std::string &name, const std::vector< std::shared_ptr< RecordBatch >> &recordbatches, const std::shared_ptr< Component > &mmio)
Make a kernel component based on RecordBatch and MMIO components.
Definition: kernel.cc:93
bool ExposeToKernel(MmioFunction fun)
Return true if an mmio register's function must cause it to be exposed to the user kernel.
Definition: mmio.cc:165
Component * accm()
Return the ArrayCmdCtrlMerger component.
Definition: nucleus.cc:37
std::shared_ptr< RecordBatch > record_batch(const std::string &name, const std::shared_ptr< FletcherSchema > &fletcher_schema, const fletcher::RecordBatchDescription &batch_desc)
Make a new RecordBatch(Reader/Writer) component, based on a Fletcher schema.
Definition: recordbatch.cc:190
std::shared_ptr< ClockDomain > bus_cd()
Fletcher bus clock domain.
Definition: basic_types.cc:67
NodeProfilerPorts EnableStreamProfiling(cerata::Component *comp, const std::vector< cerata::Signal * > &profile_nodes)
Transforms a Cerata component graph to include stream profilers for selected nodes.
Definition: profiler.cc:155
std::shared_ptr< FieldPort > command_port(const std::shared_ptr< FletcherSchema > &schema, const std::shared_ptr< arrow::Field > &field, const std::shared_ptr< Node > &index_width, const std::shared_ptr< Node > &tag_width, std::optional< std::shared_ptr< Node >> addr_width, const std::shared_ptr< ClockDomain > &domain)
Construct a field-derived command port.
Definition: recordbatch.cc:218
std::shared_ptr< Component > mmio(const std::vector< fletcher::RecordBatchDescription > &batches, const std::vector< MmioReg > &regs, Axi4LiteSpec axi_spec)
Generate the MMIO component for the nucleus.
Definition: mmio.cc:62
AXI4-lite bus specification.
Definition: axi4_lite.h:29
A port derived from an Arrow field.
Definition: recordbatch.h:52
Function
Enumeration of FieldPort functions.
Definition: recordbatch.h:54
A port on the vhdmmio component. Remembers what register spec it came from.
Definition: mmio.h:105
void ProfileDataStreams(Instance *mmio_inst)
Profile any Arrow data streams that require profiling.
Definition: nucleus.cc:243
std::vector< FieldPort * > GetFieldPorts(FieldPort::Function fun) const
Return all field-derived ports with a specific function.
Definition: nucleus.cc:230
std::shared_ptr< Kernel > kernel
The kernel component.
Definition: nucleus.h:56
Instance * kernel_inst
The kernel instance.
Definition: nucleus.h:58
Nucleus(const std::string &name, const std::vector< std::shared_ptr< RecordBatch >> &recordbatches, const std::shared_ptr< Kernel > &kernel, const std::shared_ptr< Component > &mmio, Axi4LiteSpec axi_spec)
Construct a new Nucleus.
Definition: nucleus.cc:72