gpio.cc 27 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788
  1. // -*- mode: c++; c-basic-offset: 2; indent-tabs-mode: nil; -*-
  2. // Copyright (C) 2013 Henner Zeller <h.zeller@acm.org>
  3. //
  4. // This program is free software; you can redistribute it and/or modify
  5. // it under the terms of the GNU General Public License as published by
  6. // the Free Software Foundation version 2.
  7. //
  8. // This program is distributed in the hope that it will be useful,
  9. // but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. // GNU General Public License for more details.
  12. //
  13. // You should have received a copy of the GNU General Public License
  14. // along with this program. If not, see <http://gnu.org/licenses/gpl-2.0.txt>
#define __STDC_FORMAT_MACROS
#include <inttypes.h>

#include "gpio.h"

#include <assert.h>
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/mman.h>
#include <time.h>
#include <unistd.h>
/*
 * nanosleep() takes longer than requested because of OS jitter.
 * In about 99.9% of the cases, this is <= 25 microseconds on
 * the Raspberry Pi (empirically determined with a Raspbian kernel), so
 * we subtract this value whenever we do nanosleep(); the remaining time
 * we then busy wait to get a good accurate result.
 *
 * You can measure the overhead using DEBUG_SLEEP_JITTER below.
 *
 * Note: A higher value here will result in more CPU use because of more busy
 * waiting inching towards the real value (for all the cases that nanosleep()
 * actually was better than this overhead).
 *
 * This might be interesting to tweak in particular if you have a realtime
 * kernel with different characteristics.
 */
  42. #define EMPIRICAL_NANOSLEEP_OVERHEAD_US 12
  43. /*
  44. * In case of non-hardware pulse generation, use nanosleep if we want to wait
  45. * longer than these given microseconds beyond the general overhead.
  46. * Below that, just use busy wait.
  47. */
  48. #define MINIMUM_NANOSLEEP_TIME_US 5
  49. /* In order to determine useful values for above, set this to 1 and use the
  50. * hardware pin-pulser.
  51. * It will output a histogram atexit() of how much how often we were over
  52. * the requested time.
  53. * (The full histogram will be shifted by the EMPIRICAL_NANOSLEEP_OVERHEAD_US
  54. * value above. To get a full histogram of OS overhead, set it to 0 first).
  55. */
  56. #define DEBUG_SLEEP_JITTER 0
  57. // Raspberry 1 and 2 have different base addresses for the periphery
  58. #define BCM2708_PERI_BASE 0x20000000
  59. #define BCM2709_PERI_BASE 0x3F000000
  60. #define BCM2711_PERI_BASE 0xFE000000
  61. #define GPIO_REGISTER_OFFSET 0x200000
  62. #define COUNTER_1Mhz_REGISTER_OFFSET 0x3000
  63. #define GPIO_PWM_BASE_OFFSET (GPIO_REGISTER_OFFSET + 0xC000)
  64. #define GPIO_CLK_BASE_OFFSET 0x101000
  65. #define REGISTER_BLOCK_SIZE (4*1024)
  66. #define PWM_CTL (0x00 / 4)
  67. #define PWM_STA (0x04 / 4)
  68. #define PWM_RNG1 (0x10 / 4)
  69. #define PWM_FIFO (0x18 / 4)
  70. #define PWM_CTL_CLRF1 (1<<6) // CH1 Clear Fifo (1 Clears FIFO 0 has no effect)
  71. #define PWM_CTL_USEF1 (1<<5) // CH1 Use Fifo (0=data reg transmit 1=Fifo used for transmission)
  72. #define PWM_CTL_POLA1 (1<<4) // CH1 Polarity (0=(0=low 1=high) 1=(1=low 0=high)
  73. #define PWM_CTL_SBIT1 (1<<3) // CH1 Silence Bit (state of output when 0 transmission takes place)
  74. #define PWM_CTL_MODE1 (1<<1) // CH1 Mode (0=pwm 1=serialiser mode)
  75. #define PWM_CTL_PWEN1 (1<<0) // CH1 Enable (0=disable 1=enable)
  76. #define PWM_STA_EMPT1 (1<<1)
  77. #define PWM_STA_FULL1 (1<<0)
  78. #define CLK_PASSWD (0x5A<<24)
  79. #define CLK_CTL_MASH(x)((x)<<9)
  80. #define CLK_CTL_BUSY (1 <<7)
  81. #define CLK_CTL_KILL (1 <<5)
  82. #define CLK_CTL_ENAB (1 <<4)
  83. #define CLK_CTL_SRC(x) ((x)<<0)
  84. #define CLK_CTL_SRC_PLLD 6 /* 500.0 MHz */
  85. #define CLK_DIV_DIVI(x) ((x)<<12)
  86. #define CLK_DIV_DIVF(x) ((x)<< 0)
  87. #define CLK_PWMCTL 40
  88. #define CLK_PWMDIV 41
  89. // We want to have the last word in the fifo free
  90. #define MAX_PWM_BIT_USE 224
  91. #define PWM_BASE_TIME_NS 2
  92. // GPIO setup macros. Always use INP_GPIO(x) before using OUT_GPIO(x).
  93. #define INP_GPIO(g) *(s_GPIO_registers+((g)/10)) &= ~(7ull<<(((g)%10)*3))
  94. #define OUT_GPIO(g) *(s_GPIO_registers+((g)/10)) |= (1ull<<(((g)%10)*3))
  95. #define GPIO_SET *(gpio+7) // sets bits which are 1 ignores bits which are 0
  96. #define GPIO_CLR *(gpio+10) // clears bits which are 1 ignores bits which are 0
  97. // We're pre-mapping all the registers on first call of GPIO::Init(),
  98. // so that it is possible to drop privileges afterwards and still have these
  99. // usable.
  100. static volatile uint32_t *s_GPIO_registers = NULL;
  101. static volatile uint32_t *s_Timer1Mhz = NULL;
  102. static volatile uint32_t *s_PWM_registers = NULL;
  103. static volatile uint32_t *s_CLK_registers = NULL;
  104. namespace rgb_matrix {
  105. #define GPIO_BIT(x) (1ull << x)
  106. GPIO::GPIO() : output_bits_(0), input_bits_(0), reserved_bits_(0),
  107. slowdown_(1)
  108. #ifdef ENABLE_WIDE_GPIO_COMPUTE_MODULE
  109. , uses_64_bit_(false)
  110. #endif
  111. {
  112. }
  113. gpio_bits_t GPIO::InitOutputs(gpio_bits_t outputs,
  114. bool adafruit_pwm_transition_hack_needed) {
  115. if (s_GPIO_registers == NULL) {
  116. fprintf(stderr, "Attempt to init outputs but not yet Init()-ialized.\n");
  117. return 0;
  118. }
  119. // Hack: for the PWM mod, the user soldered together GPIO 18 (new OE)
  120. // with GPIO 4 (old OE).
  121. // Since they are connected inside the HAT, want to make extra sure that,
  122. // whatever the outside system set as pinmux, the old OE is _not_ also
  123. // set as output so that these GPIO outputs don't fight each other.
  124. //
  125. // So explicitly set both of these pins as input initially, so the user
  126. // can switch between the two modes "adafruit-hat" and "adafruit-hat-pwm"
  127. // without trouble.
  128. if (adafruit_pwm_transition_hack_needed) {
  129. INP_GPIO(4);
  130. INP_GPIO(18);
  131. // Even with PWM enabled, GPIO4 still can not be used, because it is
  132. // now connected to the GPIO18 and thus must stay an input.
  133. // So reserve this bit if it is not set in outputs.
  134. reserved_bits_ = GPIO_BIT(4) & ~outputs;
  135. }
  136. outputs &= ~(output_bits_ | input_bits_ | reserved_bits_);
  137. #ifdef ENABLE_WIDE_GPIO_COMPUTE_MODULE
  138. const int kMaxAvailableBit = 45;
  139. uses_64_bit_ |= (outputs >> 32) != 0;
  140. #else
  141. const int kMaxAvailableBit = 31;
  142. #endif
  143. for (int b = 0; b <= kMaxAvailableBit; ++b) {
  144. if (outputs & GPIO_BIT(b)) {
  145. INP_GPIO(b); // for writing, we first need to set as input.
  146. OUT_GPIO(b);
  147. }
  148. }
  149. output_bits_ |= outputs;
  150. return outputs;
  151. }
  152. gpio_bits_t GPIO::RequestInputs(gpio_bits_t inputs) {
  153. if (s_GPIO_registers == NULL) {
  154. fprintf(stderr, "Attempt to init inputs but not yet Init()-ialized.\n");
  155. return 0;
  156. }
  157. inputs &= ~(output_bits_ | input_bits_ | reserved_bits_);
  158. #ifdef ENABLE_WIDE_GPIO_COMPUTE_MODULE
  159. const int kMaxAvailableBit = 45;
  160. uses_64_bit_ |= (inputs >> 32) != 0;
  161. #else
  162. const int kMaxAvailableBit = 31;
  163. #endif
  164. for (int b = 0; b <= kMaxAvailableBit; ++b) {
  165. if (inputs & GPIO_BIT(b)) {
  166. INP_GPIO(b);
  167. }
  168. }
  169. input_bits_ |= inputs;
  170. return inputs;
  171. }
  172. // We are not interested in the _exact_ model, just good enough to determine
  173. // What to do.
  174. enum RaspberryPiModel {
  175. PI_MODEL_1,
  176. PI_MODEL_2,
  177. PI_MODEL_3,
  178. PI_MODEL_4
  179. };
  180. static int ReadFileToBuffer(char *buffer, size_t size, const char *filename) {
  181. const int fd = open(filename, O_RDONLY);
  182. if (fd < 0) return -1;
  183. ssize_t r = read(fd, buffer, size - 1); // assume one read enough
  184. buffer[r >= 0 ? r : 0] = '\0';
  185. close(fd);
  186. return r;
  187. }
  188. static RaspberryPiModel DetermineRaspberryModel() {
  189. char buffer[4096];
  190. if (ReadFileToBuffer(buffer, sizeof(buffer), "/proc/cpuinfo") < 0) {
  191. fprintf(stderr, "Reading cpuinfo: Could not determine Pi model\n");
  192. return PI_MODEL_3; // safe guess fallback.
  193. }
  194. static const char RevisionTag[] = "Revision";
  195. const char *revision_key;
  196. if ((revision_key = strstr(buffer, RevisionTag)) == NULL) {
  197. fprintf(stderr, "non-existent Revision: Could not determine Pi model\n");
  198. return PI_MODEL_3;
  199. }
  200. unsigned int pi_revision;
  201. if (sscanf(index(revision_key, ':') + 1, "%x", &pi_revision) != 1) {
  202. fprintf(stderr, "Unknown Revision: Could not determine Pi model\n");
  203. return PI_MODEL_3;
  204. }
  205. // https://www.raspberrypi.org/documentation/hardware/raspberrypi/revision-codes/README.md
  206. const unsigned pi_type = (pi_revision >> 4) & 0xff;
  207. switch (pi_type) {
  208. case 0x00: /* A */
  209. case 0x01: /* B, Compute Module 1 */
  210. case 0x02: /* A+ */
  211. case 0x03: /* B+ */
  212. case 0x05: /* Alpha ?*/
  213. case 0x06: /* Compute Module1 */
  214. case 0x09: /* Zero */
  215. case 0x0c: /* Zero W */
  216. return PI_MODEL_1;
  217. case 0x04: /* Pi 2 */
  218. return PI_MODEL_2;
  219. case 0x11: /* Pi 4 */
  220. case 0x14: /* CM4 */
  221. return PI_MODEL_4;
  222. default: /* a bunch of versions representing Pi 3 */
  223. return PI_MODEL_3;
  224. }
  225. }
  226. static RaspberryPiModel GetPiModel() {
  227. static RaspberryPiModel pi_model = DetermineRaspberryModel();
  228. return pi_model;
  229. }
  230. static int GetNumCores() {
  231. return GetPiModel() == PI_MODEL_1 ? 1 : 4;
  232. }
  233. static uint32_t *mmap_bcm_register(off_t register_offset) {
  234. off_t base = BCM2709_PERI_BASE; // safe fallback guess.
  235. switch (GetPiModel()) {
  236. case PI_MODEL_1: base = BCM2708_PERI_BASE; break;
  237. case PI_MODEL_2: base = BCM2709_PERI_BASE; break;
  238. case PI_MODEL_3: base = BCM2709_PERI_BASE; break;
  239. case PI_MODEL_4: base = BCM2711_PERI_BASE; break;
  240. }
  241. int mem_fd;
  242. if ((mem_fd = open("/dev/mem", O_RDWR|O_SYNC) ) < 0) {
  243. // Try to fall back to /dev/gpiomem. Unfortunately, that device
  244. // is implemented in a way that it _only_ supports GPIO, not the
  245. // other registers we need, such as PWM or COUNTER_1Mhz, which means
  246. // we only can operate with degraded performance.
  247. //
  248. // But, instead of failing, mmap() then silently succeeds with the
  249. // unsupported offset. So bail out here.
  250. if (register_offset != GPIO_REGISTER_OFFSET)
  251. return NULL;
  252. mem_fd = open("/dev/gpiomem", O_RDWR|O_SYNC);
  253. if (mem_fd < 0) return NULL;
  254. }
  255. uint32_t *result =
  256. (uint32_t*) mmap(NULL, // Any adddress in our space will do
  257. REGISTER_BLOCK_SIZE, // Map length
  258. PROT_READ|PROT_WRITE, // Enable r/w on GPIO registers.
  259. MAP_SHARED,
  260. mem_fd, // File to map
  261. base + register_offset // Offset to bcm register
  262. );
  263. close(mem_fd);
  264. if (result == MAP_FAILED) {
  265. perror("mmap error: ");
  266. fprintf(stderr, "MMapping from base 0x%lx, offset 0x%lx\n",
  267. base, register_offset);
  268. return NULL;
  269. }
  270. return result;
  271. }
  272. static bool mmap_all_bcm_registers_once() {
  273. if (s_GPIO_registers != NULL) return true; // alrady done.
  274. // The common GPIO registers.
  275. s_GPIO_registers = mmap_bcm_register(GPIO_REGISTER_OFFSET);
  276. if (s_GPIO_registers == NULL) {
  277. return false;
  278. }
  279. // Time measurement. Might fail when run as non-root.
  280. uint32_t *timereg = mmap_bcm_register(COUNTER_1Mhz_REGISTER_OFFSET);
  281. if (timereg != NULL) {
  282. s_Timer1Mhz = timereg + 1;
  283. }
  284. // Hardware pin-pulser. Might fail when run as non-root.
  285. s_PWM_registers = mmap_bcm_register(GPIO_PWM_BASE_OFFSET);
  286. s_CLK_registers = mmap_bcm_register(GPIO_CLK_BASE_OFFSET);
  287. return true;
  288. }
  289. bool GPIO::Init(int slowdown) {
  290. slowdown_ = slowdown;
  291. // Pre-mmap all bcm registers we need now and possibly in the future, as to
  292. // allow dropping privileges after GPIO::Init() even as some of these
  293. // registers might be needed later.
  294. if (!mmap_all_bcm_registers_once())
  295. return false;
  296. gpio_set_bits_low_ = s_GPIO_registers + (0x1C / sizeof(uint32_t));
  297. gpio_clr_bits_low_ = s_GPIO_registers + (0x28 / sizeof(uint32_t));
  298. gpio_read_bits_low_ = s_GPIO_registers + (0x34 / sizeof(uint32_t));
  299. #ifdef ENABLE_WIDE_GPIO_COMPUTE_MODULE
  300. gpio_set_bits_high_ = s_GPIO_registers + (0x20 / sizeof(uint32_t));
  301. gpio_clr_bits_high_ = s_GPIO_registers + (0x2C / sizeof(uint32_t));
  302. gpio_read_bits_high_ = s_GPIO_registers + (0x38 / sizeof(uint32_t));
  303. #endif
  304. return true;
  305. }
/*
 * We support also other pinouts that don't have the OE- on the hardware
 * PWM output pin, so we need to provide (imperfect) 'manual' timing as well.
 * Hence all various busy_wait_nano() implementations depending on the hardware.
 */
  311. // --- PinPulser. Private implementation parts.
  312. namespace {
  313. // Manual timers.
  314. class Timers {
  315. public:
  316. static bool Init();
  317. static void sleep_nanos(long t);
  318. };
  319. // Simplest of PinPulsers. Uses somewhat jittery and manual timers
  320. // to get the timing, but not optimal.
  321. class TimerBasedPinPulser : public PinPulser {
  322. public:
  323. TimerBasedPinPulser(GPIO *io, gpio_bits_t bits,
  324. const std::vector<int> &nano_specs)
  325. : io_(io), bits_(bits), nano_specs_(nano_specs) {
  326. if (!s_Timer1Mhz) {
  327. fprintf(stderr, "FYI: not running as root which means we can't properly "
  328. "control timing unless this is a real-time kernel. Expect color "
  329. "degradation. Consider running as root with sudo.\n");
  330. }
  331. }
  332. virtual void SendPulse(int time_spec_number) {
  333. io_->ClearBits(bits_);
  334. Timers::sleep_nanos(nano_specs_[time_spec_number]);
  335. io_->SetBits(bits_);
  336. }
  337. private:
  338. GPIO *const io_;
  339. const gpio_bits_t bits_;
  340. const std::vector<int> nano_specs_;
  341. };
  342. static bool LinuxHasModuleLoaded(const char *name) {
  343. FILE *f = fopen("/proc/modules", "r");
  344. if (f == NULL) return false; // don't care.
  345. char buf[256];
  346. const size_t namelen = strlen(name);
  347. bool found = false;
  348. while (fgets(buf, sizeof(buf), f) != NULL) {
  349. if (strncmp(buf, name, namelen) == 0) {
  350. found = true;
  351. break;
  352. }
  353. }
  354. fclose(f);
  355. return found;
  356. }
  357. static void busy_wait_nanos_rpi_1(long nanos);
  358. static void busy_wait_nanos_rpi_2(long nanos);
  359. static void busy_wait_nanos_rpi_3(long nanos);
  360. static void busy_wait_nanos_rpi_4(long nanos);
  361. static void (*busy_wait_impl)(long) = busy_wait_nanos_rpi_3;
  362. // Best effort write to file. Used to set kernel parameters.
  363. static void WriteTo(const char *filename, const char *str) {
  364. const int fd = open(filename, O_WRONLY);
  365. if (fd < 0) return;
  366. (void) write(fd, str, strlen(str)); // Best effort. Ignore return value.
  367. close(fd);
  368. }
  369. // By default, the kernel applies some throtteling for realtime
  370. // threads to prevent starvation of non-RT threads. But we
  371. // really want all we can get iff the machine has more cores and
  372. // our RT-thread is locked onto one of these.
  373. // So let's tell it not to do that.
  374. static void DisableRealtimeThrottling() {
  375. if (GetNumCores() == 1) return; // Not safe if we don't have > 1 core.
  376. // We need to leave the kernel a little bit of time, as it does not like
  377. // us to hog the kernel solidly. The default of 950000 leaves 50ms that
  378. // can generate visible flicker, so we reduce that to 1ms.
  379. WriteTo("/proc/sys/kernel/sched_rt_runtime_us", "999000");
  380. }
  381. bool Timers::Init() {
  382. if (!mmap_all_bcm_registers_once())
  383. return false;
  384. // Choose the busy-wait loop that fits our Pi.
  385. switch (GetPiModel()) {
  386. case PI_MODEL_1: busy_wait_impl = busy_wait_nanos_rpi_1; break;
  387. case PI_MODEL_2: busy_wait_impl = busy_wait_nanos_rpi_2; break;
  388. case PI_MODEL_3: busy_wait_impl = busy_wait_nanos_rpi_3; break;
  389. case PI_MODEL_4: busy_wait_impl = busy_wait_nanos_rpi_4; break;
  390. }
  391. DisableRealtimeThrottling();
  392. // If we have it, we run the update thread on core3. No perf-compromises:
  393. WriteTo("/sys/devices/system/cpu/cpu3/cpufreq/scaling_governor",
  394. "performance");
  395. return true;
  396. }
  397. static uint32_t JitterAllowanceMicroseconds() {
  398. // If this is a Raspberry Pi with more than one core, we add a bit of
  399. // additional overhead measured up to the 99.999%-ile: we can allow to burn
  400. // a bit more busy-wait CPU cycles to get the timing accurate as we have
  401. // more CPU to spare.
  402. switch (GetPiModel()) {
  403. case PI_MODEL_1:
  404. return EMPIRICAL_NANOSLEEP_OVERHEAD_US; // 99.9%-ile
  405. case PI_MODEL_2: case PI_MODEL_3:
  406. return EMPIRICAL_NANOSLEEP_OVERHEAD_US + 35; // 99.999%-ile
  407. case PI_MODEL_4:
  408. return EMPIRICAL_NANOSLEEP_OVERHEAD_US + 10; // this one is fast.
  409. }
  410. return EMPIRICAL_NANOSLEEP_OVERHEAD_US;
  411. }
  412. void Timers::sleep_nanos(long nanos) {
  413. // For smaller durations, we go straight to busy wait.
  414. // For larger duration, we use nanosleep() to give the operating system
  415. // a chance to do something else.
  416. // However, these timings have a lot of jitter, so if we have the 1Mhz timer
  417. // available, we use that to accurately mesure time spent and do the
  418. // remaining time with busy wait. If we don't have the timer available
  419. // (not running as root), we just use nanosleep() for larger values.
  420. if (s_Timer1Mhz) {
  421. static long kJitterAllowanceNanos = JitterAllowanceMicroseconds() * 1000;
  422. if (nanos > kJitterAllowanceNanos + MINIMUM_NANOSLEEP_TIME_US*1000) {
  423. const uint32_t before = *s_Timer1Mhz;
  424. struct timespec sleep_time = { 0, nanos - kJitterAllowanceNanos };
  425. nanosleep(&sleep_time, NULL);
  426. const uint32_t after = *s_Timer1Mhz;
  427. const long nanoseconds_passed = 1000 * (uint32_t)(after - before);
  428. if (nanoseconds_passed > nanos) {
  429. return; // darn, missed it.
  430. } else {
  431. nanos -= nanoseconds_passed; // remaining time with busy-loop
  432. }
  433. }
  434. } else {
  435. // Not running as root, not having access to 1Mhz timer. Approximate large
  436. // durations with nanosleep(); small durations are done with busy wait.
  437. if (nanos > (EMPIRICAL_NANOSLEEP_OVERHEAD_US + MINIMUM_NANOSLEEP_TIME_US)*1000) {
  438. struct timespec sleep_time
  439. = { 0, nanos - EMPIRICAL_NANOSLEEP_OVERHEAD_US*1000 };
  440. nanosleep(&sleep_time, NULL);
  441. return;
  442. }
  443. }
  444. busy_wait_impl(nanos); // Use model-specific busy-loop for remaining time.
  445. }
  446. static void busy_wait_nanos_rpi_1(long nanos) {
  447. if (nanos < 70) return;
  448. // The following loop is determined empirically on a 700Mhz RPi
  449. for (uint32_t i = (nanos - 70) >> 2; i != 0; --i) {
  450. asm("nop");
  451. }
  452. }
  453. static void busy_wait_nanos_rpi_2(long nanos) {
  454. if (nanos < 20) return;
  455. // The following loop is determined empirically on a 900Mhz RPi 2
  456. for (uint32_t i = (nanos - 20) * 100 / 110; i != 0; --i) {
  457. asm("");
  458. }
  459. }
  460. static void busy_wait_nanos_rpi_3(long nanos) {
  461. if (nanos < 20) return;
  462. for (uint32_t i = (nanos - 15) * 100 / 73; i != 0; --i) {
  463. asm("");
  464. }
  465. }
  466. static void busy_wait_nanos_rpi_4(long nanos) {
  467. if (nanos < 20) return;
  468. // Interesting, the Pi4 is _slower_ than the Pi3 ? At least for this busy loop
  469. for (uint32_t i = (nanos - 5) * 100 / 132; i != 0; --i) {
  470. asm("");
  471. }
  472. }
  473. #if DEBUG_SLEEP_JITTER
  474. static int overshoot_histogram_us[256] = {0};
  475. static void print_overshoot_histogram() {
  476. fprintf(stderr, "Overshoot histogram >= empirical overhead of %dus\n"
  477. "%6s | %7s | %7s\n",
  478. JitterAllowanceMicroseconds(), "usec", "count", "accum");
  479. int total_count = 0;
  480. for (int i = 0; i < 256; ++i) total_count += overshoot_histogram_us[i];
  481. int running_count = 0;
  482. for (int us = 0; us < 256; ++us) {
  483. const int count = overshoot_histogram_us[us];
  484. if (count > 0) {
  485. running_count += count;
  486. fprintf(stderr, "%s%3dus: %8d %7.3f%%\n", (us == 0) ? "<=" : " +",
  487. us, count, 100.0 * running_count / total_count);
  488. }
  489. }
  490. }
  491. #endif
  492. // A PinPulser that uses the PWM hardware to create accurate pulses.
  493. // It only works on GPIO-12 or 18 though.
  494. class HardwarePinPulser : public PinPulser {
  495. public:
  496. static bool CanHandle(gpio_bits_t gpio_mask) {
  497. #ifdef DISABLE_HARDWARE_PULSES
  498. return false;
  499. #else
  500. const bool can_handle = gpio_mask==GPIO_BIT(18) || gpio_mask==GPIO_BIT(12);
  501. if (can_handle && (s_PWM_registers == NULL || s_CLK_registers == NULL)) {
  502. // Instead of silently not using the hardware pin pulser and falling back
  503. // to timing based loops, complain loudly and request the user to make
  504. // a choice before continuing.
  505. fprintf(stderr, "Need root. You are configured to use the hardware pulse "
  506. "generator "
  507. "for\n\tsmooth color rendering, however the necessary hardware\n"
  508. "\tregisters can't be accessed because you probably don't run\n"
  509. "\twith root permissions or privileges have been dropped.\n"
  510. "\tSo you either have to run as root (e.g. using sudo) or\n"
  511. "\tsupply the --led-no-hardware-pulse command-line flag.\n\n"
  512. "\tExiting; run as root or with --led-no-hardware-pulse\n\n");
  513. exit(1);
  514. }
  515. return can_handle;
  516. #endif
  517. }
  518. HardwarePinPulser(gpio_bits_t pins, const std::vector<int> &specs)
  519. : triggered_(false) {
  520. assert(CanHandle(pins));
  521. assert(s_CLK_registers && s_PWM_registers && s_Timer1Mhz);
  522. #if DEBUG_SLEEP_JITTER
  523. atexit(print_overshoot_histogram);
  524. #endif
  525. if (LinuxHasModuleLoaded("snd_bcm2835")) {
  526. fprintf(stderr,
  527. "\n%s=== snd_bcm2835: found that the Pi sound module is loaded. ===%s\n"
  528. "Don't use the built-in sound of the Pi together with this lib; it is known to be\n"
  529. "incompatible and cause trouble and hangs (you can still use external USB sound adapters).\n\n"
  530. "See Troubleshooting section in README how to disable the sound module.\n"
  531. "You can also run with --led-no-hardware-pulse to avoid the incompatibility,\n"
  532. "but you will have more flicker.\n"
  533. "Exiting; fix the above first or use --led-no-hardware-pulse\n\n",
  534. "\033[1;31m", "\033[0m");
  535. exit(1);
  536. }
  537. for (size_t i = 0; i < specs.size(); ++i) {
  538. // Hints how long to nanosleep, already corrected for system overhead.
  539. sleep_hints_us_.push_back(specs[i]/1000 - JitterAllowanceMicroseconds());
  540. }
  541. const int base = specs[0];
  542. // Get relevant registers
  543. fifo_ = s_PWM_registers + PWM_FIFO;
  544. if (pins == GPIO_BIT(18)) {
  545. // set GPIO 18 to PWM0 mode (Alternative 5)
  546. SetGPIOMode(s_GPIO_registers, 18, 2);
  547. } else if (pins == GPIO_BIT(12)) {
  548. // set GPIO 12 to PWM0 mode (Alternative 0)
  549. SetGPIOMode(s_GPIO_registers, 12, 4);
  550. } else {
  551. assert(false); // should've been caught by CanHandle()
  552. }
  553. InitPWMDivider((base/2) / PWM_BASE_TIME_NS);
  554. for (size_t i = 0; i < specs.size(); ++i) {
  555. pwm_range_.push_back(2 * specs[i] / base);
  556. }
  557. }
  558. virtual void SendPulse(int c) {
  559. if (pwm_range_[c] < 16) {
  560. s_PWM_registers[PWM_RNG1] = pwm_range_[c];
  561. *fifo_ = pwm_range_[c];
  562. } else {
  563. // Keep the actual range as short as possible, as we have to
  564. // wait for one full period of these in the zero phase.
  565. // The hardware can't deal with values < 2, so only do this when
  566. // have enough of these.
  567. s_PWM_registers[PWM_RNG1] = pwm_range_[c] / 8;
  568. *fifo_ = pwm_range_[c] / 8;
  569. *fifo_ = pwm_range_[c] / 8;
  570. *fifo_ = pwm_range_[c] / 8;
  571. *fifo_ = pwm_range_[c] / 8;
  572. *fifo_ = pwm_range_[c] / 8;
  573. *fifo_ = pwm_range_[c] / 8;
  574. *fifo_ = pwm_range_[c] / 8;
  575. *fifo_ = pwm_range_[c] / 8;
  576. }
  577. /*
  578. * We need one value at the end to have it go back to
  579. * default state (otherwise it just repeats the last
  580. * value, so will be constantly 'on').
  581. */
  582. *fifo_ = 0; // sentinel.
  583. /*
  584. * For some reason, we need a second empty sentinel in the
  585. * fifo, otherwise our way to detect the end of the pulse,
  586. * which relies on 'is the queue empty' does not work. It is
  587. * not entirely clear why that is from the datasheet,
  588. * but probably there is some buffering register in which data
  589. * elements are kept after the fifo is emptied.
  590. */
  591. *fifo_ = 0;
  592. sleep_hint_us_ = sleep_hints_us_[c];
  593. start_time_ = *s_Timer1Mhz;
  594. triggered_ = true;
  595. s_PWM_registers[PWM_CTL] = PWM_CTL_USEF1 | PWM_CTL_PWEN1 | PWM_CTL_POLA1;
  596. }
  597. virtual void WaitPulseFinished() {
  598. if (!triggered_) return;
  599. // Determine how long we already spent and sleep to get close to the
  600. // actual end-time of our sleep period.
  601. //
  602. // TODO(hzeller): find if it is possible to get some sort of interrupt from
  603. // the hardware once it is done with the pulse. Sounds silly that there is
  604. // not (so far, only tested GPIO interrupt with a feedback line, but that
  605. // is super-slow with 20μs overhead).
  606. if (sleep_hint_us_ > 0) {
  607. const uint32_t already_elapsed_usec = *s_Timer1Mhz - start_time_;
  608. const int to_sleep_us = sleep_hint_us_ - already_elapsed_usec;
  609. if (to_sleep_us > 0) {
  610. struct timespec sleep_time = { 0, 1000 * to_sleep_us };
  611. nanosleep(&sleep_time, NULL);
  612. #if DEBUG_SLEEP_JITTER
  613. {
  614. // Record histogram of realtime jitter how much longer we actually
  615. // took.
  616. const int total_us = *s_Timer1Mhz - start_time_;
  617. const int nanoslept_us = total_us - already_elapsed_usec;
  618. int overshoot = nanoslept_us - (to_sleep_us + JitterAllowanceMicroseconds());
  619. if (overshoot < 0) overshoot = 0;
  620. if (overshoot > 255) overshoot = 255;
  621. overshoot_histogram_us[overshoot]++;
  622. }
  623. #endif
  624. }
  625. }
  626. while ((s_PWM_registers[PWM_STA] & PWM_STA_EMPT1) == 0) {
  627. // busy wait until done.
  628. }
  629. s_PWM_registers[PWM_CTL] = PWM_CTL_USEF1 | PWM_CTL_POLA1 | PWM_CTL_CLRF1;
  630. triggered_ = false;
  631. }
  632. private:
  633. void SetGPIOMode(volatile uint32_t *gpioReg, unsigned gpio, unsigned mode) {
  634. const int reg = gpio / 10;
  635. const int mode_pos = (gpio % 10) * 3;
  636. gpioReg[reg] = (gpioReg[reg] & ~(7 << mode_pos)) | (mode << mode_pos);
  637. }
  638. void InitPWMDivider(uint32_t divider) {
  639. assert(divider < (1<<12)); // we only have 12 bits.
  640. s_PWM_registers[PWM_CTL] = PWM_CTL_USEF1 | PWM_CTL_POLA1 | PWM_CTL_CLRF1;
  641. // reset PWM clock
  642. s_CLK_registers[CLK_PWMCTL] = CLK_PASSWD | CLK_CTL_KILL;
  643. // set PWM clock source as 500 MHz PLLD
  644. s_CLK_registers[CLK_PWMCTL] = CLK_PASSWD | CLK_CTL_SRC(CLK_CTL_SRC_PLLD);
  645. // set PWM clock divider
  646. s_CLK_registers[CLK_PWMDIV]
  647. = CLK_PASSWD | CLK_DIV_DIVI(divider) | CLK_DIV_DIVF(0);
  648. // enable PWM clock
  649. s_CLK_registers[CLK_PWMCTL]
  650. = CLK_PASSWD | CLK_CTL_ENAB | CLK_CTL_SRC(CLK_CTL_SRC_PLLD);
  651. }
  652. private:
  653. std::vector<uint32_t> pwm_range_;
  654. std::vector<int> sleep_hints_us_;
  655. volatile uint32_t *fifo_;
  656. uint32_t start_time_;
  657. int sleep_hint_us_;
  658. bool triggered_;
  659. };
  660. } // end anonymous namespace
  661. // Public PinPulser factory
  662. PinPulser *PinPulser::Create(GPIO *io, gpio_bits_t gpio_mask,
  663. bool allow_hardware_pulsing,
  664. const std::vector<int> &nano_wait_spec) {
  665. if (!Timers::Init()) return NULL;
  666. if (allow_hardware_pulsing && HardwarePinPulser::CanHandle(gpio_mask)) {
  667. return new HardwarePinPulser(gpio_mask, nano_wait_spec);
  668. } else {
  669. return new TimerBasedPinPulser(io, gpio_mask, nano_wait_spec);
  670. }
  671. }
  672. // For external use, e.g. in the matrix for extra time.
  673. uint32_t GetMicrosecondCounter() {
  674. if (s_Timer1Mhz) return *s_Timer1Mhz;
  675. // When run as non-root, we can't read the timer. Fall back to slow
  676. // operating-system ways.
  677. struct timespec ts;
  678. clock_gettime(CLOCK_MONOTONIC, &ts);
  679. const uint64_t micros = ts.tv_nsec / 1000;
  680. const uint64_t epoch_usec = (uint64_t)ts.tv_sec * 1000000 + micros;
  681. return epoch_usec & 0xFFFFFFFF;
  682. }
  683. } // namespace rgb_matrix